diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-11 12:23:18 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-11 13:03:20 -0400 |
commit | a8fd5a8dee066d0008e7667b0c9e6a60cd5f3a2e (patch) | |
tree | f05095d4b6458a709034a182649e6d16b6a8558a | |
parent | 5ea953292441e31e37ae074e48d8b3b5ce1d9440 (diff) |
Support page directories outside PRAMIN or in SYS_MEM
- Re-read PRAMIN configuration after update to verify change applies
- Return a page_dir_config_t rather than just an address and page
table version from `get_bar2_pdb()`.
- Less verbose logging for MMU-related functions by default.
- Perform all conversions from SYS_MEM/VID_MEM addresses to kernel
addresses inside the translation functions, via the new function
`pd_deref()`.
- Support use of an I/O MMU, page tables/directories outside the
current PRAMIN window, and page tables/directories arbitrarily
located in SYS_MEM or VID_MEM on different levels of the same tree.
- Heavily improve documentation and add references for Version 1 and
Version 0 page tables.
- Improve logging in `runlist.c` to include runlist and chip IDs.
- Update all users of search_page_directory* to use the new API.
- Remove now-unused supporting functions from `mmu.c`.
Tested on GTX 970, GTX 1060 3GB, Jetson TX2, Titan V, Jetson Xavier,
and RTX 2080 Ti.
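Example of the new API from the caller's side (a sketch mirroring the updated
`runlist.c` code below; `find_bar2_mapping()` is a hypothetical wrapper, not
part of this commit):

```c
// Sketch: one call now yields the PDB pointer, aperture, and table version.
static uint64_t find_bar2_mapping(struct nvdebug_state *g, uint64_t phys_addr) {
	page_dir_config_t pd_config;
	int err;

	if ((err = get_bar2_pdb(g, &pd_config)) < 0)
		return 0;  // BAR2 unavailable or PDB config unreadable

	// The table version travels in the config (is_ver2), not an out-param
	if (pd_config.is_ver2)
		return search_page_directory(g, pd_config, phys_addr);
	return search_v1_page_directory(g, pd_config, phys_addr);
}
```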
-rw-r--r-- | bus.c | 50 | ||||
-rw-r--r-- | mmu.c | 206 | ||||
-rw-r--r-- | nvdebug.h | 158 | ||||
-rw-r--r-- | nvdebug_entry.c | 2 | ||||
-rw-r--r-- | runlist.c | 33 |
5 files changed, 268 insertions, 181 deletions
@@ -57,35 +57,25 @@ relocate: | |||
57 | window.base = (u32)(addr >> 16); // Safe, due to above range check | 57 | window.base = (u32)(addr >> 16); // Safe, due to above range check |
58 | window.target = target; | 58 | window.target = target; |
59 | nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw); | 59 | nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw); |
60 | // Wait for the window to move by re-reading (as done in nvgpu driver) | ||
61 | (void) nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); | ||
60 | return (int)(addr & 0xffffull); | 62 | return (int)(addr & 0xffffull); |
61 | } | 63 | } |
62 | 64 | ||
63 | 65 | /* Get a copy of the BAR2 page directory configuration (base and aperture) | |
64 | /* Get a persistent pointer to the page directory base | 66 | @param pd Pointer at which to store the configuration, including a pointer |
65 | @param pdb Dereferencable pointer to the zeroeth entry of top-level page | 67 | and aperture for the zeroth entry of the top-level page directory |
66 | directory (PD3) for the BAR2 register region. | 68 | (PD3 for V2 page tables). This pointer **may not** be directly |
67 | Note: The returned pointer will be into the PRAMIN space. If the PRAMIN | 69 | dereferencable, and the caller may need to shift the PRAMIN window. |
68 | window is moved to a region that does not cover the BAR2 page table, | 70 | @return 0 on success, -errno on error. |
69 | this ***will move the window***. | 71 | Note: This may move the PRAMIN window. |
70 | Note: Even if the page table is located in SYS_MEM, we route reads/writes via | ||
71 | PRAMIN. This ensures that we always see what the GPU sees, and that | ||
72 | includes any passes through I/O MMUs or IOVA spaces. | ||
73 | */ | 72 | */ |
74 | int get_bar2_pdb(struct nvdebug_state *g, void **pdb, bool *is_v2_pdb) { | 73 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) { |
75 | static void* cached_pdb = NULL; | ||
76 | static bool cached_is_v2_pdb = false; | ||
77 | static long pd_hash = 0; | ||
78 | int ret; | 74 | int ret; |
79 | bar_config_block_t bar2_block; | 75 | bar_config_block_t bar2_block; |
80 | page_dir_config_t pd_config; | ||
81 | uint64_t pdb_vram; | ||
82 | 76 | ||
83 | // Use cached base as long as it's still pointing to the same thing | 77 | if (!pd) |
84 | if (cached_pdb && readl(cached_pdb) == pd_hash) { | 78 | return -EINVAL; |
85 | *pdb = cached_pdb; | ||
86 | *is_v2_pdb = cached_is_v2_pdb; | ||
87 | return 0; | ||
88 | } | ||
89 | 79 | ||
90 | if (!g->bar2) | 80 | if (!g->bar2) |
91 | return -ENXIO; | 81 | return -ENXIO; |
@@ -107,24 +97,10 @@ int get_bar2_pdb(struct nvdebug_state *g, void **pdb, bool *is_v2_pdb) { | |||
107 | } | 97 | } |
108 | printk(KERN_INFO "[nvdebug] BAR2 inst block at off %x in PRAMIN\n", ret); | 98 | printk(KERN_INFO "[nvdebug] BAR2 inst block at off %x in PRAMIN\n", ret); |
109 | // Pull the page directory base configuration from the instance block | 99 | // Pull the page directory base configuration from the instance block |
110 | if ((pd_config.raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) { | 100 | if ((pd->raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) { |
111 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n"); | 101 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n"); |
112 | return -ENOTSUPP; | 102 | return -ENOTSUPP; |
113 | } | 103 | } |
114 | pdb_vram = pd_config.page_dir_hi; | ||
115 | pdb_vram <<= 20; | ||
116 | pdb_vram |= pd_config.page_dir_lo; | ||
117 | pdb_vram <<= 12; | ||
118 | printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx (config raw: %llx)\n", pdb_vram, pd_config.raw); | ||
119 | // Setup PRAMIN to point at the page directory | ||
120 | if ((ret = addr_to_pramin_mut(g, pdb_vram, pd_config.target)) < 0) { | ||
121 | printk(KERN_ERR "[nvdebug] Invalid BAR2/3 PDB configuration! BAR2/3 inaccessible.\n"); | ||
122 | return ret; | ||
123 | } | ||
124 | |||
125 | *pdb = cached_pdb = g->regs + NV_PRAMIN + ret; | ||
126 | pd_hash = readl(cached_pdb); | ||
127 | *is_v2_pdb = cached_is_v2_pdb = pd_config.is_ver2; | ||
128 | 104 | ||
129 | return 0; | 105 | return 0; |
130 | } | 106 | } |
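The address-assembly logic deleted above is not lost; it is subsumed by the
overlapping 52-bit `page_dir` bitfield added to `page_dir_config_t` later in
this diff. A quick sketch of why the two are equivalent (assuming the field
layout shown in the `nvdebug.h` hunk):

```c
// page_dir_lo occupies raw bits 31:12; page_dir_hi occupies raw bits 63:32.
page_dir_config_t pd = { .raw = /* as read from the instance block */ 0 };

// Old get_bar2_pdb(): assemble the base by hand, then 4 KiB-align it
uint64_t base_old = pd.page_dir_hi;
base_old <<= 20;
base_old |= pd.page_dir_lo;
base_old <<= 12;                 // == (hi << 32) | (lo << 12)

// New code: the pre-shifted 52-bit page_dir field overlays both subfields
uint64_t base_new = (uint64_t)pd.page_dir << 12;
// base_old == base_new for any raw value
```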
@@ -1,117 +1,129 @@ | |||
1 | // Helpers to deal with NVIDIA's MMU and associated page tables | 1 | /* Copyright 2024 Joshua Bakita |
2 | * Helpers to deal with NVIDIA's MMU and associated page tables | ||
3 | */ | ||
4 | #include <linux/err.h> // ERR_PTR() etc. | ||
5 | #include <linux/iommu.h> // iommu_get_domain_for_dev() and iommu_iova_to_phys() | ||
2 | #include <linux/kernel.h> // Kernel types | 6 | #include <linux/kernel.h> // Kernel types |
3 | 7 | ||
4 | #include "nvdebug.h" | 8 | #include "nvdebug.h" |
5 | 9 | ||
6 | /* One of the oldest ways to access video memory on NVIDIA GPUs is by using | 10 | // Uncomment to print every PDE and PTE walked for debugging |
7 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) | 11 | //#define DEBUG |
8 | space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs | 12 | #ifdef DEBUG |
9 | and appear to be used today to bootstrap page table configuration. | 13 | #define printk_debug printk |
14 | #else | ||
15 | #define printk_debug(...) | ||
16 | #endif | ||
10 | 17 | ||
11 | Why is it mapped at a location called NVIDIA Private RAM Instance? Because | 18 | /* Convert a page directory (PD) pointer and aperture to be kernel-accessible |
12 | this used to point to the entirety of intance RAM, which was seperate from | ||
13 | VRAM on older NVIDIA GPUs. | ||
14 | */ | ||
15 | 19 | ||
16 | /* Convert a physical VRAM address to an offset in the PRAMIN window | 20 | I/O MMU handling inspired by amdgpu_iomem_read() in amdgpu_ttm.c of the |
17 | @param addr VRAM address to convert | 21 | AMDGPU driver. |
18 | @return -errno on error, PRAMIN offset on success | ||
19 | 22 | ||
20 | Note: Use off2PRAMIN() instead if you want a dereferenceable address | 23 | @param addr Pointer from page directory entry (PDE) |
21 | Note: PRAMIN window is only 1MB, so returning an int is safe | 24 | @param pd_ap PD-type aperture (target address space) for `addr` |
22 | */ | 25 | @return A dereferencable kernel address, or an ERR_PTR-wrapped error |
23 | static int vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) { | 26 | */ |
24 | uint64_t pramin_base_va; | 27 | void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_TARGET pd_ap) { |
25 | bar0_window_t window; | 28 | struct iommu_domain *dom; |
26 | window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); | 29 | phys_addr_t phys; |
27 | // Check if the address is valid (49 bits are addressable on-GPU) | 30 | |
28 | if (addr & ~0x0001ffffffffffff) { | 31 | // Validate arguments |
29 | printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n", | 32 | if (unlikely(!IS_PD_TARGET(pd_ap) || pd_ap == PD_AND_TARGET_INVALID || !addr)) |
30 | addr, __func__); | 33 | return ERR_PTR(-EINVAL); |
31 | return -EINVAL; | 34 | |
35 | // VID_MEM accesses are the simple common-case | ||
36 | if (pd_ap == PD_AND_TARGET_VID_MEM) { | ||
37 | // Using BAR2 requires a page-table traversal. As this function is part | ||
38 | // of the page-table traversal process, it must instead use PRAMIN. | ||
39 | int off = addr_to_pramin_mut(g, addr, TARGET_VID_MEM); | ||
40 | if (off < 0) | ||
41 | return ERR_PTR(off); | ||
42 | return g->regs + NV_PRAMIN + off; | ||
32 | } | 43 | } |
33 | // For unclear (debugging?) reasons, PRAMIN can point to SYSMEM | 44 | /* SYS_MEM accesses are rare. Only nvgpu (Jetson driver), nouveau, and this |
34 | if (window.target != TARGET_VID_MEM) | 45 | * driver are known to create page directory entries in SYS_MEM. |
35 | return -EFAULT; | 46 | * |
36 | pramin_base_va = ((uint64_t)window.base) << 16; | 47 | * On systems using an I/O MMU, or some other I/O virtual address space, |
37 | // Protect against out-of-bounds accesses | 48 | * these are **not** physical addresses, and must first be translated |
38 | if (addr < pramin_base_va || addr > pramin_base_va + NV_PRAMIN_LEN) | 49 | * through the I/O MMU before use. |
39 | return -ERANGE; | 50 | * Example default meaning of a SYS_MEM address for a few CPUs: |
40 | return addr - pramin_base_va; | 51 | * - Jetson Xavier : physical address |
41 | } | 52 | * - AMD 3950X : I/O MMU address |
53 | * - Phenom II x4 : physical address | ||
54 | */ | ||
55 | // Check for, and translate through, the I/O MMU (if any) | ||
56 | if ((dom = iommu_get_domain_for_dev(g->dev))) { | ||
57 | phys = iommu_iova_to_phys(dom, addr); | ||
58 | printk(KERN_ERR "[nvdebug] I/O MMU translated SYS_MEM I/O VA %#lx to physical address %llx.\n", addr, phys); | ||
59 | } else | ||
60 | phys = addr; | ||
42 | 61 | ||
43 | // Convert a GPU physical address to CPU virtual address via the PRAMIN window | 62 | if (!phys) |
44 | // @return A dereferencable address, or 0 (an invalid physical address) on err | ||
45 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { | ||
46 | int off = vram2PRAMIN(g, phy); | ||
47 | if (off == -ERANGE) | ||
48 | printk(KERN_ERR "[nvdebug] Page table walk off end of PRAMIN!\n"); | ||
49 | if (off < 0) | ||
50 | return 0; | 63 | return 0; |
51 | return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); | ||
52 | } | ||
53 | 64 | ||
54 | /* FIXME | 65 | return phys_to_virt(phys); |
55 | void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) { | ||
56 | return g->bar2 + off; | ||
57 | } | 66 | } |
58 | */ | ||
59 | 67 | ||
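Every translation in the rewritten walkers now funnels through `pd_deref()`.
A condensed sketch of the consumption pattern (names taken from the
`search_page_directory()` changes below; `g` and `pd_config` are assumed in
scope):

```c
// Resolve one top-level PDE pointer to a kernel-accessible address, then read it
page_dir_entry_t entry;
uintptr_t pde_addr = (uintptr_t)pd_config.page_dir << 12;
void __iomem *pde_kern = pd_deref(g, pde_addr, INST2PD_TARGET(pd_config.target));
if (IS_ERR_OR_NULL(pde_kern))
	return 0;  // bad aperture, PRAMIN move failure, or unmapped I/O VA
entry.raw_w = readq(pde_kern);
```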
60 | // Internal helper for search_page_directory(). | 68 | // Internal helper for search_page_directory(). |
61 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, | 69 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, |
62 | void __iomem *pde_offset, | 70 | uintptr_t pde_addr, |
63 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | 71 | enum PD_TARGET pde_target, |
64 | uint64_t addr_to_find, | 72 | uint64_t addr_to_find, |
65 | uint32_t level) { | 73 | uint32_t level) { |
66 | uint64_t res, i; | 74 | uint64_t res, i; |
67 | void __iomem *next; | 75 | void __iomem *pde_kern; |
68 | page_dir_entry_t entry; | 76 | page_dir_entry_t entry; |
69 | if (level > sizeof(NV_MMU_PT_V2_SZ)) | 77 | if (level > sizeof(NV_MMU_PT_V2_SZ)) |
70 | return 0; | 78 | return 0; |
71 | // Hack to workaround PDE0 being double-size and strangely formatted | 79 | // Hack to workaround PDE0 being double-size and strangely formatted |
72 | if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16) | 80 | if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16) |
73 | pde_offset += 8; | 81 | pde_addr += 8; |
74 | entry.raw_w = readq(pde_offset); | 82 | // Translate a VID_MEM/SYS_MEM-space address to something kernel-accessible |
83 | pde_kern = pd_deref(g, pde_addr, pde_target); | ||
84 | if (IS_ERR_OR_NULL(pde_kern)) { | ||
85 | printk(KERN_ERR "[nvdebug] %s: Unable to resolve %#lx in GPU %s to a kernel-accessible address. Error %ld.\n", __func__, pde_addr, pd_target_to_text(pde_target), PTR_ERR(pde_kern)); | ||
86 | return 0; | ||
87 | } | ||
88 | // Read the page directory entry (a pointer to another directory, or a PTE) | ||
89 | entry.raw_w = readq(pde_kern); | ||
75 | // If we reached an invalid (unpopulated) PDE, walk back up the tree | 90 | // If we reached an invalid (unpopulated) PDE, walk back up the tree |
76 | if (entry.target == PD_AND_TARGET_INVALID) | 91 | if (entry.target == PD_AND_TARGET_INVALID) |
77 | return 0; | 92 | return 0; |
78 | // Succeed when we reach a PTE with the address we want | 93 | // Succeed when we reach a PTE with the address we want |
79 | if (entry.is_pte) { | 94 | if (entry.is_pte) { |
80 | // TODO: Handle huge pages here | 95 | // TODO: Handle huge pages here |
81 | printk(KERN_INFO "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, entry.is_privileged, entry.is_readonly, entry.atomics_disabled, entry.raw_w); | 96 | printk_debug(KERN_INFO "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, entry.is_privileged, entry.is_readonly, entry.atomics_disabled, entry.raw_w); |
82 | return (uint64_t)entry.addr << 12 == addr_to_find; | 97 | return (uint64_t)entry.addr << 12 == addr_to_find; |
83 | } | 98 | } |
84 | printk(KERN_INFO "[nvdebug] Found PDE pointing to %#018llx in ap '%s' vol '%d' at lvl %d (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, level, entry.raw_w); | 99 | printk_debug(KERN_INFO "[nvdebug] Found PDE pointing to %#018llx in ap '%s' vol '%d' at lvl %d (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, level, entry.raw_w); |
85 | // Depth-first search of the page table | 100 | // Depth-first search of the page table |
86 | for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) { | 101 | for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) { |
87 | next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); | 102 | uint64_t next = ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i; |
88 | // off2addr can fail | 103 | res = search_page_directory_subtree(g, next, entry.target, addr_to_find, level + 1); |
89 | if (!next || !entry.addr_w) { | ||
90 | printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__); | ||
91 | return 0; | ||
92 | } | ||
93 | res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1); | ||
94 | if (res) | 104 | if (res) |
95 | return res | (i << NV_MMU_PT_V2_LSB[level + 1]); | 105 | return res | (i << NV_MMU_PT_V2_LSB[level + 1]); |
96 | } | 106 | } |
97 | return 0; | 107 | return 0; |
98 | } | 108 | } |
99 | 109 | ||
100 | /* GPU Physical address -> Virtual address ("reverse" translation) | 110 | /* GPU Physical address -> Virtual address ("reverse" translation) for V2 tables |
111 | |||
112 | Depth-first search a page directory of the GPU MMU for where a particular | ||
113 | physical address is mapped. Upon finding a mapping, the virtual address is | ||
114 | returned. | ||
101 | 115 | ||
102 | Depth-first search a page directory of the GPU MMU for where a particular | 116 | The page directory may be located in VID_MEM, SYS_MEM, or some combination of |
103 | physical address is mapped. Upon finding a mapping, the virtual address is | 117 | the two. |
104 | returned. | ||
105 | 118 | ||
106 | @param pde_offset Dereferenceable pointer to the start of the PDE3 entries | 119 | @param pd_config Page Directory configuration, containing pointer and |
107 | @param off2addr Func to convert VRAM phys addresses to valid CPU VAs | 120 | aperture for the start of the PDE3 entries |
108 | @param addr_to_find Physical address to reconstruct the virtual address of | 121 | @param addr_to_find Physical address to reconstruct the virtual address of |
109 | @return 0 on error, otherwise the virtual address at which addr_to_find is | 122 | @return 0 on error, otherwise the virtual address at which addr_to_find is |
110 | mapped into by this page table. (Zero is not a valid virtual address) | 123 | mapped into by this page table. (Zero is not a valid virtual address) |
111 | */ | 124 | */ |
112 | uint64_t search_page_directory(struct nvdebug_state *g, | 125 | uint64_t search_page_directory(struct nvdebug_state *g, |
113 | void __iomem *pde_offset, | 126 | page_dir_config_t pd_config, |
114 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
115 | uint64_t addr_to_find) { | 127 | uint64_t addr_to_find) { |
116 | uint64_t res, i; | 128 | uint64_t res, i; |
117 | // Make sure that the query is page-aligned | 129 | // Make sure that the query is page-aligned |
@@ -119,57 +131,62 @@ uint64_t search_page_directory(struct nvdebug_state *g, | |||
119 | printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find); | 131 | printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find); |
120 | return 0; | 132 | return 0; |
121 | } | 133 | } |
122 | printk(KERN_INFO "[nvdebug] Searching for addr %#018llx in page table with base %#018llx\n", (u64)addr_to_find, (u64)pde_offset); | 134 | printk(KERN_INFO "[nvdebug] Searching for addr %#018llx in page table with base %#018lx\n", addr_to_find, (uintptr_t)pd_config.page_dir << 12); |
123 | // Search the top-level page directory (PDE3) | 135 | // Search the top-level page directory (PDE3) |
124 | for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++) | 136 | for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++) |
125 | if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0))) | 137 | if ((res = search_page_directory_subtree(g, ((uintptr_t)pd_config.page_dir << 12) + NV_MMU_PT_V2_ENTRY_SZ[0] * i, INST2PD_TARGET(pd_config.target), addr_to_find, 0))) |
126 | return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]); | 138 | return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]); |
127 | return 0; | 139 | return 0; |
128 | } | 140 | } |
129 | 141 | ||
130 | /* GMMU Page Tables Version 1 | 142 | /* GPU Physical address -> Virtual address ("reverse" translation) for V1 tables |
131 | This page table only contains 2 levels and is used in the Fermi, Kepler, and | 143 | (See `search_page_directory()` for documentation.) |
132 | Maxwell architectures | 144 | */ |
133 | */ | ||
134 | // Number of entries in the PDE and PTE levels | ||
135 | static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 2<<13 is an educated guess!!! | ||
136 | // Which bit index is the least significant in indexing each page level | ||
137 | static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!! | ||
138 | uint64_t search_v1_page_directory(struct nvdebug_state *g, | 145 | uint64_t search_v1_page_directory(struct nvdebug_state *g, |
139 | void __iomem *pde_offset, | 146 | page_dir_config_t pd_config, |
140 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
141 | uint64_t addr_to_find) { | 147 | uint64_t addr_to_find) { |
142 | uint64_t j, i = 0; | 148 | uint64_t j, i = 0; |
143 | page_dir_entry_v1_t pde; | 149 | page_dir_entry_v1_t pde; |
144 | page_tbl_entry_v1_t pte; | 150 | page_tbl_entry_v1_t pte; |
145 | void __iomem *pte_offset; | 151 | uintptr_t pte_offset, pde_offset; |
152 | void __iomem *pte_addr, *pde_addr; | ||
146 | // For each PDE | 153 | // For each PDE |
147 | do { | 154 | do { |
155 | // Index the list of page directory entries | ||
156 | pde_offset = ((uint64_t)pd_config.page_dir << 12) + i * sizeof(page_dir_entry_v1_t); | ||
157 | // Convert the VID_MEM/SYS_MEM address to a kernel-accessible addr | ||
158 | pde_addr = pd_deref(g, pde_offset, INST2PD_TARGET(pd_config.target)); | ||
159 | if (IS_ERR_OR_NULL(pde_addr)) { | ||
160 | printk(KERN_ERR "[nvdebug] %s: Unable to resolve %#lx in GPU %s to a kernel-accessible address. Error %ld.\n", __func__, pde_offset, pd_target_to_text(INST2PD_TARGET(pd_config.target)), -PTR_ERR(pde_addr)); | ||
161 | return 0; | ||
162 | } | ||
148 | // readq doesn't seem to work on BAR0 | 163 | // readq doesn't seem to work on BAR0 |
149 | pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4); | 164 | pde.raw = readl(pde_addr + 4); |
150 | pde.raw <<= 32; | 165 | pde.raw <<= 32; |
151 | pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t)); | 166 | pde.raw |= readl(pde_addr); |
152 | // Verify PDE is present | 167 | // Verify PDE is present |
153 | if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID) | 168 | if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID) |
154 | continue; | 169 | continue; |
155 | // Convert to a dereferencable pointer from CPU virtual address space | ||
156 | pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12); | ||
157 | if (!pte_offset) | ||
158 | continue; | ||
159 | // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw); | 170 | // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw); |
160 | // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw); | 171 | printk_debug(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.alt_target, pde.raw); |
161 | // For each PTE | 172 | // For each PTE |
162 | for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) { | 173 | for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) { |
163 | // Don't overrun the PRAMIN window | 174 | // Index the list of page table entries starting at pde.alt_addr |
164 | if (pte_offset > NV_PRAMIN + g->regs + NV_PRAMIN_LEN) | 175 | pte_offset = ((uint64_t)pde.alt_addr << 12) + j * sizeof(page_tbl_entry_v1_t); |
176 | // Convert the VID_MEM/SYS_MEM address to a kernel-accessible addr | ||
177 | pte_addr = pd_deref(g, pte_offset, V12PD_TARGET(pde.alt_target)); | ||
178 | if (IS_ERR_OR_NULL(pte_addr)) { | ||
179 | printk(KERN_ERR "[nvdebug] %s: Unable to resolve %#lx in GPU %s to a kernel-accessible address. Error %ld.\n", __func__, pte_offset, pd_target_to_text(V12PD_TARGET(pde.alt_target)), -PTR_ERR(pte_addr)); | ||
165 | return 0; | 180 | return 0; |
166 | pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4); | 181 | } |
182 | // Read page table entry, avoiding readq | ||
183 | pte.raw = readl(pte_addr + 4); | ||
167 | pte.raw <<= 32; | 184 | pte.raw <<= 32; |
168 | pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t)); | 185 | pte.raw |= readl(pte_addr); |
169 | // Skip non-present PTEs | 186 | // Skip non-present PTEs |
170 | if (!pte.is_present) | 187 | if (!pte.is_present) |
171 | continue; | 188 | continue; |
172 | // printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw); | 189 | printk_debug(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw); |
173 | // If we find a matching PTE, return its virtual address | 190 | // If we find a matching PTE, return its virtual address |
174 | if ((uint64_t)pte.addr << 12 == addr_to_find) | 191 | if ((uint64_t)pte.addr << 12 == addr_to_find) |
175 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; | 192 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; |
@@ -178,9 +195,6 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, | |||
178 | return 0; | 195 | return 0; |
179 | } | 196 | } |
180 | 197 | ||
181 | /* GMMU Page Tables Version 0 | ||
182 | This page table only contains 2 levels and is used in the Tesla architecture | ||
183 | */ | ||
184 | /* *** UNTESTED *** | 198 | /* *** UNTESTED *** |
185 | #define NV_MMU_PT_V0_SZ 2048 | 199 | #define NV_MMU_PT_V0_SZ 2048 |
186 | #define NV_MMU_PT_V0_LSB 29 | 200 | #define NV_MMU_PT_V0_LSB 29 |
@@ -818,6 +818,14 @@ typedef union { | |||
818 | } bar_config_block_t; | 818 | } bar_config_block_t; |
819 | 819 | ||
820 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | 820 | /* BAR0 PRAMIN (Private RAM Instance) window configuration |
821 | One of the oldest ways to access video memory on NVIDIA GPUs is by using | ||
822 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) | ||
823 | space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs | ||
824 | and appears to be used today to bootstrap page table configuration. | ||
825 | |||
826 | Why is it mapped at a location called NVIDIA Private RAM Instance? Because | ||
827 | this used to point to the entirety of instance RAM, which was separate from | ||
828 | VRAM on older NVIDIA GPUs. | ||
821 | 829 | ||
822 | BASE : Base of window >> 16 in [TARGET] virtual address space | 830 | BASE : Base of window >> 16 in [TARGET] virtual address space |
823 | TARGET : Which address space BASE points into | 831 | TARGET : Which address space BASE points into |
@@ -843,7 +851,7 @@ typedef union { | |||
843 | typedef union { | 851 | typedef union { |
844 | struct { | 852 | struct { |
845 | uint32_t target:2; | 853 | uint32_t target:2; |
846 | uint32_t vol:1; | 854 | uint32_t is_volatile:1; |
847 | uint32_t padding0:1; | 855 | uint32_t padding0:1; |
848 | uint32_t fault_replay_tex:1; | 856 | uint32_t fault_replay_tex:1; |
849 | uint32_t fault_replay_gcc:1; | 857 | uint32_t fault_replay_gcc:1; |
@@ -853,6 +861,10 @@ typedef union { | |||
853 | uint32_t page_dir_lo:20; | 861 | uint32_t page_dir_lo:20; |
854 | uint32_t page_dir_hi:32; | 862 | uint32_t page_dir_hi:32; |
855 | } __attribute__((packed)); | 863 | } __attribute__((packed)); |
864 | struct { | ||
865 | uint32_t pad:12; | ||
866 | uint64_t page_dir:52; // Confirmed working on Xavier and tama | ||
867 | } __attribute__((packed)); | ||
856 | uint64_t raw; | 868 | uint64_t raw; |
857 | } page_dir_config_t; | 869 | } page_dir_config_t; |
858 | 870 | ||
@@ -888,6 +900,14 @@ typedef union { | |||
888 | The following arrays merely represent different projections of Fig. 1, and | 900 | The following arrays merely represent different projections of Fig. 1, and |
889 | only one is strictly needed to reconstruct all the others. However, due to | 901 | only one is strictly needed to reconstruct all the others. However, due to |
890 | the complexity of page tables, we include all of these to aid in readability. | 902 | the complexity of page tables, we include all of these to aid in readability. |
903 | |||
904 | Support: Pascal, Volta, Turing, Ampere, Ada, Hopper*, Blackwell* | ||
905 | Note: *Hopper introduces Version 3 Page Tables, but is backwards-compatible. | ||
906 | The newer version adds a PD4 level to support 57-bit virtual | ||
907 | addresses, and slightly shifts the PDE and PTE fields. | ||
908 | |||
909 | See also: gp100-mmu-format.pdf in open-gpu-doc. In open-gpu-kernel-modules | ||
910 | this is synonymously the "NEW" and "VER2" layout. | ||
891 | */ | 911 | */ |
892 | // How many nodes/entries per level in V2 of NVIDIA's page table format | 912 | // How many nodes/entries per level in V2 of NVIDIA's page table format |
893 | static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; | 913 | static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; |
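How these five levels slice a 49-bit virtual address follows directly from the
entry counts. A sketch, assuming the companion `NV_MMU_PT_V2_LSB` array
(declared nearby, outside this hunk) is `{47, 38, 29, 21, 12}`:

```c
// Entries per level (above):     {  4, 512, 512, 256, 512}
// Assumed index low bits (LSB):  { 47,  38,  29,  21,  12}
//   level 0 (PD3): VA bits 48:47    level 3 (PD0): VA bits 28:21
//   level 1 (PD2): VA bits 46:38    level 4 (PT):  VA bits 20:12 (4 KiB pages)
//   level 2 (PD1): VA bits 37:29
uint64_t idx = (va >> NV_MMU_PT_V2_LSB[level]) & (NV_MMU_PT_V2_SZ[level] - 1);
```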
@@ -907,6 +927,12 @@ enum PD_TARGET { | |||
907 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 | 927 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 |
908 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 | 928 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 |
909 | }; | 929 | }; |
930 | // The low bit is unset on page directory (PD) targets | ||
931 | #define IS_PD_TARGET(target) (!(target & 0x1u)) | ||
932 | // Convert from an enum INST_TARGET to an enum PD_TARGET | ||
933 | #define INST2PD_TARGET(target) ((target & 0x2) ? (target << 1) : (!target) << 1) | ||
934 | // Convert from an enum V1_PD_TARGET to an enum PD_TARGET | ||
935 | #define V12PD_TARGET(target) (target << 1) | ||
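Worked mapping for `INST2PD_TARGET()` (the `INST_TARGET` values below are
assumed from the enum defined earlier in `nvdebug.h`, outside this hunk; the
resulting PD values are derived from the macros themselves):

```c
// TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3
//   INST2PD_TARGET(TARGET_VID_MEM)             == 2  // PD_AND_TARGET_VID_MEM
//   INST2PD_TARGET(TARGET_SYS_MEM_COHERENT)    == 4  // PD_AND_TARGET_SYS_MEM_COHERENT
//   INST2PD_TARGET(TARGET_SYS_MEM_NONCOHERENT) == 6  // PD_AND_TARGET_SYS_MEM_NONCOHERENT
// All results have the low bit clear, so they pass pd_deref()'s IS_PD_TARGET() check.
enum PD_TARGET ap = INST2PD_TARGET(pd_config.target);
```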
910 | static inline const char *pd_target_to_text(enum PD_TARGET t) { | 936 | static inline const char *pd_target_to_text(enum PD_TARGET t) { |
911 | switch (t) { | 937 | switch (t) { |
912 | case PD_AND_TARGET_INVALID: | 938 | case PD_AND_TARGET_INVALID: |
@@ -928,13 +954,10 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) { | |||
928 | } | 954 | } |
929 | 955 | ||
930 | // Page Directory Entry/Page Table Entry V2 type | 956 | // Page Directory Entry/Page Table Entry V2 type |
931 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry | 957 | // Note: The meaning of target (bits 2:1) at the PDE level changes |
932 | // is a PTE or not, this combines them into a single target field to | 958 | // depending on whether the entry is a large-page PTE. To simplify |
933 | // simplify comparisons. | 959 | // comparisons, we combine them into a single target field. |
934 | // Support: Pascal, Volta, Turing, Ampere, Ada | 960 | #define TARGET_PEER 1 |
935 | // | ||
936 | // V3 introduced with Hopper, but Hopper and Blackwell also support V2 | ||
937 | // | ||
938 | typedef union { | 961 | typedef union { |
939 | // Page Directory Entry (PDE) | 962 | // Page Directory Entry (PDE) |
940 | struct { | 963 | struct { |
@@ -965,21 +988,74 @@ typedef union { | |||
965 | uint64_t raw_w; | 988 | uint64_t raw_w; |
966 | } page_dir_entry_t; | 989 | } page_dir_entry_t; |
967 | 990 | ||
968 | // Page Directory Entry/Page Table Entry V1 type | 991 | /* GMMU Page Tables Version 1 |
969 | // Support: Fermi, Kepler, Maxwell | 992 | These page tables contain 2 levels and are used in the Fermi, Kepler, and |
993 | Maxwell architectures to support a 40-bit virtual address space. | ||
994 | |||
995 | Version 1 Page Tables may be configured to support either 64 KiB or 128 KiB | ||
996 | large pages. Table addressing differs between the modes---even if the table | ||
997 | contains no large pages. The format for 4 KiB pages in each mode is shown | ||
998 | below. | ||
999 | |||
1000 | V1 of NVIDIA's page table format uses 1 level of PDEs and a level of PTEs. | ||
1001 | How the virtual address is sliced to yield an index into each level and a | ||
1002 | page offset is shown by Fig 1 and Fig 2 (for 64 KiB and 128 KiB large page | ||
1003 | modes respectively). | ||
1004 | |||
1005 | == Figure 1: 64 KiB mode == | ||
1006 | Page Offset (12 bits) <----------------------------------+ | ||
1007 | Page Table Entry (PTE) (13 bits) <--------------+ | | ||
1008 | Page Directory Entry (PDE) (13 bits) <-+ | | | ||
1009 | ^ ^ ^ | ||
1010 | Virtual address: [39, 25] [24, 12] [11, 0] | ||
1011 | |||
1012 | == Figure 2: 128 KiB mode == | ||
1013 | Page Offset (12 bits) <----------------------------------+ | ||
1014 | Page Table Entry (PTE) (14 bits) <--------------+ | | ||
1015 | Page Directory Entry (PDE) (12 bits) <-+ | | | ||
1016 | ^ ^ ^ | ||
1017 | Virtual address: [39, 26] [25, 12] [11, 0] | ||
1018 | |||
1019 | |||
1020 | Support: Fermi, Kepler, Maxwell, Pascal* | ||
1021 | Note: *Pascal introduces Version 2 Page Tables, but is backwards-compatible. | ||
1022 | Note: We only implement the 64-KiB-large-page mode in nvdebug. | ||
1023 | |||
1024 | See also: mm_gk20a.c in nvgpu (Jetson GPU driver) and kern_gmmu_fmt_gm10x.c | ||
1025 | in open-gpu-kernel-modules (open-source NVRM variant). This is | ||
1026 | synonymously the "VER1" and unversioned layout in | ||
1027 | open-gpu-kernel-modules, with some differences noted in Appdx 1. | ||
1028 | |||
1029 | == Appdx 1 == | ||
1030 | In open-gpu-kernel-modules, the unversioned MMU layout adds: | ||
1031 | - Bit 35: NV_MMU_PTE_LOCK synonym for NV_MMU_PTE_ATOMIC_DISABLE | ||
1032 | - Bit 62: NV_MMU_PTE_READ_DISABLE overlapping NV_MMU_PTE_COMPTAGLINE | ||
1033 | - Bit 63: NV_MMU_PTE_WRITE_DISABLE overlapping NV_MMU_PTE_COMPTAGLINE | ||
1034 | And removes: | ||
1035 | - Bit 40, 41, 42, 43 from NV_MMU_PTE_KIND | ||
1036 | The PDE layouts are identical. Given that the unversioned defines seem to | ||
1037 | predate renaming and/or field extension/relocation, they are likely artifacts | ||
1038 | from the page table development process, and have no meaning now. | ||
1039 | */ | ||
1040 | // Number of entries in the PDE and PTE levels | ||
1041 | static const int NV_MMU_PT_V1_SZ[2] = {8192, 8192}; | ||
1042 | // Which bit index is the least significant in indexing each page level | ||
1043 | static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; | ||
1044 | |||
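A worked slicing example for the 64 KiB mode, using only the two arrays above
(the virtual address is arbitrary; this is a sketch, not driver code):

```c
uint64_t va = 0x123456789ull;  // arbitrary 40-bit GPU virtual address
uint64_t pde_idx = (va >> NV_MMU_PT_V1_LSB[0]) & (NV_MMU_PT_V1_SZ[0] - 1);
uint64_t pte_idx = (va >> NV_MMU_PT_V1_LSB[1]) & (NV_MMU_PT_V1_SZ[1] - 1);
uint64_t pg_off  = va & 0xfff;  // 4 KiB page offset, bits 11:0
// search_v1_page_directory() inverts this on a hit:
//   va == pde_idx << NV_MMU_PT_V1_LSB[0] | pte_idx << NV_MMU_PT_V1_LSB[1] | pg_off
```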
1045 | // V1 Page Directory Entry target | ||
970 | enum V1_PD_TARGET { | 1046 | enum V1_PD_TARGET { |
971 | PD_TARGET_INVALID = 0, | 1047 | PD_TARGET_INVALID = 0, |
972 | PD_TARGET_VID_MEM = 1, | 1048 | PD_TARGET_VID_MEM = 1, |
973 | PD_TARGET_SYS_MEM_COHERENT = 2, | 1049 | PD_TARGET_SYS_MEM_COHERENT = 2, |
974 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, | 1050 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, |
975 | }; | 1051 | }; |
976 | // Page Directory Entry (PDE) | 1052 | // V1 Page Directory Entry (PDE) |
977 | typedef union { | 1053 | typedef union { |
978 | // Large page fields | 1054 | // Large page fields |
979 | struct { | 1055 | struct { |
980 | // 0:32 | 1056 | // 0:32 |
981 | enum V1_PD_TARGET target:2; | 1057 | enum V1_PD_TARGET target:2; |
982 | uint32_t padding0:2; | 1058 | uint32_t padding0:2; // Documented as "PDE_SIZE"? |
983 | uint64_t addr:28; // May be wider? | 1059 | uint64_t addr:28; // May be wider? |
984 | // 32:63 | 1060 | // 32:63 |
985 | uint32_t padding2:3; | 1061 | uint32_t padding2:3; |
@@ -998,45 +1074,58 @@ typedef union { | |||
998 | } __attribute__((packed)); | 1074 | } __attribute__((packed)); |
999 | uint64_t raw; | 1075 | uint64_t raw; |
1000 | } page_dir_entry_v1_t; | 1076 | } page_dir_entry_v1_t; |
1001 | // Page Table Entry (PTE) | 1077 | |
1002 | // Reconstructed from info in Jetson nvgpu driver | 1078 | // V1 Page Table Entry (PTE) |
1003 | typedef union { | 1079 | typedef union { |
1004 | struct { | 1080 | struct { |
1005 | // 0:32 | 1081 | // 0:32 |
1006 | bool is_present:1; | 1082 | bool is_present:1; |
1007 | bool is_privileged:1; | 1083 | bool is_privileged:1; |
1008 | bool is_readonly:1; | 1084 | bool is_readonly:1; |
1009 | uint32_t padding0:1; | 1085 | bool is_encrypted:1; |
1010 | uint64_t addr:28; | 1086 | uint64_t addr:28; |
1011 | // 32:63 | 1087 | // 32:63 |
1012 | bool is_volatile:1; | 1088 | bool is_volatile:1; |
1013 | enum INST_TARGET:2; | 1089 | enum INST_TARGET:2; |
1014 | uint32_t padding1:1; | 1090 | bool atomics_disabled:1; |
1015 | uint32_t kind:8; | 1091 | uint32_t kind:8; |
1016 | uint32_t comptag:17; | 1092 | uint32_t comptag:20; |
1017 | uint32_t padding2:1; | ||
1018 | bool is_read_disabled:1; | ||
1019 | bool is_write_disabled:1; | ||
1020 | } __attribute__((packed)); | 1093 | } __attribute__((packed)); |
1021 | uint64_t raw; | 1094 | uint64_t raw; |
1022 | } page_tbl_entry_v1_t; | 1095 | } page_tbl_entry_v1_t; |
1023 | //enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | 1096 | |
1024 | //enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | 1097 | /* GMMU Page Tables Version 0 |
1025 | //static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | 1098 | This page table contains 2 levels to support a 40-bit virtual address space, |
1026 | /* PDE V0 (nv50/Tesla) | 1099 | and is used in the Tesla (2.0?) architecture. |
1100 | |||
1101 | It is unclear what NVIDIA calls this page table layout. It predates V1, so we | ||
1102 | call it V0. | ||
1103 | |||
1104 | See also: https://envytools.readthedocs.io/en/latest/hw/memory/g80-vm.html | ||
1105 | */ | ||
1106 | /* | ||
1107 | // What size pages are in the pointed-to page table? | ||
1108 | enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | ||
1109 | // How large is the pointed-to page table? | ||
1110 | enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | ||
1111 | // Given a page table size, how many entries does it have? | ||
1112 | static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | ||
1113 | |||
1114 | // PDE V0 (nv50/Tesla) | ||
1027 | typedef union { | 1115 | typedef union { |
1028 | struct { | 1116 | struct { |
1029 | enum V1_PDE_TYPE type:2; | 1117 | enum V0_PDE_TYPE type:2; |
1030 | enum INST_TARGET target:2; | 1118 | enum INST_TARGET target:2; |
1031 | uint32_t padding0:1; | 1119 | uint32_t padding0:1; |
1032 | enum V1_PDE_SIZE sublevel_size:2; | 1120 | enum V0_PDE_SIZE sublevel_size:2; |
1033 | uint32_t padding1:5; | 1121 | uint32_t padding1:5; |
1034 | uint32_t addr:28; | 1122 | uint32_t addr:28; |
1035 | uint32_t padding2:24; | 1123 | uint32_t padding2:24; |
1036 | } __attribute__((packed)); | 1124 | } __attribute__((packed)); |
1037 | uint64_t raw; | 1125 | uint64_t raw; |
1038 | } page_dir_entry_v1_t;*/ | 1126 | } page_dir_entry_v0_t; |
1039 | /* PTE V0 (nv50) | 1127 | |
1128 | // PTE V0 (nv50) for small pages | ||
1040 | typedef union { | 1129 | typedef union { |
1041 | struct { | 1130 | struct { |
1042 | bool is_present:1; | 1131 | bool is_present:1; |
@@ -1055,7 +1144,8 @@ typedef union { | |||
1055 | uint32_t padding5:1; | 1144 | uint32_t padding5:1; |
1056 | } __attribute__((packed)); | 1145 | } __attribute__((packed)); |
1057 | uint64_t raw; | 1146 | uint64_t raw; |
1058 | } page_tbl_entry_v1_t;*/ | 1147 | } page_tbl_entry_v0_t; |
1148 | */ | ||
1059 | 1149 | ||
1060 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 1150 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
1061 | 1151 | ||
@@ -1077,6 +1167,8 @@ struct nvdebug_state { | |||
1077 | struct gk20a *g; | 1167 | struct gk20a *g; |
1078 | // Pointer to PCI device needed for pci_iounmap | 1168 | // Pointer to PCI device needed for pci_iounmap |
1079 | struct pci_dev *pcid; | 1169 | struct pci_dev *pcid; |
1170 | // Pointer to generic device struct (both platform and pcie devices) | ||
1171 | struct device *dev; | ||
1080 | }; | 1172 | }; |
1081 | 1173 | ||
1082 | /*const struct runlist_funcs { | 1174 | /*const struct runlist_funcs { |
@@ -1152,13 +1244,11 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | |||
1152 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); | 1244 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); |
1153 | uint64_t search_page_directory( | 1245 | uint64_t search_page_directory( |
1154 | struct nvdebug_state *g, | 1246 | struct nvdebug_state *g, |
1155 | void __iomem *pde_offset, | 1247 | page_dir_config_t pd_config, |
1156 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
1157 | uint64_t addr_to_find); | 1248 | uint64_t addr_to_find); |
1158 | uint64_t search_v1_page_directory( | 1249 | uint64_t search_v1_page_directory( |
1159 | struct nvdebug_state *g, | 1250 | struct nvdebug_state *g, |
1160 | void __iomem *pde_offset, | 1251 | page_dir_config_t pd_config, |
1161 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
1162 | uint64_t addr_to_find); | 1252 | uint64_t addr_to_find); |
1163 | 1253 | ||
1164 | 1254 | ||
@@ -1252,4 +1342,4 @@ static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | |||
1252 | } | 1342 | } |
1253 | // Defined in bus.c | 1343 | // Defined in bus.c |
1254 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); | 1344 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); |
1255 | int get_bar2_pdb(struct nvdebug_state *g, void **pdb, bool *is_v2_pdb); | 1345 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 0cf5344..68e4d71 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -109,6 +109,7 @@ int probe_and_cache_devices(void) { | |||
109 | g_nvdebug_state[i].chip_id = ids.chip_id; | 109 | g_nvdebug_state[i].chip_id = ids.chip_id; |
110 | g_nvdebug_state[i].pcid = NULL; | 110 | g_nvdebug_state[i].pcid = NULL; |
111 | g_nvdebug_state[i].bar3 = NULL; | 111 | g_nvdebug_state[i].bar3 = NULL; |
112 | g_nvdebug_state[i].dev = dev; | ||
112 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", | 113 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", |
113 | ids.chip_id, ARCH2NAME(ids.architecture)); | 114 | ids.chip_id, ARCH2NAME(ids.architecture)); |
114 | i++; | 115 | i++; |
@@ -131,6 +132,7 @@ int probe_and_cache_devices(void) { | |||
131 | if (!g_nvdebug_state[i].bar3) | 132 | if (!g_nvdebug_state[i].bar3) |
132 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | 133 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); |
133 | g_nvdebug_state[i].pcid = pcid; | 134 | g_nvdebug_state[i].pcid = pcid; |
135 | g_nvdebug_state[i].dev = &pcid->dev; | ||
134 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | 136 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); |
135 | if (ids.raw == -1) { | 137 | if (ids.raw == -1) { |
136 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | 138 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); |
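Both probe paths now record a generic `struct device *`, which is what
`pd_deref()` in `mmu.c` hands to the I/O MMU API. A sketch of the connection
(on the platform path, `dev` is assumed to come from the device iteration
earlier in `probe_and_cache_devices()`, which this hunk does not show):

```c
// Platform GPU (e.g., Jetson): g_nvdebug_state[i].dev = dev;
// PCIe GPU: struct pci_dev embeds a struct device, so &pcid->dev suffices.
// Either way, pd_deref() can later resolve SYS_MEM I/O virtual addresses:
struct iommu_domain *dom = iommu_get_domain_for_dev(g_nvdebug_state[i].dev);
```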
@@ -9,7 +9,8 @@ | |||
9 | 9 | ||
10 | // Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer | 10 | // Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer |
11 | // **If enabled, PRAMIN may not be otherwise used while walking the runlist!** | 11 | // **If enabled, PRAMIN may not be otherwise used while walking the runlist!** |
12 | #define FALLBACK_TO_PRAMIN | 12 | // Runlists can only be printed on the Jetson TX2 if this is enabled. |
13 | //#define FALLBACK_TO_PRAMIN | ||
13 | 14 | ||
14 | /* Get runlist head and info (incl. length) | 15 | /* Get runlist head and info (incl. length) |
15 | @param rl_id Which runlist to obtain? | 16 | @param rl_id Which runlist to obtain? |
@@ -20,6 +21,7 @@ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl | |||
20 | uint64_t runlist_iova; | 21 | uint64_t runlist_iova; |
21 | enum INST_TARGET runlist_target; | 22 | enum INST_TARGET runlist_target; |
22 | uint16_t runlist_len; | 23 | uint16_t runlist_len; |
24 | int err; | ||
23 | #ifdef FALLBACK_TO_PRAMIN | 25 | #ifdef FALLBACK_TO_PRAMIN |
24 | int off; | 26 | int off; |
25 | #endif // FALLBACK_TO_PRAMIN | 27 | #endif // FALLBACK_TO_PRAMIN |
@@ -33,9 +35,9 @@ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl | |||
33 | return -EIO; | 35 | return -EIO; |
34 | runlist_iova = ((uint64_t)rl.ptr) << 12; | 36 | runlist_iova = ((uint64_t)rl.ptr) << 12; |
35 | runlist_target = rl.target; | 37 | runlist_target = rl.target; |
36 | printk(KERN_INFO "[nvdebug] Runlist %d: %d entries @ %llx in %s (config raw: %#018llx)\n", | ||
37 | rl_id, rl.len, runlist_iova, target_to_text(rl.target), rl.raw); | ||
38 | runlist_len = rl.len; | 38 | runlist_len = rl.len; |
39 | printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx)\n", | ||
40 | rl_id, g->chip_id, rl.len, runlist_iova, target_to_text(rl.target), rl.raw); | ||
39 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { | 41 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { |
40 | runlist_base_tu102_t base; | 42 | runlist_base_tu102_t base; |
41 | runlist_submit_tu102_t submit; | 43 | runlist_submit_tu102_t submit; |
@@ -46,6 +48,8 @@ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl | |||
46 | runlist_iova = ((uint64_t)base.ptr) << 12; | 48 | runlist_iova = ((uint64_t)base.ptr) << 12; |
47 | runlist_target = base.target; | 49 | runlist_target = base.target; |
48 | runlist_len = submit.len; | 50 | runlist_len = submit.len; |
51 | printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx %#018llx)\n", | ||
52 | rl_id, g->chip_id, submit.len, runlist_iova, target_to_text(runlist_target), base.raw, submit.raw); | ||
49 | } | 53 | } |
50 | // Return early on an empty runlist | 54 | // Return early on an empty runlist |
51 | if (!runlist_len) | 55 | if (!runlist_len) |
@@ -53,24 +57,25 @@ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl | |||
53 | 57 | ||
54 | // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping | 58 | // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping |
55 | if (runlist_target == TARGET_VID_MEM) { | 59 | if (runlist_target == TARGET_VID_MEM) { |
56 | void __iomem *bar2_page_dir; | ||
57 | bool pdb_is_ver2; | ||
58 | uint64_t runlist_bar_vaddr; | 60 | uint64_t runlist_bar_vaddr; |
61 | page_dir_config_t pd_config; | ||
59 | 62 | ||
60 | if (get_bar2_pdb(g, &bar2_page_dir, &pdb_is_ver2) < 0) | 63 | if ((err = get_bar2_pdb(g, &pd_config)) < 0) |
61 | return -EIO; | 64 | goto attempt_pramin_access; |
62 | 65 | ||
63 | if (pdb_is_ver2) | 66 | if (pd_config.is_ver2) |
64 | runlist_bar_vaddr = search_page_directory(g, bar2_page_dir, phy2PRAMIN, runlist_iova); | 67 | runlist_bar_vaddr = search_page_directory(g, pd_config, runlist_iova); |
65 | else | 68 | else |
66 | runlist_bar_vaddr = search_v1_page_directory(g, bar2_page_dir, phy2PRAMIN, runlist_iova); | 69 | runlist_bar_vaddr = search_v1_page_directory(g, pd_config, runlist_iova); |
67 | if (!runlist_bar_vaddr) { | 70 | if (!runlist_bar_vaddr) { |
68 | printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n"); | 71 | printk(KERN_WARNING "[nvdebug] Unable to find runlist %d mapping in BAR2/3 page tables for %x.\n", rl_id, g->chip_id); |
72 | err = -EOPNOTSUPP; | ||
69 | goto attempt_pramin_access; | 73 | goto attempt_pramin_access; |
70 | } | 74 | } |
71 | printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr); | 75 | |
76 | printk(KERN_INFO "[nvdebug] Runlist %d for %x @ %llx in BAR2 virtual address space.\n", rl_id, g->chip_id, runlist_bar_vaddr); | ||
72 | if (!g->bar2) { | 77 | if (!g->bar2) { |
73 | printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n"); | 78 | printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped for %x.\n", g->chip_id); |
74 | return -ENODEV; | 79 | return -ENODEV; |
75 | } | 80 | } |
76 | rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr; | 81 | rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr; |
@@ -91,7 +96,7 @@ attempt_pramin_access: | |||
91 | rl_iter->len = runlist_len; | 96 | rl_iter->len = runlist_len; |
92 | return 0; | 97 | return 0; |
93 | #else | 98 | #else |
94 | return -EOPNOTSUPP; | 99 | return err; |
95 | #endif // FALLBACK_TO_PRAMIN | 100 | #endif // FALLBACK_TO_PRAMIN |
96 | } | 101 | } |
97 | 102 | ||
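End-to-end, the updated error propagation means callers of
`get_runlist_iter()` now see the underlying failure code rather than a blanket
`-EOPNOTSUPP`. A consumption sketch (assuming only the `runlist_iter` fields
visible in this diff):

```c
struct runlist_iter rl_iter;
int err = get_runlist_iter(g, rl_id, &rl_iter);
if (err < 0)
	return err;  // e.g., -EIO, -ENODEV, or the get_bar2_pdb() error
// curr_entry points into BAR2 (or PRAMIN, with FALLBACK_TO_PRAMIN enabled)
printk(KERN_INFO "[nvdebug] Runlist %d: %d entries\n", rl_id, rl_iter.len);
```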