author	Joshua Bakita <bakitajoshua@gmail.com>	2023-06-22 12:52:59 -0400
committer	Joshua Bakita <bakitajoshua@gmail.com>	2023-06-22 12:52:59 -0400
commit	306a03d18b305e4e573be3b2931978fa10679eb9 (patch)
tree	349570dfbe5f531e903c949c3f663627ee1097a8
parent	f4b83713672acaf88a526b930b8e417453f6edc5 (diff)
Quick dump of current state for Ben to review.
-rw-r--r--	Makefile	13
-rw-r--r--	device_info_procfs.c	126
-rw-r--r--	mmu.c	251
-rw-r--r--	nvdebug.h	719
-rw-r--r--	nvdebug_entry.c	288
-rw-r--r--	runlist.c	221
-rw-r--r--	runlist_procfs.c	188
-rw-r--r--	stubs.h	80
8 files changed, 1614 insertions, 272 deletions
diff --git a/Makefile b/Makefile
index 18c07e8..2dc90c7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,14 @@
1obj-m += nvdebug.o 1obj-m += nvdebug.o
2nvdebug-objs = runlist_procfs.o runlist.o nvdebug_entry.o 2nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o nvdebug_entry.o
3KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" 3KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
4# Add -mfentry above if the build fails due to a missing mcount symbol
4 5
5# TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) 6# TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...)
6#ccflags-y += -I$(PWD)/include 7ccflags-y += -I$(PWD)/include
7ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include 8#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include
8ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu 9#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu
9ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include 10#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include
10ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi 11#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi
11 12
12all: 13all:
13 make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules 14 make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
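
# Typical usage (illustrative, not part of this commit): build against the
# running kernel's headers, then load the module and inspect the kernel log:
#   make
#   sudo insmod nvdebug.ko
#   dmesg | tail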
diff --git a/device_info_procfs.c b/device_info_procfs.c
new file mode 100644
index 0000000..cd6c53c
--- /dev/null
+++ b/device_info_procfs.c
@@ -0,0 +1,126 @@
1#include "nvdebug.h"
2#include <linux/seq_file.h> // For seq_* functions and types
3#include <linux/uaccess.h> // For copy_to_user()
4
5// Generic register printing function, used for PTOP_*_NUM registers (+more)
6// @param f File being read from. `data` field is register offset to read.
7// @param buf User buffer for result
8// @param size Length of user buffer
9// @param off Requested offset. Updated by number of characters written.
10// @return -errno on error, otherwise number of bytes written to *buf
11// Note: Parent `data` field MUST be the GPU index
12static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) {
13 char out[16];
14 int chars_written;
15 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
16 if (size < 16 || *off != 0)
17 return 0;
18 // 32 bit register will always take less than 16 characters to print
19 chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f))));
20 if (copy_to_user(buf, out, chars_written))
21 printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
22 *off += chars_written;
23 return chars_written;
24}
25const struct file_operations nvdebug_read_reg32_file_ops = {
26 .read = nvdebug_reg32_read,
27};
28
29//// ==v== PTOP_DEVICE_INFO ==v== ////
30
31// Called to start or resume a sequence. Prior to 4.19, *pos is unreliable.
32// Initializes iterator `idx` state and returns it. Ends sequence on NULL.
33static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) {
34 static int idx;
35 // If start of sequence, reset `idx`
36 if (*pos == 0)
37 idx = 0;
38 // Number of possible info entries is fixed, and list is sparse
39 if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1)
40 return NULL;
41 return &idx;
42}
43
44// Steps to next record. Returns new value of `idx`.
45// Calls show() on non-NULL return
46static void* device_info_file_seq_next(struct seq_file *s, void *idx,
47 loff_t *pos) {
48 (*pos)++; // Required by seq interface
49 // Number of possible info entries is fixed, and list is sparse
50	if (++(*(int*)idx) >= NV_PTOP_DEVICE_INFO__SIZE_1)
51 return NULL;
52 return idx;
53}
54
55// Print info at index *idx. Returns non-zero on error.
56static int device_info_file_seq_show(struct seq_file *s, void *idx) {
57 ptop_device_info_t curr_info;
58 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
59
60 curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx));
61 // Check for read errors
62 if (curr_info.raw == -1)
63 return -EIO;
64
65 // Parse and print the data
66 switch(curr_info.info_type) {
67 case INFO_TYPE_DATA:
68 // As of early 2022, only the ENUM2 format of this entry exists
69 if (curr_info.is_not_enum2)
70 break;
71 seq_printf(s, "| BAR0 Base %#.8x\n"
72 "| instance %d\n",
73 curr_info.pri_base << 12, curr_info.inst_id);
74 if (curr_info.fault_id_is_valid)
75 seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id);
76 break;
77 case INFO_TYPE_ENUM:
78 if (curr_info.engine_is_valid)
79 seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum);
80 if (curr_info.runlist_is_valid)
81 seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum);
82 if (curr_info.intr_is_valid)
83 seq_printf(s, "| Interrupt ID: %2d\n", curr_info.intr_enum);
84 if (curr_info.reset_is_valid)
85 seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum);
86 break;
87 case INFO_TYPE_ENGINE_TYPE:
88 seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type);
89 if (curr_info.engine_type < ENGINE_TYPES_LEN)
90 seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
91 else
92 seq_printf(s, "Unknown Engine, introduced post-Ampere)\n");
93 break;
94 case INFO_TYPE_NOT_VALID:
95 default:
96 // Device info records are sparse, so skip unset or unknown ones
97 return 0;
98 }
99
100 // Draw a line between each device entry
101 if (!curr_info.has_next_entry)
102 seq_printf(s, "+---------------------+\n");
103 return 0;
104}
105
106static void device_info_file_seq_stop(struct seq_file *s, void *idx) {
107 // No cleanup needed
108}
109
110static const struct seq_operations device_info_file_seq_ops = {
111 .start = device_info_file_seq_start,
112 .next = device_info_file_seq_next,
113 .stop = device_info_file_seq_stop,
114 .show = device_info_file_seq_show,
115};
116
117static int device_info_file_open(struct inode *inode, struct file *f) {
118 return seq_open(f, &device_info_file_seq_ops);
119}
120
121const struct file_operations device_info_file_ops = {
122 .open = device_info_file_open,
123 .read = seq_read,
124 .llseek = seq_lseek,
125 .release = seq_release,
126};
diff --git a/mmu.c b/mmu.c
new file mode 100644
index 0000000..26c7af5
--- /dev/null
+++ b/mmu.c
@@ -0,0 +1,251 @@
1// Helpers to deal with NVIDIA's MMU and associated page tables
2#include <linux/kernel.h> // Kernel types
3
4#include "nvdebug.h"
5
6/* One of the oldest ways to access video memory on NVIDIA GPUs is by using
7 a configurable 1MB window into VRAM which is mapped into BAR0 (register)
8 space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs
9   and appears to be used today to bootstrap page table configuration.
10
11 Why is it mapped at a location called NVIDIA Private RAM Instance? Because
12   this used to point to the entirety of instance RAM, which was separate from
13 VRAM on older NVIDIA GPUs.
14*/
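
/* Illustrative sketch (not part of this commit): reading one 32-bit word of
   VRAM through the PRAMIN window, assuming the window already covers the
   target address. vram2PRAMIN() below performs the validity checks; note
   that it returns 0 both on error and for the window base itself. */
static inline uint32_t example_read_vram_word(struct nvdebug_state *g,
                                              uint64_t vram_addr) {
	return readl(g->regs + NV_PRAMIN + vram2PRAMIN(g, vram_addr));
}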
15
16/* Convert a physical VRAM address to an offset in the PRAMIN window
17 @param addr VRAM address to convert
18 @return 0 on error, PRAMIN offset on success
19
20 Note: Use off2PRAMIN() instead if you want a dereferenceable address
21*/
22uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
23 uint64_t pramin_base_va;
24 bar0_window_t window;
25 window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
26 // Check if the address is valid (49 bits are addressable on-GPU)
27 if (addr & ~0x0001ffffffffffff) {
28 printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n",
29 addr, __func__);
30 return 0;
31 }
32 // For unclear (debugging?) reasons, PRAMIN can point to SYSMEM
33 if (window.target != TARGET_VID_MEM)
34 return 0;
35 pramin_base_va = ((uint64_t)window.base) << 16;
36 // Protect against out-of-bounds accesses
37	if (addr < pramin_base_va || addr >= pramin_base_va + NV_PRAMIN_LEN)
38 return 0;
39 return addr - pramin_base_va;
40}
41
42/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
43   straightforward starting with Pascal ("page table version 2"), except for a
44 few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
45
46 All you really need to know is that any given Page Directory Entry (PDE)
47 contains a pointer to the start of a 4k page densely filled with PDEs or Page
48 Table Entries (PTEs).
49
50 == Page Table Refresher ==
51 Page tables convert virtual addresses to physical addresses, and they do this
52   via a tree structure. Leaves (PTEs) contain a physical address, and the path
53   from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
54   When descending, the virtual address is sliced into pieces, and one slice is
55 used at each level (as an index) to select the next-visited node (in level+1).
56
57 V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
58 PTEs. How the virtual address is sliced to yield an index into each level and
59 a page offset is shown by Fig 1.
60
61 == Figure 1 ==
62 Page Offset (12 bits) <---------------------------------------+
63 Page Table Entry (PTE) (9 bits) <--------------------+ |
64 Page Directory Entry (PDE) 0 (8 bits) <-----+ | |
65 PDE1 (8 bits) <--------------------+ | | |
66 PDE2 (8 bits) <-----------+ | | | |
67 PDE3 (2 bits) <--+ | | | | |
68 ^ ^ ^ ^ ^ ^
69 Virtual addr: [49, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
70
71 The following arrays merely represent different projections of Fig. 1, and
72 only one is strictly needed to reconstruct all the others. However, due to
73 the complexity of page tables, we include all of these to aid in readability.
74*/
75// How many nodes/entries per level in V2 of NVIDIA's page table format
76static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
77// Size in bytes of an entry at a particular level
78static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
79// Which bit index is the least significant in indexing each page level
80static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
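
// Illustrative sketch (not part of this commit): the per-level index of a
// virtual address follows directly from the two tables above. Every level
// size is a power of two, so a shift-and-mask matches Fig. 1.
static inline uint64_t nv_mmu_pt_v2_index(uint64_t va, int level) {
	return (va >> NV_MMU_PT_V2_LSB[level]) & (NV_MMU_PT_V2_SZ[level] - 1);
}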
81
82// Convert a GPU physical address to CPU virtual address via the PRAMIN window
83void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
84 return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy);
85}
86
87/* FIXME
88void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
89 return g->bar2 + off;
90}
91*/
92
93uint64_t search_page_directory_subtree(struct nvdebug_state *g,
94 void __iomem *pde_offset,
95 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
96 uint64_t addr_to_find,
97 uint32_t level) {
98 uint64_t res, i;
99 void __iomem *next;
100 page_dir_entry_t entry;
101	if (level >= ARRAY_SIZE(NV_MMU_PT_V2_SZ))
102 return 0;
103 // Hack to workaround PDE0 being double-size and strangely formatted
104 if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16)
105 pde_offset += 8;
106 entry.raw = readl(pde_offset);
107 // If we reached an invalid (unpopulated) PDE, walk back up the tree
108 if (entry.target == PD_AND_TARGET_INVALID)
109 return 0;
110 // Succeed when we reach a PTE with the address we want
111 if (entry.is_pte) {
112 printk(KERN_INFO "[nvdebug] PTE for phy addr %llx (raw: %x)\n", ((u64)entry.addr) << 12, entry.raw);
113 return (uint64_t)entry.addr << 12 == addr_to_find;
114 }
115 printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw);
116 // Depth-first search of the page table
117	for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
118 next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
119 // off2addr can fail
120 if (!next) {
121 printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__);
122 return 0;
123 }
124 res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1);
125 if (res)
126 return res | (i << NV_MMU_PT_V2_LSB[level + 1]);
127 }
128 return 0;
129}
130
131/* Search a page directory of the GPU MMU
132 @param pde_offset Dereferenceable pointer to the start of the PDE3 entries
133 @param off2addr Func to converts VRAM phys addresses to valid CPU VAs
134 @param addr_to_find Physical address to reconstruct the virtual address of
135 @return 0 on error, otherwise the virtual address at which addr_to_find is
136 mapped into by this page table.
137*/
138uint64_t search_page_directory(struct nvdebug_state *g,
139 void __iomem *pde_offset,
140 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
141 uint64_t addr_to_find) {
142 uint64_t res, i;
143 // Make sure that the query is page-aligned
144 if (addr_to_find & 0xfff) {
145 printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find);
146 return 0;
147 }
148 // Search the top-level page directory (PDE3)
149 for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++)
150 if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0)))
151 return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]);
152 return 0;
153}
154
155/* GMMU Page Tables Version 1
156 This page table only contains 2 levels and is used in the Fermi, Kepler, and
157 Maxwell architectures
158*/
159// Number of entries in the PDE and PTE levels
160static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 1<<13 is an educated guess!!!
161// Which bit index is the least significant in indexing each page level
162static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!!
163uint64_t search_v1_page_directory(struct nvdebug_state *g,
164 void __iomem *pde_offset,
165 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
166 uint64_t addr_to_find) {
167 uint64_t j, i = 0;
168 page_dir_entry_v1_t pde;
169 page_tbl_entry_v1_t pte;
170 void __iomem *pte_offset;
171 // For each PDE
172 do {
173 // readq doesn't seem to work on BAR0
174 pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4);
175 pde.raw <<= 32;
176 pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t));
177 // Verify PDE is present
178 if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID)
179 continue;
180 // Convert to a dereferencable pointer from CPU virtual address space
181 pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12);
182 if (!pte_offset)
183 continue;
184// printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw);
185// printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw);
186 // For each PTE
187 for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) {
188 // Don't overrun the PRAMIN window
189			if (pte_offset + j * sizeof(page_tbl_entry_v1_t) >= g->regs + NV_PRAMIN + NV_PRAMIN_LEN)
190 return 0;
191 pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4);
192 pte.raw <<= 32;
193 pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t));
194 // Skip non-present PTEs
195 if (!pte.is_present)
196 continue;
197// printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw);
198 // If we find a matching PTE, return its virtual address
199 if ((uint64_t)pte.addr << 12 == addr_to_find)
200 return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1];
201
202 }
203 } while (++i < NV_MMU_PT_V1_SZ[0]);
204 return 0;
205}
206
207/* GMMU Page Tables Version 0
208 This page table only contains 2 levels and is used in the Tesla architecture
209*/
210/* *** UNTESTED ***
211#define NV_MMU_PT_V0_SZ 2048
212#define NV_MMU_PT_V0_LSB 29
213uint64_t search_v0_page_directory(struct nvdebug_state *g,
214 void __iomem *pde_offset,
215 void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t),
216 uint32_t addr_to_find) {
217 int j, i = 0;
218 page_dir_entry_v0_t pde;
219 page_tbl_entry_v0_t pte;
220 void __iomem *pte_offset;
221 // For each PDE
222 do {
223 // readq doesn't seem to work on BAR0
224 pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4);
225 pde.raw <<= 32;
226 pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t));
227 //if (pde.raw)
228 //printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v1_t), pde.raw);
229 // Skip unpopulated PDEs
230 if (pde.type == NOT_PRESENT)
231 continue;
232 //printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12);
233 pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12);
234 // For each PTE
235 for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) {
236 pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4);
237 pte.raw <<= 32;
238 pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t));
239 // Skip non-present PTEs
240 if (!pte.is_present)
241 continue;
242 // If we find a matching PTE, return its virtual address
243 //if (pte.addr != 0x5555555)
244 // printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present");
245 if (pte.addr << 12 == addr_to_find)
246 return i << NV_MMU_PT_V0_LSB | j << 12;
247 }
248 } while (++i < NV_MMU_PT_V0_SZ);
249 return 0; // No match
250}
251*/
diff --git a/nvdebug.h b/nvdebug.h
index 9ac71da..1882756 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -5,14 +5,18 @@
5// TODO(jbakita): Don't depend on these. 5// TODO(jbakita): Don't depend on these.
6#include <nvgpu/gk20a.h> // For struct gk20a 6#include <nvgpu/gk20a.h> // For struct gk20a
7#include <os/linux/os_linux.h> // For struct nvgpu_os_linux 7#include <os/linux/os_linux.h> // For struct nvgpu_os_linux
8#include <linux/proc_fs.h> // For PDE_DATA() macro
8 9
9/* Runlist Channel 10/* Runlist Channel
10 A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue 11 A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue
11 of GPU commands. These commands are typically queued from userspace. 12 of GPU commands. These commands are typically queued from userspace.
12 13
13 `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU 14 Prior to Volta, channels could also exist independent of a TSG. These are
14 virtual address space for this context. All channels in a TSG point to the 15 called "bare channels" in the Jetson nvgpu driver.
15 same GPU Instance Block (?). 16
17 `INST_PTR` points to a GPU Instance Block which contains FIFO states, virtual
18 address space configuration for this context, and a pointer to the page
19 tables. All channels in a TSG point to the same GPU Instance Block (?).
16 20
17 "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and 21 "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and
18 thereby which PBDMA will run the channel. Increasing values select 22 thereby which PBDMA will run the channel. Increasing values select
@@ -30,7 +34,13 @@
30 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN 34 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN
31 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) 35 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID)
32 RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if 36 RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if
33 more than one PBDMA is supported by the runlist 37 more than one PBDMA is supported by the runlist,
38 additionally, "A value of 0 targets the first FE
39 pipe, which can process all FE driven engines:
40 Graphics, Compute, Inline2Memory, and TwoD. A value
41 of 1 targets the second FE pipe, which can only
42 process Compute work. Note that GRCE work is allowed
43 on either runqueue.)"
34 44
35 INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer 45 INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer
36 INST_PTR_HI : upper 32 bit of instance block pointer 46 INST_PTR_HI : upper 32 bit of instance block pointer
@@ -39,6 +49,9 @@
39 USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer 49 USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer
40 USERD_PTR_HI : upper 32 bits of USERD pointer 50 USERD_PTR_HI : upper 32 bits of USERD pointer
41 USERD_TARGET (TGU) : aperture of the USERD data structure 51 USERD_TARGET (TGU) : aperture of the USERD data structure
52
53 Channels were around since at least Fermi, but were rearranged with Volta to
54 add a USERD pointer, a longer INST pointer, and a runqueue selector flag.
42*/ 55*/
43enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; 56enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1};
44enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; 57enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3};
@@ -52,11 +65,12 @@ static inline char* target_to_text(enum INST_TARGET t) {
52 return "SYS_MEM_NONCOHERENT"; 65 return "SYS_MEM_NONCOHERENT";
53 default: 66 default:
54 printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); 67 printk(KERN_WARNING "[nvdebug] Invalid aperture!\n");
55 return NULL; 68 return "INVALID";
56 } 69 }
57} 70}
58 71
59struct runlist_chan { 72// Support: Volta, Ampere, Turing
73struct gv100_runlist_chan {
60// 0:63 74// 0:63
61 enum ENTRY_TYPE entry_type:1; 75 enum ENTRY_TYPE entry_type:1;
62 uint32_t runqueue_selector:1; 76 uint32_t runqueue_selector:1;
@@ -71,6 +85,20 @@ struct runlist_chan {
71 uint32_t inst_ptr_hi:32; 85 uint32_t inst_ptr_hi:32;
72} __attribute__((packed)); 86} __attribute__((packed));
73 87
88// Support: Fermi, Kepler*, Maxwell, Pascal
89// *In Kepler, inst fields may be unpopulated?
90struct gm107_runlist_chan {
91 uint32_t chid:12;
92 uint32_t padding0:1;
93 enum ENTRY_TYPE entry_type:1;
94 uint32_t padding1:18;
95 uint32_t inst_ptr_lo:20;
96 enum INST_TARGET inst_target:2; // Totally guessing on this
97 uint32_t padding2:10;
98} __attribute__((packed));
99
100#define gk110_runlist_chan gm107_runlist_chan
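
// Illustrative sketch (not part of this commit): per the INST_PTR_LO/HI
// description above, the 4k-aligned instance block pointer of a Volta-style
// channel entry is reassembled as:
static inline uint64_t chan_inst_ptr(const struct gv100_runlist_chan *chan) {
	return ((uint64_t)chan->inst_ptr_hi << 32)
	       | ((uint64_t)chan->inst_ptr_lo << 12);
}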
101
74/* Runlist TSG (TimeSlice Group) 102/* Runlist TSG (TimeSlice Group)
75 The runlist is composed of timeslice groups (TSG). Each TSG corresponds 103 The runlist is composed of timeslice groups (TSG). Each TSG corresponds
76 to a single virtual address space on the GPU and contains `TSG_LENGTH` 104 to a single virtual address space on the GPU and contains `TSG_LENGTH`
@@ -85,8 +113,15 @@ struct runlist_chan {
85 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice 113 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice
86 TSG_LENGTH : number of channels that are part of this timeslice group 114 TSG_LENGTH : number of channels that are part of this timeslice group
87 TSGID : identifier of the Timeslice group (overlays ENTRY_ID) 115 TSGID : identifier of the Timeslice group (overlays ENTRY_ID)
116
117 TSGs appear to have been introduced with Kepler and stayed the same until
118 they were rearranged at the time of channel rearrangement to support longer
119 GPU instance addresses with Volta.
88*/ 120*/
89struct entry_tsg { 121
122// Support: Volta, Ampere*, Turing*
123// *These treat the top 8 bits of TSGID as GFID (unused)
124struct gv100_runlist_tsg {
90// 0:63 125// 0:63
91 enum ENTRY_TYPE entry_type:1; 126 enum ENTRY_TYPE entry_type:1;
92 uint64_t padding:15; 127 uint64_t padding:15;
@@ -101,14 +136,28 @@ struct entry_tsg {
101} __attribute__((packed)); 136} __attribute__((packed));
102#define MAX_TSGID (1 << 12) 137#define MAX_TSGID (1 << 12)
103 138
139// Support: Kepler (v2?), Maxwell, Pascal
140// Same fields as Volta except tsg_length is 6 bits rather than 8
141// Last 32 bits appear to contain an undocumented inst ptr
142struct gk110_runlist_tsg {
143 uint32_t tsgid:12;
144 uint32_t padding0:1;
145 enum ENTRY_TYPE entry_type:1;
146 uint32_t timeslice_scale:4;
147 uint32_t timeslice_timeout:8;
148 uint32_t tsg_length:6;
149 uint32_t padding1:32;
150} __attribute__((packed));
151
152
104enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; 153enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1};
105 154
106/* Preempt a TSG or Channel by ID 155/* Preempt a TSG or Channel by ID
107 ID/CHID : Id of TSG or channel to preempt 156 ID/CHID : Id of TSG or channel to preempt
108 IS_PENDING : ???? 157 IS_PENDING : Is a context switch pending?
109 TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG 158 TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG
110 159
111 Support: Kepler, Maxwell, Pascal, Volta 160 Support: Kepler, Maxwell, Pascal, Volta, Turing
112*/ 161*/
113#define NV_PFIFO_PREEMPT 0x00002634 162#define NV_PFIFO_PREEMPT 0x00002634
114typedef union { 163typedef union {
@@ -195,26 +244,36 @@ typedef union {
195 */ 244 */
196 245
197// Note: This is different with Turing 246// Note: This is different with Turing
198// Support: Kepler, Maxwell, Pascal, Volta 247// Support: Fermi, Kepler, Maxwell, Pascal, Volta
199#define NV_PFIFO_RUNLIST_BASE 0x00002270 248#define NV_PFIFO_RUNLIST_BASE 0x00002270
249#define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8)
200typedef union { 250typedef union {
201 struct { 251 struct {
202 uint32_t ptr:28; 252 uint32_t ptr:28;
203 uint32_t type:2; 253 enum INST_TARGET target:2;
204 uint32_t padding:2; 254 uint32_t padding:2;
205 } __attribute__((packed)); 255 } __attribute__((packed));
206 uint32_t raw; 256 uint32_t raw;
207} runlist_base_t; 257} runlist_base_t;
208 258
209// Support: Kepler, Maxwell, Pascal, Volta 259// Support: Kepler, Maxwell, Pascal, Volta
260// Works on Fermi, but id is one bit longer and is b11111
210#define NV_PFIFO_RUNLIST 0x00002274 261#define NV_PFIFO_RUNLIST 0x00002274
262#define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8)
211typedef union { 263typedef union {
264 // RUNLIST fields
212 struct { 265 struct {
213 uint32_t len:16; 266 uint32_t len:16;
214 uint32_t padding:4; 267 uint32_t padding:4;
215		uint32_t id:4; 268		uint32_t id:4; // Runlist ID (each engine may have a separate runlist)
216 uint32_t padding2:8; 269 uint32_t padding2:8;
217 } __attribute__((packed)); 270 } __attribute__((packed));
271 // ENG_RUNLIST fields that differ
272 struct {
273 uint32_t padding3:20;
274 bool is_pending:1; // Is runlist not yet committed?
275 uint32_t padding4:11;
276 } __attribute__((packed));
218 uint32_t raw; 277 uint32_t raw;
219} runlist_info_t; 278} runlist_info_t;
220 279
@@ -301,63 +360,631 @@ typedef union {
301 uint32_t raw; 360 uint32_t raw;
302} runlist_disable_t; 361} runlist_disable_t;
303 362
363/* Read GPU descriptors from the Master Controller (MC)
364
365  MINOR_REVISION : Legacy (only used with Kelvin in Nouveau)
366  MAJOR_REVISION : Legacy (only used with Kelvin in Nouveau)
367 IMPLEMENTATION : Which implementation of the GPU architecture
368 ARCHITECTURE : Which GPU architecture
369
370  CHIP_ID = (ARCHITECTURE << 4) | IMPLEMENTATION
371 CHIP_ID : Unique ID of all chips since Kelvin
372
373  Support: Kelvin, Rankine, Curie, Tesla, Fermi, Kepler, Maxwell, Pascal,
374 Volta, Turing, Ampere
375*/
376#define NV_MC_BOOT_0 0x00000000
377#define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060
378#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU
379#define NV_CHIP_ID_KEPLER 0x0E0
380#define NV_CHIP_ID_VOLTA 0x140
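// Worked example: GV11B has CHIP_ID 0x15B, i.e. ARCHITECTURE = 0x15
// (integrated Volta; see ARCH2NAME below) and IMPLEMENTATION = 0xB,
// since (0x15 << 4) | 0xB == 0x15B.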
381
382inline static const char* ARCH2NAME(uint32_t arch) {
383 switch (arch) {
384 case 0x01:
385 return "Celsius";
386 case 0x02:
387 return "Kelvin";
388 case 0x03:
389		return "Rankine";
390 case 0x04:
391 case 0x06: // 0x06 is (nForce 6XX integrated only)
392 return "Curie";
393 // 0x07 is unused/skipped
394 case 0x05: // First Tesla card was released before the nForce 6XX
395 case 0x08:
396 case 0x09:
397 case 0x0A:
398 return "Tesla";
399 // 0x0B is unused/skipped
400 case 0x0C:
401 case 0x0D:
402 return "Fermi";
403 case 0x0E:
404 case 0x0F:
405 case 0x11:
406 return "Kepler";
407 case 0x12:
408 return "Maxwell";
409 case 0x13:
410 return "Pascal";
411 case 0x14:
412 case 0x15: // Volta integrated
413 return "Volta";
414 case 0x16:
415 return "Turing";
416 case 0x17:
417 return "Ampere";
418 case 0x18:
419 case 0x19:
420 return "Hopper (?) or Lovelace (?)";
421 default:
422 if (arch < 0x19)
423 return "[unknown historical architecture]";
424 else
425 return "[future]";
426 }
427}
428
429typedef union {
430 // Fields as defined in the NVIDIA reference
431 struct {
432 uint32_t minor_revision:4;
433 uint32_t major_revision:4;
434 uint32_t reserved:4;
435 uint32_t padding0:8;
436 uint32_t implementation:4;
437 uint32_t architecture:5;
438 uint32_t padding1:3;
439 } __attribute__((packed));
440 uint32_t raw;
441 // Arch << 4 + impl is also often used
442 struct {
443 uint32_t padding2:20;
444 uint32_t chip_id:9;
445 uint32_t padding3:3;
446 } __attribute__((packed));
447} mc_boot_0_t;
448
449enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};
450enum ENGINE_TYPES {
451 ENGINE_GRAPHICS = 0, // GRAPHICS [/compute]
452 ENGINE_COPY0 = 1, // [raw/physical] COPY #0
453 ENGINE_COPY1 = 2, // [raw/physical] COPY #1
454 ENGINE_COPY2 = 3, // [raw/physical] COPY #2
455
456 ENGINE_MSPDEC = 8, // Picture DECoder
457 ENGINE_MSPPP = 9, // [Video] Post Processing
458 ENGINE_MSVLD = 10, // [Video] Variable Length Decoder
459 ENGINE_MSENC = 11, // [Video] ENCoding
460 ENGINE_VIC = 12, // Video Image Compositor
461 ENGINE_SEC = 13, // SEquenCer [?]
462 ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0
463 ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1
464 ENGINE_NVDEC = 16, // Nvidia Video DECoder
465
466 ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least]
467 ENGINE_LCE = 19, // Logical Copy Engine
468 ENGINE_GSP = 20, // Gpu System Processor
469 ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+)
470};
471#define ENGINE_TYPES_LEN 22
472static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
473 "Graphics/Compute",
474 "COPY0",
475 "COPY1",
476 "COPY2",
477 "Unknown Engine ID#4",
478 "Unknown Engine ID#5",
479 "Unknown Engine ID#6",
480 "Unknown Engine ID#7",
481 "MSPDEC: Picture Decoder",
482 "MSPPP: Post Processing",
483 "MSVLD: Variable Length Decoder",
484 "MSENC: Encoder",
485 "VIC: Video Image Compositor",
486 "SEC: Sequencer",
487 "NVENC0: NVIDIA Video Encoder #0",
488 "NVENC1: NVIDIA Video Encoder #1",
489 "NVDEC: NVIDIA Video Decoder",
490 "Unknown Engine ID#17",
491 "IOCTRL: I/O Controller",
492 "LCE: Logical Copy Engine",
493 "GSP: GPU System Processor",
494 "NVJPG: NVIDIA JPEG Decoder",
495};
496
497/* GPU engine information and control register offsets
498 Each engine is described by one or more entries (terminated by an entry with
499 the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A
500  typical device, such as the graphics/compute engine or a copy engine, is
501  described by three entries, one of each type.
502
503 The PTOP_DEVICE_INFO table is sparsely populated (entries of type
504 INFO_TYPE_NOT_VALID may be intermingled with valid entries), so any traversal
505 code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate
506 upon reaching the first entry of INFO_TYPE_NOT_VALID.
507
508 INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry?
509 HAS_NEXT_ENTRY : Does the following entry refer to the same engine?
510
511 == INFO_TYPE_DATA fields ==
512 PRI_BASE : BAR0 base = (PRI_BASE << 12) aka 4k aligned.
513 INST_ID : "Note that some instanced [engines] (such as logical copy
514 engines aka LCE) share a PRI_BASE across all [engines] of
515 the same engine type; such [engines] require an additional
516                    offset: instanced base = BAR0 base + stride * INST_ID."
517 FAULT_ID_IS_VALID : Does this engine have its own bind point and fault ID
518 with the MMU?
519 FAULT_ID : "The MMU fault id used by this [engine]. These IDs
520 correspond to the NV_PFAULT_MMU_ENG_ID define list."
521
522 == INFO_TYPE_ENUM fields ==
523 ENGINE_IS_VALID : Is this engine a host engine?
524 ENGINE_ENUM : "[T]he host engine ID for the current [engine] if it is
525 a host engine, meaning Host can send methods to the
526 engine. This id is used to index into any register array
527 whose __SIZE_1 is equal to NV_HOST_NUM_ENGINES. A given
528 ENGINE_ENUM can be present for at most one device in the
529 table. Devices corresponding to all ENGINE_ENUM ids 0
530 through NV_HOST_NUM_ENGINES - 1 must be present in the
531 device info table."
532 RUNLIST_IS_VALID : Is this engine a host engine with a runlist?
533 RUNLIST_ENUM : "[T]he Host runlist ID on which methods for the current
534 [engine] should be submitted... The runlist id is used to
535 index into any register array whose __SIZE_1 is equal to
536 NV_HOST_NUM_RUNLISTS. [Engines] corresponding to all
537 RUNLIST_ENUM ids 0 through NV_HOST_NUM_RUNLISTS - 1 must
538 be present in the device info table."
539 INTR_IS_VALID : Does this device have an interrupt?
540 INTR_ENUM : Interrupt ID for use with "the NV_PMC_INTR_*_DEVICE
541 register bitfields."
542 RESET_IS_VALID : Does this engine have a reset ID?
543 RESET_ENUM : Reset ID for use indexing the "NV_PMC_ENABLE_DEVICE(i)
544 and NV_PMC_ELPG_ENABLE_DEVICE(i) register bitfields."
545
546 == INFO_TYPE_ENGINE_TYPE fields ==
547 ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES)
548
549 Support: Kepler, Maxwell, Pascal, Volta, Ampere
550 See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info.
551*/
552#define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4)
553#define NV_PTOP_DEVICE_INFO__SIZE_1 64
554typedef union {
555 // DATA type fields
556 struct {
557 enum DEVICE_INFO_TYPE info_type:2;
558 bool fault_id_is_valid:1;
559 uint32_t fault_id:7;
560 uint32_t padding0:2;
561 uint32_t pri_base:12;
562 uint32_t padding1:2;
563 uint32_t inst_id:4;
564 uint32_t is_not_enum2:1;
565 bool has_next_entry:1;
566 } __attribute__((packed));
567 // ENUM type fields
568 struct {
569 uint32_t padding2:2;
570 bool reset_is_valid:1;
571 bool intr_is_valid:1;
572 bool runlist_is_valid:1;
573 bool engine_is_valid:1;
574 uint32_t padding3:3;
575 uint32_t reset_enum:5;
576 uint32_t padding4:1;
577 uint32_t intr_enum:5;
578 uint32_t padding5:1;
579 uint32_t runlist_enum:4;
580 uint32_t padding6:1;
581 uint32_t engine_enum:4;
582 uint32_t padding7:2;
583 } __attribute__((packed));
584 // ENGINE_TYPE type fields
585 struct {
586 uint32_t padding8:2;
587 enum ENGINE_TYPES engine_type:29;
588 uint32_t padding9:1;
589 } __attribute__((packed));
590 uint32_t raw;
591} ptop_device_info_t;
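
/* Illustrative usage sketch (not part of this commit; assumes the
   nvdebug_readl() helper defined later in this header):

	ptop_device_info_t entry;
	int i, runlists = 0;
	for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) {
		entry.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(i));
		if (entry.info_type == INFO_TYPE_ENUM && entry.runlist_is_valid)
			runlists++;
	}

   Note that the scan covers all slots, per the sparsity warning above. */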
592
593#define NV_PTOP_SCAL_NUM_GPCS 0x00022430
594#define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434
595#define NV_PTOP_SCAL_NUM_CES 0x00022444
596// PCE_MAP is Volta+ only
597#define NV_CE_PCE_MAP 0x00104028
598
599// GPC and TPC masks
600// Support: Maxwell+
601#define NV_FUSE_GPC 0x00021c1c
602#define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4)
603
604/* Location of the 1Kb instance block with page tables for BAR1 and BAR2.
605 Support: Fermi+ (?), Pascal
606*/
607#define NV_PBUS_BAR1_BLOCK 0x00001704
608#define NV_PBUS_BAR2_BLOCK 0x00001714
609typedef union {
610 struct {
611 uint32_t ptr:28;
612 enum INST_TARGET target:2;
613 uint32_t padding0:1;
614 bool is_virtual:1;
615 } __attribute__((packed));
616 uint32_t raw;
617 struct {
618 uint32_t map:30;
619 uint32_t padding1:2;
620 } __attribute__((packed));
621} bar_config_block_t;
622
623/* BAR0 PRAMIN (Private RAM Instance) window configuration
624
625 BASE : Base of window >> 16 in [TARGET] virtual address space
626 TARGET : Which address space BASE points into
627
628 Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes
629
630 Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere
631*/
632#define NV_PBUS_BAR0_WINDOW 0x00001700
633#define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window)
634#define NV_PRAMIN_LEN 0x00100000
635typedef union {
636 struct {
637 uint32_t base:24;
638 enum INST_TARGET target:2;
639 uint32_t padding0:6;
640 } __attribute__((packed));
641 uint32_t raw;
642} bar0_window_t;
643
644// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere
645#define NV_PRAMIN_PDB_CONFIG_OFF 0x200
646typedef union {
647 struct {
648 uint32_t target:2;
649 uint32_t vol:1;
650 uint32_t padding0:1;
651 uint32_t fault_replay_tex:1;
652 uint32_t fault_replay_gcc:1;
653 uint32_t padding1:4;
654 bool is_ver2:1;
655		bool is_64k_big_page:1; // 128KB otherwise
656 uint32_t page_dir_lo:20;
657 uint32_t page_dir_hi:32;
658 } __attribute__((packed));
659 uint64_t raw;
660} page_dir_config_t;
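
// Illustrative sketch (not part of this commit): the page directory base
// encoded above is 4k-aligned, so the full address is reassembled as:
static inline uint64_t pdb_base(page_dir_config_t pdb) {
	return ((uint64_t)pdb.page_dir_hi << 32)
	       | ((uint64_t)pdb.page_dir_lo << 12);
}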
661
662/* Page directory entry
663
664 Note: Format changed with Pascal (how?)
665
666 Support: Pascal, Volta, Turing, Ampere
667*/
668// FIXME: PDE/PTEs are actually 64 bits =S
669// Important: Aperture keys are different with PDEs
670enum PD_TARGET {
671 PD_AND_TARGET_INVALID = 0, // b000
672 PD_AND_TARGET_VID_MEM = 2, // b010
673 PD_AND_TARGET_SYS_MEM_COHERENT = 4, // b100
674 PD_AND_TARGET_SYS_MEM_NONCOHERENT = 6, // b110
675 PTE_AND_TARGET_VID_MEM = 1, // b001
676 PTE_AND_TARGET_PEER = 3, // b011
677 PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101
678 PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111
679};
680static inline char* pd_target_to_text(enum PD_TARGET t) {
681 switch (t) {
682 case PD_AND_TARGET_INVALID:
683 return "INVALID";
684 case PD_AND_TARGET_VID_MEM:
685 case PTE_AND_TARGET_VID_MEM:
686 return "VID_MEM";
687 case PTE_AND_TARGET_PEER:
688 return "PEER";
689 case PD_AND_TARGET_SYS_MEM_COHERENT:
690 case PTE_AND_TARGET_SYS_MEM_COHERENT:
691 return "SYS_MEM_COHERENT";
692 case PD_AND_TARGET_SYS_MEM_NONCOHERENT:
693 case PTE_AND_TARGET_SYS_MEM_NONCOHERENT:
694 return "SYS_MEM_NONCOHERENT";
695 default:
696 printk(KERN_WARNING "[nvdebug] Invalid aperture!\n");
697		return "INVALID";
698 }
699}
700
701// PDE/PTE V2 type
702// Note: As the meaning of target (bits 2:1) changes depending on if the entry
703// is a PTE or not, this combines them into a single target field to
704// simplify comparisons.
705// Support: Pascal, Turing, Ampere
706typedef union {
707 // Page Directory Entry (PDE)
708 struct {
709 bool is_pte:1;
710 uint32_t __target:2;
711 bool is_volatile:1;
712 uint32_t padding1:4;
713 uint32_t addr:24;
714 } __attribute__((packed));
715 // Page Table Entry (PTE)
716 struct {
717 enum PD_TARGET target:3;
718 uint32_t __is_volatile:1;
719 bool is_encrypted:1;
720 bool is_privileged:1;
721 bool is_readonly:1;
722 bool atomics_disabled:1;
723 uint32_t __addr:24;
724 } __attribute__((packed));
725 uint32_t raw;
726} page_dir_entry_t;
727
728// PDE/PTE V1 types
729// Support: Fermi, Kepler, Maxwell
730enum V1_PD_TARGET {
731 PD_TARGET_INVALID = 0,
732 PD_TARGET_VID_MEM = 1,
733 PD_TARGET_SYS_MEM_COHERENT = 2,
734 PD_TARGET_SYS_MEM_NONCOHERENT = 3,
735};
736// Page Directory Entry (PDE)
737typedef union {
738// Large page fields
739 struct {
740// 0:32
741 enum V1_PD_TARGET target:2;
742 uint32_t padding0:2;
743 uint64_t addr:28; // May be wider?
744// 32:63
745 uint32_t padding2:3;
746 uint32_t is_volatile:1; // Might have counted wrong?
747 uint32_t padding3:28;
748 } __attribute__((packed));
749// Small page fields
750 struct {
751// 0:32
752 uint32_t padding00:32;
753// 32:63
754 enum V1_PD_TARGET alt_target:2;
755 uint32_t alt_is_volatile:1; // Might have counted wrong?
756 uint32_t padding03:1;
757 uint64_t alt_addr:28;
758 } __attribute__((packed));
759 uint64_t raw;
760} page_dir_entry_v1_t;
761// Page Table Entry (PTE)
762// Reconstructed from info in Jetson nvgpu driver
763typedef union {
764 struct {
765// 0:32
766 bool is_present:1;
767 bool is_privileged:1;
768 bool is_readonly:1;
769 uint32_t padding0:1;
770 uint64_t addr:28;
771// 32:63
772 bool is_volatile:1;
773		enum INST_TARGET target:2;
774 uint32_t padding1:1;
775 uint32_t kind:8;
776 uint32_t comptag:17;
777 uint32_t padding2:1;
778 bool is_read_disabled:1;
779 bool is_write_disabled:1;
780 } __attribute__((packed));
781 uint64_t raw;
782} page_tbl_entry_v1_t;
783//enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3};
784//enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3};
785//static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024};
786/* PDE V0 (nv50/Tesla)
787typedef union {
788 struct {
789		enum V0_PDE_TYPE type:2;
790		enum INST_TARGET target:2;
791		uint32_t padding0:1;
792		enum V0_PDE_SIZE sublevel_size:2;
793 uint32_t padding1:5;
794 uint32_t addr:28;
795 uint32_t padding2:24;
796 } __attribute__((packed));
797 uint64_t raw;
798} page_dir_entry_v0_t;*/
799/* PTE V0 (nv50)
800typedef union {
801 struct {
802 bool is_present:1;
803 uint32_t padding3:2;
804 bool is_readonly:1;
805 enum INST_TARGET target:2;
806 bool is_privileged:1;
807 uint32_t contig_blk_sz:3;
808 uint32_t padding4:2;
809 uint32_t addr:28;
810 uint32_t storage_type:7; // ???
811 uint32_t compression_mode:2; // ???
812 uint32_t compression_tag:12; // ???
813 bool is_long_partition_cycle:1; // ???
814 bool is_encrypted:1;
815 uint32_t padding5:1;
816 } __attribute__((packed));
817 uint64_t raw;
818} page_tbl_entry_v0_t;*/
819
304// TODO(jbakita): Maybe put the above GPU types in a different file. 820// TODO(jbakita): Maybe put the above GPU types in a different file.
305 821
306#define for_chan_in_tsg(chan, tsg) \ 822#define NV_PCI_VENDOR 0x10de
307 for (chan = (struct runlist_chan*)(tsg + 1); \ 823struct nvdebug_state {
308 (void*)chan < (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length; \ 824 // Pointer to the mapped base address of the GPU control registers (obtained
309 chan++) 825 // via ioremap() originally). For embedded GPUs, we extract this from their
826 // struct nvgpu_os_linux. For discrete GPUs, we create our own mapping of
827 // BAR0 with pci_iomap(). Access via nvgpu_readl/writel functions.
828 void __iomem *regs;
829 // Depending on the architecture, BAR2 or BAR3 are used to access PRAMIN
830 union {
831 void __iomem *bar2;
832 void __iomem *bar3;
833 };
834 int chip_id;
835 // Additional state from the built-in driver. Only set iff
836 // chip_id == NV_CHIP_ID_GV11B
837 struct gk20a *g;
838 // Pointer to PCI device needed for pci_iounmap
839 struct pci_dev *pcid;
840};
841
842/*const struct runlist_funcs {
843 u8 size;
844 enum ENTRY_TYPE (*entry_type)(struct nvdebug_state *, void *);
845 uint32_t (*chid)(struct nvdebug_state *, void *);
846 uint32_t (*inst_ptr_lo)(struct nvdebug_state *, void *);
847 enum INST_TARGET (*inst_target)(struct nvdebug_state *, void *):
848 uint32_t (*tsgid)(struct nvdebug_state *, void *);
849 uint32_t (*timeslice_scale)(struct nvdebug_state *, void *);
850 uint32_t (*timeslice_timeout)(struct nvdebug_state *, void *);
851 uint32_t (*tsg_length)(struct nvdebug_state *, void *);
852};*/
853
854// This disgusting macro is a crutch to work around the fact that runlists were
855// different prior to Volta.
856#define VERSIONED_RL_ACCESSOR(_ENTRY_TYPE, type, prop) \
857 __attribute__((unused)) \
858 static type (prop)(const struct nvdebug_state *g, const void *raw) { \
859		if (g->chip_id >= NV_CHIP_ID_VOLTA) { \
860 const struct gv100_runlist_ ## _ENTRY_TYPE *entry = (struct gv100_runlist_ ## _ENTRY_TYPE*)raw; \
861 return entry->prop; \
862		} else if (g->chip_id >= NV_CHIP_ID_KEPLER) { \
863 const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \
864 return entry->prop; \
865 } else { \
866 printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \
867 return (type)0; \
868 } \
869 }
870
871VERSIONED_RL_ACCESSOR(chan, uint32_t, chid);
872VERSIONED_RL_ACCESSOR(chan, uint32_t, inst_ptr_lo);
873VERSIONED_RL_ACCESSOR(chan, enum INST_TARGET, inst_target);
874VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsgid);
875VERSIONED_RL_ACCESSOR(tsg, enum ENTRY_TYPE, entry_type);
876VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_scale);
877VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_timeout);
878VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length);
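
// Usage sketch: given a `void *entry` read out of a runlist on device `g`,
// `tsgid(g, entry)` or `chid(g, entry)` decode the field via whichever entry
// layout matches the detected chip, warning on pre-Kepler GPUs.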
310 879
311#define next_tsg(tsg) \ 880
312 (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length 881#define NV_RL_ENTRY_SIZE(g) \
882 ((g)->chip_id >= NV_CHIP_ID_VOLTA ? sizeof(struct gv100_runlist_tsg) : sizeof(struct gk110_runlist_tsg))
883
884#define for_chan_in_tsg(g, chan, tsg) \
885 for (chan = (typeof(chan))(((u8*)tsg) + NV_RL_ENTRY_SIZE(g)); \
886 (u8*)chan < ((u8*)tsg) + (1 + tsg_length(g, tsg)) * NV_RL_ENTRY_SIZE(g); \
887 chan = (typeof(chan))(((u8*)chan) + NV_RL_ENTRY_SIZE(g)))
888
889#define next_tsg(g, tsg) \
890 (typeof(tsg))((u8*)(tsg) + NV_RL_ENTRY_SIZE(g) * (tsg_length(g, tsg) + 1))
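
/* Illustrative usage sketch (not part of this commit): walk every channel of
   a TSG entry, then advance to the following TSG, independent of the
   per-architecture entry layout:

	void *chan;
	for_chan_in_tsg(g, chan, tsg)
		printk(KERN_INFO "[nvdebug] chid %d\n", chid(g, chan));
	tsg = next_tsg(g, tsg);
*/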
313 891
314struct runlist_iter { 892struct runlist_iter {
315 struct entry_tsg *curr_tsg; 893 // Pointer to either a TSG or channel entry (they're the same size)
894 void *curr_entry;
895 // This should be set to tsg_length when a TSG is reached, and
896 // decremented as each subsequent channel is printed. This allows us to
897 // track which channel are and are not part of the TSG.
898 int channels_left_in_tsg;
899 // Total runlist length, etc
316 runlist_info_t rl_info; 900 runlist_info_t rl_info;
317}; 901};
318 902
903#define NVDEBUG_MAX_DEVICES 8
904extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
905
319// Defined in runlist.c 906// Defined in runlist.c
320struct gk20a* get_live_gk20a(void); 907int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter);
321int get_runlist_iter(struct runlist_iter *rl_iter); 908int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id);
322int preempt_tsg(uint32_t tsg_id); 909
910// Defined in mmu.c
911uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr);
912void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy);
913uint64_t search_page_directory(
914 struct nvdebug_state *g,
915 void __iomem *pde_offset,
916 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
917 uint64_t addr_to_find);
918uint64_t search_v1_page_directory(
919 struct nvdebug_state *g,
920 void __iomem *pde_offset,
921 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
922 uint64_t addr_to_find);
923
323 924
324static inline struct gk20a *get_gk20a(struct device *dev) { 925static inline struct gk20a *get_gk20a(struct device *dev) {
325 // XXX: Only works because gk20a* is the first member of gk20a_platform 926 // XXX: Only works because gk20a* is the first member of gk20a_platform
326 return *((struct gk20a**)dev_get_drvdata(dev)); 927 return *((struct gk20a**)dev_get_drvdata(dev));
327} 928}
328 929
329// Functionally identical to nvgpu_readl() 930// We use the data field of the proc_dir_entry ("PDE" in this function) to store
931// our index into the g_nvdebug_state array
932static inline int seq2gpuidx(struct seq_file *s) {
933 const struct file *f = s->file;
934 return (uintptr_t)PDE_DATA(file_inode(f));
935}
936static inline int file2gpuidx(const struct file *f) {
937 return (uintptr_t)PDE_DATA(file_inode(f));
938}
939static inline int file2parentgpuidx(const struct file *f) {
940 // Should be safe to call on ProcFS entries, as our parent should (?)
941 // still exist if we're called. If not, there are worse races in this
942 // module.
943 return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode);
944}
945
946#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
947
948// Similar to nvgpu_readl()
330// (except we don't try to resolve situations where regs is NULL) 949// (except we don't try to resolve situations where regs is NULL)
331static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { 950static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
332 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); 951 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
333 if (unlikely(!g_os->regs)) { 952 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n");
334 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); 953 return -1;
335 return -1; 954 }
336 } 955 return readl(s->regs + r);
337 return readl(g_os->regs + r);
338} 956}
339 957
340// quadword version of nvdebug_readl() 958// quadword version of nvdebug_readl()
341static inline u64 nvdebug_readq(struct gk20a* g, u32 r) { 959static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
342 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); 960 u64 ret;
343 u64 ret; 961 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
344 if (unlikely(!g_os->regs)) { 962 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n");
345 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); 963 return -1;
346 return -1; 964 }
347 }
348 // readq seems to always return the uppermost 32 bits as 0, so workaround with readl 965 // readq seems to always return the uppermost 32 bits as 0, so workaround with readl
349 ret = readl(g_os->regs + r); 966 ret = readl(s->regs + r);
350 ret |= ((u64)readl(g_os->regs + r + 4)) << 32; 967 ret |= ((u64)readl(s->regs + r + 4)) << 32;
351 return ret; 968 return ret;
352} 969}
353 970
354// Functionally identical to nvgpu_writel() 971// Similar to nvgpu_writel()
355static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { 972static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
356 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); 973 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
357 if (unlikely(!g_os->regs)) { 974 printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n");
975 return;
976 }
977 writel_relaxed(v, s->regs + r);
978 wmb();
979}
980
981// quadword version of nvdebug_writel()
982// XXX: Untested; this probably doesn't work
983static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
984 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
358 printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); 985 printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n");
359 return; 986 return;
360 } 987 }
361 writel_relaxed(v, g_os->regs + r); 988 writeq_relaxed(v, s->regs + r);
362 wmb(); 989 wmb();
363} 990}
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 0854b8b..695b5fd 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -2,64 +2,282 @@
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 */ 3 */
4 4
5/* TODO
6 * - Add sysfs trigger for a preemption
7 */
8
9#include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type 5#include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type
6#include <linux/interrupt.h> // For hooking the nvidia driver interrupts
10#include <linux/kernel.h> 7#include <linux/kernel.h>
11#include <linux/module.h> 8#include <linux/module.h>
12#include <linux/proc_fs.h> // So we can set up entries in /proc 9#include <linux/pci.h> // For PCI device scanning
10#include <linux/proc_fs.h> // So we can set up entries in /proc
13 11
14#include "nvdebug.h" 12#include "nvdebug.h"
13#include "stubs.h"
15 14
16// LIAR. But without this we can't use GPL-only exported symbols like 15// MIT is GPL-compatible. We need to be GPL-compatible for symbols like
17// platform_bus_type or bus_find_device_by_name... 16// platform_bus_type or bus_find_device_by_name...
18MODULE_LICENSE("GPL"); 17MODULE_LICENSE("Dual MIT/GPL");
19MODULE_AUTHOR("Joshua Bakita"); 18MODULE_AUTHOR("Joshua Bakita");
20MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); 19MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
21MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now
22 20
23extern const struct file_operations runlist_file_ops; 21extern const struct file_operations runlist_file_ops;
24extern const struct file_operations preempt_tsg_file_ops; 22extern const struct file_operations preempt_tsg_file_ops;
25extern const struct file_operations disable_channel_file_ops; 23extern const struct file_operations disable_channel_file_ops;
26extern const struct file_operations enable_channel_file_ops; 24extern const struct file_operations enable_channel_file_ops;
27extern const struct file_operations switch_to_tsg_file_ops; 25extern const struct file_operations switch_to_tsg_file_ops;
26extern const struct file_operations device_info_file_ops;
27extern const struct file_operations nvdebug_read_reg32_file_ops;
28
29// Bus types are global symbols in the kernel
30extern struct bus_type platform_bus_type;
31struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
32unsigned int g_nvdebug_devices = 0;
33
34// TEMP
35irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) {
36 printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num);
37 return IRQ_NONE; // We don't actually handle any interrupts. Pass them on.
38}
39
40// Find any and all NVIDIA GPUs in the system
41// Note: This function fails if any of them are in a bad state
42int probe_and_cache_device(void) {
43 // platform bus (SoC) iterators
44 struct device *dev = NULL;
45 struct device *temp_dev;
46 // PCI search iterator and search query
47 struct pci_dev *pcid = NULL;
48	// This query pattern is modeled on nouveau's
49 struct pci_device_id query = {
50 .vendor = NV_PCI_VENDOR, // Match NVIDIA devices
51 .device = PCI_ANY_ID,
52 .subvendor = PCI_ANY_ID,
53 .subdevice = PCI_ANY_ID,
54 .class_mask = 0xff << 16,
55 .class = PCI_BASE_CLASS_DISPLAY << 16, // Match display devs
56 };
57 int i = 0;
58 // Search the platform bus for the first device that matches our name
59 // Search for GV10B (Jetson Xavier)
60 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b")))
61 dev = temp_dev;
62 // Search for GP10B (Jetson TX2)
63 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b")))
64 dev = temp_dev;
65 // TODO: Support other platform bus devices (gk20a, gm20b)
66 if (dev) {
67 struct nvgpu_os_linux *l;
68 mc_boot_0_t ids;
69 g_nvdebug_state[i].g = get_gk20a(dev);
70 l = container_of(g_nvdebug_state[i].g, struct nvgpu_os_linux, g);
71 g_nvdebug_state[i].regs = l->regs;
72 if (!g_nvdebug_state[i].regs)
73 return -EADDRNOTAVAIL;
74 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
75 if (ids.raw == -1)
76 return -EADDRNOTAVAIL;
77 g_nvdebug_state[i].chip_id = ids.chip_id;
78 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.",
79 ids.chip_id, ARCH2NAME(ids.architecture));
80 i++;
81 }
82 // Search the PCI bus and iterate through all matches
83 // FIXME: State rollback
84 while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) {
85 mc_boot_0_t ids;
86 g_nvdebug_state[i].g = NULL;
87 // Map BAR0 (GPU control registers)
88 g_nvdebug_state[i].regs = pci_iomap(pcid, 0, 0);
89 if (!g_nvdebug_state[i].regs) {
90 pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n");
91 return -EADDRNOTAVAIL;
92 }
93 // Map BAR3 (CPU-accessible mappings of GPU DRAM)
94 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0);
95 // Try mapping only the lower half of BAR3 on fail
96 // (vesafb may map the top half for display)
97 if (!g_nvdebug_state[i].bar3)
98 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2);
99 g_nvdebug_state[i].pcid = pcid;
100 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
101 if (ids.raw == -1) {
102 pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
103 return -EADDRNOTAVAIL;
104 }
105 g_nvdebug_state[i].chip_id = ids.chip_id;
106 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.",
107 ids.chip_id, ARCH2NAME(ids.architecture));
108 // TEMP
109 if (request_irq(pcid->irq, nvdebug_irq_tap, IRQF_SHARED, "nvdebug tap", pcid)) {
110 printk(KERN_WARNING "[nvdebug] Unable to initialize IRQ tap\n");
111 }
112 i++;
113 }
114 // Return the number of devices we found
115 if (i > 0)
116 return i;
117 return -ENODEV;
118}
119
120// Create files `/proc/gpu#/runlist#`, world readable
121int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
122 ptop_device_info_t info;
123 struct proc_dir_entry *rl_entry;
124 int i, rl_id;
125 char runlist_name[12];
126 int max_rl_id = 0; // Always at least one runlist
127 // Figure out how many runlists there are by checking the device info
128 // registers. Runlists are always numbered sequentially, so we just have
129 // to find the highest-valued one and add 1 to get the number of runlists.
130 for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) {
131 info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i));
132 if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid)
133 continue;
134 if (info.runlist_enum > max_rl_id)
135 max_rl_id = info.runlist_enum;
136 }
137 // Create files to read each runlist. The read handling code looks at the
138 // PDE_DATA associated with the file to determine what the runlist ID is.
139 for (rl_id = 0; rl_id <= max_rl_id; rl_id++) {
140 snprintf(runlist_name, 12, "runlist%d", rl_id);
141 rl_entry = proc_create_data(
142 runlist_name, 0444, dir, &runlist_file_ops,
143 (void*)(uintptr_t)rl_id);
144 if (!rl_entry)
145 return -ENOMEM;
146 }
147 return 0;
148}
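
For reference, the read side is assumed to recover the stashed runlist ID the same way nvdebug_reg32_read() recovers its register offset, via the entry's *data field; a sketch (the real handler lives in runlist_procfs.c, and the helper name here is hypothetical):

	static inline int file2rlid(struct file *f) {
		// proc_create_data() above stored the runlist ID in the entry's *data
		return (int)(uintptr_t)PDE_DATA(file_inode(f));
	}
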
149
150// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
151// TODO: Don't run this on unsupported GPUs
152int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
153 char file_name[20];
154 int i;
155 struct proc_dir_entry *gpc_tpc_mask_entry;
156 // Get a bitmask of which GPCs are disabled
157 uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC);
158	// Get the maximum number of GPCs for this chip
159 uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS);
160 // For each enabled GPC, expose a mask of disabled TPCs
161 for (i = 0; i < max_gpcs; i++) {
162 // Do nothing if GPC is disabled
163 if ((1 << i) & gpcs_mask)
164 continue;
165 // If GPC is enabled, create an entry to read disabled TPCs mask
166 snprintf(file_name, 20, "gpc%d_tpc_mask", i);
167 gpc_tpc_mask_entry = proc_create_data(
168 file_name, 0444, dir, &nvdebug_read_reg32_file_ops,
169 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i));
170 if (!gpc_tpc_mask_entry)
171 return -ENOMEM;
172 }
173 return 0;
174}
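
A small derived example of these registers in use, assuming (per the comments above) that the fuse registers expose disabled-unit masks: counting the enabled TPCs on one GPC. The helper name is hypothetical:

	static uint32_t num_enabled_tpcs(struct nvdebug_state *g, int gpc_id) {
		uint32_t max_tpcs = nvdebug_readl(g, NV_PTOP_SCAL_NUM_TPC_PER_GPC);
		uint32_t disabled = nvdebug_readl(g, NV_FUSE_TPC_FOR_GPC(gpc_id));
		// Subtract the fused-off TPCs from the per-GPC maximum
		return max_tpcs - hweight32(disabled & ((1u << max_tpcs) - 1));
	}
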
28 175
29int __init nvdebug_init(void) { 176int __init nvdebug_init(void) {
30 struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry, 177 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry,
31 *enable_channel_entry, *switch_to_tsg_entry; 178 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry,
32 // Create file `/proc/preempt_tsg`, world readable 179 *num_gpcs_entry;
33 rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); 180 int rl_create_err, tpc_masks_create_err;
34 // Create file `/proc/preempt_tsg`, world writable 181 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state
35 preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops); 182 int res = probe_and_cache_device();
36 // Create file `/proc/disable_channel`, world writable 183 if (res < 0)
37 disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops); 184 return res;
38 // Create file `/proc/enable_channel`, world writable 185 g_nvdebug_devices = res;
39	enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops); 186	// Create separate ProcFS directories for each GPU
40 // Create file `/proc/switch_to_tsg`, world writable 187 while (res--) {
41 switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops); 188 char device_id_str[7];
42 // ProcFS entry creation only fails if out of memory 189 uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id
43 if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) { 190 // Create directory /proc/gpu# where # is the GPU number
44	remove_proc_entry("runlist", NULL); 191	snprintf(device_id_str, 7, "gpu%lu", device_id);
45 remove_proc_entry("preempt_tsg", NULL); 192 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
46 remove_proc_entry("disable_channel", NULL); 193 goto out_nomem;
47 remove_proc_entry("enable_channel", NULL); 194 // Create files `/proc/gpu#/runlist#`, world readable
48 remove_proc_entry("switch_to_tsg", NULL); 195 rl_create_err = create_runlist_files(device_id, dir);
49 printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); 196 // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
50 return -ENOMEM; 197 tpc_masks_create_err = create_tpc_mask_files(device_id, dir);
198 // Create file `/proc/gpu#/preempt_tsg`, world writable
199 preempt_entry = proc_create_data(
200 "preempt_tsg", 0222, dir, &preempt_tsg_file_ops,
201 (void*)device_id);
202 // Create file `/proc/gpu#/disable_channel`, world writable
203 disable_channel_entry = proc_create_data(
204 "disable_channel", 0222, dir, &disable_channel_file_ops,
205 (void*)device_id);
206 // Create file `/proc/gpu#/enable_channel`, world writable
207 enable_channel_entry = proc_create_data(
208 "enable_channel", 0222, dir, &enable_channel_file_ops,
209 (void*)device_id);
210 // Create file `/proc/gpu#/switch_to_tsg`, world writable
211 switch_to_tsg_entry = proc_create_data(
212 "switch_to_tsg", 0222, dir, &switch_to_tsg_file_ops,
213 (void*)device_id);
214 // Create file `/proc/gpu#/device_info`, world readable
215 device_info_entry = proc_create_data(
216 "device_info", 0444, dir, &device_info_file_ops,
217 (void*)device_id);
218 // Create file `/proc/gpu#/num_gpcs`, world readable
219 num_gpcs_entry = proc_create_data(
220 "num_gpcs", 0444, dir, &nvdebug_read_reg32_file_ops,
221 (void*)NV_PTOP_SCAL_NUM_GPCS);
222 // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable
223 num_gpcs_entry = proc_create_data(
224 "num_tpc_per_gpc", 0444, dir, &nvdebug_read_reg32_file_ops,
225 (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC);
226 // Create file `/proc/gpu#/num_ces`, world readable
227 num_gpcs_entry = proc_create_data(
228 "num_ces", 0444, dir, &nvdebug_read_reg32_file_ops,
229 (void*)NV_PTOP_SCAL_NUM_CES);
230	// Create file `/proc/gpu#/gpc_mask`, world readable
231 num_gpcs_entry = proc_create_data(
232 "gpc_mask", 0444, dir, &nvdebug_read_reg32_file_ops,
233 (void*)NV_FUSE_GPC);
234 // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+
235 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) {
236 // TODO: Redo to num_pces
237 // Create file `/proc/gpu#/pce_map`, world readable
238 num_gpcs_entry = proc_create_data(
239 "pce_map", 0444, dir, &nvdebug_read_reg32_file_ops,
240 (void*)NV_CE_PCE_MAP);
241 }
242 // ProcFS entry creation only fails if out of memory
243 if (rl_create_err || tpc_masks_create_err || !preempt_entry ||
244 !disable_channel_entry || !enable_channel_entry ||
245 !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry)
246 goto out_nomem;
51 } 247 }
248 // (See Makefile if you want to know the origin of GIT_HASH.)
52 printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); 249 printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n");
53 return 0; 250 return 0;
251out_nomem:
252 // Make sure to clear all ProcFS directories on error
253 while (res < g_nvdebug_devices) {
254 char device_id_str[7];
255 snprintf(device_id_str, 7, "gpu%d", res);
256 remove_proc_subtree(device_id_str, NULL);
257 res++;
258 }
259 return -ENOMEM;
54} 260}
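
The per-GPU directory's *data field set by proc_mkdir_data() above is what file2gpuidx()/file2parentgpuidx() (defined in nvdebug.h, not included in this dump) are assumed to read back, roughly:

	static inline uintptr_t file2gpuidx(const struct file *f) {
		return (uintptr_t)PDE_DATA(file_inode(f));             // entry's own *data
	}
	static inline uintptr_t file2parentgpuidx(const struct file *f) {
		return (uintptr_t)proc_get_parent_data(file_inode(f)); // /proc/gpu# *data
	}
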
55 261
56static void __exit nvdebug_exit(void) { 262static void __exit nvdebug_exit(void) {
57 remove_proc_entry("runlist", NULL); 263 struct nvdebug_state *g;
58 remove_proc_entry("preempt_tsg", NULL); 264 // Deinitialize each device
59 remove_proc_entry("disable_channel", NULL); 265 while (g_nvdebug_devices--) {
60 remove_proc_entry("enable_channel", NULL); 266 // Remove procfs directory
61 remove_proc_entry("switch_to_tsg", NULL); 267 char device_id[7];
62 printk(KERN_INFO "[nvdebug] Exiting...\n"); 268 snprintf(device_id, 7, "gpu%d", g_nvdebug_devices);
269 remove_proc_subtree(device_id, NULL);
270 // Free BAR mappings
271 g = &g_nvdebug_state[g_nvdebug_devices];
272		if (g->pcid && g->regs)
273			pci_iounmap(g->pcid, g->regs);
274		if (g->pcid && g->bar3)
275			pci_iounmap(g->pcid, g->bar3);
276		// TEMP: The IRQ tap was only requested for PCI devices
277		if (g->pcid) free_irq(g->pcid->irq, g->pcid);
278		printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.\n", g->chip_id);
279 }
280 printk(KERN_INFO "[nvdebug] Module exit complete.\n");
63} 281}
64 282
65module_init(nvdebug_init); 283module_init(nvdebug_init);
diff --git a/runlist.c b/runlist.c
index c8ff99f..94be18e 100644
--- a/runlist.c
+++ b/runlist.c
@@ -1,122 +1,127 @@
1#include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type
2//#include <linux/iommu.h> // For struct iommu_domain
3#include <linux/kernel.h> // Kernel types 1#include <linux/kernel.h> // Kernel types
4#include <asm/io.h>
5 2
6#include "nvdebug.h" 3#include "nvdebug.h"
7 4
8// Bus types are global symbols in the kernel
9extern struct bus_type platform_bus_type;
10
11struct gk20a* get_live_gk20a(void) {
12 struct device *dev = NULL;
13 struct device *temp_dev;
14 struct gk20a *g;
15 struct nvgpu_os_linux *l;
16 // Get the last device that matches our name
17 while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) {
18 dev = temp_dev;
19 printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev));
20 }
21 if (!dev)
22 return NULL;
23 g = get_gk20a(dev);
24 // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be:
25 // - A GPU address (type is sysmem_coherent)
26 // - A physical address (dereferencing after ioremap crashes)
27 // - A kernel virtual address (dereferencing segfaults)
28 // So maybe it's some sort of custom thing? This is an address that the GPU
29 // can use, so it would make most sense for it to be a physical address.
30 //
31 // BUT, it can't possibly be a physical address, as it would refer to an
32 // address greater than the maximum one on our system (by a lot!).
33 // Maybe I'm reading the runlist base wrong?
34 // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
35 // address! So, what's this I/O address space? All I know is that it's what
36 // nvgpu_mem_get_addr() returns. That function returns the result of either:
37 // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?)
38 // converts an IPA to a PA?
39 // - nvgpu_mem_iommu_translate
40 //
41 // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
42 // returns SYSMEM.
43 //
44 // To convert a physical address to a IOMMU address, we add a bit
45 //
46 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
47 // before because the GPU had simply gone to sleep and invalidated its
48 // register state, so nvgpu_readl() was simply returning garbage.
49 l = container_of(g, struct nvgpu_os_linux, g);
50 if (!l->regs)
51 return NULL;
52 return g;
53}
54
55/* Get runlist head and info (incl. length) 5/* Get runlist head and info (incl. length)
56 @param rl_iter Location at which to store output 6 @param rl_iter Location at which to store output
 7 @param rl_id ID of the runlist to read
57*/ 8*/
58int get_runlist_iter(struct runlist_iter *rl_iter) { 9int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) {
59 struct entry_tsg head; 10 runlist_base_t rl_base;
60 runlist_base_t rl_base; 11 runlist_info_t rl_info;
61 runlist_info_t rl_info; 12 u64 runlist_iova;
62 u64 runlist_iova; 13 *rl_iter = (struct runlist_iter){0};
63 struct gk20a *g = get_live_gk20a(); 14 rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id));
64 if (!g) 15 // Check that reads are working
16 if (rl_base.raw == -1)
65 return -EIO; 17 return -EIO;
66 rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); 18 // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be:
67 rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); 19 // - A GPU address (type is sysmem_coherent)
68 runlist_iova = ((u64)rl_base.ptr) << 12; 20 // - A physical address (dereferencing after ioremap crashes)
69 printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", 21 // - A kernel virtual address (dereferencing segfaults)
70 rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); 22 // So maybe it's some sort of custom thing? This is an address that the GPU
71 // TODO: Support reading video memory 23 // can use, so it would make most sense for it to be a physical address.
72 if (rl_base.type == TARGET_VID_MEM) { 24 //
73 printk(KERN_ERR "[nvdebug] Runlist is located in video memory. Access to video memory is unimplemented."); 25 // BUT, it can't possibly be a physical address, as it would refer to an
74 return -ENOTSUPP; 26 // address greater than the maximum one on our system (by a lot!).
27 // Maybe I'm reading the runlist base wrong?
28 // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
29 // address! So, what's this I/O address space? All I know is that it's what
30 // nvgpu_mem_get_addr() returns. That function returns the result of either:
31 // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?)
32 // converts an IPA to a PA?
33 // - nvgpu_mem_iommu_translate
34 //
35 // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
36 // returns SYSMEM.
37 //
 38 // To convert a physical address to an IOMMU address, we add a bit
39 //
40 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
41 // before because the GPU had simply gone to sleep and invalidated its
42 // register state, so nvgpu_readl() was simply returning garbage.
43 rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id));
44 runlist_iova = ((u64)rl_base.ptr) << 12;
45 printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n",
46 rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw);
47 printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id);
48 // Return early on an empty runlist
49 if (!rl_info.len)
50 return 0;
51 // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping
52 if (rl_base.target == TARGET_VID_MEM) {
53 printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental.");
54 bar_config_block_t bar1_block, bar2_block;
55 bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK);
56 printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n", ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target), bar1_block.is_virtual ? "virtual" : "physical");
57 bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK);
 58	printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar2_block.is_virtual ? "virtual" : "physical");
59 uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12);
60 if (!bar_inst_pramin_offset) {
61 printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
62 return -EOPNOTSUPP;
63 }
64 /* TODO: Support BAR1?
65 bar_inst_pramin_offset = vram2PRAMIN(g, bar1_block.ptr << 12);
66 if (!bar_inst_pramin_offset) {
67 printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR1 in the current NV_PRAMIN window. VRAM inaccessible.\n");
68 return -EOPNOTSUPP;
69 }*/
70 // Instance blocks (size == 1kb) contain many things, but we only care about
71 // the section which describes the location of the page directory (page table)
72 uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF;
73 page_dir_config_t pd_config;
74 pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN);
75 uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi;
76 bar_pdb_vram_addr <<= 20;
77 bar_pdb_vram_addr |= pd_config.page_dir_lo;
78 bar_pdb_vram_addr <<= 12;
79 printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n", bar_pdb_vram_addr, target_to_text(pd_config.target), pd_config.is_ver2 ? "2" : "1", pd_config.raw);
80 // TODO: SYSMEM support for page table location
81 if (pd_config.target != TARGET_VID_MEM) {
82 printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n");
83 return -EOPNOTSUPP;
84 }
85 uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr);
86 if (!bar_pdb_pramin_offset) {
87 printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
88 return -EOPNOTSUPP;
89 }
90 uint64_t runlist_bar_vaddr;
91 if (pd_config.is_ver2)
92 runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
93 else
94 runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
95 if (!runlist_bar_vaddr) {
96 printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n");
97 return -EOPNOTSUPP;
98 }
99 printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr);
100 /* XXX: Old test code
101 uint32_t bar2_pd_pramin_offset = vram_to_pramin_off(bar2_pd);
102 //walk_pd_subtree(bar2_pd_pramin_offset);
103 uint64_t runlist_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, runlist_iova);
104 page_dir_entry_t pde_0;
105 pde_0.raw = nvdebug_readl(g, NV_PRAMIN + bar2_pd_pramin_offset);
106 uint32_t pde_1 = nvdebug_readl(g, NV_PRAMIN + vram_to_pramin_off(((u64)pde_0.addr) << 12));
107 uint64_t pde_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, ((u64)pde_0.addr) << 12);
108 uint32_t pde_2 = readl(g->bar3 + pde_bar2_vaddr);
109 printk(KERN_INFO "[nvdebug] PDE0 via PRAMIN: %x, via BAR3: %x\n", pde_1, pde_2);
110 */
111 if (!g->bar3) {
112 printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n");
113 return -ENODEV;
114 }
 115		rl_iter->curr_entry = g->bar3 + runlist_bar_vaddr;
116 } else {
117 // Directly access the runlist if stored in SYS_MEM (physically addressed)
118 rl_iter->curr_entry = phys_to_virt(runlist_iova);
75 } 119 }
76 // Segfaults 120 rl_iter->rl_info = rl_info;
77 //u32 attempted_read = ioread32(runlist_iova); 121 return 0;
78 //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read);
79
80 // Errors out
81 //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg));
82 //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr);
83
84 /* Overcomplicated?
85 struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
86 if (!domain) {
87 printk(KERN_INFO "[nvdebug] No IOMMU domain!\n");
88 return -EIO;
89 }
90 u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova);
91 printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr);
92 */
93
94 printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova));
95 printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova));
96 head = *(struct entry_tsg*)phys_to_virt(runlist_iova);
97
98 rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova);
99 rl_iter->rl_info = rl_info;
100 return 0;
101 //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
102 //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
103 //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
104 //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
105 //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid);
106
107 //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL));
108 //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL));
109 //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes
110 //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg));
111 /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
112 printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
113 printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
114 printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
115 printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */
116} 122}
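
Putting the iterator contract together: rl_info.len counts TSG entries plus all channel entries, each entry is NV_RL_ENTRY_SIZE(g) bytes, and each TSG entry is immediately followed by its channels (per the layout assumed in runlist_procfs.c). A sketch of a complete walk over curr_entry, using the accessors from nvdebug.h:

	void walk_runlist(struct nvdebug_state *g, struct runlist_iter *it) {
		void *entry = it->curr_entry;
		int pos;
		for (pos = 0; pos < it->rl_info.len; pos++, entry += NV_RL_ENTRY_SIZE(g)) {
			if (entry_type(g, entry) == ENTRY_TYPE_TSG)
				printk(KERN_INFO "[nvdebug] TSG %d (%d channels)\n",
				       tsgid(g, entry), tsg_length(g, entry));
			else
				printk(KERN_INFO "[nvdebug] Channel %d\n", chid(g, entry));
		}
	}
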
117 123
118int preempt_tsg(uint32_t tsg_id) { 124int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) {
119 struct gk20a *g = get_live_gk20a();
120 runlist_info_t rl_info; 125 runlist_info_t rl_info;
121 pfifo_preempt_t pfifo_preempt; 126 pfifo_preempt_t pfifo_preempt;
122 runlist_disable_t rl_disable; 127 runlist_disable_t rl_disable;
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 411f844..a6b0d94 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -6,7 +6,14 @@
6#define RUNLIST_PROCFS_NAME "runlist" 6#define RUNLIST_PROCFS_NAME "runlist"
7#define DETAILED_CHANNEL_INFO 7#define DETAILED_CHANNEL_INFO
8 8
9static int runlist_detail_seq_show_chan(struct seq_file *s, struct gk20a *g, uint32_t chid) { 9/* Print channel details using PCCSR (Programmable Channel Control System RAM?)
10 * @param s Pointer to state from seq_file subsystem to pass to seq_printf
11 * @param g Pointer to our internal GPU state
12 * @param chid ID of channel to print details on, range [0, 512)
13 * @param prefix Text string to prefix each line with, or empty string
14 */
15#ifdef DETAILED_CHANNEL_INFO
16static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) {
10 channel_ctrl_t chan; 17 channel_ctrl_t chan;
11 char *loc_txt; 18 char *loc_txt;
12 u64 instance_ptr; 19 u64 instance_ptr;
@@ -16,23 +23,37 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct gk20a *g, uin
16 return -EIO; 23 return -EIO;
17 instance_ptr = chan.inst_ptr; 24 instance_ptr = chan.inst_ptr;
18 instance_ptr <<= 12; 25 instance_ptr <<= 12;
19 seq_printf(s, " +- Channel Info %-4d -+\n", chid); 26 seq_printf(s, "%s+- Channel Info %-4d -+\n", prefix, chid);
20 seq_printf(s, " | Enabled: %d|\n", chan.enable); 27 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable);
21 seq_printf(s, " | Next: %d|\n", chan.next); 28 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next);
22 seq_printf(s, " | Force CTX Reload: %d|\n", chan.force_ctx_reload); 29 seq_printf(s, "%s| Force CTX Reload: %d|\n", prefix, chan.force_ctx_reload);
23 seq_printf(s, " | Enable set: %d|\n", chan.enable_set); 30 seq_printf(s, "%s| Enable set: %d|\n", prefix, chan.enable_set);
24 seq_printf(s, " | Enable clear: %d|\n", chan.enable_clear); 31 seq_printf(s, "%s| Enable clear: %d|\n", prefix, chan.enable_clear);
25 seq_printf(s, " | PBDMA Faulted: %d|\n", chan.pbdma_faulted); 32 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted);
26 seq_printf(s, " | ENG Faulted: %d|\n", chan.eng_faulted); 33 seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, chan.eng_faulted);
27 seq_printf(s, " | Status: %2d|\n", chan.status); 34 seq_printf(s, "%s| Status: %2d|\n", prefix, chan.status);
28 seq_printf(s, " | Busy: %d|\n", chan.busy); 35 seq_printf(s, "%s| Busy: %d|\n", prefix, chan.busy);
29 seq_printf(s, " | Instance PTR: |\n"); 36 seq_printf(s, "%s| Instance PTR: |\n", prefix);
30 seq_printf(s, " | %#018llx |\n", instance_ptr); 37 seq_printf(s, "%s| %#018llx |\n", prefix, instance_ptr);
31 seq_printf(s, " | %-20s|\n", loc_txt); 38 seq_printf(s, "%s| %-20s|\n", prefix, loc_txt);
32 seq_printf(s, " | Instance bound: %d|\n", chan.inst_bind); 39 seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind);
33 seq_printf(s, " +---------------------+\n"); 40 // START TEMP
41 // "runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id"
42 // GR, GRCE, and ASYNC_CE
43 // Note that this appears to be broken??
44 // Peek into the channel instance RAM
45 if (chan.inst_target == TARGET_SYS_MEM_COHERENT) {
46 seq_printf(s, "%s| Target Engine: %2d|\n", prefix, *(uint32_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*43/*NV_RAMFC_TARGET*/) & 0x1f);
47 seq_printf(s, "%s| PDB LO: %#08x|\n", prefix, *(uint32_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*128/*NV_RAMIN_PAGE_DIR_BASE_LO*/) & 0xfffff000);
48 seq_printf(s, "%s| Num subcontexts: %2ld|\n", prefix, hweight64(*(uint64_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*166/*NV_RAMIN_SC_PDB_VALID*/)));
49 // This appears to be unset on Xavier
50 //seq_printf(s, "%s| PAS ID: %8ld|\n", prefix, *(uint32_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*135/*NV_RAMIN_PASID*/) & 0xfffff);
51 }
52 // END TEMP
53 seq_printf(s, "%s+---------------------+\n", prefix);
34 return 0; 54 return 0;
35} 55}
56#endif
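
The magic numbers in the TEMP block above follow one rule: the hardware headers index instance-block fields by 32-bit word, so word N sits at byte offset 4*N from the instance pointer. A sketch of the accessor this implies (hypothetical helper, valid only for SYS_MEM-backed instance blocks, where phys_to_virt() applies):

	static inline uint32_t inst_ram_rd32(uint64_t instance_ptr, unsigned word) {
		// e.g. word 43 == NV_RAMFC_TARGET, word 128 == NV_RAMIN_PAGE_DIR_BASE_LO
		return *(uint32_t*)phys_to_virt(instance_ptr + 4 * word);
	}
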
36 57
37#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) 58#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
38// Bug workaround. See comment in runlist_file_seq_start() 59// Bug workaround. See comment in runlist_file_seq_start()
@@ -41,10 +62,14 @@ static loff_t pos_fixup;
41 62
42static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) { 63static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) {
43 static struct runlist_iter rl_iter; 64 static struct runlist_iter rl_iter;
65 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
44 // *pos == 0 for first call after read of file 66 // *pos == 0 for first call after read of file
45 if (*pos == 0) { 67 if (*pos == 0) {
46 int err = get_runlist_iter(&rl_iter); 68 int err = get_runlist_iter(g, seq2gpuidx(s), &rl_iter);
47 if (err) 69 if (err)
70 return ERR_PTR(err);
71 // Don't try to print an empty runlist
72 if (rl_iter.rl_info.len <= 0)
48 return NULL; 73 return NULL;
49 return &rl_iter; 74 return &rl_iter;
50 } 75 }
@@ -68,12 +93,13 @@ static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter,
68 loff_t *pos) { 93 loff_t *pos) {
69 struct runlist_iter* rl_iter = raw_rl_iter; 94 struct runlist_iter* rl_iter = raw_rl_iter;
70 void *ret = NULL; 95 void *ret = NULL;
71 // Advance by one TSG + channels under last TSG 96 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
72 *pos += 1 + rl_iter->curr_tsg->tsg_length; 97 // Advance by one TSG or channel
98 (*pos)++;
99 rl_iter->curr_entry += NV_RL_ENTRY_SIZE(g);
73 // Verify we haven't reached the end of the runlist 100 // Verify we haven't reached the end of the runlist
74 // rl_info.len is the num of tsg entries + total num of channel entries 101 // rl_info.len is the num of tsg entries + total num of channel entries
75 if (*pos < rl_iter->rl_info.len) { 102 if (*pos < rl_iter->rl_info.len) {
76 rl_iter->curr_tsg = next_tsg(rl_iter->curr_tsg);
77 ret = rl_iter; 103 ret = rl_iter;
78 } 104 }
79#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) 105#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
@@ -88,57 +114,57 @@ static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) {
88} 114}
89 115
90static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { 116static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
91 struct entry_tsg* tsg = ((struct runlist_iter*)raw_rl_iter)->curr_tsg; 117 struct runlist_iter *rl_iter = raw_rl_iter;
92 struct runlist_chan* chan; 118 void *entry = rl_iter->curr_entry;
93 struct gk20a *g = get_live_gk20a(); 119 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
94 if (!g) 120 if (entry_type(g, entry) == ENTRY_TYPE_TSG) {
95 return -EIO; 121 if (rl_iter->channels_left_in_tsg) {
96 if (tsg->entry_type != ENTRY_TYPE_TSG) { 122 printk(KERN_WARNING "[nvdebug] Found a TSG @ %px when %d channels were still expected under the previous TSG in the runlist!\n", entry, rl_iter->channels_left_in_tsg);
97 printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in tsg print logic!\n"); 123 return -EIO;
98 return -EIO; 124 }
99 } 125 rl_iter->channels_left_in_tsg = tsg_length(g, entry);
100 seq_printf(s, "+---- TSG Entry %-2d----+\n", tsg->tsgid); 126 seq_printf(s, "+---- TSG Entry %-3d---+\n", tsgid(g, entry));
101 seq_printf(s, "| Scale: %-13d|\n", tsg->timeslice_scale); 127 seq_printf(s, "| Scale: %-13d|\n", timeslice_scale(g, entry));
102 seq_printf(s, "| Timeout: %-11d|\n", tsg->timeslice_timeout); 128 seq_printf(s, "| Timeout: %-11d|\n", timeslice_timeout(g, entry));
103 seq_printf(s, "+---------------------+\n"); 129 seq_printf(s, "| Length: %-12d|\n", tsg_length(g, entry));
104 for_chan_in_tsg(chan, tsg) { 130 seq_printf(s, "+---------------------+\n");
131 } else {
132 char *indt = "";
105#ifndef DETAILED_CHANNEL_INFO 133#ifndef DETAILED_CHANNEL_INFO
106 char* loc_txt; 134 u64 instance_ptr = 0;
107 u64 instance_ptr;
108#endif 135#endif
109 if (chan->entry_type != ENTRY_TYPE_CHAN) { 136 if (rl_iter->channels_left_in_tsg) {
110 printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in channel print logic!\n"); 137 indt = " ";
111 return -EIO; 138 rl_iter->channels_left_in_tsg--;
112 } 139 }
113#ifdef DETAILED_CHANNEL_INFO 140#ifdef DETAILED_CHANNEL_INFO
114 runlist_detail_seq_show_chan(s, g, chan->chid); 141 runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
115#else 142#else
116 loc_txt = target_to_text(chan->inst_target);
117 if (!loc_txt) {
118 printk(KERN_WARNING "[nvdebug] Invalid apature in channel print logic!\n");
119 return -EIO;
120 }
121 // Reconstruct pointer to channel instance block 143 // Reconstruct pointer to channel instance block
122 instance_ptr = chan->inst_ptr_hi; 144 if (g->chip_id >= NV_CHIP_ID_VOLTA) {
123 instance_ptr <<= 32; 145 instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi;
124 instance_ptr |= chan->inst_ptr_lo << 12; 146 instance_ptr <<= 32;
125 147 }
126 seq_printf(s, " +- Channel Entry %-4d-+\n", chan->chid); 148 instance_ptr |= inst_ptr_lo(g, entry) << 12;
127 seq_printf(s, " | Runqueue Selector: %d|\n", chan->runqueue_selector); 149
128 seq_printf(s, " | Instance PTR: |\n"); 150 seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry));
129 seq_printf(s, " | %#018llx |\n", instance_ptr); 151 if (g->chip_id >= NV_CHIP_ID_VOLTA)
130 seq_printf(s, " | %-20s|\n", loc_txt); 152 seq_printf(s, "%s| Runqueue Selector: %d|\n", indt,
131 seq_printf(s, " +---------------------+\n"); 153 ((struct gv100_runlist_chan*)entry)->runqueue_selector);
154 seq_printf(s, "%s| Instance PTR: |\n", indt);
155 seq_printf(s, "%s| %#018llx |\n", indt, instance_ptr);
156 seq_printf(s, "%s| %-20s|\n", indt, target_to_text(inst_target(g, entry)));
157 seq_printf(s, "%s+---------------------+\n", indt);
132#endif 158#endif
133 } 159 }
134 return 0; 160 return 0;
135} 161}
136 162
137static const struct seq_operations runlist_file_seq_ops = { 163static const struct seq_operations runlist_file_seq_ops = {
138 .start = runlist_file_seq_start, 164 .start = runlist_file_seq_start,
139 .next = runlist_file_seq_next, 165 .next = runlist_file_seq_next,
140 .stop = runlist_file_seq_stop, 166 .stop = runlist_file_seq_stop,
141 .show = runlist_file_seq_show, 167 .show = runlist_file_seq_show,
142}; 168};
143 169
144static int runlist_file_open(struct inode *inode, struct file *f) { 170static int runlist_file_open(struct inode *inode, struct file *f) {
@@ -157,6 +183,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
157 uint32_t target_tsgid; 183 uint32_t target_tsgid;
158 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec 184 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
159 int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); 185 int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
186 struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
160 if (err) 187 if (err)
161 return err; 188 return err;
162 189
@@ -165,7 +192,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
165 return -ERANGE; 192 return -ERANGE;
166 193
167 // Execute preemption 194 // Execute preemption
168 err = preempt_tsg(target_tsgid); 195 err = preempt_tsg(g, target_tsgid);
169 if (err) 196 if (err)
170 return err; 197 return err;
171 198
@@ -181,9 +208,9 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
181 uint32_t target_channel; 208 uint32_t target_channel;
182 channel_ctrl_t chan; 209 channel_ctrl_t chan;
183 int err; 210 int err;
184 struct gk20a *g = get_live_gk20a(); 211 runlist_info_t rl_info;
185 if (!g) 212 runlist_disable_t rl_disable;
186 return -EIO; 213 struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
187 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec 214 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
188 err = kstrtou32_from_user(buffer, count, 0, &target_channel); 215 err = kstrtou32_from_user(buffer, count, 0, &target_channel);
189 if (err) 216 if (err)
@@ -195,7 +222,16 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
195 // Disable channel 222 // Disable channel
196 chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); 223 chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
197 chan.enable_clear = true; 224 chan.enable_clear = true;
225 // disable sched
226 rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
227 rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
228 rl_disable.raw |= BIT(rl_info.id);
229 nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
230 // disable chan
198 nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); 231 nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
232 // enable sched
233 rl_disable.raw &= ~BIT(rl_info.id);
234 nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
199 235
200 return count; 236 return count;
201} 237}
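
The disable path above open-codes a scheduler gate around the channel write. A reusable sketch of the same read-modify-write on NV_PFIFO_SCHED_DISABLE (assumed: one disable bit per runlist, per the BIT(rl_info.id) usage; helper name is hypothetical):

	static void set_runlist_scheduling(struct nvdebug_state *g, int rl_id, bool run) {
		runlist_disable_t rl_disable;
		rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
		if (run)
			rl_disable.raw &= ~BIT(rl_id); // Clear the disable bit
		else
			rl_disable.raw |= BIT(rl_id);  // Set the disable bit
		nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
	}
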
@@ -209,9 +245,7 @@ ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
209 uint32_t target_channel; 245 uint32_t target_channel;
210 channel_ctrl_t chan; 246 channel_ctrl_t chan;
211 int err; 247 int err;
212 struct gk20a *g = get_live_gk20a(); 248 struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
213 if (!g)
214 return -EIO;
215 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec 249 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
216 err = kstrtou32_from_user(buffer, count, 0, &target_channel); 250 err = kstrtou32_from_user(buffer, count, 0, &target_channel);
217 if (err) 251 if (err)
@@ -235,14 +269,12 @@ const struct file_operations enable_channel_file_ops = {
235ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, 269ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
236 size_t count, loff_t *off) { 270 size_t count, loff_t *off) {
237 uint32_t target_tsgid; 271 uint32_t target_tsgid;
238 struct runlist_chan* chan; 272 struct gv100_runlist_chan* chan;
239 channel_ctrl_t chan_ctl; 273 channel_ctrl_t chan_ctl;
240 struct runlist_iter rl_iter; 274 struct runlist_iter rl_iter;
241 int err; 275 int err;
242 loff_t pos = 0; 276 loff_t pos = 0;
243 struct gk20a *g = get_live_gk20a(); 277 struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
244 if (!g)
245 return -EIO;
246 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec 278 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
247 err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); 279 err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
248 if (err) 280 if (err)
@@ -251,32 +283,34 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
251 if (target_tsgid > MAX_TSGID) 283 if (target_tsgid > MAX_TSGID)
252 return -ERANGE; 284 return -ERANGE;
253 285
254 err = get_runlist_iter(&rl_iter); 286 err = get_runlist_iter(g, 0, &rl_iter);
255 if (err) 287 if (err)
256 return err; 288 return err;
257 289
258 // Iterate through all TSGs 290 // Iterate through all TSGs
259 while (pos < rl_iter.rl_info.len) { 291 while (pos < rl_iter.rl_info.len) {
260 if (rl_iter.curr_tsg->tsgid == target_tsgid) { 292 if (tsgid(g, rl_iter.curr_entry) == target_tsgid) {
261 // Enable channels of target TSG 293 // Enable channels of target TSG
262 for_chan_in_tsg(chan, rl_iter.curr_tsg) { 294 for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
263 chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); 295 chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
264 chan_ctl.enable_set = true; 296 chan_ctl.enable_set = true;
265 nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); 297 nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
266 } 298 }
267 } else { 299 } else {
300 // XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro?
268 // Disable all other channels 301 // Disable all other channels
269 for_chan_in_tsg(chan, rl_iter.curr_tsg) { 302 // (This is how the Jetson nvgpu driver disables TSGs)
303 for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
270 chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); 304 chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
271 chan_ctl.enable_clear = true; 305 chan_ctl.enable_clear = true;
272 nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); 306 nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
273 } 307 }
274 } 308 }
275 pos += 1 + rl_iter.curr_tsg->tsg_length; 309 pos += 1 + tsg_length(g, rl_iter.curr_entry);
276 rl_iter.curr_tsg = next_tsg(rl_iter.curr_tsg); 310 rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry);
277 } 311 }
278 // Switch to next TSG with active channels (should be our TSG) 312 // Switch to next TSG with active channels (should be our TSG)
279 err = preempt_tsg(target_tsgid); 313 err = preempt_tsg(g, target_tsgid);
280 if (err) 314 if (err)
281 return err; 315 return err;
282 316
diff --git a/stubs.h b/stubs.h
new file mode 100644
index 0000000..bfcc0d7
--- /dev/null
+++ b/stubs.h
@@ -0,0 +1,80 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Helpful private functions copied from elsewhere in the kernel tree
4 * DO NOT MODIFY
5 */
6#include <linux/version.h>
7
8// Functions from drivers/pci/pci.h
9/**
10 * pci_match_one_device - Tell if a PCI device structure has a matching
11 * PCI device id structure
12 * @id: single PCI device id structure to match
13 * @dev: the PCI device structure to match against
14 *
15 * Returns the matching pci_device_id structure or %NULL if there is no match.
16 */
17static inline const struct pci_device_id *
18pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
19{
20 if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
21 (id->device == PCI_ANY_ID || id->device == dev->device) &&
22 (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
23 (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
24 !((id->class ^ dev->class) & id->class_mask))
25 return id;
26 return NULL;
27}
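
A worked example of the class test, using the query values from nvdebug_entry.c: an NVIDIA 3D controller reports class 0x030200 (base 0x03, sub-class 0x02, prog-if 0x00). With id->class == 0x030000 and id->class_mask == 0xff0000:

	/* (0x030000 ^ 0x030200) & 0xff0000 == 0x000200 & 0xff0000 == 0,
	 * so VGA (0x0300xx) and 3D (0x0302xx) controllers both match on
	 * the base class alone. */
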
28
29// Functions from drivers/pci/search.c
30#include <linux/device.h>
31#include <linux/pci.h>
32extern struct bus_type pci_bus_type;
33
34#if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)
35static int match_pci_dev_by_id(struct device *dev, void *data)
36#else
37static int match_pci_dev_by_id(struct device *dev, const void *data)
38#endif
39{
40 struct pci_dev *pdev = to_pci_dev(dev);
41 const struct pci_device_id *id = data;
42
43 if (pci_match_one_device(id, pdev))
44 return 1;
45 return 0;
46}
47
48/*
49 * pci_get_dev_by_id - begin or continue searching for a PCI device by id
50 * @id: pointer to struct pci_device_id to match for the device
51 * @from: Previous PCI device found in search, or %NULL for new search.
52 *
53 * Iterates through the list of known PCI devices. If a PCI device is found
54 * with a matching id a pointer to its device structure is returned, and the
55 * reference count to the device is incremented. Otherwise, %NULL is returned.
56 * A new search is initiated by passing %NULL as the @from argument. Otherwise
57 * if @from is not %NULL, searches continue from next device on the global
58 * list. The reference count for @from is always decremented if it is not
59 * %NULL.
60 *
61 * This is an internal function for use by the other search functions in
62 * this file.
63 */
64static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id,
65 struct pci_dev *from)
66{
67 struct device *dev;
68 struct device *dev_start = NULL;
69 struct pci_dev *pdev = NULL;
70
71 if (from)
72 dev_start = &from->dev;
73 dev = bus_find_device(&pci_bus_type, dev_start, (void *)id,
74 match_pci_dev_by_id);
75 if (dev)
76 pdev = to_pci_dev(dev);
77 pci_dev_put(from);
78 return pdev;
79}
80
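
Usage sketch, mirroring the loop in probe_and_cache_device(): each call drops the reference on `from` and takes one on the returned device, so keeping a device past the loop requires a later pci_dev_put(). The query below restates the one from nvdebug_entry.c:

	struct pci_device_id query = {
		.vendor = NV_PCI_VENDOR, .device = PCI_ANY_ID,
		.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
		.class_mask = 0xff << 16, .class = PCI_BASE_CLASS_DISPLAY << 16,
	};
	struct pci_dev *pcid = NULL;
	// Loop exit leaves pcid == NULL, so no dangling reference remains
	while ((pcid = pci_get_dev_by_id(&query, pcid)))
		pci_info(pcid, "matched display-class device\n");
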