aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2023-06-22 12:52:59 -0400
committerJoshua Bakita <bakitajoshua@gmail.com>2023-06-22 12:52:59 -0400
commit306a03d18b305e4e573be3b2931978fa10679eb9 (patch)
tree349570dfbe5f531e903c949c3f663627ee1097a8 /nvdebug_entry.c
parentf4b83713672acaf88a526b930b8e417453f6edc5 (diff)
Quick dump of current state for Ben to review.
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r--nvdebug_entry.c288
1 files changed, 253 insertions, 35 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 0854b8b..695b5fd 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -2,64 +2,282 @@
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 */ 3 */
4 4
5/* TODO
6 * - Add sysfs trigger for a preemption
7 */
8
9#include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type 5#include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type
6#include <linux/interrupt.h> // For hooking the nvidia driver interrupts
10#include <linux/kernel.h> 7#include <linux/kernel.h>
11#include <linux/module.h> 8#include <linux/module.h>
12#include <linux/proc_fs.h> // So we can set up entries in /proc 9#include <linux/pci.h> // For PCI device scanning
10#include <linux/proc_fs.h> // So we can set up entries in /proc
13 11
14#include "nvdebug.h" 12#include "nvdebug.h"
13#include "stubs.h"
15 14
16// LIAR. But without this we can't use GPL-only exported symbols like 15// MIT is GPL-compatible. We need to be GPL-compatible for symbols like
17// platform_bus_type or bus_find_device_by_name... 16// platform_bus_type or bus_find_device_by_name...
18MODULE_LICENSE("GPL"); 17MODULE_LICENSE("Dual MIT/GPL");
19MODULE_AUTHOR("Joshua Bakita"); 18MODULE_AUTHOR("Joshua Bakita");
20MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); 19MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
21MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now
22 20
23extern const struct file_operations runlist_file_ops; 21extern const struct file_operations runlist_file_ops;
24extern const struct file_operations preempt_tsg_file_ops; 22extern const struct file_operations preempt_tsg_file_ops;
25extern const struct file_operations disable_channel_file_ops; 23extern const struct file_operations disable_channel_file_ops;
26extern const struct file_operations enable_channel_file_ops; 24extern const struct file_operations enable_channel_file_ops;
27extern const struct file_operations switch_to_tsg_file_ops; 25extern const struct file_operations switch_to_tsg_file_ops;
26extern const struct file_operations device_info_file_ops;
27extern const struct file_operations nvdebug_read_reg32_file_ops;
28
29// Bus types are global symbols in the kernel
30extern struct bus_type platform_bus_type;
31struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
32unsigned int g_nvdebug_devices = 0;
33
34// TEMP
35irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) {
36 printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num);
37 return IRQ_NONE; // We don't actually handle any interrupts. Pass them on.
38}
39
40// Find any and all NVIDIA GPUs in the system
41// Note: This function fails if any of them are in a bad state
42int probe_and_cache_device(void) {
43 // platform bus (SoC) iterators
44 struct device *dev = NULL;
45 struct device *temp_dev;
46 // PCI search iterator and search query
47 struct pci_dev *pcid = NULL;
48 // This query pattern is mirrored off nouveau
49 struct pci_device_id query = {
50 .vendor = NV_PCI_VENDOR, // Match NVIDIA devices
51 .device = PCI_ANY_ID,
52 .subvendor = PCI_ANY_ID,
53 .subdevice = PCI_ANY_ID,
54 .class_mask = 0xff << 16,
55 .class = PCI_BASE_CLASS_DISPLAY << 16, // Match display devs
56 };
57 int i = 0;
58 // Search the platform bus for the first device that matches our name
59 // Search for GV10B (Jetson Xavier)
60 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b")))
61 dev = temp_dev;
62 // Search for GP10B (Jetson TX2)
63 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b")))
64 dev = temp_dev;
65 // TODO: Support other platform bus devices (gk20a, gm20b)
66 if (dev) {
67 struct nvgpu_os_linux *l;
68 mc_boot_0_t ids;
69 g_nvdebug_state[i].g = get_gk20a(dev);
70 l = container_of(g_nvdebug_state[i].g, struct nvgpu_os_linux, g);
71 g_nvdebug_state[i].regs = l->regs;
72 if (!g_nvdebug_state[i].regs)
73 return -EADDRNOTAVAIL;
74 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
75 if (ids.raw == -1)
76 return -EADDRNOTAVAIL;
77 g_nvdebug_state[i].chip_id = ids.chip_id;
78 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.",
79 ids.chip_id, ARCH2NAME(ids.architecture));
80 i++;
81 }
82 // Search the PCI bus and iterate through all matches
83 // FIXME: State rollback
84 while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) {
85 mc_boot_0_t ids;
86 g_nvdebug_state[i].g = NULL;
87 // Map BAR0 (GPU control registers)
88 g_nvdebug_state[i].regs = pci_iomap(pcid, 0, 0);
89 if (!g_nvdebug_state[i].regs) {
90 pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n");
91 return -EADDRNOTAVAIL;
92 }
93 // Map BAR3 (CPU-accessible mappings of GPU DRAM)
94 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0);
95 // Try mapping only the lower half of BAR3 on fail
96 // (vesafb may map the top half for display)
97 if (!g_nvdebug_state[i].bar3)
98 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2);
99 g_nvdebug_state[i].pcid = pcid;
100 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
101 if (ids.raw == -1) {
102 pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
103 return -EADDRNOTAVAIL;
104 }
105 g_nvdebug_state[i].chip_id = ids.chip_id;
106 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.",
107 ids.chip_id, ARCH2NAME(ids.architecture));
108 // TEMP
109 if (request_irq(pcid->irq, nvdebug_irq_tap, IRQF_SHARED, "nvdebug tap", pcid)) {
110 printk(KERN_WARNING "[nvdebug] Unable to initialize IRQ tap\n");
111 }
112 i++;
113 }
114 // Return the number of devices we found
115 if (i > 0)
116 return i;
117 return -ENODEV;
118}
119
120// Create files `/proc/gpu#/runlist#`, world readable
121int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
122 ptop_device_info_t info;
123 struct proc_dir_entry *rl_entry;
124 int i, rl_id;
125 char runlist_name[12];
126 int max_rl_id = 0; // Always at least one runlist
127 // Figure out how many runlists there are by checking the device info
128 // registers. Runlists are always numbered sequentially, so we just have
129 // to find the highest-valued one and add 1 to get the number of runlists.
130 for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) {
131 info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i));
132 if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid)
133 continue;
134 if (info.runlist_enum > max_rl_id)
135 max_rl_id = info.runlist_enum;
136 }
137 // Create files to read each runlist. The read handling code looks at the
138 // PDE_DATA associated with the file to determine what the runlist ID is.
139 for (rl_id = 0; rl_id <= max_rl_id; rl_id++) {
140 snprintf(runlist_name, 12, "runlist%d", rl_id);
141 rl_entry = proc_create_data(
142 runlist_name, 0444, dir, &runlist_file_ops,
143 (void*)(uintptr_t)rl_id);
144 if (!rl_entry)
145 return -ENOMEM;
146 }
147 return 0;
148}
149
150// Create files /proc/gpu#
151// TODO: Don't run this on unsupported GPUs
152int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
153 char file_name[20];
154 int i;
155 struct proc_dir_entry *gpc_tpc_mask_entry;
156 // Get a bitmask of which GPCs are disabled
157 uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC);
158 // Get maximum number of enabled GPCs for this chip
159 uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS);
160 // For each enabled GPC, expose a mask of disabled TPCs
161 for (i = 0; i < max_gpcs; i++) {
162 // Do nothing if GPC is disabled
163 if ((1 << i) & gpcs_mask)
164 continue;
165 // If GPC is enabled, create an entry to read disabled TPCs mask
166 snprintf(file_name, 20, "gpc%d_tpc_mask", i);
167 gpc_tpc_mask_entry = proc_create_data(
168 file_name, 0444, dir, &nvdebug_read_reg32_file_ops,
169 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i));
170 if (!gpc_tpc_mask_entry)
171 return -ENOMEM;
172 }
173 return 0;
174}
28 175
29int __init nvdebug_init(void) { 176int __init nvdebug_init(void) {
30 struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry, 177 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry,
31 *enable_channel_entry, *switch_to_tsg_entry; 178 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry,
32 // Create file `/proc/preempt_tsg`, world readable 179 *num_gpcs_entry;
33 rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); 180 int rl_create_err, tpc_masks_create_err;
34 // Create file `/proc/preempt_tsg`, world writable 181 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state
35 preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops); 182 int res = probe_and_cache_device();
36 // Create file `/proc/disable_channel`, world writable 183 if (res < 0)
37 disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops); 184 return res;
38 // Create file `/proc/enable_channel`, world writable 185 g_nvdebug_devices = res;
39 enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops); 186 // Create seperate ProcFS directories for each gpu
40 // Create file `/proc/switch_to_tsg`, world writable 187 while (res--) {
41 switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops); 188 char device_id_str[7];
42 // ProcFS entry creation only fails if out of memory 189 uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id
43 if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) { 190 // Create directory /proc/gpu# where # is the GPU number
44 remove_proc_entry("runlist", NULL); 191 snprintf(device_id_str, 7, "gpu%ld", device_id);
45 remove_proc_entry("preempt_tsg", NULL); 192 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
46 remove_proc_entry("disable_channel", NULL); 193 goto out_nomem;
47 remove_proc_entry("enable_channel", NULL); 194 // Create files `/proc/gpu#/runlist#`, world readable
48 remove_proc_entry("switch_to_tsg", NULL); 195 rl_create_err = create_runlist_files(device_id, dir);
49 printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); 196 // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
50 return -ENOMEM; 197 tpc_masks_create_err = create_tpc_mask_files(device_id, dir);
198 // Create file `/proc/gpu#/preempt_tsg`, world writable
199 preempt_entry = proc_create_data(
200 "preempt_tsg", 0222, dir, &preempt_tsg_file_ops,
201 (void*)device_id);
202 // Create file `/proc/gpu#/disable_channel`, world writable
203 disable_channel_entry = proc_create_data(
204 "disable_channel", 0222, dir, &disable_channel_file_ops,
205 (void*)device_id);
206 // Create file `/proc/gpu#/enable_channel`, world writable
207 enable_channel_entry = proc_create_data(
208 "enable_channel", 0222, dir, &enable_channel_file_ops,
209 (void*)device_id);
210 // Create file `/proc/gpu#/switch_to_tsg`, world writable
211 switch_to_tsg_entry = proc_create_data(
212 "switch_to_tsg", 0222, dir, &switch_to_tsg_file_ops,
213 (void*)device_id);
214 // Create file `/proc/gpu#/device_info`, world readable
215 device_info_entry = proc_create_data(
216 "device_info", 0444, dir, &device_info_file_ops,
217 (void*)device_id);
218 // Create file `/proc/gpu#/num_gpcs`, world readable
219 num_gpcs_entry = proc_create_data(
220 "num_gpcs", 0444, dir, &nvdebug_read_reg32_file_ops,
221 (void*)NV_PTOP_SCAL_NUM_GPCS);
222 // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable
223 num_gpcs_entry = proc_create_data(
224 "num_tpc_per_gpc", 0444, dir, &nvdebug_read_reg32_file_ops,
225 (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC);
226 // Create file `/proc/gpu#/num_ces`, world readable
227 num_gpcs_entry = proc_create_data(
228 "num_ces", 0444, dir, &nvdebug_read_reg32_file_ops,
229 (void*)NV_PTOP_SCAL_NUM_CES);
230 // Create file `/proc/gpu#/num_ces`, world readable
231 num_gpcs_entry = proc_create_data(
232 "gpc_mask", 0444, dir, &nvdebug_read_reg32_file_ops,
233 (void*)NV_FUSE_GPC);
234 // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+
235 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) {
236 // TODO: Redo to num_pces
237 // Create file `/proc/gpu#/pce_map`, world readable
238 num_gpcs_entry = proc_create_data(
239 "pce_map", 0444, dir, &nvdebug_read_reg32_file_ops,
240 (void*)NV_CE_PCE_MAP);
241 }
242 // ProcFS entry creation only fails if out of memory
243 if (rl_create_err || tpc_masks_create_err || !preempt_entry ||
244 !disable_channel_entry || !enable_channel_entry ||
245 !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry)
246 goto out_nomem;
51 } 247 }
248 // (See Makefile if you want to know the origin of GIT_HASH.)
52 printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); 249 printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n");
53 return 0; 250 return 0;
251out_nomem:
252 // Make sure to clear all ProcFS directories on error
253 while (res < g_nvdebug_devices) {
254 char device_id_str[7];
255 snprintf(device_id_str, 7, "gpu%d", res);
256 remove_proc_subtree(device_id_str, NULL);
257 res++;
258 }
259 return -ENOMEM;
54} 260}
55 261
56static void __exit nvdebug_exit(void) { 262static void __exit nvdebug_exit(void) {
57 remove_proc_entry("runlist", NULL); 263 struct nvdebug_state *g;
58 remove_proc_entry("preempt_tsg", NULL); 264 // Deinitialize each device
59 remove_proc_entry("disable_channel", NULL); 265 while (g_nvdebug_devices--) {
60 remove_proc_entry("enable_channel", NULL); 266 // Remove procfs directory
61 remove_proc_entry("switch_to_tsg", NULL); 267 char device_id[7];
62 printk(KERN_INFO "[nvdebug] Exiting...\n"); 268 snprintf(device_id, 7, "gpu%d", g_nvdebug_devices);
269 remove_proc_subtree(device_id, NULL);
270 // Free BAR mappings
271 g = &g_nvdebug_state[g_nvdebug_devices];
272 if (g && g->regs)
273 pci_iounmap(g->pcid, g->regs);
274 if (g && g->bar2)
275 pci_iounmap(g->pcid, g->bar2);
276 // TEMP
277 free_irq(g->pcid->irq, g->pcid);
278 printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id);
279 }
280 printk(KERN_INFO "[nvdebug] Module exit complete.\n");
63} 281}
64 282
65module_init(nvdebug_init); 283module_init(nvdebug_init);