diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-22 12:52:59 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-22 12:52:59 -0400 |
commit | 306a03d18b305e4e573be3b2931978fa10679eb9 (patch) | |
tree | 349570dfbe5f531e903c949c3f663627ee1097a8 /nvdebug_entry.c | |
parent | f4b83713672acaf88a526b930b8e417453f6edc5 (diff) |
Quick dump of current state for Ben to review.
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 288 |
1 files changed, 253 insertions, 35 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 0854b8b..695b5fd 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -2,64 +2,282 @@ | |||
2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /* TODO | ||
6 | * - Add sysfs trigger for a preemption | ||
7 | */ | ||
8 | |||
9 | #include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type | 5 | #include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type |
6 | #include <linux/interrupt.h> // For hooking the nvidia driver interrupts | ||
10 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
11 | #include <linux/module.h> | 8 | #include <linux/module.h> |
12 | #include <linux/proc_fs.h> // So we can set up entries in /proc | 9 | #include <linux/pci.h> // For PCI device scanning |
10 | #include <linux/proc_fs.h> // So we can set up entries in /proc | ||
13 | 11 | ||
14 | #include "nvdebug.h" | 12 | #include "nvdebug.h" |
13 | #include "stubs.h" | ||
15 | 14 | ||
16 | // LIAR. But without this we can't use GPL-only exported symbols like | 15 | // MIT is GPL-compatible. We need to be GPL-compatible for symbols like |
17 | // platform_bus_type or bus_find_device_by_name... | 16 | // platform_bus_type or bus_find_device_by_name... |
18 | MODULE_LICENSE("GPL"); | 17 | MODULE_LICENSE("Dual MIT/GPL"); |
19 | MODULE_AUTHOR("Joshua Bakita"); | 18 | MODULE_AUTHOR("Joshua Bakita"); |
20 | MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | 19 | MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); |
21 | MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now | ||
22 | 20 | ||
23 | extern const struct file_operations runlist_file_ops; | 21 | extern const struct file_operations runlist_file_ops; |
24 | extern const struct file_operations preempt_tsg_file_ops; | 22 | extern const struct file_operations preempt_tsg_file_ops; |
25 | extern const struct file_operations disable_channel_file_ops; | 23 | extern const struct file_operations disable_channel_file_ops; |
26 | extern const struct file_operations enable_channel_file_ops; | 24 | extern const struct file_operations enable_channel_file_ops; |
27 | extern const struct file_operations switch_to_tsg_file_ops; | 25 | extern const struct file_operations switch_to_tsg_file_ops; |
26 | extern const struct file_operations device_info_file_ops; | ||
27 | extern const struct file_operations nvdebug_read_reg32_file_ops; | ||
28 | |||
29 | // Bus types are global symbols in the kernel | ||
30 | extern struct bus_type platform_bus_type; | ||
31 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | ||
32 | unsigned int g_nvdebug_devices = 0; | ||
33 | |||
34 | // TEMP | ||
35 | irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { | ||
36 | printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num); | ||
37 | return IRQ_NONE; // We don't actually handle any interrupts. Pass them on. | ||
38 | } | ||
39 | |||
40 | // Find any and all NVIDIA GPUs in the system | ||
41 | // Note: This function fails if any of them are in a bad state | ||
42 | int probe_and_cache_device(void) { | ||
43 | // platform bus (SoC) iterators | ||
44 | struct device *dev = NULL; | ||
45 | struct device *temp_dev; | ||
46 | // PCI search iterator and search query | ||
47 | struct pci_dev *pcid = NULL; | ||
48 | // This query pattern is mirrored off nouveau | ||
49 | struct pci_device_id query = { | ||
50 | .vendor = NV_PCI_VENDOR, // Match NVIDIA devices | ||
51 | .device = PCI_ANY_ID, | ||
52 | .subvendor = PCI_ANY_ID, | ||
53 | .subdevice = PCI_ANY_ID, | ||
54 | .class_mask = 0xff << 16, | ||
55 | .class = PCI_BASE_CLASS_DISPLAY << 16, // Match display devs | ||
56 | }; | ||
57 | int i = 0; | ||
58 | // Search the platform bus for the first device that matches our name | ||
59 | // Search for GV11B (Jetson Xavier) | ||
60 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) | ||
61 | dev = temp_dev; | ||
62 | // Search for GP10B (Jetson TX2) | ||
63 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) | ||
64 | dev = temp_dev; | ||
65 | // TODO: Support other platform bus devices (gk20a, gm20b) | ||
66 | if (dev) { | ||
67 | struct nvgpu_os_linux *l; | ||
68 | mc_boot_0_t ids; | ||
69 | g_nvdebug_state[i].g = get_gk20a(dev); | ||
70 | l = container_of(g_nvdebug_state[i].g, struct nvgpu_os_linux, g); | ||
71 | g_nvdebug_state[i].regs = l->regs; | ||
72 | if (!g_nvdebug_state[i].regs) | ||
73 | return -EADDRNOTAVAIL; | ||
74 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
75 | if (ids.raw == -1) | ||
76 | return -EADDRNOTAVAIL; | ||
77 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
78 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", | ||
79 | ids.chip_id, ARCH2NAME(ids.architecture)); | ||
80 | i++; | ||
81 | } | ||
82 | // Search the PCI bus and iterate through all matches | ||
83 | // FIXME: State rollback | ||
84 | while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { | ||
85 | mc_boot_0_t ids; | ||
86 | g_nvdebug_state[i].g = NULL; | ||
87 | // Map BAR0 (GPU control registers) | ||
88 | g_nvdebug_state[i].regs = pci_iomap(pcid, 0, 0); | ||
89 | if (!g_nvdebug_state[i].regs) { | ||
90 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); | ||
91 | return -EADDRNOTAVAIL; | ||
92 | } | ||
93 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) | ||
94 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); | ||
95 | // Try mapping only the lower half of BAR3 on fail | ||
96 | // (vesafb may map the top half for display) | ||
97 | if (!g_nvdebug_state[i].bar3) | ||
98 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | ||
99 | g_nvdebug_state[i].pcid = pcid; | ||
100 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
101 | if (ids.raw == -1) { | ||
102 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
103 | return -EADDRNOTAVAIL; | ||
104 | } | ||
105 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
106 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", | ||
107 | ids.chip_id, ARCH2NAME(ids.architecture)); | ||
108 | // TEMP | ||
109 | if (request_irq(pcid->irq, nvdebug_irq_tap, IRQF_SHARED, "nvdebug tap", pcid)) { | ||
110 | printk(KERN_WARNING "[nvdebug] Unable to initialize IRQ tap\n"); | ||
111 | } | ||
112 | i++; | ||
113 | } | ||
114 | // Return the number of devices we found | ||
115 | if (i > 0) | ||
116 | return i; | ||
117 | return -ENODEV; | ||
118 | } | ||
119 | |||
120 | // Create files `/proc/gpu#/runlist#`, world readable | ||
121 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | ||
122 | ptop_device_info_t info; | ||
123 | struct proc_dir_entry *rl_entry; | ||
124 | int i, rl_id; | ||
125 | char runlist_name[12]; | ||
126 | int max_rl_id = 0; // Always at least one runlist | ||
127 | // Figure out how many runlists there are by checking the device info | ||
128 | // registers. Runlists are always numbered sequentially, so we just have | ||
129 | // to find the highest-valued one and add 1 to get the number of runlists. | ||
130 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) { | ||
131 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i)); | ||
132 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) | ||
133 | continue; | ||
134 | if (info.runlist_enum > max_rl_id) | ||
135 | max_rl_id = info.runlist_enum; | ||
136 | } | ||
137 | // Create files to read each runlist. The read handling code looks at the | ||
138 | // PDE_DATA associated with the file to determine what the runlist ID is. | ||
139 | for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { | ||
140 | snprintf(runlist_name, 12, "runlist%d", rl_id); | ||
141 | rl_entry = proc_create_data( | ||
142 | runlist_name, 0444, dir, &runlist_file_ops, | ||
143 | (void*)(uintptr_t)rl_id); | ||
144 | if (!rl_entry) | ||
145 | return -ENOMEM; | ||
146 | } | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | ||
151 | // TODO: Don't run this on unsupported GPUs | ||
152 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | ||
153 | char file_name[20]; | ||
154 | int i; | ||
155 | struct proc_dir_entry *gpc_tpc_mask_entry; | ||
156 | // Get a bitmask of which GPCs are disabled | ||
157 | uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC); | ||
158 | // Get maximum number of enabled GPCs for this chip | ||
159 | uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); | ||
160 | // For each enabled GPC, expose a mask of disabled TPCs | ||
161 | for (i = 0; i < max_gpcs; i++) { | ||
162 | // Do nothing if GPC is disabled | ||
163 | if ((1 << i) & gpcs_mask) | ||
164 | continue; | ||
165 | // If GPC is enabled, create an entry to read disabled TPCs mask | ||
166 | snprintf(file_name, 20, "gpc%d_tpc_mask", i); | ||
167 | gpc_tpc_mask_entry = proc_create_data( | ||
168 | file_name, 0444, dir, &nvdebug_read_reg32_file_ops, | ||
169 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); | ||
170 | if (!gpc_tpc_mask_entry) | ||
171 | return -ENOMEM; | ||
172 | } | ||
173 | return 0; | ||
174 | } | ||
28 | 175 | ||
29 | int __init nvdebug_init(void) { | 176 | int __init nvdebug_init(void) { |
30 | struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry, | 177 | struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, |
31 | *enable_channel_entry, *switch_to_tsg_entry; | 178 | *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, |
32 | // Create file `/proc/preempt_tsg`, world readable | 179 | *num_gpcs_entry; |
33 | rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); | 180 | int rl_create_err, tpc_masks_create_err; |
34 | // Create file `/proc/preempt_tsg`, world writable | 181 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state |
35 | preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops); | 182 | int res = probe_and_cache_device(); |
36 | // Create file `/proc/disable_channel`, world writable | 183 | if (res < 0) |
37 | disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops); | 184 | return res; |
38 | // Create file `/proc/enable_channel`, world writable | 185 | g_nvdebug_devices = res; |
39 | enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops); | 186 | // Create separate ProcFS directories for each GPU |
40 | // Create file `/proc/switch_to_tsg`, world writable | 187 | while (res--) { |
41 | switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops); | 188 | char device_id_str[7]; |
42 | // ProcFS entry creation only fails if out of memory | 189 | uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id |
43 | if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) { | 190 | // Create directory /proc/gpu# where # is the GPU number |
44 | remove_proc_entry("runlist", NULL); | 191 | snprintf(device_id_str, 7, "gpu%ld", device_id); |
45 | remove_proc_entry("preempt_tsg", NULL); | 192 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
46 | remove_proc_entry("disable_channel", NULL); | 193 | goto out_nomem; |
47 | remove_proc_entry("enable_channel", NULL); | 194 | // Create files `/proc/gpu#/runlist#`, world readable |
48 | remove_proc_entry("switch_to_tsg", NULL); | 195 | rl_create_err = create_runlist_files(device_id, dir); |
49 | printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); | 196 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
50 | return -ENOMEM; | 197 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); |
198 | // Create file `/proc/gpu#/preempt_tsg`, world writable | ||
199 | preempt_entry = proc_create_data( | ||
200 | "preempt_tsg", 0222, dir, &preempt_tsg_file_ops, | ||
201 | (void*)device_id); | ||
202 | // Create file `/proc/gpu#/disable_channel`, world writable | ||
203 | disable_channel_entry = proc_create_data( | ||
204 | "disable_channel", 0222, dir, &disable_channel_file_ops, | ||
205 | (void*)device_id); | ||
206 | // Create file `/proc/gpu#/enable_channel`, world writable | ||
207 | enable_channel_entry = proc_create_data( | ||
208 | "enable_channel", 0222, dir, &enable_channel_file_ops, | ||
209 | (void*)device_id); | ||
210 | // Create file `/proc/gpu#/switch_to_tsg`, world writable | ||
211 | switch_to_tsg_entry = proc_create_data( | ||
212 | "switch_to_tsg", 0222, dir, &switch_to_tsg_file_ops, | ||
213 | (void*)device_id); | ||
214 | // Create file `/proc/gpu#/device_info`, world readable | ||
215 | device_info_entry = proc_create_data( | ||
216 | "device_info", 0444, dir, &device_info_file_ops, | ||
217 | (void*)device_id); | ||
218 | // Create file `/proc/gpu#/num_gpcs`, world readable | ||
219 | num_gpcs_entry = proc_create_data( | ||
220 | "num_gpcs", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
221 | (void*)NV_PTOP_SCAL_NUM_GPCS); | ||
222 | // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable | ||
223 | num_gpcs_entry = proc_create_data( | ||
224 | "num_tpc_per_gpc", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
225 | (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); | ||
226 | // Create file `/proc/gpu#/num_ces`, world readable | ||
227 | num_gpcs_entry = proc_create_data( | ||
228 | "num_ces", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
229 | (void*)NV_PTOP_SCAL_NUM_CES); | ||
230 | // Create file `/proc/gpu#/gpc_mask`, world readable | ||
231 | num_gpcs_entry = proc_create_data( | ||
232 | "gpc_mask", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
233 | (void*)NV_FUSE_GPC); | ||
234 | // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ | ||
235 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { | ||
236 | // TODO: Redo to num_pces | ||
237 | // Create file `/proc/gpu#/pce_map`, world readable | ||
238 | num_gpcs_entry = proc_create_data( | ||
239 | "pce_map", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
240 | (void*)NV_CE_PCE_MAP); | ||
241 | } | ||
242 | // ProcFS entry creation only fails if out of memory | ||
243 | if (rl_create_err || tpc_masks_create_err || !preempt_entry || | ||
244 | !disable_channel_entry || !enable_channel_entry || | ||
245 | !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry) | ||
246 | goto out_nomem; | ||
51 | } | 247 | } |
248 | // (See Makefile if you want to know the origin of GIT_HASH.) | ||
52 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); | 249 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); |
53 | return 0; | 250 | return 0; |
251 | out_nomem: | ||
252 | // Make sure to clear all ProcFS directories on error | ||
253 | while (res < g_nvdebug_devices) { | ||
254 | char device_id_str[7]; | ||
255 | snprintf(device_id_str, 7, "gpu%d", res); | ||
256 | remove_proc_subtree(device_id_str, NULL); | ||
257 | res++; | ||
258 | } | ||
259 | return -ENOMEM; | ||
54 | } | 260 | } |
55 | 261 | ||
56 | static void __exit nvdebug_exit(void) { | 262 | static void __exit nvdebug_exit(void) { |
57 | remove_proc_entry("runlist", NULL); | 263 | struct nvdebug_state *g; |
58 | remove_proc_entry("preempt_tsg", NULL); | 264 | // Deinitialize each device |
59 | remove_proc_entry("disable_channel", NULL); | 265 | while (g_nvdebug_devices--) { |
60 | remove_proc_entry("enable_channel", NULL); | 266 | // Remove procfs directory |
61 | remove_proc_entry("switch_to_tsg", NULL); | 267 | char device_id[7]; |
62 | printk(KERN_INFO "[nvdebug] Exiting...\n"); | 268 | snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); |
269 | remove_proc_subtree(device_id, NULL); | ||
270 | // Free BAR mappings | ||
271 | g = &g_nvdebug_state[g_nvdebug_devices]; | ||
272 | if (g && g->regs) | ||
273 | pci_iounmap(g->pcid, g->regs); | ||
274 | if (g && g->bar2) | ||
275 | pci_iounmap(g->pcid, g->bar2); | ||
276 | // TEMP | ||
277 | free_irq(g->pcid->irq, g->pcid); | ||
278 | printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id); | ||
279 | } | ||
280 | printk(KERN_INFO "[nvdebug] Module exit complete.\n"); | ||
63 | } | 281 | } |
64 | 282 | ||
65 | module_init(nvdebug_init); | 283 | module_init(nvdebug_init); |