aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
author: Joshua Bakita <jbakita@cs.unc.edu> 2024-04-09 13:07:19 -0400
committer: Joshua Bakita <jbakita@cs.unc.edu> 2024-04-09 13:07:19 -0400
commit: 4768fe31f114c5ad788012db5518ce8e37f79c7a (patch)
tree: 03fe90108bf9341b8b9d299df3ba8a6245c509d0 /nvdebug_entry.c
parent: 31964208e4dc0243b6b31b9967c77a791aeb995c (diff)
Correctly handle startup errors and fix gpc*_mask APIs
- Do not create gpc*_mask files on pre-Maxwell GPUs (tested unavailable on the K5000s)
- Use correct register offsets for gpc*_mask files on Ampere+ GPUs
- Document GPC and TPC count and fuse registers
- Correctly handle errors for creation of all ProcFS files
- Remove unnecessary error-handling temp variables in nvdebug_entry
- Misc naming, comment, and layout cleanup
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- nvdebug_entry.c 150
1 files changed, 92 insertions, 58 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 7593a3a..0cf5344 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -12,7 +12,8 @@
12#include "nvdebug.h" 12#include "nvdebug.h"
13#include "stubs.h" 13#include "stubs.h"
14 14
15// Enable to intercept and log GPU interrupts 15// Enable to intercept and log GPU interrupts. Historically used to benchmark
16// interrupt latency.
16#define INTERRUPT_DEBUG 0 17#define INTERRUPT_DEBUG 0
17 18
18// MIT is GPL-compatible. We need to be GPL-compatible for symbols like 19// MIT is GPL-compatible. We need to be GPL-compatible for symbols like
@@ -31,14 +32,16 @@ extern struct file_operations copy_topology_file_ops;
31extern struct file_operations nvdebug_read_reg32_file_ops; 32extern struct file_operations nvdebug_read_reg32_file_ops;
32extern struct file_operations nvdebug_read_reg_range_file_ops; 33extern struct file_operations nvdebug_read_reg_range_file_ops;
33 34
34// Bus types are global symbols in the kernel
35extern struct bus_type platform_bus_type;
36struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 35struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
37unsigned int g_nvdebug_devices = 0; 36unsigned int g_nvdebug_devices = 0;
37// Bus types are global symbols in the kernel
38extern struct bus_type platform_bus_type;
38 39
39// Starting in Kernel 5.6, proc_ops is required instead of file_operations 40// Starting in Kernel 5.6, proc_ops is required instead of file_operations.
41// As file_operations is larger than proc_ops, we can overwrite the memory
42// backing the file_operations struct to follow the proc_ops layout, and then
43// cast on newer kernels.
40#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) 44#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
41// This rewrites the struct to the proc_ops layout on newer kernels
42const struct proc_ops* compat_ops(const struct file_operations* ops) { 45const struct proc_ops* compat_ops(const struct file_operations* ops) {
43 struct proc_ops new_ops = {}; 46 struct proc_ops new_ops = {};
44 new_ops.proc_open = ops->open; 47 new_ops.proc_open = ops->open;
@@ -64,7 +67,7 @@ irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) {
64 67
65// Find any and all NVIDIA GPUs in the system 68// Find any and all NVIDIA GPUs in the system
66// Note: This function fails if any of them are in a bad state 69// Note: This function fails if any of them are in a bad state
67int probe_and_cache_device(void) { 70int probe_and_cache_devices(void) {
68 // platform bus (SoC) iterators 71 // platform bus (SoC) iterators
69 struct device *dev = NULL; 72 struct device *dev = NULL;
70 struct device *temp_dev; 73 struct device *temp_dev;
@@ -143,13 +146,14 @@ int probe_and_cache_device(void) {
143#endif // INTERRUPT_DEBUG 146#endif // INTERRUPT_DEBUG
144 i++; 147 i++;
145 } 148 }
146 // Return the number of devices we found 149 // Return the number of devices found
147 if (i > 0) 150 if (i > 0)
148 return i; 151 return i;
149 return -ENODEV; 152 return -ENODEV;
150} 153}
151 154
152// Create files `/proc/gpu#/runlist#`, world readable 155// Create files `/proc/gpu#/runlist#`, world readable
156// Support: Fermi, Maxwell, Pascal, Volta, Turing
153int create_runlist_files(int device_id, struct proc_dir_entry *dir) { 157int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
154 ptop_device_info_gk104_t info; 158 ptop_device_info_gk104_t info;
155 struct proc_dir_entry *rl_entry; 159 struct proc_dir_entry *rl_entry;
@@ -179,16 +183,24 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
179 return 0; 183 return 0;
180} 184}
181 185
182// Create files /proc/gpu# 186// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
183// TODO: Don't run this on unsupported GPUs 187// Support: Maxwell+
184int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { 188int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
189 struct nvdebug_state* g = &g_nvdebug_state[device_id];
185 char file_name[20]; 190 char file_name[20];
186 int i; 191 int i;
187 struct proc_dir_entry *gpc_tpc_mask_entry; 192 struct proc_dir_entry *gpc_tpc_mask_entry;
188 // Get a bitmask of which GPCs are disabled
189 uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC);
190 // Get maximum number of enabled GPCs for this chip 193 // Get maximum number of enabled GPCs for this chip
191 uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); 194 uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS);
195 // Get a bitmask of which GPCs are disabled
196 uint32_t gpcs_mask;
197 if (g->chip_id < NV_CHIP_ID_AMPERE)
198 gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GM107);
199 else
200 gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GA100);
201 // Verify the reads succeeded
202 if (max_gpcs == -1 || gpcs_mask == -1)
203 return -EIO;
192 // For each enabled GPC, expose a mask of disabled TPCs 204 // For each enabled GPC, expose a mask of disabled TPCs
193 for (i = 0; i < max_gpcs; i++) { 205 for (i = 0; i < max_gpcs; i++) {
194 // Do nothing if GPC is disabled 206 // Do nothing if GPC is disabled
@@ -196,9 +208,14 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
196 continue; 208 continue;
197 // If GPC is enabled, create an entry to read disabled TPCs mask 209 // If GPC is enabled, create an entry to read disabled TPCs mask
198 snprintf(file_name, 20, "gpc%d_tpc_mask", i); 210 snprintf(file_name, 20, "gpc%d_tpc_mask", i);
199 gpc_tpc_mask_entry = proc_create_data( 211 if (g->chip_id < NV_CHIP_ID_AMPERE)
200 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 212 gpc_tpc_mask_entry = proc_create_data(
201 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); 213 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
214 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GM107(i));
215 else
216 gpc_tpc_mask_entry = proc_create_data(
217 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
218 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GA100(i));
202 if (!gpc_tpc_mask_entry) 219 if (!gpc_tpc_mask_entry)
203 return -ENOMEM; 220 return -ENOMEM;
204 } 221 }
@@ -206,64 +223,84 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
206} 223}
207 224
208int __init nvdebug_init(void) { 225int __init nvdebug_init(void) {
209 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, 226 struct proc_dir_entry *dir;
210 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, 227 int err, res;
211 *num_gpcs_entry;
212 int rl_create_err, tpc_masks_create_err;
213 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state 228 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state
214 int res = probe_and_cache_device(); 229 if ((res = probe_and_cache_devices()) < 0)
215 if (res < 0)
216 return res; 230 return res;
217 g_nvdebug_devices = res; 231 g_nvdebug_devices = res;
218 // Create seperate ProcFS directories for each gpu 232 // Create seperate ProcFS directories for each gpu
219 while (res--) { 233 while (res--) {
220 char device_id_str[7]; 234 char device_id_str[7];
221 uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id 235 // Create a wider copy of the GPU ID to allow us to abuse the *data
236 // field of proc_dir_entry to store the GPU ID.
237 uintptr_t device_id = res;
222 // Create directory /proc/gpu# where # is the GPU number 238 // Create directory /proc/gpu# where # is the GPU number
239 // As ProcFS entry creation only fails if out of memory, we auto-skip
240 // to handling that on any error in creating ProcFS files.
223 snprintf(device_id_str, 7, "gpu%ld", device_id); 241 snprintf(device_id_str, 7, "gpu%ld", device_id);
224 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) 242 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
225 goto out_nomem; 243 goto out_nomem;
226 // Create files `/proc/gpu#/runlist#`, world readable 244 // Create files `/proc/gpu#/runlist#`, world readable
227 if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) 245 if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE)
228 create_runlist_files(device_id, dir); 246 if ((err = create_runlist_files(device_id, dir)))
229 // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable 247 goto out_err;
230 tpc_masks_create_err = create_tpc_mask_files(device_id, dir);
231 // Create file `/proc/gpu#/preempt_tsg`, world writable 248 // Create file `/proc/gpu#/preempt_tsg`, world writable
232 preempt_entry = proc_create_data( 249 if (!proc_create_data(
233 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), 250 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops),
234 (void*)device_id); 251 (void*)device_id))
252 goto out_nomem;
235 // Create file `/proc/gpu#/disable_channel`, world writable 253 // Create file `/proc/gpu#/disable_channel`, world writable
236 disable_channel_entry = proc_create_data( 254 if (!proc_create_data(
237 "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), 255 "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops),
238 (void*)device_id); 256 (void*)device_id))
257 goto out_nomem;
239 // Create file `/proc/gpu#/enable_channel`, world writable 258 // Create file `/proc/gpu#/enable_channel`, world writable
240 enable_channel_entry = proc_create_data( 259 if (!proc_create_data(
241 "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), 260 "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops),
242 (void*)device_id); 261 (void*)device_id))
262 goto out_nomem;
243 // Create file `/proc/gpu#/switch_to_tsg`, world writable 263 // Create file `/proc/gpu#/switch_to_tsg`, world writable
244 switch_to_tsg_entry = proc_create_data( 264 if (!proc_create_data(
245 "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), 265 "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops),
246 (void*)device_id); 266 (void*)device_id))
267 goto out_nomem;
247 // Create file `/proc/gpu#/device_info`, world readable 268 // Create file `/proc/gpu#/device_info`, world readable
248 device_info_entry = proc_create_data( 269 if (!proc_create_data(
249 "device_info", 0444, dir, compat_ops(&device_info_file_ops), 270 "device_info", 0444, dir, compat_ops(&device_info_file_ops),
250 (void*)device_id); 271 (void*)device_id))
272 goto out_nomem;
251 // Create file `/proc/gpu#/num_gpcs`, world readable 273 // Create file `/proc/gpu#/num_gpcs`, world readable
252 num_gpcs_entry = proc_create_data( 274 if (!proc_create_data(
253 "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 275 "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
254 (void*)NV_PTOP_SCAL_NUM_GPCS); 276 (void*)NV_PTOP_SCAL_NUM_GPCS))
277 goto out_nomem;
255 // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable 278 // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable
256 num_gpcs_entry = proc_create_data( 279 if (!proc_create_data(
257 "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 280 "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
258 (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); 281 (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC))
259 // Create file `/proc/gpu#/num_ces`, world readable 282 goto out_nomem;
260 num_gpcs_entry = proc_create_data(
261 "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
262 (void*)NV_PTOP_SCAL_NUM_CES);
263 // Create file `/proc/gpu#/num_ces`, world readable 283 // Create file `/proc/gpu#/num_ces`, world readable
264 num_gpcs_entry = proc_create_data( 284 if (!proc_create_data(
265 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 285 "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
266 (void*)NV_FUSE_GPC); 286 (void*)NV_PTOP_SCAL_NUM_CES))
287 goto out_nomem;
288 // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable (Maxwell+)
289 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL)
290 if ((err = create_tpc_mask_files(device_id, dir)))
291 goto out_err;
292 // Create file `/proc/gpu#/gpc_mask`, world readable (Maxwell+)
293 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE) {
294 if (!proc_create_data(
295 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
296 (void*)NV_FUSE_GPC_GA100))
297 goto out_nomem;
298 } else if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) {
299 if (!proc_create_data(
300 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
301 (void*)NV_FUSE_GPC_GM107))
302 goto out_nomem;
303 }
267 // Create files exposing LCE and PCE configuration (Pascal+) 304 // Create files exposing LCE and PCE configuration (Pascal+)
268 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { 305 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) {
269 // Create file `/proc/gpu#/copy_topology`, world readable 306 // Create file `/proc/gpu#/copy_topology`, world readable
@@ -277,16 +314,13 @@ int __init nvdebug_init(void) {
277 (void*)NV_CE_PCE_MAP)) 314 (void*)NV_CE_PCE_MAP))
278 goto out_nomem; 315 goto out_nomem;
279 } 316 }
280 // ProcFS entry creation only fails if out of memory
281 if (rl_create_err || tpc_masks_create_err || !preempt_entry ||
282 !disable_channel_entry || !enable_channel_entry ||
283 !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry)
284 goto out_nomem;
285 } 317 }
286 // (See Makefile if you want to know the origin of GIT_HASH.) 318 // (See Makefile if you want to know the origin of GIT_HASH.)
287 printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); 319 printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n");
288 return 0; 320 return 0;
289out_nomem: 321out_nomem:
322 err = -ENOMEM;
323out_err:
290 // Make sure to clear all ProcFS directories on error 324 // Make sure to clear all ProcFS directories on error
291 while (res < g_nvdebug_devices) { 325 while (res < g_nvdebug_devices) {
292 char device_id_str[7]; 326 char device_id_str[7];
@@ -294,7 +328,7 @@ out_nomem:
294 remove_proc_subtree(device_id_str, NULL); 328 remove_proc_subtree(device_id_str, NULL);
295 res++; 329 res++;
296 } 330 }
297 return -ENOMEM; 331 return err;
298} 332}
299 333
300static void __exit nvdebug_exit(void) { 334static void __exit nvdebug_exit(void) {