diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-09 13:07:19 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-09 13:07:19 -0400 |
commit | 4768fe31f114c5ad788012db5518ce8e37f79c7a (patch) | |
tree | 03fe90108bf9341b8b9d299df3ba8a6245c509d0 /nvdebug_entry.c | |
parent | 31964208e4dc0243b6b31b9967c77a791aeb995c (diff) |
Correctly handle startup errors and fix gpc*_mask APIs
- Do not create gpc*_mask files on pre-Maxwell GPUs (tested
unavailable on the K5000s)
- Use correct register offsets for gpc*_mask files on Ampere+ GPUs
- Document GPC and TPC count and fuse registers.
- Correctly handle errors for creation of all ProcFS files
- Remove unnecessary error-handling temp variables in nvdebug_entry
- Misc naming, comment, and layout cleanup
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 150 |
1 files changed, 92 insertions, 58 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 7593a3a..0cf5344 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -12,7 +12,8 @@ | |||
12 | #include "nvdebug.h" | 12 | #include "nvdebug.h" |
13 | #include "stubs.h" | 13 | #include "stubs.h" |
14 | 14 | ||
15 | // Enable to intercept and log GPU interrupts | 15 | // Enable to intercept and log GPU interrupts. Historically used to benchmark |
16 | // interrupt latency. | ||
16 | #define INTERRUPT_DEBUG 0 | 17 | #define INTERRUPT_DEBUG 0 |
17 | 18 | ||
18 | // MIT is GPL-compatible. We need to be GPL-compatible for symbols like | 19 | // MIT is GPL-compatible. We need to be GPL-compatible for symbols like |
@@ -31,14 +32,16 @@ extern struct file_operations copy_topology_file_ops; | |||
31 | extern struct file_operations nvdebug_read_reg32_file_ops; | 32 | extern struct file_operations nvdebug_read_reg32_file_ops; |
32 | extern struct file_operations nvdebug_read_reg_range_file_ops; | 33 | extern struct file_operations nvdebug_read_reg_range_file_ops; |
33 | 34 | ||
34 | // Bus types are global symbols in the kernel | ||
35 | extern struct bus_type platform_bus_type; | ||
36 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 35 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
37 | unsigned int g_nvdebug_devices = 0; | 36 | unsigned int g_nvdebug_devices = 0; |
37 | // Bus types are global symbols in the kernel | ||
38 | extern struct bus_type platform_bus_type; | ||
38 | 39 | ||
39 | // Starting in Kernel 5.6, proc_ops is required instead of file_operations | 40 | // Starting in Kernel 5.6, proc_ops is required instead of file_operations. |
41 | // As file_operations is larger than proc_ops, we can overwrite the memory | ||
42 | // backing the file_operations struct to follow the proc_ops layout, and then | ||
43 | // cast on newer kernels. | ||
40 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | 44 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) |
41 | // This rewrites the struct to the proc_ops layout on newer kernels | ||
42 | const struct proc_ops* compat_ops(const struct file_operations* ops) { | 45 | const struct proc_ops* compat_ops(const struct file_operations* ops) { |
43 | struct proc_ops new_ops = {}; | 46 | struct proc_ops new_ops = {}; |
44 | new_ops.proc_open = ops->open; | 47 | new_ops.proc_open = ops->open; |
@@ -64,7 +67,7 @@ irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { | |||
64 | 67 | ||
65 | // Find any and all NVIDIA GPUs in the system | 68 | // Find any and all NVIDIA GPUs in the system |
66 | // Note: This function fails if any of them are in a bad state | 69 | // Note: This function fails if any of them are in a bad state |
67 | int probe_and_cache_device(void) { | 70 | int probe_and_cache_devices(void) { |
68 | // platform bus (SoC) iterators | 71 | // platform bus (SoC) iterators |
69 | struct device *dev = NULL; | 72 | struct device *dev = NULL; |
70 | struct device *temp_dev; | 73 | struct device *temp_dev; |
@@ -143,13 +146,14 @@ int probe_and_cache_device(void) { | |||
143 | #endif // INTERRUPT_DEBUG | 146 | #endif // INTERRUPT_DEBUG |
144 | i++; | 147 | i++; |
145 | } | 148 | } |
146 | // Return the number of devices we found | 149 | // Return the number of devices found |
147 | if (i > 0) | 150 | if (i > 0) |
148 | return i; | 151 | return i; |
149 | return -ENODEV; | 152 | return -ENODEV; |
150 | } | 153 | } |
151 | 154 | ||
152 | // Create files `/proc/gpu#/runlist#`, world readable | 155 | // Create files `/proc/gpu#/runlist#`, world readable |
156 | // Support: Fermi, Maxwell, Pascal, Volta, Turing | ||
153 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | 157 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { |
154 | ptop_device_info_gk104_t info; | 158 | ptop_device_info_gk104_t info; |
155 | struct proc_dir_entry *rl_entry; | 159 | struct proc_dir_entry *rl_entry; |
@@ -179,16 +183,24 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | |||
179 | return 0; | 183 | return 0; |
180 | } | 184 | } |
181 | 185 | ||
182 | // Create files /proc/gpu# | 186 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
183 | // TODO: Don't run this on unsupported GPUs | 187 | // Support: Maxwell+ |
184 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | 188 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { |
189 | struct nvdebug_state* g = &g_nvdebug_state[device_id]; | ||
185 | char file_name[20]; | 190 | char file_name[20]; |
186 | int i; | 191 | int i; |
187 | struct proc_dir_entry *gpc_tpc_mask_entry; | 192 | struct proc_dir_entry *gpc_tpc_mask_entry; |
188 | // Get a bitmask of which GPCs are disabled | ||
189 | uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC); | ||
190 | // Get maximum number of enabled GPCs for this chip | 193 | // Get maximum number of enabled GPCs for this chip |
191 | uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); | 194 | uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); |
195 | // Get a bitmask of which GPCs are disabled | ||
196 | uint32_t gpcs_mask; | ||
197 | if (g->chip_id < NV_CHIP_ID_AMPERE) | ||
198 | gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GM107); | ||
199 | else | ||
200 | gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GA100); | ||
201 | // Verify the reads succeeded | ||
202 | if (max_gpcs == -1 || gpcs_mask == -1) | ||
203 | return -EIO; | ||
192 | // For each enabled GPC, expose a mask of disabled TPCs | 204 | // For each enabled GPC, expose a mask of disabled TPCs |
193 | for (i = 0; i < max_gpcs; i++) { | 205 | for (i = 0; i < max_gpcs; i++) { |
194 | // Do nothing if GPC is disabled | 206 | // Do nothing if GPC is disabled |
@@ -196,9 +208,14 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | |||
196 | continue; | 208 | continue; |
197 | // If GPC is enabled, create an entry to read disabled TPCs mask | 209 | // If GPC is enabled, create an entry to read disabled TPCs mask |
198 | snprintf(file_name, 20, "gpc%d_tpc_mask", i); | 210 | snprintf(file_name, 20, "gpc%d_tpc_mask", i); |
199 | gpc_tpc_mask_entry = proc_create_data( | 211 | if (g->chip_id < NV_CHIP_ID_AMPERE) |
200 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 212 | gpc_tpc_mask_entry = proc_create_data( |
201 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); | 213 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
214 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GM107(i)); | ||
215 | else | ||
216 | gpc_tpc_mask_entry = proc_create_data( | ||
217 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
218 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GA100(i)); | ||
202 | if (!gpc_tpc_mask_entry) | 219 | if (!gpc_tpc_mask_entry) |
203 | return -ENOMEM; | 220 | return -ENOMEM; |
204 | } | 221 | } |
@@ -206,64 +223,84 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | |||
206 | } | 223 | } |
207 | 224 | ||
208 | int __init nvdebug_init(void) { | 225 | int __init nvdebug_init(void) { |
209 | struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, | 226 | struct proc_dir_entry *dir; |
210 | *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, | 227 | int err, res; |
211 | *num_gpcs_entry; | ||
212 | int rl_create_err, tpc_masks_create_err; | ||
213 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state | 228 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state |
214 | int res = probe_and_cache_device(); | 229 | if ((res = probe_and_cache_devices()) < 0) |
215 | if (res < 0) | ||
216 | return res; | 230 | return res; |
217 | g_nvdebug_devices = res; | 231 | g_nvdebug_devices = res; |
218 | // Create seperate ProcFS directories for each gpu | 232 | // Create seperate ProcFS directories for each gpu |
219 | while (res--) { | 233 | while (res--) { |
220 | char device_id_str[7]; | 234 | char device_id_str[7]; |
221 | uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id | 235 | // Create a wider copy of the GPU ID to allow us to abuse the *data |
236 | // field of proc_dir_entry to store the GPU ID. | ||
237 | uintptr_t device_id = res; | ||
222 | // Create directory /proc/gpu# where # is the GPU number | 238 | // Create directory /proc/gpu# where # is the GPU number |
239 | // As ProcFS entry creation only fails if out of memory, we auto-skip | ||
240 | // to handling that on any error in creating ProcFS files. | ||
223 | snprintf(device_id_str, 7, "gpu%ld", device_id); | 241 | snprintf(device_id_str, 7, "gpu%ld", device_id); |
224 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) | 242 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
225 | goto out_nomem; | 243 | goto out_nomem; |
226 | // Create files `/proc/gpu#/runlist#`, world readable | 244 | // Create files `/proc/gpu#/runlist#`, world readable |
227 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) | 245 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) |
228 | create_runlist_files(device_id, dir); | 246 | if ((err = create_runlist_files(device_id, dir))) |
229 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | 247 | goto out_err; |
230 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); | ||
231 | // Create file `/proc/gpu#/preempt_tsg`, world writable | 248 | // Create file `/proc/gpu#/preempt_tsg`, world writable |
232 | preempt_entry = proc_create_data( | 249 | if (!proc_create_data( |
233 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | 250 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), |
234 | (void*)device_id); | 251 | (void*)device_id)) |
252 | goto out_nomem; | ||
235 | // Create file `/proc/gpu#/disable_channel`, world writable | 253 | // Create file `/proc/gpu#/disable_channel`, world writable |
236 | disable_channel_entry = proc_create_data( | 254 | if (!proc_create_data( |
237 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), | 255 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), |
238 | (void*)device_id); | 256 | (void*)device_id)) |
257 | goto out_nomem; | ||
239 | // Create file `/proc/gpu#/enable_channel`, world writable | 258 | // Create file `/proc/gpu#/enable_channel`, world writable |
240 | enable_channel_entry = proc_create_data( | 259 | if (!proc_create_data( |
241 | "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), | 260 | "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), |
242 | (void*)device_id); | 261 | (void*)device_id)) |
262 | goto out_nomem; | ||
243 | // Create file `/proc/gpu#/switch_to_tsg`, world writable | 263 | // Create file `/proc/gpu#/switch_to_tsg`, world writable |
244 | switch_to_tsg_entry = proc_create_data( | 264 | if (!proc_create_data( |
245 | "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), | 265 | "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), |
246 | (void*)device_id); | 266 | (void*)device_id)) |
267 | goto out_nomem; | ||
247 | // Create file `/proc/gpu#/device_info`, world readable | 268 | // Create file `/proc/gpu#/device_info`, world readable |
248 | device_info_entry = proc_create_data( | 269 | if (!proc_create_data( |
249 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), | 270 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), |
250 | (void*)device_id); | 271 | (void*)device_id)) |
272 | goto out_nomem; | ||
251 | // Create file `/proc/gpu#/num_gpcs`, world readable | 273 | // Create file `/proc/gpu#/num_gpcs`, world readable |
252 | num_gpcs_entry = proc_create_data( | 274 | if (!proc_create_data( |
253 | "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 275 | "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
254 | (void*)NV_PTOP_SCAL_NUM_GPCS); | 276 | (void*)NV_PTOP_SCAL_NUM_GPCS)) |
277 | goto out_nomem; | ||
255 | // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable | 278 | // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable |
256 | num_gpcs_entry = proc_create_data( | 279 | if (!proc_create_data( |
257 | "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 280 | "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
258 | (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); | 281 | (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC)) |
259 | // Create file `/proc/gpu#/num_ces`, world readable | 282 | goto out_nomem; |
260 | num_gpcs_entry = proc_create_data( | ||
261 | "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
262 | (void*)NV_PTOP_SCAL_NUM_CES); | ||
263 | // Create file `/proc/gpu#/num_ces`, world readable | 283 | // Create file `/proc/gpu#/num_ces`, world readable |
264 | num_gpcs_entry = proc_create_data( | 284 | if (!proc_create_data( |
265 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 285 | "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
266 | (void*)NV_FUSE_GPC); | 286 | (void*)NV_PTOP_SCAL_NUM_CES)) |
287 | goto out_nomem; | ||
288 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable (Maxwell+) | ||
289 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) | ||
290 | if ((err = create_tpc_mask_files(device_id, dir))) | ||
291 | goto out_err; | ||
292 | // Create file `/proc/gpu#/gpc_mask`, world readable (Maxwell+) | ||
293 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE) { | ||
294 | if (!proc_create_data( | ||
295 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
296 | (void*)NV_FUSE_GPC_GA100)) | ||
297 | goto out_nomem; | ||
298 | } else if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) { | ||
299 | if (!proc_create_data( | ||
300 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
301 | (void*)NV_FUSE_GPC_GM107)) | ||
302 | goto out_nomem; | ||
303 | } | ||
267 | // Create files exposing LCE and PCE configuration (Pascal+) | 304 | // Create files exposing LCE and PCE configuration (Pascal+) |
268 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { | 305 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { |
269 | // Create file `/proc/gpu#/copy_topology`, world readable | 306 | // Create file `/proc/gpu#/copy_topology`, world readable |
@@ -277,16 +314,13 @@ int __init nvdebug_init(void) { | |||
277 | (void*)NV_CE_PCE_MAP)) | 314 | (void*)NV_CE_PCE_MAP)) |
278 | goto out_nomem; | 315 | goto out_nomem; |
279 | } | 316 | } |
280 | // ProcFS entry creation only fails if out of memory | ||
281 | if (rl_create_err || tpc_masks_create_err || !preempt_entry || | ||
282 | !disable_channel_entry || !enable_channel_entry || | ||
283 | !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry) | ||
284 | goto out_nomem; | ||
285 | } | 317 | } |
286 | // (See Makefile if you want to know the origin of GIT_HASH.) | 318 | // (See Makefile if you want to know the origin of GIT_HASH.) |
287 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); | 319 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); |
288 | return 0; | 320 | return 0; |
289 | out_nomem: | 321 | out_nomem: |
322 | err = -ENOMEM; | ||
323 | out_err: | ||
290 | // Make sure to clear all ProcFS directories on error | 324 | // Make sure to clear all ProcFS directories on error |
291 | while (res < g_nvdebug_devices) { | 325 | while (res < g_nvdebug_devices) { |
292 | char device_id_str[7]; | 326 | char device_id_str[7]; |
@@ -294,7 +328,7 @@ out_nomem: | |||
294 | remove_proc_subtree(device_id_str, NULL); | 328 | remove_proc_subtree(device_id_str, NULL); |
295 | res++; | 329 | res++; |
296 | } | 330 | } |
297 | return -ENOMEM; | 331 | return err; |
298 | } | 332 | } |
299 | 333 | ||
300 | static void __exit nvdebug_exit(void) { | 334 | static void __exit nvdebug_exit(void) { |