diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 12:50:02 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 13:59:56 -0400 |
commit | 48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch) | |
tree | d63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc /nvdebug_entry.c | |
parent | ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff) |
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to
`echo 1 > /proc/gpuX/runlist0/switch_to_tsg` to switch to TSG 1 on
runlist 0 on GPU X for pre-Ampere GPUs (for example).**
Feature changes:
- switch_to_tsg only makes sense on a per-runlist level. Before, this
always operated on runlist0; this commit allows operating on any
runlist by moving the API to the per-runlist paths.
- On Ampere+, channel and TSG IDs are per-runlist, and no longer
GPU-global. Consequently, the disable/enable_channel and
preempt_tsg APIs have been moved from GPU-global to per-runlist
paths on Ampere+.
Bug fixes:
- `preempt_runlist()` is now supported on Maxwell and Pascal.
- `resubmit_runlist()` detects too-old GPUs.
- MAX_CHID corrected from 512 to 511 and documented.
- switch_to_tsg now includes a runlist resubmit, which appears to be
necessary on Turing+ GPUs.
Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB),
GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100
(Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 53 |
1 files changed, 33 insertions, 20 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 5f99976..d5df7db 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -289,17 +289,45 @@ int __init nvdebug_init(void) { | |||
289 | snprintf(runlist_name, 12, "runlist%lu", last_runlist); | 289 | snprintf(runlist_name, 12, "runlist%lu", last_runlist); |
290 | if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id))) | 290 | if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id))) |
291 | goto out_nomem; | 291 | goto out_nomem; |
292 | // Create one file for each runlist on Ampere+, or one file for each GPU on older | ||
293 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE || last_runlist == 0) { | ||
294 | struct proc_dir_entry *chram_scope; | ||
295 | // preempt_tsg, enable_channel, and disable_channel refer to a GPU-global channel | ||
296 | // RAM on pre-Ampere GPUs | ||
297 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE) | ||
298 | chram_scope = rl_dir; | ||
299 | else | ||
300 | chram_scope = dir; | ||
301 | // Create file `/proc/gpu#/runlist#/preempt_tsg`, world writable | ||
302 | // On Turing and older, `/proc/gpu#/preempt_tsg` | ||
303 | if (!proc_create_data( | ||
304 | "preempt_tsg", 0222, chram_scope, compat_ops(&preempt_tsg_file_ops), | ||
305 | (void*)last_runlist)) | ||
306 | goto out_nomem; | ||
307 | // Create file `/proc/gpu#/runlist#/disable_channel`, world writable | ||
308 | // On Turing and older, `/proc/gpu#/disable_channel` | ||
309 | if (!proc_create_data( | ||
310 | "disable_channel", 0222, chram_scope, compat_ops(&disable_channel_file_ops), | ||
311 | (void*)last_runlist)) | ||
312 | goto out_nomem; | ||
313 | // Create file `/proc/gpu#/runlist#/enable_channel`, world writable | ||
314 | // On Turing and older, `/proc/gpu#/enable_channel` | ||
315 | if (!proc_create_data( | ||
316 | "enable_channel", 0222, chram_scope, compat_ops(&enable_channel_file_ops), | ||
317 | (void*)last_runlist)) | ||
318 | goto out_nomem; | ||
319 | } | ||
292 | // Create file `/proc/gpu#/runlist#/runlist`, world readable | 320 | // Create file `/proc/gpu#/runlist#/runlist`, world readable |
293 | if (!proc_create_data( | 321 | if (!proc_create_data( |
294 | "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops), | 322 | "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops), |
295 | (void*)last_runlist)) | 323 | (void*)last_runlist)) |
296 | goto out_nomem; | 324 | goto out_nomem; |
325 | // Create file `/proc/gpu#/runlist#/switch_to_tsg`, world writable | ||
326 | if (!proc_create_data( | ||
327 | "switch_to_tsg", 0222, rl_dir, compat_ops(&switch_to_tsg_file_ops), | ||
328 | (void*)last_runlist)) | ||
329 | goto out_nomem; | ||
297 | } while (last_runlist-- > 0); | 330 | } while (last_runlist-- > 0); |
298 | // Create file `/proc/gpu#/preempt_tsg`, world writable | ||
299 | if (!proc_create_data( | ||
300 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | ||
301 | (void*)device_id)) | ||
302 | goto out_nomem; | ||
303 | /* On the TU104, the context scheduler (contained in the Host, aka | 331 | /* On the TU104, the context scheduler (contained in the Host, aka |
304 | * PFIFO, unit) has been observed to sometimes to fail to schedule TSGs | 332 | * PFIFO, unit) has been observed to sometimes to fail to schedule TSGs |
305 | * containing re-enabled channels. Resubmitting the runlist | 333 | * containing re-enabled channels. Resubmitting the runlist |
@@ -311,21 +339,6 @@ int __init nvdebug_init(void) { | |||
311 | "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops), | 339 | "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops), |
312 | (void*)device_id)) | 340 | (void*)device_id)) |
313 | goto out_nomem; | 341 | goto out_nomem; |
314 | // Create file `/proc/gpu#/disable_channel`, world writable | ||
315 | if (!proc_create_data( | ||
316 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), | ||
317 | (void*)device_id)) | ||
318 | goto out_nomem; | ||
319 | // Create file `/proc/gpu#/enable_channel`, world writable | ||
320 | if (!proc_create_data( | ||
321 | "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), | ||
322 | (void*)device_id)) | ||
323 | goto out_nomem; | ||
324 | // Create file `/proc/gpu#/switch_to_tsg`, world writable | ||
325 | if (!proc_create_data( | ||
326 | "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), | ||
327 | (void*)device_id)) | ||
328 | goto out_nomem; | ||
329 | // Create file `/proc/gpu#/device_info`, world readable | 342 | // Create file `/proc/gpu#/device_info`, world readable |
330 | if (!proc_create_data( | 343 | if (!proc_create_data( |
331 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), | 344 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), |