diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-21 20:30:08 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-21 20:30:08 -0400 |
| commit | 091c242c9ef7cbd8d88d3beae936b14f5b907286 (patch) | |
| tree | 85f76f37209abc3888045ff2db8576ad3c6370d7 | |
| parent | 684c20c0afbfc2c2075a00881fbb3f9d3e68e023 (diff) | |
Add /proc/gpu#/resubmit_runlist API
Resubmits the runlist in an identical configuration. Causes the
runlist scheduler to:
1. Reload and cache timeslice and scale values from TSGs.
2. Restart scheduling from the head of the runlist [may cause a
preempt to be scheduled for the currently-running task (?)].
3. Address (?) an erratum on Turing where re-enabled channels are
not always detected.
Above behavior tested on GV100 and partially tested on TU102.
| -rw-r--r-- | nvdebug.h | 20 | ||||
| -rw-r--r-- | nvdebug_entry.c | 12 | ||||
| -rw-r--r-- | runlist.c | 19 | ||||
| -rw-r--r-- | runlist_procfs.c | 27 |
4 files changed, 73 insertions, 5 deletions
| @@ -274,7 +274,7 @@ typedef union { | |||
| 274 | 274 | ||
| 275 | We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where | 275 | We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where |
| 276 | i is a runlist index). Runlists are configured via the separate, writable | 276 | i is a runlist index). Runlists are configured via the separate, writable |
| 277 | *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that. | 277 | *_PFIFO_RUNLIST_* register; see open-gpu-doc for more. |
| 278 | 278 | ||
| 279 | LEN : Number of entries in runlist | 279 | LEN : Number of entries in runlist |
| 280 | IS_PENDING : Is runlist committed? | 280 | IS_PENDING : Is runlist committed? |
| @@ -284,18 +284,25 @@ typedef union { | |||
| 284 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta | 284 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta |
| 285 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? | 285 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? |
| 286 | */ | 286 | */ |
| 287 | #define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only | ||
| 287 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only | 288 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only |
| 288 | typedef union { | 289 | typedef union { |
| 289 | struct { | 290 | struct { |
| 290 | // NV_PFIFO_ENG_RUNLIST_BASE_* fields | 291 | // NV_PFIFO_ENG_RUNLIST_BASE_* fields |
| 291 | uint32_t ptr:28; | 292 | uint32_t ptr:28; |
| 292 | enum INST_TARGET target:2; | 293 | enum INST_TARGET target:2; |
| 293 | uint32_t padding1:2; | 294 | uint32_t :2; |
| 294 | // NV_PFIFO_ENG_RUNLIST_* fields | 295 | // NV_PFIFO_ENG_RUNLIST_* fields |
| 295 | uint16_t len:16; | 296 | uint16_t len:16; |
| 296 | uint32_t padding2:4; | 297 | uint32_t :4; |
| 297 | bool is_pending:1; | 298 | bool is_pending:1; // Read-only from NV_PFIFO_ENG_RUNLIST... |
| 298 | uint32_t padding3:11; | 299 | uint32_t :11; |
| 300 | } __attribute__((packed)); | ||
| 301 | struct { | ||
| 302 | // NV_PFIFO_RUNLIST_* fields that differ from NV_PFIFO_ENG_RUNLIST_* | ||
| 303 | uint64_t :52; | ||
| 304 | uint32_t id:4; // Write-only to NV_PFIFO_RUNLIST... | ||
| 305 | uint32_t :8; | ||
| 299 | } __attribute__((packed)); | 306 | } __attribute__((packed)); |
| 300 | uint64_t raw; | 307 | uint64_t raw; |
| 301 | } eng_runlist_gf100_t; | 308 | } eng_runlist_gf100_t; |
| @@ -311,6 +318,9 @@ typedef union { | |||
| 311 | // Support: Turing | 318 | // Support: Turing |
| 312 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write | 319 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write |
| 313 | #define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write | 320 | #define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write |
| 321 | // Derived absolute maximum number of runlists | ||
| 322 | #define MAX_RUNLISTS_TU102 80 // On Turing; another register is at 0x00003000 | ||
| 323 | #define MAX_RUNLISTS_GF100 34 // On Volta-; another register is at 0x00002390 | ||
| 314 | typedef union { | 324 | typedef union { |
| 315 | struct { | 325 | struct { |
| 316 | enum INST_TARGET target:2; | 326 | enum INST_TARGET target:2; |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 24fcd32..eee7351 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -25,6 +25,7 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | |||
| 25 | // runlist_procfs.c | 25 | // runlist_procfs.c |
| 26 | extern struct file_operations runlist_file_ops; | 26 | extern struct file_operations runlist_file_ops; |
| 27 | extern struct file_operations preempt_tsg_file_ops; | 27 | extern struct file_operations preempt_tsg_file_ops; |
| 28 | extern struct file_operations resubmit_runlist_file_ops; | ||
| 28 | extern struct file_operations disable_channel_file_ops; | 29 | extern struct file_operations disable_channel_file_ops; |
| 29 | extern struct file_operations enable_channel_file_ops; | 30 | extern struct file_operations enable_channel_file_ops; |
| 30 | extern struct file_operations switch_to_tsg_file_ops; | 31 | extern struct file_operations switch_to_tsg_file_ops; |
| @@ -256,6 +257,17 @@ int __init nvdebug_init(void) { | |||
| 256 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | 257 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), |
| 257 | (void*)device_id)) | 258 | (void*)device_id)) |
| 258 | goto out_nomem; | 259 | goto out_nomem; |
| 260 | /* On the TU104, the context scheduler (contained in the Host, aka | ||
| 261 | * PFIFO, unit) has been observed to sometimes fail to schedule TSGs | ||
| 262 | * containing re-enabled channels. Resubmitting the runlist | ||
| 263 | * configuration appears to remediate this condition, and so this API | ||
| 264 | * is exposed to help reset GPU scheduling as necessary. | ||
| 265 | */ | ||
| 266 | // Create file `/proc/gpu#/resubmit_runlist`, world writable | ||
| 267 | if (!proc_create_data( | ||
| 268 | "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops), | ||
| 269 | (void*)device_id)) | ||
| 270 | goto out_nomem; | ||
| 259 | // Create file `/proc/gpu#/disable_channel`, world writable | 271 | // Create file `/proc/gpu#/disable_channel`, world writable |
| 260 | if (!proc_create_data( | 272 | if (!proc_create_data( |
| 261 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), | 273 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), |
| @@ -137,3 +137,22 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { | |||
| 137 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | 137 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); |
| 138 | return 0; | 138 | return 0; |
| 139 | } | 139 | } |
| 140 | |||
| 141 | // Read back the current configuration of runlist `rl_id` and write it out again, triggering a resubmit. Returns 0 on success, -EIO if the register read yields all ones, or -EOPNOTSUPP when chip_id >= NV_CHIP_ID_AMPERE. | ||
| 142 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { | ||
| 143 | if (g->chip_id < NV_CHIP_ID_TURING) { | ||
| 144 | eng_runlist_gf100_t rl; | ||
| 145 | if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) | ||
| 146 | return -EIO; | ||
| 147 | rl.id = rl_id; // `id` is a 4-bit field; NOTE(review): an rl_id above 15 would be truncated here -- confirm callers bound it | ||
| 148 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw); // Single shared register; the target runlist is selected by the `id` field | ||
| 149 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { | ||
| 150 | runlist_submit_tu102_t submit; | ||
| 151 | if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) | ||
| 152 | return -EIO; | ||
| 153 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); // Per-runlist register: write back the value just read | ||
| 154 | } else { | ||
| 155 | return -EOPNOTSUPP; | ||
| 156 | } | ||
| 157 | return 0; | ||
| 158 | } | ||
diff --git a/runlist_procfs.c b/runlist_procfs.c index 0087d90..f569c77 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
| @@ -207,6 +207,33 @@ struct file_operations preempt_tsg_file_ops = { | |||
| 207 | .llseek = default_llseek, | 207 | .llseek = default_llseek, |
| 208 | }; | 208 | }; |
| 209 | 209 | ||
| 210 | // Write handler for /proc/gpu#/resubmit_runlist: parses a runlist index from userspace, range-checks it, and resubmits that runlist. Returns `count` on success, or a negative errno (parse error, -ERANGE, or the error from resubmit_runlist). | ||
| 211 | ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, | ||
| 212 | size_t count, loff_t *off) { | ||
| 213 | uint32_t target_runlist; | ||
| 214 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | ||
| 215 | int err = kstrtou32_from_user(buffer, count, 0, &target_runlist); | ||
| 216 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
| 217 | if (err) | ||
| 218 | return err; | ||
| 219 | |||
| 220 | // Verify valid runlist (in terms of absolute maximums). Valid indexes are 0..MAX_RUNLISTS_*-1; index MAX_RUNLISTS_* itself would address the next, unrelated register | ||
| 221 | if (g->chip_id < NV_CHIP_ID_TURING && target_runlist >= MAX_RUNLISTS_GF100) | ||
| 222 | return -ERANGE; | ||
| 223 | else if (g->chip_id < NV_CHIP_ID_AMPERE && target_runlist >= MAX_RUNLISTS_TU102) | ||
| 224 | return -ERANGE; | ||
| 225 | |||
| 226 | if ((err = resubmit_runlist(g, target_runlist))) | ||
| 227 | return err; | ||
| 228 | |||
| 229 | return count; | ||
| 230 | } | ||
| 231 | |||
| 232 | struct file_operations resubmit_runlist_file_ops = { // procfs operations for /proc/gpu#/resubmit_runlist; no read handler is set | ||
| 233 | .write = resubmit_runlist_file_write, | ||
| 234 | .llseek = default_llseek, | ||
| 235 | }; | ||
| 236 | |||
| 210 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | 237 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, |
| 211 | size_t count, loff_t *off) { | 238 | size_t count, loff_t *off) { |
| 212 | uint32_t target_channel; | 239 | uint32_t target_channel; |
