diff options
-rw-r--r-- | nvdebug.h | 20 | ||||
-rw-r--r-- | nvdebug_entry.c | 12 | ||||
-rw-r--r-- | runlist.c | 19 | ||||
-rw-r--r-- | runlist_procfs.c | 27 |
4 files changed, 73 insertions, 5 deletions
@@ -274,7 +274,7 @@ typedef union { | |||
274 | 274 | ||
275 | We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where | 275 | We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where |
276 | i is a runlist index). Runlists are configured via the separate, writable | 276 | i is a runlist index). Runlists are configured via the separate, writable |
277 | *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that. | 277 | *_PFIFO_RUNLIST_* register; see open-gpu-doc for more. |
278 | 278 | ||
279 | LEN : Number of entries in runlist | 279 | LEN : Number of entries in runlist |
280 | IS_PENDING : Is runlist committed? | 280 | IS_PENDING : Is runlist committed? |
@@ -284,18 +284,25 @@ typedef union { | |||
284 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta | 284 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta |
285 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? | 285 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? |
286 | */ | 286 | */ |
287 | #define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only | ||
287 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only | 288 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only |
288 | typedef union { | 289 | typedef union { |
289 | struct { | 290 | struct { |
290 | // NV_PFIFO_ENG_RUNLIST_BASE_* fields | 291 | // NV_PFIFO_ENG_RUNLIST_BASE_* fields |
291 | uint32_t ptr:28; | 292 | uint32_t ptr:28; |
292 | enum INST_TARGET target:2; | 293 | enum INST_TARGET target:2; |
293 | uint32_t padding1:2; | 294 | uint32_t :2; |
294 | // NV_PFIFO_ENG_RUNLIST_* fields | 295 | // NV_PFIFO_ENG_RUNLIST_* fields |
295 | uint16_t len:16; | 296 | uint16_t len:16; |
296 | uint32_t padding2:4; | 297 | uint32_t :4; |
297 | bool is_pending:1; | 298 | bool is_pending:1; // Read-only from NV_PFIFO_ENG_RUNLIST... |
298 | uint32_t padding3:11; | 299 | uint32_t :11; |
300 | } __attribute__((packed)); | ||
301 | struct { | ||
302 | // NV_PFIFO_RUNLIST_* fields that differ from NV_PFIFO_ENG_RUNLIST_* | ||
303 | uint64_t :52; | ||
304 | uint32_t id:4; // Write-only to NV_PFIFO_RUNLIST... | ||
305 | uint32_t :8; | ||
299 | } __attribute__((packed)); | 306 | } __attribute__((packed)); |
300 | uint64_t raw; | 307 | uint64_t raw; |
301 | } eng_runlist_gf100_t; | 308 | } eng_runlist_gf100_t; |
@@ -311,6 +318,9 @@ typedef union { | |||
311 | // Support: Turing | 318 | // Support: Turing |
312 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write | 319 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write |
313 | #define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write | 320 | #define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write |
321 | // Derived absolute maximum number of runlists | ||
322 | #define MAX_RUNLISTS_TU102 80 // On Turing; another register is at 0x00003000 | ||
323 | #define MAX_RUNLISTS_GF100 34 // On Volta-; another register is at 0x00002390 | ||
314 | typedef union { | 324 | typedef union { |
315 | struct { | 325 | struct { |
316 | enum INST_TARGET target:2; | 326 | enum INST_TARGET target:2; |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 24fcd32..eee7351 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -25,6 +25,7 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | |||
25 | // runlist_procfs.c | 25 | // runlist_procfs.c |
26 | extern struct file_operations runlist_file_ops; | 26 | extern struct file_operations runlist_file_ops; |
27 | extern struct file_operations preempt_tsg_file_ops; | 27 | extern struct file_operations preempt_tsg_file_ops; |
28 | extern struct file_operations resubmit_runlist_file_ops; | ||
28 | extern struct file_operations disable_channel_file_ops; | 29 | extern struct file_operations disable_channel_file_ops; |
29 | extern struct file_operations enable_channel_file_ops; | 30 | extern struct file_operations enable_channel_file_ops; |
30 | extern struct file_operations switch_to_tsg_file_ops; | 31 | extern struct file_operations switch_to_tsg_file_ops; |
@@ -256,6 +257,17 @@ int __init nvdebug_init(void) { | |||
256 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | 257 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), |
257 | (void*)device_id)) | 258 | (void*)device_id)) |
258 | goto out_nomem; | 259 | goto out_nomem; |
260 | /* On the TU104, the context scheduler (contained in the Host, aka | ||
261 | * PFIFO, unit) has been observed to sometimes fail to schedule TSGs | ||
262 | * containing re-enabled channels. Resubmitting the runlist | ||
263 | * configuration appears to remediate this condition, and so this API | ||
264 | * is exposed to help reset GPU scheduling as necessary. | ||
265 | */ | ||
266 | // Create file `/proc/gpu#/resubmit_runlist`, world writable | ||
267 | if (!proc_create_data( | ||
268 | "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops), | ||
269 | (void*)device_id)) | ||
270 | goto out_nomem; | ||
259 | // Create file `/proc/gpu#/disable_channel`, world writable | 271 | // Create file `/proc/gpu#/disable_channel`, world writable |
260 | if (!proc_create_data( | 272 | if (!proc_create_data( |
261 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), | 273 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), |
@@ -137,3 +137,22 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { | |||
137 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | 137 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); |
138 | return 0; | 138 | return 0; |
139 | } | 139 | } |
140 | |||
141 | // Read and write runlist configuration, triggering a resubmit | ||
142 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { | ||
143 | if (g->chip_id < NV_CHIP_ID_TURING) { | ||
144 | eng_runlist_gf100_t rl; | ||
145 | if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) | ||
146 | return -EIO; | ||
147 | rl.id = rl_id; | ||
148 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw); | ||
149 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { | ||
150 | runlist_submit_tu102_t submit; | ||
151 | if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) | ||
152 | return -EIO; | ||
153 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); | ||
154 | } else { | ||
155 | return -EOPNOTSUPP; | ||
156 | } | ||
157 | return 0; | ||
158 | } | ||
diff --git a/runlist_procfs.c b/runlist_procfs.c index 0087d90..f569c77 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -207,6 +207,33 @@ struct file_operations preempt_tsg_file_ops = { | |||
207 | .llseek = default_llseek, | 207 | .llseek = default_llseek, |
208 | }; | 208 | }; |
209 | 209 | ||
210 | |||
211 | ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, | ||
212 | size_t count, loff_t *off) { | ||
213 | uint32_t target_runlist; | ||
214 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | ||
215 | int err = kstrtou32_from_user(buffer, count, 0, &target_runlist); | ||
216 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
217 | if (err) | ||
218 | return err; | ||
219 | |||
220 | // Verify valid runlist (in terms of absolute maximums) | ||
221 | if (g->chip_id < NV_CHIP_ID_TURING && target_runlist > MAX_RUNLISTS_GF100) | ||
222 | return -ERANGE; | ||
223 | else if (g->chip_id < NV_CHIP_ID_AMPERE && target_runlist > MAX_RUNLISTS_TU102) | ||
224 | return -ERANGE; | ||
225 | |||
226 | if ((err = resubmit_runlist(g, target_runlist))) | ||
227 | return err; | ||
228 | |||
229 | return count; | ||
230 | } | ||
231 | |||
232 | struct file_operations resubmit_runlist_file_ops = { | ||
233 | .write = resubmit_runlist_file_write, | ||
234 | .llseek = default_llseek, | ||
235 | }; | ||
236 | |||
210 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | 237 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, |
211 | size_t count, loff_t *off) { | 238 | size_t count, loff_t *off) { |
212 | uint32_t target_channel; | 239 | uint32_t target_channel; |