aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--nvdebug.h20
-rw-r--r--nvdebug_entry.c12
-rw-r--r--runlist.c19
-rw-r--r--runlist_procfs.c27
4 files changed, 73 insertions, 5 deletions
diff --git a/nvdebug.h b/nvdebug.h
index 6a2383e..7564f8c 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -274,7 +274,7 @@ typedef union {
274 274
275 We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where 275 We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where
276 i is a runlist index). Runlists are configured via the separate, writable 276 i is a runlist index). Runlists are configured via the separate, writable
277 *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that. 277 *_PFIFO_RUNLIST_* register; see open-gpu-doc for more.
278 278
279 LEN : Number of entries in runlist 279 LEN : Number of entries in runlist
280 IS_PENDING : Is runlist committed? 280 IS_PENDING : Is runlist committed?
@@ -284,18 +284,25 @@ typedef union {
284 Support: Fermi*, Kepler, Maxwell, Pascal, Volta 284 Support: Fermi*, Kepler, Maxwell, Pascal, Volta
285 *Fermi may expose this information 8 bytes earlier, starting at 0x227C? 285 *Fermi may expose this information 8 bytes earlier, starting at 0x227C?
286*/ 286*/
287#define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only
287#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only 288#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only
288typedef union { 289typedef union {
289 struct { 290 struct {
290 // NV_PFIFO_ENG_RUNLIST_BASE_* fields 291 // NV_PFIFO_ENG_RUNLIST_BASE_* fields
291 uint32_t ptr:28; 292 uint32_t ptr:28;
292 enum INST_TARGET target:2; 293 enum INST_TARGET target:2;
293 uint32_t padding1:2; 294 uint32_t :2;
294 // NV_PFIFO_ENG_RUNLIST_* fields 295 // NV_PFIFO_ENG_RUNLIST_* fields
295 uint16_t len:16; 296 uint16_t len:16;
296 uint32_t padding2:4; 297 uint32_t :4;
297 bool is_pending:1; 298 bool is_pending:1; // Read-only from NV_PFIFO_ENG_RUNLIST...
298 uint32_t padding3:11; 299 uint32_t :11;
300 } __attribute__((packed));
301 struct {
302 // NV_PFIFO_RUNLIST_* fields that differ from NV_PFIFO_ENG_RUNLIST_*
303 uint64_t :52;
304 uint32_t id:4; // Write-only to NV_PFIFO_RUNLIST...
305 uint32_t :8;
299 } __attribute__((packed)); 306 } __attribute__((packed));
300 uint64_t raw; 307 uint64_t raw;
301} eng_runlist_gf100_t; 308} eng_runlist_gf100_t;
@@ -311,6 +318,9 @@ typedef union {
311// Support: Turing 318// Support: Turing
312#define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write 319#define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write
313#define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write 320#define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write
321// Derived absolute maximum number of runlists
322#define MAX_RUNLISTS_TU102 80 // On Turing; another register is at 0x00003000
323#define MAX_RUNLISTS_GF100 34 // On Volta-; another register is at 0x00002390
314typedef union { 324typedef union {
315 struct { 325 struct {
316 enum INST_TARGET target:2; 326 enum INST_TARGET target:2;
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 24fcd32..eee7351 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -25,6 +25,7 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
25// runlist_procfs.c 25// runlist_procfs.c
26extern struct file_operations runlist_file_ops; 26extern struct file_operations runlist_file_ops;
27extern struct file_operations preempt_tsg_file_ops; 27extern struct file_operations preempt_tsg_file_ops;
28extern struct file_operations resubmit_runlist_file_ops;
28extern struct file_operations disable_channel_file_ops; 29extern struct file_operations disable_channel_file_ops;
29extern struct file_operations enable_channel_file_ops; 30extern struct file_operations enable_channel_file_ops;
30extern struct file_operations switch_to_tsg_file_ops; 31extern struct file_operations switch_to_tsg_file_ops;
@@ -256,6 +257,17 @@ int __init nvdebug_init(void) {
256 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), 257 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops),
257 (void*)device_id)) 258 (void*)device_id))
258 goto out_nomem; 259 goto out_nomem;
260 /* On the TU104, the context scheduler (contained in the Host, aka
261 * PFIFO, unit) has been observed to sometimes fail to schedule TSGs
262 * containing re-enabled channels. Resubmitting the runlist
263 * configuration appears to remediate this condition, and so this API
264 * is exposed to help reset GPU scheduling as necessary.
265 */
266 // Create file `/proc/gpu#/resubmit_runlist`, world writable
267 if (!proc_create_data(
268 "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops),
269 (void*)device_id))
270 goto out_nomem;
259 // Create file `/proc/gpu#/disable_channel`, world writable 271 // Create file `/proc/gpu#/disable_channel`, world writable
260 if (!proc_create_data( 272 if (!proc_create_data(
261 "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), 273 "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops),
diff --git a/runlist.c b/runlist.c
index 22f47ff..91fca82 100644
--- a/runlist.c
+++ b/runlist.c
@@ -137,3 +137,22 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) {
137 nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); 137 nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
138 return 0; 138 return 0;
139} 139}
140
141// Read and write runlist configuration, triggering a resubmit
142int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) {
143 if (g->chip_id < NV_CHIP_ID_TURING) {
144 eng_runlist_gf100_t rl;
145 if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1)
146 return -EIO;
147 rl.id = rl_id;
148 nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw);
149 } else if (g->chip_id < NV_CHIP_ID_AMPERE) {
150 runlist_submit_tu102_t submit;
151 if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1)
152 return -EIO;
153 nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw);
154 } else {
155 return -EOPNOTSUPP;
156 }
157 return 0;
158}
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 0087d90..f569c77 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -207,6 +207,33 @@ struct file_operations preempt_tsg_file_ops = {
207 .llseek = default_llseek, 207 .llseek = default_llseek,
208}; 208};
209 209
210
211ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer,
212 size_t count, loff_t *off) {
213 uint32_t target_runlist;
214 // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
215 int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
216 struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
217 if (err)
218 return err;
219
220 // Verify valid runlist (in terms of absolute maximums)
221 if (g->chip_id < NV_CHIP_ID_TURING && target_runlist > MAX_RUNLISTS_GF100)
222 return -ERANGE;
223 else if (g->chip_id < NV_CHIP_ID_AMPERE && target_runlist > MAX_RUNLISTS_TU102)
224 return -ERANGE;
225
226 if ((err = resubmit_runlist(g, target_runlist)))
227 return err;
228
229 return count;
230}
231
232struct file_operations resubmit_runlist_file_ops = {
233 .write = resubmit_runlist_file_write,
234 .llseek = default_llseek,
235};
236
210ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, 237ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
211 size_t count, loff_t *off) { 238 size_t count, loff_t *off) {
212 uint32_t target_channel; 239 uint32_t target_channel;