aboutsummaryrefslogtreecommitdiffstats
path: root/runlist_procfs.c
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2024-09-16 15:34:41 -0400
committerJoshua Bakita <bakitajoshua@gmail.com>2024-09-16 15:34:41 -0400
commit232eafd04f272ed69d97a250c50a7bbed4d2894c (patch)
treebf1d03cd66e6f37b2c9ac9a9d48e4f359fcdd6b5 /runlist_procfs.c
parent0b1c304e53b88fe628d350d1380a88317f071e69 (diff)
Support printing the runlist and channels on Ampere+ GPUs
**Modifies the user API from `cat /proc/gpuX/runlist0` to `cat /proc/gpuX/runlist0/runlist` to support runlist-scoped registers**
- Count number of runlists via Ampere-style PTOP parsing.
- Create a ProcFS directory for each runlist, and create the runlist printing file in this directory.
- Document the newly-added/-formatted Runlist RAM and Channel RAM registers.
- Add a helper function `get_runlist_ram()` to obtain the location of each runlist's registers.
- Support printing Ampere-style Channel RAM entries.

Tested on Jetson Orin (ga10b), A100, H100, and AD102 (RTX 6000 Ada)
Diffstat (limited to 'runlist_procfs.c')
-rw-r--r--runlist_procfs.c64
1 files changed, 50 insertions, 14 deletions
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 8152463..c1cfc87 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -8,11 +8,11 @@
8 8
9#ifdef DETAILED_CHANNEL_INFO 9#ifdef DETAILED_CHANNEL_INFO
10/* Print channel details using PCCSR (Programmable Channel Control System RAM?) 10/* Print channel details using PCCSR (Programmable Channel Control System RAM?)
11 * @param s Pointer to state from seq_file subsystem to pass to seq_printf 11 @param s Pointer to state from seq_file subsystem to pass to seq_printf
12 * @param g Pointer to our internal GPU state 12 @param g Pointer to our internal GPU state
13 * @param chid ID of channel to print details on, range [0, 512) 13 @param chid ID of channel to print details on, range [0, 512)
14 * @param prefix Text string to prefix each line with, or empty string 14 @param prefix Text string to prefix each line with, or empty string
15 */ 15*/
16static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) { 16static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) {
17 channel_ctrl_t chan; 17 channel_ctrl_t chan;
18 uint64_t instance_ptr; 18 uint64_t instance_ptr;
@@ -21,7 +21,7 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state
21 return -EIO; 21 return -EIO;
22 instance_ptr = (uint64_t)chan.inst_ptr << 12; 22 instance_ptr = (uint64_t)chan.inst_ptr << 12;
23 // Don't print write-only fields 23 // Don't print write-only fields
24 seq_printf(s, "%s+- Channel Info %-4d -+\n", prefix, chid); 24 seq_printf(s, "%s|= Channel Info ======|\n", prefix);
25 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); 25 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable);
26 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); 26 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next);
27 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); 27 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted);
@@ -32,7 +32,37 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state
32 seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr); 32 seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr);
33 seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target)); 33 seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target));
34 seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind); 34 seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind);
35 seq_printf(s, "%s+---------------------+\n", prefix); 35 return 0;
36}
37
38/* `runlist_detail_seq_show_chan()`, but for Ampere+
39 @param runlist_pri_base Base of the RLRAM region for this runlist
40
41 `runlist_pri_base` is necessary, since Channel RAM is now per-runlist on
42 Ampere+, and its location is configured in Runlist RAM.
43*/
44static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base) {
45 runlist_channel_config_t channel_config;
46 channel_ctrl_ga100_t chan;
47
48 // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere+
49 if ((channel_config.raw = nvdebug_readl(g, runlist_pri_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
50 return -EIO;
51 if ((chan.raw = nvdebug_readl(g, (((uint32_t)channel_config.bar0_offset << 4) + chid * 4))) == -1)
52 return -EIO;
53 seq_printf(s, "%s|= Channel Info ======|\n", prefix);
54 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable);
55 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next);
56 seq_printf(s, "%s| Busy: %d|\n", prefix, chan.busy);
57 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted);
58 seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, chan.eng_faulted);
59 seq_printf(s, "%s| On PBDMA: %d|\n", prefix, chan.on_pbdma);
60 seq_printf(s, "%s| On ENG: %d|\n", prefix, chan.on_eng);
61 seq_printf(s, "%s| Pending: %d|\n", prefix, chan.pending);
62 seq_printf(s, "%s| CTX Reload: %d|\n", prefix, chan.ctx_reload);
63 seq_printf(s, "%s| PBDMA Busy: %d|\n", prefix, chan.pbdma_busy);
64 seq_printf(s, "%s| ENG Busy: %d|\n", prefix, chan.eng_busy);
65 seq_printf(s, "%s| Acquire Fail: %d|\n", prefix, chan.acquire_fail);
36 return 0; 66 return 0;
37} 67}
38#endif 68#endif
@@ -118,27 +148,33 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
118 } else { 148 } else {
119 char *indt = ""; 149 char *indt = "";
120 u64 instance_ptr = 0; 150 u64 instance_ptr = 0;
121
122 if (rl_iter->entries_left_in_tsg) 151 if (rl_iter->entries_left_in_tsg)
123 indt = " "; 152 indt = " ";
124#ifdef DETAILED_CHANNEL_INFO
125 runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
126 return 0;
127#endif
128 // Reconstruct pointer to channel instance block 153 // Reconstruct pointer to channel instance block
129 if (g->chip_id >= NV_CHIP_ID_VOLTA) { 154 if (g->chip_id >= NV_CHIP_ID_VOLTA) {
130 instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi; 155 instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi;
131 instance_ptr <<= 32; 156 instance_ptr <<= 32;
132 } 157 }
133 instance_ptr |= inst_ptr_lo(g, entry) << 12; 158 instance_ptr |= inst_ptr_lo(g, entry) << 12;
134 159 // Print channel information from runlist
135 seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry)); 160 seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry));
136 if (g->chip_id >= NV_CHIP_ID_VOLTA) 161 if (g->chip_id >= NV_CHIP_ID_VOLTA)
137 seq_printf(s, "%s| Runqueue Selector: %d|\n", indt, 162 seq_printf(s, "%s| Runqueue Selector: %d|\n", indt,
138 ((struct gv100_runlist_chan*)entry)->runqueue_selector); 163 ((struct gv100_runlist_chan*)entry)->runqueue_selector);
164 // Not populated on Kepler [ex: gk104 in Bonham (Quadro K5000)], and
165 // populated but unused on Pascal [ex: gp104 in Bonham (GTX 1080 Ti)].
166 // (The aperture field may be incorrectly populated as INVALID, but the
167 // context still works on the aforementioned Pascal GPU.)
139 seq_printf(s, "%s| Instance PTR: |\n", indt); 168 seq_printf(s, "%s| Instance PTR: |\n", indt);
140 seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr); 169 seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr);
141 seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry))); 170 seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry)));
171#ifdef DETAILED_CHANNEL_INFO
172 // Print channel info from PCCSR/Channel RAM and the instance block
173 if (g->chip_id < NV_CHIP_ID_AMPERE)
174 runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
175 else
176 runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base);
177#endif
142 seq_printf(s, "%s+---------------------+\n", indt); 178 seq_printf(s, "%s+---------------------+\n", indt);
143 } 179 }
144 return 0; 180 return 0;