diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:34:41 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:34:41 -0400 |
commit | 232eafd04f272ed69d97a250c50a7bbed4d2894c (patch) | |
tree | bf1d03cd66e6f37b2c9ac9a9d48e4f359fcdd6b5 /runlist_procfs.c | |
parent | 0b1c304e53b88fe628d350d1380a88317f071e69 (diff) |
Support printing the runlist and channels on Ampere+ GPUs
**Modifies the user API from `cat /proc/gpuX/runlist0` to
`cat /proc/gpuX/runlist0/runlist` to support runlist-scoped
registers**
- Count number of runlists via Ampere-style PTOP parsing.
- Create a ProcFS directory for each runlist, and create the runlist
printing file in this directory.
- Document the newly-added/-formatted Runlist RAM and Channel RAM
registers.
- Add a helper function `get_runlist_ram()` to obtain the location
of each runlist's registers.
- Support printing Ampere-style Channel RAM entries.
Tested on Jetson Orin (ga10b), A100, H100, and AD102 (RTX 6000 Ada)
Diffstat (limited to 'runlist_procfs.c')
-rw-r--r-- | runlist_procfs.c | 64 |
1 files changed, 50 insertions, 14 deletions
diff --git a/runlist_procfs.c b/runlist_procfs.c index 8152463..c1cfc87 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -8,11 +8,11 @@ | |||
8 | 8 | ||
9 | #ifdef DETAILED_CHANNEL_INFO | 9 | #ifdef DETAILED_CHANNEL_INFO |
10 | /* Print channel details using PCCSR (Programmable Channel Control System RAM?) | 10 | /* Print channel details using PCCSR (Programmable Channel Control System RAM?) |
11 | * @param s Pointer to state from seq_file subsystem to pass to seq_printf | 11 | @param s Pointer to state from seq_file subsystem to pass to seq_printf |
12 | * @param g Pointer to our internal GPU state | 12 | @param g Pointer to our internal GPU state |
13 | * @param chid ID of channel to print details on, range [0, 512) | 13 | @param chid ID of channel to print details on, range [0, 512) |
14 | * @param prefix Text string to prefix each line with, or empty string | 14 | @param prefix Text string to prefix each line with, or empty string |
15 | */ | 15 | */ |
16 | static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) { | 16 | static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) { |
17 | channel_ctrl_t chan; | 17 | channel_ctrl_t chan; |
18 | uint64_t instance_ptr; | 18 | uint64_t instance_ptr; |
@@ -21,7 +21,7 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state | |||
21 | return -EIO; | 21 | return -EIO; |
22 | instance_ptr = (uint64_t)chan.inst_ptr << 12; | 22 | instance_ptr = (uint64_t)chan.inst_ptr << 12; |
23 | // Don't print write-only fields | 23 | // Don't print write-only fields |
24 | seq_printf(s, "%s+- Channel Info %-4d -+\n", prefix, chid); | 24 | seq_printf(s, "%s|= Channel Info ======|\n", prefix); |
25 | seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); | 25 | seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); |
26 | seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); | 26 | seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); |
27 | seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); | 27 | seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); |
@@ -32,7 +32,37 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state | |||
32 | seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr); | 32 | seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr); |
33 | seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target)); | 33 | seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target)); |
34 | seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind); | 34 | seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind); |
35 | seq_printf(s, "%s+---------------------+\n", prefix); | 35 | return 0; |
36 | } | ||
37 | |||
38 | /* `runlist_detail_seq_show_chan()`, but for Ampere+ | ||
39 | @param runlist_pri_base Base of the RLRAM region for this runlist | ||
40 | |||
41 | `runlist_pri_base` is necessary, since Channel RAM is now per-runlist on | ||
42 | Ampere+, and its location is configured in Runlist RAM. | ||
43 | */ | ||
44 | static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base) { | ||
45 | runlist_channel_config_t channel_config; | ||
46 | channel_ctrl_ga100_t chan; | ||
47 | |||
48 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere+ | ||
49 | if ((channel_config.raw = nvdebug_readl(g, runlist_pri_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
50 | return -EIO; | ||
51 | if ((chan.raw = nvdebug_readl(g, (((uint32_t)channel_config.bar0_offset << 4) + chid * 4))) == -1) | ||
52 | return -EIO; | ||
53 | seq_printf(s, "%s|= Channel Info ======|\n", prefix); | ||
54 | seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); | ||
55 | seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); | ||
56 | seq_printf(s, "%s| Busy: %d|\n", prefix, chan.busy); | ||
57 | seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); | ||
58 | seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, chan.eng_faulted); | ||
59 | seq_printf(s, "%s| On PBDMA: %d|\n", prefix, chan.on_pbdma); | ||
60 | seq_printf(s, "%s| On ENG: %d|\n", prefix, chan.on_eng); | ||
61 | seq_printf(s, "%s| Pending: %d|\n", prefix, chan.pending); | ||
62 | seq_printf(s, "%s| CTX Reload: %d|\n", prefix, chan.ctx_reload); | ||
63 | seq_printf(s, "%s| PBDMA Busy: %d|\n", prefix, chan.pbdma_busy); | ||
64 | seq_printf(s, "%s| ENG Busy: %d|\n", prefix, chan.eng_busy); | ||
65 | seq_printf(s, "%s| Acquire Fail: %d|\n", prefix, chan.acquire_fail); | ||
36 | return 0; | 66 | return 0; |
37 | } | 67 | } |
38 | #endif | 68 | #endif |
@@ -118,27 +148,33 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { | |||
118 | } else { | 148 | } else { |
119 | char *indt = ""; | 149 | char *indt = ""; |
120 | u64 instance_ptr = 0; | 150 | u64 instance_ptr = 0; |
121 | |||
122 | if (rl_iter->entries_left_in_tsg) | 151 | if (rl_iter->entries_left_in_tsg) |
123 | indt = " "; | 152 | indt = " "; |
124 | #ifdef DETAILED_CHANNEL_INFO | ||
125 | runlist_detail_seq_show_chan(s, g, chid(g, entry), indt); | ||
126 | return 0; | ||
127 | #endif | ||
128 | // Reconstruct pointer to channel instance block | 153 | // Reconstruct pointer to channel instance block |
129 | if (g->chip_id >= NV_CHIP_ID_VOLTA) { | 154 | if (g->chip_id >= NV_CHIP_ID_VOLTA) { |
130 | instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi; | 155 | instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi; |
131 | instance_ptr <<= 32; | 156 | instance_ptr <<= 32; |
132 | } | 157 | } |
133 | instance_ptr |= inst_ptr_lo(g, entry) << 12; | 158 | instance_ptr |= inst_ptr_lo(g, entry) << 12; |
134 | 159 | // Print channel information from runlist | |
135 | seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry)); | 160 | seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry)); |
136 | if (g->chip_id >= NV_CHIP_ID_VOLTA) | 161 | if (g->chip_id >= NV_CHIP_ID_VOLTA) |
137 | seq_printf(s, "%s| Runqueue Selector: %d|\n", indt, | 162 | seq_printf(s, "%s| Runqueue Selector: %d|\n", indt, |
138 | ((struct gv100_runlist_chan*)entry)->runqueue_selector); | 163 | ((struct gv100_runlist_chan*)entry)->runqueue_selector); |
164 | // Not populated on Kepler [ex: gk104 in Bonham (Quadro K5000)], and | ||
165 | // populated but unused on Pascal [ex: gp104 in Bonham (GTX 1080 Ti)]. | ||
166 | // (The aperture field may be incorrectly populated as INVALID, but the | ||
167 | // context still works on the aforementioned Pascal GPU.) | |
139 | seq_printf(s, "%s| Instance PTR: |\n", indt); | 168 | seq_printf(s, "%s| Instance PTR: |\n", indt); |
140 | seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr); | 169 | seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr); |
141 | seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry))); | 170 | seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry))); |
171 | #ifdef DETAILED_CHANNEL_INFO | ||
172 | // Print channel info from PCCSR/Channel RAM and the instance block | ||
173 | if (g->chip_id < NV_CHIP_ID_AMPERE) | ||
174 | runlist_detail_seq_show_chan(s, g, chid(g, entry), indt); | ||
175 | else | ||
176 | runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base); | ||
177 | #endif | ||
142 | seq_printf(s, "%s+---------------------+\n", indt); | 178 | seq_printf(s, "%s+---------------------+\n", indt); |
143 | } | 179 | } |
144 | return 0; | 180 | return 0; |