aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2024-09-16 15:34:41 -0400
committerJoshua Bakita <bakitajoshua@gmail.com>2024-09-16 15:34:41 -0400
commit232eafd04f272ed69d97a250c50a7bbed4d2894c (patch)
treebf1d03cd66e6f37b2c9ac9a9d48e4f359fcdd6b5
parent0b1c304e53b88fe628d350d1380a88317f071e69 (diff)
Support printing the runlist and channels on Ampere+ GPUs
**Modifies the user API from `cat /proc/gpuX/runlist0` to `cat /proc/gpuX/runlist0/runlist` to support runlist-scoped registers**
- Count number of runlists via Ampere-style PTOP parsing.
- Create a ProcFS directory for each runlist, and create the runlist printing file in this directory.
- Document the newly-added/-formatted Runlist RAM and Channel RAM registers.
- Add a helper function `get_runlist_ram()` to obtain the location of each runlist's registers.
- Support printing Ampere-style Channel RAM entries.
Tested on Jetson Orin (ga10b), A100, H100, and AD102 (RTX 6000 Ada)
-rw-r--r--nvdebug.h84
-rw-r--r--nvdebug_entry.c77
-rw-r--r--runlist.c69
-rw-r--r--runlist_procfs.c64
4 files changed, 254 insertions, 40 deletions
diff --git a/nvdebug.h b/nvdebug.h
index fd88b2e..26689d9 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -365,6 +365,37 @@ enum CHANNEL_STATUS {
365 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, 365 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14,
366}; 366};
367 367
368/* RunList RAM (RLRAM)
369 Starting with Ampere, the PFIFO register region no longer exists, and each
370 engine has separate runlist RAM and channel RAM. The register (BAR0) offset for
371 Runlist RAM for each engine must be pulled from the runlist_pri_base field
372 (RUNLIST Private Register BASE address) provided by PTOP.
373
374 See get_runlist_ram() in runlist.c
375
376 Support: Ampere+
377*/
378#define NV_RUNLIST_BASE_GA100 0x080
379#define NV_RUNLIST_SUBMIT_GA100 0x088
380#define NV_RUNLIST_CHANNEL_CONFIG_GA100 0x004
381
382/* Channel RAM configuration, as contained in Runlist RAM
383
384 NUM_CHANNELS_LOG2 : 1 << NUM_CHANNELS_LOG2 is the number of channel_ctrl_ga100_t
385 entries in the described Channel RAM region.
386 BAR0_OFFSET : BAR0_OFFSET << 4 is the register offset (off BAR0) for the
387 Channel RAM region.
388
389 Support: Ampere+
390*/
391typedef union {
392 struct {
393 uint8_t num_channels_log2:4;
394 uint32_t bar0_offset:28;
395 }__attribute__((packed));
396 uint32_t raw;
397} runlist_channel_config_t;
398
368/* Programmable Channel Control System RAM (PCCSR) 399/* Programmable Channel Control System RAM (PCCSR)
369 512-entry array of channel control and status data structures. 400 512-entry array of channel control and status data structures.
370 401
@@ -425,6 +456,50 @@ typedef union {
425 uint64_t raw; 456 uint64_t raw;
426} channel_ctrl_t; 457} channel_ctrl_t;
427 458
459/* CHannel RAM (CHRAM) (PCCSR replacement on Ampere+)
460 Starting with Ampere, channel IDs are no longer unique indexes into the
461 global channel RAM region (PCCSR), but are indexes into per-runlist channel
462 RAMs.
463
464 As Channel RAM entries are now subsidiary to a runlist, they do not contain
465 duplicate information, such as the instance pointer (to "result in smaller
466 hardware" per ga100/dev_ram.ref.txt in open-gpu-doc).
467
468 The new format retains and adds to the status information available about a
469 channel, but does so via bit flags rather than an enum. Some bit flags are
470 writable to trigger behavior previously dedicated to a bit (eg. writing to
471 `ctx_reload` triggers the same behavior as writing to `force_ctx_reload` did).
472
473 When the first bit (`is_write_one_clears_bits`) is set in this structure,
474 writing a 1 to any field will clear, rather than set, it. Writing a 0 to any
475 field is a no-op.
476
477 All fields are read/write, except the following, which are read-only: BUSY,
478 ON_PBDMA, ON_ENG, PBDMA_BUSY, ENG_BUSY.
479
480 Support: Ampere, Hopper, Ada (and newer likely)
481 See also: manuals/ampere/ga100/dev_runlist.ref.txt in NVIDIA's open-gpu-doc
482*/
483typedef union {
484 struct {
485 bool is_write_one_clears_bits:1; // new
486 bool enable:1;
487 bool next:1;
488 bool busy:1;
489 bool pbdma_faulted:1; // write to force_pbdma_faulted
490 bool eng_faulted:1; // write to force_eng_faulted
491 bool on_pbdma:1; // breakout
492 bool on_eng:1; // breakout
493 bool pending:1; // breakout
494 bool ctx_reload:1; // breakout; write to force_ctx_reload
495 bool pbdma_busy:1; // breakout
496 bool eng_busy:1; // new
497 bool acquire_fail:1; // breakout
498 uint32_t :19;
499 } __attribute__((packed));
500 uint32_t raw;
501} channel_ctrl_ga100_t;
502
428/* Control word for runlist enable/disable. 503/* Control word for runlist enable/disable.
429 504
430 RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled) 505 RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled)
@@ -1413,14 +1488,19 @@ struct runlist_iter {
1413 int entries_left_in_tsg; 1488 int entries_left_in_tsg;
1414 // Number of entries in runlist 1489 // Number of entries in runlist
1415 int len; 1490 int len;
1416 // Offset to start of Channel RAM (as this is per-runlist on Ampere+) 1491 // (Ampere+ only) Offset to the per-runlist "Runlist RAM" register region.
1417 uint32_t channel_ram; 1492 // This includes the offset for Channel RAM (per-runlist on Ampere+).
1493 uint32_t runlist_pri_base;
1418}; 1494};
1419 1495
1420#define NVDEBUG_MAX_DEVICES 8 1496#define NVDEBUG_MAX_DEVICES 8
1421extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 1497extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
1422 1498
1423// Defined in runlist.c 1499// Defined in runlist.c
1500int get_runlist_ram(
1501 struct nvdebug_state *g,
1502 int rl_id,
1503 uint32_t *rl_ram_off /* out */);
1424int get_runlist_iter( 1504int get_runlist_iter(
1425 struct nvdebug_state *g, 1505 struct nvdebug_state *g,
1426 int rl_id, 1506 int rl_id,
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index eee7351..1f9e1c9 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -159,35 +159,53 @@ int probe_and_cache_devices(void) {
159 return -ENODEV; 159 return -ENODEV;
160} 160}
161 161
162// Create files `/proc/gpu#/runlist#`, world readable
163// Support: Fermi, Maxwell, Pascal, Volta, Turing 162// Support: Fermi, Maxwell, Pascal, Volta, Turing
164int create_runlist_files(int device_id, struct proc_dir_entry *dir) { 163int get_last_runlist_id_gk104(struct nvdebug_state *g) {
165 ptop_device_info_gk104_t info; 164 ptop_device_info_gk104_t info;
166 struct proc_dir_entry *rl_entry; 165 int i, max_rl_id = 0; // Always at least one runlist
167 int i, rl_id;
168 char runlist_name[12];
169 int max_rl_id = 0; // Always at least one runlist
170 // Figure out how many runlists there are by checking the device info 166 // Figure out how many runlists there are by checking the device info
171 // registers. Runlists are always numbered sequentially, so we just have 167 // registers. Runlists are always numbered sequentially, so we just have
172 // to find the highest-valued one and add 1 to get the number of runlists. 168 // to find the highest-valued one and add 1 to get the number of runlists.
173 for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { 169 for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) {
174 info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i)); 170 if ((info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(i))) == -1)
171 return -EIO;
175 if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) 172 if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid)
176 continue; 173 continue;
177 if (info.runlist_enum > max_rl_id) 174 if (info.runlist_enum > max_rl_id)
178 max_rl_id = info.runlist_enum; 175 max_rl_id = info.runlist_enum;
179 } 176 }
180 // Create files to read each runlist. The read handling code looks at the 177 return max_rl_id;
181 // `pde_data` associated with the file to determine what the runlist ID is. 178}
182 for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { 179
183 snprintf(runlist_name, 12, "runlist%d", rl_id); 180// Support: Ampere, Hopper, Ada (and newer likely)
184 rl_entry = proc_create_data( 181// Identical structure to get_runlist_ram() in runlist.c. See comments there.
185 runlist_name, 0444, dir, compat_ops(&runlist_file_ops), 182int get_last_runlist_id_ga100(struct nvdebug_state *g) {
186 (void*)(uintptr_t)rl_id); 183 ptop_device_info_ga100_t ptop_entry;
187 if (!rl_entry) 184 int i, runlist_count = 0;
188 return -ENOMEM; 185 int ptop_size = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
186 int ptop_entry_subrow = 0;
187 for (i = 0; i < ptop_size; i++) {
188 if ((ptop_entry.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(i))) == -1)
189 return -EIO;
190 if (!ptop_entry.raw)
191 continue;
192 if (ptop_entry_subrow == 2 && ptop_entry.rleng_id == 0)
193 runlist_count++;
194 if (ptop_entry.has_next_entry)
195 ptop_entry_subrow += 1;
196 else
197 ptop_entry_subrow = 0;
189 } 198 }
190 return 0; 199 return runlist_count - 1;
200}
201
202// Return the maximum runlist ID. For a two-runlist GPU, this would return 1.
203int get_last_runlist_id(int device_id) {
204 struct nvdebug_state* g = &g_nvdebug_state[device_id];
205 if (g->chip_id >= NV_CHIP_ID_AMPERE)
206 return get_last_runlist_id_ga100(g);
207 else
208 return get_last_runlist_id_gk104(g);
191} 209}
192 210
193// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable 211// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
@@ -238,6 +256,7 @@ int __init nvdebug_init(void) {
238 g_nvdebug_devices = res; 256 g_nvdebug_devices = res;
239 // Create separate ProcFS directories for each gpu 257 // Create separate ProcFS directories for each gpu
240 while (res--) { 258 while (res--) {
259 uintptr_t last_runlist = 0;
241 char device_id_str[7]; 260 char device_id_str[7];
242 // Create a wider copy of the GPU ID to allow us to abuse the *data 261 // Create a wider copy of the GPU ID to allow us to abuse the *data
243 // field of proc_dir_entry to store the GPU ID. 262 // field of proc_dir_entry to store the GPU ID.
@@ -248,10 +267,24 @@ int __init nvdebug_init(void) {
248 snprintf(device_id_str, 7, "gpu%ld", device_id); 267 snprintf(device_id_str, 7, "gpu%ld", device_id);
249 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) 268 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
250 goto out_nomem; 269 goto out_nomem;
251 // Create files `/proc/gpu#/runlist#`, world readable 270 // Create files in the `/proc/gpu#/runlist#/` directory
252 if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) 271 // The read handling code looks at the `pde_data` associated with the parent
253 if ((err = create_runlist_files(device_id, dir))) 272 // directory to determine what the runlist ID is.
254 goto out_err; 273 if ((last_runlist = get_last_runlist_id(device_id)) < 0)
274 return last_runlist;
275 do {
276 char runlist_name[12];
277 struct proc_dir_entry *rl_dir;
278 // Create `/proc/gpu#/runlist#` directory
279 snprintf(runlist_name, 12, "runlist%lu", last_runlist);
280 if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id)))
281 goto out_nomem;
282 // Create file `/proc/gpu#/runlist#/runlist`, world readable
283 if (!proc_create_data(
284 "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops),
285 (void*)last_runlist))
286 goto out_nomem;
287 } while (last_runlist-- > 0);
255 // Create file `/proc/gpu#/preempt_tsg`, world writable 288 // Create file `/proc/gpu#/preempt_tsg`, world writable
256 if (!proc_create_data( 289 if (!proc_create_data(
257 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), 290 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops),
@@ -325,7 +358,7 @@ int __init nvdebug_init(void) {
325 "local_memory", 0444, dir, compat_ops(&local_memory_file_ops), 358 "local_memory", 0444, dir, compat_ops(&local_memory_file_ops),
326 (void*)0x00100ce0)) 359 (void*)0x00100ce0))
327 goto out_nomem; 360 goto out_nomem;
328 } 361 }
329 // Create files exposing LCE and PCE configuration (Pascal+) 362 // Create files exposing LCE and PCE configuration (Pascal+)
330 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { 363 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) {
331 // Create file `/proc/gpu#/copy_topology`, world readable 364 // Create file `/proc/gpu#/copy_topology`, world readable
diff --git a/runlist.c b/runlist.c
index 2e9577d..7e6d292 100644
--- a/runlist.c
+++ b/runlist.c
@@ -14,6 +14,52 @@
14// be enabled to print the runlist on the TX2. 14// be enabled to print the runlist on the TX2.
15//#define FALLBACK_TO_PRAMIN 15//#define FALLBACK_TO_PRAMIN
16 16
17/* Get RunList RAM (RLRAM) offset for a runlist from the device topology
18 @param rl_id Which runlist to obtain [numbered in order of appearance in
19 the device topology (PTOP) registers]
20 @param rl_ram_off Location at which to store runlist private register
21 interface base address (PRI base); an offset into the BAR0
22 register range.
23 @return 0 or -errno on error
24*/
25int get_runlist_ram(struct nvdebug_state *g, int rl_id, uint32_t *rl_ram_off) {
26 int i;
27 int curr_rl_id = 0;
28 int ptop_size = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
29 // Each PTOP entry is composed of 1--3 subrows, and the fields available
30 // on each row vary. The runlist RAM location is only available on row 3
31 int ptop_entry_subrow = 0;
32 ptop_device_info_ga100_t ptop_entry;
33 // Iterate through all PTOP entries
34 for (i = 0; i < ptop_size; i++) {
35 if ((ptop_entry.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(i))) == -1)
36 return -EIO;
37 // Skip empty entries
38 if (!ptop_entry.raw)
39 continue;
40 // If on subrow 3 (zero-base-index 2), runlist info is available
41 // Multiple engines may be associated with a single runlist, so
42 // multiple PTOP entries may refer to the same runlist. Only match when
43 // on the 0th-associated entry.
44 if (ptop_entry_subrow == 2 && ptop_entry.rleng_id == 0) {
45 // If this is the requested runlist, return it
46 if (curr_rl_id == rl_id) {
47 *rl_ram_off = (uint32_t)ptop_entry.runlist_pri_base << 10;
48 return 0;
49 }
50 // Otherwise, update our accounting of what the next runlist ID is
51 curr_rl_id++;
52 }
53 // Track if the next row is a subrow of the current entry
54 if (ptop_entry.has_next_entry)
55 ptop_entry_subrow += 1;
56 else
57 ptop_entry_subrow = 0;
58 }
59 // Search failed; requested index does not exist
60 return -EINVAL;
61}
62
17/* Get runlist head and info (incl. length) 63/* Get runlist head and info (incl. length)
18 @param rl_id Which runlist to obtain? 64 @param rl_id Which runlist to obtain?
19 @param rl_iter Location at which to store output 65 @param rl_iter Location at which to store output
@@ -39,7 +85,7 @@ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl
39 runlist_target = rl.target; 85 runlist_target = rl.target;
40 runlist_len = rl.len; 86 runlist_len = rl.len;
41 printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx)\n", 87 printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx)\n",
42 rl_id, g->chip_id, rl.len, runlist_iova, target_to_text(rl.target), rl.raw); 88 rl_id, g->chip_id, rl.len, runlist_iova, target_to_text(rl.target), rl.raw);
43 } else if (g->chip_id < NV_CHIP_ID_AMPERE) { 89 } else if (g->chip_id < NV_CHIP_ID_AMPERE) {
44 runlist_base_tu102_t base; 90 runlist_base_tu102_t base;
45 runlist_submit_tu102_t submit; 91 runlist_submit_tu102_t submit;
@@ -51,7 +97,26 @@ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl
51 runlist_target = base.target; 97 runlist_target = base.target;
52 runlist_len = submit.len; 98 runlist_len = submit.len;
53 printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx %#018llx)\n", 99 printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx %#018llx)\n",
54 rl_id, g->chip_id, submit.len, runlist_iova, target_to_text(runlist_target), base.raw, submit.raw); 100 rl_id, g->chip_id, submit.len, runlist_iova, target_to_text(runlist_target), base.raw, submit.raw);
101 } else {
102 runlist_base_tu102_t base;
103 runlist_submit_tu102_t submit;
104 uint32_t runlist_pri_base;
105 // Runlist configurations are stored in per-runlist regions on Ampere+
106 if ((err = get_runlist_ram(g, rl_id, &runlist_pri_base)) < 0)
107 return err;
108 // The runlist configuration region (RLRAM) contains Turing-like BASE
109 // and SUBMIT registers at static offsets
110 if ((base.raw = nvdebug_readq(g, runlist_pri_base + NV_RUNLIST_BASE_GA100)) == -1)
111 return -EIO;
112 if ((submit.raw = nvdebug_readq(g, runlist_pri_base + NV_RUNLIST_SUBMIT_GA100)) == -1)
113 return -EIO;
114 runlist_iova = ((uint64_t)base.ptr) << 12;
115 runlist_target = base.target;
116 runlist_len = submit.len;
117 printk(KERN_INFO "[nvdebug] Runlist %d for %x: %d entries @ %llx in %s (config raw: %#018llx %#018llx)\n",
118 rl_id, g->chip_id, submit.len, runlist_iova, target_to_text(runlist_target), base.raw, submit.raw);
119 rl_iter->runlist_pri_base = runlist_pri_base;
55 } 120 }
56 // Return early on an empty runlist 121 // Return early on an empty runlist
57 if (!runlist_len) 122 if (!runlist_len)
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 8152463..c1cfc87 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -8,11 +8,11 @@
8 8
9#ifdef DETAILED_CHANNEL_INFO 9#ifdef DETAILED_CHANNEL_INFO
10/* Print channel details using PCCSR (Programmable Channel Control System RAM?) 10/* Print channel details using PCCSR (Programmable Channel Control System RAM?)
11 * @param s Pointer to state from seq_file subsystem to pass to seq_printf 11 @param s Pointer to state from seq_file subsystem to pass to seq_printf
12 * @param g Pointer to our internal GPU state 12 @param g Pointer to our internal GPU state
13 * @param chid ID of channel to print details on, range [0, 512) 13 @param chid ID of channel to print details on, range [0, 512)
14 * @param prefix Text string to prefix each line with, or empty string 14 @param prefix Text string to prefix each line with, or empty string
15 */ 15*/
16static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) { 16static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) {
17 channel_ctrl_t chan; 17 channel_ctrl_t chan;
18 uint64_t instance_ptr; 18 uint64_t instance_ptr;
@@ -21,7 +21,7 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state
21 return -EIO; 21 return -EIO;
22 instance_ptr = (uint64_t)chan.inst_ptr << 12; 22 instance_ptr = (uint64_t)chan.inst_ptr << 12;
23 // Don't print write-only fields 23 // Don't print write-only fields
24 seq_printf(s, "%s+- Channel Info %-4d -+\n", prefix, chid); 24 seq_printf(s, "%s|= Channel Info ======|\n", prefix);
25 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); 25 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable);
26 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); 26 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next);
27 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); 27 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted);
@@ -32,7 +32,37 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state
32 seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr); 32 seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr);
33 seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target)); 33 seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target));
34 seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind); 34 seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind);
35 seq_printf(s, "%s+---------------------+\n", prefix); 35 return 0;
36}
37
38/* `runlist_detail_seq_show_chan()`, but for Ampere+
39 @param runlist_pri_base Base of the RLRAM region for this runlist
40
41 `runlist_pri_base` is necessary, since Channel RAM is now per-runlist on
42 Ampere+, and its location is configured in Runlist RAM.
43*/
44static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base) {
45 runlist_channel_config_t channel_config;
46 channel_ctrl_ga100_t chan;
47
48 // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere+
49 if ((channel_config.raw = nvdebug_readl(g, runlist_pri_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
50 return -EIO;
51 if ((chan.raw = nvdebug_readl(g, (((uint32_t)channel_config.bar0_offset << 4) + chid * 4))) == -1)
52 return -EIO;
53 seq_printf(s, "%s|= Channel Info ======|\n", prefix);
54 seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable);
55 seq_printf(s, "%s| Next: %d|\n", prefix, chan.next);
56 seq_printf(s, "%s| Busy: %d|\n", prefix, chan.busy);
57 seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted);
58 seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, chan.eng_faulted);
59 seq_printf(s, "%s| On PBDMA: %d|\n", prefix, chan.on_pbdma);
60 seq_printf(s, "%s| On ENG: %d|\n", prefix, chan.on_eng);
61 seq_printf(s, "%s| Pending: %d|\n", prefix, chan.pending);
62 seq_printf(s, "%s| CTX Reload: %d|\n", prefix, chan.ctx_reload);
63 seq_printf(s, "%s| PBDMA Busy: %d|\n", prefix, chan.pbdma_busy);
64 seq_printf(s, "%s| ENG Busy: %d|\n", prefix, chan.eng_busy);
65 seq_printf(s, "%s| Acquire Fail: %d|\n", prefix, chan.acquire_fail);
36 return 0; 66 return 0;
37} 67}
38#endif 68#endif
@@ -118,27 +148,33 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
118 } else { 148 } else {
119 char *indt = ""; 149 char *indt = "";
120 u64 instance_ptr = 0; 150 u64 instance_ptr = 0;
121
122 if (rl_iter->entries_left_in_tsg) 151 if (rl_iter->entries_left_in_tsg)
123 indt = " "; 152 indt = " ";
124#ifdef DETAILED_CHANNEL_INFO
125 runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
126 return 0;
127#endif
128 // Reconstruct pointer to channel instance block 153 // Reconstruct pointer to channel instance block
129 if (g->chip_id >= NV_CHIP_ID_VOLTA) { 154 if (g->chip_id >= NV_CHIP_ID_VOLTA) {
130 instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi; 155 instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi;
131 instance_ptr <<= 32; 156 instance_ptr <<= 32;
132 } 157 }
133 instance_ptr |= inst_ptr_lo(g, entry) << 12; 158 instance_ptr |= inst_ptr_lo(g, entry) << 12;
134 159 // Print channel information from runlist
135 seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry)); 160 seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry));
136 if (g->chip_id >= NV_CHIP_ID_VOLTA) 161 if (g->chip_id >= NV_CHIP_ID_VOLTA)
137 seq_printf(s, "%s| Runqueue Selector: %d|\n", indt, 162 seq_printf(s, "%s| Runqueue Selector: %d|\n", indt,
138 ((struct gv100_runlist_chan*)entry)->runqueue_selector); 163 ((struct gv100_runlist_chan*)entry)->runqueue_selector);
164 // Not populated on Kepler [ex: gk104 in Bonham (Quadro K5000)], and
165 // populated but unused on Pascal [ex: gp104 in Bonham (GTX 1080 Ti)].
166 // (The aperture field may be incorrectly populated as INVALID, but the
167 // context still works on the aforementioned Pascal GPU.)
139 seq_printf(s, "%s| Instance PTR: |\n", indt); 168 seq_printf(s, "%s| Instance PTR: |\n", indt);
140 seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr); 169 seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr);
141 seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry))); 170 seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry)));
171#ifdef DETAILED_CHANNEL_INFO
172 // Print channel info from PCCSR/Channel RAM and the instance block
173 if (g->chip_id < NV_CHIP_ID_AMPERE)
174 runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
175 else
176 runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base);
177#endif
142 seq_printf(s, "%s+---------------------+\n", indt); 178 seq_printf(s, "%s+---------------------+\n", indt);
143 } 179 }
144 return 0; 180 return 0;