From 232eafd04f272ed69d97a250c50a7bbed4d2894c Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Mon, 16 Sep 2024 15:34:41 -0400 Subject: Support printing the runlist and channels on Ampere+ GPUs **Modifies the user API from `cat /proc/gpuX/runlist0` to `cat /proc/gpuX/runlist0/runlist` to support runlist-scoped registers** - Count number of runlists via Ampere-style PTOP parsing. - Create a ProcFS directory for each runlist, and create the runlist printing file in this directory. - Document the newly-added/-formatted Runlist RAM and Channel RAM registers. - Add a helper function `get_runlist_ram()` to obtain the location of each runlist's registers. - Support printing Ampere-style Channel RAM entries. Tested on Jetson Orin (ga10b), A100, H100, and AD102 (RTX 6000 Ada) --- runlist_procfs.c | 64 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 14 deletions(-) (limited to 'runlist_procfs.c') diff --git a/runlist_procfs.c b/runlist_procfs.c index 8152463..c1cfc87 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c @@ -8,11 +8,11 @@ #ifdef DETAILED_CHANNEL_INFO /* Print channel details using PCCSR (Programmable Channel Control System RAM?) 
- * @param s Pointer to state from seq_file subsystem to pass to seq_printf - * @param g Pointer to our internal GPU state - * @param chid ID of channel to print details on, range [0, 512) - * @param prefix Text string to prefix each line with, or empty string - */ + @param s Pointer to state from seq_file subsystem to pass to seq_printf + @param g Pointer to our internal GPU state + @param chid ID of channel to print details on, range [0, 512) + @param prefix Text string to prefix each line with, or empty string +*/ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) { channel_ctrl_t chan; uint64_t instance_ptr; @@ -21,7 +21,7 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state return -EIO; instance_ptr = (uint64_t)chan.inst_ptr << 12; // Don't print write-only fields - seq_printf(s, "%s+- Channel Info %-4d -+\n", prefix, chid); + seq_printf(s, "%s|= Channel Info ======|\n", prefix); seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); @@ -32,7 +32,37 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr); seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target)); seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind); - seq_printf(s, "%s+---------------------+\n", prefix); + return 0; +} + +/* `runlist_detail_seq_show_chan()`, but for Ampere+ + @param runlist_pri_base Base of the RLRAM region for this runlist + + `runlist_pri_base` is necessary, since Channel RAM is now per-runlist on + Ampere+, and its location is configured in Runlist RAM. 
+*/ +static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base) { + runlist_channel_config_t channel_config; + channel_ctrl_ga100_t chan; + + // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere+ + if ((channel_config.raw = nvdebug_readl(g, runlist_pri_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) + return -EIO; + if ((chan.raw = nvdebug_readl(g, (((uint32_t)channel_config.bar0_offset << 4) + chid * 4))) == -1) + return -EIO; + seq_printf(s, "%s|= Channel Info ======|\n", prefix); + seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); + seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); + seq_printf(s, "%s| Busy: %d|\n", prefix, chan.busy); + seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); + seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, chan.eng_faulted); + seq_printf(s, "%s| On PBDMA: %d|\n", prefix, chan.on_pbdma); + seq_printf(s, "%s| On ENG: %d|\n", prefix, chan.on_eng); + seq_printf(s, "%s| Pending: %d|\n", prefix, chan.pending); + seq_printf(s, "%s| CTX Reload: %d|\n", prefix, chan.ctx_reload); + seq_printf(s, "%s| PBDMA Busy: %d|\n", prefix, chan.pbdma_busy); + seq_printf(s, "%s| ENG Busy: %d|\n", prefix, chan.eng_busy); + seq_printf(s, "%s| Acquire Fail: %d|\n", prefix, chan.acquire_fail); return 0; } #endif @@ -118,27 +148,33 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { } else { char *indt = ""; u64 instance_ptr = 0; - if (rl_iter->entries_left_in_tsg) indt = " "; -#ifdef DETAILED_CHANNEL_INFO - runlist_detail_seq_show_chan(s, g, chid(g, entry), indt); - return 0; -#endif // Reconstruct pointer to channel instance block if (g->chip_id >= NV_CHIP_ID_VOLTA) { instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi; instance_ptr <<= 32; } instance_ptr |= inst_ptr_lo(g, entry) << 12; - + // Print channel information from runlist seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, 
chid(g, entry)); if (g->chip_id >= NV_CHIP_ID_VOLTA) seq_printf(s, "%s| Runqueue Selector: %d|\n", indt, - ((struct gv100_runlist_chan*)entry)->runqueue_selector); + ((struct gv100_runlist_chan*)entry)->runqueue_selector); + // Not populated on Kepler [ex: gk104 in Bonham (Quadro K5000)], and + // populated but unused on Pascal [ex: gp104 in Bonham (GTX 1080 Ti)]. + // (The aperture field may be incorrectly populated as INVALID, but the + // context still works on the aforementioned Pascal GPU.) seq_printf(s, "%s| Instance PTR: |\n", indt); seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr); seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry))); +#ifdef DETAILED_CHANNEL_INFO + // Print channel info from PCCSR/Channel RAM and the instance block + if (g->chip_id < NV_CHIP_ID_AMPERE) + runlist_detail_seq_show_chan(s, g, chid(g, entry), indt); + else + runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base); +#endif seq_printf(s, "%s+---------------------+\n", indt); } return 0; -- cgit v1.2.2