#include <linux/seq_file.h> // For seq_* functions and types
#include <linux/version.h> // Macros to detect kernel version
#include "nvdebug_linux.h"
// Define to expand channel status information when printing the runlist
// (comment out the #define below to omit the per-channel details)
#define DETAILED_CHANNEL_INFO
#ifdef DETAILED_CHANNEL_INFO
/* Print channel details using PCCSR (Programmable Channel Control System RAM?)
  Reads the channel's control register once and prints its readable fields,
  one row per seq_printf() call.
  @param s      Pointer to state from seq_file subsystem to pass to seq_printf
  @param g      Pointer to our internal GPU state
  @param chid   ID of channel to print details on, range [0, 512)
  @param prefix Text string to prefix each line with, or empty string
  @return 0 on success, -EIO if the register read came back as -1
*/
static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) {
	channel_ctrl_t ctrl;
	uint64_t inst_addr;

	// A readback of all ones is treated as a failed register read
	ctrl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid));
	if (ctrl.raw == -1)
		return -EIO;

	// The register stores the instance pointer shifted right by 12 bits
	inst_addr = (uint64_t)ctrl.inst_ptr << 12;

	// Write-only fields are intentionally skipped
	seq_printf(s, "%s|= Channel Info ======|\n", prefix);
	seq_printf(s, "%s| Enabled: %d|\n", prefix, ctrl.enable);
	seq_printf(s, "%s| Next: %d|\n", prefix, ctrl.next);
	seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, ctrl.pbdma_faulted);
	seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, ctrl.eng_faulted);
	seq_printf(s, "%s| Status: %2d|\n", prefix, ctrl.status);
	seq_printf(s, "%s| Busy: %d|\n", prefix, ctrl.busy);
	seq_printf(s, "%s| Instance PTR: |\n", prefix);
	seq_printf(s, "%s| %#018llx|\n", prefix, inst_addr);
	seq_printf(s, "%s| %20s|\n", prefix, target_to_text(ctrl.inst_target));
	seq_printf(s, "%s| Instance bound: %d|\n", prefix, ctrl.inst_bind);
	return 0;
}
/* `runlist_detail_seq_show_chan()`, but for Ampere+
  @param s      Pointer to state from seq_file subsystem to pass to seq_printf
  @param g      Pointer to our internal GPU state
  @param chid   ID of channel to print details on
  @param prefix Text string to prefix each line with, or empty string
  @param runlist_pri_base Base of the RLRAM region for this runlist
  @return 0 on success, -EIO if either register read came back as -1

  `runlist_pri_base` is necessary, since Channel RAM is now per-runlist on
  Ampere+, and its location is configured in Runlist RAM.
*/
static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base) {
	runlist_channel_config_t cfg;
	channel_ctrl_ga100_t ctrl;
	uint32_t chram_base;

	// Step 1: find where this runlist's Channel RAM lives
	cfg.raw = nvdebug_readl(g, runlist_pri_base + NV_RUNLIST_CHANNEL_CONFIG_GA100);
	if (cfg.raw == -1)
		return -EIO;
	chram_base = (uint32_t)cfg.bar0_offset << 4;

	// Step 2: read this channel's control word (entries are 4 bytes apart)
	ctrl.raw = nvdebug_readl(g, chram_base + chid * 4);
	if (ctrl.raw == -1)
		return -EIO;

	seq_printf(s, "%s|= Channel Info ======|\n", prefix);
	seq_printf(s, "%s| Enabled: %d|\n", prefix, ctrl.enable);
	seq_printf(s, "%s| Next: %d|\n", prefix, ctrl.next);
	seq_printf(s, "%s| Busy: %d|\n", prefix, ctrl.busy);
	seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, ctrl.pbdma_faulted);
	seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, ctrl.eng_faulted);
	seq_printf(s, "%s| On PBDMA: %d|\n", prefix, ctrl.on_pbdma);
	seq_printf(s, "%s| On ENG: %d|\n", prefix, ctrl.on_eng);
	seq_printf(s, "%s| Pending: %d|\n", prefix, ctrl.pending);
	seq_printf(s, "%s| CTX Reload: %d|\n", prefix, ctrl.ctx_reload);
	seq_printf(s, "%s| PBDMA Busy: %d|\n", prefix, ctrl.pbdma_busy);
	seq_printf(s, "%s| ENG Busy: %d|\n", prefix, ctrl.eng_busy);
	seq_printf(s, "%s| Acquire Fail: %d|\n", prefix, ctrl.acquire_fail);
	return 0;
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
// Bug workaround. See comment in runlist_file_seq_start()
// Saved copy of the seq_file position: runlist_file_seq_next() stores the
// position it advanced to here (or 0 once the end of the runlist has been
// reached), and runlist_file_seq_start() reloads it when resuming a print.
static loff_t pos_fixup;
#endif
/* seq_file .start callback: begin or resume a walk over the runlist
  @param s   Pointer to state from seq_file subsystem
  @param pos Position in the sequence; 0 on the first call after a read of
             the file starts
  @return Pointer to the runlist iterator, NULL when there is nothing (more)
          to print, or an ERR_PTR() if the runlist could not be obtained

  NOTE(review): the iterator is function-static, so its state persists across
  calls and is shared by every open instance of this file.
*/
static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) {
	static struct runlist_iter rl_iter;
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
	int err;

	if (*pos != 0) {
		// Resuming an earlier print; stop if the runlist was fully traversed
		if (*pos >= rl_iter.len)
			return NULL;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
		// There's a nasty bug prior to 4.19-rc1 that if the buffer overflows,
		// the last update to `pos` is not saved. Work around that here by
		// reloading a saved copy of `pos`.
		if (!pos_fixup)
			return NULL;
		*pos = pos_fixup;
#endif
		return &rl_iter;
	}

	// Fresh read: (re)initialize the iterator for this runlist
	err = get_runlist_iter(g, seq2gpuidx(s), &rl_iter);
	if (err)
		return ERR_PTR(err);

	// Don't try to print an empty runlist
	return rl_iter.len > 0 ? &rl_iter : NULL;
}
/* seq_file .next callback: advance the iterator by one TSG or channel entry
  @param s           Pointer to state from seq_file subsystem
  @param raw_rl_iter `struct runlist_iter*` being advanced
  @param pos         Position in the sequence; incremented by one
  @return The iterator if entries remain, NULL once the end is reached
*/
static void *runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter,
                                   loff_t *pos) {
	struct runlist_iter *iter = raw_rl_iter;
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
	bool more;

	// Step over the entry that was just shown
	++*pos;
	iter->curr_entry += NV_RL_ENTRY_SIZE(g);

	// One fewer entry is now expected under the current TSG (if any)
	if (iter->entries_left_in_tsg)
		iter->entries_left_in_tsg--;

	// len is the num of tsg entries + total num of channel entries
	more = *pos < iter->len;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
	// Bug workaround. See comment in runlist_file_seq_start()
	pos_fixup = more ? *pos : 0;
#endif
	return more ? iter : NULL;
}
/* seq_file .stop callback for the runlist file
@param s Pointer to state from seq_file subsystem (unused)
@param raw_rl_iter Iterator pointer handed over by seq_file (unused)
*/
static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) {
// No cleanup needed
}
// _show() must be idempotent. This function will be rerun if the seq_printf
// buffer was too small.
static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
struct runlist_iter *rl_iter = raw_rl_iter;
void *entry = rl_iter->curr_entry;
struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
if (entry_type(g, entry) == ENTRY_TYPE_TSG) {
if (rl_iter->entries_left_in_tsg) {
printk(KERN_WARNING "[nvdebug] Found TSG ID%d @ %px when %d channels were still expected under the previous TSG in the runlist!\n", tsgid(g, entry), entry, rl_iter->entries_left_in_tsg);
while (rl_iter->entries_left_in_tsg--)
seq_printf(s, "[missing channel]\n");
}
rl_iter->entries_left_in_tsg = tsg_length(g, entry) + 1;
seq_printf(s, "+---- TSG Entry %-3d---+\n", tsgid(g, entry));
seq_printf(s, "| Scale: %-13d|\n", timeslice_scale(g, entry));
seq_printf(s, "| Timeout: %-11d|\n", timeslice_timeout(g, entry));
seq_printf(s, "| Length: %-12d|\n", tsg_length(g, entry));
seq_printf(s, "+---------------------+\n");
} else {
char *indt = "";
u64 instance_ptr = 0;
if (rl_iter->entries_left_in_tsg)
indt = " ";
// Reconstruct pointer to channel instance block
if (g->chip_id >= NV_CHIP_ID_VOLTA) {
instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi;
instance_ptr <<= 32;
}
instance_ptr |= inst_ptr_lo(g, entry) << 12;
// Print channel information from runlist
seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry));
if (g->chip_id >= NV_CHIP_ID_VOLTA)
seq_printf(s, "%s| Runqueue Selector: %d|\n", indt,
((struct gv100_runlist_chan*)entry)->runqueue_selector);
// Not populated on Kepler [ex: gk104 in Bonham (Quadro K5000)], and
// populated but unused on Pascal [ex: gp104 in Bonham (GTX 1080 Ti)].
// (The aperture field may be incorrectly populated as INVALID, but the
// context still works on the aformentioned Pascal GPU.)
seq_printf(s, "%s| Instance PTR: |\n", indt);
seq_printf(s, "%s| %#018llx|\n", indt, instance_ptr);
seq_printf(s, "%s| %20s|\n", indt, target_to_text(inst_target(g, entry)));
#ifdef DETAILED_CHANNEL_INFO
// Print channel info from PCCSR/Channel RAM and the instance block
if (g->chip_id < NV_CHIP_ID_AMPERE)
runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
else
runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base);
#endif
seq_printf(s, "%s+---------------------+\n", indt);
}
return 0;
}
// seq_file iterator callbacks used to print the runlist one entry at a time
static const struct seq_operations runlist_file_seq_ops = {
	.start = runlist_file_seq_start,
	.show = runlist_file_seq_show,
	.next = runlist_file_seq_next,
	.stop = runlist_file_seq_stop,
};
/* Open handler for the runlist file: attach the seq_file iterator
  @param inode Inode of the file being opened (unused)
  @param f     File being opened
  @return Result of seq_open()
*/
static int runlist_file_open(struct inode *inode, struct file *f) {
	const struct seq_operations *ops = &runlist_file_seq_ops;

	return seq_open(f, ops);
}
// File operations for the runlist file. Everything except open is delegated
// to the generic seq_file helpers.
struct file_operations runlist_file_ops = {
	.read = seq_read,
	.llseek = seq_lseek,
	.open = runlist_file_open,
	.release = seq_release,
};
/* Write handler: preempt the TSG whose ID is written to this file
  @param f      File being written
  @param buffer Userspace text holding the TSG ID (hex/octal/dec autodetected)
  @param count  Number of bytes in `buffer`
  @param off    File offset (unused)
  @return `count` on success; otherwise the parse error, -ERANGE if the ID
          exceeds MAX_TSGID, or the error from preempt_tsg()
*/
ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
                               size_t count, loff_t *off) {
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	uint32_t target_tsgid;
	// Runlist RAM index defaults to 0; only Ampere+ scopes TSG IDs per runlist
	uint32_t runlist_ram_idx = 0;
	int err;

	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
	err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
	if (err)
		return err;

	// TSG IDs are a 12-bit field, so make sure the request is in-range
	if (target_tsgid > MAX_TSGID)
		return -ERANGE;

	// (Ab)use the PDE_DATA field for the index into which Runlist RAM this
	// TSG ID is scoped to
	if (g->chip_id >= NV_CHIP_ID_AMPERE)
		runlist_ram_idx = file2gpuidx(f);

	// Execute preemption
	err = preempt_tsg(g, runlist_ram_idx, target_tsgid);
	if (err)
		return err;

	return count;
}
// File operations for the write-only preempt_tsg file
struct file_operations preempt_tsg_file_ops = {
	.llseek = default_llseek,
	.write = preempt_tsg_file_write,
};
/* Write handler: resubmit the runlist whose ID is written to this file
  @param f      File being written
  @param buffer Userspace text holding the runlist ID (hex/octal/dec
                autodetected)
  @param count  Number of bytes in `buffer`
  @param off    File offset (unused)
  @return `count` on success; otherwise the parse error or the error from
          resubmit_runlist()
*/
ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer,
                                    size_t count, loff_t *off) {
	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
	uint32_t runlist_id;
	int err;

	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
	err = kstrtou32_from_user(buffer, count, 0, &runlist_id);
	if (err)
		return err;

	// No range check here: resubmit_runlist() checks that the ID is valid
	err = resubmit_runlist(g, runlist_id);
	if (err)
		return err;

	return count;
}
// File operations for the write-only resubmit_runlist file
struct file_operations resubmit_runlist_file_ops = {
	.llseek = default_llseek,
	.write = resubmit_runlist_file_write,
};
/* Write handler: disable the channel whose ID is written to this file
  @param f      File being written
  @param buffer Userspace text holding the channel ID (hex/octal/dec
                autodetected)
  @param count  Number of bytes in `buffer`
  @param off    File offset (unused)
  @return `count` on success; otherwise the parse error, -ERANGE if the ID is
          out of range, -EIO if a register read came back as -1, or the error
          from get_runlist_ram()
*/
ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
                                   size_t count, loff_t *off) {
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	uint32_t target_channel;
	int err;

	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
	if (err)
		return err;

	if (g->chip_id >= NV_CHIP_ID_AMPERE) {
		runlist_channel_config_t cfg;
		channel_ctrl_ga100_t ctrl;
		uint32_t rl_base, chram_base;

		// (Ab)use the PDE_DATA field for the runlist ID
		err = get_runlist_ram(g, file2gpuidx(f), &rl_base);
		if (err)
			return err;

		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on
		// Ampere, so its size and location come from the runlist's config
		cfg.raw = nvdebug_readl(g, rl_base + NV_RUNLIST_CHANNEL_CONFIG_GA100);
		if (cfg.raw == -1)
			return -EIO;
		if (target_channel >= (1u << cfg.num_channels_log2))
			return -ERANGE;
		chram_base = (uint32_t)cfg.bar0_offset << 4;

		// Writing zeros to any field of the Ampere+ channel control structure
		// does nothing, so skip the read and write zeros to every field we
		// don't care about.
		ctrl.raw = 0;
		ctrl.is_write_one_clears_bits = 1; // Invert meaning of writing 1
		ctrl.enable = 1;
		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, ctrl.raw);
	} else {
		channel_ctrl_t ctrl;

		if (target_channel > MAX_CHID)
			return -ERANGE;

		// Read-modify-write of the PCCSR entry: fetch current configuration
		ctrl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
		if (ctrl.raw == -1)
			return -EIO;

		// Request disablement
		ctrl.enable_clear = true;
		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), ctrl.raw);
	}

	return count;
}
// File operations for the write-only disable_channel file
struct file_operations disable_channel_file_ops = {
	.llseek = default_llseek,
	.write = disable_channel_file_write,
};
/* Write handler: enable the channel whose ID is written to this file
  @param f      File being written
  @param buffer Userspace text holding the channel ID (hex/octal/dec
                autodetected)
  @param count  Number of bytes in `buffer`
  @param off    File offset (unused)
  @return `count` on success; otherwise the parse error, -ERANGE if the ID is
          out of range, -EIO if a register read came back as -1, or the error
          from get_runlist_ram()
*/
ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
                                  size_t count, loff_t *off) {
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	uint32_t target_channel, rl_base, chram_base;
	runlist_channel_config_t cfg;
	channel_ctrl_ga100_t ctrl_ga100;
	channel_ctrl_t ctrl;
	int err;

	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
	if (err)
		return err;

	// Pre-Ampere: read-modify-write of the PCCSR entry
	if (g->chip_id < NV_CHIP_ID_AMPERE) {
		if (target_channel > MAX_CHID)
			return -ERANGE;
		// Read current configuration
		ctrl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
		if (ctrl.raw == -1)
			return -EIO;
		// Request enablement
		ctrl.enable_set = true;
		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), ctrl.raw);
		return count;
	}

	// Ampere+: (ab)use the PDE_DATA field for the runlist ID
	err = get_runlist_ram(g, file2gpuidx(f), &rl_base);
	if (err)
		return err;

	// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
	cfg.raw = nvdebug_readl(g, rl_base + NV_RUNLIST_CHANNEL_CONFIG_GA100);
	if (cfg.raw == -1)
		return -EIO;
	if (target_channel >= (1u << cfg.num_channels_log2))
		return -ERANGE;
	chram_base = (uint32_t)cfg.bar0_offset << 4;

	// Writing zeros to any field of the Ampere+ channel control structure
	// does nothing, so skip the read and write zeros to every field we don't
	// care about. With is_write_one_clears_bits left at 0, writing 1 to
	// `enable` sets it.
	ctrl_ga100.raw = 0;
	ctrl_ga100.enable = 1;
	nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, ctrl_ga100.raw);
	return count;
}
// File operations for the write-only enable_channel file
struct file_operations enable_channel_file_ops = {
	.llseek = default_llseek,
	.write = enable_channel_file_write,
};
// Tested working on Pascal (gp106) through Ada (ad102)
/* Write handler: make the TSG whose ID is written to this file the only
enabled TSG on this file's runlist
Walks the runlist TSG by TSG, enabling every channel of the TSG whose ID
matches and disabling the channels of every other TSG, then resubmits the
runlist (Turing+) and preempts it.
@param f File being written
@param buffer Userspace text holding the TSG ID (hex/octal/dec autodetected)
@param count Number of bytes in `buffer`
@param off File offset (unused)
@return `count` on success; otherwise the parse error, -ERANGE if the ID
exceeds MAX_TSGID, -EIO if a register read came back as -1, or the error from
get_runlist_iter(), get_runlist_ram(), resubmit_runlist(), or
preempt_runlist()
NOTE(review): if no TSG in the runlist matches the requested ID, every
channel visited by the loop is disabled.
NOTE(review): an error return from inside the loop leaves the channels
processed so far already modified.
*/
ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
size_t count, loff_t *off) {
// channel_regs_base is only assigned, and only read, on Ampere+
uint32_t target_tsgid, target_runlist, channel_regs_base;
struct gv100_runlist_chan* chan;
channel_ctrl_t chan_ctl;
channel_ctrl_ga100_t chan_ctl_ga100;
struct runlist_iter rl_iter;
// Count of runlist entries stepped over so far
loff_t pos = 0;
struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
if (err)
return err;
if (target_tsgid > MAX_TSGID)
return -ERANGE;
// (Ab)use the PDE_DATA field for the runlist ID
target_runlist = file2gpuidx(f);
if ((err = get_runlist_iter(g, target_runlist, &rl_iter)))
return err;
// On Ampere, TSG and Channel IDs are only unique per-runlist, so we need
// to pull the per-runlist copy of Channel RAM.
if (g->chip_id >= NV_CHIP_ID_AMPERE) {
uint32_t runlist_regs_base;
runlist_channel_config_t chan_config;
if ((err = get_runlist_ram(g, target_runlist, &runlist_regs_base)))
return err;
// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
if ((chan_config.raw = nvdebug_readl(g, runlist_regs_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
return -EIO;
channel_regs_base = (uint32_t)chan_config.bar0_offset << 4;
}
// Iterate through all TSGs
while (pos < rl_iter.len) {
// True only while processing the TSG that was requested
bool enable = false;
if (tsgid(g, rl_iter.curr_entry) == target_tsgid)
enable = true;
// Either enable or disable all channels of each TSG, dependent on if
// they are contained within the target TSG or not.
for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
if (g->chip_id < NV_CHIP_ID_AMPERE) {
// Read, update, write for PCCSR
if ((chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)))) == -1)
return -EIO;
if (enable)
chan_ctl.enable_set = true;
else
chan_ctl.enable_clear = true;
nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)), chan_ctl.raw);
} else {
// Writing a 0 does nothing on Ampere+, so we can just write
chan_ctl_ga100.raw = 0;
// Writing 1 to `enable` sets it when this flag is 0, clears it when 1
chan_ctl_ga100.is_write_one_clears_bits = !enable;
chan_ctl_ga100.enable = true;
nvdebug_writel(g, channel_regs_base + sizeof(chan_ctl_ga100) * chid(g, chan), chan_ctl_ga100.raw);
}
}
// Skip past this TSG entry and all of its channel entries
pos += 1 + tsg_length(g, rl_iter.curr_entry);
rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry);
// TODO: Fix the above for bare channels. Add "for_chan_until_tsg"?
}
// Resubmit the runlist to ensure that changes to channel enablement are
// picked up on Turing+ GPUs (channel enablements may not be otherwise).
if (g->chip_id >= NV_CHIP_ID_TURING)
if ((err = resubmit_runlist(g, target_runlist)))
return err;
// Trigger a runlist-level preempt to stop whatever was running, triggering
// the runlist scheduler to select and run the next-enabled channel.
if ((err = preempt_runlist(g, target_runlist)))
return err;
return count;
}
// File operations for the write-only switch_to_tsg file
struct file_operations switch_to_tsg_file_ops = {
	.llseek = default_llseek,
	.write = switch_to_tsg_file_write,
};