// Copyright 2024 Joshua Bakita
#include "nvdebug_linux.h"
// Maximum number of LCEs that we will print
#define MAX_LCES 32
/* Which Logical Copy Engine (LCE) maps to a given Physical Copy Engine (PCE)?
@param pce_id PCE index
@return LCE index if a mapping exists, -ENODEV if no LCE is mapped, and -errno otherwise
*/
int get_lce_for_pce(struct nvdebug_state *g, uint8_t pce_id) {
int res;
// LCEs only exist on Pascal+
if (g->chip_id < NV_CHIP_ID_PASCAL)
return -EOPNOTSUPP;
if (g->chip_id < NV_CHIP_ID_VOLTA) {
uint32_t config = nvdebug_readl(g, NV_LCE_FOR_PCE_GP100);
if (config == -1)
return -EIO;
// On Pascal, two PCE configurations are packed per byte (one 4-bit nibble each).
res = (config >> (pce_id * 4)) & 0xf;
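// Illustrative example with a made-up register value: if config were 0x00347216,
// the nibble for pce_id == 2 would be (0x00347216 >> 8) & 0xf == 0x2, i.e. PCE2 -> LCE2.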
// 0x7 is the flag value for unconfigured on Pascal
if (res == 0x7)
return -ENODEV;
} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
res = nvdebug_readl(g, NV_LCE_FOR_PCE_GV100(pce_id));
// On the Titan V (GV100), a bogus value of 0xbadf3000 has been observed if the
// GPU has not been used since reset
if (res == -1 || res == 0xbadf3000)
return -EIO;
} else {
// Works through at least Ada
res = nvdebug_readl(g, NV_LCE_FOR_PCE_GA100(pce_id));
if (res == -1)
return -EIO;
}
// At least on Volta through Ampere, 0xf is a flag value for unconfigured.
if (res == 0xf)
return -ENODEV;
return res;
}
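/* Example of the return-value contract above (illustrative sketch only):
 *
 *   int lce = get_lce_for_pce(g, 0);
 *   if (lce == -ENODEV)
 *       printk(KERN_INFO "[nvdebug] PCE0 is not mapped to any LCE\n");
 *   else if (lce < 0)
 *       return lce; // -EOPNOTSUPP pre-Pascal, -EIO on a failed register read
 *   else
 *       printk(KERN_INFO "[nvdebug] PCE0 -> LCE%d\n", lce);
 */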
/* Which LCE does this GRaphics Copy Engine (GRCE) map to?
@param grce_id GRCE index
@return index of the shared LCE if a mapping exists, -ENODEV if no mapping, and -errno otherwise
*/
int get_shared_lce_for_grce(struct nvdebug_state *g, uint8_t grce_id) {
int res;
uint32_t config;
// LCEs only exist on Pascal+
if (g->chip_id < NV_CHIP_ID_PASCAL)
return -EOPNOTSUPP;
if (g->chip_id < NV_CHIP_ID_VOLTA) {
if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(0))) == -1)
return -EIO;
// One configuration byte per GRCE; bit 3 (mask 0x8) is set if this GRCE shares an LCE
if (((config >> (grce_id * 8)) & 0x8) == 0)
return -ENODEV;
// The lower 3 bits contain the index of the shared LCE
res = (config >> (grce_id * 8)) & 0x7;
} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(grce_id))) == -1)
return -EIO;
// Only the lower 4 bits contain the mapping
res = config & 0xf;
if (res == 0xf)
return -ENODEV;
} else {
// Works through at least Ada
if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GA100(grce_id))) == -1)
return -EIO;
// Only the lower 4 bits contain the mapping
res = config & 0xf;
if (res == 0xf)
return -ENODEV;
}
return res;
}
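/* Illustrative interpretation (values hypothetical): if get_shared_lce_for_grce(g, 0)
 * returned 5, GRCE0 would share the PCEs assigned to LCE5 rather than having its
 * own; a return of -ENODEV would mean no shared-LCE mapping is configured for
 * GRCE0 (e.g. because it maps directly to a PCE).
 */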
typedef struct {
// How to interpret the `ce` field for this LCE
enum {INVALID_CE, SHARED_LCE, PCE} type;
// PCE index if type == PCE; index of the LCE whose PCEs are shared if type == SHARED_LCE
uint8_t ce;
} lce2pce_entry_t;
/* Which PCE/LCE is each LCE mapped to?
@param lce2pce Array of lce2pce_entry_t to store mappings in
@param lce2pce_len Number of array entries; at least 16 recommended
@return -errno on error, 0 on success.
*/
int get_pces_for_lces(struct nvdebug_state *g, lce2pce_entry_t *lce2pce, int lce2pce_len) {
uint32_t pce_id, grce_id, ce_pce_map;
// Mark every entry invalid; INVALID_CE is the zero enumerator, so a byte-wise memset suffices
memset(lce2pce, INVALID_CE, lce2pce_len * sizeof(lce2pce_entry_t));
if ((ce_pce_map = nvdebug_readl(g, NV_CE_PCE_MAP)) == -1)
return -EIO;
// Pull configuration for LCEs which directly map to a PCE
for (pce_id = 0; pce_id < NV_CE_PCE_MAP_SIZE; pce_id++) {
int lce;
// Skip reading configuration if PCE is disabled
if (((1 << pce_id) & ce_pce_map) == 0)
continue;
lce = get_lce_for_pce(g, pce_id);
if (lce == -ENODEV)
continue;
if (lce < 0)
return lce;
// Refuse to index past the end of the caller-provided table
if (lce >= lce2pce_len)
return -ERANGE;
lce2pce[lce].type = PCE;
lce2pce[lce].ce = pce_id;
}
// Pull configuration for LCEs which share a PCE with another LCE
// GRCE0 is synonymous with LCE0 (GRCE1 and LCE1 likewise)
// Only aware of up to two GRCEs per GPU
for (grce_id = 0; grce_id < NV_GRCE_MAX; grce_id++) {
int shared_lce;
// GRCEs with a PCE already associated do not share with an LCE
if (lce2pce[grce_id].type != INVALID_CE)
continue;
shared_lce = get_shared_lce_for_grce(g, grce_id);
// Each GRCE should be associated with a PCE or shared LCE
if (shared_lce == -ENODEV) {
printk(KERN_WARNING "[nvdebug] GRCE%d unconfigured.\n", grce_id);
continue;
}
if (shared_lce < 0)
return shared_lce;
lce2pce[grce_id].type = SHARED_LCE;
lce2pce[grce_id].ce = shared_lce;
}
return 0;
}
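/* Illustrative sketch of consuming the table (this mirrors what
 * copy_topology_file_seq_show() does below):
 *
 *   lce2pce_entry_t table[MAX_LCES];
 *   int i, err = get_pces_for_lces(g, table, MAX_LCES);
 *   if (err < 0)
 *       return err;
 *   for (i = 0; i < MAX_LCES; i++) {
 *       if (table[i].type == PCE)
 *           printk(KERN_INFO "[nvdebug] LCE%d -> PCE%d\n", i, table[i].ce);
 *       else if (table[i].type == SHARED_LCE)
 *           printk(KERN_INFO "[nvdebug] LCE%d shares with LCE%d\n", i, table[i].ce);
 *   }
 */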
typedef struct {
int idx; // Index of LCE to print
lce2pce_entry_t lce2pce[MAX_LCES]; // MAX_LCES-length table from get_pces_for_lces()
} copy_topology_iter_t;
// The *_seq_* functions in this file follow the patterns in
// device_info_procfs.c. See there for comments on implementation.
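// (seq_file lifecycle recap: on each read, the kernel calls ->start(), then
// alternates ->show() and ->next() until ->next() returns NULL, then ->stop().)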
static void *copy_topology_file_seq_start(struct seq_file *s, loff_t *pos) {
struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
static copy_topology_iter_t iter;
int err;
if (*pos == 0)
iter.idx = 0;
if ((err = get_pces_for_lces(g, iter.lce2pce, MAX_LCES)) < 0)
return ERR_PTR(err);
if (iter.idx >= MAX_LCES)
return NULL;
return &iter;
}
static void* copy_topology_file_seq_next(struct seq_file *s, void *iter_raw,
loff_t *pos) {
copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
(*pos)++; // Required by seq interface
if (++iter->idx >= MAX_LCES)
return NULL;
return iter;
}
static int copy_topology_file_seq_show(struct seq_file *s, void *iter_raw) {
copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
lce2pce_entry_t entry = iter->lce2pce[iter->idx];
if (entry.type == INVALID_CE)
return 0;
// First half: The LCE/GRCE in question
if (iter->idx >= NV_GRCE_MAX)
seq_printf(s, "LCE%02d -> ", iter->idx);
else
seq_printf(s, "GRCE%d -> ", iter->idx);
// Second half: The PCE/LCE/GRCE that the LCE/GRCE in question is mapped to
if (entry.type == PCE)
seq_printf(s, "PCE%02d\n", entry.ce);
else if (entry.ce >= NV_GRCE_MAX) // Shared LCE
seq_printf(s, "LCE%02d\n", entry.ce);
else // Shared GRCE
seq_printf(s, "GRCE%d\n", entry.ce);
return 0;
}
static void copy_topology_file_seq_stop(struct seq_file *s, void *lce2pce) {
// No cleanup needed
}
static const struct seq_operations copy_topology_file_seq_ops = {
.start = copy_topology_file_seq_start,
.next = copy_topology_file_seq_next,
.show = copy_topology_file_seq_show,
.stop = copy_topology_file_seq_stop,
};
static int copy_topology_file_open(struct inode *inode, struct file *f) {
return seq_open(f, &copy_topology_file_seq_ops);
}
struct file_operations copy_topology_file_ops = {
.open = copy_topology_file_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
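/* Registration sketch (assumption: the actual hookup lives in nvdebug's module
 * init code, and the file name, mode, and parent directory shown here are
 * illustrative, not authoritative):
 *
 *   proc_create("copy_topology", 0444, per_gpu_proc_dir, &copy_topology_file_ops);
 *
 * copy_topology_file_seq_start() then recovers which GPU to query via
 * file2parentgpuidx().
 */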