aboutsummaryrefslogblamecommitdiffstats
path: root/copy_topology_procfs.c
blob: cfedce785b3490f0231aba45876b501a3eca1c2e (plain) (tree)













































































































































































































                                                                                             
// Copyright 2024 Joshua Bakita

#include "nvdebug.h"

// Maximum number of LCEs that we will print
#define MAX_LCES 32

/* Which Logical Copy Engine (LCE) maps to a given Physical Copy Engine (PCE)?
  @param g      Per-GPU state; g->chip_id selects the register layout, as the
                LCE-for-PCE configuration registers moved between generations
  @param pce_id PCE index to look up
  @return LCE index if mapping, -ENODEV on no mapping, and -errno otherwise
*/
int get_lce_for_pce(struct nvdebug_state *g, uint8_t pce_id) {
	int res;
	// LCEs only exist on Pascal+
	if (g->chip_id < NV_CHIP_ID_PASCAL)
		return -EOPNOTSUPP;

	if (g->chip_id < NV_CHIP_ID_VOLTA) {
		// nvdebug_readl() signals a failed read with (uint32_t)-1; the
		// `config == -1` comparison matches that after integer conversion
		uint32_t config = nvdebug_readl(g, NV_LCE_FOR_PCE_GP100);
		if (config == -1)
			return -EIO;
		// On Pascal, two PCE configurations are packed per-byte (each PCE
		// gets a 4-bit field in a single shared register).
		res = (config >> (pce_id * 4)) & 0xf;
		// 0x7 is the flag value for unconfigured on Pascal
		if (res == 0x7)
			return -ENODEV;
	} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
		// Volta/Turing: one full register per PCE
		res = nvdebug_readl(g, NV_LCE_FOR_PCE_GV100(pce_id));
		// On the Titan V (GV100), bogus 0xbadf3000 observed if the GPU has yet to be
		// used since reset
		if (res == -1 || res == 0xbadf3000)
			return -EIO;
	} else {
		// Works through at least Ada
		res = nvdebug_readl(g, NV_LCE_FOR_PCE_GA100(pce_id));
		if (res == -1)
			return -EIO;
	}
	// At least on Volta through Ampere, 0xf is a flag value for unconfigured.
	// NOTE(review): this check also runs on the Pascal path, where res is a
	// 4-bit field; a Pascal value of 0xf is presumed unreachable — confirm.
	if (res == 0xf)
		return -ENODEV;
	return res;
}

/* Which LCE does this GRaphics Copy Engine (GRCE) map to?
  @param g       Per-GPU state; g->chip_id selects the register layout
  @param grce_id GRCE index
  @return LCE index if mapping, -ENODEV on no mapping, and -errno otherwise
*/
int get_shared_lce_for_grce(struct nvdebug_state *g, uint8_t grce_id) {
	int res;
	uint32_t config;
	// LCEs only exist on Pascal+
	if (g->chip_id < NV_CHIP_ID_PASCAL)
		return -EOPNOTSUPP;

	if (g->chip_id < NV_CHIP_ID_VOLTA) {
		// Pascal: all GRCE configs share register 0, one byte per GRCE
		if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(0))) == -1)
			return -EIO;
		// One config per byte; bit 3 (mask 0x8) flags if shared
		if (((config >> (grce_id * 8)) & 0x8) == 0)
			return -ENODEV;
		// lower 3 bits contain the mapping
		res = (config >> (grce_id * 8)) & 0x7;
	} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
		// NOTE(review): Volta/Turing path reuses the GP100 register macro but
		// indexes it per-GRCE — presumably layout-compatible; confirm against
		// hardware headers
		if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(grce_id))) == -1)
			return -EIO;
		// Only the lower 4 bits contain the mapping
		res = config & 0xf;
		// 0xf flags no shared-LCE mapping
		if (res == 0xf)
			return -ENODEV;
	} else {
		// Works through at least Ada
		if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GA100(grce_id))) == -1)
			return -EIO;
		// Only the lower 4 bits contain the mapping
		res = config & 0xf;
		// 0xf flags no shared-LCE mapping
		if (res == 0xf)
			return -ENODEV;
	}
	return res;
}

// One entry in the LCE-indexed mapping table: an LCE is either unconfigured
// (INVALID_CE), shares a PCE with another LCE (SHARED_LCE), or owns a PCE
// outright (PCE). INVALID_CE is deliberately first (value 0) so that
// zero-initialized entries read as unconfigured.
typedef struct {
	enum {INVALID_CE, SHARED_LCE, PCE} type;
	uint8_t ce; // PCE index when type == PCE; LCE index when type == SHARED_LCE
} lce2pce_entry_t;

/* Which PCE/LCE is each LCE mapped to?
  @param g           Per-GPU state
  @param lce2pce     Array of lce2pce_entry_t to store mappings in
  @param lce2pce_len Number of array entries; at least 16 recommended
  @return -errno on error, 0 on success.

  Entries for LCEs with no mapping are left with type == INVALID_CE.
*/
int get_pces_for_lces(struct nvdebug_state *g, lce2pce_entry_t *lce2pce, int lce2pce_len) {
	uint32_t pce_id, grce_id, ce_pce_map;
	int i;

	// Explicitly mark every entry unconfigured rather than using memset(),
	// so correctness does not hinge on INVALID_CE having the value 0
	for (i = 0; i < lce2pce_len; i++) {
		lce2pce[i].type = INVALID_CE;
		lce2pce[i].ce = 0;
	}

	if ((ce_pce_map = nvdebug_readl(g, NV_CE_PCE_MAP)) == -1)
		return -EIO;
	// Pull configuration for LCEs which directly map to a PCE
	for (pce_id = 0; pce_id < NV_CE_PCE_MAP_SIZE; pce_id++) {
		int lce;
		// Skip reading configuration if PCE is disabled
		// (1u avoids undefined behavior if pce_id reaches 31)
		if (((1u << pce_id) & ce_pce_map) == 0)
			continue;
		lce = get_lce_for_pce(g, pce_id);
		if (lce == -ENODEV)
			continue;
		if (lce < 0)
			return lce;
		// >= (not >): lce == lce2pce_len would write one past the end
		if (lce >= lce2pce_len)
			return -ERANGE;
		lce2pce[lce].type = PCE;
		lce2pce[lce].ce = pce_id;
	}
	// Pull configuration for LCEs which share a PCE with another LCE
	// GRCE0 is synonymous with LCE0 (GRCE1 and LCE1 likewise)
	// Only aware of up to two GRCEs per GPU
	// (also bounded by lce2pce_len to keep lce2pce[grce_id] in range)
	for (grce_id = 0; grce_id < NV_GRCE_MAX && grce_id < (uint32_t)lce2pce_len; grce_id++) {
		int shared_lce;
		// GRCEs with a PCE already associated do not share with an LCE
		if (lce2pce[grce_id].type != INVALID_CE)
			continue;
		shared_lce = get_shared_lce_for_grce(g, grce_id);
		// Each GRCE should be associated with a PCE or shared LCE
		if (shared_lce == -ENODEV) {
			printk(KERN_WARNING "[nvdebug] GRCE%d unconfigured.\n", grce_id);
			continue;
		}
		if (shared_lce < 0)
			return shared_lce;
		lce2pce[grce_id].type = SHARED_LCE;
		lce2pce[grce_id].ce = shared_lce;
	}
	return 0;
}

// Iteration state for the seq_file interface below: a cached copy of the full
// mapping table plus a cursor into it.
typedef struct {
	int idx; // Index of LCE to print
	lce2pce_entry_t lce2pce[MAX_LCES]; // MAX_LCES-length table from get_pces_for_lces()
} copy_topology_iter_t;

// The *_seq_* functions in this file follow the patterns in
// device_info_procfs.c. See there for comments on implementation.
static void *copy_topology_file_seq_start(struct seq_file *s, loff_t *pos) {
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
	// NOTE(review): `static` iterator is shared by all concurrent readers of
	// this file — matches the device_info_procfs.c pattern, but is not
	// reentrant; confirm readers are effectively serialized
	static copy_topology_iter_t iter;
	int err;
	// *pos == 0 means a fresh read (not a continuation): rewind the cursor
	if (*pos == 0)
		iter.idx = 0;
	// Re-query the topology on every (re)start; it may change between reads
	if ((err = get_pces_for_lces(g, iter.lce2pce, MAX_LCES)) < 0)
		return ERR_PTR(err);
	// NULL tells seq_file the iteration is already complete
	if (iter.idx >= MAX_LCES)
		return NULL;
	return &iter;
}

// Advance to the next LCE index; returns NULL once all MAX_LCES entries have
// been visited, which terminates the seq_file iteration.
static void* copy_topology_file_seq_next(struct seq_file *s, void *iter_raw,
                                         loff_t *pos) {
	copy_topology_iter_t *it = (copy_topology_iter_t*)iter_raw;
	++*pos; // The seq_file interface requires the position to advance
	it->idx++;
	if (it->idx < MAX_LCES)
		return it;
	return NULL;
}

// Print one line of topology output for the current LCE index, e.g.
// "GRCE0 -> PCE00" or "LCE04 -> GRCE1". Unconfigured entries print nothing.
static int copy_topology_file_seq_show(struct seq_file *s, void *iter_raw) {
	copy_topology_iter_t *it = (copy_topology_iter_t*)iter_raw;
	lce2pce_entry_t mapping = it->lce2pce[it->idx];
	if (mapping.type == INVALID_CE)
		return 0;
	// Left-hand side: the engine being described. The first NV_GRCE_MAX LCEs
	// double as GRCEs and are labeled as such.
	if (it->idx < NV_GRCE_MAX)
		seq_printf(s, "GRCE%d -> ", it->idx);
	else
		seq_printf(s, "LCE%02d -> ", it->idx);
	// Right-hand side: what it maps to — a dedicated PCE, or the LCE/GRCE it
	// shares one with.
	if (mapping.type == PCE) {
		seq_printf(s, "PCE%02d\n", mapping.ce);
		return 0;
	}
	if (mapping.ce < NV_GRCE_MAX)
		seq_printf(s, "GRCE%d\n", mapping.ce);
	else
		seq_printf(s, "LCE%02d\n", mapping.ce);
	return 0;
}

static void copy_topology_file_seq_stop(struct seq_file *s, void *lce2pce) {
	// No cleanup needed: the iterator is statically allocated in
	// copy_topology_file_seq_start(), so nothing was acquired per-read
}

// seq_file callbacks: iterate LCE indices [0, MAX_LCES) and print one mapping
// per line via the show handler above
static const struct seq_operations copy_topology_file_seq_ops = {
	.start = copy_topology_file_seq_start,
	.next = copy_topology_file_seq_next,
	.show = copy_topology_file_seq_show,
	.stop = copy_topology_file_seq_stop,
};

// procfs open handler: attach the seq_file iterator above to this open file
static int copy_topology_file_open(struct inode *inode, struct file *f) {
	return seq_open(f, &copy_topology_file_seq_ops);
}

// File operations for the copy_topology procfs entry. Non-static: registered
// by module-init code elsewhere in the project.
// NOTE(review): procfs on kernels >= 5.6 registers via struct proc_ops rather
// than file_operations — confirm compatibility with the target kernel range.
struct file_operations copy_topology_file_ops = {
	.open = copy_topology_file_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};