Diffstat (limited to 'copy_topology_procfs.c')
-rw-r--r--  copy_topology_procfs.c  206
1 file changed, 206 insertions, 0 deletions
diff --git a/copy_topology_procfs.c b/copy_topology_procfs.c
new file mode 100644
index 0000000..cfedce7
--- /dev/null
+++ b/copy_topology_procfs.c
@@ -0,0 +1,206 @@
// Copyright 2024 Joshua Bakita

#include "nvdebug.h"

// Maximum number of LCEs that we will print
#define MAX_LCES 32

/* Which Logical Copy Engine (LCE) maps to a given Physical Copy Engine (PCE)?
  @param pce_id PCE index
  @return LCE index if a mapping exists, -ENODEV if unmapped, and -errno otherwise
*/
int get_lce_for_pce(struct nvdebug_state *g, uint8_t pce_id) {
	int res;
	// LCEs only exist on Pascal+
	if (g->chip_id < NV_CHIP_ID_PASCAL)
		return -EOPNOTSUPP;

	if (g->chip_id < NV_CHIP_ID_VOLTA) {
		uint32_t config = nvdebug_readl(g, NV_LCE_FOR_PCE_GP100);
		if (config == -1)
			return -EIO;
		// On Pascal, two PCE configurations are packed per byte (4 bits each).
		res = (config >> (pce_id * 4)) & 0xf;
		// 0x7 is the flag value for unconfigured on Pascal
		if (res == 0x7)
			return -ENODEV;
	} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
		res = nvdebug_readl(g, NV_LCE_FOR_PCE_GV100(pce_id));
		// On the Titan V (GV100), a bogus value of 0xbadf3000 has been
		// observed if the GPU has not been used since reset
		if (res == -1 || res == 0xbadf3000)
			return -EIO;
	} else {
		// Works through at least Ada
		res = nvdebug_readl(g, NV_LCE_FOR_PCE_GA100(pce_id));
		if (res == -1)
			return -EIO;
	}
	// At least on Volta through Ampere, 0xf is a flag value for unconfigured.
	if (res == 0xf)
		return -ENODEV;
	return res;
}
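
/* Worked example for the Pascal packing above, using an illustrative register
 * value (not taken from real hardware): if NV_LCE_FOR_PCE_GP100 reads
 * 0x00007710, then:
 *   get_lce_for_pce(g, 0) == 0        (nibble 0x0 -> LCE0)
 *   get_lce_for_pce(g, 1) == 1        (nibble 0x1 -> LCE1)
 *   get_lce_for_pce(g, 2) == -ENODEV  (nibble 0x7, unconfigured)
 *   get_lce_for_pce(g, 3) == -ENODEV  (nibble 0x7, unconfigured)
 */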

/* Which LCE does this GRaphics Copy Engine (GRCE) map to?
  @param grce_id GRCE index
  @return LCE index if a mapping exists, -ENODEV if unmapped, and -errno otherwise
*/
int get_shared_lce_for_grce(struct nvdebug_state *g, uint8_t grce_id) {
	int res;
	uint32_t config;
	// LCEs only exist on Pascal+
	if (g->chip_id < NV_CHIP_ID_PASCAL)
		return -EOPNOTSUPP;

	if (g->chip_id < NV_CHIP_ID_VOLTA) {
		if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(0))) == -1)
			return -EIO;
		// One config per byte; the 0x8 bit flags whether the GRCE shares an LCE
		if (((config >> (grce_id * 8)) & 0x8) == 0)
			return -ENODEV;
		// The lower 3 bits contain the mapping
		res = (config >> (grce_id * 8)) & 0x7;
	} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
		if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(grce_id))) == -1)
			return -EIO;
		// Only the lower 4 bits contain the mapping
		res = config & 0xf;
		if (res == 0xf)
			return -ENODEV;
	} else {
		// Works through at least Ada
		if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GA100(grce_id))) == -1)
			return -EIO;
		// Only the lower 4 bits contain the mapping
		res = config & 0xf;
		if (res == 0xf)
			return -ENODEV;
	}
	return res;
}
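
/* Worked example for the Pascal case above, using an illustrative register
 * value (not taken from real hardware): if NV_GRCE_FOR_CE_GP100(0) reads
 * 0x0000000b, byte 0 is 0x0b (0x8 bit set, lower 3 bits == 3) and byte 1 is
 * 0x00 (0x8 bit clear), so:
 *   get_shared_lce_for_grce(g, 0) == 3        (GRCE0 shares LCE3)
 *   get_shared_lce_for_grce(g, 1) == -ENODEV  (GRCE1 does not share an LCE)
 */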

typedef struct {
	enum {INVALID_CE, SHARED_LCE, PCE} type;
	uint8_t ce; // PCE index if type == PCE; LCE index if type == SHARED_LCE
} lce2pce_entry_t;

/* Which PCE/LCE is each LCE mapped to?
  @param lce2pce Array of lce2pce_entry_t to store mappings in
  @param lce2pce_len Number of array entries; at least 16 recommended
  @return -errno on error, 0 on success.
*/
int get_pces_for_lces(struct nvdebug_state *g, lce2pce_entry_t *lce2pce, int lce2pce_len) {
	uint32_t pce_id, grce_id, ce_pce_map;
	// Zero-fill the table; INVALID_CE is 0, so this marks every entry invalid
	memset(lce2pce, INVALID_CE, lce2pce_len * sizeof(lce2pce_entry_t));

	if ((ce_pce_map = nvdebug_readl(g, NV_CE_PCE_MAP)) == -1)
		return -EIO;
	// Pull configuration for LCEs which directly map to a PCE
	for (pce_id = 0; pce_id < NV_CE_PCE_MAP_SIZE; pce_id++) {
		int lce;
		// Skip reading configuration if PCE is disabled
		if (((1 << pce_id) & ce_pce_map) == 0)
			continue;
		lce = get_lce_for_pce(g, pce_id);
		if (lce == -ENODEV)
			continue;
		if (lce < 0)
			return lce;
		if (lce >= lce2pce_len)
			return -ERANGE;
		lce2pce[lce].type = PCE;
		lce2pce[lce].ce = pce_id;
	}
	// Pull configuration for LCEs which share a PCE with another LCE
	// GRCE0 is synonymous with LCE0 (GRCE1 and LCE1 likewise)
	// Only aware of up to two GRCEs per GPU
	for (grce_id = 0; grce_id < NV_GRCE_MAX; grce_id++) {
		int shared_lce;
		// GRCEs with a PCE already associated do not share with an LCE
		if (lce2pce[grce_id].type != INVALID_CE)
			continue;
		shared_lce = get_shared_lce_for_grce(g, grce_id);
		// Each GRCE should be associated with a PCE or shared LCE
		if (shared_lce == -ENODEV) {
			printk(KERN_WARNING "[nvdebug] GRCE%d unconfigured.\n", grce_id);
			continue;
		}
		if (shared_lce < 0)
			return shared_lce;
		lce2pce[grce_id].type = SHARED_LCE;
		lce2pce[grce_id].ce = shared_lce;
	}
	return 0;
}
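
/* Usage sketch: this mirrors how copy_topology_file_seq_start() below consumes
 * the table; MAX_LCES entries are assumed sufficient for current GPUs.
 *
 *   lce2pce_entry_t table[MAX_LCES];
 *   int err = get_pces_for_lces(g, table, MAX_LCES);
 *   if (err < 0)
 *       return err;
 *   // table[i].type now indicates whether LCE i maps directly to a PCE
 *   // (table[i].ce is a PCE index), shares another LCE (table[i].ce is an
 *   // LCE index), or is unconfigured (INVALID_CE).
 */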

typedef struct {
	int idx; // Index of LCE to print
	lce2pce_entry_t lce2pce[MAX_LCES]; // MAX_LCES-length table from get_pces_for_lces()
} copy_topology_iter_t;

// The *_seq_* functions in this file follow the patterns in
// device_info_procfs.c. See there for comments on implementation.
static void *copy_topology_file_seq_start(struct seq_file *s, loff_t *pos) {
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
	static copy_topology_iter_t iter;
	int err;
	if (*pos == 0)
		iter.idx = 0;
	if ((err = get_pces_for_lces(g, iter.lce2pce, MAX_LCES)) < 0)
		return ERR_PTR(err);
	if (iter.idx >= MAX_LCES)
		return NULL;
	return &iter;
}

static void* copy_topology_file_seq_next(struct seq_file *s, void *iter_raw,
                                         loff_t *pos) {
	copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
	(*pos)++; // Required by seq interface
	if (++iter->idx >= MAX_LCES)
		return NULL;
	return iter;
}

static int copy_topology_file_seq_show(struct seq_file *s, void *iter_raw) {
	copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
	lce2pce_entry_t entry = iter->lce2pce[iter->idx];
	if (entry.type == INVALID_CE)
		return 0;
	// First half: The LCE/GRCE in question
	if (iter->idx >= NV_GRCE_MAX)
		seq_printf(s, "LCE%02d -> ", iter->idx);
	else
		seq_printf(s, "GRCE%d -> ", iter->idx);
	// Second half: The PCE/LCE/GRCE that the LCE/GRCE in question is mapped to
	if (entry.type == PCE)
		seq_printf(s, "PCE%02d\n", entry.ce);
	else if (entry.ce >= NV_GRCE_MAX) // Shared LCE
		seq_printf(s, "LCE%02d\n", entry.ce);
	else // Shared GRCE
		seq_printf(s, "GRCE%d\n", entry.ce);
	return 0;
}
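
/* Example of the resulting procfs output for an illustrative (made-up)
 * topology; per the format strings above, LCE and PCE indices are zero-padded
 * while GRCE indices are not:
 *   GRCE0 -> PCE00
 *   GRCE1 -> LCE04
 *   LCE02 -> PCE01
 *   LCE04 -> PCE02
 */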

static void copy_topology_file_seq_stop(struct seq_file *s, void *lce2pce) {
	// No cleanup needed
}

static const struct seq_operations copy_topology_file_seq_ops = {
	.start = copy_topology_file_seq_start,
	.next = copy_topology_file_seq_next,
	.show = copy_topology_file_seq_show,
	.stop = copy_topology_file_seq_stop,
};

static int copy_topology_file_open(struct inode *inode, struct file *f) {
	return seq_open(f, &copy_topology_file_seq_ops);
}

struct file_operations copy_topology_file_ops = {
	.open = copy_topology_file_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
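
/* Registration sketch (hypothetical names): copy_topology_file_ops is
 * presumably exposed under a per-GPU procfs directory alongside the other
 * nvdebug files, e.g.
 *   proc_create("copy_topology", 0444, gpu_dir, &copy_topology_file_ops);
 * with file2parentgpuidx() in seq_start() recovering the GPU index from that
 * parent directory.
 */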