summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2024-04-08 13:33:28 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2024-04-08 14:02:07 -0400
commitac60151ea0a4a1f3882fde3c486af870029b7977 (patch)
tree4a32ead49a987030cb4222c687b23bc1f23d4d34
parent3aab3c220f3f0bcc3d3d58d0daf6fd6acf1819e2 (diff)
Rework LCE<->PCE and GRCE->LCE configuration printing APIarchive/saman63-wip
Rather than up to dozens of individual files exposing part of each copy engine's configuration, have one file which exposes a unified view of the full topology. Example new output on RTX 2080 Ti: $ cat /proc/gpu0/copy_topology GRCE0 -> LCE04 GRCE1 -> LCE03 LCE02 -> PCE02 LCE03 -> PCE03 LCE04 -> PCE01 Old output: $ tail -n 1 /proc/gpu0/lce_for_pce* ==> /proc/gpu0/lce_for_pce0 <== 0xf ==> /proc/gpu0/lce_for_pce1 <== 0x4 ==> /proc/gpu0/lce_for_pce2 <== 0x2 ==> /proc/gpu0/lce_for_pce3 <== 0x3 $ tail -n 1 /proc/gpu0/shared_lce_for_grce* ==> /proc/gpu0/shared_lce_for_grce0 <== 0x4 ==> /proc/gpu0/shared_lce_for_grce1 <== 0x3 Specifically: - Add `copy_topology` API - Remove `shared_lce_for_grce#` and `lce_for_pce#` APIs - Move logic from `nvdebug_entry.c` to `copy_topology_procfs.c` - Do not print PCE or Shared LCE configuration if flagged absent - Refer to LCE0 and LCE1 as GRCE0 and GRCE1 - Print by LCE ID, which is more helpful when attempting to trace how a given copy runlist maps to a physical copy engine. - Document two errata with CE registers Tested working on Pascal Integrated, Pascal, Volta Integrated, Volta, Turing, and Ampere Integrated on Linux 4.9 through 5.10.
-rw-r--r--Makefile3
-rw-r--r--copy_topology_procfs.c206
-rw-r--r--nvdebug.h18
-rw-r--r--nvdebug_entry.c106
4 files changed, 233 insertions, 100 deletions
diff --git a/Makefile b/Makefile
index 2dc90c7..2e588fa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
1obj-m += nvdebug.o 1obj-m += nvdebug.o
2nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o nvdebug_entry.o 2nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \
3 nvdebug_entry.o copy_topology_procfs.o
3KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" 4KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
4# -mfentry above if not building due to mcount missing 5# -mfentry above if not building due to mcount missing
5 6
diff --git a/copy_topology_procfs.c b/copy_topology_procfs.c
new file mode 100644
index 0000000..cfedce7
--- /dev/null
+++ b/copy_topology_procfs.c
@@ -0,0 +1,206 @@
1// Copyright 2024 Joshua Bakita
2
3#include "nvdebug.h"
4
// Maximum number of LCEs that we will print. Also sizes the LCE mapping table
// embedded in copy_topology_iter_t.
#define MAX_LCES 32
7
8/* Which Logical Copy Engine (LCE) maps to a given Physical Copy Engine (PCE)?
9 @param pce_id PCE index
10 @return LCE index if mapping, -ENODEV on no mapping, and -errno otherwise
11*/
12int get_lce_for_pce(struct nvdebug_state *g, uint8_t pce_id) {
13 int res;
14 // LCEs only exist on Pascal+
15 if (g->chip_id < NV_CHIP_ID_PASCAL)
16 return -EOPNOTSUPP;
17
18 if (g->chip_id < NV_CHIP_ID_VOLTA) {
19 uint32_t config = nvdebug_readl(g, NV_LCE_FOR_PCE_GP100);
20 if (config == -1)
21 return -EIO;
22 // On Pascal, two PCE configurations are packed per-byte.
23 res = (config >> (pce_id * 4)) & 0xf;
24 // 0x7 is the flag value for unconfigured on Pascal
25 if (res == 0x7)
26 return -ENODEV;
27 } else if (g->chip_id < NV_CHIP_ID_AMPERE) {
28 res = nvdebug_readl(g, NV_LCE_FOR_PCE_GV100(pce_id));
29 // On the Titan V (GV100), bogus 0xbadf3000 observed if the GPU has yet to be
30 // used since reset
31 if (res == -1 || res == 0xbadf3000)
32 return -EIO;
33 } else {
34 // Works through at least Ada
35 res = nvdebug_readl(g, NV_LCE_FOR_PCE_GA100(pce_id));
36 if (res == -1)
37 return -EIO;
38 }
39 // At least on Volta through Ampere, 0xf is a flag value for unconfigured.
40 if (res == 0xf)
41 return -ENODEV;
42 return res;
43}
44
45/* Which LCE does this GRaphics Copy Engine (GRCE) map to?
46 @param grce_id GRCE index
47 @return LCE index if mapping, -ENODEV on no mapping, and -errno otherwise
48*/
49int get_shared_lce_for_grce(struct nvdebug_state *g, uint8_t grce_id) {
50 int res;
51 uint32_t config;
52 // LCEs only exist on Pascal+
53 if (g->chip_id < NV_CHIP_ID_PASCAL)
54 return -EOPNOTSUPP;
55
56 if (g->chip_id < NV_CHIP_ID_VOLTA) {
57 if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(0))) == -1)
58 return -EIO;
59 // One config per byte; bit 4 flags if shared
60 if (((config >> (grce_id * 8)) & 0x8) == 0)
61 return -ENODEV;
62 // lower 3 bits contain the mapping
63 res = (config >> (grce_id * 8)) & 0x7;
64 } else if (g->chip_id < NV_CHIP_ID_AMPERE) {
65 if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(grce_id))) == -1)
66 return -EIO;
67 // Only the lower 4 bits contain the mapping
68 res = config & 0xf;
69 if (res == 0xf)
70 return -ENODEV;
71 } else {
72 // Works through at least Ada
73 if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GA100(grce_id))) == -1)
74 return -EIO;
75 // Only the lower 4 bits contain the mapping
76 res = config & 0xf;
77 if (res == 0xf)
78 return -ENODEV;
79 }
80 return res;
81}
82
// One row of the LCE mapping table: what a given LCE (the table index) maps to.
typedef struct {
	// Kind of engine named by `ce`. INVALID_CE is the first enumerator (value
	// 0), which get_pces_for_lces() uses as the memset() fill value to mark
	// every entry as unmapped.
	enum {INVALID_CE, SHARED_LCE, PCE} type;
	// PCE index if type is PCE; index of the LCE being shared if SHARED_LCE
	uint8_t ce;
} lce2pce_entry_t;
87
88/* Which PCE/LCE is each LCE mapped to?
89 @param lce2pce Array of lce2pce_entry_t to store mappings in
90 @param lce2pce_len Number of array entries; at least 16 recommended
91 @return -errno on error, 0 on success.
92*/
93int get_pces_for_lces(struct nvdebug_state *g, lce2pce_entry_t *lce2pce, int lce2pce_len) {
94 uint32_t pce_id, grce_id, ce_pce_map;
95 memset(lce2pce, INVALID_CE, lce2pce_len * sizeof(lce2pce_entry_t));
96
97 if ((ce_pce_map = nvdebug_readl(g, NV_CE_PCE_MAP)) == -1)
98 return -EIO;
99 // Pull configuration for LCEs which directly map to a PCE
100 for (pce_id = 0; pce_id < NV_CE_PCE_MAP_SIZE; pce_id++) {
101 int lce;
102 // Skip reading configuration if PCE is disabled
103 if (((1 << pce_id) & ce_pce_map) == 0)
104 continue;
105 lce = get_lce_for_pce(g, pce_id);
106 if (lce == -ENODEV)
107 continue;
108 if (lce < 0)
109 return lce;
110 if (lce > lce2pce_len)
111 return -ERANGE;
112 lce2pce[lce].type = PCE;
113 lce2pce[lce].ce = pce_id;
114 }
115 // Pull configuration for LCEs which share a PCE with another LCE
116 // GRCE0 is synonymous with LCE0 (GRCE1 and LCE1 likewise)
117 // Only aware of up to two GRCEs per GPU
118 for (grce_id = 0; grce_id < NV_GRCE_MAX; grce_id++) {
119 int shared_lce;
120 // GRCEs with a PCE already associated do not share with an LCE
121 if (lce2pce[grce_id].type != INVALID_CE)
122 continue;
123 shared_lce = get_shared_lce_for_grce(g, grce_id);
124 // Each GRCE should be associated with a PCE or shared LCE
125 if (shared_lce == -ENODEV) {
126 printk(KERN_WARNING "[nvdebug] GRCE%d unconfigured.\n", grce_id);
127 continue;
128 }
129 if (shared_lce < 0)
130 return shared_lce;
131 lce2pce[grce_id].type = SHARED_LCE;
132 lce2pce[grce_id].ce = shared_lce;
133 }
134 return 0;
135}
136
// Iterator state handed between the seq_file start/next/show callbacks
typedef struct {
	int idx; // Index of LCE to print
	lce2pce_entry_t lce2pce[MAX_LCES]; // MAX_LCES-length table from get_pces_for_lces()
} copy_topology_iter_t;
141
142// The *_seq_* functions in this file follow the patterns in
143// device_info_procfs.c. See there for comments on implementation.
144static void *copy_topology_file_seq_start(struct seq_file *s, loff_t *pos) {
145 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
146 static copy_topology_iter_t iter;
147 int err;
148 if (*pos == 0)
149 iter.idx = 0;
150 if ((err = get_pces_for_lces(g, iter.lce2pce, MAX_LCES)) < 0)
151 return ERR_PTR(err);
152 if (iter.idx >= MAX_LCES)
153 return NULL;
154 return &iter;
155}
156
157static void* copy_topology_file_seq_next(struct seq_file *s, void *iter_raw,
158 loff_t *pos) {
159 copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
160 (*pos)++; // Required by seq interface
161 if (++iter->idx >= MAX_LCES)
162 return NULL;
163 return iter;
164}
165
166static int copy_topology_file_seq_show(struct seq_file *s, void *iter_raw) {
167 copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
168 lce2pce_entry_t entry = iter->lce2pce[iter->idx];
169 if (entry.type == INVALID_CE)
170 return 0;
171 // First half: The LCE/GRCE in question
172 if (iter->idx >= NV_GRCE_MAX)
173 seq_printf(s, "LCE%02d -> ", iter->idx);
174 else
175 seq_printf(s, "GRCE%d -> ", iter->idx);
176 // Second half: The PCE/LCE/GRCE that the LCE/GRCE in question is mapped to
177 if (entry.type == PCE)
178 seq_printf(s, "PCE%02d\n", entry.ce);
179 else if (entry.ce >= NV_GRCE_MAX) // Shared LCE
180 seq_printf(s, "LCE%02d\n", entry.ce);
181 else // Shared GRCE
182 seq_printf(s, "GRCE%d\n", entry.ce);
183 return 0;
184}
185
// End of an iteration pass. Intentionally empty: nothing is done with the
// iterator here.
static void copy_topology_file_seq_stop(struct seq_file *s, void *iter_raw) {
}
189
190static const struct seq_operations copy_topology_file_seq_ops = {
191 .start = copy_topology_file_seq_start,
192 .next = copy_topology_file_seq_next,
193 .show = copy_topology_file_seq_show,
194 .stop = copy_topology_file_seq_stop,
195};
196
197static int copy_topology_file_open(struct inode *inode, struct file *f) {
198 return seq_open(f, &copy_topology_file_seq_ops);
199}
200
201struct file_operations copy_topology_file_ops = {
202 .open = copy_topology_file_open,
203 .read = seq_read,
204 .llseek = seq_lseek,
205 .release = seq_release,
206};
diff --git a/nvdebug.h b/nvdebug.h
index a9366e0..ac254f0 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -1,4 +1,4 @@
1/* Copyright 2021 Joshua Bakita 1/* Copyright 2024 Joshua Bakita
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 * 3 *
4 * File outline: 4 * File outline:
@@ -688,17 +688,20 @@ typedef union {
688 688
689 SCAL_NUM_CES : Number of externally accessible copy engines 689 SCAL_NUM_CES : Number of externally accessible copy engines
690 690
691 Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be
692 consistent with PTOP data.
693
691 Support: Kepler through (at least) Blackwell 694 Support: Kepler through (at least) Blackwell
692 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 695 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
693*/ 696*/
694#define NV_PTOP_SCAL_NUM_CES 0x00022444 697#define NV_PTOP_SCAL_NUM_CES 0x00022444
695// Defined number of GRCEs for a GPU 698// Defined max number of GRCEs for a GPU (TX2 has only one)
696# define NV_GRCE_NUM 2 699# define NV_GRCE_MAX 2
697// Defined GRCE->CE mapping offsets from nvgpu 700// Defined GRCE->CE mapping offsets from nvgpu
698#define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) 701#define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4)
699#define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) 702#define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4)
700// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) 703// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu)
701#define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) 704#define NV_LCE_FOR_PCE_GP100 0x0010402c
702#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) 705#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4)
703#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) 706#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4)
704// Struct for use with nvdebug_reg_range_read() 707// Struct for use with nvdebug_reg_range_read()
@@ -717,13 +720,14 @@ union reg_range {
717 720
718 CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index 721 CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index
719 is enabled (not floorswept) on this GPU. Count the number of set 722 is enabled (not floorswept) on this GPU. Count the number of set
720 bits to get the number of PCEs. 723 bits to get the number of PCEs. Note that this may be bogus if
724 the GPU has not been used since reset.
721 725
722 Support: Kepler through (at least) Blackwell 726 Support: Pascal through (at least) Blackwell
723 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 727 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
724*/ 728*/
725#define NV_CE_PCE_MAP 0x00104028 729#define NV_CE_PCE_MAP 0x00104028
726#define MAP_SIZE 32 730#define NV_CE_PCE_MAP_SIZE 32
727 731
728 732
729/* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 733/* Location of the 1Kb instance block with page tables for BAR1 and BAR2.
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 78860e6..ed82e58 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -1,4 +1,4 @@
1/* Copyright 2021 Joshua Bakita 1/* Copyright 2024 Joshua Bakita
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 */ 3 */
4 4
@@ -27,6 +27,7 @@ extern struct file_operations disable_channel_file_ops;
27extern struct file_operations enable_channel_file_ops; 27extern struct file_operations enable_channel_file_ops;
28extern struct file_operations switch_to_tsg_file_ops; 28extern struct file_operations switch_to_tsg_file_ops;
29extern struct file_operations device_info_file_ops; 29extern struct file_operations device_info_file_ops;
30extern struct file_operations copy_topology_file_ops;
30extern struct file_operations nvdebug_read_reg32_file_ops; 31extern struct file_operations nvdebug_read_reg32_file_ops;
31extern struct file_operations nvdebug_read_reg_range_file_ops; 32extern struct file_operations nvdebug_read_reg_range_file_ops;
32 33
@@ -204,7 +205,7 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
204int __init nvdebug_init(void) { 205int __init nvdebug_init(void) {
205 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, 206 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry,
206 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, 207 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry,
207 *num_gpcs_entry, *lce_for_pce_entry, *grce_for_pce_entry; 208 *num_gpcs_entry;
208 int rl_create_err, tpc_masks_create_err; 209 int rl_create_err, tpc_masks_create_err;
209 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state 210 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state
210 int res = probe_and_cache_device(); 211 int res = probe_and_cache_device();
@@ -260,97 +261,18 @@ int __init nvdebug_init(void) {
260 num_gpcs_entry = proc_create_data( 261 num_gpcs_entry = proc_create_data(
261 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 262 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
262 (void*)NV_FUSE_GPC); 263 (void*)NV_FUSE_GPC);
263 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ 264 // Create files exposing LCE and PCE configuration (Pascal+)
264 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ 265 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) {
265 // Used for reading a subset of a register on pascal 266 // Create file `/proc/gpu#/copy_topology`, world readable
266 union reg_range pascal_reg; 267 if (!proc_create_data(
267 // Create a pce mask for iteration 268 "copy_topology", 0444, dir, compat_ops(&copy_topology_file_ops),
268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); 269 (void*)0))
269 char file_name[21]; 270 goto out_nomem;
270 int pce_id = 0;
271 int pce_num = 0;
272 int i;
273 for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) {
274 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing
275 if ((1 << pce_id) & ce_pce_map) {
276 snprintf(file_name, 20, "lce_for_pce%d", pce_num);
277 // Depending on GPU architecture, fetch data for the LCE of particular PCE
278 switch (g_nvdebug_state[res].chip_id & 0xff0) {
279 case NV_CHIP_ID_PASCAL:
280 // On Pascal, two PCE configurations are packed per-byte.
281 // Work around this by leveraging that we only run on 64-bit
282 // platforms (can assume that a void* is 64-bits), and that
283 // GPU register offsets are only 32-bits. Use the other 32
284 // bits to store which bits to print.
285 pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0);
286 pascal_reg.start_bit = pce_id * 4;
287 pascal_reg.stop_bit = pce_id * 4 + 4;
288 lce_for_pce_entry = proc_create_data(
289 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
290 (void*)pascal_reg.raw);
291 break;
292 case NV_CHIP_ID_VOLTA:
293 case NV_CHIP_ID_VOLTA_INTEGRATED:
294 case NV_CHIP_ID_TURING:
295 lce_for_pce_entry = proc_create_data(
296 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
297 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id));
298 break;
299 case NV_CHIP_ID_AMPERE:
300 case NV_CHIP_ID_HOPPER:
301 case NV_CHIP_ID_ADA:
302 lce_for_pce_entry = proc_create_data(
303 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
304 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id));
305 break;
306 }
307 if (!lce_for_pce_entry)
308 return -ENOMEM;
309 pce_num++;
310 }
311 }
312 // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1)
313 for (i = 0; i < 2; i++) {
314 union reg_range grce_reg = {0};
315 snprintf(file_name, 21, "shared_lce_for_grce%d", i);
316 // The offset used here is only documented for Turing
317 // Actually, Pascal through Turing
318 // On Pascal, it's only 3 bits, every 8 bits
319 // On Volta-Turing, it start at same offset, but it's lower 4 bits, every 32 bits
320 // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing
321 switch (g_nvdebug_state[res].chip_id & 0xff0) {
322 case NV_CHIP_ID_PASCAL:
323 grce_reg.offset = NV_GRCE_FOR_CE_GP100(0);
324 grce_reg.start_bit = i * 8;
325 grce_reg.stop_bit = grce_reg.start_bit + 3;
326 break;
327 case NV_CHIP_ID_VOLTA:
328 case NV_CHIP_ID_VOLTA_INTEGRATED:
329 case NV_CHIP_ID_TURING:
330 grce_reg.offset = NV_GRCE_FOR_CE_GP100(i);
331 grce_reg.start_bit = 0;
332 grce_reg.stop_bit = grce_reg.start_bit + 4;
333 break;
334 case NV_CHIP_ID_AMPERE:
335 case NV_CHIP_ID_HOPPER:
336 case NV_CHIP_ID_ADA:
337 grce_reg.offset = NV_GRCE_FOR_CE_GA100(i);
338 grce_reg.start_bit = 0;
339 grce_reg.stop_bit = grce_reg.start_bit + 4;
340 break;
341 }
342 grce_for_pce_entry = proc_create_data(
343 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
344 (void*)grce_reg.raw);
345 if (!grce_for_pce_entry)
346 return -ENOMEM;
347 }
348
349 // TODO: Redo to num_pces
350 // Create file `/proc/gpu#/pce_map`, world readable 271 // Create file `/proc/gpu#/pce_map`, world readable
351 num_gpcs_entry = proc_create_data( 272 if (!proc_create_data(
352 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 273 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
353 (void*)NV_CE_PCE_MAP); 274 (void*)NV_CE_PCE_MAP))
275 goto out_nomem;
354 } 276 }
355 // ProcFS entry creation only fails if out of memory 277 // ProcFS entry creation only fails if out of memory
356 if (rl_create_err || tpc_masks_create_err || !preempt_entry || 278 if (rl_create_err || tpc_masks_create_err || !preempt_entry ||