aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
author: Joshua J Bakita <jbakita@rtsrv.cs.unc.edu> 2023-11-08 14:41:47 -0500
committer: Joshua Bakita <bakitajoshua@gmail.com> 2023-11-08 15:01:24 -0500
commit: 3aab3c220f3f0bcc3d3d58d0daf6fd6acf1819e2 (patch)
tree: 71a0fef6595e65d42808e1f963cdd4957c2f28e6 /nvdebug_entry.c
parent: b9d8f6a83a8e5fec38e9e20a54ee13838936fa10 (diff)
Expand support for printing LCE<->PCE and GRCE->LCE configuration (ref: rtas24-ae)
Tested working on Pascal, Volta, Volta Integrated, Turing, Ampere, and Ada. Also clean up minor spacing issues, an errantly added file (nvdebug.mod), and fix some inconsistencies with upstream.
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- nvdebug_entry.c 111
1 files changed, 71 insertions, 40 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 3815e06..78860e6 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -28,7 +28,8 @@ extern struct file_operations enable_channel_file_ops;
28extern struct file_operations switch_to_tsg_file_ops; 28extern struct file_operations switch_to_tsg_file_ops;
29extern struct file_operations device_info_file_ops; 29extern struct file_operations device_info_file_ops;
30extern struct file_operations nvdebug_read_reg32_file_ops; 30extern struct file_operations nvdebug_read_reg32_file_ops;
31extern struct file_operations nvdebug_read4_pascal_file_ops; 31extern struct file_operations nvdebug_read_reg_range_file_ops;
32
32// Bus types are global symbols in the kernel 33// Bus types are global symbols in the kernel
33extern struct bus_type platform_bus_type; 34extern struct bus_type platform_bus_type;
34struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 35struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
@@ -261,62 +262,92 @@ int __init nvdebug_init(void) {
261 (void*)NV_FUSE_GPC); 262 (void*)NV_FUSE_GPC);
262 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ 263 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+
263 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ 264 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){
264 // Declare struct for storing pce index and offset 265 // Used for reading a subset of a register on pascal
265 struct combo local_combo; 266 union reg_range pascal_reg;
266 struct combo* local_combo_ptr = &local_combo;
267 // Create a pce mask for iteration 267 // Create a pce mask for iteration
268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); 268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP);
269 char file_name[20]; 269 char file_name[21];
270 int pce_id = 0; 270 int pce_id = 0;
271 int pce_num = 0;
271 int i; 272 int i;
272 for (i = 0; i < MAP_SIZE; i++){ 273 for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) {
273 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing 274 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing
274 if ((1 << i) & ce_pce_map){ 275 if ((1 << pce_id) & ce_pce_map) {
275 snprintf(file_name, 20, "lce_for_pce%d",pce_id); 276 snprintf(file_name, 20, "lce_for_pce%d", pce_num);
276 // Depending on GPU architecture, fetch data for the LCE of particular PCE 277 // Depending on GPU architecture, fetch data for the LCE of particular PCE
277 switch (g_nvdebug_state[res].chip_id & 0xff0){ 278 switch (g_nvdebug_state[res].chip_id & 0xff0) {
278
279 case NV_CHIP_ID_PASCAL: 279 case NV_CHIP_ID_PASCAL:
280 local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id); 280 // On Pascal, two PCE configurations are packed per-byte.
281 local_combo.index = pce_id; 281 // Work around this by leveraging that we only run on 64-bit
282 // platforms (can assume that a void* is 64-bits), and that
283 // GPU register offsets are only 32-bits. Use the other 32
284 // bits to store which bits to print.
285 pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0);
286 pascal_reg.start_bit = pce_id * 4;
287 pascal_reg.stop_bit = pce_id * 4 + 4;
282 lce_for_pce_entry = proc_create_data( 288 lce_for_pce_entry = proc_create_data(
283 file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), 289 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
284 *(void**)local_combo_ptr); 290 (void*)pascal_reg.raw);
285 break; 291 break;
286 case NV_CHIP_ID_VOLTA: 292 case NV_CHIP_ID_VOLTA:
287 lce_for_pce_entry = proc_create_data( 293 case NV_CHIP_ID_VOLTA_INTEGRATED:
288 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 294 case NV_CHIP_ID_TURING:
289 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); 295 lce_for_pce_entry = proc_create_data(
296 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
297 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id));
290 break; 298 break;
291 case NV_CHIP_ID_AMPERE: 299 case NV_CHIP_ID_AMPERE:
300 case NV_CHIP_ID_HOPPER:
301 case NV_CHIP_ID_ADA:
292 lce_for_pce_entry = proc_create_data( 302 lce_for_pce_entry = proc_create_data(
293 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 303 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
294 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); 304 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id));
295 break; 305 break;
296 case NV_CHIP_ID_TURING:
297 lce_for_pce_entry = proc_create_data(
298 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
299 (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id));
300 break;
301
302 }
303 // Make 2 files for 2 GRCEs
304 if (pce_id < NV_GRCE_NUM){
305 local_combo.offset = NV_GRCE_FOR_CE(pce_id);
306 local_combo.index = 0;
307 snprintf(file_name, 20, "pce_for_grce%d",pce_id);
308 grce_for_pce_entry = proc_create_data(
309 file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops),
310 *(void**)local_combo_ptr);
311 } 306 }
312 if (!lce_for_pce_entry || !grce_for_pce_entry) 307 if (!lce_for_pce_entry)
313 return -ENOMEM; 308 return -ENOMEM;
314 pce_id++; 309 pce_num++;
315 310 }
316 } 311 }
317 } 312 // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1)
313 for (i = 0; i < 2; i++) {
314 union reg_range grce_reg = {0};
315 snprintf(file_name, 21, "shared_lce_for_grce%d", i);
316 // The offset used here is only documented for Turing
317 // Actually, Pascal through Turing
318 // On Pascal, it's only 3 bits, every 8 bits
319 // On Volta-Turing, it start at same offset, but it's lower 4 bits, every 32 bits
320 // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing
321 switch (g_nvdebug_state[res].chip_id & 0xff0) {
322 case NV_CHIP_ID_PASCAL:
323 grce_reg.offset = NV_GRCE_FOR_CE_GP100(0);
324 grce_reg.start_bit = i * 8;
325 grce_reg.stop_bit = grce_reg.start_bit + 3;
326 break;
327 case NV_CHIP_ID_VOLTA:
328 case NV_CHIP_ID_VOLTA_INTEGRATED:
329 case NV_CHIP_ID_TURING:
330 grce_reg.offset = NV_GRCE_FOR_CE_GP100(i);
331 grce_reg.start_bit = 0;
332 grce_reg.stop_bit = grce_reg.start_bit + 4;
333 break;
334 case NV_CHIP_ID_AMPERE:
335 case NV_CHIP_ID_HOPPER:
336 case NV_CHIP_ID_ADA:
337 grce_reg.offset = NV_GRCE_FOR_CE_GA100(i);
338 grce_reg.start_bit = 0;
339 grce_reg.stop_bit = grce_reg.start_bit + 4;
340 break;
341 }
342 grce_for_pce_entry = proc_create_data(
343 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
344 (void*)grce_reg.raw);
345 if (!grce_for_pce_entry)
346 return -ENOMEM;
347 }
318 348
319 // TODO: Redo to num_pces 349 // TODO: Redo to num_pces
350 // Create file `/proc/gpu#/pce_map`, world readable
320 num_gpcs_entry = proc_create_data( 351 num_gpcs_entry = proc_create_data(
321 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 352 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
322 (void*)NV_CE_PCE_MAP); 353 (void*)NV_CE_PCE_MAP);