aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r--nvdebug_entry.c111
1 files changed, 71 insertions, 40 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 3815e06..78860e6 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -28,7 +28,8 @@ extern struct file_operations enable_channel_file_ops;
28extern struct file_operations switch_to_tsg_file_ops; 28extern struct file_operations switch_to_tsg_file_ops;
29extern struct file_operations device_info_file_ops; 29extern struct file_operations device_info_file_ops;
30extern struct file_operations nvdebug_read_reg32_file_ops; 30extern struct file_operations nvdebug_read_reg32_file_ops;
31extern struct file_operations nvdebug_read4_pascal_file_ops; 31extern struct file_operations nvdebug_read_reg_range_file_ops;
32
32// Bus types are global symbols in the kernel 33// Bus types are global symbols in the kernel
33extern struct bus_type platform_bus_type; 34extern struct bus_type platform_bus_type;
34struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 35struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
@@ -261,62 +262,92 @@ int __init nvdebug_init(void) {
261 (void*)NV_FUSE_GPC); 262 (void*)NV_FUSE_GPC);
262 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ 263 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+
263 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ 264 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){
264 // Declare struct for storing pce index and offset 265 // Used for reading a subset of a register on Pascal
265 struct combo local_combo; 266 union reg_range pascal_reg;
266 struct combo* local_combo_ptr = &local_combo;
267 // Create a pce mask for iteration 267 // Create a pce mask for iteration
268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); 268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP);
269 char file_name[20]; 269 char file_name[21];
270 int pce_id = 0; 270 int pce_id = 0;
271 int pce_num = 0;
271 int i; 272 int i;
272 for (i = 0; i < MAP_SIZE; i++){ 273 for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) {
273 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing 274 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing
274 if ((1 << i) & ce_pce_map){ 275 if ((1 << pce_id) & ce_pce_map) {
275 snprintf(file_name, 20, "lce_for_pce%d",pce_id); 276 snprintf(file_name, 20, "lce_for_pce%d", pce_num);
276 // Depending on GPU architecture, fetch data for the LCE of particular PCE 277 // Depending on GPU architecture, fetch data for the LCE of particular PCE
277 switch (g_nvdebug_state[res].chip_id & 0xff0){ 278 switch (g_nvdebug_state[res].chip_id & 0xff0) {
278
279 case NV_CHIP_ID_PASCAL: 279 case NV_CHIP_ID_PASCAL:
280 local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id); 280 // On Pascal, two PCE configurations are packed per-byte.
281 local_combo.index = pce_id; 281 // Work around this by leveraging that we only run on 64-bit
282 // platforms (can assume that a void* is 64-bits), and that
283 // GPU register offsets are only 32-bits. Use the other 32
284 // bits to store which bits to print.
285 pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0);
286 pascal_reg.start_bit = pce_id * 4;
287 pascal_reg.stop_bit = pce_id * 4 + 4;
282 lce_for_pce_entry = proc_create_data( 288 lce_for_pce_entry = proc_create_data(
283 file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), 289 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
284 *(void**)local_combo_ptr); 290 (void*)pascal_reg.raw);
285 break; 291 break;
286 case NV_CHIP_ID_VOLTA: 292 case NV_CHIP_ID_VOLTA:
287 lce_for_pce_entry = proc_create_data( 293 case NV_CHIP_ID_VOLTA_INTEGRATED:
288 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 294 case NV_CHIP_ID_TURING:
289 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); 295 lce_for_pce_entry = proc_create_data(
296 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
297 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id));
290 break; 298 break;
291 case NV_CHIP_ID_AMPERE: 299 case NV_CHIP_ID_AMPERE:
300 case NV_CHIP_ID_HOPPER:
301 case NV_CHIP_ID_ADA:
292 lce_for_pce_entry = proc_create_data( 302 lce_for_pce_entry = proc_create_data(
293 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 303 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
294 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); 304 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id));
295 break; 305 break;
296 case NV_CHIP_ID_TURING:
297 lce_for_pce_entry = proc_create_data(
298 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
299 (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id));
300 break;
301
302 }
303 // Make 2 files for 2 GRCEs
304 if (pce_id < NV_GRCE_NUM){
305 local_combo.offset = NV_GRCE_FOR_CE(pce_id);
306 local_combo.index = 0;
307 snprintf(file_name, 20, "pce_for_grce%d",pce_id);
308 grce_for_pce_entry = proc_create_data(
309 file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops),
310 *(void**)local_combo_ptr);
311 } 306 }
312 if (!lce_for_pce_entry || !grce_for_pce_entry) 307 if (!lce_for_pce_entry)
313 return -ENOMEM; 308 return -ENOMEM;
314 pce_id++; 309 pce_num++;
315 310 }
316 } 311 }
317 } 312 // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1)
313 for (i = 0; i < 2; i++) {
314 union reg_range grce_reg = {0};
315 snprintf(file_name, 21, "shared_lce_for_grce%d", i);
316 // The offset used here is only documented for Turing
317 // Actually, Pascal through Turing
318 // On Pascal, it's only 3 bits, every 8 bits
319 // On Volta-Turing, it starts at the same offset, but it's the lower 4 bits, every 32 bits
320 // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing
321 switch (g_nvdebug_state[res].chip_id & 0xff0) {
322 case NV_CHIP_ID_PASCAL:
323 grce_reg.offset = NV_GRCE_FOR_CE_GP100(0);
324 grce_reg.start_bit = i * 8;
325 grce_reg.stop_bit = grce_reg.start_bit + 3;
326 break;
327 case NV_CHIP_ID_VOLTA:
328 case NV_CHIP_ID_VOLTA_INTEGRATED:
329 case NV_CHIP_ID_TURING:
330 grce_reg.offset = NV_GRCE_FOR_CE_GP100(i);
331 grce_reg.start_bit = 0;
332 grce_reg.stop_bit = grce_reg.start_bit + 4;
333 break;
334 case NV_CHIP_ID_AMPERE:
335 case NV_CHIP_ID_HOPPER:
336 case NV_CHIP_ID_ADA:
337 grce_reg.offset = NV_GRCE_FOR_CE_GA100(i);
338 grce_reg.start_bit = 0;
339 grce_reg.stop_bit = grce_reg.start_bit + 4;
340 break;
341 }
342 grce_for_pce_entry = proc_create_data(
343 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
344 (void*)grce_reg.raw);
345 if (!grce_for_pce_entry)
346 return -ENOMEM;
347 }
318 348
319 // TODO: Redo to num_pces 349 // TODO: Redo to num_pces
350 // Create file `/proc/gpu#/pce_map`, world readable
320 num_gpcs_entry = proc_create_data( 351 num_gpcs_entry = proc_create_data(
321 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 352 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
322 (void*)NV_CE_PCE_MAP); 353 (void*)NV_CE_PCE_MAP);