about summary refs log tree commit diff stats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- nvdebug_entry.c 106
1 files changed, 14 insertions, 92 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 78860e6..ed82e58 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -1,4 +1,4 @@
1/* Copyright 2021 Joshua Bakita 1/* Copyright 2024 Joshua Bakita
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 */ 3 */
4 4
@@ -27,6 +27,7 @@ extern struct file_operations disable_channel_file_ops;
27extern struct file_operations enable_channel_file_ops; 27extern struct file_operations enable_channel_file_ops;
28extern struct file_operations switch_to_tsg_file_ops; 28extern struct file_operations switch_to_tsg_file_ops;
29extern struct file_operations device_info_file_ops; 29extern struct file_operations device_info_file_ops;
30extern struct file_operations copy_topology_file_ops;
30extern struct file_operations nvdebug_read_reg32_file_ops; 31extern struct file_operations nvdebug_read_reg32_file_ops;
31extern struct file_operations nvdebug_read_reg_range_file_ops; 32extern struct file_operations nvdebug_read_reg_range_file_ops;
32 33
@@ -204,7 +205,7 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
204int __init nvdebug_init(void) { 205int __init nvdebug_init(void) {
205 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, 206 struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry,
206 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, 207 *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry,
207 *num_gpcs_entry, *lce_for_pce_entry, *grce_for_pce_entry; 208 *num_gpcs_entry;
208 int rl_create_err, tpc_masks_create_err; 209 int rl_create_err, tpc_masks_create_err;
209 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state 210 // Check that an NVIDIA GPU is present and initialize g_nvdebug_state
210 int res = probe_and_cache_device(); 211 int res = probe_and_cache_device();
@@ -260,97 +261,18 @@ int __init nvdebug_init(void) {
260 num_gpcs_entry = proc_create_data( 261 num_gpcs_entry = proc_create_data(
261 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 262 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
262 (void*)NV_FUSE_GPC); 263 (void*)NV_FUSE_GPC);
263 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ 264 // Create files exposing LCE and PCE configuration (Pascal+)
264 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ 265 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) {
265 // Used for reading a subset of a register on pascal 266 // Create file `/proc/gpu#/copy_topology`, world readable
266 union reg_range pascal_reg; 267 if (!proc_create_data(
267 // Create a pce mask for iteration 268 "copy_topology", 0444, dir, compat_ops(&copy_topology_file_ops),
268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); 269 (void*)0))
269 char file_name[21]; 270 goto out_nomem;
270 int pce_id = 0;
271 int pce_num = 0;
272 int i;
273 for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) {
274 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing
275 if ((1 << pce_id) & ce_pce_map) {
276 snprintf(file_name, 20, "lce_for_pce%d", pce_num);
277 // Depending on GPU architecture, fetch data for the LCE of particular PCE
278 switch (g_nvdebug_state[res].chip_id & 0xff0) {
279 case NV_CHIP_ID_PASCAL:
280 // On Pascal, two PCE configurations are packed per-byte.
281 // Work around this by leveraging that we only run on 64-bit
282 // platforms (can assume that a void* is 64-bits), and that
283 // GPU register offsets are only 32-bits. Use the other 32
284 // bits to store which bits to print.
285 pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0);
286 pascal_reg.start_bit = pce_id * 4;
287 pascal_reg.stop_bit = pce_id * 4 + 4;
288 lce_for_pce_entry = proc_create_data(
289 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
290 (void*)pascal_reg.raw);
291 break;
292 case NV_CHIP_ID_VOLTA:
293 case NV_CHIP_ID_VOLTA_INTEGRATED:
294 case NV_CHIP_ID_TURING:
295 lce_for_pce_entry = proc_create_data(
296 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
297 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id));
298 break;
299 case NV_CHIP_ID_AMPERE:
300 case NV_CHIP_ID_HOPPER:
301 case NV_CHIP_ID_ADA:
302 lce_for_pce_entry = proc_create_data(
303 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
304 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id));
305 break;
306 }
307 if (!lce_for_pce_entry)
308 return -ENOMEM;
309 pce_num++;
310 }
311 }
312 // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1)
313 for (i = 0; i < 2; i++) {
314 union reg_range grce_reg = {0};
315 snprintf(file_name, 21, "shared_lce_for_grce%d", i);
316 // The offset used here is only documented for Turing
317 // Actually, Pascal through Turing
318 // On Pascal, it's only 3 bits, every 8 bits
319 // On Volta-Turing, it start at same offset, but it's lower 4 bits, every 32 bits
320 // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing
321 switch (g_nvdebug_state[res].chip_id & 0xff0) {
322 case NV_CHIP_ID_PASCAL:
323 grce_reg.offset = NV_GRCE_FOR_CE_GP100(0);
324 grce_reg.start_bit = i * 8;
325 grce_reg.stop_bit = grce_reg.start_bit + 3;
326 break;
327 case NV_CHIP_ID_VOLTA:
328 case NV_CHIP_ID_VOLTA_INTEGRATED:
329 case NV_CHIP_ID_TURING:
330 grce_reg.offset = NV_GRCE_FOR_CE_GP100(i);
331 grce_reg.start_bit = 0;
332 grce_reg.stop_bit = grce_reg.start_bit + 4;
333 break;
334 case NV_CHIP_ID_AMPERE:
335 case NV_CHIP_ID_HOPPER:
336 case NV_CHIP_ID_ADA:
337 grce_reg.offset = NV_GRCE_FOR_CE_GA100(i);
338 grce_reg.start_bit = 0;
339 grce_reg.stop_bit = grce_reg.start_bit + 4;
340 break;
341 }
342 grce_for_pce_entry = proc_create_data(
343 file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops),
344 (void*)grce_reg.raw);
345 if (!grce_for_pce_entry)
346 return -ENOMEM;
347 }
348
349 // TODO: Redo to num_pces
350 // Create file `/proc/gpu#/pce_map`, world readable 271 // Create file `/proc/gpu#/pce_map`, world readable
351 num_gpcs_entry = proc_create_data( 272 if (!proc_create_data(
352 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 273 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
353 (void*)NV_CE_PCE_MAP); 274 (void*)NV_CE_PCE_MAP))
275 goto out_nomem;
354 } 276 }
355 // ProcFS entry creation only fails if out of memory 277 // ProcFS entry creation only fails if out of memory
356 if (rl_create_err || tpc_masks_create_err || !preempt_entry || 278 if (rl_create_err || tpc_masks_create_err || !preempt_entry ||