diff options
Diffstat (limited to 'nvdebug_entry.c')
| -rw-r--r-- | nvdebug_entry.c | 106 |
1 files changed, 14 insertions, 92 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 78860e6..ed82e58 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* Copyright 2021 Joshua Bakita | 1 | /* Copyright 2024 Joshua Bakita |
| 2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
| 3 | */ | 3 | */ |
| 4 | 4 | ||
| @@ -27,6 +27,7 @@ extern struct file_operations disable_channel_file_ops; | |||
| 27 | extern struct file_operations enable_channel_file_ops; | 27 | extern struct file_operations enable_channel_file_ops; |
| 28 | extern struct file_operations switch_to_tsg_file_ops; | 28 | extern struct file_operations switch_to_tsg_file_ops; |
| 29 | extern struct file_operations device_info_file_ops; | 29 | extern struct file_operations device_info_file_ops; |
| 30 | extern struct file_operations copy_topology_file_ops; | ||
| 30 | extern struct file_operations nvdebug_read_reg32_file_ops; | 31 | extern struct file_operations nvdebug_read_reg32_file_ops; |
| 31 | extern struct file_operations nvdebug_read_reg_range_file_ops; | 32 | extern struct file_operations nvdebug_read_reg_range_file_ops; |
| 32 | 33 | ||
| @@ -204,7 +205,7 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | |||
| 204 | int __init nvdebug_init(void) { | 205 | int __init nvdebug_init(void) { |
| 205 | struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, | 206 | struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, |
| 206 | *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, | 207 | *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, |
| 207 | *num_gpcs_entry, *lce_for_pce_entry, *grce_for_pce_entry; | 208 | *num_gpcs_entry; |
| 208 | int rl_create_err, tpc_masks_create_err; | 209 | int rl_create_err, tpc_masks_create_err; |
| 209 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state | 210 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state |
| 210 | int res = probe_and_cache_device(); | 211 | int res = probe_and_cache_device(); |
| @@ -260,97 +261,18 @@ int __init nvdebug_init(void) { | |||
| 260 | num_gpcs_entry = proc_create_data( | 261 | num_gpcs_entry = proc_create_data( |
| 261 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 262 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 262 | (void*)NV_FUSE_GPC); | 263 | (void*)NV_FUSE_GPC); |
| 263 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ | 264 | // Create files exposing LCE and PCE configuration (Pascal+) |
| 264 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ | 265 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { |
| 265 | // Used for reading a subset of a register on pascal | 266 | // Create file `/proc/gpu#/copy_topology`, world readable |
| 266 | union reg_range pascal_reg; | 267 | if (!proc_create_data( |
| 267 | // Create a pce mask for iteration | 268 | "copy_topology", 0444, dir, compat_ops(&copy_topology_file_ops), |
| 268 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); | 269 | (void*)0)) |
| 269 | char file_name[21]; | 270 | goto out_nomem; |
| 270 | int pce_id = 0; | ||
| 271 | int pce_num = 0; | ||
| 272 | int i; | ||
| 273 | for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) { | ||
| 274 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing | ||
| 275 | if ((1 << pce_id) & ce_pce_map) { | ||
| 276 | snprintf(file_name, 20, "lce_for_pce%d", pce_num); | ||
| 277 | // Depending on GPU architecture, fetch data for the LCE of particular PCE | ||
| 278 | switch (g_nvdebug_state[res].chip_id & 0xff0) { | ||
| 279 | case NV_CHIP_ID_PASCAL: | ||
| 280 | // On Pascal, two PCE configurations are packed per-byte. | ||
| 281 | // Work around this by leveraging that we only run on 64-bit | ||
| 282 | // platforms (can assume that a void* is 64-bits), and that | ||
| 283 | // GPU register offsets are only 32-bits. Use the other 32 | ||
| 284 | // bits to store which bits to print. | ||
| 285 | pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0); | ||
| 286 | pascal_reg.start_bit = pce_id * 4; | ||
| 287 | pascal_reg.stop_bit = pce_id * 4 + 4; | ||
| 288 | lce_for_pce_entry = proc_create_data( | ||
| 289 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops), | ||
| 290 | (void*)pascal_reg.raw); | ||
| 291 | break; | ||
| 292 | case NV_CHIP_ID_VOLTA: | ||
| 293 | case NV_CHIP_ID_VOLTA_INTEGRATED: | ||
| 294 | case NV_CHIP_ID_TURING: | ||
| 295 | lce_for_pce_entry = proc_create_data( | ||
| 296 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 297 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | ||
| 298 | break; | ||
| 299 | case NV_CHIP_ID_AMPERE: | ||
| 300 | case NV_CHIP_ID_HOPPER: | ||
| 301 | case NV_CHIP_ID_ADA: | ||
| 302 | lce_for_pce_entry = proc_create_data( | ||
| 303 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 304 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); | ||
| 305 | break; | ||
| 306 | } | ||
| 307 | if (!lce_for_pce_entry) | ||
| 308 | return -ENOMEM; | ||
| 309 | pce_num++; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1) | ||
| 313 | for (i = 0; i < 2; i++) { | ||
| 314 | union reg_range grce_reg = {0}; | ||
| 315 | snprintf(file_name, 21, "shared_lce_for_grce%d", i); | ||
| 316 | // The offset used here is only documented for Turing | ||
| 317 | // Actually, Pascal through Turing | ||
| 318 | // On Pascal, it's only 3 bits, every 8 bits | ||
| 319 | // On Volta-Turing, it start at same offset, but it's lower 4 bits, every 32 bits | ||
| 320 | // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing | ||
| 321 | switch (g_nvdebug_state[res].chip_id & 0xff0) { | ||
| 322 | case NV_CHIP_ID_PASCAL: | ||
| 323 | grce_reg.offset = NV_GRCE_FOR_CE_GP100(0); | ||
| 324 | grce_reg.start_bit = i * 8; | ||
| 325 | grce_reg.stop_bit = grce_reg.start_bit + 3; | ||
| 326 | break; | ||
| 327 | case NV_CHIP_ID_VOLTA: | ||
| 328 | case NV_CHIP_ID_VOLTA_INTEGRATED: | ||
| 329 | case NV_CHIP_ID_TURING: | ||
| 330 | grce_reg.offset = NV_GRCE_FOR_CE_GP100(i); | ||
| 331 | grce_reg.start_bit = 0; | ||
| 332 | grce_reg.stop_bit = grce_reg.start_bit + 4; | ||
| 333 | break; | ||
| 334 | case NV_CHIP_ID_AMPERE: | ||
| 335 | case NV_CHIP_ID_HOPPER: | ||
| 336 | case NV_CHIP_ID_ADA: | ||
| 337 | grce_reg.offset = NV_GRCE_FOR_CE_GA100(i); | ||
| 338 | grce_reg.start_bit = 0; | ||
| 339 | grce_reg.stop_bit = grce_reg.start_bit + 4; | ||
| 340 | break; | ||
| 341 | } | ||
| 342 | grce_for_pce_entry = proc_create_data( | ||
| 343 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops), | ||
| 344 | (void*)grce_reg.raw); | ||
| 345 | if (!grce_for_pce_entry) | ||
| 346 | return -ENOMEM; | ||
| 347 | } | ||
| 348 | |||
| 349 | // TODO: Redo to num_pces | ||
| 350 | // Create file `/proc/gpu#/pce_map`, world readable | 271 | // Create file `/proc/gpu#/pce_map`, world readable |
| 351 | num_gpcs_entry = proc_create_data( | 272 | if (!proc_create_data( |
| 352 | "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 273 | "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 353 | (void*)NV_CE_PCE_MAP); | 274 | (void*)NV_CE_PCE_MAP)) |
| 275 | goto out_nomem; | ||
| 354 | } | 276 | } |
| 355 | // ProcFS entry creation only fails if out of memory | 277 | // ProcFS entry creation only fails if out of memory |
| 356 | if (rl_create_err || tpc_masks_create_err || !preempt_entry || | 278 | if (rl_create_err || tpc_masks_create_err || !preempt_entry || |
