diff options
author | Joshua J Bakita <jbakita@rtsrv.cs.unc.edu> | 2023-11-08 14:41:47 -0500 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-11-08 15:01:24 -0500 |
commit | 3aab3c220f3f0bcc3d3d58d0daf6fd6acf1819e2 (patch) | |
tree | 71a0fef6595e65d42808e1f963cdd4957c2f28e6 /nvdebug_entry.c | |
parent | b9d8f6a83a8e5fec38e9e20a54ee13838936fa10 (diff) |
Expand support for printing LCE<->PCE and GRCE->LCE configuration [rtas24-ae]
Tested working on Pascal, Volta, Volta Integrated, Turing, Ampere,
and Ada.
Also clean up minor spacing issues, an errantly added file
(nvdebug.mod), and fix some inconsistencies with upstream.
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 111 |
1 files changed, 71 insertions, 40 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 3815e06..78860e6 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -28,7 +28,8 @@ extern struct file_operations enable_channel_file_ops; | |||
28 | extern struct file_operations switch_to_tsg_file_ops; | 28 | extern struct file_operations switch_to_tsg_file_ops; |
29 | extern struct file_operations device_info_file_ops; | 29 | extern struct file_operations device_info_file_ops; |
30 | extern struct file_operations nvdebug_read_reg32_file_ops; | 30 | extern struct file_operations nvdebug_read_reg32_file_ops; |
31 | extern struct file_operations nvdebug_read4_pascal_file_ops; | 31 | extern struct file_operations nvdebug_read_reg_range_file_ops; |
32 | |||
32 | // Bus types are global symbols in the kernel | 33 | // Bus types are global symbols in the kernel |
33 | extern struct bus_type platform_bus_type; | 34 | extern struct bus_type platform_bus_type; |
34 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 35 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
@@ -261,62 +262,92 @@ int __init nvdebug_init(void) { | |||
261 | (void*)NV_FUSE_GPC); | 262 | (void*)NV_FUSE_GPC); |
262 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ | 263 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ |
263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ | 264 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ |
264 | // Declare struct for storing pce index and offset | 265 | // Used for reading a subset of a register on pascal |
265 | struct combo local_combo; | 266 | union reg_range pascal_reg; |
266 | struct combo* local_combo_ptr = &local_combo; | ||
267 | // Create a pce mask for iteration | 267 | // Create a pce mask for iteration |
268 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); | 268 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); |
269 | char file_name[20]; | 269 | char file_name[21]; |
270 | int pce_id = 0; | 270 | int pce_id = 0; |
271 | int pce_num = 0; | ||
271 | int i; | 272 | int i; |
272 | for (i = 0; i < MAP_SIZE; i++){ | 273 | for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) { |
273 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing | 274 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing |
274 | if ((1 << i) & ce_pce_map){ | 275 | if ((1 << pce_id) & ce_pce_map) { |
275 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); | 276 | snprintf(file_name, 20, "lce_for_pce%d", pce_num); |
276 | // Depending on GPU architecture, fetch data for the LCE of particular PCE | 277 | // Depending on GPU architecture, fetch data for the LCE of particular PCE |
277 | switch (g_nvdebug_state[res].chip_id & 0xff0){ | 278 | switch (g_nvdebug_state[res].chip_id & 0xff0) { |
278 | |||
279 | case NV_CHIP_ID_PASCAL: | 279 | case NV_CHIP_ID_PASCAL: |
280 | local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id); | 280 | // On Pascal, two PCE configurations are packed per-byte. |
281 | local_combo.index = pce_id; | 281 | // Work around this by leveraging that we only run on 64-bit |
282 | // platforms (can assume that a void* is 64-bits), and that | ||
283 | // GPU register offsets are only 32-bits. Use the other 32 | ||
284 | // bits to store which bits to print. | ||
285 | pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0); | ||
286 | pascal_reg.start_bit = pce_id * 4; | ||
287 | pascal_reg.stop_bit = pce_id * 4 + 4; | ||
282 | lce_for_pce_entry = proc_create_data( | 288 | lce_for_pce_entry = proc_create_data( |
283 | file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), | 289 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops), |
284 | *(void**)local_combo_ptr); | 290 | (void*)pascal_reg.raw); |
285 | break; | 291 | break; |
286 | case NV_CHIP_ID_VOLTA: | 292 | case NV_CHIP_ID_VOLTA: |
287 | lce_for_pce_entry = proc_create_data( | 293 | case NV_CHIP_ID_VOLTA_INTEGRATED: |
288 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 294 | case NV_CHIP_ID_TURING: |
289 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | 295 | lce_for_pce_entry = proc_create_data( |
296 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
297 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | ||
290 | break; | 298 | break; |
291 | case NV_CHIP_ID_AMPERE: | 299 | case NV_CHIP_ID_AMPERE: |
300 | case NV_CHIP_ID_HOPPER: | ||
301 | case NV_CHIP_ID_ADA: | ||
292 | lce_for_pce_entry = proc_create_data( | 302 | lce_for_pce_entry = proc_create_data( |
293 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 303 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
294 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); | 304 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); |
295 | break; | 305 | break; |
296 | case NV_CHIP_ID_TURING: | ||
297 | lce_for_pce_entry = proc_create_data( | ||
298 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
299 | (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id)); | ||
300 | break; | ||
301 | |||
302 | } | ||
303 | // Make 2 files for 2 GRCEs | ||
304 | if (pce_id < NV_GRCE_NUM){ | ||
305 | local_combo.offset = NV_GRCE_FOR_CE(pce_id); | ||
306 | local_combo.index = 0; | ||
307 | snprintf(file_name, 20, "pce_for_grce%d",pce_id); | ||
308 | grce_for_pce_entry = proc_create_data( | ||
309 | file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), | ||
310 | *(void**)local_combo_ptr); | ||
311 | } | 306 | } |
312 | if (!lce_for_pce_entry || !grce_for_pce_entry) | 307 | if (!lce_for_pce_entry) |
313 | return -ENOMEM; | 308 | return -ENOMEM; |
314 | pce_id++; | 309 | pce_num++; |
315 | 310 | } | |
316 | } | 311 | } |
317 | } | 312 | // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1) |
313 | for (i = 0; i < 2; i++) { | ||
314 | union reg_range grce_reg = {0}; | ||
315 | snprintf(file_name, 21, "shared_lce_for_grce%d", i); | ||
316 | // The offset used here is only documented for Turing | ||
317 | // Actually, Pascal through Turing | ||
318 | // On Pascal, it's only 3 bits, every 8 bits | ||
319 | // On Volta-Turing, it start at same offset, but it's lower 4 bits, every 32 bits | ||
320 | // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing | ||
321 | switch (g_nvdebug_state[res].chip_id & 0xff0) { | ||
322 | case NV_CHIP_ID_PASCAL: | ||
323 | grce_reg.offset = NV_GRCE_FOR_CE_GP100(0); | ||
324 | grce_reg.start_bit = i * 8; | ||
325 | grce_reg.stop_bit = grce_reg.start_bit + 3; | ||
326 | break; | ||
327 | case NV_CHIP_ID_VOLTA: | ||
328 | case NV_CHIP_ID_VOLTA_INTEGRATED: | ||
329 | case NV_CHIP_ID_TURING: | ||
330 | grce_reg.offset = NV_GRCE_FOR_CE_GP100(i); | ||
331 | grce_reg.start_bit = 0; | ||
332 | grce_reg.stop_bit = grce_reg.start_bit + 4; | ||
333 | break; | ||
334 | case NV_CHIP_ID_AMPERE: | ||
335 | case NV_CHIP_ID_HOPPER: | ||
336 | case NV_CHIP_ID_ADA: | ||
337 | grce_reg.offset = NV_GRCE_FOR_CE_GA100(i); | ||
338 | grce_reg.start_bit = 0; | ||
339 | grce_reg.stop_bit = grce_reg.start_bit + 4; | ||
340 | break; | ||
341 | } | ||
342 | grce_for_pce_entry = proc_create_data( | ||
343 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops), | ||
344 | (void*)grce_reg.raw); | ||
345 | if (!grce_for_pce_entry) | ||
346 | return -ENOMEM; | ||
347 | } | ||
318 | 348 | ||
319 | // TODO: Redo to num_pces | 349 | // TODO: Redo to num_pces |
350 | // Create file `/proc/gpu#/pce_map`, world readable | ||
320 | num_gpcs_entry = proc_create_data( | 351 | num_gpcs_entry = proc_create_data( |
321 | "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 352 | "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
322 | (void*)NV_CE_PCE_MAP); | 353 | (void*)NV_CE_PCE_MAP); |