diff options
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 111 |
1 files changed, 71 insertions, 40 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 3815e06..78860e6 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -28,7 +28,8 @@ extern struct file_operations enable_channel_file_ops; | |||
28 | extern struct file_operations switch_to_tsg_file_ops; | 28 | extern struct file_operations switch_to_tsg_file_ops; |
29 | extern struct file_operations device_info_file_ops; | 29 | extern struct file_operations device_info_file_ops; |
30 | extern struct file_operations nvdebug_read_reg32_file_ops; | 30 | extern struct file_operations nvdebug_read_reg32_file_ops; |
31 | extern struct file_operations nvdebug_read4_pascal_file_ops; | 31 | extern struct file_operations nvdebug_read_reg_range_file_ops; |
32 | |||
32 | // Bus types are global symbols in the kernel | 33 | // Bus types are global symbols in the kernel |
33 | extern struct bus_type platform_bus_type; | 34 | extern struct bus_type platform_bus_type; |
34 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 35 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
@@ -261,62 +262,92 @@ int __init nvdebug_init(void) { | |||
261 | (void*)NV_FUSE_GPC); | 262 | (void*)NV_FUSE_GPC); |
262 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ | 263 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ |
263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ | 264 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ |
264 | // Declare struct for storing pce index and offset | 265 | // Used for reading a subset of a register on pascal |
265 | struct combo local_combo; | 266 | union reg_range pascal_reg; |
266 | struct combo* local_combo_ptr = &local_combo; | ||
267 | // Create a pce mask for iteration | 267 | // Create a pce mask for iteration |
268 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); | 268 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); |
269 | char file_name[20]; | 269 | char file_name[21]; |
270 | int pce_id = 0; | 270 | int pce_id = 0; |
271 | int pce_num = 0; | ||
271 | int i; | 272 | int i; |
272 | for (i = 0; i < MAP_SIZE; i++){ | 273 | for (pce_id = 0; pce_id < MAP_SIZE; pce_id++) { |
273 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing | 274 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing |
274 | if ((1 << i) & ce_pce_map){ | 275 | if ((1 << pce_id) & ce_pce_map) { |
275 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); | 276 | snprintf(file_name, 20, "lce_for_pce%d", pce_num); |
276 | // Depending on GPU architecture, fetch data for the LCE of particular PCE | 277 | // Depending on GPU architecture, fetch data for the LCE of particular PCE |
277 | switch (g_nvdebug_state[res].chip_id & 0xff0){ | 278 | switch (g_nvdebug_state[res].chip_id & 0xff0) { |
278 | |||
279 | case NV_CHIP_ID_PASCAL: | 279 | case NV_CHIP_ID_PASCAL: |
280 | local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id); | 280 | // On Pascal, two PCE configurations are packed per-byte. |
281 | local_combo.index = pce_id; | 281 | // Work around this by leveraging that we only run on 64-bit |
282 | // platforms (can assume that a void* is 64-bits), and that | ||
283 | // GPU register offsets are only 32-bits. Use the other 32 | ||
284 | // bits to store which bits to print. | ||
285 | pascal_reg.offset = NV_LCE_FOR_PCE_GP100(0); | ||
286 | pascal_reg.start_bit = pce_id * 4; | ||
287 | pascal_reg.stop_bit = pce_id * 4 + 4; | ||
282 | lce_for_pce_entry = proc_create_data( | 288 | lce_for_pce_entry = proc_create_data( |
283 | file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), | 289 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops), |
284 | *(void**)local_combo_ptr); | 290 | (void*)pascal_reg.raw); |
285 | break; | 291 | break; |
286 | case NV_CHIP_ID_VOLTA: | 292 | case NV_CHIP_ID_VOLTA: |
287 | lce_for_pce_entry = proc_create_data( | 293 | case NV_CHIP_ID_VOLTA_INTEGRATED: |
288 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 294 | case NV_CHIP_ID_TURING: |
289 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | 295 | lce_for_pce_entry = proc_create_data( |
296 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
297 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | ||
290 | break; | 298 | break; |
291 | case NV_CHIP_ID_AMPERE: | 299 | case NV_CHIP_ID_AMPERE: |
300 | case NV_CHIP_ID_HOPPER: | ||
301 | case NV_CHIP_ID_ADA: | ||
292 | lce_for_pce_entry = proc_create_data( | 302 | lce_for_pce_entry = proc_create_data( |
293 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 303 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
294 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); | 304 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); |
295 | break; | 305 | break; |
296 | case NV_CHIP_ID_TURING: | ||
297 | lce_for_pce_entry = proc_create_data( | ||
298 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
299 | (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id)); | ||
300 | break; | ||
301 | |||
302 | } | ||
303 | // Make 2 files for 2 GRCEs | ||
304 | if (pce_id < NV_GRCE_NUM){ | ||
305 | local_combo.offset = NV_GRCE_FOR_CE(pce_id); | ||
306 | local_combo.index = 0; | ||
307 | snprintf(file_name, 20, "pce_for_grce%d",pce_id); | ||
308 | grce_for_pce_entry = proc_create_data( | ||
309 | file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), | ||
310 | *(void**)local_combo_ptr); | ||
311 | } | 306 | } |
312 | if (!lce_for_pce_entry || !grce_for_pce_entry) | 307 | if (!lce_for_pce_entry) |
313 | return -ENOMEM; | 308 | return -ENOMEM; |
314 | pce_id++; | 309 | pce_num++; |
315 | 310 | } | |
316 | } | 311 | } |
317 | } | 312 | // We assume 2 GRCEs (reminder: GRCE0 and 1 are just LCE0 and 1) |
313 | for (i = 0; i < 2; i++) { | ||
314 | union reg_range grce_reg = {0}; | ||
315 | snprintf(file_name, 21, "shared_lce_for_grce%d", i); | ||
316 | // The offset used here is only documented for Turing, | |
317 | // but the same offset is actually used on Pascal through Turing | |
318 | // On Pascal, it's only 3 bits, every 8 bits | ||
319 | // On Volta-Turing, it starts at the same offset, but it's the lower 4 bits, every 32 bits | |
320 | // On Ampere+ it starts at 0x001041c0, but is the same layout as Volta-Turing | ||
321 | switch (g_nvdebug_state[res].chip_id & 0xff0) { | ||
322 | case NV_CHIP_ID_PASCAL: | ||
323 | grce_reg.offset = NV_GRCE_FOR_CE_GP100(0); | ||
324 | grce_reg.start_bit = i * 8; | ||
325 | grce_reg.stop_bit = grce_reg.start_bit + 3; | ||
326 | break; | ||
327 | case NV_CHIP_ID_VOLTA: | ||
328 | case NV_CHIP_ID_VOLTA_INTEGRATED: | ||
329 | case NV_CHIP_ID_TURING: | ||
330 | grce_reg.offset = NV_GRCE_FOR_CE_GP100(i); | ||
331 | grce_reg.start_bit = 0; | ||
332 | grce_reg.stop_bit = grce_reg.start_bit + 4; | ||
333 | break; | ||
334 | case NV_CHIP_ID_AMPERE: | ||
335 | case NV_CHIP_ID_HOPPER: | ||
336 | case NV_CHIP_ID_ADA: | ||
337 | grce_reg.offset = NV_GRCE_FOR_CE_GA100(i); | ||
338 | grce_reg.start_bit = 0; | ||
339 | grce_reg.stop_bit = grce_reg.start_bit + 4; | ||
340 | break; | ||
341 | } | ||
342 | grce_for_pce_entry = proc_create_data( | ||
343 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg_range_file_ops), | ||
344 | (void*)grce_reg.raw); | ||
345 | if (!grce_for_pce_entry) | ||
346 | return -ENOMEM; | ||
347 | } | ||
318 | 348 | ||
319 | // TODO: Redo to num_pces | 349 | // TODO: Redo to num_pces |
350 | // Create file `/proc/gpu#/pce_map`, world readable | ||
320 | num_gpcs_entry = proc_create_data( | 351 | num_gpcs_entry = proc_create_data( |
321 | "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 352 | "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
322 | (void*)NV_CE_PCE_MAP); | 353 | (void*)NV_CE_PCE_MAP); |