diff options
author | Saman Sahebi <saman63@cs.unc.edu> | 2023-08-03 18:00:31 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-10-29 15:45:32 -0400 |
commit | bebffaf223e975ab8f6fcf5fb6bd6de814fb614c (patch) | |
tree | 694f33d3e2b22590fd264e58a42f74ce64c645b0 | |
parent | 7fda166c68c58887a90521911228ef734c7d4e4f (diff) |
patched issues with GPU compatibility for CE_MAP
-rw-r--r-- | nvdebug.h | 13 | ||||
-rw-r--r-- | nvdebug.mod | 2 | ||||
-rw-r--r-- | nvdebug_entry.c | 51 |
3 files changed, 44 insertions, 22 deletions
@@ -389,7 +389,9 @@ typedef union { | |||
389 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | 389 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 |
390 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | 390 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU |
391 | #define NV_CHIP_ID_KEPLER 0x0E0 | 391 | #define NV_CHIP_ID_KEPLER 0x0E0 |
392 | #define NV_CHIP_ID_PASCAL 0x130 | ||
392 | #define NV_CHIP_ID_VOLTA 0x140 | 393 | #define NV_CHIP_ID_VOLTA 0x140 |
394 | #define NV_CHIP_ID_TURING 0x160 | ||
393 | #define NV_CHIP_ID_AMPERE 0x170 | 395 | #define NV_CHIP_ID_AMPERE 0x170 |
394 | 396 | ||
395 | inline static const char* ARCH2NAME(uint32_t arch) { | 397 | inline static const char* ARCH2NAME(uint32_t arch) { |
@@ -687,10 +689,12 @@ typedef union { | |||
687 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 689 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
688 | */ | 690 | */ |
689 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 691 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
690 | //defined GRCE->CE mapping offset from nvgpu | 692 | // Defined GRCE->CE mapping offsets from nvgpu |
691 | #define NV_GRCE_FOR_CE(i)(0x00104034+(i)*4) | 693 | #define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4) |
692 | //defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 694 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) |
693 | #define NV_LCE_FOR_PCE(i)(0x00104040+(i)*4) | 695 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) |
696 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
697 | #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) | ||
694 | 698 | ||
695 | /* Physical Copy Engine (PCE) information | 699 | /* Physical Copy Engine (PCE) information |
696 | On Pascal GPUs or newer, this register complements the above information by | 700 | On Pascal GPUs or newer, this register complements the above information by |
@@ -704,6 +708,7 @@ typedef union { | |||
704 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 708 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
705 | */ | 709 | */ |
706 | #define NV_CE_PCE_MAP 0x00104028 | 710 | #define NV_CE_PCE_MAP 0x00104028 |
711 | #define MAP_SIZE 32 | ||
707 | 712 | ||
708 | 713 | ||
709 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 714 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. |
diff --git a/nvdebug.mod b/nvdebug.mod new file mode 100644 index 0000000..5ffaef7 --- /dev/null +++ b/nvdebug.mod | |||
@@ -0,0 +1,2 @@ | |||
1 | /home/saman63/nvdebug/runlist_procfs.o /home/saman63/nvdebug/device_info_procfs.o /home/saman63/nvdebug/runlist.o /home/saman63/nvdebug/mmu.o /home/saman63/nvdebug/nvdebug_entry.o | ||
2 | |||
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index c444ff7..d355151 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -259,30 +259,45 @@ int __init nvdebug_init(void) { | |||
259 | num_gpcs_entry = proc_create_data( | 259 | num_gpcs_entry = proc_create_data( |
260 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 260 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
261 | (void*)NV_FUSE_GPC); | 261 | (void*)NV_FUSE_GPC); |
262 | // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ | 262 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ |
263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { | 263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ |
264 | 264 | ||
265 | //create a pce mask for iteration | 265 | // Create a pce mask for iteration |
266 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); | 266 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); |
267 | u32 num_pce = U32(hweight32(ce_pce_map)); | 267 | u32 num_pce = U32(hweight32(ce_pce_map)); |
268 | u32 disabled_pce_mask = ~ce_pce_map; | ||
269 | char file_name[20]; | 268 | char file_name[20]; |
270 | int pce_id; | 269 | int pce_id; |
271 | for (pce_id = 0; pce_id < num_pce; pce_id++){ | 270 | int i; |
272 | //if pce is disabled, do nothing | 271 | for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){ |
273 | if ((1 << pce_id) & disabled_pce_mask) | 272 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing |
274 | continue; | 273 | if ((1 << i) & ce_pce_map){ |
275 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); | 274 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); |
276 | lce_for_pce_entry = proc_create_data( | 275 | switch (g_nvdebug_state[res].chip_id){ |
277 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
278 | (void*)(uintptr_t)NV_LCE_FOR_PCE(pce_id)); | ||
279 | snprintf(file_name, 20, "grce_for_pce%d",pce_id); | ||
280 | grce_for_pce_entry = proc_create_data( | ||
281 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
282 | (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); | ||
283 | 276 | ||
284 | if (!lce_for_pce_entry || !grce_for_pce_entry) | 277 | case NV_CHIP_ID_PASCAL: |
285 | return -ENOMEM; | 278 | lce_for_pce_entry = proc_create_data( |
279 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
280 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id)); | ||
281 | break; | ||
282 | case NV_CHIP_ID_VOLTA: | ||
283 | lce_for_pce_entry = proc_create_data( | ||
284 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
285 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | ||
286 | break; | ||
287 | case NV_CHIP_ID_AMPERE: | ||
288 | lce_for_pce_entry = proc_create_data( | ||
289 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
290 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); | ||
291 | break; | ||
292 | } | ||
293 | snprintf(file_name, 20, "grce_for_pce%d",pce_id); | ||
294 | grce_for_pce_entry = proc_create_data( | ||
295 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
296 | (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); | ||
297 | if (!lce_for_pce_entry || !grce_for_pce_entry) | ||
298 | return -ENOMEM; | ||
299 | pce_id++ | ||
300 | } | ||
286 | } | 301 | } |
287 | 302 | ||
288 | // TODO: Redo to num_pces | 303 | // TODO: Redo to num_pces |