diff options
| author | Saman Sahebi <saman63@cs.unc.edu> | 2023-08-03 18:00:31 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-10-29 15:45:32 -0400 |
| commit | bebffaf223e975ab8f6fcf5fb6bd6de814fb614c (patch) | |
| tree | 694f33d3e2b22590fd264e58a42f74ce64c645b0 | |
| parent | 7fda166c68c58887a90521911228ef734c7d4e4f (diff) | |
patched issues with GPU compatibility for CE_MAP
| -rw-r--r-- | nvdebug.h | 13 | ||||
| -rw-r--r-- | nvdebug.mod | 2 | ||||
| -rw-r--r-- | nvdebug_entry.c | 51 |
3 files changed, 44 insertions, 22 deletions
| @@ -389,7 +389,9 @@ typedef union { | |||
| 389 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | 389 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 |
| 390 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | 390 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU |
| 391 | #define NV_CHIP_ID_KEPLER 0x0E0 | 391 | #define NV_CHIP_ID_KEPLER 0x0E0 |
| 392 | #define NV_CHIP_ID_PASCAL 0x130 | ||
| 392 | #define NV_CHIP_ID_VOLTA 0x140 | 393 | #define NV_CHIP_ID_VOLTA 0x140 |
| 394 | #define NV_CHIP_ID_TURING 0x160 | ||
| 393 | #define NV_CHIP_ID_AMPERE 0x170 | 395 | #define NV_CHIP_ID_AMPERE 0x170 |
| 394 | 396 | ||
| 395 | inline static const char* ARCH2NAME(uint32_t arch) { | 397 | inline static const char* ARCH2NAME(uint32_t arch) { |
| @@ -687,10 +689,12 @@ typedef union { | |||
| 687 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 689 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
| 688 | */ | 690 | */ |
| 689 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 691 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
| 690 | //defined GRCE->CE mapping offset from nvgpu | 692 | // Defined GRCE->CE mapping offsets from nvgpu |
| 691 | #define NV_GRCE_FOR_CE(i)(0x00104034+(i)*4) | 693 | #define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4) |
| 692 | //defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 694 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) |
| 693 | #define NV_LCE_FOR_PCE(i)(0x00104040+(i)*4) | 695 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) |
| 696 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
| 697 | #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) | ||
| 694 | 698 | ||
| 695 | /* Physical Copy Engine (PCE) information | 699 | /* Physical Copy Engine (PCE) information |
| 696 | On Pascal GPUs or newer, this register complements the above information by | 700 | On Pascal GPUs or newer, this register complements the above information by |
| @@ -704,6 +708,7 @@ typedef union { | |||
| 704 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 708 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
| 705 | */ | 709 | */ |
| 706 | #define NV_CE_PCE_MAP 0x00104028 | 710 | #define NV_CE_PCE_MAP 0x00104028 |
| 711 | #define MAP_SIZE 32 | ||
| 707 | 712 | ||
| 708 | 713 | ||
| 709 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 714 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. |
diff --git a/nvdebug.mod b/nvdebug.mod new file mode 100644 index 0000000..5ffaef7 --- /dev/null +++ b/nvdebug.mod | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | /home/saman63/nvdebug/runlist_procfs.o /home/saman63/nvdebug/device_info_procfs.o /home/saman63/nvdebug/runlist.o /home/saman63/nvdebug/mmu.o /home/saman63/nvdebug/nvdebug_entry.o | ||
| 2 | |||
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index c444ff7..d355151 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -259,30 +259,45 @@ int __init nvdebug_init(void) { | |||
| 259 | num_gpcs_entry = proc_create_data( | 259 | num_gpcs_entry = proc_create_data( |
| 260 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 260 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 261 | (void*)NV_FUSE_GPC); | 261 | (void*)NV_FUSE_GPC); |
| 262 | // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ | 262 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ |
| 263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { | 263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ |
| 264 | 264 | ||
| 265 | //create a pce mask for iteration | 265 | // Create a pce mask for iteration |
| 266 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); | 266 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); |
| 267 | u32 num_pce = U32(hweight32(ce_pce_map)); | 267 | u32 num_pce = U32(hweight32(ce_pce_map)); |
| 268 | u32 disabled_pce_mask = ~ce_pce_map; | ||
| 269 | char file_name[20]; | 268 | char file_name[20]; |
| 270 | int pce_id; | 269 | int pce_id; |
| 271 | for (pce_id = 0; pce_id < num_pce; pce_id++){ | 270 | int i; |
| 272 | //if pce is disabled, do nothing | 271 | for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){ |
| 273 | if ((1 << pce_id) & disabled_pce_mask) | 272 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing |
| 274 | continue; | 273 | if ((1 << i) & ce_pce_map){ |
| 275 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); | 274 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); |
| 276 | lce_for_pce_entry = proc_create_data( | 275 | switch (g_nvdebug_state[res].chip_id){ |
| 277 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 278 | (void*)(uintptr_t)NV_LCE_FOR_PCE(pce_id)); | ||
| 279 | snprintf(file_name, 20, "grce_for_pce%d",pce_id); | ||
| 280 | grce_for_pce_entry = proc_create_data( | ||
| 281 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 282 | (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); | ||
| 283 | 276 | ||
| 284 | if (!lce_for_pce_entry || !grce_for_pce_entry) | 277 | case NV_CHIP_ID_PASCAL: |
| 285 | return -ENOMEM; | 278 | lce_for_pce_entry = proc_create_data( |
| 279 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 280 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id)); | ||
| 281 | break; | ||
| 282 | case NV_CHIP_ID_VOLTA: | ||
| 283 | lce_for_pce_entry = proc_create_data( | ||
| 284 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 285 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); | ||
| 286 | break; | ||
| 287 | case NV_CHIP_ID_AMPERE: | ||
| 288 | lce_for_pce_entry = proc_create_data( | ||
| 289 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 290 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | snprintf(file_name, 20, "grce_for_pce%d",pce_id); | ||
| 294 | grce_for_pce_entry = proc_create_data( | ||
| 295 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 296 | (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); | ||
| 297 | if (!lce_for_pce_entry || !grce_for_pce_entry) | ||
| 298 | return -ENOMEM; | ||
| 299 | pce_id++ | ||
| 300 | } | ||
| 286 | } | 301 | } |
| 287 | 302 | ||
| 288 | // TODO: Redo to num_pces | 303 | // TODO: Redo to num_pces |
