aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSaman Sahebi <saman63@cs.unc.edu>2023-08-03 18:00:31 -0400
committerJoshua Bakita <bakitajoshua@gmail.com>2023-10-29 15:45:32 -0400
commitbebffaf223e975ab8f6fcf5fb6bd6de814fb614c (patch)
tree694f33d3e2b22590fd264e58a42f74ce64c645b0
parent7fda166c68c58887a90521911228ef734c7d4e4f (diff)
patched issues with GPU compatibility for CE_MAP
-rw-r--r--nvdebug.h13
-rw-r--r--nvdebug.mod2
-rw-r--r--nvdebug_entry.c51
3 files changed, 44 insertions, 22 deletions
diff --git a/nvdebug.h b/nvdebug.h
index 213a786..8d78135 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -389,7 +389,9 @@ typedef union {
389#define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 389#define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060
390#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU 390#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU
391#define NV_CHIP_ID_KEPLER 0x0E0 391#define NV_CHIP_ID_KEPLER 0x0E0
392#define NV_CHIP_ID_PASCAL 0x130
392#define NV_CHIP_ID_VOLTA 0x140 393#define NV_CHIP_ID_VOLTA 0x140
394#define NV_CHIP_ID_TURING 0x160
393#define NV_CHIP_ID_AMPERE 0x170 395#define NV_CHIP_ID_AMPERE 0x170
394 396
395inline static const char* ARCH2NAME(uint32_t arch) { 397inline static const char* ARCH2NAME(uint32_t arch) {
@@ -687,10 +689,12 @@ typedef union {
687 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 689 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
688*/ 690*/
689#define NV_PTOP_SCAL_NUM_CES 0x00022444 691#define NV_PTOP_SCAL_NUM_CES 0x00022444
690//defined GRCE->CE mapping offset from nvgpu 692// Defined GRCE->CE mapping offsets from nvgpu
691#define NV_GRCE_FOR_CE(i)(0x00104034+(i)*4) 693#define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4)
692//defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) 694// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu)
693#define NV_LCE_FOR_PCE(i)(0x00104040+(i)*4) 695#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4)
696#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4)
697#define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2)
694 698
695/* Physical Copy Engine (PCE) information 699/* Physical Copy Engine (PCE) information
696 On Pascal GPUs or newer, this register complements the above information by 700 On Pascal GPUs or newer, this register complements the above information by
@@ -704,6 +708,7 @@ typedef union {
704 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 708 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
705*/ 709*/
706#define NV_CE_PCE_MAP 0x00104028 710#define NV_CE_PCE_MAP 0x00104028
711#define MAP_SIZE 32
707 712
708 713
709/* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 714/* Location of the 1Kb instance block with page tables for BAR1 and BAR2.
diff --git a/nvdebug.mod b/nvdebug.mod
new file mode 100644
index 0000000..5ffaef7
--- /dev/null
+++ b/nvdebug.mod
@@ -0,0 +1,2 @@
1/home/saman63/nvdebug/runlist_procfs.o /home/saman63/nvdebug/device_info_procfs.o /home/saman63/nvdebug/runlist.o /home/saman63/nvdebug/mmu.o /home/saman63/nvdebug/nvdebug_entry.o
2
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index c444ff7..d355151 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -259,30 +259,45 @@ int __init nvdebug_init(void) {
259 num_gpcs_entry = proc_create_data( 259 num_gpcs_entry = proc_create_data(
260 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 260 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
261 (void*)NV_FUSE_GPC); 261 (void*)NV_FUSE_GPC);
262 // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ 262 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+
263 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { 263 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){
264 264
265 //create a pce mask for iteration 265 // Create a pce mask for iteration
266 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); 266 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP);
267 u32 num_pce = U32(hweight32(ce_pce_map)); 267 u32 num_pce = U32(hweight32(ce_pce_map));
268 u32 disabled_pce_mask = ~ce_pce_map;
269 char file_name[20]; 268 char file_name[20];
270 int pce_id; 269 int pce_id;
271 for (pce_id = 0; pce_id < num_pce; pce_id++){ 270 int i;
272 //if pce is disabled, do nothing 271 for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){
273 if ((1 << pce_id) & disabled_pce_mask) 272 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing
274 continue; 273 if ((1 << i) & ce_pce_map){
275 snprintf(file_name, 20, "lce_for_pce%d",pce_id); 274 snprintf(file_name, 20, "lce_for_pce%d",pce_id);
276 lce_for_pce_entry = proc_create_data( 275 switch (g_nvdebug_state[res].chip_id){
277 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
278 (void*)(uintptr_t)NV_LCE_FOR_PCE(pce_id));
279 snprintf(file_name, 20, "grce_for_pce%d",pce_id);
280 grce_for_pce_entry = proc_create_data(
281 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
282 (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id));
283 276
284 if (!lce_for_pce_entry || !grce_for_pce_entry) 277 case NV_CHIP_ID_PASCAL:
285 return -ENOMEM; 278 lce_for_pce_entry = proc_create_data(
279 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
280 (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id));
281 break;
282 case NV_CHIP_ID_VOLTA:
283 lce_for_pce_entry = proc_create_data(
284 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
285 (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id));
286 break;
287 case NV_CHIP_ID_AMPERE:
288 lce_for_pce_entry = proc_create_data(
289 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
290 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id));
291 break;
292 }
293 snprintf(file_name, 20, "grce_for_pce%d",pce_id);
294 grce_for_pce_entry = proc_create_data(
295 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
296 (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id));
297 if (!lce_for_pce_entry || !grce_for_pce_entry)
298 return -ENOMEM;
299 pce_id++
300 }
286 } 301 }
287 302
288 // TODO: Redo to num_pces 303 // TODO: Redo to num_pces