From bebffaf223e975ab8f6fcf5fb6bd6de814fb614c Mon Sep 17 00:00:00 2001 From: Saman Sahebi Date: Thu, 3 Aug 2023 18:00:31 -0400 Subject: patched issues with GPU compatibility for CE_MAP --- nvdebug.h | 13 +++++++++---- nvdebug.mod | 2 ++ nvdebug_entry.c | 51 +++++++++++++++++++++++++++++++++------------------ 3 files changed, 44 insertions(+), 22 deletions(-) create mode 100644 nvdebug.mod diff --git a/nvdebug.h b/nvdebug.h index 213a786..8d78135 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -389,7 +389,9 @@ typedef union { #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU #define NV_CHIP_ID_KEPLER 0x0E0 +#define NV_CHIP_ID_PASCAL 0x130 #define NV_CHIP_ID_VOLTA 0x140 +#define NV_CHIP_ID_TURING 0x160 #define NV_CHIP_ID_AMPERE 0x170 inline static const char* ARCH2NAME(uint32_t arch) { @@ -687,10 +689,12 @@ typedef union { Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. */ #define NV_PTOP_SCAL_NUM_CES 0x00022444 -//defined GRCE->CE mapping offset from nvgpu -#define NV_GRCE_FOR_CE(i)(0x00104034+(i)*4) -//defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) -#define NV_LCE_FOR_PCE(i)(0x00104040+(i)*4) +// Defined GRCE->CE mapping offsets from nvgpu +#define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4) +// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) +#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) +#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) +#define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) /* Physical Copy Engine (PCE) information On Pascal GPUs or newer, this register complements the above information by @@ -704,6 +708,7 @@ typedef union { Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. */ #define NV_CE_PCE_MAP 0x00104028 +#define MAP_SIZE 32 /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 
diff --git a/nvdebug.mod b/nvdebug.mod new file mode 100644 index 0000000..5ffaef7 --- /dev/null +++ b/nvdebug.mod @@ -0,0 +1,2 @@ +/home/saman63/nvdebug/runlist_procfs.o /home/saman63/nvdebug/device_info_procfs.o /home/saman63/nvdebug/runlist.o /home/saman63/nvdebug/mmu.o /home/saman63/nvdebug/nvdebug_entry.o + diff --git a/nvdebug_entry.c b/nvdebug_entry.c index c444ff7..d355151 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -259,30 +259,45 @@ int __init nvdebug_init(void) { num_gpcs_entry = proc_create_data( "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)NV_FUSE_GPC); - // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ - if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { + // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ + if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ - //create a pce mask for iteration + // Create a pce mask for iteration u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); u32 num_pce = U32(hweight32(ce_pce_map)); - u32 disabled_pce_mask = ~ce_pce_map; char file_name[20]; int pce_id; - for (pce_id = 0; pce_id < num_pce; pce_id++){ - //if pce is disabled, do nothing - if ((1 << pce_id) & disabled_pce_mask) - continue; - snprintf(file_name, 20, "lce_for_pce%d",pce_id); - lce_for_pce_entry = proc_create_data( - file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)(uintptr_t)NV_LCE_FOR_PCE(pce_id)); - snprintf(file_name, 20, "grce_for_pce%d",pce_id); - grce_for_pce_entry = proc_create_data( - file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); + int i; + for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){ + // If pce is enabled, create files and iterate pce_id; otherwise, do nothing + if ((1 << i) & ce_pce_map){ + snprintf(file_name, 20, "lce_for_pce%d",pce_id); + switch (g_nvdebug_state[res].chip_id){ - if (!lce_for_pce_entry || 
!grce_for_pce_entry) - return -ENOMEM; + case NV_CHIP_ID_PASCAL: + lce_for_pce_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id)); + break; + case NV_CHIP_ID_VOLTA: + lce_for_pce_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_LCE_FOR_PCE_GV100(pce_id)); + break; + case NV_CHIP_ID_AMPERE: + lce_for_pce_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); + break; + } + snprintf(file_name, 20, "grce_for_pce%d",pce_id); + grce_for_pce_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); + if (!lce_for_pce_entry || !grce_for_pce_entry) + return -ENOMEM; + pce_id++ + } } // TODO: Redo to num_pces -- cgit v1.2.2