From b9d8f6a83a8e5fec38e9e20a54ee13838936fa10 Mon Sep 17 00:00:00 2001 From: Saman Sahebi Date: Thu, 14 Sep 2023 13:50:48 -0400 Subject: Created new read function in device_info for GRCE mappings and Pascal LCE mappings --- device_info_procfs.c | 31 ++++++++++++++++++++++++++++++- nvdebug.h | 10 +++++++++- nvdebug_entry.c | 46 +++++++++++++++++++++++++++++++--------------- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/device_info_procfs.c b/device_info_procfs.c index b139c36..d5350c8 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c @@ -9,7 +9,7 @@ // @param off Requested offset. Updated by number of characters written. // @return -errno on error, otherwise number of bytes written to *buf // Note: Parent `data` field MUST be the GPU index -static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) { +static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off){ char out[16]; int chars_written; struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; @@ -21,11 +21,40 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); *off += chars_written; return chars_written; +} +static ssize_t nvdebug_read4_pascal(struct file *f, char __user *buf, size_t size, loff_t *off){ + char out[16]; + int chars_written; + struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; + void* data = PDE_DATA(file_inode(f)); + struct combo local_combo = *(struct combo*) &data; + + // 32 bit register will always take less than 16 characters to print + if (size < 16 || *off != 0) + return 0; + if (local_combo.index % 2 == 0) + chars_written = scnprintf(out, 16, "%#0x\n", (nvdebug_readl(g, local_combo.offset) & 0x0f)); + else + chars_written = scnprintf(out, 16, "%#0x\n", (nvdebug_readl(g, local_combo.offset) & 0xf0) >> 4); + if (copy_to_user(buf, out, chars_written)) + printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); + *off += chars_written; + return chars_written; + +//(nvdebug_readl(g,NV_LCE_FOR_PCE_GP100(*(int*)PDE_DATA(file_inode(f)))) + + + } struct file_operations nvdebug_read_reg32_file_ops = { .read = nvdebug_reg32_read, .llseek = default_llseek, }; +// File operation for reading 4 bits in 32 bit register (used for Pascal copy engine offsets) +struct file_operations nvdebug_read4_pascal_file_ops = { + .read = nvdebug_read4_pascal, + .llseek = default_llseek, +}; typedef struct { int idx; // Current index in the device_info table diff --git a/nvdebug.h b/nvdebug.h index 8d78135..b0e6bb8 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -689,12 +689,20 @@ typedef union { Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. */ #define NV_PTOP_SCAL_NUM_CES 0x00022444 +// Defined number of GRCEs for a GPU +# define NV_GRCE_NUM 2 // Defined GRCE->CE mapping offsets from nvgpu -#define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4) +#define NV_GRCE_FOR_CE(i) (0x00104034+(i)*4) // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) +#define NV_LCE_FOR_PCE_TU104(i) (0x00104040+(i)*4) +// Defined struct for storing PCE index and offset for proc_create +struct combo { + uint32_t offset:32; + uint32_t index:32; +}; /* Physical Copy Engine (PCE) information On Pascal GPUs or newer, this register complements the above information by diff --git a/nvdebug_entry.c b/nvdebug_entry.c index d355151..3815e06 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -28,7 +28,7 @@ extern struct file_operations enable_channel_file_ops; extern struct file_operations switch_to_tsg_file_ops; extern struct file_operations device_info_file_ops; extern struct file_operations nvdebug_read_reg32_file_ops; - +extern struct file_operations nvdebug_read4_pascal_file_ops; // Bus types are global symbols in the kernel extern struct bus_type platform_bus_type; struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; @@ -261,23 +261,27 @@ int __init nvdebug_init(void) { (void*)NV_FUSE_GPC); // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ - + // Declare struct for storing pce index and offset + struct combo local_combo; + struct combo* local_combo_ptr = &local_combo; // Create a pce mask for iteration u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); - u32 num_pce = U32(hweight32(ce_pce_map)); char file_name[20]; - int pce_id; + int pce_id = 0; int i; - for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){ + for (i = 0; i < MAP_SIZE; i++){ // If pce is enabled, create files and iterate pce_id; otherwise, do nothing if ((1 << i) & ce_pce_map){ snprintf(file_name, 20, "lce_for_pce%d",pce_id); - switch (g_nvdebug_state[res].chip_id){ + // Depending on GPU architecture, fetch data for the LCE of particular PCE + switch (g_nvdebug_state[res].chip_id & 0xff0){ case NV_CHIP_ID_PASCAL: + local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id); + local_combo.index = pce_id; lce_for_pce_entry = proc_create_data( - file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id)); + file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), + *(void**)local_combo_ptr); break; case NV_CHIP_ID_VOLTA: lce_for_pce_entry = proc_create_data( @@ -289,16 +293,28 @@ int __init nvdebug_init(void) { file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); break; + case NV_CHIP_ID_TURING: + lce_for_pce_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id)); + break; + + } + // Make 2 files for 2 GRCEs + if (pce_id < NV_GRCE_NUM){ + local_combo.offset = NV_GRCE_FOR_CE(pce_id); + local_combo.index = 0; + snprintf(file_name, 20, "pce_for_grce%d",pce_id); + grce_for_pce_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), + *(void**)local_combo_ptr); } - snprintf(file_name, 20, "grce_for_pce%d",pce_id); - grce_for_pce_entry = proc_create_data( - file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); if (!lce_for_pce_entry || !grce_for_pce_entry) return -ENOMEM; - pce_id++ - } - } + pce_id++; + + } + } // TODO: Redo to num_pces num_gpcs_entry = proc_create_data( -- cgit v1.2.2