about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Saman Sahebi <saman63@cs.unc.edu> 2023-09-14 13:50:48 -0400
committer: Joshua Bakita <bakitajoshua@gmail.com> 2023-10-29 15:45:33 -0400
commit: b9d8f6a83a8e5fec38e9e20a54ee13838936fa10 (patch)
tree: 0b048c52e4d6ab283e3aabc240b859311b1d0a09
parent: bebffaf223e975ab8f6fcf5fb6bd6de814fb614c (diff)
Created new read function in device_info for GRCE mappings and Pascal LCE mappings
-rw-r--r-- device_info_procfs.c 31
-rw-r--r-- nvdebug.h 10
-rw-r--r-- nvdebug_entry.c 46
3 files changed, 70 insertions, 17 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c
index b139c36..d5350c8 100644
--- a/device_info_procfs.c
+++ b/device_info_procfs.c
@@ -9,7 +9,7 @@
9// @param off Requested offset. Updated by number of characters written. 9// @param off Requested offset. Updated by number of characters written.
10// @return -errno on error, otherwise number of bytes written to *buf 10// @return -errno on error, otherwise number of bytes written to *buf
11// Note: Parent `data` field MUST be the GPU index 11// Note: Parent `data` field MUST be the GPU index
12static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) { 12static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off){
13 char out[16]; 13 char out[16];
14 int chars_written; 14 int chars_written;
15 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; 15 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
@@ -22,10 +22,39 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size,
22 *off += chars_written; 22 *off += chars_written;
23 return chars_written; 23 return chars_written;
24} 24}
25static ssize_t nvdebug_read4_pascal(struct file *f, char __user *buf, size_t size, loff_t *off){
26 char out[16];
27 int chars_written;
28 struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
29 void* data = PDE_DATA(file_inode(f));
30 struct combo local_combo = *(struct combo*) &data;
31
32 // 32 bit register will always take less than 16 characters to print
33 if (size < 16 || *off != 0)
34 return 0;
35 if (local_combo.index % 2 == 0)
36 chars_written = scnprintf(out, 16, "%#0x\n", (nvdebug_readl(g, local_combo.offset) & 0x0f));
37 else
38 chars_written = scnprintf(out, 16, "%#0x\n", (nvdebug_readl(g, local_combo.offset) & 0xf0) >> 4);
39 if (copy_to_user(buf, out, chars_written))
40 printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
41 *off += chars_written;
42 return chars_written;
43
44//(nvdebug_readl(g,NV_LCE_FOR_PCE_GP100(*(int*)PDE_DATA(file_inode(f))))
45
46
47
48}
25struct file_operations nvdebug_read_reg32_file_ops = { 49struct file_operations nvdebug_read_reg32_file_ops = {
26 .read = nvdebug_reg32_read, 50 .read = nvdebug_reg32_read,
27 .llseek = default_llseek, 51 .llseek = default_llseek,
28}; 52};
53// File operation for reading 4 bits in 32 bit register (used for Pascal copy engine offsets)
54struct file_operations nvdebug_read4_pascal_file_ops = {
55 .read = nvdebug_read4_pascal,
56 .llseek = default_llseek,
57};
29 58
30typedef struct { 59typedef struct {
31 int idx; // Current index in the device_info table 60 int idx; // Current index in the device_info table
diff --git a/nvdebug.h b/nvdebug.h
index 8d78135..b0e6bb8 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -689,12 +689,20 @@ typedef union {
689 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 689 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
690*/ 690*/
691#define NV_PTOP_SCAL_NUM_CES 0x00022444 691#define NV_PTOP_SCAL_NUM_CES 0x00022444
692// Defined number of GRCEs for a GPU
693# define NV_GRCE_NUM 2
692// Defined GRCE->CE mapping offsets from nvgpu 694// Defined GRCE->CE mapping offsets from nvgpu
693#define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4) 695#define NV_GRCE_FOR_CE(i) (0x00104034+(i)*4)
694// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) 696// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu)
695#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) 697#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4)
696#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) 698#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4)
697#define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) 699#define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2)
700#define NV_LCE_FOR_PCE_TU104(i) (0x00104040+(i)*4)
701// Defined struct for storing PCE index and offset for proc_create
702struct combo {
703 uint32_t offset:32;
704 uint32_t index:32;
705};
698 706
699/* Physical Copy Engine (PCE) information 707/* Physical Copy Engine (PCE) information
700 On Pascal GPUs or newer, this register complements the above information by 708 On Pascal GPUs or newer, this register complements the above information by
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index d355151..3815e06 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -28,7 +28,7 @@ extern struct file_operations enable_channel_file_ops;
28extern struct file_operations switch_to_tsg_file_ops; 28extern struct file_operations switch_to_tsg_file_ops;
29extern struct file_operations device_info_file_ops; 29extern struct file_operations device_info_file_ops;
30extern struct file_operations nvdebug_read_reg32_file_ops; 30extern struct file_operations nvdebug_read_reg32_file_ops;
31 31extern struct file_operations nvdebug_read4_pascal_file_ops;
32// Bus types are global symbols in the kernel 32// Bus types are global symbols in the kernel
33extern struct bus_type platform_bus_type; 33extern struct bus_type platform_bus_type;
34struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 34struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
@@ -261,23 +261,27 @@ int __init nvdebug_init(void) {
261 (void*)NV_FUSE_GPC); 261 (void*)NV_FUSE_GPC);
262 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ 262 // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+
263 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ 263 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){
264 264 // Declare struct for storing pce index and offset
265 struct combo local_combo;
266 struct combo* local_combo_ptr = &local_combo;
265 // Create a pce mask for iteration 267 // Create a pce mask for iteration
266 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); 268 u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP);
267 u32 num_pce = U32(hweight32(ce_pce_map));
268 char file_name[20]; 269 char file_name[20];
269 int pce_id; 270 int pce_id = 0;
270 int i; 271 int i;
271 for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){ 272 for (i = 0; i < MAP_SIZE; i++){
272 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing 273 // If pce is enabled, create files and iterate pce_id; otherwise, do nothing
273 if ((1 << i) & ce_pce_map){ 274 if ((1 << i) & ce_pce_map){
274 snprintf(file_name, 20, "lce_for_pce%d",pce_id); 275 snprintf(file_name, 20, "lce_for_pce%d",pce_id);
275 switch (g_nvdebug_state[res].chip_id){ 276 // Depending on GPU architecture, fetch data for the LCE of particular PCE
277 switch (g_nvdebug_state[res].chip_id & 0xff0){
276 278
277 case NV_CHIP_ID_PASCAL: 279 case NV_CHIP_ID_PASCAL:
280 local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id);
281 local_combo.index = pce_id;
278 lce_for_pce_entry = proc_create_data( 282 lce_for_pce_entry = proc_create_data(
279 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 283 file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops),
280 (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id)); 284 *(void**)local_combo_ptr);
281 break; 285 break;
282 case NV_CHIP_ID_VOLTA: 286 case NV_CHIP_ID_VOLTA:
283 lce_for_pce_entry = proc_create_data( 287 lce_for_pce_entry = proc_create_data(
@@ -289,16 +293,28 @@ int __init nvdebug_init(void) {
289 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), 293 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
290 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); 294 (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id));
291 break; 295 break;
296 case NV_CHIP_ID_TURING:
297 lce_for_pce_entry = proc_create_data(
298 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
299 (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id));
300 break;
301
302 }
303 // Make 2 files for 2 GRCEs
304 if (pce_id < NV_GRCE_NUM){
305 local_combo.offset = NV_GRCE_FOR_CE(pce_id);
306 local_combo.index = 0;
307 snprintf(file_name, 20, "pce_for_grce%d",pce_id);
308 grce_for_pce_entry = proc_create_data(
309 file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops),
310 *(void**)local_combo_ptr);
292 } 311 }
293 snprintf(file_name, 20, "grce_for_pce%d",pce_id);
294 grce_for_pce_entry = proc_create_data(
295 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
296 (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id));
297 if (!lce_for_pce_entry || !grce_for_pce_entry) 312 if (!lce_for_pce_entry || !grce_for_pce_entry)
298 return -ENOMEM; 313 return -ENOMEM;
299 pce_id++ 314 pce_id++;
300 } 315
301 } 316 }
317 }
302 318
303 // TODO: Redo to num_pces 319 // TODO: Redo to num_pces
304 num_gpcs_entry = proc_create_data( 320 num_gpcs_entry = proc_create_data(