diff options
author | Saman Sahebi <saman63@cs.unc.edu> | 2023-09-14 13:50:48 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-10-29 15:45:33 -0400 |
commit | b9d8f6a83a8e5fec38e9e20a54ee13838936fa10 (patch) | |
tree | 0b048c52e4d6ab283e3aabc240b859311b1d0a09 | |
parent | bebffaf223e975ab8f6fcf5fb6bd6de814fb614c (diff) |
Created new read function in device_info for GRCE mappings and Pascal LCE mappings
-rw-r--r-- | device_info_procfs.c | 31 | ||||
-rw-r--r-- | nvdebug.h | 10 | ||||
-rw-r--r-- | nvdebug_entry.c | 46 |
3 files changed, 70 insertions, 17 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index b139c36..d5350c8 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
@@ -9,7 +9,7 @@ | |||
9 | // @param off Requested offset. Updated by number of characters written. | 9 | // @param off Requested offset. Updated by number of characters written. |
10 | // @return -errno on error, otherwise number of bytes written to *buf | 10 | // @return -errno on error, otherwise number of bytes written to *buf |
11 | // Note: Parent `data` field MUST be the GPU index | 11 | // Note: Parent `data` field MUST be the GPU index |
12 | static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) { | 12 | static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off){ |
13 | char out[16]; | 13 | char out[16]; |
14 | int chars_written; | 14 | int chars_written; |
15 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; | 15 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
@@ -22,10 +22,39 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, | |||
22 | *off += chars_written; | 22 | *off += chars_written; |
23 | return chars_written; | 23 | return chars_written; |
24 | } | 24 | } |
25 | static ssize_t nvdebug_read4_pascal(struct file *f, char __user *buf, size_t size, loff_t *off){ | ||
26 | char out[16]; | ||
27 | int chars_written; | ||
28 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; | ||
29 | void* data = PDE_DATA(file_inode(f)); | ||
30 | struct combo local_combo = *(struct combo*) &data; | ||
31 | |||
32 | // 32 bit register will always take less than 16 characters to print | ||
33 | if (size < 16 || *off != 0) | ||
34 | return 0; | ||
35 | if (local_combo.index % 2 == 0) | ||
36 | chars_written = scnprintf(out, 16, "%#0x\n", (nvdebug_readl(g, local_combo.offset) & 0x0f)); | ||
37 | else | ||
38 | chars_written = scnprintf(out, 16, "%#0x\n", (nvdebug_readl(g, local_combo.offset) & 0xf0) >> 4); | ||
39 | if (copy_to_user(buf, out, chars_written)) | ||
40 | printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); | ||
41 | *off += chars_written; | ||
42 | return chars_written; | ||
43 | |||
44 | //(nvdebug_readl(g,NV_LCE_FOR_PCE_GP100(*(int*)PDE_DATA(file_inode(f)))) | ||
45 | |||
46 | |||
47 | |||
48 | } | ||
25 | struct file_operations nvdebug_read_reg32_file_ops = { | 49 | struct file_operations nvdebug_read_reg32_file_ops = { |
26 | .read = nvdebug_reg32_read, | 50 | .read = nvdebug_reg32_read, |
27 | .llseek = default_llseek, | 51 | .llseek = default_llseek, |
28 | }; | 52 | }; |
53 | // File operation for reading 4 bits in 32 bit register (used for Pascal copy engine offsets) | ||
54 | struct file_operations nvdebug_read4_pascal_file_ops = { | ||
55 | .read = nvdebug_read4_pascal, | ||
56 | .llseek = default_llseek, | ||
57 | }; | ||
29 | 58 | ||
30 | typedef struct { | 59 | typedef struct { |
31 | int idx; // Current index in the device_info table | 60 | int idx; // Current index in the device_info table |
@@ -689,12 +689,20 @@ typedef union { | |||
689 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 689 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
690 | */ | 690 | */ |
691 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 691 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
692 | // Defined number of GRCEs for a GPU | ||
693 | # define NV_GRCE_NUM 2 | ||
692 | // Defined GRCE->CE mapping offsets from nvgpu | 694 | // Defined GRCE->CE mapping offsets from nvgpu |
693 | #define NV_GRCE_FOR_CE_GV100(i) (0x00104034+(i)*4) | 695 | #define NV_GRCE_FOR_CE(i) (0x00104034+(i)*4) |
694 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 696 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) |
695 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | 697 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) |
696 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | 698 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) |
697 | #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) | 699 | #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) |
700 | #define NV_LCE_FOR_PCE_TU104(i) (0x00104040+(i)*4) | ||
701 | // Defined struct for storing PCE index and offset for proc_create | ||
702 | struct combo { | ||
703 | uint32_t offset:32; | ||
704 | uint32_t index:32; | ||
705 | }; | ||
698 | 706 | ||
699 | /* Physical Copy Engine (PCE) information | 707 | /* Physical Copy Engine (PCE) information |
700 | On Pascal GPUs or newer, this register complements the above information by | 708 | On Pascal GPUs or newer, this register complements the above information by |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index d355151..3815e06 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -28,7 +28,7 @@ extern struct file_operations enable_channel_file_ops; | |||
28 | extern struct file_operations switch_to_tsg_file_ops; | 28 | extern struct file_operations switch_to_tsg_file_ops; |
29 | extern struct file_operations device_info_file_ops; | 29 | extern struct file_operations device_info_file_ops; |
30 | extern struct file_operations nvdebug_read_reg32_file_ops; | 30 | extern struct file_operations nvdebug_read_reg32_file_ops; |
31 | 31 | extern struct file_operations nvdebug_read4_pascal_file_ops; | |
32 | // Bus types are global symbols in the kernel | 32 | // Bus types are global symbols in the kernel |
33 | extern struct bus_type platform_bus_type; | 33 | extern struct bus_type platform_bus_type; |
34 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 34 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
@@ -261,23 +261,27 @@ int __init nvdebug_init(void) { | |||
261 | (void*)NV_FUSE_GPC); | 261 | (void*)NV_FUSE_GPC); |
262 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ | 262 | // In both nouveau and nvgpu, the PCE_MAP register is available on Pascal+ |
263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ | 263 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL){ |
264 | 264 | // Declare struct for storing pce index and offset | |
265 | struct combo local_combo; | ||
266 | struct combo* local_combo_ptr = &local_combo; | ||
265 | // Create a pce mask for iteration | 267 | // Create a pce mask for iteration |
266 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); | 268 | u32 ce_pce_map = nvdebug_readl(&g_nvdebug_state[device_id], NV_CE_PCE_MAP); |
267 | u32 num_pce = U32(hweight32(ce_pce_map)); | ||
268 | char file_name[20]; | 269 | char file_name[20]; |
269 | int pce_id; | 270 | int pce_id = 0; |
270 | int i; | 271 | int i; |
271 | for (i = 0; pce_id = 0; pce_id < num_pce; i < MAP_SIZE; i++){ | 272 | for (i = 0; i < MAP_SIZE; i++){ |
272 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing | 273 | // If pce is enabled, create files and iterate pce_id; otherwise, do nothing |
273 | if ((1 << i) & ce_pce_map){ | 274 | if ((1 << i) & ce_pce_map){ |
274 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); | 275 | snprintf(file_name, 20, "lce_for_pce%d",pce_id); |
275 | switch (g_nvdebug_state[res].chip_id){ | 276 | // Depending on GPU architecture, fetch data for the LCE of particular PCE |
277 | switch (g_nvdebug_state[res].chip_id & 0xff0){ | ||
276 | 278 | ||
277 | case NV_CHIP_ID_PASCAL: | 279 | case NV_CHIP_ID_PASCAL: |
280 | local_combo.offset = NV_LCE_FOR_PCE_GP100(pce_id); | ||
281 | local_combo.index = pce_id; | ||
278 | lce_for_pce_entry = proc_create_data( | 282 | lce_for_pce_entry = proc_create_data( |
279 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 283 | file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), |
280 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GP100(pce_id)); | 284 | *(void**)local_combo_ptr); |
281 | break; | 285 | break; |
282 | case NV_CHIP_ID_VOLTA: | 286 | case NV_CHIP_ID_VOLTA: |
283 | lce_for_pce_entry = proc_create_data( | 287 | lce_for_pce_entry = proc_create_data( |
@@ -289,16 +293,28 @@ int __init nvdebug_init(void) { | |||
289 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 293 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
290 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); | 294 | (void*)(uintptr_t)NV_LCE_FOR_PCE_GA100(pce_id)); |
291 | break; | 295 | break; |
296 | case NV_CHIP_ID_TURING: | ||
297 | lce_for_pce_entry = proc_create_data( | ||
298 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
299 | (void*)(uintptr_t)NV_LCE_FOR_PCE_TU104(pce_id)); | ||
300 | break; | ||
301 | |||
302 | } | ||
303 | // Make 2 files for 2 GRCEs | ||
304 | if (pce_id < NV_GRCE_NUM){ | ||
305 | local_combo.offset = NV_GRCE_FOR_CE(pce_id); | ||
306 | local_combo.index = 0; | ||
307 | snprintf(file_name, 20, "pce_for_grce%d",pce_id); | ||
308 | grce_for_pce_entry = proc_create_data( | ||
309 | file_name, 0444, dir, compat_ops(&nvdebug_read4_pascal_file_ops), | ||
310 | *(void**)local_combo_ptr); | ||
292 | } | 311 | } |
293 | snprintf(file_name, 20, "grce_for_pce%d",pce_id); | ||
294 | grce_for_pce_entry = proc_create_data( | ||
295 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
296 | (void*)(uintptr_t)NV_GRCE_FOR_CE(pce_id)); | ||
297 | if (!lce_for_pce_entry || !grce_for_pce_entry) | 312 | if (!lce_for_pce_entry || !grce_for_pce_entry) |
298 | return -ENOMEM; | 313 | return -ENOMEM; |
299 | pce_id++ | 314 | pce_id++; |
300 | } | 315 | |
301 | } | 316 | } |
317 | } | ||
302 | 318 | ||
303 | // TODO: Redo to num_pces | 319 | // TODO: Redo to num_pces |
304 | num_gpcs_entry = proc_create_data( | 320 | num_gpcs_entry = proc_create_data( |