diff options
| author | Benjamin Hadad IV <bh4@unc.edu> | 2023-07-19 13:02:52 -0400 |
|---|---|---|
| committer | Benjamin Hadad IV <bh4@unc.edu> | 2023-07-19 13:02:52 -0400 |
| commit | 33c915f08f5dc63674b158ecc18897494256a6d0 (patch) | |
| tree | 917bfa0e6b3b5f482ddb3180f40d2dac9b6dfed1 | |
| parent | bfb4dcf0e78954c0163f3a06a5a088c4d1b437a8 (diff) | |
Debugged device_info functionality
- Fixed device_info crash bugs
- Made further edits to display functionality
- Refactored code to enhance readability
| -rw-r--r-- | Makefile | 1 | ||||
| -rw-r--r-- | device_info_procfs.c | 53 | ||||
| -rw-r--r-- | nvdebug.h | 4 | ||||
| -rw-r--r-- | nvdebug_entry.c | 4 |
4 files changed, 24 insertions, 38 deletions
| @@ -5,6 +5,7 @@ KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --shor | |||
| 5 | 5 | ||
| 6 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) | 6 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) |
| 7 | ccflags-y += -I$(PWD)/include | 7 | ccflags-y += -I$(PWD)/include |
| 8 | ccflags-y += -std=gnu99 | ||
| 8 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include | 9 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include |
| 9 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu | 10 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu |
| 10 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include | 11 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include |
diff --git a/device_info_procfs.c b/device_info_procfs.c index b1e58b1..3cf4bc9 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
| @@ -37,7 +37,7 @@ static void* device_info_file_seq_start_previous(struct seq_file *s, loff_t *pos | |||
| 37 | if (*pos == 0) | 37 | if (*pos == 0) |
| 38 | idx = 0; | 38 | idx = 0; |
| 39 | // Number of possible info entries is fixed, and list is sparse | 39 | // Number of possible info entries is fixed, and list is sparse |
| 40 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1) | 40 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS) |
| 41 | return NULL; | 41 | return NULL; |
| 42 | return &idx; | 42 | return &idx; |
| 43 | } | 43 | } |
| @@ -49,7 +49,7 @@ static void* device_info_file_seq_start_ampere(struct seq_file *s, loff_t *pos) | |||
| 49 | idx = 0; | 49 | idx = 0; |
| 50 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 50 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 51 | // Number of possible info entries is fixed, and list is sparse | 51 | // Number of possible info entries is fixed, and list is sparse |
| 52 | if (idx >= (nvdebug_readl(g, 0x0224fc) >> 20)) | 52 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g)) |
| 53 | return NULL; | 53 | return NULL; |
| 54 | return &idx; | 54 | return &idx; |
| 55 | } | 55 | } |
| @@ -60,7 +60,7 @@ static void* device_info_file_seq_next_previous(struct seq_file *s, void *idx, | |||
| 60 | loff_t *pos) { | 60 | loff_t *pos) { |
| 61 | (*pos)++; // Required by seq interface | 61 | (*pos)++; // Required by seq interface |
| 62 | // Number of possible info entries is fixed, and list is sparse | 62 | // Number of possible info entries is fixed, and list is sparse |
| 63 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1) | 63 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS) |
| 64 | return NULL; | 64 | return NULL; |
| 65 | return idx; | 65 | return idx; |
| 66 | } | 66 | } |
| @@ -72,38 +72,11 @@ static void* device_info_file_seq_next_ampere(struct seq_file *s, void *idx, | |||
| 72 | (*pos)++; // Required by seq interface | 72 | (*pos)++; // Required by seq interface |
| 73 | // Number of possible info entries is fixed, and list is sparse | 73 | // Number of possible info entries is fixed, and list is sparse |
| 74 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 74 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 75 | if ((*(int*)idx)++ >= (nvdebug_readl(g, 0x0224fc) >> 20)) | 75 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g)) |
| 76 | return NULL; | 76 | return NULL; |
| 77 | return idx; | 77 | return idx; |
| 78 | } | 78 | } |
| 79 | /* | 79 | |
| 80 | // Steps to next record on Ampere GPUs. Returns new value of `idx`. | ||
| 81 | static void* device_info_file_seq_next_ampere(struct seq_file *s, void *idx, | ||
| 82 | loff_t *pos) { | ||
| 83 | (*pos)++; // Required by seq interface | ||
| 84 | // Number of possible info entries is fixed, and list is sparse | ||
| 85 | while(1) { | ||
| 86 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
| 87 | if ((*(int*)idx)++ >= (nvdebug_readl(g, 0x0224fc) >> 20)) | ||
| 88 | return NULL; | ||
| 89 | ptop_device_info_t curr_info; | ||
| 90 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx)); | ||
| 91 | if(!curr_info.raw && !*info_type) continue; | ||
| 92 | (*info_type)++; | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | while(1) { | ||
| 96 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
| 97 | if ((*(int*)idx)++ >= (nvdebug_readl(g, 0x0224fc) >> 20)) | ||
| 98 | return NULL; | ||
| 99 | ptop_device_info_t curr_info; | ||
| 100 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx)); | ||
| 101 | if(curr_info.raw & 0x80000000) continue; | ||
| 102 | break; | ||
| 103 | } | ||
| 104 | return idx; | ||
| 105 | } | ||
| 106 | */ | ||
| 107 | // Print info at index *idx. Returns non-zero on error. | 80 | // Print info at index *idx. Returns non-zero on error. |
| 108 | static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { | 81 | static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { |
| 109 | ptop_device_info_t curr_info; | 82 | ptop_device_info_t curr_info; |
| @@ -162,11 +135,21 @@ static int device_info_file_seq_show_ampere(struct seq_file *s, void *idx) { | |||
| 162 | // Check for read errors | 135 | // Check for read errors |
| 163 | if (curr_info.raw == -1) | 136 | if (curr_info.raw == -1) |
| 164 | return -EIO; | 137 | return -EIO; |
| 138 | // The info_type field is not available in the Ampere device_info data, so it must be inferred | ||
| 165 | int info_type = -1; | 139 | int info_type = -1; |
| 166 | if(curr_info.raw) { | 140 | if(curr_info.raw) { |
| 167 | if(*(int*)idx < 1 || !nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx) - 1)) info_type = 0; | 141 | for(int i = 0; i < NV_PTOP_DEVICE_INFO_TYPE_COUNT; i++) { |
| 168 | if(*(int*)idx < 2 || !nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx) - 2)) info_type = 1; | 142 | if(*(int*)idx == i) { |
| 169 | if(*(int*)idx < 3 || !nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx) - 3)) info_type = 2; | 143 | info_type = i; |
| 144 | break; | ||
| 145 | } | ||
| 146 | ptop_device_info_t prev_info; | ||
| 147 | prev_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx - i - 1)); | ||
| 148 | if(!prev_info.raw || !prev_info.has_next_entry_ampere) { | ||
| 149 | info_type = i; | ||
| 150 | break; | ||
| 151 | } | ||
| 152 | } | ||
| 170 | } | 153 | } |
| 171 | // Parse and print the data | 154 | // Parse and print the data |
| 172 | switch(info_type) { | 155 | switch(info_type) { |
| @@ -556,7 +556,9 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
| 556 | 556 | ||
| 557 | #define NV_PTOP_DEVICE_INFO_AMPERE(i) (0x00022800+(i)*4) | 557 | #define NV_PTOP_DEVICE_INFO_AMPERE(i) (0x00022800+(i)*4) |
| 558 | #define NV_PTOP_DEVICE_INFO_PREVIOUS(i) (0x00022700+(i)*4) | 558 | #define NV_PTOP_DEVICE_INFO_PREVIOUS(i) (0x00022700+(i)*4) |
| 559 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | 559 | #define NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g) (nvdebug_readl(g, 0x0224fc) >> 20) |
| 560 | #define NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS 64 | ||
| 561 | #define NV_PTOP_DEVICE_INFO_TYPE_COUNT 3 | ||
| 560 | typedef union { | 562 | typedef union { |
| 561 | struct { | 563 | struct { |
| 562 | uint32_t fault_id_ampere:7; | 564 | uint32_t fault_id_ampere:7; |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index d82c648..3dfe1e8 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -150,7 +150,7 @@ int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { | |||
| 150 | // Figure out how many runlists there are by checking the device info | 150 | // Figure out how many runlists there are by checking the device info |
| 151 | // registers. Runlists are always numbered sequentially, so we just have | 151 | // registers. Runlists are always numbered sequentially, so we just have |
| 152 | // to find the highest-valued one and add 1 to get the number of runlists. | 152 | // to find the highest-valued one and add 1 to get the number of runlists. |
| 153 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) { | 153 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS; i++) { |
| 154 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_PREVIOUS(i)); | 154 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_PREVIOUS(i)); |
| 155 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) | 155 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) |
| 156 | continue; | 156 | continue; |
| @@ -180,7 +180,7 @@ int create_runlist_files_ampere(int device_id, struct proc_dir_entry *dir) { | |||
| 180 | // Figure out how many runlists there are by checking the device info | 180 | // Figure out how many runlists there are by checking the device info |
| 181 | // registers. Runlists are always numbered sequentially, so we just have | 181 | // registers. Runlists are always numbered sequentially, so we just have |
| 182 | // to find the highest-valued one and add 1 to get the number of runlists. | 182 | // to find the highest-valued one and add 1 to get the number of runlists. |
| 183 | for (i = 0; i < (nvdebug_readl(&g_nvdebug_state[device_id], 0x0224fc) >> 20); i++) { | 183 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(&g_nvdebug_state[device_id]); i++) { |
| 184 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_AMPERE(i)); | 184 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_AMPERE(i)); |
| 185 | if (info.runlist_enum_ampere > max_rl_id) | 185 | if (info.runlist_enum_ampere > max_rl_id) |
| 186 | max_rl_id = info.runlist_enum; | 186 | max_rl_id = info.runlist_enum; |
