diff options
| author | Benjamin Hadad IV <bh4@unc.edu> | 2023-07-28 11:39:28 -0400 |
|---|---|---|
| committer | Benjamin Hadad IV <bh4@unc.edu> | 2023-07-28 11:39:28 -0400 |
| commit | 845960fc1b15995fdbd6d61c384567652a150bc4 (patch) | |
| tree | 14aad318d8694c6266576a1d45ced0d8fc502a6d | |
| parent | 8a57aaeba41c43233c323d7e0fc8bf1a81ebc65e (diff) | |
Refactored various systems and debugged minor issues
- Added device_info_iter
- Merged functions in device_info_procfs.c
- Separated device_info data structs by version in nvdebug.h
- Fixed issue with device_info runlist ID data
| -rw-r--r-- | device_info_procfs.c | 110 | ||||
| -rw-r--r-- | nvdebug.h | 34 | ||||
| -rw-r--r-- | nvdebug_entry.c | 6 |
3 files changed, 69 insertions, 81 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index 3cf4bc9..5ddf240 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
| @@ -27,61 +27,57 @@ struct file_operations nvdebug_read_reg32_file_ops = { | |||
| 27 | .llseek = default_llseek, | 27 | .llseek = default_llseek, |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| 30 | typedef struct { | ||
| 31 | int total_entries; | ||
| 32 | int index; | ||
| 33 | int type_of_next_entry; | ||
| 34 | } device_info_iter; | ||
| 35 | |||
| 30 | //// ==v== PTOP_DEVICE_INFO ==v== //// | 36 | //// ==v== PTOP_DEVICE_INFO ==v== //// |
| 31 | 37 | ||
| 38 | static void* device_info_seq_start_backend(struct seq_file *s, loff_t *pos, int initial_entry_value, int total_entries) { | ||
| 39 | static device_info_iter idx; | ||
| 40 | // If start of sequence, reset `idx` | ||
| 41 | if (*pos == 0) { | ||
| 42 | idx.index = 0; | ||
| 43 | idx.type_of_next_entry = initial_entry_value; | ||
| 44 | } | ||
| 45 | idx.total_entries = total_entries; | ||
| 46 | // Number of possible info entries is fixed, and list is sparse | ||
| 47 | if (idx.index >= idx.total_entries) | ||
| 48 | return NULL; | ||
| 49 | return &idx; | ||
| 50 | } | ||
| 51 | |||
| 32 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. | 52 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. |
| 33 | // Initializes iterator `idx` state and returns it. Ends sequence on NULL. | 53 | // Initializes iterator `idx` state and returns it. Ends sequence on NULL. |
| 34 | static void* device_info_file_seq_start_previous(struct seq_file *s, loff_t *pos) { | 54 | static void* device_info_file_seq_start_previous(struct seq_file *s, loff_t *pos) { |
| 35 | static int idx; | 55 | return device_info_seq_start_backend(s, pos, -1, NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS); |
| 36 | // If start of sequence, reset `idx` | ||
| 37 | if (*pos == 0) | ||
| 38 | idx = 0; | ||
| 39 | // Number of possible info entries is fixed, and list is sparse | ||
| 40 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS) | ||
| 41 | return NULL; | ||
| 42 | return &idx; | ||
| 43 | } | 56 | } |
| 44 | 57 | ||
| 45 | static void* device_info_file_seq_start_ampere(struct seq_file *s, loff_t *pos) { | 58 | static void* device_info_file_seq_start_ampere(struct seq_file *s, loff_t *pos) { |
| 46 | static int idx; | 59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 47 | // If start of sequence, reset `idx` | 60 | return device_info_seq_start_backend(s, pos, 0, NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g)); |
| 48 | if (*pos == 0) | ||
| 49 | idx = 0; | ||
| 50 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
| 51 | // Number of possible info entries is fixed, and list is sparse | ||
| 52 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g)) | ||
| 53 | return NULL; | ||
| 54 | return &idx; | ||
| 55 | } | 61 | } |
| 56 | 62 | ||
| 57 | // Steps to next record. Returns new value of `idx`. | 63 | // Steps to next record. Returns new value of `idx`. |
| 58 | // Calls show() on non-NULL return | 64 | // Calls show() on non-NULL return |
| 59 | static void* device_info_file_seq_next_previous(struct seq_file *s, void *idx, | 65 | static void* device_info_file_seq_next(struct seq_file *s, void *idx, |
| 60 | loff_t *pos) { | 66 | loff_t *pos) { |
| 67 | device_info_iter *idx_iter = (device_info_iter*)idx; | ||
| 61 | (*pos)++; // Required by seq interface | 68 | (*pos)++; // Required by seq interface |
| 62 | // Number of possible info entries is fixed, and list is sparse | 69 | // Number of possible info entries is fixed, and list is sparse |
| 63 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS) | 70 | if (idx_iter->index++ >= idx_iter->total_entries) |
| 64 | return NULL; | 71 | return NULL; |
| 65 | return idx; | 72 | return idx; |
| 66 | } | 73 | } |
| 67 | 74 | ||
| 68 | // Steps to next record. Returns new value of `idx`. | ||
| 69 | // Calls show() on non-NULL return | ||
| 70 | static void* device_info_file_seq_next_ampere(struct seq_file *s, void *idx, | ||
| 71 | loff_t *pos) { | ||
| 72 | (*pos)++; // Required by seq interface | ||
| 73 | // Number of possible info entries is fixed, and list is sparse | ||
| 74 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
| 75 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g)) | ||
| 76 | return NULL; | ||
| 77 | return idx; | ||
| 78 | } | ||
| 79 | |||
| 80 | // Print info at index *idx. Returns non-zero on error. | 75 | // Print info at index *idx. Returns non-zero on error. |
| 81 | static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { | 76 | static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { |
| 82 | ptop_device_info_t curr_info; | 77 | device_info_iter *idx_iter = (device_info_iter*)idx; |
| 78 | ptop_device_info_previous_t curr_info; | ||
| 83 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 79 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 84 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_PREVIOUS(*(int*)idx)); | 80 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_PREVIOUS(idx_iter->index)); |
| 85 | // Check for read errors | 81 | // Check for read errors |
| 86 | if (curr_info.raw == -1) | 82 | if (curr_info.raw == -1) |
| 87 | return -EIO; | 83 | return -EIO; |
| @@ -129,54 +125,42 @@ static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { | |||
| 129 | 125 | ||
| 130 | // Print info at index *idx for Ampere GPUs. Returns non-zero on error. | 126 | // Print info at index *idx for Ampere GPUs. Returns non-zero on error. |
| 131 | static int device_info_file_seq_show_ampere(struct seq_file *s, void *idx) { | 127 | static int device_info_file_seq_show_ampere(struct seq_file *s, void *idx) { |
| 132 | ptop_device_info_t curr_info; | 128 | device_info_iter *idx_iter = (device_info_iter*)idx; |
| 129 | ptop_device_info_ampere_t curr_info; | ||
| 133 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 130 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 134 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx)); | 131 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(idx_iter->index)); |
| 135 | // Check for read errors | 132 | // Check for read errors |
| 136 | if (curr_info.raw == -1) | 133 | if (curr_info.raw == -1) |
| 137 | return -EIO; | 134 | return -EIO; |
| 138 | // The info_type field is not available in the Ampere device_info data, so it must be inferred | 135 | // The info_type field is not available in the Ampere device_info data, so it must be inferred |
| 139 | int info_type = -1; | 136 | int info_type = curr_info.raw ? idx_iter->type_of_next_entry : -1; |
| 140 | if(curr_info.raw) { | ||
| 141 | for(int i = 0; i < NV_PTOP_DEVICE_INFO_TYPE_COUNT; i++) { | ||
| 142 | if(*(int*)idx == i) { | ||
| 143 | info_type = i; | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | ptop_device_info_t prev_info; | ||
| 147 | prev_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(*(int*)idx - i - 1)); | ||
| 148 | if(!prev_info.raw || !prev_info.has_next_entry_ampere) { | ||
| 149 | info_type = i; | ||
| 150 | break; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } | ||
| 154 | // Parse and print the data | 137 | // Parse and print the data |
| 155 | switch(info_type) { | 138 | switch(info_type) { |
| 156 | case 0: | 139 | case 0: |
| 157 | seq_printf(s, "| instance %d\n", curr_info.inst_id_ampere); | 140 | seq_printf(s, "| instance %d\n", curr_info.inst_id); |
| 158 | seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id_ampere); | 141 | seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id); |
| 159 | seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type_ampere); | 142 | seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type); |
| 160 | if (curr_info.engine_type_ampere < ENGINE_TYPES_LEN) | 143 | if (curr_info.engine_type < ENGINE_TYPES_LEN) |
| 161 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type_ampere]); | 144 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); |
| 162 | else | 145 | else |
| 163 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); | 146 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); |
| 164 | break; | 147 | break; |
| 165 | case 1: | 148 | case 1: |
| 166 | seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base_ampere << 12); | 149 | seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 12); |
| 167 | seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum_ampere); | 150 | seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum); |
| 168 | break; | 151 | break; |
| 169 | case 2: | 152 | case 2: |
| 170 | seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum_ampere); | 153 | seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum); |
| 171 | seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum_ampere); | 154 | seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum); |
| 172 | break; | 155 | break; |
| 173 | default: | 156 | default: |
| 174 | // Device info records are sparse, so skip unset or unknown ones | 157 | // Device info records are sparse, so skip unset or unknown ones |
| 175 | return 0; | 158 | return 0; |
| 176 | } | 159 | } |
| 177 | 160 | if(info_type != -1) idx_iter->type_of_next_entry++; | |
| 178 | // Draw a line between each device entry | 161 | // Draw a line between each device entry |
| 179 | if (!curr_info.has_next_entry_ampere) { | 162 | if (!curr_info.has_next_entry) { |
| 163 | idx_iter->type_of_next_entry = 0; | ||
| 180 | seq_printf(s, "+---------------------+\n"); | 164 | seq_printf(s, "+---------------------+\n"); |
| 181 | } | 165 | } |
| 182 | return 0; | 166 | return 0; |
| @@ -189,14 +173,14 @@ static void device_info_file_seq_stop(struct seq_file *s, void *idx) { | |||
| 189 | 173 | ||
| 190 | static const struct seq_operations device_info_file_seq_ops_previous = { | 174 | static const struct seq_operations device_info_file_seq_ops_previous = { |
| 191 | .start = device_info_file_seq_start_previous, | 175 | .start = device_info_file_seq_start_previous, |
| 192 | .next = device_info_file_seq_next_previous, | 176 | .next = device_info_file_seq_next, |
| 193 | .stop = device_info_file_seq_stop, | 177 | .stop = device_info_file_seq_stop, |
| 194 | .show = device_info_file_seq_show_previous, | 178 | .show = device_info_file_seq_show_previous, |
| 195 | }; | 179 | }; |
| 196 | 180 | ||
| 197 | static const struct seq_operations device_info_file_seq_ops_ampere = { | 181 | static const struct seq_operations device_info_file_seq_ops_ampere = { |
| 198 | .start = device_info_file_seq_start_ampere, | 182 | .start = device_info_file_seq_start_ampere, |
| 199 | .next = device_info_file_seq_next_ampere, | 183 | .next = device_info_file_seq_next, |
| 200 | .stop = device_info_file_seq_stop, | 184 | .stop = device_info_file_seq_stop, |
| 201 | .show = device_info_file_seq_show_ampere, | 185 | .show = device_info_file_seq_show_ampere, |
| 202 | }; | 186 | }; |
| @@ -567,25 +567,29 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
| 567 | #define NV_PTOP_DEVICE_INFO_TYPE_COUNT 3 | 567 | #define NV_PTOP_DEVICE_INFO_TYPE_COUNT 3 |
| 568 | typedef union { | 568 | typedef union { |
| 569 | struct { | 569 | struct { |
| 570 | uint32_t fault_id_ampere:7; | 570 | uint32_t fault_id:7; |
| 571 | uint32_t padding0_ampere:9; | 571 | uint32_t padding0:9; |
| 572 | uint32_t inst_id_ampere:4; | 572 | uint32_t inst_id:4; |
| 573 | uint32_t padding1_ampere:4; | 573 | uint32_t padding1:4; |
| 574 | enum ENGINE_TYPES engine_type_ampere:7; | 574 | enum ENGINE_TYPES engine_type:7; |
| 575 | bool has_next_entry_ampere:1; | 575 | bool has_next_entry:1; |
| 576 | } __attribute__((packed)); | 576 | } __attribute__((packed)); |
| 577 | struct { | 577 | struct { |
| 578 | uint32_t reset_enum_ampere:5; | 578 | uint32_t reset_enum:5; |
| 579 | uint32_t padding2_ampere:7; | 579 | uint32_t padding2:7; |
| 580 | uint32_t pri_base_ampere:12; | 580 | uint32_t pri_base:12; |
| 581 | uint32_t padding3_ampere:8; | 581 | uint32_t padding3:8; |
| 582 | } __attribute__((packed)); | 582 | } __attribute__((packed)); |
| 583 | struct { | 583 | struct { |
| 584 | uint32_t engine_enum_ampere:2; | 584 | uint32_t engine_enum:2; |
| 585 | uint32_t padding4_ampere:4; | 585 | uint32_t padding4:8; |
| 586 | uint32_t runlist_enum_ampere:14; | 586 | uint32_t runlist_enum:14; |
| 587 | uint32_t padding5_ampere:12; | 587 | uint32_t padding5:8; |
| 588 | } __attribute__((packed)); | 588 | } __attribute__((packed)); |
| 589 | uint32_t raw; | ||
| 590 | } ptop_device_info_ampere_t; | ||
| 591 | |||
| 592 | typedef union { | ||
| 589 | // DATA type fields | 593 | // DATA type fields |
| 590 | struct { | 594 | struct { |
| 591 | enum DEVICE_INFO_TYPE info_type:2; | 595 | enum DEVICE_INFO_TYPE info_type:2; |
| @@ -622,7 +626,7 @@ typedef union { | |||
| 622 | uint32_t padding9:1; | 626 | uint32_t padding9:1; |
| 623 | } __attribute__((packed)); | 627 | } __attribute__((packed)); |
| 624 | uint32_t raw; | 628 | uint32_t raw; |
| 625 | } ptop_device_info_t; | 629 | } ptop_device_info_previous_t; |
| 626 | 630 | ||
| 627 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | 631 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 |
| 628 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | 632 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 3dfe1e8..d3d934e 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -142,7 +142,7 @@ int probe_and_cache_device(void) { | |||
| 142 | 142 | ||
| 143 | // Create files `/proc/gpu#/runlist#`, world readable | 143 | // Create files `/proc/gpu#/runlist#`, world readable |
| 144 | int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { | 144 | int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { |
| 145 | ptop_device_info_t info; | 145 | ptop_device_info_previous_t info; |
| 146 | struct proc_dir_entry *rl_entry; | 146 | struct proc_dir_entry *rl_entry; |
| 147 | int i, rl_id; | 147 | int i, rl_id; |
| 148 | char runlist_name[12]; | 148 | char runlist_name[12]; |
| @@ -172,7 +172,7 @@ int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { | |||
| 172 | 172 | ||
| 173 | // Create files `/proc/gpu#/runlist#`, world readable | 173 | // Create files `/proc/gpu#/runlist#`, world readable |
| 174 | int create_runlist_files_ampere(int device_id, struct proc_dir_entry *dir) { | 174 | int create_runlist_files_ampere(int device_id, struct proc_dir_entry *dir) { |
| 175 | ptop_device_info_t info; | 175 | ptop_device_info_ampere_t info; |
| 176 | struct proc_dir_entry *rl_entry; | 176 | struct proc_dir_entry *rl_entry; |
| 177 | int i, rl_id; | 177 | int i, rl_id; |
| 178 | char runlist_name[12]; | 178 | char runlist_name[12]; |
| @@ -182,7 +182,7 @@ int create_runlist_files_ampere(int device_id, struct proc_dir_entry *dir) { | |||
| 182 | // to find the highest-valued one and add 1 to get the number of runlists. | 182 | // to find the highest-valued one and add 1 to get the number of runlists. |
| 183 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(&g_nvdebug_state[device_id]); i++) { | 183 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(&g_nvdebug_state[device_id]); i++) { |
| 184 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_AMPERE(i)); | 184 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_AMPERE(i)); |
| 185 | if (info.runlist_enum_ampere > max_rl_id) | 185 | if (info.runlist_enum > max_rl_id) |
| 186 | max_rl_id = info.runlist_enum; | 186 | max_rl_id = info.runlist_enum; |
| 187 | } | 187 | } |
| 188 | // Create files to read each runlist. The read handling code looks at the | 188 | // Create files to read each runlist. The read handling code looks at the |
