diff options
| author | Joshua Bakita <jbakita@cs.unc.edu> | 2023-09-20 13:09:17 -0400 |
|---|---|---|
| committer | Joshua Bakita <jbakita@cs.unc.edu> | 2023-09-20 13:09:17 -0400 |
| commit | 51f808e092846a60ea6c88ea3a1d2e349c92977b (patch) | |
| tree | 0f0ac492a3a0c5c293df91a25debec97d648adbf | |
| parent | 1d7adc3be1aef5ac9c144bb24008fd8cc5d688a5 (diff) | |
Bug fixes and cleanup for new device_info logic
- Update comments to match new structure
- Make show() function idempotent
- Skip empty table entries without aborting
- Include names for new engine types
- Add warning log messages for skipped table entries
- Remove non-functional runlist file creation logic for Ampere+
| -rw-r--r-- | device_info_procfs.c | 179 | ||||
| -rw-r--r-- | nvdebug.h | 52 | ||||
| -rw-r--r-- | nvdebug_entry.c | 39 |
3 files changed, 134 insertions, 136 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index b2bcd1a..195b3ff 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
| @@ -28,49 +28,64 @@ struct file_operations nvdebug_read_reg32_file_ops = { | |||
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| 30 | typedef struct { | 30 | typedef struct { |
| 31 | int total_entries; | 31 | int idx; // Current index in the device_info table |
| 32 | int index; | 32 | int length; // Length of device_info table (including unpopulated entries) |
| 33 | int type_of_next_entry; | 33 | int type_of_next_entry; // Only used on Ampere+ GPUs |
| 34 | bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence | ||
| 34 | } device_info_iter; | 35 | } device_info_iter; |
| 35 | 36 | ||
| 36 | //// ==v== PTOP_DEVICE_INFO ==v== //// | 37 | //// ==v== PTOP_DEVICE_INFO ==v== //// |
| 37 | 38 | ||
| 38 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. | 39 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. |
| 39 | // Initializes iterator `idx` state and returns it. Ends sequence on NULL. | 40 | // Initializes iterator `iter` state and returns it. Ends sequence on NULL. |
| 40 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { | 41 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { |
| 41 | static device_info_iter idx; | 42 | static device_info_iter iter; |
| 42 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 43 | // If freshly starting a sequence, reset the iterator |
| 43 | int is_ampere = g->chip_id >= NV_CHIP_ID_AMPERE; | 44 | if (*pos == 0) { |
| 44 | // If start of sequence, reset `idx` | 45 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 45 | if (*pos == 0) { | 46 | iter.idx = 0; |
| 46 | idx.index = 0; | 47 | iter.type_of_next_entry = 0; |
| 47 | idx.type_of_next_entry = is_ampere ? 0 : -1; | 48 | iter.has_next_entry = 0; |
| 49 | // On Ampere+, the device_info table length can vary | ||
| 50 | if (g->chip_id > NV_CHIP_ID_AMPERE) | ||
| 51 | iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g); | ||
| 52 | else | ||
| 53 | iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104; | ||
| 48 | } | 54 | } |
| 49 | idx.total_entries = is_ampere ? NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g) : NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS; | 55 | // Number of possible info entries is fixed, and list is sparse, so stop |
| 50 | // Number of possible info entries is fixed, and list is sparse | 56 | // iterating only when all entries have been checked, rather than on the first |
| 51 | if (idx.index >= idx.total_entries) | 57 | // empty entry. |
| 52 | return NULL; | 58 | if (iter.idx >= iter.length) |
| 53 | return &idx; | 59 | return NULL; |
| 60 | return &iter; | ||
| 54 | } | 61 | } |
| 55 | 62 | ||
| 56 | // Steps to next record. Returns new value of `idx`. | 63 | // Steps to next record. Returns `&iter` (address should not change) |
| 57 | // Calls show() on non-NULL return | 64 | // Calls show() on non-NULL return |
| 58 | static void* device_info_file_seq_next(struct seq_file *s, void *idx, | 65 | static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw, |
| 59 | loff_t *pos) { | 66 | loff_t *pos) { |
| 60 | device_info_iter *idx_iter = (device_info_iter*)idx; | 67 | device_info_iter *iter = (device_info_iter*)iter_raw; |
| 61 | (*pos)++; // Required by seq interface | 68 | (*pos)++; // Required by seq interface |
| 62 | // Number of possible info entries is fixed, and list is sparse | 69 | // Number of possible info entries is fixed, and list is sparse, so stop |
| 63 | if (idx_iter->index++ >= idx_iter->total_entries) | 70 | // iterating only when all entries have been checked, rather than on the first |
| 71 | // empty entry. | ||
| 72 | if (++iter->idx >= iter->length) | ||
| 64 | return NULL; | 73 | return NULL; |
| 65 | return idx; | 74 | // The info_type field is not available in the Ampere device_info data, so |
| 75 | // it must be inferred. NOP for older devices (cheaper than another branch). | ||
| 76 | // This has to be here (rather than in the show function) to support the | ||
| 77 | // idempotence requirements of show() in the seq_file interface. | ||
| 78 | iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0; | ||
| 79 | return iter; | ||
| 66 | } | 80 | } |
| 67 | 81 | ||
| 68 | // Print info at index *idx. Returns non-zero on error. | 82 | // Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error. |
| 69 | static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { | 83 | // Implementation of this function must be idempotent |
| 70 | device_info_iter *idx_iter = (device_info_iter*)idx; | 84 | static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) { |
| 71 | ptop_device_info_previous_t curr_info; | 85 | device_info_iter *iter = (device_info_iter*)iter_raw; |
| 86 | ptop_device_info_gk104_t curr_info; | ||
| 72 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 87 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 73 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_PREVIOUS(idx_iter->index)); | 88 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx)); |
| 74 | // Check for read errors | 89 | // Check for read errors |
| 75 | if (curr_info.raw == -1) | 90 | if (curr_info.raw == -1) |
| 76 | return -EIO; | 91 | return -EIO; |
| @@ -102,7 +117,7 @@ static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { | |||
| 102 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | 117 | if (curr_info.engine_type < ENGINE_TYPES_LEN) |
| 103 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | 118 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); |
| 104 | else | 119 | else |
| 105 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); | 120 | seq_printf(s, "Unknown Historical)\n"); |
| 106 | break; | 121 | break; |
| 107 | case INFO_TYPE_NOT_VALID: | 122 | case INFO_TYPE_NOT_VALID: |
| 108 | default: | 123 | default: |
| @@ -116,69 +131,75 @@ static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { | |||
| 116 | return 0; | 131 | return 0; |
| 117 | } | 132 | } |
| 118 | 133 | ||
| 119 | // Print info at index *idx for Ampere GPUs. Returns non-zero on error. | 134 | // Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error. |
| 120 | static int device_info_file_seq_show_ampere(struct seq_file *s, void *idx) { | 135 | // Implementation of this function must be idempotent |
| 121 | device_info_iter *idx_iter = (device_info_iter*)idx; | 136 | static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) { |
| 122 | ptop_device_info_ampere_t curr_info; | 137 | device_info_iter *iter = (device_info_iter*)iter_raw; |
| 123 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 138 | ptop_device_info_ga100_t curr_info; |
| 124 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(idx_iter->index)); | 139 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 125 | // Check for read errors | 140 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx)); |
| 126 | if (curr_info.raw == -1) | 141 | // Check for read errors |
| 127 | return -EIO; | 142 | if (curr_info.raw == -1) |
| 128 | // The info_type field is not available in the Ampere device_info data, so it must be inferred | 143 | return -EIO; |
| 129 | int info_type = curr_info.raw ? idx_iter->type_of_next_entry : -1; | 144 | |
| 130 | // Parse and print the data | 145 | // Update tracking data only used by next(); allows preserving idempotence |
| 131 | switch(info_type) { | 146 | iter->has_next_entry = curr_info.has_next_entry; |
| 132 | case 0: | 147 | // Silently skip empty entries |
| 133 | seq_printf(s, "| instance %d\n", curr_info.inst_id); | 148 | if (curr_info.raw == 0) |
| 134 | seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id); | 149 | return 0; |
| 150 | // In nvdebug, an entry is considered invalid if it does not consist of at | ||
| 151 | // least two rows. So, if this is the first row of an entry, but another row | ||
| 152 | // is not indicated, this entry is invalid and should be skipped. | ||
| 153 | if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) { | ||
| 154 | printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); | ||
| 155 | return 0; | ||
| 156 | } | ||
| 157 | |||
| 158 | // Parse and print the data | ||
| 159 | switch(iter->type_of_next_entry) { | ||
| 160 | case 0: | ||
| 161 | seq_printf(s, "| instance %d\n", curr_info.inst_id); | ||
| 162 | seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id); | ||
| 135 | seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type); | 163 | seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type); |
| 136 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | 164 | if (curr_info.engine_type < ENGINE_TYPES_LEN) |
| 137 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | 165 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); |
| 138 | else | 166 | else |
| 139 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); | 167 | seq_printf(s, "Unknown, introduced post-Lovelace)\n"); |
| 140 | break; | 168 | break; |
| 141 | case 1: | 169 | case 1: |
| 142 | seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 12); | 170 | seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 12); |
| 143 | seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum); | 171 | seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum); |
| 144 | break; | 172 | break; |
| 145 | case 2: | 173 | case 2: |
| 146 | seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum); | 174 | seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum); |
| 147 | seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum); | 175 | seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum); |
| 148 | break; | 176 | break; |
| 149 | default: | 177 | default: |
| 150 | // Device info records are sparse, so skip unset or unknown ones | 178 | printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); |
| 151 | return 0; | ||
| 152 | } | ||
| 153 | if(info_type != -1) idx_iter->type_of_next_entry++; | ||
| 154 | // Draw a line between each device entry | ||
| 155 | if (!curr_info.has_next_entry) { | ||
| 156 | idx_iter->type_of_next_entry = 0; | ||
| 157 | seq_printf(s, "+---------------------+\n"); | ||
| 158 | } | 179 | } |
| 159 | return 0; | 180 | |
| 181 | // Draw a line between each device entry | ||
| 182 | if (!curr_info.has_next_entry) | ||
| 183 | seq_printf(s, "+---------------------+\n"); | ||
| 184 | return 0; | ||
| 160 | } | 185 | } |
| 161 | 186 | ||
| 162 | static void device_info_file_seq_stop(struct seq_file *s, void *idx) { | 187 | static void device_info_file_seq_stop(struct seq_file *s, void *idx) { |
| 163 | // No cleanup needed | 188 | // No cleanup needed |
| 164 | } | 189 | } |
| 165 | 190 | ||
| 166 | static const struct seq_operations device_info_file_seq_ops = { | 191 | static struct seq_operations device_info_file_seq_ops = { |
| 167 | .start = device_info_file_seq_start, | 192 | .start = device_info_file_seq_start, |
| 168 | .next = device_info_file_seq_next, | 193 | .next = device_info_file_seq_next, |
| 169 | .stop = device_info_file_seq_stop | 194 | .stop = device_info_file_seq_stop, |
| 170 | }; | 195 | }; |
| 171 | 196 | ||
| 172 | static int device_info_file_open(struct inode *inode, struct file *f) { | 197 | static int device_info_file_open(struct inode *inode, struct file *f) { |
| 173 | if(g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE) { | 198 | if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE) |
| 174 | static struct seq_operations file_seq_ops_ampere = device_info_file_seq_ops; | 199 | device_info_file_seq_ops.show = device_info_file_seq_show_ga100; |
| 175 | file_seq_ops_ampere.show = device_info_file_seq_show_ampere; | 200 | else |
| 176 | return seq_open(f, &file_seq_ops_ampere); | 201 | device_info_file_seq_ops.show = device_info_file_seq_show_gk104; |
| 177 | } else { | 202 | return seq_open(f, &device_info_file_seq_ops); |
| 178 | static struct seq_operations file_seq_ops_previous = device_info_file_seq_ops; | ||
| 179 | file_seq_ops_previous.show = device_info_file_seq_show_previous; | ||
| 180 | return seq_open(f, &file_seq_ops_previous); | ||
| 181 | } | ||
| 182 | } | 203 | } |
| 183 | 204 | ||
| 184 | struct file_operations device_info_file_ops = { | 205 | struct file_operations device_info_file_ops = { |
| @@ -457,7 +457,7 @@ enum ENGINE_TYPES { | |||
| 457 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | 457 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 |
| 458 | 458 | ||
| 459 | ENGINE_MSPDEC = 8, // Picture DECoder | 459 | ENGINE_MSPDEC = 8, // Picture DECoder |
| 460 | ENGINE_MSPPP = 9, // [Video] Post Processing | 460 | ENGINE_MSPPP = 9, // [Video] Picture Post Processor |
| 461 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | 461 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder |
| 462 | ENGINE_MSENC = 11, // [Video] ENCoding | 462 | ENGINE_MSENC = 11, // [Video] ENCoding |
| 463 | ENGINE_VIC = 12, // Video Image Compositor | 463 | ENGINE_VIC = 12, // Video Image Compositor |
| @@ -468,10 +468,12 @@ enum ENGINE_TYPES { | |||
| 468 | 468 | ||
| 469 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | 469 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] |
| 470 | ENGINE_LCE = 19, // Logical Copy Engine | 470 | ENGINE_LCE = 19, // Logical Copy Engine |
| 471 | ENGINE_GSP = 20, // Gpu System Processor | 471 | ENGINE_GSP = 20, // Gpu System Processor (Volta+) |
| 472 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | 472 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Turing+) |
| 473 | ENGINE_OFA = 22, // Optical Flow Accelerator (Turing+) | ||
| 474 | ENGINE_FLA = 23, // [NVLink] Fabric Logical Addressing [?] | ||
| 473 | }; | 475 | }; |
| 474 | #define ENGINE_TYPES_LEN 22 | 476 | #define ENGINE_TYPES_LEN 24 |
| 475 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | 477 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { |
| 476 | "Graphics/Compute", | 478 | "Graphics/Compute", |
| 477 | "COPY0", | 479 | "COPY0", |
| @@ -495,9 +497,11 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
| 495 | "LCE: Logical Copy Engine", | 497 | "LCE: Logical Copy Engine", |
| 496 | "GSP: GPU System Processor", | 498 | "GSP: GPU System Processor", |
| 497 | "NVJPG: NVIDIA JPEG Decoder", | 499 | "NVJPG: NVIDIA JPEG Decoder", |
| 500 | "OFA: Optical Flow Accelerator", | ||
| 501 | "FLA: Fabric Logical Addressing", | ||
| 498 | }; | 502 | }; |
| 499 | 503 | ||
| 500 | /* GPU engine information and control register offsets | 504 | /* GPU engine information and control register offsets (GPU TOPology) |
| 501 | Each engine is described by one or more entries (terminated by an entry with | 505 | Each engine is described by one or more entries (terminated by an entry with |
| 502 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A | 506 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A |
| 503 | typical device, such as the graphics/compute engine and any copy engines, are | 507 | typical device, such as the graphics/compute engine and any copy engines, are |
| @@ -559,34 +563,34 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
| 559 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | 563 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. |
| 560 | */ | 564 | */ |
| 561 | 565 | ||
| 562 | #define NV_PTOP_DEVICE_INFO_AMPERE(i) (0x00022800+(i)*4) | 566 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) |
| 563 | #define NV_PTOP_DEVICE_INFO_PREVIOUS(i) (0x00022700+(i)*4) | 567 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) |
| 564 | #define NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g) (nvdebug_readl(g, 0x0224fc) >> 20) | 568 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) |
| 565 | #define NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS 64 | 569 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 |
| 566 | #define NV_PTOP_DEVICE_INFO_TYPE_COUNT 3 | 570 | #define NV_PTOP_DEVICE_INFO_TYPE_COUNT 3 |
| 567 | typedef union { | 571 | typedef union { |
| 568 | struct { | 572 | struct { |
| 569 | uint32_t fault_id:7; | 573 | uint32_t fault_id:11; |
| 570 | uint32_t padding0:9; | 574 | uint32_t padding0:5; |
| 571 | uint32_t inst_id:4; | 575 | uint32_t inst_id:8; |
| 572 | uint32_t padding1:4; | 576 | enum ENGINE_TYPES engine_type:7; // "type_enum" |
| 573 | enum ENGINE_TYPES engine_type:7; | ||
| 574 | bool has_next_entry:1; | 577 | bool has_next_entry:1; |
| 575 | } __attribute__((packed)); | 578 | } __attribute__((packed)); |
| 576 | struct { | 579 | struct { |
| 577 | uint32_t reset_enum:5; | 580 | uint32_t reset_enum:8; // "reset_id" |
| 578 | uint32_t padding2:7; | 581 | uint32_t pri_base:18; // "device_pri_base" |
| 579 | uint32_t pri_base:12; | 582 | uint32_t padding1:4; |
| 580 | uint32_t padding3:8; | 583 | uint32_t is_engine:1; |
| 584 | uint32_t padding2:1; | ||
| 581 | } __attribute__((packed)); | 585 | } __attribute__((packed)); |
| 582 | struct { | 586 | struct { |
| 583 | uint32_t engine_enum:2; | 587 | uint32_t engine_enum:2; // "rleng_id" |
| 584 | uint32_t padding4:8; | 588 | uint32_t padding3:8; |
| 585 | uint32_t runlist_enum:14; | 589 | uint32_t runlist_enum:16; // "runlist_pri_base" |
| 586 | uint32_t padding5:8; | 590 | uint32_t padding4:6; |
| 587 | } __attribute__((packed)); | 591 | } __attribute__((packed)); |
| 588 | uint32_t raw; | 592 | uint32_t raw; |
| 589 | } ptop_device_info_ampere_t; | 593 | } ptop_device_info_ga100_t; |
| 590 | 594 | ||
| 591 | typedef union { | 595 | typedef union { |
| 592 | // DATA type fields | 596 | // DATA type fields |
| @@ -625,7 +629,7 @@ typedef union { | |||
| 625 | uint32_t padding9:1; | 629 | uint32_t padding9:1; |
| 626 | } __attribute__((packed)); | 630 | } __attribute__((packed)); |
| 627 | uint32_t raw; | 631 | uint32_t raw; |
| 628 | } ptop_device_info_previous_t; | 632 | } ptop_device_info_gk104_t; |
| 629 | 633 | ||
| 630 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | 634 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 |
| 631 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | 635 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index cae5aea..0754f12 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -138,8 +138,8 @@ int probe_and_cache_device(void) { | |||
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | // Create files `/proc/gpu#/runlist#`, world readable | 140 | // Create files `/proc/gpu#/runlist#`, world readable |
| 141 | int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { | 141 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { |
| 142 | ptop_device_info_previous_t info; | 142 | ptop_device_info_gk104_t info; |
| 143 | struct proc_dir_entry *rl_entry; | 143 | struct proc_dir_entry *rl_entry; |
| 144 | int i, rl_id; | 144 | int i, rl_id; |
| 145 | char runlist_name[12]; | 145 | char runlist_name[12]; |
| @@ -147,8 +147,8 @@ int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { | |||
| 147 | // Figure out how many runlists there are by checking the device info | 147 | // Figure out how many runlists there are by checking the device info |
| 148 | // registers. Runlists are always numbered sequentially, so we just have | 148 | // registers. Runlists are always numbered sequentially, so we just have |
| 149 | // to find the highest-valued one and add 1 to get the number of runlists. | 149 | // to find the highest-valued one and add 1 to get the number of runlists. |
| 150 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS; i++) { | 150 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { |
| 151 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_PREVIOUS(i)); | 151 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i)); |
| 152 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) | 152 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) |
| 153 | continue; | 153 | continue; |
| 154 | if (info.runlist_enum > max_rl_id) | 154 | if (info.runlist_enum > max_rl_id) |
| @@ -167,34 +167,6 @@ int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { | |||
| 167 | return 0; | 167 | return 0; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | // Create files `/proc/gpu#/runlist#`, world readable | ||
| 171 | int create_runlist_files_ampere(int device_id, struct proc_dir_entry *dir) { | ||
| 172 | ptop_device_info_ampere_t info; | ||
| 173 | struct proc_dir_entry *rl_entry; | ||
| 174 | int i, rl_id; | ||
| 175 | char runlist_name[12]; | ||
| 176 | int max_rl_id = 0; // Always at least one runlist | ||
| 177 | // Figure out how many runlists there are by checking the device info | ||
| 178 | // registers. Runlists are always numbered sequentially, so we just have | ||
| 179 | // to find the highest-valued one and add 1 to get the number of runlists. | ||
| 180 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(&g_nvdebug_state[device_id]); i++) { | ||
| 181 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_AMPERE(i)); | ||
| 182 | if (info.runlist_enum > max_rl_id) | ||
| 183 | max_rl_id = info.runlist_enum; | ||
| 184 | } | ||
| 185 | // Create files to read each runlist. The read handling code looks at the | ||
| 186 | // PDE_DATA associated with the file to determine what the runlist ID is. | ||
| 187 | for (rl_id = 0; rl_id <= 0 * max_rl_id; rl_id++) { | ||
| 188 | snprintf(runlist_name, 12, "runlist%d", rl_id); | ||
| 189 | rl_entry = proc_create_data( | ||
| 190 | runlist_name, 0444, dir, compat_ops(&runlist_file_ops), | ||
| 191 | (void*)(uintptr_t)rl_id); | ||
| 192 | if (!rl_entry) | ||
| 193 | return -ENOMEM; | ||
| 194 | } | ||
| 195 | return 0; | ||
| 196 | } | ||
| 197 | |||
| 198 | // Create files /proc/gpu# | 170 | // Create files /proc/gpu# |
| 199 | // TODO: Don't run this on unsupported GPUs | 171 | // TODO: Don't run this on unsupported GPUs |
| 200 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | 172 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { |
| @@ -240,7 +212,8 @@ int __init nvdebug_init(void) { | |||
| 240 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) | 212 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
| 241 | goto out_nomem; | 213 | goto out_nomem; |
| 242 | // Create files `/proc/gpu#/runlist#`, world readable | 214 | // Create files `/proc/gpu#/runlist#`, world readable |
| 243 | rl_create_err = (g_nvdebug_state[device_id].chip_id >= NV_CHIP_ID_AMPERE) ? create_runlist_files_ampere(device_id, dir) : create_runlist_files_previous(device_id, dir); | 215 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) |
| 216 | create_runlist_files(device_id, dir); | ||
| 244 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | 217 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
| 245 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); | 218 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); |
| 246 | // Create file `/proc/gpu#/preempt_tsg`, world writable | 219 | // Create file `/proc/gpu#/preempt_tsg`, world writable |
