diff options
Diffstat (limited to 'device_info_procfs.c')
-rw-r--r-- | device_info_procfs.c | 134 |
1 files changed, 112 insertions, 22 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index 5fc417f..b139c36 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
@@ -27,38 +27,65 @@ struct file_operations nvdebug_read_reg32_file_ops = { | |||
27 | .llseek = default_llseek, | 27 | .llseek = default_llseek, |
28 | }; | 28 | }; |
29 | 29 | ||
// Cursor handed between the seq_file start()/next()/show() callbacks while
// walking the device_info table.
typedef struct device_info_iter {
	// Index of the table row currently being visited
	int idx;
	// Number of rows in the table, unpopulated rows included
	int length;
	// Ampere+ only: which row of a multi-row entry `idx` refers to
	// (0 for the first row of an entry)
	int type_of_next_entry;
	// Ampere+ only: set by show() and read by next(), which keeps show()
	// idempotent
	bool has_next_entry;
} device_info_iter;
36 | |||
30 | //// ==v== PTOP_DEVICE_INFO ==v== //// | 37 | //// ==v== PTOP_DEVICE_INFO ==v== //// |
31 | 38 | ||
32 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. | 39 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. |
33 | // Initializes iterator `idx` state and returns it. Ends sequence on NULL. | 40 | // Initializes iterator `iter` state and returns it. Ends sequence on NULL. |
34 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { | 41 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { |
35 | static int idx; | 42 | static device_info_iter iter; |
36 | // If start of sequence, reset `idx` | 43 | // If freshly starting a sequence, reset the iterator |
37 | if (*pos == 0) | 44 | if (*pos == 0) { |
38 | idx = 0; | 45 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
39 | // Number of possible info entries is fixed, and list is sparse | 46 | iter.idx = 0; |
40 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1) | 47 | iter.type_of_next_entry = 0; |
48 | iter.has_next_entry = 0; | ||
49 | // On Ampere+, the device_info table length can vary | ||
50 | if (g->chip_id >= NV_CHIP_ID_AMPERE) | ||
51 | iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g); | ||
52 | else | ||
53 | iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104; | ||
54 | } | ||
55 | // Number of possible info entries is fixed, and list is sparse, so stop | ||
56 | // iterating only when all entries have been checked, rather than on the first | ||
57 | // empty entry. | ||
58 | if (iter.idx >= iter.length) | ||
41 | return NULL; | 59 | return NULL; |
42 | return &idx; | 60 | return &iter; |
43 | } | 61 | } |
44 | 62 | ||
45 | // Steps to next record. Returns new value of `idx`. | 63 | // Steps to next record. Returns `&iter` (address should not change) |
46 | // Calls show() on non-NULL return | 64 | // Calls show() on non-NULL return |
47 | static void* device_info_file_seq_next(struct seq_file *s, void *idx, | 65 | static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw, |
48 | loff_t *pos) { | 66 | loff_t *pos) { |
67 | device_info_iter *iter = (device_info_iter*)iter_raw; | ||
49 | (*pos)++; // Required by seq interface | 68 | (*pos)++; // Required by seq interface |
50 | // Number of possible info entries is fixed, and list is sparse | 69 | // Number of possible info entries is fixed, and list is sparse, so stop |
51 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1) | 70 | // iterating only when all entries have been checked, rather than on the first |
71 | // empty entry. | ||
72 | if (++iter->idx >= iter->length) | ||
52 | return NULL; | 73 | return NULL; |
53 | return idx; | 74 | // The info_type field is not available in the Ampere device_info data, so |
75 | // it must be inferred. NOP for older devices (cheaper than another branch). | ||
76 | // This has to be here (rather than in the show function) to support the | ||
77 | // idempotence requirements of show() in the seq_file interface. | ||
78 | iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0; | ||
79 | return iter; | ||
54 | } | 80 | } |
55 | 81 | ||
56 | // Print info at index *idx. Returns non-zero on error. | 82 | // Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error. |
57 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { | 83 | // Implementation of this function must be idempotent |
58 | ptop_device_info_t curr_info; | 84 | static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) { |
85 | device_info_iter *iter = (device_info_iter*)iter_raw; | ||
86 | ptop_device_info_gk104_t curr_info; | ||
59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 87 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
60 | 88 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx)); | |
61 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); | ||
62 | // Check for read errors | 89 | // Check for read errors |
63 | if (curr_info.raw == -1) | 90 | if (curr_info.raw == -1) |
64 | return -EIO; | 91 | return -EIO; |
@@ -90,7 +117,7 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) { | |||
90 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | 117 | if (curr_info.engine_type < ENGINE_TYPES_LEN) |
91 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | 118 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); |
92 | else | 119 | else |
93 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); | 120 | seq_printf(s, "Unknown Historical)\n"); |
94 | break; | 121 | break; |
95 | case INFO_TYPE_NOT_VALID: | 122 | case INFO_TYPE_NOT_VALID: |
96 | default: | 123 | default: |
@@ -104,18 +131,81 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) { | |||
104 | return 0; | 131 | return 0; |
105 | } | 132 | } |
106 | 133 | ||
134 | // Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error. | ||
135 | // Implementation of this function must be idempotent | ||
136 | static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) { | ||
137 | device_info_iter *iter = (device_info_iter*)iter_raw; | ||
138 | ptop_device_info_ga100_t curr_info; | ||
139 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
140 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx)); | ||
141 | // Check for read errors | ||
142 | if (curr_info.raw == -1) | ||
143 | return -EIO; | ||
144 | |||
145 | // Update tracking data only used by next(); allows preserving idempotence | ||
146 | iter->has_next_entry = curr_info.has_next_entry; | ||
147 | // Silently skip empty entries | ||
148 | if (curr_info.raw == 0) | ||
149 | return 0; | ||
150 | // In nvdebug, an entry is considered invalid if it does not consist of at | ||
151 | // least two rows. So, if this is the first row of an entry, but another row | ||
152 | // is not indicated, this entry is invalid and should be skipped. | ||
153 | if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) { | ||
154 | printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | // Parse and print the data | ||
159 | // Note: The goal of this interface is to present useful information to | ||
160 | // a human user, NOT to provide a stable format for scripts to parse. | ||
161 | // Because of this, we favor accurately printing the data in each entry, | ||
162 | // rather than providing stable (if imperfectly correct) field names | ||
163 | switch(iter->type_of_next_entry) { | ||
164 | case 0: | ||
165 | seq_printf(s, "| Engine Type: %3d (", curr_info.engine_type); | ||
166 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | ||
167 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | ||
168 | else | ||
169 | seq_printf(s, "Unknown, introduced post-Lovelace)\n"); | ||
170 | seq_printf(s, "| instance %d\n", curr_info.inst_id); | ||
171 | seq_printf(s, "| Fault ID: %4d\n", curr_info.fault_id); | ||
172 | break; | ||
173 | case 1: | ||
174 | seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 8); | ||
175 | seq_printf(s, "| Reset ID: %3d\n", curr_info.reset_id); | ||
176 | seq_printf(s, "| Is Engine: %1d\n", curr_info.is_engine); | ||
177 | |||
178 | break; | ||
179 | case 2: | ||
180 | seq_printf(s, "| Runlist Eng. ID: %1d\n", curr_info.rleng_id); | ||
181 | // Theoretically, we could extract an ID from the runlist RAM | ||
182 | seq_printf(s, "| RL Base: %#.8x\n", curr_info.runlist_pri_base << 10); | ||
183 | break; | ||
184 | default: | ||
185 | printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); | ||
186 | } | ||
187 | |||
188 | // Draw a line between each device entry | ||
189 | if (!curr_info.has_next_entry) | ||
190 | seq_printf(s, "+---------------------+\n"); | ||
191 | return 0; | ||
192 | } | ||
193 | |||
// seq_file stop() callback. Intentionally empty: nothing is released here.
static void device_info_file_seq_stop(struct seq_file *s, void *idx) {
}
110 | 197 | ||
111 | static const struct seq_operations device_info_file_seq_ops = { | 198 | static struct seq_operations device_info_file_seq_ops = { |
112 | .start = device_info_file_seq_start, | 199 | .start = device_info_file_seq_start, |
113 | .next = device_info_file_seq_next, | 200 | .next = device_info_file_seq_next, |
114 | .stop = device_info_file_seq_stop, | 201 | .stop = device_info_file_seq_stop, |
115 | .show = device_info_file_seq_show, | ||
116 | }; | 202 | }; |
117 | 203 | ||
118 | static int device_info_file_open(struct inode *inode, struct file *f) { | 204 | static int device_info_file_open(struct inode *inode, struct file *f) { |
205 | if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE) | ||
206 | device_info_file_seq_ops.show = device_info_file_seq_show_ga100; | ||
207 | else | ||
208 | device_info_file_seq_ops.show = device_info_file_seq_show_gk104; | ||
119 | return seq_open(f, &device_info_file_seq_ops); | 209 | return seq_open(f, &device_info_file_seq_ops); |
120 | } | 210 | } |
121 | 211 | ||