aboutsummaryrefslogtreecommitdiffstats
path: root/device_info_procfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'device_info_procfs.c')
-rw-r--r--device_info_procfs.c134
1 files changed, 112 insertions, 22 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c
index 5fc417f..b139c36 100644
--- a/device_info_procfs.c
+++ b/device_info_procfs.c
@@ -27,38 +27,65 @@ struct file_operations nvdebug_read_reg32_file_ops = {
27 .llseek = default_llseek, 27 .llseek = default_llseek,
28}; 28};
29 29
30typedef struct {
31 int idx; // Current index in the device_info table
32 int length; // Length of device_info table (including unpopulated entries)
33 int type_of_next_entry; // Only used on Ampere+ GPUs
34 bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence
35} device_info_iter;
36
30//// ==v== PTOP_DEVICE_INFO ==v== //// 37//// ==v== PTOP_DEVICE_INFO ==v== ////
31 38
32// Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. 39// Called to start or resume a sequence. Prior to 4.19, *pos is unreliable.
33// Initializes iterator `idx` state and returns it. Ends sequence on NULL. 40// Initializes iterator `iter` state and returns it. Ends sequence on NULL.
34static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { 41static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) {
35 static int idx; 42 static device_info_iter iter;
36 // If start of sequence, reset `idx` 43 // If freshly starting a sequence, reset the iterator
37 if (*pos == 0) 44 if (*pos == 0) {
38 idx = 0; 45 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
39 // Number of possible info entries is fixed, and list is sparse 46 iter.idx = 0;
40 if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1) 47 iter.type_of_next_entry = 0;
48 iter.has_next_entry = 0;
49 // On Ampere+, the device_info table length can vary
50 if (g->chip_id >= NV_CHIP_ID_AMPERE)
51 iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
52 else
53 iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104;
54 }
55 // Number of possible info entries is fixed, and list is sparse, so stop
56 // iterating only when all entries have been checked, rather than on the first
57 // empty entry.
58 if (iter.idx >= iter.length)
41 return NULL; 59 return NULL;
42 return &idx; 60 return &iter;
43} 61}
44 62
45// Steps to next record. Returns new value of `idx`. 63// Steps to next record. Returns `&iter` (address should not change)
46// Calls show() on non-NULL return 64// Calls show() on non-NULL return
47static void* device_info_file_seq_next(struct seq_file *s, void *idx, 65static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw,
48 loff_t *pos) { 66 loff_t *pos) {
67 device_info_iter *iter = (device_info_iter*)iter_raw;
49 (*pos)++; // Required by seq interface 68 (*pos)++; // Required by seq interface
50 // Number of possible info entries is fixed, and list is sparse 69 // Number of possible info entries is fixed, and list is sparse, so stop
51 if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1) 70 // iterating only when all entries have been checked, rather than on the first
71 // empty entry.
72 if (++iter->idx >= iter->length)
52 return NULL; 73 return NULL;
53 return idx; 74 // The info_type field is not available in the Ampere device_info data, so
75 // it must be inferred. NOP for older devices (cheaper than another branch).
76 // This has to be here (rather than in the show function) to support the
77 // idempotence requirements of show() in the seq_file interface.
78 iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0;
79 return iter;
54} 80}
55 81
56// Print info at index *idx. Returns non-zero on error. 82// Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error.
57static int device_info_file_seq_show(struct seq_file *s, void *idx) { 83// Implementation of this function must be idempotent
58 ptop_device_info_t curr_info; 84static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) {
85 device_info_iter *iter = (device_info_iter*)iter_raw;
86 ptop_device_info_gk104_t curr_info;
59 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; 87 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
60 88 curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx));
61 curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx));
62 // Check for read errors 89 // Check for read errors
63 if (curr_info.raw == -1) 90 if (curr_info.raw == -1)
64 return -EIO; 91 return -EIO;
@@ -90,7 +117,7 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) {
90 if (curr_info.engine_type < ENGINE_TYPES_LEN) 117 if (curr_info.engine_type < ENGINE_TYPES_LEN)
91 seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); 118 seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
92 else 119 else
93 seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); 120 seq_printf(s, "Unknown Historical)\n");
94 break; 121 break;
95 case INFO_TYPE_NOT_VALID: 122 case INFO_TYPE_NOT_VALID:
96 default: 123 default:
@@ -104,18 +131,81 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) {
104 return 0; 131 return 0;
105} 132}
106 133
134// Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error.
135// Implementation of this function must be idempotent
136static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) {
137 device_info_iter *iter = (device_info_iter*)iter_raw;
138 ptop_device_info_ga100_t curr_info;
139 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
140 curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx));
141 // Check for read errors
142 if (curr_info.raw == -1)
143 return -EIO;
144
145 // Update tracking data only used by next(); allows preserving idempotence
146 iter->has_next_entry = curr_info.has_next_entry;
147 // Silently skip empty entries
148 if (curr_info.raw == 0)
149 return 0;
150 // In nvdebug, an entry is considered invalid if it does not consist of at
151 // least two rows. So, if this is the first row of an entry, but another row
152 // is not indicated, this entry is invalid and should be skipped.
153 if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) {
154 printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
155 return 0;
156 }
157
158 // Parse and print the data
159 // Note: The goal of this interface is to present useful information to
160 // a human user, NOT to provide a stable format for scripts to parse.
161 // Because of this, we favor accurately printing the data in each entry,
162 // rather than providing stable (if imperfectly correct) field names
163 switch(iter->type_of_next_entry) {
164 case 0:
165 seq_printf(s, "| Engine Type: %3d (", curr_info.engine_type);
166 if (curr_info.engine_type < ENGINE_TYPES_LEN)
167 seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
168 else
169 seq_printf(s, "Unknown, introduced post-Lovelace)\n");
170 seq_printf(s, "| instance %d\n", curr_info.inst_id);
171 seq_printf(s, "| Fault ID: %4d\n", curr_info.fault_id);
172 break;
173 case 1:
174 seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 8);
175 seq_printf(s, "| Reset ID: %3d\n", curr_info.reset_id);
176 seq_printf(s, "| Is Engine: %1d\n", curr_info.is_engine);
177
178 break;
179 case 2:
180 seq_printf(s, "| Runlist Eng. ID: %1d\n", curr_info.rleng_id);
181 // Theoretically, we could extract an ID from the runlist RAM
182 seq_printf(s, "| RL Base: %#.8x\n", curr_info.runlist_pri_base << 10);
183 break;
184 default:
185 printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
186 }
187
188 // Draw a line between each device entry
189 if (!curr_info.has_next_entry)
190 seq_printf(s, "+---------------------+\n");
191 return 0;
192}
193
107static void device_info_file_seq_stop(struct seq_file *s, void *idx) { 194static void device_info_file_seq_stop(struct seq_file *s, void *idx) {
108 // No cleanup needed 195 // No cleanup needed
109} 196}
110 197
111static const struct seq_operations device_info_file_seq_ops = { 198static struct seq_operations device_info_file_seq_ops = {
112 .start = device_info_file_seq_start, 199 .start = device_info_file_seq_start,
113 .next = device_info_file_seq_next, 200 .next = device_info_file_seq_next,
114 .stop = device_info_file_seq_stop, 201 .stop = device_info_file_seq_stop,
115 .show = device_info_file_seq_show,
116}; 202};
117 203
118static int device_info_file_open(struct inode *inode, struct file *f) { 204static int device_info_file_open(struct inode *inode, struct file *f) {
205 if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE)
206 device_info_file_seq_ops.show = device_info_file_seq_show_ga100;
207 else
208 device_info_file_seq_ops.show = device_info_file_seq_show_gk104;
119 return seq_open(f, &device_info_file_seq_ops); 209 return seq_open(f, &device_info_file_seq_ops);
120} 210}
121 211