diff options
-rw-r--r-- | nvdebug.h | 154 | ||||
-rw-r--r-- | nvdebug_entry.c | 28 | ||||
-rw-r--r-- | runlist.c | 25 | ||||
-rw-r--r-- | runlist_procfs.c | 135 |
4 files changed, 313 insertions, 29 deletions
@@ -99,42 +99,103 @@ struct entry_tsg { | |||
99 | uint32_t tsgid:12; | 99 | uint32_t tsgid:12; |
100 | uint64_t padding4:52; | 100 | uint64_t padding4:52; |
101 | } __attribute__((packed)); | 101 | } __attribute__((packed)); |
102 | #define MAX_TSGID (1 << 12) | ||
102 | 103 | ||
103 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | 104 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; |
104 | 105 | ||
105 | /* Preempt | 106 | /* Preempt a TSG or Channel by ID |
106 | ID/CHID : Id of TSG or channel to preempt | 107 | ID/CHID : Id of TSG or channel to preempt |
108 | IS_PENDING : ???? | ||
109 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | ||
110 | |||
111 | Support: Kepler, Maxwell, Pascal, Volta | ||
107 | */ | 112 | */ |
108 | #define NV_PFIFO_PREEMPT 0x00002634 | 113 | #define NV_PFIFO_PREEMPT 0x00002634 |
109 | struct pfifo_preempt { | 114 | typedef union { |
110 | uint32_t id:12; | 115 | struct { |
111 | uint32_t padding:8; | 116 | uint32_t id:12; |
112 | bool is_pending:1; | 117 | uint32_t padding:8; |
113 | uint32_t padding2:3; | 118 | bool is_pending:1; |
114 | enum PREEMPT_TYPE type:2; | 119 | uint32_t padding2:3; |
115 | uint32_t padding3:6; | 120 | enum PREEMPT_TYPE type:2; |
116 | } __attribute__((packed)); | 121 | uint32_t padding3:6; |
122 | } __attribute__((packed)); | ||
123 | uint32_t raw; | ||
124 | } pfifo_preempt_t; | ||
125 | |||
126 | /* | ||
127 | "Initiate a preempt of the engine by writing the bit associated with its | ||
128 | runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT | ||
129 | for the preempt to complete." | ||
117 | 130 | ||
131 | Useful for preempting multiple runlists at once. | ||
132 | |||
133 | Appears to trigger an interrupt or some other side-effect on the Jetson | ||
134 | Xavier, as the built-in nvgpu driver seems to be disturbed by writing to this. | ||
135 | |||
136 | To select the runlist dynamically, use the BIT(nr) kernel macro. | ||
137 | Example: | ||
138 | runlist_preempt_t rl_preempt; | ||
139 | rl_preempt.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_PREEMPT); | ||
140 | rl_preempt.raw |= BIT(nr); | ||
141 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | ||
142 | |||
143 | Support: Volta | ||
144 | */ | ||
118 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 | 145 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 |
119 | struct runlist_preempt { | 146 | typedef union { |
120 | bool runlist_0:1; | 147 | struct { |
121 | bool runlist_1:1; | 148 | bool runlist_0:1; |
122 | bool runlist_2:1; | 149 | bool runlist_1:1; |
123 | bool runlist_3:1; | 150 | bool runlist_2:1; |
124 | bool runlist_4:1; | 151 | bool runlist_3:1; |
125 | bool runlist_5:1; | 152 | bool runlist_4:1; |
126 | bool runlist_6:1; | 153 | bool runlist_5:1; |
127 | bool runlist_7:1; | 154 | bool runlist_6:1; |
128 | bool runlist_8:1; | 155 | bool runlist_7:1; |
129 | bool runlist_9:1; | 156 | bool runlist_8:1; |
130 | bool runlist_10:1; | 157 | bool runlist_9:1; |
131 | bool runlist_11:1; | 158 | bool runlist_10:1; |
132 | bool runlist_12:1; | 159 | bool runlist_11:1; |
133 | bool runlist_13:1; | 160 | bool runlist_12:1; |
134 | uint32_t padding:28; | 161 | bool runlist_13:1; |
135 | } __attribute__((packed)); | 162 | uint32_t padding:18; |
163 | } __attribute__((packed)); | ||
164 | uint32_t raw; | ||
165 | } runlist_preempt_t; | ||
166 | |||
167 | /* Additional information on preempting from NVIDIA's driver (commit b1d0d8ece) | ||
168 | * "From h/w team | ||
169 | * Engine save can be blocked by eng stalling interrupts. | ||
170 | * FIFO interrupts shouldn’t block an engine save from | ||
171 | * finishing, but could block FIFO from reporting preempt done. | ||
172 | * No immediate reason to reset the engine if FIFO interrupt is | ||
173 | * pending. | ||
174 | * The hub, priv_ring, and ltc interrupts could block context | ||
175 | * switch (or memory), but doesn’t necessarily have to. | ||
176 | * For Hub interrupts they just report access counters and page | ||
177 | * faults. Neither of these necessarily block context switch | ||
178 | * or preemption, but they could. | ||
179 | * For example a page fault for graphics would prevent graphics | ||
180 | * from saving out. An access counter interrupt is a | ||
181 | * notification and has no effect. | ||
182 | * SW should handle page faults though for preempt to complete. | ||
183 | * PRI interrupt (due to a failed PRI transaction) will result | ||
184 | * in ctxsw failure reported to HOST. | ||
185 | * LTC interrupts are generally ECC related and if so, | ||
186 | * certainly don’t block preemption/ctxsw but they could. | ||
187 | * Bus interrupts shouldn’t have anything to do with preemption | ||
188 | * state as they are part of the Host EXT pipe, though they may | ||
189 | * exhibit a symptom that indicates that GPU is in a bad state. | ||
190 | * To be completely fair, when an engine is preempting SW | ||
191 | * really should just handle other interrupts as they come in. | ||
192 | * It’s generally bad to just poll and wait on a preempt | ||
193 | * to complete since there are many things in the GPU which may | ||
194 | * cause a system to hang/stop responding." | ||
195 | */ | ||
136 | 196 | ||
137 | // Note: This is different with Turing | 197 | // Note: This is different with Turing |
198 | // Support: Kepler, Maxwell, Pascal, Volta | ||
138 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | 199 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 |
139 | typedef union { | 200 | typedef union { |
140 | struct { | 201 | struct { |
@@ -145,6 +206,7 @@ typedef union { | |||
145 | uint32_t raw; | 206 | uint32_t raw; |
146 | } runlist_base_t; | 207 | } runlist_base_t; |
147 | 208 | ||
209 | // Support: Kepler, Maxwell, Pascal, Volta | ||
148 | #define NV_PFIFO_RUNLIST 0x00002274 | 210 | #define NV_PFIFO_RUNLIST 0x00002274 |
149 | typedef union { | 211 | typedef union { |
150 | struct { | 212 | struct { |
@@ -175,8 +237,8 @@ enum CHANNEL_STATUS { | |||
175 | }; | 237 | }; |
176 | 238 | ||
177 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 239 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
178 | #define MAX_CHID 512 // TODO: Double-check this is right | ||
179 | // There are a total of 512 possible channels | 240 | // There are a total of 512 possible channels |
241 | #define MAX_CHID 512 | ||
180 | typedef union { | 242 | typedef union { |
181 | struct { | 243 | struct { |
182 | // 0:31 | 244 | // 0:31 |
@@ -202,6 +264,43 @@ typedef union { | |||
202 | uint64_t raw; | 264 | uint64_t raw; |
203 | } channel_ctrl_t; | 265 | } channel_ctrl_t; |
204 | 266 | ||
267 | /* Control word for runlist enable/disable. | ||
268 | |||
269 | RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled) | ||
270 | |||
271 | To select the runlist dynamically, use the BIT(nr) kernel macro. | ||
272 | Disabling example: | ||
273 | runlist_disable_t rl_disable; | ||
274 | rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); | ||
275 | rl_disable.raw |= BIT(nr); | ||
276 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
277 | Enabling example: | ||
278 | runlist_disable_t rl_disable; | ||
279 | rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); | ||
280 | rl_disable.raw &= ~BIT(nr); | ||
281 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
282 | |||
283 | Support: Fermi, Kepler, Maxwell, Pascal, Volta, Turing | ||
284 | */ | ||
285 | #define NV_PFIFO_SCHED_DISABLE 0x00002630 | ||
/* Bitfield view of the NV_PFIFO_SCHED_DISABLE control word.
 *
 * runlist_N is set when runlist N is disabled (1 == disabled, 0 == enabled).
 * `raw` aliases the same 32 bits for whole-register reads and writes.
 */
typedef union {
	struct {
		bool runlist_0:1;
		bool runlist_1:1;
		bool runlist_2:1;
		bool runlist_3:1;
		bool runlist_4:1;
		bool runlist_5:1;
		bool runlist_6:1;
		bool runlist_7:1;
		bool runlist_8:1;
		bool runlist_9:1;
		bool runlist_10:1;
		// 11 flag bits + 21 padding bits == 32 bits
		uint32_t padding:21;
	} __attribute__((packed));
	uint32_t raw;
} runlist_disable_t;
// Catch a miscounted padding width at build time: the bitfield view must be
// exactly as wide as `raw` for the two to alias the same register bits.
_Static_assert(sizeof(runlist_disable_t) == sizeof(uint32_t),
               "runlist_disable_t must be exactly 32 bits wide");
303 | |||
205 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 304 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
206 | 305 | ||
207 | #define for_chan_in_tsg(chan, tsg) \ | 306 | #define for_chan_in_tsg(chan, tsg) \ |
@@ -220,6 +319,7 @@ struct runlist_iter { | |||
220 | // Defined in runlist.c | 319 | // Defined in runlist.c |
221 | struct gk20a* get_live_gk20a(void); | 320 | struct gk20a* get_live_gk20a(void); |
222 | int get_runlist_iter(struct runlist_iter *rl_iter); | 321 | int get_runlist_iter(struct runlist_iter *rl_iter); |
322 | int preempt_tsg(uint32_t tsg_id); | ||
223 | 323 | ||
224 | static inline struct gk20a *get_gk20a(struct device *dev) { | 324 | static inline struct gk20a *get_gk20a(struct device *dev) { |
225 | // XXX: Only works because gk20a* is the first member of gk20a_platform | 325 | // XXX: Only works because gk20a* is the first member of gk20a_platform |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 6346659..14ad6e9 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -20,11 +20,31 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | |||
20 | MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now | 20 | MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now |
21 | 21 | ||
22 | extern const struct file_operations runlist_file_ops; | 22 | extern const struct file_operations runlist_file_ops; |
23 | extern const struct file_operations preempt_tsg_file_ops; | ||
24 | extern const struct file_operations disable_channel_file_ops; | ||
25 | extern const struct file_operations enable_channel_file_ops; | ||
26 | extern const struct file_operations switch_to_tsg_file_ops; | ||
23 | 27 | ||
24 | int __init nvdebug_init(void) { | 28 | int __init nvdebug_init(void) { |
25 | struct proc_dir_entry *entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); | 29 | struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry, |
26 | if (!entry) { | 30 | *enable_channel_entry, *switch_to_tsg_entry; |
31 | // Create file `/proc/runlist`, world readable | ||
32 | rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); | ||
33 | // Create file `/proc/preempt_tsg`, world writable | ||
34 | preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops); | ||
35 | // Create file `/proc/disable_channel`, world writable | ||
36 | disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops); | ||
37 | // Create file `/proc/enable_channel`, world writable | ||
38 | enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops); | ||
39 | // Create file `/proc/switch_to_tsg`, world writable | ||
40 | switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops); | ||
41 | // ProcFS entry creation only fails if out of memory | ||
42 | if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) { | ||
27 | remove_proc_entry("runlist", NULL); | 43 | remove_proc_entry("runlist", NULL); |
44 | remove_proc_entry("preempt_tsg", NULL); | ||
45 | remove_proc_entry("disable_channel", NULL); | ||
46 | remove_proc_entry("enable_channel", NULL); | ||
47 | remove_proc_entry("switch_to_tsg", NULL); | ||
28 | printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); | 48 | printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); |
29 | return -ENOMEM; | 49 | return -ENOMEM; |
30 | } | 50 | } |
@@ -34,6 +54,10 @@ int __init nvdebug_init(void) { | |||
34 | 54 | ||
35 | static void __exit nvdebug_exit(void) { | 55 | static void __exit nvdebug_exit(void) { |
36 | remove_proc_entry("runlist", NULL); | 56 | remove_proc_entry("runlist", NULL); |
57 | remove_proc_entry("preempt_tsg", NULL); | ||
58 | remove_proc_entry("disable_channel", NULL); | ||
59 | remove_proc_entry("enable_channel", NULL); | ||
60 | remove_proc_entry("switch_to_tsg", NULL); | ||
37 | printk(KERN_INFO "[nvdebug] Exiting...\n"); | 61 | printk(KERN_INFO "[nvdebug] Exiting...\n"); |
38 | } | 62 | } |
39 | 63 | ||
@@ -109,3 +109,28 @@ int get_runlist_iter(struct runlist_iter *rl_iter) { | |||
109 | printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | 109 | printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); |
110 | printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */ | 110 | printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */ |
111 | } | 111 | } |
112 | |||
113 | int preempt_tsg(uint32_t tsg_id) { | ||
114 | struct gk20a *g = get_live_gk20a(); | ||
115 | runlist_info_t rl_info; | ||
116 | pfifo_preempt_t pfifo_preempt; | ||
117 | runlist_disable_t rl_disable; | ||
118 | if (!g) | ||
119 | return -EIO; | ||
120 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); | ||
121 | pfifo_preempt.id = tsg_id; | ||
122 | pfifo_preempt.is_pending = 0; | ||
123 | pfifo_preempt.type = PREEMPT_TYPE_TSG; | ||
124 | // There may be a bug (?) that requires us to disable scheduling before preempting | ||
125 | rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); | ||
126 | rl_disable.raw |= BIT(rl_info.id); // Disable runlist rl_info.id | ||
127 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
128 | // Actually trigger the preemption | ||
129 | nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw); | ||
130 | // Renable scheduling | ||
131 | rl_disable.raw &= ~BIT(rl_info.id); // Enable runlist rl_info.id | ||
132 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
133 | |||
134 | printk(KERN_INFO "[nvdebug] TSG %d preempted (runlist %d)\n", tsg_id, rl_info.id); | ||
135 | return 0; | ||
136 | } | ||
diff --git a/runlist_procfs.c b/runlist_procfs.c index 183eab6..411f844 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -151,3 +151,138 @@ const struct file_operations runlist_file_ops = { | |||
151 | .llseek = seq_lseek, | 151 | .llseek = seq_lseek, |
152 | .release = seq_release, | 152 | .release = seq_release, |
153 | }; | 153 | }; |
154 | |||
155 | ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | ||
156 | size_t count, loff_t *off) { | ||
157 | uint32_t target_tsgid; | ||
158 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | ||
159 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | ||
160 | if (err) | ||
161 | return err; | ||
162 | |||
163 | // TSG IDs are a 12-bit field, so make sure the request is in-range | ||
164 | if (target_tsgid > MAX_TSGID) | ||
165 | return -ERANGE; | ||
166 | |||
167 | // Execute preemption | ||
168 | err = preempt_tsg(target_tsgid); | ||
169 | if (err) | ||
170 | return err; | ||
171 | |||
172 | return count; | ||
173 | } | ||
174 | |||
175 | const struct file_operations preempt_tsg_file_ops = { | ||
176 | .write = preempt_tsg_file_write, | ||
177 | }; | ||
178 | |||
179 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | ||
180 | size_t count, loff_t *off) { | ||
181 | uint32_t target_channel; | ||
182 | channel_ctrl_t chan; | ||
183 | int err; | ||
184 | struct gk20a *g = get_live_gk20a(); | ||
185 | if (!g) | ||
186 | return -EIO; | ||
187 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | ||
188 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | ||
189 | if (err) | ||
190 | return err; | ||
191 | |||
192 | if (target_channel > MAX_CHID) | ||
193 | return -ERANGE; | ||
194 | |||
195 | // Disable channel | ||
196 | chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); | ||
197 | chan.enable_clear = true; | ||
198 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | ||
199 | |||
200 | return count; | ||
201 | } | ||
202 | |||
203 | const struct file_operations disable_channel_file_ops = { | ||
204 | .write = disable_channel_file_write, | ||
205 | }; | ||
206 | |||
207 | ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, | ||
208 | size_t count, loff_t *off) { | ||
209 | uint32_t target_channel; | ||
210 | channel_ctrl_t chan; | ||
211 | int err; | ||
212 | struct gk20a *g = get_live_gk20a(); | ||
213 | if (!g) | ||
214 | return -EIO; | ||
215 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | ||
216 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | ||
217 | if (err) | ||
218 | return err; | ||
219 | |||
220 | if (target_channel > MAX_CHID) | ||
221 | return -ERANGE; | ||
222 | |||
223 | // Disable channel | ||
224 | chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); | ||
225 | chan.enable_set = true; | ||
226 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | ||
227 | |||
228 | return count; | ||
229 | } | ||
230 | |||
231 | const struct file_operations enable_channel_file_ops = { | ||
232 | .write = enable_channel_file_write, | ||
233 | }; | ||
234 | |||
235 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, | ||
236 | size_t count, loff_t *off) { | ||
237 | uint32_t target_tsgid; | ||
238 | struct runlist_chan* chan; | ||
239 | channel_ctrl_t chan_ctl; | ||
240 | struct runlist_iter rl_iter; | ||
241 | int err; | ||
242 | loff_t pos = 0; | ||
243 | struct gk20a *g = get_live_gk20a(); | ||
244 | if (!g) | ||
245 | return -EIO; | ||
246 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | ||
247 | err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | ||
248 | if (err) | ||
249 | return err; | ||
250 | |||
251 | if (target_tsgid > MAX_TSGID) | ||
252 | return -ERANGE; | ||
253 | |||
254 | err = get_runlist_iter(&rl_iter); | ||
255 | if (err) | ||
256 | return err; | ||
257 | |||
258 | // Iterate through all TSGs | ||
259 | while (pos < rl_iter.rl_info.len) { | ||
260 | if (rl_iter.curr_tsg->tsgid == target_tsgid) { | ||
261 | // Enable channels of target TSG | ||
262 | for_chan_in_tsg(chan, rl_iter.curr_tsg) { | ||
263 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | ||
264 | chan_ctl.enable_set = true; | ||
265 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | ||
266 | } | ||
267 | } else { | ||
268 | // Disable all other channels | ||
269 | for_chan_in_tsg(chan, rl_iter.curr_tsg) { | ||
270 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | ||
271 | chan_ctl.enable_clear = true; | ||
272 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | ||
273 | } | ||
274 | } | ||
275 | pos += 1 + rl_iter.curr_tsg->tsg_length; | ||
276 | rl_iter.curr_tsg = next_tsg(rl_iter.curr_tsg); | ||
277 | } | ||
278 | // Switch to next TSG with active channels (should be our TSG) | ||
279 | err = preempt_tsg(target_tsgid); | ||
280 | if (err) | ||
281 | return err; | ||
282 | |||
283 | return count; | ||
284 | } | ||
285 | |||
286 | const struct file_operations switch_to_tsg_file_ops = { | ||
287 | .write = switch_to_tsg_file_write, | ||
288 | }; | ||