-rw-r--r--  nvdebug.h        | 154
-rw-r--r--  nvdebug_entry.c  |  28
-rw-r--r--  runlist.c        |  25
-rw-r--r--  runlist_procfs.c | 135
4 files changed, 313 insertions(+), 29 deletions(-)
diff --git a/nvdebug.h b/nvdebug.h
index cd0dc90..9ac71da 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -99,42 +99,103 @@ struct entry_tsg {
 	uint32_t tsgid:12;
 	uint64_t padding4:52;
 } __attribute__((packed));
+#define MAX_TSGID (1 << 12)
 
 enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1};
 
-/* Preempt
+/* Preempt a TSG or Channel by ID
   ID/CHID : Id of TSG or channel to preempt
+  IS_PENDING : ????
+  TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG
+
+  Support: Kepler, Maxwell, Pascal, Volta
 */
 #define NV_PFIFO_PREEMPT 0x00002634
-struct pfifo_preempt {
-	uint32_t id:12;
-	uint32_t padding:8;
-	bool is_pending:1;
-	uint32_t padding2:3;
-	enum PREEMPT_TYPE type:2;
-	uint32_t padding3:6;
-} __attribute__((packed));
+typedef union {
+	struct {
+		uint32_t id:12;
+		uint32_t padding:8;
+		bool is_pending:1;
+		uint32_t padding2:3;
+		enum PREEMPT_TYPE type:2;
+		uint32_t padding3:6;
+	} __attribute__((packed));
+	uint32_t raw;
+} pfifo_preempt_t;
+
+/*
+  "Initiate a preempt of the engine by writing the bit associated with its
+  runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT
+  for the preempt to complete."
 
+  Useful for preempting multiple runlists at once.
+
+  Appears to trigger an interrupt or some other side-effect on the Jetson
+  Xavier, as the built-in nvgpu driver seems to be disturbed by writing to this.
+
+  To select the runlist dynamically, use the BIT(nr) kernel macro.
+  Example:
+    runlist_preempt_t rl_preempt;
+    rl_preempt.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_PREEMPT);
+    rl_preempt.raw |= BIT(nr);
+    nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
+
+  Support: Volta
+*/
 #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
-struct runlist_preempt {
-	bool runlist_0:1;
-	bool runlist_1:1;
-	bool runlist_2:1;
-	bool runlist_3:1;
-	bool runlist_4:1;
-	bool runlist_5:1;
-	bool runlist_6:1;
-	bool runlist_7:1;
-	bool runlist_8:1;
-	bool runlist_9:1;
-	bool runlist_10:1;
-	bool runlist_11:1;
-	bool runlist_12:1;
-	bool runlist_13:1;
-	uint32_t padding:28;
-} __attribute__((packed));
+typedef union {
+	struct {
+		bool runlist_0:1;
+		bool runlist_1:1;
+		bool runlist_2:1;
+		bool runlist_3:1;
+		bool runlist_4:1;
+		bool runlist_5:1;
+		bool runlist_6:1;
+		bool runlist_7:1;
+		bool runlist_8:1;
+		bool runlist_9:1;
+		bool runlist_10:1;
+		bool runlist_11:1;
+		bool runlist_12:1;
+		bool runlist_13:1;
+		uint32_t padding:18;
+	} __attribute__((packed));
+	uint32_t raw;
+} runlist_preempt_t;
+
+/* Additional information on preempting from NVIDIA's driver (commit b1d0d8ece)
+ * "From h/w team
+ * Engine save can be blocked by eng stalling interrupts.
+ * FIFO interrupts shouldn’t block an engine save from
+ * finishing, but could block FIFO from reporting preempt done.
+ * No immediate reason to reset the engine if FIFO interrupt is
+ * pending.
+ * The hub, priv_ring, and ltc interrupts could block context
+ * switch (or memory), but doesn’t necessarily have to.
+ * For Hub interrupts they just report access counters and page
+ * faults. Neither of these necessarily block context switch
+ * or preemption, but they could.
+ * For example a page fault for graphics would prevent graphics
+ * from saving out. An access counter interrupt is a
+ * notification and has no effect.
+ * SW should handle page faults though for preempt to complete.
+ * PRI interrupt (due to a failed PRI transaction) will result
+ * in ctxsw failure reported to HOST.
+ * LTC interrupts are generally ECC related and if so,
+ * certainly don’t block preemption/ctxsw but they could.
+ * Bus interrupts shouldn’t have anything to do with preemption
+ * state as they are part of the Host EXT pipe, though they may
+ * exhibit a symptom that indicates that GPU is in a bad state.
+ * To be completely fair, when an engine is preempting SW
+ * really should just handle other interrupts as they come in.
+ * It’s generally bad to just poll and wait on a preempt
+ * to complete since there are many things in the GPU which may
+ * cause a system to hang/stop responding."
+ */
 
 // Note: This is different with Turing
+// Support: Kepler, Maxwell, Pascal, Volta
 #define NV_PFIFO_RUNLIST_BASE 0x00002270
 typedef union {
 	struct {
@@ -145,6 +206,7 @@ typedef union {
 	uint32_t raw;
 } runlist_base_t;
 
+// Support: Kepler, Maxwell, Pascal, Volta
 #define NV_PFIFO_RUNLIST 0x00002274
 typedef union {
 	struct {
@@ -175,8 +237,8 @@ enum CHANNEL_STATUS {
 };
 
 #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
-#define MAX_CHID 512 // TODO: Double-check this is right
 // There are a total of 512 possible channels
+#define MAX_CHID 512
 typedef union {
 	struct {
 // 0:31
@@ -202,6 +264,43 @@ typedef union {
 	uint64_t raw;
 } channel_ctrl_t;
 
+/* Control word for runlist enable/disable.
+
+  RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled)
+
+  To select the runlist dynamically, use the BIT(nr) kernel macro.
+  Disabling example:
+    runlist_disable_t rl_disable;
+    rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
+    rl_disable.raw |= BIT(nr);
+    nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+  Enabling example:
+    runlist_disable_t rl_disable;
+    rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
+    rl_disable.raw &= ~BIT(nr);
+    nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+
+  Support: Fermi, Kepler, Maxwell, Pascal, Volta, Turing
+*/
+#define NV_PFIFO_SCHED_DISABLE 0x00002630
+typedef union {
+	struct {
+		bool runlist_0:1;
+		bool runlist_1:1;
+		bool runlist_2:1;
+		bool runlist_3:1;
+		bool runlist_4:1;
+		bool runlist_5:1;
+		bool runlist_6:1;
+		bool runlist_7:1;
+		bool runlist_8:1;
+		bool runlist_9:1;
+		bool runlist_10:1;
+		uint32_t padding:21;
+	} __attribute__((packed));
+	uint32_t raw;
+} runlist_disable_t;
+
 // TODO(jbakita): Maybe put the above GPU types in a different file.
 
 #define for_chan_in_tsg(chan, tsg) \
@@ -220,6 +319,7 @@ struct runlist_iter {
 // Defined in runlist.c
 struct gk20a* get_live_gk20a(void);
 int get_runlist_iter(struct runlist_iter *rl_iter);
+int preempt_tsg(uint32_t tsg_id);
 
 static inline struct gk20a *get_gk20a(struct device *dev) {
 	// XXX: Only works because gk20a* is the first member of gk20a_platform
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 6346659..14ad6e9 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -20,11 +20,31 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
 MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now
 
 extern const struct file_operations runlist_file_ops;
+extern const struct file_operations preempt_tsg_file_ops;
+extern const struct file_operations disable_channel_file_ops;
+extern const struct file_operations enable_channel_file_ops;
+extern const struct file_operations switch_to_tsg_file_ops;
 
 int __init nvdebug_init(void) {
-	struct proc_dir_entry *entry = proc_create("runlist", 0444, NULL, &runlist_file_ops);
-	if (!entry) {
+	struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry,
+	                      *enable_channel_entry, *switch_to_tsg_entry;
+	// Create file `/proc/runlist`, world readable
+	rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops);
+	// Create file `/proc/preempt_tsg`, world writable
+	preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops);
+	// Create file `/proc/disable_channel`, world writable
+	disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops);
+	// Create file `/proc/enable_channel`, world writable
+	enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops);
+	// Create file `/proc/switch_to_tsg`, world writable
+	switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops);
+	// ProcFS entry creation only fails if out of memory
+	if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) {
 		remove_proc_entry("runlist", NULL);
+		remove_proc_entry("preempt_tsg", NULL);
+		remove_proc_entry("disable_channel", NULL);
+		remove_proc_entry("enable_channel", NULL);
+		remove_proc_entry("switch_to_tsg", NULL);
 		printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n");
 		return -ENOMEM;
 	}
@@ -34,6 +54,10 @@ int __init nvdebug_init(void) {
 
 static void __exit nvdebug_exit(void) {
 	remove_proc_entry("runlist", NULL);
+	remove_proc_entry("preempt_tsg", NULL);
+	remove_proc_entry("disable_channel", NULL);
+	remove_proc_entry("enable_channel", NULL);
+	remove_proc_entry("switch_to_tsg", NULL);
 	printk(KERN_INFO "[nvdebug] Exiting...\n");
 }
 
diff --git a/runlist.c b/runlist.c
index 8dfa1c7..03528af 100644
--- a/runlist.c
+++ b/runlist.c
@@ -109,3 +109,28 @@ int get_runlist_iter(struct runlist_iter *rl_iter) {
 	printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
 	printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */
 }
+
+int preempt_tsg(uint32_t tsg_id) {
+	struct gk20a *g = get_live_gk20a();
+	runlist_info_t rl_info;
+	pfifo_preempt_t pfifo_preempt;
+	runlist_disable_t rl_disable;
+	if (!g)
+		return -EIO;
+	rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
+	pfifo_preempt.id = tsg_id;
+	pfifo_preempt.is_pending = 0;
+	pfifo_preempt.type = PREEMPT_TYPE_TSG;
+	// There may be a bug (?) that requires us to disable scheduling before preempting
+	rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
+	rl_disable.raw |= BIT(rl_info.id); // Disable runlist rl_info.id
+	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+	// Actually trigger the preemption
+	nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw);
+	// Re-enable scheduling
+	rl_disable.raw &= ~BIT(rl_info.id); // Enable runlist rl_info.id
+	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+
+	printk(KERN_INFO "[nvdebug] TSG %d preempted (runlist %d)\n", tsg_id, rl_info.id);
+	return 0;
+}
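The preempt_tsg() helper above always preempts at TSG granularity. For comparison, a channel-level preempt through the same pfifo_preempt_t union might look like the sketch below; preempt_channel() is a hypothetical, untested helper and is not part of this patch, though every identifier it uses is declared in nvdebug.h above.

// Hypothetical sketch (not in this patch): preempt a single channel via
// NV_PFIFO_PREEMPT using PREEMPT_TYPE_CHANNEL. Assumes #include "nvdebug.h"
// and that the channel path needs no scheduler disable; untested.
int preempt_channel(uint32_t chid) {
	struct gk20a *g = get_live_gk20a();
	pfifo_preempt_t pfifo_preempt;
	if (!g)
		return -EIO;
	if (chid >= MAX_CHID)
		return -ERANGE;
	pfifo_preempt.raw = 0; // clear padding bits before writing to hardware
	pfifo_preempt.id = chid;
	pfifo_preempt.is_pending = 0;
	pfifo_preempt.type = PREEMPT_TYPE_CHANNEL;
	nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw);
	return 0;
}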
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 183eab6..411f844 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -151,3 +151,138 @@ const struct file_operations runlist_file_ops = {
 	.llseek = seq_lseek,
 	.release = seq_release,
 };
+
+ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
+                               size_t count, loff_t *off) {
+	uint32_t target_tsgid;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	if (err)
+		return err;
+
+	// TSG IDs are a 12-bit field, so make sure the request is in-range
+	if (target_tsgid >= MAX_TSGID)
+		return -ERANGE;
+
+	// Execute preemption
+	err = preempt_tsg(target_tsgid);
+	if (err)
+		return err;
+
+	return count;
+}
+
+const struct file_operations preempt_tsg_file_ops = {
+	.write = preempt_tsg_file_write,
+};
+
+ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
+                                   size_t count, loff_t *off) {
+	uint32_t target_channel;
+	channel_ctrl_t chan;
+	int err;
+	struct gk20a *g = get_live_gk20a();
+	if (!g)
+		return -EIO;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	if (err)
+		return err;
+
+	if (target_channel >= MAX_CHID)
+		return -ERANGE;
+
+	// Disable channel
+	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
+	chan.enable_clear = true;
+	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+
+	return count;
+}
+
+const struct file_operations disable_channel_file_ops = {
+	.write = disable_channel_file_write,
+};
+
+ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
+                                  size_t count, loff_t *off) {
+	uint32_t target_channel;
+	channel_ctrl_t chan;
+	int err;
+	struct gk20a *g = get_live_gk20a();
+	if (!g)
+		return -EIO;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	if (err)
+		return err;
+
+	if (target_channel >= MAX_CHID)
+		return -ERANGE;
+
+	// Enable channel
+	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
+	chan.enable_set = true;
+	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+
+	return count;
+}
+
+const struct file_operations enable_channel_file_ops = {
+	.write = enable_channel_file_write,
+};
+
+ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
+                                 size_t count, loff_t *off) {
+	uint32_t target_tsgid;
+	struct runlist_chan* chan;
+	channel_ctrl_t chan_ctl;
+	struct runlist_iter rl_iter;
+	int err;
+	loff_t pos = 0;
+	struct gk20a *g = get_live_gk20a();
+	if (!g)
+		return -EIO;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	if (err)
+		return err;
+
+	if (target_tsgid >= MAX_TSGID)
+		return -ERANGE;
+
+	err = get_runlist_iter(&rl_iter);
+	if (err)
+		return err;
+
+	// Iterate through all TSGs
+	while (pos < rl_iter.rl_info.len) {
+		if (rl_iter.curr_tsg->tsgid == target_tsgid) {
+			// Enable channels of target TSG
+			for_chan_in_tsg(chan, rl_iter.curr_tsg) {
+				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
+				chan_ctl.enable_set = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+			}
+		} else {
+			// Disable all other channels
+			for_chan_in_tsg(chan, rl_iter.curr_tsg) {
+				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
+				chan_ctl.enable_clear = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+			}
+		}
+		pos += 1 + rl_iter.curr_tsg->tsg_length;
+		rl_iter.curr_tsg = next_tsg(rl_iter.curr_tsg);
+	}
+	// Switch to next TSG with active channels (should be our TSG)
+	err = preempt_tsg(target_tsgid);
+	if (err)
+		return err;
+
+	return count;
+}
+
+const struct file_operations switch_to_tsg_file_ops = {
+	.write = switch_to_tsg_file_write,
+};
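With the module loaded, each new procfs file takes a single numeric ID written as text; decimal, hex (0x...), or octal all work because kstrtou32 is called with base 0. A minimal userspace sketch follows, assuming a TSG with ID 3 exists and the caller has permission to write to /proc/preempt_tsg; the same pattern applies to /proc/disable_channel, /proc/enable_channel, and /proc/switch_to_tsg.

/* Hypothetical userspace test: preempt TSG 3 via /proc/preempt_tsg.
 * The TSG ID of 3 is an assumption for illustration only. */
#include <stdio.h>

int main(void) {
	FILE *f = fopen("/proc/preempt_tsg", "w");
	if (!f) {
		perror("fopen /proc/preempt_tsg");
		return 1;
	}
	// A trailing newline is tolerated by the kernel's kstrtou32 parser
	fprintf(f, "3\n");
	// fclose() flushes the write; it fails if the handler returned an error
	return fclose(f) == 0 ? 0 : 1;
}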