Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c')
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c | 266
1 file changed, 224 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 38c0910722c0..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -27,11 +27,71 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+struct gk104_fifo_engine_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
+
+static void
+gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
+			 struct gk104_fifo_engine_status *status)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
+
+	status->busy = !!(stat & 0x80000000);
+	status->faulted = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id = (stat & 0x0fff0000) >> 16;
+	status->chsw = !!(stat & 0x00008000);
+	status->save = !!(stat & 0x00004000);
+	status->load = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id = (stat & 0x00000fff);
+	status->chan = NULL;
+
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (engine && nvkm_engine_chsw_load(engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
+		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
+	}
+
+	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
+			   "save %d load %d %sid %d%s-> %sid %d%s\n",
+		   engn, status->busy, status->faulted,
+		   status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
+}
+
 static int
 gk104_fifo_class_get(struct nvkm_fifo *base, int index,
 		     const struct nvkm_fifo_chan_oclass **psclass)
@@ -83,10 +143,13 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	}
 	nvkm_done(mem);
 
-	if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM)
-		target = 0;
-	else
-		target = 3;
+	switch (nvkm_memory_target(mem)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
 				    (target << 28));
@@ -149,31 +212,137 @@ gk104_fifo_recover_work(struct work_struct *w)
 	nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
+static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
 static void
-gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-		   struct gk104_fifo_chan *chan)
+gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
 {
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 chid = chan->base.chid;
-	int engn;
+	const u32 runm = BIT(runl);
 
-	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-		   nvkm_subdev_name[engine->subdev.index], chid);
 	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.runm & runm)
+		return;
+	fifo->recover.runm |= runm;
 
-	nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800);
-	list_del_init(&chan->head);
-	chan->killed = true;
+	/* Block runlist to prevent channel assignment(s) from changing. */
+	nvkm_mask(device, 0x002630, runm, runm);
 
-	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			fifo->recover.engm |= BIT(engn);
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+	schedule_work(&fifo->recover.work);
+}
+
+static void
+gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+	const u32 runl = (stat & 0x000f0000) >> 16;
+	const bool used = (stat & 0x00000001);
+	unsigned long engn, engm = fifo->runlist[runl].engm;
+	struct gk104_fifo_chan *chan;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (!used)
+		return;
+
+	/* Lookup SW state for channel, and mark it as dead. */
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		if (chan->base.chid == chid) {
+			list_del_init(&chan->head);
+			chan->killed = true;
+			nvkm_fifo_kevent(&fifo->base, chid);
 			break;
 		}
 	}
 
-	fifo->recover.runm |= BIT(chan->runl);
+	/* Disable channel. */
+	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+	nvkm_warn(subdev, "channel %d: killed\n", chid);
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Schedule recovery for any engines the channel is on. */
+	for_each_set_bit(engn, &engm, fifo->engine_nr) {
+		struct gk104_fifo_engine_status status;
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.chan || status.chan->id != chid)
+			continue;
+		gk104_fifo_recover_engn(fifo, engn);
+	}
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 runl = fifo->engine[engn].runl;
+	const u32 engm = BIT(engn);
+	struct gk104_fifo_engine_status status;
+	int mmui = -1;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.engm & engm)
+		return;
+	fifo->recover.engm |= engm;
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Determine which channel (if any) is currently on the engine. */
+	gk104_fifo_engine_status(fifo, engn, &status);
+	if (status.chan) {
+		/* The channel is not longer viable, kill it. */
+		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+	}
+
+	/* Determine MMU fault ID for the engine, if we're not being
+	 * called from the fault handler already.
+	 */
+	if (!status.faulted && engine) {
+		mmui = nvkm_top_fault_id(device, engine->subdev.index);
+		if (mmui < 0) {
+			const struct nvkm_enum *en = fifo->func->fault.engine;
+			for (; en && en->name; en++) {
+				if (en->data2 == engine->subdev.index) {
+					mmui = en->value;
+					break;
+				}
+			}
+		}
+		WARN_ON(mmui < 0);
+	}
+
+	/* Trigger a MMU fault for the engine.
+	 *
+	 * No good idea why this is needed, but nvgpu does something similar,
+	 * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+	 */
+	if (mmui >= 0) {
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+		/* Wait for fault to trigger. */
+		nvkm_msec(device, 2000,
+			gk104_fifo_engine_status(fifo, engn, &status);
+			if (status.faulted)
+				break;
+		);
+
+		/* Release MMU fault trigger, and ACK the fault. */
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x00259c, BIT(mmui));
+		nvkm_wr32(device, 0x002100, 0x10000000);
+	}
+
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
 	schedule_work(&fifo->recover.work);
 }
 
@@ -211,34 +380,30 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct gk104_fifo_chan *chan;
-	unsigned long flags;
+	unsigned long flags, engm = 0;
 	u32 engn;
 
+	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+	 * as MMU_FAULT cannot be triggered while it's pending.
+	 */
 	spin_lock_irqsave(&fifo->base.lock, flags);
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+
 	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct nvkm_engine *engine = fifo->engine[engn].engine;
-		int runl = fifo->engine[engn].runl;
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x0fff0000) >> 16;
-		u32 chsw = (stat & 0x00008000);
-		u32 save = (stat & 0x00004000);
-		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x00000fff);
-		u32 chid = load ? next : prev;
-		(void)save;
-
-		if (!busy || !chsw)
+		struct gk104_fifo_engine_status status;
+
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.busy || !status.chsw)
 			continue;
 
-		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-			if (chan->base.chid == chid && engine) {
-				gk104_fifo_recover(fifo, engine, chan);
-				break;
-			}
-		}
+		engm |= BIT(engn);
 	}
+
+	for_each_set_bit(engn, &engm, fifo->engine_nr)
+		gk104_fifo_recover_engn(fifo, engn);
+
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
@@ -301,6 +466,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	struct nvkm_fifo_chan *chan;
 	unsigned long flags;
 	char gpcid[8] = "", en[16] = "";
+	int engn;
 
 	er = nvkm_enum_find(fifo->func->fault.reason, reason);
 	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
@@ -342,7 +508,8 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(en, sizeof(en), "%s", eu->name);
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, (u64)inst << 12, &flags);
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
 
 	nvkm_error(subdev,
 		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
@@ -353,9 +520,23 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		   (u64)inst << 12,
 		   chan ? chan->object.client->name : "unknown");
 
-	if (engine && chan)
-		gk104_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -716,6 +897,7 @@ gk104_fifo_ = {
 	.intr = gk104_fifo_intr,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
+	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
 };
 