Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c')
 -rw-r--r--	drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c	266
 1 file changed, 224 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 38c0910722c0..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -27,11 +27,71 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+struct gk104_fifo_engine_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
+
+static void
+gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
+			 struct gk104_fifo_engine_status *status)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
+
+	status->busy = !!(stat & 0x80000000);
+	status->faulted = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id = (stat & 0x0fff0000) >> 16;
+	status->chsw = !!(stat & 0x00008000);
+	status->save = !!(stat & 0x00004000);
+	status->load = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id = (stat & 0x00000fff);
+	status->chan = NULL;
+
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (engine && nvkm_engine_chsw_load(engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
+		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
+	}
+
+	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
+			   "save %d load %d %sid %d%s-> %sid %d%s\n",
+		   engn, status->busy, status->faulted,
+		   status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
+}
+
 static int
 gk104_fifo_class_get(struct nvkm_fifo *base, int index,
 		     const struct nvkm_fifo_chan_oclass **psclass)
@@ -83,10 +143,13 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	}
 	nvkm_done(mem);
 
-	if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM)
-		target = 0;
-	else
-		target = 3;
+	switch (nvkm_memory_target(mem)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
 				    (target << 28));
@@ -149,31 +212,137 @@ gk104_fifo_recover_work(struct work_struct *w)
 	nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
+static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
 static void
-gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-		   struct gk104_fifo_chan *chan)
+gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
 {
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 chid = chan->base.chid;
-	int engn;
+	const u32 runm = BIT(runl);
 
-	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-		   nvkm_subdev_name[engine->subdev.index], chid);
 	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.runm & runm)
+		return;
+	fifo->recover.runm |= runm;
 
-	nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800);
-	list_del_init(&chan->head);
-	chan->killed = true;
+	/* Block runlist to prevent channel assignment(s) from changing. */
+	nvkm_mask(device, 0x002630, runm, runm);
 
-	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			fifo->recover.engm |= BIT(engn);
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+	schedule_work(&fifo->recover.work);
+}
+
+static void
+gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+	const u32 runl = (stat & 0x000f0000) >> 16;
+	const bool used = (stat & 0x00000001);
+	unsigned long engn, engm = fifo->runlist[runl].engm;
+	struct gk104_fifo_chan *chan;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (!used)
+		return;
+
+	/* Lookup SW state for channel, and mark it as dead. */
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		if (chan->base.chid == chid) {
+			list_del_init(&chan->head);
+			chan->killed = true;
+			nvkm_fifo_kevent(&fifo->base, chid);
 			break;
 		}
 	}
 
-	fifo->recover.runm |= BIT(chan->runl);
+	/* Disable channel. */
+	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+	nvkm_warn(subdev, "channel %d: killed\n", chid);
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Schedule recovery for any engines the channel is on. */
+	for_each_set_bit(engn, &engm, fifo->engine_nr) {
+		struct gk104_fifo_engine_status status;
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.chan || status.chan->id != chid)
+			continue;
+		gk104_fifo_recover_engn(fifo, engn);
+	}
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 runl = fifo->engine[engn].runl;
+	const u32 engm = BIT(engn);
+	struct gk104_fifo_engine_status status;
+	int mmui = -1;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.engm & engm)
+		return;
+	fifo->recover.engm |= engm;
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Determine which channel (if any) is currently on the engine. */
+	gk104_fifo_engine_status(fifo, engn, &status);
+	if (status.chan) {
+		/* The channel is no longer viable, kill it. */
+		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+	}
+
+	/* Determine MMU fault ID for the engine, if we're not being
+	 * called from the fault handler already.
+	 */
+	if (!status.faulted && engine) {
+		mmui = nvkm_top_fault_id(device, engine->subdev.index);
+		if (mmui < 0) {
+			const struct nvkm_enum *en = fifo->func->fault.engine;
+			for (; en && en->name; en++) {
+				if (en->data2 == engine->subdev.index) {
+					mmui = en->value;
+					break;
+				}
+			}
+		}
+		WARN_ON(mmui < 0);
+	}
+
+	/* Trigger an MMU fault for the engine.
+	 *
+	 * It's not clear why this is needed, but nvgpu does something similar,
+	 * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+	 */
+	if (mmui >= 0) {
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+		/* Wait for fault to trigger. */
+		nvkm_msec(device, 2000,
+			gk104_fifo_engine_status(fifo, engn, &status);
+			if (status.faulted)
+				break;
+		);
+
+		/* Release MMU fault trigger, and ACK the fault. */
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x00259c, BIT(mmui));
+		nvkm_wr32(device, 0x002100, 0x10000000);
+	}
+
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
 	schedule_work(&fifo->recover.work);
 }
 
@@ -211,34 +380,30 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct gk104_fifo_chan *chan;
-	unsigned long flags;
+	unsigned long flags, engm = 0;
 	u32 engn;
 
+	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+	 * as MMU_FAULT cannot be triggered while it's pending.
+	 */
 	spin_lock_irqsave(&fifo->base.lock, flags);
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+
 	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct nvkm_engine *engine = fifo->engine[engn].engine;
-		int runl = fifo->engine[engn].runl;
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x0fff0000) >> 16;
-		u32 chsw = (stat & 0x00008000);
-		u32 save = (stat & 0x00004000);
-		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x00000fff);
-		u32 chid = load ? next : prev;
-		(void)save;
-
-		if (!busy || !chsw)
+		struct gk104_fifo_engine_status status;
+
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.busy || !status.chsw)
 			continue;
 
-		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-			if (chan->base.chid == chid && engine) {
-				gk104_fifo_recover(fifo, engine, chan);
-				break;
-			}
-		}
+		engm |= BIT(engn);
 	}
+
+	for_each_set_bit(engn, &engm, fifo->engine_nr)
+		gk104_fifo_recover_engn(fifo, engn);
+
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
@@ -301,6 +466,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	struct nvkm_fifo_chan *chan;
 	unsigned long flags;
 	char gpcid[8] = "", en[16] = "";
+	int engn;
 
 	er = nvkm_enum_find(fifo->func->fault.reason, reason);
 	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
@@ -342,7 +508,8 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(en, sizeof(en), "%s", eu->name);
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, (u64)inst << 12, &flags);
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
 
 	nvkm_error(subdev,
 		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
@@ -353,9 +520,23 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		   (u64)inst << 12,
 		   chan ? chan->object.client->name : "unknown");
 
-	if (engine && chan)
-		gk104_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -716,6 +897,7 @@ gk104_fifo_ = {
 	.intr = gk104_fifo_intr,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
+	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
 };
 
