diff options
author | Seema Khowala <seemaj@nvidia.com> | 2017-03-17 19:21:55 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-23 20:18:58 -0400 |
commit | fb71b882750332fde543550adffe3f70f1d47d47 (patch) | |
tree | 6f2d2672443ff13c41a045cc3728c9e0ce5bca3f /drivers | |
parent | a8efce77f5fab497b477d1f5fd461cded528bc0b (diff) |
gpu: nvgpu: *ERROR_MMU_ERR_FLT* not set for fake mmu faults
For fake faults, error notifiers are expected to be set
before triggering the fake mmu fault.
JIRA GPUT19X-7
Change-Id: I458af8d95c5960f20693b6923e1990fe3aa59857
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: http://git-master/r/1323413
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 130 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 8 |
2 files changed, 87 insertions, 51 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index fb31c3fd..60190521 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -1313,53 +1313,81 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, | |||
1313 | } | 1313 | } |
1314 | 1314 | ||
1315 | /* caller must hold a channel reference */ | 1315 | /* caller must hold a channel reference */ |
1316 | static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, | 1316 | bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g, |
1317 | struct channel_gk20a *ch) | 1317 | struct channel_gk20a *refch) |
1318 | { | 1318 | { |
1319 | bool verbose = true; | 1319 | bool verbose = true; |
1320 | if (!ch) | 1320 | if (!refch) |
1321 | return verbose; | 1321 | return verbose; |
1322 | 1322 | ||
1323 | nvgpu_mutex_acquire(&ch->error_notifier_mutex); | 1323 | nvgpu_mutex_acquire(&refch->error_notifier_mutex); |
1324 | if (ch->error_notifier_ref) { | 1324 | if (refch->error_notifier_ref) { |
1325 | u32 err = ch->error_notifier->info32; | 1325 | u32 err = refch->error_notifier->info32; |
1326 | if (ch->error_notifier->status == 0xffff) { | 1326 | |
1327 | /* If error code is already set, this mmu fault | 1327 | if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT) |
1328 | * was triggered as part of recovery from other | 1328 | verbose = refch->timeout_debug_dump; |
1329 | * error condition. | ||
1330 | * Don't overwrite error flag. */ | ||
1331 | /* Fifo timeout debug spew is controlled by user */ | ||
1332 | if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT) | ||
1333 | verbose = ch->timeout_debug_dump; | ||
1334 | } else { | ||
1335 | gk20a_set_error_notifier_locked(ch, | ||
1336 | NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); | ||
1337 | } | ||
1338 | } | 1329 | } |
1339 | nvgpu_mutex_release(&ch->error_notifier_mutex); | 1330 | nvgpu_mutex_release(&refch->error_notifier_mutex); |
1331 | return verbose; | ||
1332 | } | ||
1333 | |||
1334 | /* caller must hold a channel reference */ | ||
1335 | void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g, | ||
1336 | struct channel_gk20a *refch) | ||
1337 | { | ||
1338 | if (refch) { | ||
1339 | /* mark channel as faulted */ | ||
1340 | refch->has_timedout = true; | ||
1341 | wmb(); | ||
1342 | /* unblock pending waits */ | ||
1343 | wake_up(&refch->semaphore_wq); | ||
1344 | wake_up(&refch->notifier_wq); | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | /* caller must hold a channel reference */ | ||
1349 | bool gk20a_fifo_error_ch(struct gk20a *g, | ||
1350 | struct channel_gk20a *refch) | ||
1351 | { | ||
1352 | bool verbose; | ||
1353 | |||
1354 | verbose = gk20a_fifo_ch_timeout_debug_dump_state(g, refch); | ||
1355 | gk20a_fifo_set_has_timedout_and_wake_up_wqs(g, refch); | ||
1340 | 1356 | ||
1341 | /* mark channel as faulted */ | ||
1342 | ch->has_timedout = true; | ||
1343 | wmb(); | ||
1344 | /* unblock pending waits */ | ||
1345 | wake_up(&ch->semaphore_wq); | ||
1346 | wake_up(&ch->notifier_wq); | ||
1347 | return verbose; | 1357 | return verbose; |
1348 | } | 1358 | } |
1349 | 1359 | ||
1350 | bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | 1360 | bool gk20a_fifo_error_tsg(struct gk20a *g, |
1351 | struct channel_gk20a *ch) | 1361 | struct tsg_gk20a *tsg) |
1352 | { | 1362 | { |
1353 | gk20a_err(dev_from_gk20a(g), | 1363 | struct channel_gk20a *ch = NULL; |
1354 | "channel %d generated a mmu fault", ch->hw_chid); | 1364 | bool verbose = true; |
1365 | |||
1366 | down_read(&tsg->ch_list_lock); | ||
1367 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
1368 | if (gk20a_channel_get(ch)) { | ||
1369 | verbose = gk20a_fifo_error_ch(g, ch); | ||
1370 | gk20a_channel_put(ch); | ||
1371 | } | ||
1372 | } | ||
1373 | up_read(&tsg->ch_list_lock); | ||
1374 | |||
1375 | return verbose; | ||
1355 | 1376 | ||
1356 | return gk20a_fifo_set_ctx_mmu_error(g, ch); | 1377 | } |
1378 | /* caller must hold a channel reference */ | ||
1379 | void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | ||
1380 | struct channel_gk20a *refch) | ||
1381 | { | ||
1382 | gk20a_err(dev_from_gk20a(g), | ||
1383 | "channel %d generated a mmu fault", refch->hw_chid); | ||
1384 | gk20a_set_error_notifier(refch, | ||
1385 | NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); | ||
1357 | } | 1386 | } |
1358 | 1387 | ||
1359 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | 1388 | void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, |
1360 | struct tsg_gk20a *tsg) | 1389 | struct tsg_gk20a *tsg) |
1361 | { | 1390 | { |
1362 | bool ret = true; | ||
1363 | struct channel_gk20a *ch = NULL; | 1391 | struct channel_gk20a *ch = NULL; |
1364 | 1392 | ||
1365 | gk20a_err(dev_from_gk20a(g), | 1393 | gk20a_err(dev_from_gk20a(g), |
@@ -1368,14 +1396,12 @@ bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | |||
1368 | down_read(&tsg->ch_list_lock); | 1396 | down_read(&tsg->ch_list_lock); |
1369 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | 1397 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { |
1370 | if (gk20a_channel_get(ch)) { | 1398 | if (gk20a_channel_get(ch)) { |
1371 | if (!gk20a_fifo_set_ctx_mmu_error(g, ch)) | 1399 | gk20a_fifo_set_ctx_mmu_error_ch(g, ch); |
1372 | ret = false; | ||
1373 | gk20a_channel_put(ch); | 1400 | gk20a_channel_put(ch); |
1374 | } | 1401 | } |
1375 | } | 1402 | } |
1376 | up_read(&tsg->ch_list_lock); | 1403 | up_read(&tsg->ch_list_lock); |
1377 | 1404 | ||
1378 | return ret; | ||
1379 | } | 1405 | } |
1380 | 1406 | ||
1381 | void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt) | 1407 | void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt) |
@@ -1496,7 +1522,7 @@ static bool gk20a_fifo_handle_mmu_fault( | |||
1496 | struct fifo_mmu_fault_info_gk20a f; | 1522 | struct fifo_mmu_fault_info_gk20a f; |
1497 | struct channel_gk20a *ch = NULL; | 1523 | struct channel_gk20a *ch = NULL; |
1498 | struct tsg_gk20a *tsg = NULL; | 1524 | struct tsg_gk20a *tsg = NULL; |
1499 | struct channel_gk20a *referenced_channel = NULL; | 1525 | struct channel_gk20a *refch = NULL; |
1500 | /* read and parse engine status */ | 1526 | /* read and parse engine status */ |
1501 | u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); | 1527 | u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); |
1502 | u32 ctx_status = fifo_engine_status_ctx_status_v(status); | 1528 | u32 ctx_status = fifo_engine_status_ctx_status_v(status); |
@@ -1559,12 +1585,12 @@ static bool gk20a_fifo_handle_mmu_fault( | |||
1559 | tsg = &g->fifo.tsg[id]; | 1585 | tsg = &g->fifo.tsg[id]; |
1560 | else if (type == fifo_engine_status_id_type_chid_v()) { | 1586 | else if (type == fifo_engine_status_id_type_chid_v()) { |
1561 | ch = &g->fifo.channel[id]; | 1587 | ch = &g->fifo.channel[id]; |
1562 | referenced_channel = gk20a_channel_get(ch); | 1588 | refch = gk20a_channel_get(ch); |
1563 | } | 1589 | } |
1564 | } else { | 1590 | } else { |
1565 | /* read channel based on instruction pointer */ | 1591 | /* read channel based on instruction pointer */ |
1566 | ch = gk20a_refch_from_inst_ptr(g, f.inst_ptr); | 1592 | ch = gk20a_refch_from_inst_ptr(g, f.inst_ptr); |
1567 | referenced_channel = ch; | 1593 | refch = ch; |
1568 | } | 1594 | } |
1569 | 1595 | ||
1570 | if (ch && gk20a_is_channel_marked_as_tsg(ch)) | 1596 | if (ch && gk20a_is_channel_marked_as_tsg(ch)) |
@@ -1602,19 +1628,27 @@ static bool gk20a_fifo_handle_mmu_fault( | |||
1602 | * syncpoints */ | 1628 | * syncpoints */ |
1603 | 1629 | ||
1604 | if (tsg) { | 1630 | if (tsg) { |
1605 | if (!g->fifo.deferred_reset_pending) | 1631 | if (!g->fifo.deferred_reset_pending) { |
1606 | verbose = | 1632 | if (!fake_fault) |
1607 | gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); | 1633 | gk20a_fifo_set_ctx_mmu_error_tsg(g, |
1608 | 1634 | tsg); | |
1635 | verbose = gk20a_fifo_error_tsg(g, tsg); | ||
1636 | } | ||
1609 | gk20a_fifo_abort_tsg(g, tsg->tsgid, false); | 1637 | gk20a_fifo_abort_tsg(g, tsg->tsgid, false); |
1610 | 1638 | ||
1611 | /* put back the ref taken early above */ | 1639 | /* put back the ref taken early above */ |
1612 | if (referenced_channel) | 1640 | if (refch) |
1613 | gk20a_channel_put(ch); | 1641 | gk20a_channel_put(ch); |
1614 | } else if (ch) { | 1642 | } else if (ch) { |
1615 | if (referenced_channel) { | 1643 | if (refch) { |
1616 | if (!g->fifo.deferred_reset_pending) | 1644 | if (!g->fifo.deferred_reset_pending) { |
1617 | verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch); | 1645 | if (!fake_fault) |
1646 | gk20a_fifo_set_ctx_mmu_error_ch( | ||
1647 | g, refch); | ||
1648 | |||
1649 | verbose = gk20a_fifo_error_ch(g, | ||
1650 | refch); | ||
1651 | } | ||
1618 | gk20a_channel_abort(ch, false); | 1652 | gk20a_channel_abort(ch, false); |
1619 | gk20a_channel_put(ch); | 1653 | gk20a_channel_put(ch); |
1620 | } else { | 1654 | } else { |
@@ -1759,7 +1793,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) | |||
1759 | if (gk20a_channel_get(ch)) { | 1793 | if (gk20a_channel_get(ch)) { |
1760 | gk20a_channel_abort(ch, false); | 1794 | gk20a_channel_abort(ch, false); |
1761 | 1795 | ||
1762 | if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch)) | 1796 | if (gk20a_fifo_error_ch(g, ch)) |
1763 | gk20a_debug_dump(g->dev); | 1797 | gk20a_debug_dump(g->dev); |
1764 | 1798 | ||
1765 | gk20a_channel_put(ch); | 1799 | gk20a_channel_put(ch); |
@@ -1786,7 +1820,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) | |||
1786 | else { | 1820 | else { |
1787 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; | 1821 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; |
1788 | 1822 | ||
1789 | if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg)) | 1823 | if (gk20a_fifo_error_tsg(g, tsg)) |
1790 | gk20a_debug_dump(g->dev); | 1824 | gk20a_debug_dump(g->dev); |
1791 | 1825 | ||
1792 | gk20a_fifo_abort_tsg(g, tsgid, false); | 1826 | gk20a_fifo_abort_tsg(g, tsgid, false); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index eab57ba3..ae728a36 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -270,11 +270,13 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); | |||
270 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); | 270 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); |
271 | u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, | 271 | u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, |
272 | int *__id, bool *__is_tsg); | 272 | int *__id, bool *__is_tsg); |
273 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | 273 | void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, |
274 | struct tsg_gk20a *tsg); | 274 | struct tsg_gk20a *tsg); |
275 | void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt); | 275 | void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt); |
276 | bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | 276 | void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, |
277 | struct channel_gk20a *ch); | 277 | struct channel_gk20a *refch); |
278 | bool gk20a_fifo_error_tsg(struct gk20a *g, struct tsg_gk20a *tsg); | ||
279 | bool gk20a_fifo_error_ch(struct gk20a *g, struct channel_gk20a *refch); | ||
278 | 280 | ||
279 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | 281 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, |
280 | u32 hw_chid); | 282 | u32 hw_chid); |