author		Seema Khowala <seemaj@nvidia.com>	2017-03-17 19:21:55 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-23 20:18:58 -0400
commit		fb71b882750332fde543550adffe3f70f1d47d47 (patch)
tree		6f2d2672443ff13c41a045cc3728c9e0ce5bca3f
parent		a8efce77f5fab497b477d1f5fd461cded528bc0b (diff)
gpu: nvgpu: *ERROR_MMU_ERR_FLT* not set for fake mmu faults
For fake faults, error notifiers are expected to be set before triggering
the fake mmu fault.

JIRA GPUT19X-7

Change-Id: I458af8d95c5960f20693b6923e1990fe3aa59857
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: http://git-master/r/1323413
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
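Background for the change: the removed gk20a_fifo_set_ctx_mmu_error() both picked
the debug-dump verbosity and, unless an error notifier was already pending
(status == 0xffff), overwrote the channel's notifier with
NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT. The patch splits those duties into
gk20a_fifo_set_ctx_mmu_error_ch()/_tsg() (notifier write only) and
gk20a_fifo_error_ch()/_tsg() (verbosity decision, timeout flag, wait-queue
wakeup), so a recovery path that triggers a fake MMU fault can skip the notifier
write. The following is a minimal user-space sketch of the resulting flow; all
types, constant values, and helper names here are simplified stand-ins for the
driver's own definitions, only the control flow mirrors the patch:

/* Stand-alone model of the recovery flow after this change. Everything
 * below is an illustrative stand-in, not the nvgpu driver's code. */
#include <stdbool.h>
#include <stdio.h>

enum notifier_code {			/* illustrative values only */
	NOTIFIER_NONE = 0,
	FIFO_ERROR_IDLE_TIMEOUT,	/* set before faking the fault */
	FIFO_ERROR_MMU_ERR_FLT,		/* set for a real fault */
};

struct channel {			/* stand-in for channel_gk20a */
	enum notifier_code error_notifier;
	bool timeout_debug_dump;	/* user-controlled dump flag */
	bool has_timedout;
};

/* Models gk20a_fifo_set_ctx_mmu_error_ch(): now only sets the notifier. */
static void set_ctx_mmu_error(struct channel *ch)
{
	ch->error_notifier = FIFO_ERROR_MMU_ERR_FLT;
}

/* Models gk20a_fifo_error_ch(): derives dump verbosity from whatever
 * notifier is already set, then marks the channel as timed out. */
static bool error_ch(struct channel *ch)
{
	bool verbose = true;

	if (ch->error_notifier == FIFO_ERROR_IDLE_TIMEOUT)
		verbose = ch->timeout_debug_dump;
	ch->has_timedout = true;
	return verbose;
}

int main(void)
{
	struct channel ch = { .timeout_debug_dump = false };
	bool fake_fault = true;

	if (fake_fault)
		/* recovery path: notifier was set when the fault was faked */
		ch.error_notifier = FIFO_ERROR_IDLE_TIMEOUT;
	else
		/* real fault: report it via the notifier */
		set_ctx_mmu_error(&ch);

	printf("verbose debug dump: %s\n", error_ch(&ch) ? "yes" : "no");
	return 0;
}

In the hunks below this split shows up as the "if (!fake_fault)" guard around
gk20a_fifo_set_ctx_mmu_error_tsg()/_ch() in gk20a_fifo_handle_mmu_fault(), while
gk20a_fifo_error_tsg()/_ch() runs on both the real and fake fault paths.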
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.c	130
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.h	8
2 files changed, 87 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index fb31c3fd..60190521 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1313,53 +1313,81 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
 }
 
 /* caller must hold a channel reference */
-static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
-		struct channel_gk20a *ch)
+bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
+		struct channel_gk20a *refch)
 {
 	bool verbose = true;
-	if (!ch)
+	if (!refch)
 		return verbose;
 
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	if (ch->error_notifier_ref) {
-		u32 err = ch->error_notifier->info32;
-		if (ch->error_notifier->status == 0xffff) {
-			/* If error code is already set, this mmu fault
-			 * was triggered as part of recovery from other
-			 * error condition.
-			 * Don't overwrite error flag. */
-			/* Fifo timeout debug spew is controlled by user */
-			if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
-				verbose = ch->timeout_debug_dump;
-		} else {
-			gk20a_set_error_notifier_locked(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
-		}
+	nvgpu_mutex_acquire(&refch->error_notifier_mutex);
+	if (refch->error_notifier_ref) {
+		u32 err = refch->error_notifier->info32;
+
+		if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
+			verbose = refch->timeout_debug_dump;
 	}
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	nvgpu_mutex_release(&refch->error_notifier_mutex);
+	return verbose;
+}
+
+/* caller must hold a channel reference */
+void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
+		struct channel_gk20a *refch)
+{
+	if (refch) {
+		/* mark channel as faulted */
+		refch->has_timedout = true;
+		wmb();
+		/* unblock pending waits */
+		wake_up(&refch->semaphore_wq);
+		wake_up(&refch->notifier_wq);
+	}
+}
+
+/* caller must hold a channel reference */
+bool gk20a_fifo_error_ch(struct gk20a *g,
+		struct channel_gk20a *refch)
+{
+	bool verbose;
+
+	verbose = gk20a_fifo_ch_timeout_debug_dump_state(g, refch);
+	gk20a_fifo_set_has_timedout_and_wake_up_wqs(g, refch);
 
-	/* mark channel as faulted */
-	ch->has_timedout = true;
-	wmb();
-	/* unblock pending waits */
-	wake_up(&ch->semaphore_wq);
-	wake_up(&ch->notifier_wq);
 	return verbose;
 }
 
-bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
-		struct channel_gk20a *ch)
+bool gk20a_fifo_error_tsg(struct gk20a *g,
+		struct tsg_gk20a *tsg)
 {
-	gk20a_err(dev_from_gk20a(g),
-		"channel %d generated a mmu fault", ch->hw_chid);
+	struct channel_gk20a *ch = NULL;
+	bool verbose = true;
+
+	down_read(&tsg->ch_list_lock);
+	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+		if (gk20a_channel_get(ch)) {
+			verbose = gk20a_fifo_error_ch(g, ch);
+			gk20a_channel_put(ch);
+		}
+	}
+	up_read(&tsg->ch_list_lock);
+
+	return verbose;
 
-	return gk20a_fifo_set_ctx_mmu_error(g, ch);
+}
+/* caller must hold a channel reference */
+void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
+		struct channel_gk20a *refch)
+{
+	gk20a_err(dev_from_gk20a(g),
+		"channel %d generated a mmu fault", refch->hw_chid);
+	gk20a_set_error_notifier(refch,
+		NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
 }
 
-bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
+void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
 		struct tsg_gk20a *tsg)
 {
-	bool ret = true;
 	struct channel_gk20a *ch = NULL;
 
 	gk20a_err(dev_from_gk20a(g),
@@ -1368,14 +1396,12 @@ bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
 	down_read(&tsg->ch_list_lock);
 	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 		if (gk20a_channel_get(ch)) {
-			if (!gk20a_fifo_set_ctx_mmu_error(g, ch))
-				ret = false;
+			gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
 			gk20a_channel_put(ch);
 		}
 	}
 	up_read(&tsg->ch_list_lock);
 
-	return ret;
 }
 
 void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt)
@@ -1496,7 +1522,7 @@ static bool gk20a_fifo_handle_mmu_fault(
 	struct fifo_mmu_fault_info_gk20a f;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
-	struct channel_gk20a *referenced_channel = NULL;
+	struct channel_gk20a *refch = NULL;
 	/* read and parse engine status */
 	u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
 	u32 ctx_status = fifo_engine_status_ctx_status_v(status);
@@ -1559,12 +1585,12 @@ static bool gk20a_fifo_handle_mmu_fault(
 			tsg = &g->fifo.tsg[id];
 		else if (type == fifo_engine_status_id_type_chid_v()) {
 			ch = &g->fifo.channel[id];
-			referenced_channel = gk20a_channel_get(ch);
+			refch = gk20a_channel_get(ch);
 		}
 	} else {
 		/* read channel based on instruction pointer */
 		ch = gk20a_refch_from_inst_ptr(g, f.inst_ptr);
-		referenced_channel = ch;
+		refch = ch;
 	}
 
 	if (ch && gk20a_is_channel_marked_as_tsg(ch))
@@ -1602,19 +1628,27 @@ static bool gk20a_fifo_handle_mmu_fault(
		 * syncpoints */
 
		if (tsg) {
-			if (!g->fifo.deferred_reset_pending)
-				verbose =
-					gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
-
+			if (!g->fifo.deferred_reset_pending) {
+				if (!fake_fault)
+					gk20a_fifo_set_ctx_mmu_error_tsg(g,
+						tsg);
+				verbose = gk20a_fifo_error_tsg(g, tsg);
+			}
			gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
 
			/* put back the ref taken early above */
-			if (referenced_channel)
+			if (refch)
				gk20a_channel_put(ch);
		} else if (ch) {
-			if (referenced_channel) {
-				if (!g->fifo.deferred_reset_pending)
-					verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
+			if (refch) {
+				if (!g->fifo.deferred_reset_pending) {
+					if (!fake_fault)
+						gk20a_fifo_set_ctx_mmu_error_ch(
+							g, refch);
+
+					verbose = gk20a_fifo_error_ch(g,
+						refch);
+				}
				gk20a_channel_abort(ch, false);
				gk20a_channel_put(ch);
			} else {
@@ -1759,7 +1793,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 	if (gk20a_channel_get(ch)) {
 		gk20a_channel_abort(ch, false);
 
-		if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
+		if (gk20a_fifo_error_ch(g, ch))
 			gk20a_debug_dump(g->dev);
 
 		gk20a_channel_put(ch);
@@ -1786,7 +1820,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
 	else {
 		struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
 
-		if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg))
+		if (gk20a_fifo_error_tsg(g, tsg))
 			gk20a_debug_dump(g->dev);
 
 		gk20a_fifo_abort_tsg(g, tsgid, false);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index eab57ba3..ae728a36 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -270,11 +270,13 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
 u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
 u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
 		int *__id, bool *__is_tsg);
-bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
+void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
 		struct tsg_gk20a *tsg);
 void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt);
-bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
-		struct channel_gk20a *ch);
+void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
+		struct channel_gk20a *refch);
+bool gk20a_fifo_error_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
+bool gk20a_fifo_error_ch(struct gk20a *g, struct channel_gk20a *refch);
 
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid);