summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2017-09-22 18:07:13 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-11-15 05:05:16 -0500
commit72b51a129fda4a89f226aad7c99f062977a07189 (patch)
treee8027fc8c70bb45453f6897a4e5b15400e92ef7f /drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
parentf1c962daae5fdb231a8c8b0202d96c1c4e242ef1 (diff)
gpu: nvgpu: gv11b: detect stall intr during preemption
Check for interrupts or hangs while waiting for the preempt to complete. During pbdma/eng preempt done polling, any stalling interrupts relating to the runlist must be detected and handled in order for the preemption to complete. When PBDMA fault or CE fault occurs, the PBDMA will save out automatically. TSG related to the context in which the fault occurred will not be scheduled again until the fault is handled. In the case of some other issue requiring the engine to be reset, TSG will need to be manually preempted. In all cases, a PBDMA interrupt may occur prior to the PBDMA being able to switch out. SW must handle these interrupts according to the relevant handling procedure before the PBDMA preempt can complete. Opt for eng reset instead of waiting for preemption to be finished when there is any stall interrupt pending during engine context preempt completion. Bug 200277163 Bug 1945121 Change-Id: Icaef79e3046d82987b8486d15cbfc8365aa26f2e Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1522914 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: David Martinez Nieto <dmartineznie@nvidia.com> Tested-by: David Martinez Nieto <dmartineznie@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fifo_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.c  89
1 files changed, 43 insertions, 46 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index a3cb9292..f87c6dea 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -56,6 +56,7 @@
56#include "fifo_gv11b.h" 56#include "fifo_gv11b.h"
57#include "subctx_gv11b.h" 57#include "subctx_gv11b.h"
58#include "gr_gv11b.h" 58#include "gr_gv11b.h"
59#include "mc_gv11b.h"
59 60
60#define PBDMA_SUBDEVICE_ID 1 61#define PBDMA_SUBDEVICE_ID 1
61 62
@@ -393,45 +394,35 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
393 u32 pbdma_id, unsigned int timeout_rc_type) 394 u32 pbdma_id, unsigned int timeout_rc_type)
394{ 395{
395 struct nvgpu_timeout timeout; 396 struct nvgpu_timeout timeout;
396 unsigned long delay = GR_IDLE_CHECK_DEFAULT; 397 unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */
397 u32 pbdma_stat; 398 u32 pbdma_stat;
398 u32 chan_stat; 399 u32 chan_stat;
399 int ret = -EBUSY; 400 int ret = -EBUSY;
400 401
401 /* 402 /* timeout in milli seconds */
402 * If the PBDMA has a stalling interrupt and receives a NACK, the PBDMA
403 * won't save out until the STALLING interrupt is cleared. Note that
404 * the stalling interrupt need not be directly addressed, as simply
405 * clearing of the interrupt bit will be sufficient to allow the PBDMA
406 * to save out. If the stalling interrupt was due to a SW method or
407 * another deterministic failure, the PBDMA will assert it when the
408 * channel is reloaded/resumed. Note that the fault will still be
409 * reported to SW.
410 */
411
412 if (timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
413 /* called from recovery */
414 u32 pbdma_intr_0, pbdma_intr_1;
415
416 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
417 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
418
419 if (pbdma_intr_0)
420 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
421 if (pbdma_intr_1)
422 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
423 }
424
425 nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g), 403 nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
426 NVGPU_TIMER_CPU_TIMER); 404 NVGPU_TIMER_CPU_TIMER);
427 405
406 nvgpu_log(g, gpu_dbg_info, "wait preempt pbdma %d", pbdma_id);
428 /* Verify that ch/tsg is no longer on the pbdma */ 407 /* Verify that ch/tsg is no longer on the pbdma */
429 do { 408 do {
409 /*
410 * If the PBDMA has a stalling interrupt and receives a NACK,
411 * the PBDMA won't save out until the STALLING interrupt is
412 * cleared. Stalling interrupt need not be directly addressed,
413 * as simply clearing of the interrupt bit will be sufficient
414 * to allow the PBDMA to save out. If the stalling interrupt
415 * was due to a SW method or another deterministic failure,
416 * the PBDMA will assert it when the channel is reloaded
417 * or resumed. Note that the fault will still be
418 * reported to SW.
419 */
420
421 gk20a_fifo_handle_pbdma_intr(g, &g->fifo, pbdma_id, RC_NO);
422
430 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(pbdma_id)); 423 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(pbdma_id));
431 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat); 424 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
432 425
433 gk20a_dbg_info("wait preempt pbdma");
434
435 if (chan_stat == 426 if (chan_stat ==
436 fifo_pbdma_status_chan_status_valid_v() || 427 fifo_pbdma_status_chan_status_valid_v() ||
437 chan_stat == 428 chan_stat ==
@@ -473,26 +464,36 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
473} 464}
474 465
475static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id, 466static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
476 u32 engine_idx, u32 *reset_eng_bitmask, 467 u32 act_eng_id, u32 *reset_eng_bitmask,
477 unsigned int timeout_rc_type) 468 unsigned int timeout_rc_type)
478{ 469{
479 struct nvgpu_timeout timeout; 470 struct nvgpu_timeout timeout;
480 unsigned long delay = GR_IDLE_CHECK_DEFAULT; 471 unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */
481 u32 eng_stat; 472 u32 eng_stat;
482 u32 ctx_stat; 473 u32 ctx_stat;
483 int ret = -EBUSY; 474 int ret = -EBUSY;
475 bool stall_intr = false;
484 476
477 /* timeout in milli seconds */
485 nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g), 478 nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
486 NVGPU_TIMER_CPU_TIMER); 479 NVGPU_TIMER_CPU_TIMER);
487 480
481 nvgpu_log(g, gpu_dbg_info, "wait preempt act engine id: %u",
482 act_eng_id);
488 /* Check if ch/tsg has saved off the engine or if ctxsw is hung */ 483 /* Check if ch/tsg has saved off the engine or if ctxsw is hung */
489 do { 484 do {
490 eng_stat = gk20a_readl(g, fifo_engine_status_r(engine_idx)); 485 eng_stat = gk20a_readl(g, fifo_engine_status_r(act_eng_id));
491 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat); 486 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
492 487
488 if (gv11b_mc_is_stall_and_eng_intr_pending(g, act_eng_id)) {
489 stall_intr = true;
490 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
491 "stall intr set, "
492 "preemption will not finish");
493 }
493 if (ctx_stat == 494 if (ctx_stat ==
494 fifo_engine_status_ctx_status_ctxsw_switch_v()) { 495 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
495 gk20a_dbg_info("engine save hasn't started yet"); 496 /* Eng save hasn't started yet. Continue polling */
496 497
497 } else if (ctx_stat == 498 } else if (ctx_stat ==
498 fifo_engine_status_ctx_status_valid_v() || 499 fifo_engine_status_ctx_status_valid_v() ||
@@ -500,14 +501,12 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
500 fifo_engine_status_ctx_status_ctxsw_save_v()) { 501 fifo_engine_status_ctx_status_ctxsw_save_v()) {
501 502
502 if (id == fifo_engine_status_id_v(eng_stat)) { 503 if (id == fifo_engine_status_id_v(eng_stat)) {
503 if (timeout_rc_type == PREEMPT_TIMEOUT_NORC) { 504 if (stall_intr ||
504 /* called from recovery, eng seems to be hung */ 505 timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
505 *reset_eng_bitmask |= BIT(engine_idx); 506 /* preemption will not finish */
507 *reset_eng_bitmask |= BIT(act_eng_id);
506 ret = 0; 508 ret = 0;
507 break; 509 break;
508 } else {
509 gk20a_dbg_info("wait preempt engine. "
510 "ctx_status (valid/save)=%u", ctx_stat);
511 } 510 }
512 } else { 511 } else {
513 /* context is not running on the engine */ 512 /* context is not running on the engine */
@@ -520,14 +519,12 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
520 519
521 if (id == fifo_engine_status_next_id_v(eng_stat)) { 520 if (id == fifo_engine_status_next_id_v(eng_stat)) {
522 521
523 if (timeout_rc_type == PREEMPT_TIMEOUT_NORC) { 522 if (stall_intr ||
524 /* called from recovery, eng seems to be hung */ 523 timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
525 *reset_eng_bitmask |= BIT(engine_idx); 524 /* preemption will not finish */
525 *reset_eng_bitmask |= BIT(act_eng_id);
526 ret = 0; 526 ret = 0;
527 break; 527 break;
528 } else {
529 gk20a_dbg_info("wait preempt engine. "
530 "ctx_status (load)=%u", ctx_stat);
531 } 528 }
532 } else { 529 } else {
533 /* context is not running on the engine */ 530 /* context is not running on the engine */
@@ -540,7 +537,7 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
540 ret = 0; 537 ret = 0;
541 break; 538 break;
542 } 539 }
543 usleep_range(delay, delay * 2); 540 nvgpu_usleep_range(delay, delay * 2);
544 delay = min_t(unsigned long, 541 delay = min_t(unsigned long,
545 delay << 1, GR_IDLE_CHECK_MAX); 542 delay << 1, GR_IDLE_CHECK_MAX);
546 } while (!nvgpu_timeout_expired_msg(&timeout, 543 } while (!nvgpu_timeout_expired_msg(&timeout,
@@ -712,7 +709,7 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
712 break; 709 break;
713 } 710 }
714 711
715 usleep_range(delay, delay * 2); 712 nvgpu_usleep_range(delay, delay * 2);
716 delay = min_t(unsigned long, 713 delay = min_t(unsigned long,
717 delay << 1, GR_IDLE_CHECK_MAX); 714 delay << 1, GR_IDLE_CHECK_MAX);
718 } while (!nvgpu_timeout_expired_msg(&timeout, 715 } while (!nvgpu_timeout_expired_msg(&timeout,
@@ -758,7 +755,7 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
758 755
759 f->runlist_info[runlist_id].reset_eng_bitmask = 0; 756 f->runlist_info[runlist_id].reset_eng_bitmask = 0;
760 757
761 for_each_set_bit(act_eng_id, &runlist_served_engines, f->num_engines) { 758 for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines) {
762 759
763 func_ret = gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id, 760 func_ret = gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id,
764 &f->runlist_info[runlist_id].reset_eng_bitmask, 761 &f->runlist_info[runlist_id].reset_eng_bitmask,