-rw-r--r--  drivers/gpu/nvgpu/common/fb/fb_gv11b.c    79
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c      15
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h      10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h            6
-rw-r--r--  drivers/gpu/nvgpu/gv100/mc_gv100.c        16
-rw-r--r--  drivers/gpu/nvgpu/gv100/mc_gv100.h         3
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.c     469
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.h      10
-rw-r--r--  drivers/gpu/nvgpu/gv11b/mc_gv11b.c        16
-rw-r--r--  drivers/gpu/nvgpu/gv11b/mc_gv11b.h         3
10 files changed, 397 insertions, 230 deletions
diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
index 69a71575..26dabd72 100644
--- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
@@ -792,10 +792,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
 static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
                 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val)
 {
-        unsigned int id_type;
+        unsigned int id_type = ID_TYPE_UNKNOWN;
         u32 num_lce, act_eng_bitmask = 0;
         int err = 0;
-        u32 id = ((u32)~0);
+        u32 id = FIFO_INVAL_TSG_ID;
+        unsigned int rc_type = RC_TYPE_NO_RC;
 
         if (!mmfault->valid)
                 return;
@@ -810,18 +811,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
                 /* CE page faults are not reported as replayable */
                 nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
                 err = gv11b_fb_fix_page_fault(g, mmfault);
-                gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch,
-                                mmfault->faulted_pbdma, mmfault->faulted_engine);
+                if (mmfault->refch &&
+                                (u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) {
+                        gv11b_fifo_reset_pbdma_and_eng_faulted(g,
+                                        &g->fifo.tsg[mmfault->refch->tsgid],
+                                        mmfault->faulted_pbdma,
+                                        mmfault->faulted_engine);
+                }
                 if (!err) {
                         nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed");
                         *invalidate_replay_val = 0;
-                        /* refch in mmfault is assigned at the time of copying
-                         * fault info from snap reg or bar2 fault buf
-                         */
-                        gk20a_channel_put(mmfault->refch);
+                        if (mmfault->refch) {
+                                gk20a_channel_put(mmfault->refch);
+                                mmfault->refch = NULL;
+                        }
                         return;
                 }
-                /* Do recovery. Channel recovery needs refch */
+                /* Do recovery */
                 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
         }
 
@@ -833,16 +839,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
                          * instance block, the fault cannot be isolated to a
                          * single context so we need to reset the entire runlist
                          */
-                        id_type = ID_TYPE_UNKNOWN;
+                        rc_type = RC_TYPE_MMU_FAULT;
 
                 } else if (mmfault->refch) {
-                        if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
-                                id = mmfault->refch->tsgid;
-                                id_type = ID_TYPE_TSG;
-                        } else {
-                                id = mmfault->chid;
-                                id_type = ID_TYPE_CHANNEL;
-                        }
                         if (mmfault->refch->mmu_nack_handled) {
                                 /* We have already recovered for the same
                                  * context, skip doing another recovery.
@@ -863,19 +862,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
                                  */
                                 gk20a_channel_put(mmfault->refch);
                                 return;
+                        } else {
+                                /* Indicate recovery is handled if mmu fault is
+                                 * a result of mmu nack.
+                                 */
+                                mmfault->refch->mmu_nack_handled = true;
+                        }
+
+                        rc_type = RC_TYPE_MMU_FAULT;
+                        if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
+                                id = mmfault->refch->tsgid;
+                                if (id != FIFO_INVAL_TSG_ID)
+                                        id_type = ID_TYPE_TSG;
+                        } else {
+                                nvgpu_err(g, "bare channels not supported");
                         }
-                } else {
-                        id_type = ID_TYPE_UNKNOWN;
                 }
-                if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)
+
+                /* engine is faulted */
+                if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) {
                         act_eng_bitmask = BIT(mmfault->faulted_engine);
+                        rc_type = RC_TYPE_MMU_FAULT;
+                }
 
-                /* Indicate recovery is handled if mmu fault is a result of
-                 * mmu nack.
+                /* refch in mmfault is assigned at the time of copying
+                 * fault info from snap reg or bar2 fault buf
                  */
-                mmfault->refch->mmu_nack_handled = true;
-                g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
-                        id, id_type, RC_TYPE_MMU_FAULT, mmfault);
+                if (mmfault->refch) {
+                        gk20a_channel_put(mmfault->refch);
+                        mmfault->refch = NULL;
+                }
+
+                if (rc_type != RC_TYPE_NO_RC)
+                        g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
+                                        id, id_type, rc_type, mmfault);
         } else {
                 if (mmfault->fault_type == gmmu_fault_type_pte_v()) {
                         nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
@@ -894,7 +914,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
                 /* refch in mmfault is assigned at the time of copying
                  * fault info from snap reg or bar2 fault buf
                  */
-                gk20a_channel_put(mmfault->refch);
+                if (mmfault->refch) {
+                        gk20a_channel_put(mmfault->refch);
+                        mmfault->refch = NULL;
+                }
         }
 }
 
@@ -985,8 +1008,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
                         next_fault_addr = mmfault->fault_addr;
                         if (prev_fault_addr == next_fault_addr) {
                                 nvgpu_log(g, gpu_dbg_intr, "pte already scanned");
-                                if (mmfault->refch)
+                                if (mmfault->refch) {
                                         gk20a_channel_put(mmfault->refch);
+                                        mmfault->refch = NULL;
+                                }
                                 continue;
                         }
                 }
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index cd54baf1..57cb0019 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -55,9 +55,7 @@
 #define FECS_METHOD_WFI_RESTORE         0x80000
 #define FECS_MAILBOX_0_ACK_RESTORE      0x4
 
-static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
-                                            u32 chid, bool add,
-                                            bool wait_for_finish);
+
 static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
 
 static const char *const pbdma_intr_fault_type_desc[] = {
@@ -2708,7 +2706,7 @@ void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
 }
 
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-                unsigned int id_type, unsigned int timeout_rc_type)
+                unsigned int id_type)
 {
         struct nvgpu_timeout timeout;
         u32 delay = GR_IDLE_CHECK_DEFAULT;
@@ -2781,8 +2779,8 @@ int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
         id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
 
         /* wait for preempt */
-        ret = g->ops.fifo.is_preempt_pending(g, id, id_type,
-                        PREEMPT_TIMEOUT_RC);
+        ret = g->ops.fifo.is_preempt_pending(g, id, id_type);
+
         return ret;
 }
 
@@ -3279,7 +3277,7 @@ void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
                         fifo_eng_runlist_length_f(count));
 }
 
-static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
                                             u32 chid, bool add,
                                             bool wait_for_finish)
 {
@@ -3452,8 +3450,7 @@ static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
                         gk20a_readl(g, fifo_preempt_r()));
 #endif
         if (wait_preempt) {
-                g->ops.fifo.is_preempt_pending(
-                        g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+                g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type);
         }
 #ifdef TRACEPOINTS_ENABLED
         trace_gk20a_reschedule_preempted_next(ch->chid);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index bccd15f6..77030c94 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -50,9 +50,6 @@ enum {
 #define ID_TYPE_TSG             1
 #define ID_TYPE_UNKNOWN         ((u32)~0)
 
-#define PREEMPT_TIMEOUT_RC      1
-#define PREEMPT_TIMEOUT_NORC    0
-
 #define RC_YES                  1
 #define RC_NO                   0
 
@@ -257,6 +254,9 @@ int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
                               bool add, bool wait_for_finish);
 
+int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+                                     u32 chid, bool add,
+                                     bool wait_for_finish);
 int gk20a_fifo_suspend(struct gk20a *g);
 
 bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
@@ -390,8 +390,8 @@ void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a);
 
 u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g);
 
-int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, unsigned int id_type,
-        unsigned int timeout_rc_type);
+int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
+        unsigned int id_type);
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
 void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
                 unsigned int id_type);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d6e0342b..17b0a60b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -685,9 +685,9 @@ struct gpu_ops {
                                         struct ch_state *ch_state);
                 u32 (*intr_0_error_mask)(struct gk20a *g);
                 int (*is_preempt_pending)(struct gk20a *g, u32 id,
-                        unsigned int id_type, unsigned int timeout_rc_type);
+                        unsigned int id_type);
                 int (*preempt_ch_tsg)(struct gk20a *g, u32 id,
-                        unsigned int id_type, unsigned int timeout_rc_type);
+                        unsigned int id_type);
                 void (*init_pbdma_intr_descs)(struct fifo_gk20a *f);
                 int (*reset_enable_hw)(struct gk20a *g);
                 int (*setup_userd)(struct channel_gk20a *c);
@@ -1132,7 +1132,7 @@ struct gpu_ops {
                 bool (*is_intr_hub_pending)(struct gk20a *g, u32 mc_intr);
                 bool (*is_intr_nvlink_pending)(struct gk20a *g, u32 mc_intr);
                 bool (*is_stall_and_eng_intr_pending)(struct gk20a *g,
-                                        u32 act_eng_id);
+                                        u32 act_eng_id, u32 *eng_intr_pending);
                 u32 (*intr_stall)(struct gk20a *g);
                 void (*intr_stall_pause)(struct gk20a *g);
                 void (*intr_stall_resume)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gv100/mc_gv100.c b/drivers/gpu/nvgpu/gv100/mc_gv100.c
index 46af100a..7d38a3fb 100644
--- a/drivers/gpu/nvgpu/gv100/mc_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/mc_gv100.c
@@ -66,15 +66,14 @@ bool gv100_mc_is_intr_nvlink_pending(struct gk20a *g, u32 mc_intr_0)
         return (((mc_intr_0 & mc_intr_nvlink_pending_f()) != 0U) ? true : false);
 }
 
-bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id)
+bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+                                        u32 *eng_intr_pending)
 {
         u32 mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
         u32 stall_intr, eng_intr_mask;
 
         eng_intr_mask = gk20a_fifo_act_eng_interrupt_mask(g, act_eng_id);
-        if ((mc_intr_0 & eng_intr_mask) != 0U) {
-                return true;
-        }
+        *eng_intr_pending = mc_intr_0 & eng_intr_mask;
 
         stall_intr = mc_intr_pfifo_pending_f() |
                         mc_intr_hub_pending_f() |
@@ -82,9 +81,10 @@ bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id)
                         mc_intr_pbus_pending_f() |
                         mc_intr_ltc_pending_f() |
                         mc_intr_nvlink_pending_f();
-        if ((mc_intr_0 & stall_intr) != 0U) {
-                return true;
-        }
 
-        return false;
+        nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
+                "mc_intr_0 = 0x%08x, eng_intr = 0x%08x",
+                mc_intr_0 & stall_intr, *eng_intr_pending);
+
+        return (mc_intr_0 & (eng_intr_mask | stall_intr)) != 0U;
 }
diff --git a/drivers/gpu/nvgpu/gv100/mc_gv100.h b/drivers/gpu/nvgpu/gv100/mc_gv100.h
index 4aff4a36..e9069258 100644
--- a/drivers/gpu/nvgpu/gv100/mc_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/mc_gv100.h
@@ -26,5 +26,6 @@ struct gk20a;
 
 void mc_gv100_intr_enable(struct gk20a *g);
 bool gv100_mc_is_intr_nvlink_pending(struct gk20a *g, u32 mc_intr_0);
-bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id);
+bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+                                        u32 *eng_intr_pending);
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 4edaaac1..f30f2ae1 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -387,17 +387,24 @@ u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g)
 
 u32 gv11b_fifo_get_preempt_timeout(struct gk20a *g)
 {
-        return gk20a_get_gr_idle_timeout(g);
+        /* if timeouts are enabled, using 3000ms timeout
+         * for polling pdma/eng/runlist might kick in
+         * timeout handler in the cases where preempt
+         * is stuck. Use 1000ms timeout for polling when
+         * timeouts are enabled */
+        return nvgpu_is_timeouts_enabled(g) ? PREEMPT_TIMEOUT_1000_MS :
+                        g->gr_idle_timeout_default;
 }
 
 static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
-                        u32 pbdma_id, unsigned int timeout_rc_type)
+                        u32 pbdma_id)
 {
         struct nvgpu_timeout timeout;
         unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */
         u32 pbdma_stat;
         u32 chan_stat;
         int ret = -EBUSY;
+        unsigned int loop_count = 0;
 
         /* timeout in milli seconds */
         nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
@@ -406,6 +413,14 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
         nvgpu_log(g, gpu_dbg_info, "wait preempt pbdma %d", pbdma_id);
         /* Verify that ch/tsg is no longer on the pbdma */
         do {
+                if (!nvgpu_platform_is_silicon(g)) {
+                        if (loop_count >= MAX_PRE_SI_RETRIES) {
+                                nvgpu_err(g, "preempt pbdma retries: %u",
+                                                loop_count);
+                                break;
+                        }
+                        loop_count++;
+                }
                 /*
                  * If the PBDMA has a stalling interrupt and receives a NACK,
                  * the PBDMA won't save out until the STALLING interrupt is
@@ -458,21 +473,24 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
                 nvgpu_usleep_range(delay, delay * 2);
                 delay = min_t(unsigned long,
                                 delay << 1, GR_IDLE_CHECK_MAX);
-        } while (!nvgpu_timeout_expired_msg(&timeout,
-                                "preempt timeout pbdma"));
+        } while (!nvgpu_timeout_expired(&timeout));
+
+        if (ret)
+                nvgpu_err(g, "preempt timeout pbdma: %u pbdma_stat: %u "
+                                "tsgid: %u", pbdma_id, pbdma_stat, id);
         return ret;
 }
 
 static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
-                u32 act_eng_id, u32 *reset_eng_bitmask,
-                unsigned int timeout_rc_type)
+                u32 act_eng_id, u32 *reset_eng_bitmask)
 {
         struct nvgpu_timeout timeout;
         unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */
         u32 eng_stat;
         u32 ctx_stat;
         int ret = -EBUSY;
-        bool stall_intr = false;
+        unsigned int loop_count = 0;
+        u32 eng_intr_pending;
 
         /* timeout in milli seconds */
         nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
@@ -482,20 +500,56 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
                                 act_eng_id);
         /* Check if ch/tsg has saved off the engine or if ctxsw is hung */
         do {
+                if (!nvgpu_platform_is_silicon(g)) {
+                        if (loop_count >= MAX_PRE_SI_RETRIES) {
+                                nvgpu_err(g, "preempt eng retries: %u",
+                                                loop_count);
+                                break;
+                        }
+                        loop_count++;
+                }
                 eng_stat = gk20a_readl(g, fifo_engine_status_r(act_eng_id));
                 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
 
-                if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id)) {
-                        stall_intr = true;
+                if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id,
+                                        &eng_intr_pending)) {
+                        /* From h/w team
+                         * Engine save can be blocked by eng stalling interrupts.
+                         * FIFO interrupts shouldn’t block an engine save from
+                         * finishing, but could block FIFO from reporting preempt done.
+                         * No immediate reason to reset the engine if FIFO interrupt is
+                         * pending.
+                         * The hub, priv_ring, and ltc interrupts could block context
+                         * switch (or memory), but doesn’t necessarily have to.
+                         * For Hub interrupts they just report access counters and page
+                         * faults. Neither of these necessarily block context switch
+                         * or preemption, but they could.
+                         * For example a page fault for graphics would prevent graphics
+                         * from saving out. An access counter interrupt is a
+                         * notification and has no effect.
+                         * SW should handle page faults though for preempt to complete.
+                         * PRI interrupt (due to a failed PRI transaction) will result
+                         * in ctxsw failure reported to HOST.
+                         * LTC interrupts are generally ECC related and if so,
+                         * certainly don’t block preemption/ctxsw but they could.
+                         * Bus interrupts shouldn’t have anything to do with preemption
+                         * state as they are part of the Host EXT pipe, though they may
+                         * exhibit a symptom that indicates that GPU is in a bad state.
+                         * To be completely fair, when an engine is preempting SW
+                         * really should just handle other interrupts as they come in.
+                         * It’s generally bad to just poll and wait on a preempt
+                         * to complete since there are many things in the GPU which may
+                         * cause a system to hang/stop responding.
+                         */
                         nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
                                 "stall intr set, "
-                                "preemption will not finish");
+                                "preemption might not finish");
                 }
                 if (ctx_stat ==
                         fifo_engine_status_ctx_status_ctxsw_switch_v()) {
                         /* Eng save hasn't started yet. Continue polling */
-                        if (stall_intr) {
-                                /* if stall intr stop polling */
+                        if (eng_intr_pending) {
+                                /* if eng intr, stop polling */
                                 *reset_eng_bitmask |= BIT(act_eng_id);
                                 ret = 0;
                                 break;
@@ -507,8 +561,7 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
                         fifo_engine_status_ctx_status_ctxsw_save_v()) {
 
                         if (id == fifo_engine_status_id_v(eng_stat)) {
-                                if (stall_intr ||
-                                        timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
+                                if (eng_intr_pending) {
                                         /* preemption will not finish */
                                         *reset_eng_bitmask |= BIT(act_eng_id);
                                         ret = 0;
@@ -524,9 +577,7 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
                         fifo_engine_status_ctx_status_ctxsw_load_v()) {
 
                         if (id == fifo_engine_status_next_id_v(eng_stat)) {
-
-                                if (stall_intr ||
-                                        timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
+                                if (eng_intr_pending) {
                                         /* preemption will not finish */
                                         *reset_eng_bitmask |= BIT(act_eng_id);
                                         ret = 0;
@@ -546,8 +597,21 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
                 nvgpu_usleep_range(delay, delay * 2);
                 delay = min_t(unsigned long,
                                 delay << 1, GR_IDLE_CHECK_MAX);
-        } while (!nvgpu_timeout_expired_msg(&timeout,
-                                "preempt timeout eng"));
+        } while (!nvgpu_timeout_expired(&timeout));
+
+        if (ret) {
+                /*
+                 * The reasons a preempt can fail are:
+                 * 1.Some other stalling interrupt is asserted preventing
+                 * channel or context save.
+                 * 2.The memory system hangs.
+                 * 3.The engine hangs during CTXSW.
+                 */
+                nvgpu_err(g, "preempt timeout eng: %u ctx_stat: %u tsgid: %u",
+                                act_eng_id, ctx_stat, id);
+                *reset_eng_bitmask |= BIT(act_eng_id);
+        }
+
         return ret;
 }
 
@@ -594,29 +658,19 @@ static void gv11b_reset_pbdma_faulted_tsg(struct tsg_gk20a *tsg)
 }
 
 void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
-                        struct channel_gk20a *refch,
+                        struct tsg_gk20a *tsg,
                         u32 faulted_pbdma, u32 faulted_engine)
 {
-        struct tsg_gk20a *tsg;
+        if (!tsg)
+                return;
 
         nvgpu_log(g, gpu_dbg_intr, "reset faulted pbdma:0x%x eng:0x%x",
                         faulted_pbdma, faulted_engine);
 
-        if (!refch)
-                return;
-
-        if (gk20a_is_channel_marked_as_tsg(refch)) {
-                tsg = &g->fifo.tsg[refch->tsgid];
-                if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
-                        gv11b_reset_pbdma_faulted_tsg(tsg);
-                if (faulted_engine != FIFO_INVAL_ENGINE_ID)
-                        gv11b_reset_eng_faulted_tsg(tsg);
-        } else {
-                if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
-                        gv11b_reset_pbdma_faulted_ch(g, refch->chid);
-                if (faulted_engine != FIFO_INVAL_ENGINE_ID)
-                        gv11b_reset_eng_faulted_ch(g, refch->chid);
-        }
+        if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
+                gv11b_reset_pbdma_faulted_tsg(tsg);
+        if (faulted_engine != FIFO_INVAL_ENGINE_ID)
+                gv11b_reset_eng_faulted_tsg(tsg);
 }
 
 static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
@@ -626,7 +680,7 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
         u32 runlists_mask = 0;
         struct fifo_gk20a *f = &g->fifo;
         struct fifo_runlist_info_gk20a *runlist;
-        u32 pbdma_bitmask = 0;
+        u32 rlid, pbdma_bitmask = 0;
 
         if (id_type != ID_TYPE_UNKNOWN) {
                 if (id_type == ID_TYPE_TSG)
@@ -641,31 +695,31 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
                 if (mmfault->faulted_pbdma != FIFO_INVAL_PBDMA_ID)
                         pbdma_bitmask = BIT(mmfault->faulted_pbdma);
 
-                for (id = 0; id < f->max_runlists; id++) {
+                for (rlid = 0; rlid < f->max_runlists; rlid++) {
 
-                        runlist = &f->runlist_info[id];
+                        runlist = &f->runlist_info[rlid];
 
                         if (runlist->eng_bitmask & act_eng_bitmask)
                                 runlists_mask |=
-                                 fifo_sched_disable_runlist_m(id);
+                                 fifo_sched_disable_runlist_m(rlid);
 
                         if (runlist->pbdma_bitmask & pbdma_bitmask)
                                 runlists_mask |=
-                                 fifo_sched_disable_runlist_m(id);
+                                 fifo_sched_disable_runlist_m(rlid);
                 }
         }
 
         if (id_type == ID_TYPE_UNKNOWN) {
-                for (id = 0; id < f->max_runlists; id++) {
+                for (rlid = 0; rlid < f->max_runlists; rlid++) {
                         if (act_eng_bitmask) {
                                 /* eng ids are known */
-                                runlist = &f->runlist_info[id];
+                                runlist = &f->runlist_info[rlid];
                                 if (runlist->eng_bitmask & act_eng_bitmask)
                                         runlists_mask |=
-                                         fifo_sched_disable_runlist_m(id);
+                                         fifo_sched_disable_runlist_m(rlid);
                         } else {
                                 runlists_mask |=
-                                        fifo_sched_disable_runlist_m(id);
+                                        fifo_sched_disable_runlist_m(rlid);
                         }
                 }
         }
@@ -697,10 +751,20 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
         struct nvgpu_timeout timeout;
         u32 delay = GR_IDLE_CHECK_DEFAULT;
         int ret = -EBUSY;
+        unsigned int loop_count = 0;
 
         nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
                         NVGPU_TIMER_CPU_TIMER);
         do {
+                if (!nvgpu_platform_is_silicon(g)) {
+                        if (loop_count >= MAX_PRE_SI_RETRIES) {
+                                nvgpu_err(g, "preempt runlist retries: %u",
+                                                loop_count);
+                                break;
+                        }
+                        loop_count++;
+                }
+
                 if (!((gk20a_readl(g, fifo_runlist_preempt_r())) &
                                 runlists_mask)) {
                         ret = 0;
@@ -710,13 +774,16 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
                 nvgpu_usleep_range(delay, delay * 2);
                 delay = min_t(unsigned long,
                                 delay << 1, GR_IDLE_CHECK_MAX);
-        } while (!nvgpu_timeout_expired_msg(&timeout,
-                                "runlist preempt timeout"));
+        } while (!nvgpu_timeout_expired(&timeout));
+
+        if (ret)
+                nvgpu_err(g, "preempt runlist timeout, runlists_mask:0x%08x",
+                                runlists_mask);
         return ret;
 }
 
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-                unsigned int id_type, unsigned int timeout_rc_type)
+                unsigned int id_type)
 {
         struct fifo_gk20a *f = &g->fifo;
         unsigned long runlist_served_pbdmas;
@@ -724,7 +791,6 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
         u32 pbdma_id;
         u32 act_eng_id;
         u32 runlist_id;
-        int func_ret;
         int ret = 0;
         u32 tsgid;
 
@@ -741,30 +807,14 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
         runlist_served_pbdmas = f->runlist_info[runlist_id].pbdma_bitmask;
         runlist_served_engines = f->runlist_info[runlist_id].eng_bitmask;
 
-        for_each_set_bit(pbdma_id, &runlist_served_pbdmas, f->num_pbdma) {
-
-                func_ret = gv11b_fifo_poll_pbdma_chan_status(g, tsgid, pbdma_id,
-                                timeout_rc_type);
-                if (func_ret != 0) {
-                        nvgpu_log_info(g, "preempt timeout pbdma %d", pbdma_id);
-                        ret |= func_ret;
-                }
-        }
+        for_each_set_bit(pbdma_id, &runlist_served_pbdmas, f->num_pbdma)
+                ret |= gv11b_fifo_poll_pbdma_chan_status(g, tsgid, pbdma_id);
 
         f->runlist_info[runlist_id].reset_eng_bitmask = 0;
 
-        for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines) {
-
-                func_ret = gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id,
-                        &f->runlist_info[runlist_id].reset_eng_bitmask,
-                        timeout_rc_type);
-
-                if (func_ret != 0) {
-                        nvgpu_log_info(g, "preempt timeout engine %d", act_eng_id);
-                        ret |= func_ret;
-                }
-        }
-
+        for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines)
+                ret |= gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id,
+                        &f->runlist_info[runlist_id].reset_eng_bitmask);
         return ret;
 }
 
@@ -848,6 +898,9 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 
         nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock);
 
+        /* WAR for Bug 2065990 */
+        gk20a_fifo_disable_tsg_sched(g, &f->tsg[tsgid]);
+
         mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
         ret = __locked_fifo_preempt(g, tsgid, true);
@@ -855,6 +908,9 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
         if (!mutex_ret)
                 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
+        /* WAR for Bug 2065990 */
+        gk20a_fifo_enable_tsg_sched(g, &f->tsg[tsgid]);
+
         nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
 
         if (ret)
@@ -863,44 +919,36 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
         return ret;
 }
 
-static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
+static void gv11b_fifo_locked_preempt_runlists(struct gk20a *g, u32 runlists_mask)
 {
         int ret = 0;
         u32 token = PMU_INVALID_MUTEX_OWNER_ID;
         u32 mutex_ret = 0;
-        u32 runlist_id;
-
-        nvgpu_log_fn(g, " ");
+        u32 rlid;
 
-        for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-                if (runlists_mask & fifo_runlist_preempt_runlist_m(runlist_id))
-                        nvgpu_mutex_acquire(&g->fifo.
-                                runlist_info[runlist_id].runlist_lock);
-        }
+        /* runlist_lock are locked by teardown and sched are disabled too */
+        nvgpu_log_fn(g, "preempt runlists_mask:0x%08x", runlists_mask);
 
         mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
         ret = __locked_fifo_preempt_runlists(g, runlists_mask);
 
-        if (!mutex_ret)
-                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-
-        for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-                if (runlists_mask &
-                        fifo_runlist_preempt_runlist_m(runlist_id)) {
-                        /* during recovery reset engs served by this runlist */
-                        g->fifo.runlist_info[runlist_id].reset_eng_bitmask =
-                                g->fifo.runlist_info[runlist_id].eng_bitmask;
-                        nvgpu_mutex_release(&g->fifo.
-                                runlist_info[runlist_id].runlist_lock);
+        if (ret) {
+                /* if preempt timed out, reset engs served by runlists */
+                for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
+                        if (runlists_mask &
+                                fifo_runlist_preempt_runlist_m(rlid))
+                                g->fifo.runlist_info[rlid].reset_eng_bitmask =
+                                        g->fifo.runlist_info[rlid].eng_bitmask;
                 }
         }
 
-        return ret;
+        if (!mutex_ret)
+                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 }
 
 static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
-                unsigned int id_type, unsigned int timeout_rc_type)
+                unsigned int id_type)
 {
         int ret;
         struct fifo_gk20a *f = &g->fifo;
@@ -914,52 +962,97 @@ static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
         gk20a_fifo_issue_preempt(g, id, true);
 
         /* wait for preempt */
-        ret = g->ops.fifo.is_preempt_pending(g, id, id_type,
-                        timeout_rc_type);
+        ret = g->ops.fifo.is_preempt_pending(g, id, id_type);
 
-        if (ret && (timeout_rc_type == PREEMPT_TIMEOUT_RC))
-                gk20a_fifo_preempt_timeout_rc(g, id, id_type);
+        /* No recovery even if preempt timed out since
+         * this is called from recovery path
+         */
 
         return ret;
 }
 
 
 int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
-                        unsigned int id_type, unsigned int timeout_rc_type)
+                        unsigned int id_type)
 {
-        struct fifo_gk20a *f = &g->fifo;
         u32 ret = 0;
         u32 token = PMU_INVALID_MUTEX_OWNER_ID;
         u32 mutex_ret = 0;
-        u32 runlist_id;
 
-        if (id_type == ID_TYPE_TSG)
-                runlist_id = f->tsg[id].runlist_id;
-        else if (id_type == ID_TYPE_CHANNEL)
-                runlist_id = f->channel[id].runlist_id;
-        else
-                return -EINVAL;
+        mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+        /*
+         * This is called from teardown path only. runlist_lock
+         * is already acquired before calling this function.
+         */
+        ret = __locked_fifo_preempt_ch_tsg(g, id, id_type);
 
-        if (runlist_id >= g->fifo.max_runlists) {
-                nvgpu_log_info(g, "runlist_id = %d", runlist_id);
-                return -EINVAL;
-        }
+        if (!mutex_ret)
+                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
-        nvgpu_log_fn(g, "preempt id = %d, runlist_id = %d", id, runlist_id);
+        return ret;
 
-        nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock);
+}
+
+static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g,
+                        unsigned int rc_type,
+                        u32 runlists_mask)
+{
+        struct tsg_gk20a *tsg = NULL;
+        u32 rlid, tsgid;
+        struct fifo_runlist_info_gk20a *runlist = NULL;
+        u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+        u32 mutex_ret = 0;
+        bool add = false, wait_for_finish = false;
+        int err;
 
+        nvgpu_err(g, "runlist id unknown, abort active tsgs in runlists");
+
+        /* runlist_lock are locked by teardown */
         mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
-        ret = __locked_fifo_preempt_ch_tsg(g, id, id_type, timeout_rc_type);
+        for (rlid = 0; rlid < g->fifo.max_runlists;
+                         rlid++) {
+                if (!(runlists_mask & BIT(rlid)))
+                        continue;
+                nvgpu_log(g, gpu_dbg_info, "abort runlist id %d",
+                                rlid);
+                runlist = &g->fifo.runlist_info[rlid];
+
+                for_each_set_bit(tsgid, runlist->active_tsgs,
+                        g->fifo.num_channels) {
+                        nvgpu_log(g, gpu_dbg_info, "abort tsg id %d", tsgid);
+                        tsg = &g->fifo.tsg[tsgid];
+                        gk20a_disable_tsg(tsg);
 
-        if (!mutex_ret)
-                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+                        /* assume all pbdma and eng faulted are set */
+                        nvgpu_log(g, gpu_dbg_info, "reset pbdma and eng faulted");
+                        gv11b_reset_pbdma_faulted_tsg(tsg);
+                        gv11b_reset_eng_faulted_tsg(tsg);
 
-        nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+                        gk20a_ctxsw_trace_tsg_reset(g, tsg);
+#endif
+                        if (!g->fifo.deferred_reset_pending) {
+                                if (rc_type == RC_TYPE_MMU_FAULT) {
+                                        gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+                                        gk20a_fifo_error_tsg(g, tsg);
+                                }
+                        }
 
-        return ret;
+                        /* (chid == ~0 && !add) remove all act ch from runlist*/
+                        err = gk20a_fifo_update_runlist_locked(g, rlid,
+                                        FIFO_INVAL_CHANNEL_ID, add, wait_for_finish);
+                        if (err)
+                                nvgpu_err(g, "runlist id %d is not cleaned up",
+                                        rlid);
 
+                        gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
+
+                        nvgpu_log(g, gpu_dbg_info, "aborted tsg id %d", tsgid);
+                }
+        }
+        if (!mutex_ret)
+                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 }
 
 void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
@@ -967,10 +1060,66 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
                 struct mmu_fault_info *mmfault)
 {
         struct tsg_gk20a *tsg = NULL;
-        struct channel_gk20a *refch = NULL;
-        u32 runlists_mask, runlist_id;
+        u32 runlists_mask, rlid;
         struct fifo_runlist_info_gk20a *runlist = NULL;
         u32 engine_id, client_type = ~0;
+        struct fifo_gk20a *f = &g->fifo;
+        u32 runlist_id = FIFO_INVAL_RUNLIST_ID;
+        u32 num_runlists = 0;
+
+        nvgpu_log_fn(g, "acquire runlist_lock for all runlists");
+        for (rlid = 0; rlid < g->fifo.max_runlists; rlid++)
+                nvgpu_mutex_acquire(&f->runlist_info[rlid].
+                        runlist_lock);
+
+        /* get runlist id and tsg */
+        if (id_type == ID_TYPE_TSG) {
+                if (id != FIFO_INVAL_TSG_ID) {
+                        tsg = &g->fifo.tsg[id];
+                        runlist_id = tsg->runlist_id;
+                        if (runlist_id != FIFO_INVAL_RUNLIST_ID)
+                                num_runlists++;
+                        else
+                                nvgpu_log_fn(g, "tsg runlist id is invalid");
+                } else {
+                        nvgpu_log_fn(g, "id type is tsg but tsg id is inval");
+                }
+        } else {
+                /*
+                 * id type is unknown, get runlist_id if eng mask is such that
+                 * it corresponds to single runlist id. If eng mask corresponds
+                 * to multiple runlists, then abort all runlists
+                 */
+                for (rlid = 0; rlid < f->max_runlists; rlid++) {
+                        if (act_eng_bitmask) {
+                                /* eng ids are known */
+                                runlist = &f->runlist_info[rlid];
+                                if (runlist->eng_bitmask & act_eng_bitmask) {
+                                        runlist_id = rlid;
+                                        num_runlists++;
+                                }
+                        } else {
+                                break;
+                        }
+                }
+                if (num_runlists > 1 ) /* abort all runlists */
+                        runlist_id = FIFO_INVAL_RUNLIST_ID;
+        }
+
+        /* if runlist_id is valid and there is only single runlist to be
+         * aborted, release runlist lock that are not
+         * needed for this recovery
+         */
+        if (runlist_id != FIFO_INVAL_RUNLIST_ID && num_runlists == 1) {
+                for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
+                        if (rlid != runlist_id) {
+                                nvgpu_log_fn(g, "release runlist_lock for "
+                                        "unused runlist id: %d", rlid);
+                                nvgpu_mutex_release(&f->runlist_info[rlid].
+                                        runlist_lock);
+                        }
+                }
+        }
 
         nvgpu_log(g, gpu_dbg_info, "id = %d, id_type = %d, rc_type = %d, "
                         "act_eng_bitmask = 0x%x, mmfault ptr = 0x%p",
@@ -979,6 +1128,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
         runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id,
                         id_type, rc_type, mmfault);
 
+        /* Disable runlist scheduler */
         gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_DISABLED);
 
         g->fifo.deferred_reset_pending = false;
@@ -1000,41 +1150,41 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
         gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN);
 
-        /* Get tsg/ch */
         if (rc_type == RC_TYPE_MMU_FAULT) {
                 gk20a_debug_dump(g);
-                refch = mmfault->refch;
                 client_type = mmfault->client_type;
-                gv11b_fifo_reset_pbdma_and_eng_faulted(g, refch,
+                gv11b_fifo_reset_pbdma_and_eng_faulted(g, tsg,
                                 mmfault->faulted_pbdma,
                                 mmfault->faulted_engine);
         }
 
-        if (id_type == ID_TYPE_TSG) {
-                tsg = &g->fifo.tsg[id];
-        } else if (id_type == ID_TYPE_CHANNEL) {
-                if (refch == NULL)
-                        refch = gk20a_channel_get(&g->fifo.channel[id]);
-        }
-        /* Disable tsg/ch */
         if (tsg)
                 gk20a_disable_tsg(tsg);
-        else if (refch)
-                g->ops.fifo.disable_channel(refch);
 
-        /* Preempt tsg/ch */
-        if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
-                g->ops.fifo.preempt_ch_tsg(g, id, id_type,
-                                PREEMPT_TIMEOUT_NORC);
+        /*
+         * Even though TSG preempt timed out, the RC sequence would by design
+         * require s/w to issue another preempt.
+         * If recovery includes an ENGINE_RESET, to not have race conditions,
+         * use RUNLIST_PREEMPT to kick all work off, and cancel any context
+         * load which may be pending. This is also needed to make sure
+         * that all PBDMAs serving the engine are not loaded when engine is
+         * reset.
+         */
+        if (tsg) {
+                int preempt_failed;
+
+                preempt_failed = g->ops.fifo.preempt_ch_tsg(g, id, id_type);
+                if (preempt_failed)
+                        gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
         } else {
-                gv11b_fifo_preempt_runlists(g, runlists_mask);
+                gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
         }
 
         /* check if engine reset should be deferred */
-        for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
+        for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
 
-                runlist = &g->fifo.runlist_info[runlist_id];
-                if ((runlists_mask & BIT(runlist_id)) &&
+                runlist = &g->fifo.runlist_info[rlid];
+                if ((runlists_mask & BIT(rlid)) &&
                         runlist->reset_eng_bitmask) {
 
                         unsigned long __reset_eng_bitmask =
@@ -1042,7 +1192,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
                         for_each_set_bit(engine_id, &__reset_eng_bitmask,
                                                         g->fifo.max_engines) {
-                                if ((refch || tsg) &&
+                                if (tsg &&
                                         gk20a_fifo_should_defer_engine_reset(g,
                                         engine_id, client_type, false)) {
 
@@ -1074,13 +1224,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
         }
 
 #ifdef CONFIG_GK20A_CTXSW_TRACE
-        /* tsg and refch both could be valid for mmu fault. Check tsg first */
         if (tsg)
                 gk20a_ctxsw_trace_tsg_reset(g, tsg);
-        else if (refch)
-                gk20a_ctxsw_trace_channel_reset(g, refch);
 #endif
-
         if (tsg) {
                 if (g->fifo.deferred_reset_pending) {
                         gk20a_disable_tsg(tsg);
@@ -1090,26 +1236,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
                         gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
                 }
-                if (refch)
-                        gk20a_channel_put(refch);
-        } else if (refch) {
-                if (g->fifo.deferred_reset_pending) {
-                        g->ops.fifo.disable_channel(refch);
-                } else {
-                        if (rc_type == RC_TYPE_MMU_FAULT)
-                                gk20a_fifo_set_ctx_mmu_error_ch(g, refch);
-
-                        gk20a_channel_abort(refch, false);
-                }
-                gk20a_channel_put(refch);
         } else {
-                nvgpu_err(g, "id unknown, abort runlist");
-                for (runlist_id = 0; runlist_id < g->fifo.max_runlists;
-                                                 runlist_id++) {
-                        if (runlists_mask & BIT(runlist_id))
-                                g->ops.fifo.update_runlist(g, runlist_id,
-                                        FIFO_INVAL_CHANNEL_ID, false, true);
-                }
+                gv11b_fifo_locked_abort_runlist_active_tsgs(g, rc_type,
+                                runlists_mask);
         }
 
         gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_ENABLED);
@@ -1117,6 +1246,18 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
         /* It is safe to enable ELPG again. */
         if (g->support_pmu && g->elpg_enabled)
                 nvgpu_pmu_enable_elpg(g);
+
+        /* release runlist_lock */
+        if (runlist_id != FIFO_INVAL_RUNLIST_ID) {
+                nvgpu_log_fn(g, "release runlist_lock runlist_id = %d",
+                                runlist_id);
+                nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
+        } else {
+                nvgpu_log_fn(g, "release runlist_lock for all runlists");
+                for (rlid = 0; rlid < g->fifo.max_runlists; rlid++)
+                        nvgpu_mutex_release(&f->runlist_info[rlid].
+                                runlist_lock);
+        }
 }
 
 void gv11b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f)
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
index 1ae3c93e..aee7aef2 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
@@ -50,10 +50,13 @@
 
 #define CHANNEL_INFO_VEID0              0
 
+#define MAX_PRE_SI_RETRIES              200000  /* 1G/500KHz * 100 */
+#define PREEMPT_TIMEOUT_1000_MS         1000
+
 struct gpu_ops;
 
 void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
-                struct channel_gk20a *refch,
+                struct tsg_gk20a *tsg,
                 u32 faulted_pbdma, u32 faulted_engine);
 void gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(struct gk20a *g,
         u32 mmu_fault_id, u32 *active_engine_id, u32 *veid, u32 *pbdma_id);
@@ -78,12 +81,11 @@ void gv11b_dump_eng_status(struct gk20a *g,
 u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g);
 int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-                unsigned int id_type, unsigned int timeout_rc_type);
+                unsigned int id_type);
 int gv11b_fifo_preempt_channel(struct gk20a *g, u32 chid);
 int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid);
 int gv11b_fifo_enable_tsg(struct tsg_gk20a *tsg);
-int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
-                unsigned int id_type, unsigned int timeout_rc_type);
+int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, unsigned int id_type);
 void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
                 u32 id, unsigned int id_type, unsigned int rc_type,
                 struct mmu_fault_info *mmfault);
diff --git a/drivers/gpu/nvgpu/gv11b/mc_gv11b.c b/drivers/gpu/nvgpu/gv11b/mc_gv11b.c
index 64680fc6..bc802c2d 100644
--- a/drivers/gpu/nvgpu/gv11b/mc_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mc_gv11b.c
@@ -66,24 +66,24 @@ bool gv11b_mc_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0)
         return (((mc_intr_0 & mc_intr_hub_pending_f()) != 0U) ? true : false);
 }
 
-bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id)
+bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+                                        u32 *eng_intr_pending)
 {
         u32 mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
         u32 stall_intr, eng_intr_mask;
 
         eng_intr_mask = gk20a_fifo_act_eng_interrupt_mask(g, act_eng_id);
-        if ((mc_intr_0 & eng_intr_mask) != 0U) {
-                return true;
-        }
+        *eng_intr_pending = mc_intr_0 & eng_intr_mask;
 
         stall_intr = mc_intr_pfifo_pending_f() |
                         mc_intr_hub_pending_f() |
                         mc_intr_priv_ring_pending_f() |
                         mc_intr_pbus_pending_f() |
                         mc_intr_ltc_pending_f();
-        if ((mc_intr_0 & stall_intr) != 0U) {
-                return true;
-        }
 
-        return false;
+        nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
+                "mc_intr_0 = 0x%08x, eng_intr = 0x%08x",
+                mc_intr_0 & stall_intr, *eng_intr_pending);
+
+        return (mc_intr_0 & (eng_intr_mask | stall_intr)) != 0U;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/mc_gv11b.h b/drivers/gpu/nvgpu/gv11b/mc_gv11b.h
index eb9d0e4e..faa4d38d 100644
--- a/drivers/gpu/nvgpu/gv11b/mc_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/mc_gv11b.h
@@ -26,5 +26,6 @@ struct gk20a;
 
 void mc_gv11b_intr_enable(struct gk20a *g);
 bool gv11b_mc_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0);
-bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id);
+bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+                                        u32 *eng_intr_pending);
 #endif