author     Seema Khowala <seemaj@nvidia.com>    2018-02-23 16:00:00 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>    2018-06-24 12:53:44 -0400
commit     cd6e821cf66837a2c3479e928414007064b9c496 (patch)
tree       18e5cfde24246342b05e8431ba8b816de9ec407b /drivers
parent     5cf1eb145fef763f7153e449be60f1a7602e2c81 (diff)
gpu: nvgpu: gv11b: add runlist abort & remove bare channel
- Add support for aborting runlists. Aborting a runlist aborts all of its
  active TSGs and the active channels within those TSGs.
- Bare channels are no longer supported, so remove recovery support for
  bare channels. If bare channels are present, recovery triggers a runlist
  abort instead.

Bug 2125776
Bug 2108544
Bug 2105322
Bug 2092051
Bug 2048824
Bug 2043838
Bug 2039587
Bug 2028993
Bug 2029245
Bug 2065990
Bug 1945121
Bug 200401707
Bug 200393631
Bug 200327596

Change-Id: I6bec8a0004508cf65ea128bf641a26bf4c2f236d
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1640567
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
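For reference, the recovery entry point after this change can be exercised as
sketched below. This is an illustrative sketch only, not part of the patch; the
wrapper name example_recover_from_mmu_fault is hypothetical, and it assumes the
gv11b HAL is installed so that g->ops.fifo.teardown_ch_tsg points at
gv11b_fifo_teardown_ch_tsg().

/*
 * Illustrative sketch (not part of this change): with bare channels gone,
 * the fault is either resolved to a TSG (id_type == ID_TYPE_TSG) or left
 * as ID_TYPE_UNKNOWN, in which case gv11b_fifo_teardown_ch_tsg() falls
 * through to gv11b_fifo_locked_abort_runlist_active_tsgs() and aborts
 * every active TSG on the affected runlists.
 */
static void example_recover_from_mmu_fault(struct gk20a *g,
		struct mmu_fault_info *mmfault, u32 act_eng_bitmask)
{
	u32 id = FIFO_INVAL_TSG_ID;
	unsigned int id_type = ID_TYPE_UNKNOWN;

	if (mmfault->refch &&
			gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
		id = mmfault->refch->tsgid;
		if (id != FIFO_INVAL_TSG_ID)
			id_type = ID_TYPE_TSG;
	}

	g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, id, id_type,
			RC_TYPE_MMU_FAULT, mmfault);
}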
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.c |   6
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.h |   3
-rw-r--r--   drivers/gpu/nvgpu/gv11b/fb_gv11b.c   |  79
-rw-r--r--   drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 149
-rw-r--r--   drivers/gpu/nvgpu/gv11b/fifo_gv11b.h |   2
5 files changed, 135 insertions(+), 104 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index c8789c3a..dc1f48b7 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -55,9 +55,7 @@
 #define FECS_METHOD_WFI_RESTORE 0x80000
 #define FECS_MAILBOX_0_ACK_RESTORE 0x4
 
-static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
-					    u32 chid, bool add,
-					    bool wait_for_finish);
+
 static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
 
 static const char *const pbdma_intr_fault_type_desc[] = {
@@ -3275,7 +3273,7 @@ void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
 			fifo_eng_runlist_length_f(count));
 }
 
-static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
 					    bool wait_for_finish)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index d6e759ac..77030c94 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -254,6 +254,9 @@ int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
 			      bool add, bool wait_for_finish);
 
+int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+			u32 chid, bool add,
+			bool wait_for_finish);
 int gk20a_fifo_suspend(struct gk20a *g);
 
 bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index 54f0d2d8..2ceb816b 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -870,10 +870,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
 static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		struct mmu_fault_info *mmfault, u32 *invalidate_replay_val)
 {
-	unsigned int id_type;
+	unsigned int id_type = ID_TYPE_UNKNOWN;
 	u32 num_lce, act_eng_bitmask = 0;
 	int err = 0;
-	u32 id = ((u32)~0);
+	u32 id = FIFO_INVAL_TSG_ID;
+	unsigned int rc_type = RC_TYPE_NO_RC;
 
 	if (!mmfault->valid)
 		return;
@@ -888,18 +889,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		/* CE page faults are not reported as replayable */
 		nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
 		err = gv11b_fb_fix_page_fault(g, mmfault);
-		gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch,
-				mmfault->faulted_pbdma, mmfault->faulted_engine);
+		if (mmfault->refch &&
+			(u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) {
+			gv11b_fifo_reset_pbdma_and_eng_faulted(g,
+					&g->fifo.tsg[mmfault->refch->tsgid],
+					mmfault->faulted_pbdma,
+					mmfault->faulted_engine);
+		}
 		if (!err) {
 			nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed");
 			*invalidate_replay_val = 0;
-			/* refch in mmfault is assigned at the time of copying
-			 * fault info from snap reg or bar2 fault buf
-			 */
-			gk20a_channel_put(mmfault->refch);
+			if (mmfault->refch) {
+				gk20a_channel_put(mmfault->refch);
+				mmfault->refch = NULL;
+			}
 			return;
 		}
-		/* Do recovery. Channel recovery needs refch */
+		/* Do recovery */
 		nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
 	}
 
@@ -911,16 +917,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		 * instance block, the fault cannot be isolated to a
 		 * single context so we need to reset the entire runlist
 		 */
-		id_type = ID_TYPE_UNKNOWN;
+		rc_type = RC_TYPE_MMU_FAULT;
 
 	} else if (mmfault->refch) {
-		if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
-			id = mmfault->refch->tsgid;
-			id_type = ID_TYPE_TSG;
-		} else {
-			id = mmfault->chid;
-			id_type = ID_TYPE_CHANNEL;
-		}
 		if (mmfault->refch->mmu_nack_handled) {
 			/* We have already recovered for the same
 			 * context, skip doing another recovery.
@@ -941,19 +940,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 			 */
 			gk20a_channel_put(mmfault->refch);
 			return;
+		} else {
+			/* Indicate recovery is handled if mmu fault is
+			 * a result of mmu nack.
+			 */
+			mmfault->refch->mmu_nack_handled = true;
+		}
+
+		rc_type = RC_TYPE_MMU_FAULT;
+		if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
+			id = mmfault->refch->tsgid;
+			if (id != FIFO_INVAL_TSG_ID)
+				id_type = ID_TYPE_TSG;
+		} else {
+			nvgpu_err(g, "bare channels not supported");
 		}
-	} else {
-		id_type = ID_TYPE_UNKNOWN;
 	}
-	if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)
+
+	/* engine is faulted */
+	if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) {
 		act_eng_bitmask = BIT(mmfault->faulted_engine);
+		rc_type = RC_TYPE_MMU_FAULT;
+	}
 
-	/* Indicate recovery is handled if mmu fault is a result of
-	 * mmu nack.
-	 */
-	mmfault->refch->mmu_nack_handled = true;
-	g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
-			id, id_type, RC_TYPE_MMU_FAULT, mmfault);
+	/* refch in mmfault is assigned at the time of copying
+	 * fault info from snap reg or bar2 fault buf
+	 */
+	if (mmfault->refch) {
+		gk20a_channel_put(mmfault->refch);
+		mmfault->refch = NULL;
+	}
+
+	if (rc_type != RC_TYPE_NO_RC)
+		g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
+				id, id_type, rc_type, mmfault);
 } else {
 	if (mmfault->fault_type == gmmu_fault_type_pte_v()) {
 		nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
@@ -972,7 +992,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		/* refch in mmfault is assigned at the time of copying
 		 * fault info from snap reg or bar2 fault buf
 		 */
-		gk20a_channel_put(mmfault->refch);
+		if (mmfault->refch) {
+			gk20a_channel_put(mmfault->refch);
+			mmfault->refch = NULL;
+		}
 	}
 }
 
@@ -1061,8 +1084,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
 			next_fault_addr = mmfault->fault_addr;
 			if (prev_fault_addr == next_fault_addr) {
 				nvgpu_log(g, gpu_dbg_intr, "pte already scanned");
-				if (mmfault->refch)
+				if (mmfault->refch) {
 					gk20a_channel_put(mmfault->refch);
+					mmfault->refch = NULL;
+				}
 				continue;
 			}
 		}
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 8f0f6b0c..6df1d343 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -652,29 +652,19 @@ static void gv11b_reset_pbdma_faulted_tsg(struct tsg_gk20a *tsg)
 }
 
 void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
-			struct channel_gk20a *refch,
+			struct tsg_gk20a *tsg,
 			u32 faulted_pbdma, u32 faulted_engine)
 {
-	struct tsg_gk20a *tsg;
+	if (!tsg)
+		return;
 
 	nvgpu_log(g, gpu_dbg_intr, "reset faulted pbdma:0x%x eng:0x%x",
 			faulted_pbdma, faulted_engine);
 
-	if (!refch)
-		return;
-
-	if (gk20a_is_channel_marked_as_tsg(refch)) {
-		tsg = &g->fifo.tsg[refch->tsgid];
-		if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
-			gv11b_reset_pbdma_faulted_tsg(tsg);
-		if (faulted_engine != FIFO_INVAL_ENGINE_ID)
-			gv11b_reset_eng_faulted_tsg(tsg);
-	} else {
-		if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
-			gv11b_reset_pbdma_faulted_ch(g, refch->chid);
-		if (faulted_engine != FIFO_INVAL_ENGINE_ID)
-			gv11b_reset_eng_faulted_ch(g, refch->chid);
-	}
+	if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
+		gv11b_reset_pbdma_faulted_tsg(tsg);
+	if (faulted_engine != FIFO_INVAL_ENGINE_ID)
+		gv11b_reset_eng_faulted_tsg(tsg);
 }
 
 static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
@@ -992,12 +982,74 @@ int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
 
 }
 
+static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g,
+			unsigned int rc_type,
+			u32 runlists_mask)
+{
+	bool verbose = false;
+	struct tsg_gk20a *tsg = NULL;
+	u32 rlid, tsgid;
+	struct fifo_runlist_info_gk20a *runlist = NULL;
+	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+	u32 mutex_ret = 0;
+	bool add = false, wait_for_finish = false;
+	int err;
+
+	nvgpu_err(g, "runlist id unknown, abort active tsgs in runlists");
+
+	/* runlist_lock are locked by teardown */
+	mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	for (rlid = 0; rlid < g->fifo.max_runlists;
+						 rlid++) {
+		if (!(runlists_mask & BIT(rlid)))
+			continue;
+		nvgpu_log(g, gpu_dbg_info, "abort runlist id %d",
+				rlid);
+		runlist = &g->fifo.runlist_info[rlid];
+
+		for_each_set_bit(tsgid, runlist->active_tsgs,
+			g->fifo.num_channels) {
+			nvgpu_log(g, gpu_dbg_info, "abort tsg id %d", tsgid);
+			tsg = &g->fifo.tsg[tsgid];
+			gk20a_disable_tsg(tsg);
+
+			/* assume all pbdma and eng faulted are set */
+			nvgpu_log(g, gpu_dbg_info, "reset pbdma and eng faulted");
+			gv11b_reset_pbdma_faulted_tsg(tsg);
+			gv11b_reset_eng_faulted_tsg(tsg);
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+			gk20a_ctxsw_trace_tsg_reset(g, tsg);
+#endif
+			if (!g->fifo.deferred_reset_pending) {
+				if (rc_type == RC_TYPE_MMU_FAULT) {
+					gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+					verbose = gk20a_fifo_error_tsg(g, tsg);
+				}
+			}
+
+			/* (chid == ~0 && !add) remove all act ch from runlist*/
+			err = gk20a_fifo_update_runlist_locked(g, rlid,
+					FIFO_INVAL_CHANNEL_ID, add, wait_for_finish);
+			if (err)
+				nvgpu_err(g, "runlist id %d is not cleaned up",
+					rlid);
+
+			gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
+
+			nvgpu_log(g, gpu_dbg_info, "aborted tsg id %d", tsgid);
+		}
+	}
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+}
+
 void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 		u32 id, unsigned int id_type, unsigned int rc_type,
 		struct mmu_fault_info *mmfault)
 {
 	struct tsg_gk20a *tsg = NULL;
-	struct channel_gk20a *refch = NULL;
 	u32 runlists_mask, rlid;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 engine_id, client_type = ~0;
@@ -1022,21 +1074,6 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 		} else {
 			nvgpu_log_fn(g, "id type is tsg but tsg id is inval");
 		}
-	} else if (id_type == ID_TYPE_CHANNEL) {
-		if (id != FIFO_INVAL_CHANNEL_ID) {
-			runlist_id = f->channel[id].runlist_id;
-			if (runlist_id != FIFO_INVAL_RUNLIST_ID)
-				num_runlists++;
-			else
-				nvgpu_log_fn(g, "ch runlist id is invalid");
-
-			if ((u32)f->channel[id].tsgid != FIFO_INVAL_TSG_ID)
-				tsg = &f->tsg[f->channel[id].tsgid];
-			else
-				nvgpu_log_fn(g, "tsgid for ch is invalid");
-		} else {
-			nvgpu_log_fn(g, "id type is ch but ch id is inval");
-		}
 	} else {
 		/*
 		 * id type is unknown, get runlist_id if eng mask is such that
@@ -1103,27 +1140,16 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
 	gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN);
 
-	/* Get tsg/ch */
 	if (rc_type == RC_TYPE_MMU_FAULT) {
 		gk20a_debug_dump(g);
-		refch = mmfault->refch;
 		client_type = mmfault->client_type;
-		gv11b_fifo_reset_pbdma_and_eng_faulted(g, refch,
+		gv11b_fifo_reset_pbdma_and_eng_faulted(g, tsg,
 				mmfault->faulted_pbdma,
 				mmfault->faulted_engine);
 	}
 
-	if (id_type == ID_TYPE_TSG) {
-		tsg = &g->fifo.tsg[id];
-	} else if (id_type == ID_TYPE_CHANNEL) {
-		if (refch == NULL)
-			refch = gk20a_channel_get(&g->fifo.channel[id]);
-	}
-	/* Disable tsg/ch */
 	if (tsg)
 		gk20a_disable_tsg(tsg);
-	else if (refch)
-		g->ops.fifo.disable_channel(refch);
 
 	/*
 	 * Even though TSG preempt timed out, the RC sequence would by design
@@ -1134,7 +1160,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	 * that all PBDMAs serving the engine are not loaded when engine is
 	 * reset.
 	 */
-	if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
+	if (tsg) {
 		int preempt_failed;
 
 		preempt_failed = g->ops.fifo.preempt_ch_tsg(g, id, id_type);
@@ -1156,7 +1182,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
 	for_each_set_bit(engine_id, &__reset_eng_bitmask,
 						g->fifo.max_engines) {
-		if ((refch || tsg) &&
+		if (tsg &&
 			gk20a_fifo_should_defer_engine_reset(g,
 					engine_id, client_type, false)) {
 
@@ -1188,13 +1214,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	}
 
 #ifdef CONFIG_GK20A_CTXSW_TRACE
-	/* tsg and refch both could be valid for mmu fault. Check tsg first */
 	if (tsg)
 		gk20a_ctxsw_trace_tsg_reset(g, tsg);
-	else if (refch)
-		gk20a_ctxsw_trace_channel_reset(g, refch);
 #endif
-
 	if (tsg) {
 		if (g->fifo.deferred_reset_pending) {
 			gk20a_disable_tsg(tsg);
@@ -1204,26 +1226,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
 			gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
 		}
-		if (refch)
-			gk20a_channel_put(refch);
-	} else if (refch) {
-		if (g->fifo.deferred_reset_pending) {
-			g->ops.fifo.disable_channel(refch);
-		} else {
-			if (rc_type == RC_TYPE_MMU_FAULT)
-				gk20a_fifo_set_ctx_mmu_error_ch(g, refch);
-
-			gk20a_channel_abort(refch, false);
-		}
-		gk20a_channel_put(refch);
 	} else {
-		nvgpu_err(g, "id unknown, abort runlist");
-		for (rlid = 0; rlid < g->fifo.max_runlists;
-						 rlid++) {
-			if (runlists_mask & BIT(rlid))
-				g->ops.fifo.update_runlist(g, rlid,
-					FIFO_INVAL_CHANNEL_ID, false, true);
-		}
+		gv11b_fifo_locked_abort_runlist_active_tsgs(g, rc_type,
+				runlists_mask);
 	}
 
 	gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_ENABLED);
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
index 3dfc337c..aee7aef2 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
@@ -56,7 +56,7 @@
 struct gpu_ops;
 
 void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
-			struct channel_gk20a *refch,
+			struct tsg_gk20a *tsg,
 			u32 faulted_pbdma, u32 faulted_engine);
 void gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(struct gk20a *g,
 		u32 mmu_fault_id, u32 *active_engine_id, u32 *veid, u32 *pbdma_id);