path: root/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
author    Seema Khowala <seemaj@nvidia.com>                    2017-11-16 16:46:11 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-11-28 12:46:46 -0500
commit    3fbb44d7576238d42635e2ca6501a17cdc7306f7 (patch)
tree      e0182e16b6dd13838503d06b111c266c2f1d1b52 /drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
parent    87f42744e0821d54eff7bf0bb863c7e53063e5c1 (diff)
gpu: nvgpu: gv11b: channel/tsg recovery reorged
Context TSG teardown procedure:
1. Disable scheduling for the engine's runlist via PFIFO_SCHED_DISABLE.
   This enables SW to determine whether a context has hung later in the
   process: otherwise, ongoing work on the runlist may keep ENG_STATUS
   from reaching a steady state.
2. Disable all channels in the TSG being torn down, or submit a new
   runlist that does not contain the TSG. This prevents the TSG from
   being rescheduled once scheduling is re-enabled in step 6.
3. a) Initiate a preempt of the TSG by writing NV_PFIFO_PREEMPT with the
      TSG's ID and the TYPE set to TSG, if the TSG's ID is known;
      otherwise do 3b.
   b) Initiate a preempt of the engine by writing the bit associated
      with its runlist to NV_PFIFO_RUNLIST_PREEMPT. This allows us to
      begin the preempt process prior to doing the slow register reads
      needed to determine whether the context has hit any interrupts or
      is hung. Do not poll NV_PFIFO_RUNLIST_PREEMPT for the preempt to
      complete.
4. Check for preempt done.
5. If a reset is needed, as determined by step 4:
   a. Halt the memory interface for the engine (as per the relevant
      engine procedure).
   b. Reset the engine via PMC_ENABLE.
   c. Take the engine out of reset and reinit the engine (as per the
      relevant engine procedure).
6. Re-enable scheduling for the engine's runlist via PFIFO_SCHED_ENABLE.

Bug 200277163

Change-Id: I1e945a2c6b9845f365d6952109f6803309aa2270
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1599841
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
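For reference, the ordering described in the commit message can be modeled outside the driver. The sketch below is a minimal, self-contained C illustration of steps 1-6 (disable scheduling, disable the TSG's channels, issue the preempt, check for completion, reset the engine if needed, re-enable scheduling). It is an assumption-laden model only: reg_write(), preempt_done() and teardown_tsg() are hypothetical stubs invented here, and none of this is the actual nvgpu code shown in the diff below.

```c
/*
 * Standalone sketch of the TSG teardown ordering described above.
 * All helpers are stubs for illustration; the real driver uses the
 * nvgpu/gk20a register accessors and FIFO helpers, not these functions.
 */
#include <stdbool.h>
#include <stdio.h>

/* Stub register write: just logs what the real code would program. */
static void reg_write(const char *reg, unsigned int val)
{
	printf("write %-28s <- 0x%08x\n", reg, val);
}

/* Stub completion check: pretend the preempt finished cleanly. */
static bool preempt_done(unsigned int tsg_id)
{
	printf("poll  preempt state for TSG %u\n", tsg_id);
	return true;
}

static void teardown_tsg(unsigned int runlist_id, unsigned int tsg_id,
			 bool tsg_id_known)
{
	/* 1. Disable scheduling so ENG_STATUS can reach a steady state. */
	reg_write("PFIFO_SCHED_DISABLE", 1u << runlist_id);

	/* 2. Disable the TSG's channels (or resubmit a runlist without it). */
	reg_write("CHANNEL_ENABLE_CLR(tsg)", tsg_id);

	/* 3a/3b. Preempt by TSG id if known, else preempt the whole runlist;
	 *        NV_PFIFO_RUNLIST_PREEMPT is not polled here. */
	if (tsg_id_known)
		reg_write("NV_PFIFO_PREEMPT (TYPE=TSG)", tsg_id);
	else
		reg_write("NV_PFIFO_RUNLIST_PREEMPT", 1u << runlist_id);

	/* 4./5. Check for preempt done; reset the engine only if it hung. */
	if (!preempt_done(tsg_id)) {
		reg_write("ENGINE_MEM_IFACE_HALT", 1);	/* 5a */
		reg_write("PMC_ENABLE (assert reset)", 0);	/* 5b */
		reg_write("PMC_ENABLE (deassert, reinit)", 1);	/* 5c */
	}

	/* 6. Re-enable scheduling for the engine's runlist. */
	reg_write("PFIFO_SCHED_ENABLE", 1u << runlist_id);
}

int main(void)
{
	teardown_tsg(0 /* runlist */, 5 /* tsg */, true /* id known */);
	return 0;
}
```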
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fifo_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 111
1 file changed, 60 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 0238ae6c..ae2b6cfc 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -660,7 +660,7 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
 			}
 		}
 	}
-	gk20a_dbg_info("runlists_mask = %08x", runlists_mask);
+	nvgpu_log(g, gpu_dbg_info, "runlists_mask = 0x%08x", runlists_mask);
 	return runlists_mask;
 }
 
@@ -873,9 +873,14 @@ static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
 	nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
 	for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-		if (runlists_mask & fifo_runlist_preempt_runlist_m(runlist_id))
+		if (runlists_mask &
+			fifo_runlist_preempt_runlist_m(runlist_id)) {
+			/* during recovery reset engs served by this runlist */
+			g->fifo.runlist_info[runlist_id].reset_eng_bitmask =
+				g->fifo.runlist_info[runlist_id].eng_bitmask;
 			nvgpu_mutex_release(&g->fifo.
 				runlist_info[runlist_id].mutex);
+		}
 	}
 
 	return ret;
@@ -955,11 +960,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 engine_id, client_type = ~0;
 
-	gk20a_dbg_info("active engine ids bitmask =0x%x", act_eng_bitmask);
-	gk20a_dbg_info("hw id =%d", id);
-	gk20a_dbg_info("id_type =%d", id_type);
-	gk20a_dbg_info("rc_type =%d", rc_type);
-	gk20a_dbg_info("mmu_fault =0x%p", mmfault);
+	nvgpu_log(g, gpu_dbg_info, "id = %d, id_type = %d, rc_type = %d, "
+			"act_eng_bitmask = 0x%x, mmfault ptr = 0x%p",
+			id, id_type, rc_type, act_eng_bitmask, mmfault);
 
 	runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id,
 				id_type, rc_type, mmfault);
@@ -986,25 +989,29 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
 	gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN);
 
-	if (rc_type == RC_TYPE_MMU_FAULT)
+	/* Get tsg/ch */
+	if (rc_type == RC_TYPE_MMU_FAULT) {
 		gk20a_debug_dump(g);
-
-	/* get the channel/TSG */
-	if (rc_type == RC_TYPE_MMU_FAULT && mmfault && mmfault->refch) {
 		refch = mmfault->refch;
 		client_type = mmfault->client_type;
-		if (gk20a_is_channel_marked_as_tsg(refch))
-			tsg = &g->fifo.tsg[refch->tsgid];
 		gv11b_fifo_reset_pbdma_and_eng_faulted(g, refch,
 				mmfault->faulted_pbdma,
 				mmfault->faulted_engine);
-	} else {
-		if (id_type == ID_TYPE_TSG)
-			tsg = &g->fifo.tsg[id];
-		else if (id_type == ID_TYPE_CHANNEL)
+	}
+
+	if (id_type == ID_TYPE_TSG) {
+		tsg = &g->fifo.tsg[id];
+	} else if (id_type == ID_TYPE_CHANNEL) {
+		if (refch == NULL)
 			refch = gk20a_channel_get(&g->fifo.channel[id]);
 	}
+	/* Disable tsg/ch */
+	if (tsg)
+		gk20a_disable_tsg(tsg);
+	else if (refch)
+		g->ops.fifo.disable_channel(refch);
 
+	/* Preempt tsg/ch */
 	if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
 		g->ops.fifo.preempt_ch_tsg(g, id, id_type,
 				PREEMPT_TIMEOUT_NORC);
@@ -1012,35 +1019,6 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 		gv11b_fifo_preempt_runlists(g, runlists_mask);
 	}
 
-	if (tsg) {
-		if (!g->fifo.deferred_reset_pending) {
-			if (rc_type == RC_TYPE_MMU_FAULT) {
-				gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
-				verbose = gk20a_fifo_error_tsg(g, tsg);
-			}
-		}
-		gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
-		if (refch)
-			gk20a_channel_put(refch);
-	} else if (refch) {
-		if (!g->fifo.deferred_reset_pending) {
-			if (rc_type == RC_TYPE_MMU_FAULT) {
-				gk20a_fifo_set_ctx_mmu_error_ch(g, refch);
-				verbose = gk20a_fifo_error_ch(g, refch);
-			}
-		}
-		gk20a_channel_abort(refch, false);
-		gk20a_channel_put(refch);
-	} else {
-		nvgpu_err(g, "id unknown, abort runlist");
-		for (runlist_id = 0; runlist_id < g->fifo.max_runlists;
-							runlist_id++) {
-			if (runlists_mask & BIT(runlist_id))
-				g->ops.fifo.update_runlist(g, runlist_id,
-					FIFO_INVAL_CHANNEL_ID, false, true);
-		}
-	}
-
 	/* check if engine reset should be deferred */
 	for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
 
@@ -1051,7 +1029,8 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 			unsigned long __reset_eng_bitmask =
 				runlist->reset_eng_bitmask;
 
-			for_each_set_bit(engine_id, &__reset_eng_bitmask, 32) {
+			for_each_set_bit(engine_id, &__reset_eng_bitmask,
+							g->fifo.max_engines) {
 				if ((refch || tsg) &&
 					gk20a_fifo_should_defer_engine_reset(g,
 					engine_id, client_type, false)) {
@@ -1061,7 +1040,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
 					/* handled during channel free */
 					g->fifo.deferred_reset_pending = true;
-					gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+					nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
 					"sm debugger attached,"
 					" deferring channel recovery to channel free");
 				} else {
@@ -1084,12 +1063,42 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	}
 
 #ifdef CONFIG_GK20A_CTXSW_TRACE
-	if (refch)
-		gk20a_ctxsw_trace_channel_reset(g, refch);
-	else if (tsg)
+	/* tsg and refch both could be valid for mmu fault. Check tsg first */
+	if (tsg)
 		gk20a_ctxsw_trace_tsg_reset(g, tsg);
+	else if (refch)
+		gk20a_ctxsw_trace_channel_reset(g, refch);
 #endif
 
+	if (tsg) {
+		if (!g->fifo.deferred_reset_pending) {
+			if (rc_type == RC_TYPE_MMU_FAULT) {
+				gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+				verbose = gk20a_fifo_error_tsg(g, tsg);
+			}
+		}
+		gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
+		if (refch)
+			gk20a_channel_put(refch);
+	} else if (refch) {
+		if (!g->fifo.deferred_reset_pending) {
+			if (rc_type == RC_TYPE_MMU_FAULT) {
+				gk20a_fifo_set_ctx_mmu_error_ch(g, refch);
+				verbose = gk20a_fifo_error_ch(g, refch);
+			}
+		}
+		gk20a_channel_abort(refch, false);
+		gk20a_channel_put(refch);
+	} else {
+		nvgpu_err(g, "id unknown, abort runlist");
+		for (runlist_id = 0; runlist_id < g->fifo.max_runlists;
+							runlist_id++) {
+			if (runlists_mask & BIT(runlist_id))
+				g->ops.fifo.update_runlist(g, runlist_id,
+					FIFO_INVAL_CHANNEL_ID, false, true);
+		}
+	}
+
 	gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_ENABLED,
 				!RUNLIST_INFO_MUTEX_LOCKED);
 