author     Seema Khowala <seemaj@nvidia.com>                    2018-02-23 15:25:30 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-06-24 12:53:36 -0400
commit     a8d6f31bde3ccef22ee77023eaff4a62f6f88199 (patch)
tree       2bf033b40e9a9d712d5d080bd5a9277da7f33eef /drivers
parent     067ddbc4e4df3f1f756f03e7865c369a46f420aa (diff)
gpu: nvgpu: gv11b: acquire/release runlist_lock during teardown
Recovery can be called for various types of faults. Make sure the current
teardown is done before proceeding to the next one.

Bug 2125776
Bug 2108544
Bug 2105322
Bug 2092051
Bug 2048824
Bug 2043838
Bug 2039587
Bug 2028993
Bug 2029245
Bug 2065990
Bug 1945121
Bug 200401707
Bug 200393631
Bug 200327596

Change-Id: I2073cc4f659fcc2103cb1fc8b7d9e1b2f1fb466f
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1662681
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.c  179
1 file changed, 119 insertions, 60 deletions
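The locking scheme this change introduces can be summarized as: take every runlist_lock up front so a new recovery waits for the one in progress, release the locks that a single-runlist recovery does not need, run preemption with the lock(s) held, and release what remains at the end. The sketch below is an illustrative condensation of the gv11b_fifo_teardown_ch_tsg() hunks that follow; the helper name teardown_locking_sketch is invented for illustration, and preempt, abort, and engine-reset details are omitted.

/*
 * Illustrative sketch only: condensed from the gv11b_fifo_teardown_ch_tsg()
 * hunks below. The helper name is hypothetical; error handling, preempt and
 * engine-reset details are omitted.
 */
static void teardown_locking_sketch(struct gk20a *g, u32 runlist_id)
{
        struct fifo_gk20a *f = &g->fifo;
        u32 rlid;

        /* Take every runlist_lock so a new recovery waits for the current one. */
        for (rlid = 0; rlid < f->max_runlists; rlid++)
                nvgpu_mutex_acquire(&f->runlist_info[rlid].runlist_lock);

        /* If only one runlist is affected, drop the locks this recovery
         * does not need. */
        if (runlist_id != FIFO_INVAL_RUNLIST_ID)
                for (rlid = 0; rlid < f->max_runlists; rlid++)
                        if (rlid != runlist_id)
                                nvgpu_mutex_release(&f->runlist_info[rlid].runlist_lock);

        /* ... preempt and abort here; the preempt helpers no longer take
         * runlist_lock themselves ... */

        /* Release whatever is still held once teardown is complete. */
        if (runlist_id != FIFO_INVAL_RUNLIST_ID)
                nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
        else
                for (rlid = 0; rlid < f->max_runlists; rlid++)
                        nvgpu_mutex_release(&f->runlist_info[rlid].runlist_lock);
}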
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 4917f828..47c34d32 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -684,7 +684,7 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
         u32 runlists_mask = 0;
         struct fifo_gk20a *f = &g->fifo;
         struct fifo_runlist_info_gk20a *runlist;
-        u32 pbdma_bitmask = 0;
+        u32 rlid, pbdma_bitmask = 0;
 
         if (id_type != ID_TYPE_UNKNOWN) {
                 if (id_type == ID_TYPE_TSG)
@@ -699,31 +699,31 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
                 if (mmfault->faulted_pbdma != FIFO_INVAL_PBDMA_ID)
                         pbdma_bitmask = BIT(mmfault->faulted_pbdma);
 
-                for (id = 0; id < f->max_runlists; id++) {
+                for (rlid = 0; rlid < f->max_runlists; rlid++) {
 
-                        runlist = &f->runlist_info[id];
+                        runlist = &f->runlist_info[rlid];
 
                         if (runlist->eng_bitmask & act_eng_bitmask)
                                 runlists_mask |=
-                                        fifo_sched_disable_runlist_m(id);
+                                        fifo_sched_disable_runlist_m(rlid);
 
                         if (runlist->pbdma_bitmask & pbdma_bitmask)
                                 runlists_mask |=
-                                        fifo_sched_disable_runlist_m(id);
+                                        fifo_sched_disable_runlist_m(rlid);
                 }
         }
 
         if (id_type == ID_TYPE_UNKNOWN) {
-                for (id = 0; id < f->max_runlists; id++) {
+                for (rlid = 0; rlid < f->max_runlists; rlid++) {
                         if (act_eng_bitmask) {
                                 /* eng ids are known */
-                                runlist = &f->runlist_info[id];
+                                runlist = &f->runlist_info[rlid];
                                 if (runlist->eng_bitmask & act_eng_bitmask)
                                         runlists_mask |=
-                                                fifo_sched_disable_runlist_m(id);
+                                                fifo_sched_disable_runlist_m(rlid);
                         } else {
                                 runlists_mask |=
-                                        fifo_sched_disable_runlist_m(id);
+                                        fifo_sched_disable_runlist_m(rlid);
                         }
                 }
         }
@@ -905,40 +905,32 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
         return ret;
 }
 
-static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
+static void gv11b_fifo_locked_preempt_runlists(struct gk20a *g, u32 runlists_mask)
 {
         int ret = 0;
         u32 token = PMU_INVALID_MUTEX_OWNER_ID;
         u32 mutex_ret = 0;
-        u32 runlist_id;
-
-        nvgpu_log_fn(g, " ");
+        u32 rlid;
 
-        for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-                if (runlists_mask & fifo_runlist_preempt_runlist_m(runlist_id))
-                        nvgpu_mutex_acquire(&g->fifo.
-                                runlist_info[runlist_id].runlist_lock);
-        }
+        /* runlist_lock are locked by teardown and sched are disabled too */
+        nvgpu_log_fn(g, "preempt runlists_mask:0x%08x", runlists_mask);
 
         mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
         ret = __locked_fifo_preempt_runlists(g, runlists_mask);
 
-        if (!mutex_ret)
-                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-
-        for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-                if (runlists_mask &
-                        fifo_runlist_preempt_runlist_m(runlist_id)) {
-                        /* during recovery reset engs served by this runlist */
-                        g->fifo.runlist_info[runlist_id].reset_eng_bitmask =
-                                g->fifo.runlist_info[runlist_id].eng_bitmask;
-                        nvgpu_mutex_release(&g->fifo.
-                                runlist_info[runlist_id].runlist_lock);
+        if (ret) {
+                /* if preempt timed out, reset engs served by runlists */
+                for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
+                        if (runlists_mask &
+                                fifo_runlist_preempt_runlist_m(rlid))
+                                g->fifo.runlist_info[rlid].reset_eng_bitmask =
+                                        g->fifo.runlist_info[rlid].eng_bitmask;
                 }
         }
 
-        return ret;
+        if (!mutex_ret)
+                nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 }
 
 static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
@@ -969,37 +961,20 @@ static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
 int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
                         unsigned int id_type)
 {
-        struct fifo_gk20a *f = &g->fifo;
         u32 ret = 0;
         u32 token = PMU_INVALID_MUTEX_OWNER_ID;
         u32 mutex_ret = 0;
-        u32 runlist_id;
-
-        if (id_type == ID_TYPE_TSG)
-                runlist_id = f->tsg[id].runlist_id;
-        else if (id_type == ID_TYPE_CHANNEL)
-                runlist_id = f->channel[id].runlist_id;
-        else
-                return -EINVAL;
-
-        if (runlist_id >= g->fifo.max_runlists) {
-                nvgpu_log_info(g, "runlist_id = %d", runlist_id);
-                return -EINVAL;
-        }
-
-        nvgpu_log_fn(g, "preempt id = %d, runlist_id = %d", id, runlist_id);
-
-        nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock);
 
         mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-
+        /*
+         * This is called from teardown path only. runlist_lock
+         * is already acquired before calling this function.
+         */
         ret = __locked_fifo_preempt_ch_tsg(g, id, id_type);
 
         if (!mutex_ret)
                 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
-        nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
-
         return ret;
 
 }
@@ -1010,9 +985,81 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 {
         struct tsg_gk20a *tsg = NULL;
         struct channel_gk20a *refch = NULL;
-        u32 runlists_mask, runlist_id;
+        u32 runlists_mask, rlid;
         struct fifo_runlist_info_gk20a *runlist = NULL;
         u32 engine_id, client_type = ~0;
+        struct fifo_gk20a *f = &g->fifo;
+        u32 runlist_id = FIFO_INVAL_RUNLIST_ID;
+        u32 num_runlists = 0;
+
+        nvgpu_log_fn(g, "acquire runlist_lock for all runlists");
+        for (rlid = 0; rlid < g->fifo.max_runlists; rlid++)
+                nvgpu_mutex_acquire(&f->runlist_info[rlid].
+                        runlist_lock);
+
+        /* get runlist id and tsg */
+        if (id_type == ID_TYPE_TSG) {
+                if (id != FIFO_INVAL_TSG_ID) {
+                        tsg = &g->fifo.tsg[id];
+                        runlist_id = tsg->runlist_id;
+                        if (runlist_id != FIFO_INVAL_RUNLIST_ID)
+                                num_runlists++;
+                        else
+                                nvgpu_log_fn(g, "tsg runlist id is invalid");
+                } else {
+                        nvgpu_log_fn(g, "id type is tsg but tsg id is inval");
+                }
+        } else if (id_type == ID_TYPE_CHANNEL) {
+                if (id != FIFO_INVAL_CHANNEL_ID) {
+                        runlist_id = f->channel[id].runlist_id;
+                        if (runlist_id != FIFO_INVAL_RUNLIST_ID)
+                                num_runlists++;
+                        else
+                                nvgpu_log_fn(g, "ch runlist id is invalid");
+
+                        if ((u32)f->channel[id].tsgid != FIFO_INVAL_TSG_ID)
+                                tsg = &f->tsg[f->channel[id].tsgid];
+                        else
+                                nvgpu_log_fn(g, "tsgid for ch is invalid");
+                } else {
+                        nvgpu_log_fn(g, "id type is ch but ch id is inval");
+                }
+        } else {
+                /*
+                 * id type is unknown, get runlist_id if eng mask is such that
+                 * it corresponds to single runlist id. If eng mask corresponds
+                 * to multiple runlists, then abort all runlists
+                 */
+                for (rlid = 0; rlid < f->max_runlists; rlid++) {
+                        if (act_eng_bitmask) {
+                                /* eng ids are known */
+                                runlist = &f->runlist_info[rlid];
+                                if (runlist->eng_bitmask & act_eng_bitmask) {
+                                        runlist_id = rlid;
+                                        num_runlists++;
+                                }
+                        } else {
+                                break;
+                        }
+                }
+                if (num_runlists > 1) /* abort all runlists */
+                        runlist_id = FIFO_INVAL_RUNLIST_ID;
+        }
+
+        /* if runlist_id is valid and there is only single runlist to be
+         * aborted, release runlist lock that are not
+         * needed for this recovery
+         */
+        if (runlist_id != FIFO_INVAL_RUNLIST_ID && num_runlists == 1) {
+                for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
+                        if (rlid != runlist_id) {
+                                nvgpu_log_fn(g, "release runlist_lock for "
+                                        "unused runlist id: %d", rlid);
+                                nvgpu_mutex_release(&f->runlist_info[rlid].
+                                        runlist_lock);
+                        }
+                }
+        }
 
         nvgpu_log(g, gpu_dbg_info, "id = %d, id_type = %d, rc_type = %d, "
                 "act_eng_bitmask = 0x%x, mmfault ptr = 0x%p",
@@ -1068,14 +1115,14 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
         if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
                 g->ops.fifo.preempt_ch_tsg(g, id, id_type);
         } else {
-                gv11b_fifo_preempt_runlists(g, runlists_mask);
+                gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
         }
 
         /* check if engine reset should be deferred */
-        for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
+        for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
 
-                runlist = &g->fifo.runlist_info[runlist_id];
-                if ((runlists_mask & BIT(runlist_id)) &&
+                runlist = &g->fifo.runlist_info[rlid];
+                if ((runlists_mask & BIT(rlid)) &&
                         runlist->reset_eng_bitmask) {
 
                         unsigned long __reset_eng_bitmask =
@@ -1145,10 +1192,10 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
                 gk20a_channel_put(refch);
         } else {
                 nvgpu_err(g, "id unknown, abort runlist");
-                for (runlist_id = 0; runlist_id < g->fifo.max_runlists;
-                                runlist_id++) {
-                        if (runlists_mask & BIT(runlist_id))
-                                g->ops.fifo.update_runlist(g, runlist_id,
+                for (rlid = 0; rlid < g->fifo.max_runlists;
+                                rlid++) {
+                        if (runlists_mask & BIT(rlid))
+                                g->ops.fifo.update_runlist(g, rlid,
                                         FIFO_INVAL_CHANNEL_ID, false, true);
                 }
         }
@@ -1158,6 +1205,18 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
         /* It is safe to enable ELPG again. */
         if (g->support_pmu && g->elpg_enabled)
                 nvgpu_pmu_enable_elpg(g);
+
+        /* release runlist_lock */
+        if (runlist_id != FIFO_INVAL_RUNLIST_ID) {
+                nvgpu_log_fn(g, "release runlist_lock runlist_id = %d",
+                        runlist_id);
+                nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
+        } else {
+                nvgpu_log_fn(g, "release runlist_lock for all runlists");
+                for (rlid = 0; rlid < g->fifo.max_runlists; rlid++)
+                        nvgpu_mutex_release(&f->runlist_info[rlid].
+                                runlist_lock);
+        }
 }
 
 void gv11b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f)