diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2014-09-17 03:08:34 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:11:17 -0400 |
commit | 2f232348e686f92a6a34e5fa7b98884d4b48313b (patch) | |
tree | 4e2dc2229e22bf85ac5fdd9a4a37cab089f79bc4 /drivers | |
parent | e4a7bc1602cc9a041dabee4da4a16594f2f9552a (diff) |
gpu: nvgpu: handle MMU fault for TSG
- add support to handle MMU faults on a channel in TSG
- first get the ID and type of the channel that the engine is running
- if TSG, abort each channel in it
- if regular channel, abort that channel
- also, add two versions of the API set_ctx_mmu_error(), one for a
regular channel and another for a TSG
Bug 1470692
Change-Id: Ia7b01b81739598459702ed172180adb00e345eba
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/497874
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 99 |
2 files changed, 68 insertions, 33 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index c174afcd..7a3132c9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -585,7 +585,7 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) | |||
585 | ch->error_notifier->info32 = error; | 585 | ch->error_notifier->info32 = error; |
586 | ch->error_notifier->status = 0xffff; | 586 | ch->error_notifier->status = 0xffff; |
587 | gk20a_err(dev_from_gk20a(ch->g), | 587 | gk20a_err(dev_from_gk20a(ch->g), |
588 | "error notifier set to %d\n", error); | 588 | "error notifier set to %d for ch %d\n", error, ch->hw_chid); |
589 | } | 589 | } |
590 | } | 590 | } |
591 | 591 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 7a6f5608..3f35e7b2 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -911,14 +911,12 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, | |||
911 | } | 911 | } |
912 | 912 | ||
913 | static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, | 913 | static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, |
914 | struct channel_gk20a *ch) { | 914 | struct channel_gk20a *ch) |
915 | { | ||
915 | bool verbose = true; | 916 | bool verbose = true; |
916 | if (!ch) | 917 | if (!ch) |
917 | return verbose; | 918 | return verbose; |
918 | 919 | ||
919 | gk20a_err(dev_from_gk20a(g), | ||
920 | "channel %d generated a mmu fault", | ||
921 | ch->hw_chid); | ||
922 | if (ch->error_notifier) { | 920 | if (ch->error_notifier) { |
923 | u32 err = ch->error_notifier->info32; | 921 | u32 err = ch->error_notifier->info32; |
924 | if (ch->error_notifier->status == 0xffff) { | 922 | if (ch->error_notifier->status == 0xffff) { |
@@ -944,6 +942,31 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, | |||
944 | return verbose; | 942 | return verbose; |
945 | } | 943 | } |
946 | 944 | ||
945 | static bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | ||
946 | struct channel_gk20a *ch) | ||
947 | { | ||
948 | gk20a_err(dev_from_gk20a(g), | ||
949 | "channel %d generated a mmu fault", ch->hw_chid); | ||
950 | |||
951 | return gk20a_fifo_set_ctx_mmu_error(g, ch); | ||
952 | } | ||
953 | |||
954 | static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | ||
955 | struct tsg_gk20a *tsg) | ||
956 | { | ||
957 | bool ret = true; | ||
958 | struct channel_gk20a *ch = NULL; | ||
959 | |||
960 | gk20a_err(dev_from_gk20a(g), | ||
961 | "TSG %d generated a mmu fault", tsg->tsgid); | ||
962 | |||
963 | mutex_lock(&tsg->ch_list_lock); | ||
964 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) | ||
965 | ret = gk20a_fifo_set_ctx_mmu_error(g, ch); | ||
966 | mutex_unlock(&tsg->ch_list_lock); | ||
967 | |||
968 | return ret; | ||
969 | } | ||
947 | 970 | ||
948 | static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) | 971 | static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) |
949 | { | 972 | { |
@@ -987,6 +1010,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) | |||
987 | u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id); | 1010 | u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id); |
988 | struct fifo_mmu_fault_info_gk20a f; | 1011 | struct fifo_mmu_fault_info_gk20a f; |
989 | struct channel_gk20a *ch = NULL; | 1012 | struct channel_gk20a *ch = NULL; |
1013 | struct tsg_gk20a *tsg = NULL; | ||
990 | 1014 | ||
991 | get_exception_mmu_fault_info(g, engine_mmu_id, &f); | 1015 | get_exception_mmu_fault_info(g, engine_mmu_id, &f); |
992 | trace_gk20a_mmu_fault(f.fault_hi_v, | 1016 | trace_gk20a_mmu_fault(f.fault_hi_v, |
@@ -1008,49 +1032,61 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) | |||
1008 | f.fault_type_v, f.fault_type_desc, | 1032 | f.fault_type_v, f.fault_type_desc, |
1009 | f.fault_info_v, f.inst_ptr); | 1033 | f.fault_info_v, f.inst_ptr); |
1010 | 1034 | ||
1011 | /* get the channel */ | 1035 | /* get the channel/TSG */ |
1012 | if (fake_fault) { | 1036 | if (fake_fault) { |
1013 | /* read and parse engine status */ | 1037 | /* read and parse engine status */ |
1014 | u32 status = gk20a_readl(g, | 1038 | u32 status = gk20a_readl(g, |
1015 | fifo_engine_status_r(engine_id)); | 1039 | fifo_engine_status_r(engine_id)); |
1016 | u32 ctx_status = | 1040 | u32 ctx_status = |
1017 | fifo_engine_status_ctx_status_v(status); | 1041 | fifo_engine_status_ctx_status_v(status); |
1018 | bool type_ch = fifo_pbdma_status_id_type_v(status) == | ||
1019 | fifo_pbdma_status_id_type_chid_v(); | ||
1020 | 1042 | ||
1021 | /* use next_id if context load is failing */ | 1043 | /* use next_id if context load is failing */ |
1022 | u32 id = (ctx_status == | 1044 | u32 id = (ctx_status == |
1023 | fifo_engine_status_ctx_status_ctxsw_load_v()) ? | 1045 | fifo_engine_status_ctx_status_ctxsw_load_v()) ? |
1024 | fifo_engine_status_next_id_v(status) : | 1046 | fifo_engine_status_next_id_v(status) : |
1025 | fifo_engine_status_id_v(status); | 1047 | fifo_engine_status_id_v(status); |
1048 | u32 type = (ctx_status == | ||
1049 | fifo_engine_status_ctx_status_ctxsw_load_v()) ? | ||
1050 | fifo_engine_status_next_id_type_v(status) : | ||
1051 | fifo_engine_status_id_type_v(status); | ||
1026 | 1052 | ||
1027 | if (type_ch) { | 1053 | if (type == fifo_engine_status_id_type_tsgid_v()) |
1028 | ch = g->fifo.channel + id; | 1054 | tsg = &g->fifo.tsg[id]; |
1029 | } else { | 1055 | else if (type == fifo_engine_status_id_type_chid_v()) |
1030 | gk20a_err(dev_from_gk20a(g), "non-chid type not supported"); | 1056 | ch = &g->fifo.channel[id]; |
1031 | WARN_ON(1); | ||
1032 | } | ||
1033 | } else { | 1057 | } else { |
1034 | /* read channel based on instruction pointer */ | 1058 | /* read channel based on instruction pointer */ |
1035 | ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr); | 1059 | ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr); |
1036 | } | 1060 | } |
1037 | 1061 | ||
1038 | if (ch) { | 1062 | if (ch && gk20a_is_channel_marked_as_tsg(ch)) |
1039 | /* check if engine reset should be deferred */ | 1063 | tsg = &g->fifo.tsg[ch->tsgid]; |
1040 | if (gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) { | ||
1041 | g->fifo.mmu_fault_engines = fault_id; | ||
1042 | 1064 | ||
1043 | /* handled during channel free */ | 1065 | /* check if engine reset should be deferred */ |
1044 | g->fifo.deferred_reset_pending = true; | 1066 | if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g, |
1045 | } else | 1067 | engine_id, &f, fake_fault)) { |
1046 | verbose = gk20a_fifo_set_ctx_mmu_error(g, ch); | 1068 | g->fifo.mmu_fault_engines = fault_id; |
1047 | 1069 | ||
1048 | if (ch->in_use) { | 1070 | /* handled during channel free */ |
1049 | /* disable the channel from hw and increment | 1071 | g->fifo.deferred_reset_pending = true; |
1050 | * syncpoints */ | 1072 | } |
1051 | gk20a_channel_abort(ch); | ||
1052 | } | ||
1053 | 1073 | ||
1074 | /* disable the channel/TSG from hw and increment | ||
1075 | * syncpoints */ | ||
1076 | if (tsg) { | ||
1077 | struct channel_gk20a *ch = NULL; | ||
1078 | if (!g->fifo.deferred_reset_pending) | ||
1079 | verbose = | ||
1080 | gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); | ||
1081 | mutex_lock(&tsg->ch_list_lock); | ||
1082 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) | ||
1083 | gk20a_channel_abort(ch); | ||
1084 | mutex_unlock(&tsg->ch_list_lock); | ||
1085 | } else if (ch) { | ||
1086 | if (!g->fifo.deferred_reset_pending) | ||
1087 | verbose = | ||
1088 | gk20a_fifo_set_ctx_mmu_error_ch(g, ch); | ||
1089 | gk20a_channel_abort(ch); | ||
1054 | } else if (f.inst_ptr == | 1090 | } else if (f.inst_ptr == |
1055 | g->mm.bar1.inst_block.cpu_pa) { | 1091 | g->mm.bar1.inst_block.cpu_pa) { |
1056 | gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); | 1092 | gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); |
@@ -1192,7 +1228,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) | |||
1192 | 1228 | ||
1193 | gk20a_channel_abort(ch); | 1229 | gk20a_channel_abort(ch); |
1194 | 1230 | ||
1195 | if (gk20a_fifo_set_ctx_mmu_error(g, ch)) | 1231 | if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch)) |
1196 | gk20a_debug_dump(g->dev); | 1232 | gk20a_debug_dump(g->dev); |
1197 | } | 1233 | } |
1198 | } | 1234 | } |
@@ -1206,13 +1242,12 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) | |||
1206 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; | 1242 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; |
1207 | struct channel_gk20a *ch; | 1243 | struct channel_gk20a *ch; |
1208 | 1244 | ||
1209 | mutex_lock(&tsg->ch_list_lock); | 1245 | if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg)) |
1210 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | 1246 | gk20a_debug_dump(g->dev); |
1211 | if (gk20a_fifo_set_ctx_mmu_error(g, ch)) | ||
1212 | gk20a_debug_dump(g->dev); | ||
1213 | 1247 | ||
1248 | mutex_lock(&tsg->ch_list_lock); | ||
1249 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) | ||
1214 | gk20a_channel_abort(ch); | 1250 | gk20a_channel_abort(ch); |
1215 | } | ||
1216 | mutex_unlock(&tsg->ch_list_lock); | 1251 | mutex_unlock(&tsg->ch_list_lock); |
1217 | } | 1252 | } |
1218 | } | 1253 | } |