summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2018-02-23 16:00:00 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-06-24 12:53:44 -0400
commitcd6e821cf66837a2c3479e928414007064b9c496 (patch)
tree18e5cfde24246342b05e8431ba8b816de9ec407b /drivers/gpu/nvgpu/gv11b/fb_gv11b.c
parent5cf1eb145fef763f7153e449be60f1a7602e2c81 (diff)
gpu: nvgpu: gv11b: add runlist abort & remove bare channel
-Add support for aborting runlist/s. Aborting runlist/s, will abort all active tsgs and associated active channels within these active tsgs -Bare channels are no longer supported. Remove recovery support for bare channels. In case there are bare channels, recovery will trigger runlist abort Bug 2125776 Bug 2108544 Bug 2105322 Bug 2092051 Bug 2048824 Bug 2043838 Bug 2039587 Bug 2028993 Bug 2029245 Bug 2065990 Bug 1945121 Bug 200401707 Bug 200393631 Bug 200327596 Change-Id: I6bec8a0004508cf65ea128bf641a26bf4c2f236d Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1640567 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fb_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/fb_gv11b.c79
1 file changed, 52 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index 54f0d2d8..2ceb816b 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -870,10 +870,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
870static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, 870static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
871 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val) 871 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val)
872{ 872{
873 unsigned int id_type; 873 unsigned int id_type = ID_TYPE_UNKNOWN;
874 u32 num_lce, act_eng_bitmask = 0; 874 u32 num_lce, act_eng_bitmask = 0;
875 int err = 0; 875 int err = 0;
876 u32 id = ((u32)~0); 876 u32 id = FIFO_INVAL_TSG_ID;
877 unsigned int rc_type = RC_TYPE_NO_RC;
877 878
878 if (!mmfault->valid) 879 if (!mmfault->valid)
879 return; 880 return;
@@ -888,18 +889,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
888 /* CE page faults are not reported as replayable */ 889 /* CE page faults are not reported as replayable */
889 nvgpu_log(g, gpu_dbg_intr, "CE Faulted"); 890 nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
890 err = gv11b_fb_fix_page_fault(g, mmfault); 891 err = gv11b_fb_fix_page_fault(g, mmfault);
891 gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch, 892 if (mmfault->refch &&
892 mmfault->faulted_pbdma, mmfault->faulted_engine); 893 (u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) {
894 gv11b_fifo_reset_pbdma_and_eng_faulted(g,
895 &g->fifo.tsg[mmfault->refch->tsgid],
896 mmfault->faulted_pbdma,
897 mmfault->faulted_engine);
898 }
893 if (!err) { 899 if (!err) {
894 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed"); 900 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed");
895 *invalidate_replay_val = 0; 901 *invalidate_replay_val = 0;
896 /* refch in mmfault is assigned at the time of copying 902 if (mmfault->refch) {
897 * fault info from snap reg or bar2 fault buf 903 gk20a_channel_put(mmfault->refch);
898 */ 904 mmfault->refch = NULL;
899 gk20a_channel_put(mmfault->refch); 905 }
900 return; 906 return;
901 } 907 }
902 /* Do recovery. Channel recovery needs refch */ 908 /* Do recovery */
903 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed"); 909 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
904 } 910 }
905 911
@@ -911,16 +917,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
911 * instance block, the fault cannot be isolated to a 917 * instance block, the fault cannot be isolated to a
912 * single context so we need to reset the entire runlist 918 * single context so we need to reset the entire runlist
913 */ 919 */
914 id_type = ID_TYPE_UNKNOWN; 920 rc_type = RC_TYPE_MMU_FAULT;
915 921
916 } else if (mmfault->refch) { 922 } else if (mmfault->refch) {
917 if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
918 id = mmfault->refch->tsgid;
919 id_type = ID_TYPE_TSG;
920 } else {
921 id = mmfault->chid;
922 id_type = ID_TYPE_CHANNEL;
923 }
924 if (mmfault->refch->mmu_nack_handled) { 923 if (mmfault->refch->mmu_nack_handled) {
925 /* We have already recovered for the same 924 /* We have already recovered for the same
926 * context, skip doing another recovery. 925 * context, skip doing another recovery.
@@ -941,19 +940,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
941 */ 940 */
942 gk20a_channel_put(mmfault->refch); 941 gk20a_channel_put(mmfault->refch);
943 return; 942 return;
943 } else {
944 /* Indicate recovery is handled if mmu fault is
945 * a result of mmu nack.
946 */
947 mmfault->refch->mmu_nack_handled = true;
948 }
949
950 rc_type = RC_TYPE_MMU_FAULT;
951 if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
952 id = mmfault->refch->tsgid;
953 if (id != FIFO_INVAL_TSG_ID)
954 id_type = ID_TYPE_TSG;
955 } else {
956 nvgpu_err(g, "bare channels not supported");
944 } 957 }
945 } else {
946 id_type = ID_TYPE_UNKNOWN;
947 } 958 }
948 if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) 959
960 /* engine is faulted */
961 if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) {
949 act_eng_bitmask = BIT(mmfault->faulted_engine); 962 act_eng_bitmask = BIT(mmfault->faulted_engine);
963 rc_type = RC_TYPE_MMU_FAULT;
964 }
950 965
951 /* Indicate recovery is handled if mmu fault is a result of 966 /* refch in mmfault is assigned at the time of copying
952 * mmu nack. 967 * fault info from snap reg or bar2 fault buf
953 */ 968 */
954 mmfault->refch->mmu_nack_handled = true; 969 if (mmfault->refch) {
955 g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, 970 gk20a_channel_put(mmfault->refch);
956 id, id_type, RC_TYPE_MMU_FAULT, mmfault); 971 mmfault->refch = NULL;
972 }
973
974 if (rc_type != RC_TYPE_NO_RC)
975 g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
976 id, id_type, rc_type, mmfault);
957 } else { 977 } else {
958 if (mmfault->fault_type == gmmu_fault_type_pte_v()) { 978 if (mmfault->fault_type == gmmu_fault_type_pte_v()) {
959 nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix"); 979 nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
@@ -972,7 +992,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
972 /* refch in mmfault is assigned at the time of copying 992 /* refch in mmfault is assigned at the time of copying
973 * fault info from snap reg or bar2 fault buf 993 * fault info from snap reg or bar2 fault buf
974 */ 994 */
975 gk20a_channel_put(mmfault->refch); 995 if (mmfault->refch) {
996 gk20a_channel_put(mmfault->refch);
997 mmfault->refch = NULL;
998 }
976 } 999 }
977} 1000}
978 1001
@@ -1061,8 +1084,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
1061 next_fault_addr = mmfault->fault_addr; 1084 next_fault_addr = mmfault->fault_addr;
1062 if (prev_fault_addr == next_fault_addr) { 1085 if (prev_fault_addr == next_fault_addr) {
1063 nvgpu_log(g, gpu_dbg_intr, "pte already scanned"); 1086 nvgpu_log(g, gpu_dbg_intr, "pte already scanned");
1064 if (mmfault->refch) 1087 if (mmfault->refch) {
1065 gk20a_channel_put(mmfault->refch); 1088 gk20a_channel_put(mmfault->refch);
1089 mmfault->refch = NULL;
1090 }
1066 continue; 1091 continue;
1067 } 1092 }
1068 } 1093 }