diff options
author | Seema Khowala <seemaj@nvidia.com> | 2018-02-23 16:00:00 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-24 12:53:44 -0400 |
commit | cd6e821cf66837a2c3479e928414007064b9c496 (patch) | |
tree | 18e5cfde24246342b05e8431ba8b816de9ec407b /drivers/gpu/nvgpu/gv11b/fb_gv11b.c | |
parent | 5cf1eb145fef763f7153e449be60f1a7602e2c81 (diff) |
gpu: nvgpu: gv11b: add runlist abort & remove bare channel
-Add support for aborting runlist(s). Aborting a runlist
will abort all active TSGs on it, along with the active
channels associated with those TSGs.
-Bare channels are no longer supported. Remove recovery
support for bare channels. If bare channels are present,
recovery will trigger a runlist abort instead.
Bug 2125776
Bug 2108544
Bug 2105322
Bug 2092051
Bug 2048824
Bug 2043838
Bug 2039587
Bug 2028993
Bug 2029245
Bug 2065990
Bug 1945121
Bug 200401707
Bug 200393631
Bug 200327596
Change-Id: I6bec8a0004508cf65ea128bf641a26bf4c2f236d
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1640567
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fb_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/fb_gv11b.c | 79 |
1 files changed, 52 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c index 54f0d2d8..2ceb816b 100644 --- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c | |||
@@ -870,10 +870,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g, | |||
870 | static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, | 870 | static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, |
871 | struct mmu_fault_info *mmfault, u32 *invalidate_replay_val) | 871 | struct mmu_fault_info *mmfault, u32 *invalidate_replay_val) |
872 | { | 872 | { |
873 | unsigned int id_type; | 873 | unsigned int id_type = ID_TYPE_UNKNOWN; |
874 | u32 num_lce, act_eng_bitmask = 0; | 874 | u32 num_lce, act_eng_bitmask = 0; |
875 | int err = 0; | 875 | int err = 0; |
876 | u32 id = ((u32)~0); | 876 | u32 id = FIFO_INVAL_TSG_ID; |
877 | unsigned int rc_type = RC_TYPE_NO_RC; | ||
877 | 878 | ||
878 | if (!mmfault->valid) | 879 | if (!mmfault->valid) |
879 | return; | 880 | return; |
@@ -888,18 +889,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, | |||
888 | /* CE page faults are not reported as replayable */ | 889 | /* CE page faults are not reported as replayable */ |
889 | nvgpu_log(g, gpu_dbg_intr, "CE Faulted"); | 890 | nvgpu_log(g, gpu_dbg_intr, "CE Faulted"); |
890 | err = gv11b_fb_fix_page_fault(g, mmfault); | 891 | err = gv11b_fb_fix_page_fault(g, mmfault); |
891 | gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch, | 892 | if (mmfault->refch && |
892 | mmfault->faulted_pbdma, mmfault->faulted_engine); | 893 | (u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) { |
894 | gv11b_fifo_reset_pbdma_and_eng_faulted(g, | ||
895 | &g->fifo.tsg[mmfault->refch->tsgid], | ||
896 | mmfault->faulted_pbdma, | ||
897 | mmfault->faulted_engine); | ||
898 | } | ||
893 | if (!err) { | 899 | if (!err) { |
894 | nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed"); | 900 | nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed"); |
895 | *invalidate_replay_val = 0; | 901 | *invalidate_replay_val = 0; |
896 | /* refch in mmfault is assigned at the time of copying | 902 | if (mmfault->refch) { |
897 | * fault info from snap reg or bar2 fault buf | 903 | gk20a_channel_put(mmfault->refch); |
898 | */ | 904 | mmfault->refch = NULL; |
899 | gk20a_channel_put(mmfault->refch); | 905 | } |
900 | return; | 906 | return; |
901 | } | 907 | } |
902 | /* Do recovery. Channel recovery needs refch */ | 908 | /* Do recovery */ |
903 | nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed"); | 909 | nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed"); |
904 | } | 910 | } |
905 | 911 | ||
@@ -911,16 +917,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, | |||
911 | * instance block, the fault cannot be isolated to a | 917 | * instance block, the fault cannot be isolated to a |
912 | * single context so we need to reset the entire runlist | 918 | * single context so we need to reset the entire runlist |
913 | */ | 919 | */ |
914 | id_type = ID_TYPE_UNKNOWN; | 920 | rc_type = RC_TYPE_MMU_FAULT; |
915 | 921 | ||
916 | } else if (mmfault->refch) { | 922 | } else if (mmfault->refch) { |
917 | if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) { | ||
918 | id = mmfault->refch->tsgid; | ||
919 | id_type = ID_TYPE_TSG; | ||
920 | } else { | ||
921 | id = mmfault->chid; | ||
922 | id_type = ID_TYPE_CHANNEL; | ||
923 | } | ||
924 | if (mmfault->refch->mmu_nack_handled) { | 923 | if (mmfault->refch->mmu_nack_handled) { |
925 | /* We have already recovered for the same | 924 | /* We have already recovered for the same |
926 | * context, skip doing another recovery. | 925 | * context, skip doing another recovery. |
@@ -941,19 +940,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, | |||
941 | */ | 940 | */ |
942 | gk20a_channel_put(mmfault->refch); | 941 | gk20a_channel_put(mmfault->refch); |
943 | return; | 942 | return; |
943 | } else { | ||
944 | /* Indicate recovery is handled if mmu fault is | ||
945 | * a result of mmu nack. | ||
946 | */ | ||
947 | mmfault->refch->mmu_nack_handled = true; | ||
948 | } | ||
949 | |||
950 | rc_type = RC_TYPE_MMU_FAULT; | ||
951 | if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) { | ||
952 | id = mmfault->refch->tsgid; | ||
953 | if (id != FIFO_INVAL_TSG_ID) | ||
954 | id_type = ID_TYPE_TSG; | ||
955 | } else { | ||
956 | nvgpu_err(g, "bare channels not supported"); | ||
944 | } | 957 | } |
945 | } else { | ||
946 | id_type = ID_TYPE_UNKNOWN; | ||
947 | } | 958 | } |
948 | if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) | 959 | |
960 | /* engine is faulted */ | ||
961 | if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) { | ||
949 | act_eng_bitmask = BIT(mmfault->faulted_engine); | 962 | act_eng_bitmask = BIT(mmfault->faulted_engine); |
963 | rc_type = RC_TYPE_MMU_FAULT; | ||
964 | } | ||
950 | 965 | ||
951 | /* Indicate recovery is handled if mmu fault is a result of | 966 | /* refch in mmfault is assigned at the time of copying |
952 | * mmu nack. | 967 | * fault info from snap reg or bar2 fault buf |
953 | */ | 968 | */ |
954 | mmfault->refch->mmu_nack_handled = true; | 969 | if (mmfault->refch) { |
955 | g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, | 970 | gk20a_channel_put(mmfault->refch); |
956 | id, id_type, RC_TYPE_MMU_FAULT, mmfault); | 971 | mmfault->refch = NULL; |
972 | } | ||
973 | |||
974 | if (rc_type != RC_TYPE_NO_RC) | ||
975 | g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, | ||
976 | id, id_type, rc_type, mmfault); | ||
957 | } else { | 977 | } else { |
958 | if (mmfault->fault_type == gmmu_fault_type_pte_v()) { | 978 | if (mmfault->fault_type == gmmu_fault_type_pte_v()) { |
959 | nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix"); | 979 | nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix"); |
@@ -972,7 +992,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, | |||
972 | /* refch in mmfault is assigned at the time of copying | 992 | /* refch in mmfault is assigned at the time of copying |
973 | * fault info from snap reg or bar2 fault buf | 993 | * fault info from snap reg or bar2 fault buf |
974 | */ | 994 | */ |
975 | gk20a_channel_put(mmfault->refch); | 995 | if (mmfault->refch) { |
996 | gk20a_channel_put(mmfault->refch); | ||
997 | mmfault->refch = NULL; | ||
998 | } | ||
976 | } | 999 | } |
977 | } | 1000 | } |
978 | 1001 | ||
@@ -1061,8 +1084,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g, | |||
1061 | next_fault_addr = mmfault->fault_addr; | 1084 | next_fault_addr = mmfault->fault_addr; |
1062 | if (prev_fault_addr == next_fault_addr) { | 1085 | if (prev_fault_addr == next_fault_addr) { |
1063 | nvgpu_log(g, gpu_dbg_intr, "pte already scanned"); | 1086 | nvgpu_log(g, gpu_dbg_intr, "pte already scanned"); |
1064 | if (mmfault->refch) | 1087 | if (mmfault->refch) { |
1065 | gk20a_channel_put(mmfault->refch); | 1088 | gk20a_channel_put(mmfault->refch); |
1089 | mmfault->refch = NULL; | ||
1090 | } | ||
1066 | continue; | 1091 | continue; |
1067 | } | 1092 | } |
1068 | } | 1093 | } |