summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common/fb
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2018-06-27 01:57:02 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-19 16:54:26 -0400
commitb1d0d8ece83ba0aa7b1e7ea9062eedc5cd9e4e33 (patch)
tree5a88d345e23e05d3a3ca9018cedcf6b12958a20b /drivers/gpu/nvgpu/common/fb
parentd859c5f4a03b975dc493f72a35016e83adad279a (diff)
Revert "Revert: GV11B runlist preemption patches"
This reverts commit 0b02c8589dcc507865a8fd398431c45fbda2ba9c. Originally change was reverted as it was making ap_compute test on embedded-qnx-hv e3550-t194 fail. With fixes related to replacing tsg preempt with runlist preempt during teardown, preempt timeout set to 100 ms (earlier this was set to 1000ms for t194 and 3000ms for legacy chips) and not issuing preempt timeout recovery if preempt fails, helped resolve the issue. Bug 200426402 Change-Id: If9a68d028a155075444cc1bdf411057e3388d48e Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1762563 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/fb')
-rw-r--r--drivers/gpu/nvgpu/common/fb/fb_gv11b.c79
1 file changed, 52 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
index 69a71575..26dabd72 100644
--- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
@@ -792,10 +792,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
792static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, 792static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
793 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val) 793 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val)
794{ 794{
795 unsigned int id_type; 795 unsigned int id_type = ID_TYPE_UNKNOWN;
796 u32 num_lce, act_eng_bitmask = 0; 796 u32 num_lce, act_eng_bitmask = 0;
797 int err = 0; 797 int err = 0;
798 u32 id = ((u32)~0); 798 u32 id = FIFO_INVAL_TSG_ID;
799 unsigned int rc_type = RC_TYPE_NO_RC;
799 800
800 if (!mmfault->valid) 801 if (!mmfault->valid)
801 return; 802 return;
@@ -810,18 +811,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
810 /* CE page faults are not reported as replayable */ 811 /* CE page faults are not reported as replayable */
811 nvgpu_log(g, gpu_dbg_intr, "CE Faulted"); 812 nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
812 err = gv11b_fb_fix_page_fault(g, mmfault); 813 err = gv11b_fb_fix_page_fault(g, mmfault);
813 gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch, 814 if (mmfault->refch &&
814 mmfault->faulted_pbdma, mmfault->faulted_engine); 815 (u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) {
816 gv11b_fifo_reset_pbdma_and_eng_faulted(g,
817 &g->fifo.tsg[mmfault->refch->tsgid],
818 mmfault->faulted_pbdma,
819 mmfault->faulted_engine);
820 }
815 if (!err) { 821 if (!err) {
816 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed"); 822 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed");
817 *invalidate_replay_val = 0; 823 *invalidate_replay_val = 0;
818 /* refch in mmfault is assigned at the time of copying 824 if (mmfault->refch) {
819 * fault info from snap reg or bar2 fault buf 825 gk20a_channel_put(mmfault->refch);
820 */ 826 mmfault->refch = NULL;
821 gk20a_channel_put(mmfault->refch); 827 }
822 return; 828 return;
823 } 829 }
824 /* Do recovery. Channel recovery needs refch */ 830 /* Do recovery */
825 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed"); 831 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
826 } 832 }
827 833
@@ -833,16 +839,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
833 * instance block, the fault cannot be isolated to a 839 * instance block, the fault cannot be isolated to a
834 * single context so we need to reset the entire runlist 840 * single context so we need to reset the entire runlist
835 */ 841 */
836 id_type = ID_TYPE_UNKNOWN; 842 rc_type = RC_TYPE_MMU_FAULT;
837 843
838 } else if (mmfault->refch) { 844 } else if (mmfault->refch) {
839 if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
840 id = mmfault->refch->tsgid;
841 id_type = ID_TYPE_TSG;
842 } else {
843 id = mmfault->chid;
844 id_type = ID_TYPE_CHANNEL;
845 }
846 if (mmfault->refch->mmu_nack_handled) { 845 if (mmfault->refch->mmu_nack_handled) {
847 /* We have already recovered for the same 846 /* We have already recovered for the same
848 * context, skip doing another recovery. 847 * context, skip doing another recovery.
@@ -863,19 +862,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
863 */ 862 */
864 gk20a_channel_put(mmfault->refch); 863 gk20a_channel_put(mmfault->refch);
865 return; 864 return;
865 } else {
866 /* Indicate recovery is handled if mmu fault is
867 * a result of mmu nack.
868 */
869 mmfault->refch->mmu_nack_handled = true;
870 }
871
872 rc_type = RC_TYPE_MMU_FAULT;
873 if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
874 id = mmfault->refch->tsgid;
875 if (id != FIFO_INVAL_TSG_ID)
876 id_type = ID_TYPE_TSG;
877 } else {
878 nvgpu_err(g, "bare channels not supported");
866 } 879 }
867 } else {
868 id_type = ID_TYPE_UNKNOWN;
869 } 880 }
870 if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) 881
882 /* engine is faulted */
883 if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) {
871 act_eng_bitmask = BIT(mmfault->faulted_engine); 884 act_eng_bitmask = BIT(mmfault->faulted_engine);
885 rc_type = RC_TYPE_MMU_FAULT;
886 }
872 887
873 /* Indicate recovery is handled if mmu fault is a result of 888 /* refch in mmfault is assigned at the time of copying
874 * mmu nack. 889 * fault info from snap reg or bar2 fault buf
875 */ 890 */
876 mmfault->refch->mmu_nack_handled = true; 891 if (mmfault->refch) {
877 g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, 892 gk20a_channel_put(mmfault->refch);
878 id, id_type, RC_TYPE_MMU_FAULT, mmfault); 893 mmfault->refch = NULL;
894 }
895
896 if (rc_type != RC_TYPE_NO_RC)
897 g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
898 id, id_type, rc_type, mmfault);
879 } else { 899 } else {
880 if (mmfault->fault_type == gmmu_fault_type_pte_v()) { 900 if (mmfault->fault_type == gmmu_fault_type_pte_v()) {
881 nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix"); 901 nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
@@ -894,7 +914,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
894 /* refch in mmfault is assigned at the time of copying 914 /* refch in mmfault is assigned at the time of copying
895 * fault info from snap reg or bar2 fault buf 915 * fault info from snap reg or bar2 fault buf
896 */ 916 */
897 gk20a_channel_put(mmfault->refch); 917 if (mmfault->refch) {
918 gk20a_channel_put(mmfault->refch);
919 mmfault->refch = NULL;
920 }
898 } 921 }
899} 922}
900 923
@@ -985,8 +1008,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
985 next_fault_addr = mmfault->fault_addr; 1008 next_fault_addr = mmfault->fault_addr;
986 if (prev_fault_addr == next_fault_addr) { 1009 if (prev_fault_addr == next_fault_addr) {
987 nvgpu_log(g, gpu_dbg_intr, "pte already scanned"); 1010 nvgpu_log(g, gpu_dbg_intr, "pte already scanned");
988 if (mmfault->refch) 1011 if (mmfault->refch) {
989 gk20a_channel_put(mmfault->refch); 1012 gk20a_channel_put(mmfault->refch);
1013 mmfault->refch = NULL;
1014 }
990 continue; 1015 continue;
991 } 1016 }
992 } 1017 }