summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common/fb
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2018-06-27 01:57:02 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-19 16:54:26 -0400
commitb1d0d8ece83ba0aa7b1e7ea9062eedc5cd9e4e33 (patch)
tree5a88d345e23e05d3a3ca9018cedcf6b12958a20b /drivers/gpu/nvgpu/common/fb
parentd859c5f4a03b975dc493f72a35016e83adad279a (diff)
Revert "Revert: GV11B runlist preemption patches"
This reverts commit 0b02c8589dcc507865a8fd398431c45fbda2ba9c. Originally change was reverted as it was making ap_compute test on embedded-qnx-hv e3550-t194 fail. With fixes related to replacing tsg preempt with runlist preempt during teardown, preempt timeout set to 100 ms (earlier this was set to 1000ms for t194 and 3000ms for legacy chips) and not issuing preempt timeout recovery if preempt fails, helped resolve the issue. Bug 200426402 Change-Id: If9a68d028a155075444cc1bdf411057e3388d48e Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1762563 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/fb')
-rw-r--r--drivers/gpu/nvgpu/common/fb/fb_gv11b.c79
1 file changed, 52 insertions, 27 deletions
diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
index 69a71575..26dabd72 100644
--- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
@@ -792,10 +792,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
792static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, 792static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
793 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val) 793 struct mmu_fault_info *mmfault, u32 *invalidate_replay_val)
794{ 794{
795 unsigned int id_type; 795 unsigned int id_type = ID_TYPE_UNKNOWN;
796 u32 num_lce, act_eng_bitmask = 0; 796 u32 num_lce, act_eng_bitmask = 0;
797 int err = 0; 797 int err = 0;
798 u32 id = ((u32)~0); 798 u32 id = FIFO_INVAL_TSG_ID;
799 unsigned int rc_type = RC_TYPE_NO_RC;
799 800
800 if (!mmfault->valid) 801 if (!mmfault->valid)
801 return; 802 return;
@@ -810,18 +811,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
810 /* CE page faults are not reported as replayable */ 811 /* CE page faults are not reported as replayable */
811 nvgpu_log(g, gpu_dbg_intr, "CE Faulted"); 812 nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
812 err = gv11b_fb_fix_page_fault(g, mmfault); 813 err = gv11b_fb_fix_page_fault(g, mmfault);
813 gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch, 814 if (mmfault->refch &&
814 mmfault->faulted_pbdma, mmfault->faulted_engine); 815 (u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) {
816 gv11b_fifo_reset_pbdma_and_eng_faulted(g,
817 &g->fifo.tsg[mmfault->refch->tsgid],
818 mmfault->faulted_pbdma,
819 mmfault->faulted_engine);
820 }
815 if (!err) { 821 if (!err) {
816 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed"); 822 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed");
817 *invalidate_replay_val = 0; 823 *invalidate_replay_val = 0;
818 /* refch in mmfault is assigned at the time of copying 824 if (mmfault->refch) {
819 * fault info from snap reg or bar2 fault buf 825 gk20a_channel_put(mmfault->refch);
820 */ 826 mmfault->refch = NULL;
821 gk20a_channel_put(mmfault->refch); 827 }
822 return; 828 return;
823 } 829 }
824 /* Do recovery. Channel recovery needs refch */ 830 /* Do recovery */
825 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed"); 831 nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
826 } 832 }
827 833
@@ -833,16 +839,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
833 * instance block, the fault cannot be isolated to a 839 * instance block, the fault cannot be isolated to a
834 * single context so we need to reset the entire runlist 840 * single context so we need to reset the entire runlist
835 */ 841 */
836 id_type = ID_TYPE_UNKNOWN; 842 rc_type = RC_TYPE_MMU_FAULT;
837 843
838 } else if (mmfault->refch) { 844 } else if (mmfault->refch) {
839 if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
840 id = mmfault->refch->tsgid;
841 id_type = ID_TYPE_TSG;
842 } else {
843 id = mmfault->chid;
844 id_type = ID_TYPE_CHANNEL;
845 }
846 if (mmfault->refch->mmu_nack_handled) { 845 if (mmfault->refch->mmu_nack_handled) {
847 /* We have already recovered for the same 846 /* We have already recovered for the same
848 * context, skip doing another recovery. 847 * context, skip doing another recovery.
@@ -863,19 +862,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
863 */ 862 */
864 gk20a_channel_put(mmfault->refch); 863 gk20a_channel_put(mmfault->refch);
865 return; 864 return;
865 } else {
866 /* Indicate recovery is handled if mmu fault is
867 * a result of mmu nack.
868 */
869 mmfault->refch->mmu_nack_handled = true;
870 }
871
872 rc_type = RC_TYPE_MMU_FAULT;
873 if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
874 id = mmfault->refch->tsgid;
875 if (id != FIFO_INVAL_TSG_ID)
876 id_type = ID_TYPE_TSG;
877 } else {
878 nvgpu_err(g, "bare channels not supported");
866 } 879 }
867 } else {
868 id_type = ID_TYPE_UNKNOWN;
869 } 880 }
870 if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) 881
882 /* engine is faulted */
883 if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) {
871 act_eng_bitmask = BIT(mmfault->faulted_engine); 884 act_eng_bitmask = BIT(mmfault->faulted_engine);
885 rc_type = RC_TYPE_MMU_FAULT;
886 }
872 887
873 /* Indicate recovery is handled if mmu fault is a result of 888 /* refch in mmfault is assigned at the time of copying
874 * mmu nack. 889 * fault info from snap reg or bar2 fault buf
875 */ 890 */
876 mmfault->refch->mmu_nack_handled = true; 891 if (mmfault->refch) {
877 g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, 892 gk20a_channel_put(mmfault->refch);
878 id, id_type, RC_TYPE_MMU_FAULT, mmfault); 893 mmfault->refch = NULL;
894 }
895
896 if (rc_type != RC_TYPE_NO_RC)
897 g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
898 id, id_type, rc_type, mmfault);
879 } else { 899 } else {
880 if (mmfault->fault_type == gmmu_fault_type_pte_v()) { 900 if (mmfault->fault_type == gmmu_fault_type_pte_v()) {
881 nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix"); 901 nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
@@ -894,7 +914,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
894 /* refch in mmfault is assigned at the time of copying 914 /* refch in mmfault is assigned at the time of copying
895 * fault info from snap reg or bar2 fault buf 915 * fault info from snap reg or bar2 fault buf
896 */ 916 */
897 gk20a_channel_put(mmfault->refch); 917 if (mmfault->refch) {
918 gk20a_channel_put(mmfault->refch);
919 mmfault->refch = NULL;
920 }
898 } 921 }
899} 922}
900 923
@@ -985,8 +1008,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
985 next_fault_addr = mmfault->fault_addr; 1008 next_fault_addr = mmfault->fault_addr;
986 if (prev_fault_addr == next_fault_addr) { 1009 if (prev_fault_addr == next_fault_addr) {
987 nvgpu_log(g, gpu_dbg_intr, "pte already scanned"); 1010 nvgpu_log(g, gpu_dbg_intr, "pte already scanned");
988 if (mmfault->refch) 1011 if (mmfault->refch) {
989 gk20a_channel_put(mmfault->refch); 1012 gk20a_channel_put(mmfault->refch);
1013 mmfault->refch = NULL;
1014 }
990 continue; 1015 continue;
991 } 1016 }
992 } 1017 }