gpu: nvgpu: recover on first interrupt reported for mmu nack

In case of an mmu nack error, the interrupt is received twice, through the SM-reported mmu nack interrupt and the mmu fault, in undetermined order. Recover on the first received interrupt to avoid semaphore release, and skip doing a second recovery. Also fix a NULL pointer dereference in function gv11b_fifo_reset_pbdma_and_eng_faulted when the channel reference is invalid in the teardown path. Bug 200382235 Change-Id: I361a5725d7b6355ebf02b2870727f647fbd7a37e Signed-off-by: Aparna Das <aparnad@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1739804 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Aparna Das <aparnad@nvidia.com> 2018-06-04 22:40:56 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-06-15 20:47:06 -0400
commit: 98d996f4ffb0137d119b5849cae46d7b7e5693e1 (patch)
tree: 4e3243b91e8e559dd14b8bc30fc2468713182027 /drivers/gpu/nvgpu/gv11b/fb_gv11b.c
parent: 1f51620fda57443c77506c354af837a60883d78b (diff)
1 files changed, 25 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index ce8f5669..bba7e66c 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -935,12 +935,37 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
                                id = mmfault->chid;
                                id_type = ID_TYPE_CHANNEL;
                        }
+                        if (mmfault->refch->mmu_nack_handled) {
+                                /* We have already recovered for the same
+                                 * context, skip doing another recovery.
+                                 */
+                                mmfault->refch->mmu_nack_handled = false;
+                                /*
+                                 * Recovery path can be entered twice for the
+                                 * same error in case of mmu nack. If mmu
+                                 * nack interrupt is handled before mmu fault
+                                 * then channel reference is increased to avoid
+                                 * closing the channel by userspace. Decrement
+                                 * channel reference.
+                                 */
+                                gk20a_channel_put(mmfault->refch);
+                                /* refch in mmfault is assigned at the time
+                                 * of copying fault info from snap reg or bar2
+                                 * fault buf.
+                                 */
+                                gk20a_channel_put(mmfault->refch);
+                                return;
+                        }
                } else {
                        id_type = ID_TYPE_UNKNOWN;
                }
                if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)
                        act_eng_bitmask = BIT(mmfault->faulted_engine);
+                /* Indicate recovery is handled if mmu fault is a result of
+                 * mmu nack.
+                 */
+                mmfault->refch->mmu_nack_handled = true;
                g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
                        id, id_type, RC_TYPE_MMU_FAULT, mmfault);
        } else {
author	Aparna Das <aparnad@nvidia.com>	2018-06-04 22:40:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-06-15 20:47:06 -0400
commit	98d996f4ffb0137d119b5849cae46d7b7e5693e1 (patch)
tree	4e3243b91e8e559dd14b8bc30fc2468713182027 /drivers/gpu/nvgpu/gv11b/fb_gv11b.c
parent	1f51620fda57443c77506c354af837a60883d78b (diff)

diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c index ce8f5669..bba7e66c 100644 --- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -935,12 +935,37 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
935	id = mmfault->chid;	935	id = mmfault->chid;
936	id_type = ID_TYPE_CHANNEL;	936	id_type = ID_TYPE_CHANNEL;
937	}	937	}
		938	if (mmfault->refch->mmu_nack_handled) {
		939	/* We have already recovered for the same
		940	* context, skip doing another recovery.
		941	*/
		942	mmfault->refch->mmu_nack_handled = false;
		943	/*
		944	* Recovery path can be entered twice for the
		945	* same error in case of mmu nack. If mmu
		946	* nack interrupt is handled before mmu fault
		947	* then channel reference is increased to avoid
		948	* closing the channel by userspace. Decrement
		949	* channel reference.
		950	*/
		951	gk20a_channel_put(mmfault->refch);
		952	/* refch in mmfault is assigned at the time
		953	* of copying fault info from snap reg or bar2
		954	* fault buf.
		955	*/
		956	gk20a_channel_put(mmfault->refch);
		957	return;
		958	}
938	} else {	959	} else {
939	id_type = ID_TYPE_UNKNOWN;	960	id_type = ID_TYPE_UNKNOWN;
940	}	961	}
941	if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)	962	if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)
942	act_eng_bitmask = BIT(mmfault->faulted_engine);	963	act_eng_bitmask = BIT(mmfault->faulted_engine);
943		964
		965	/* Indicate recovery is handled if mmu fault is a result of
		966	* mmu nack.
		967	*/
		968	mmfault->refch->mmu_nack_handled = true;
944	g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,	969	g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
945	id, id_type, RC_TYPE_MMU_FAULT, mmfault);	970	id, id_type, RC_TYPE_MMU_FAULT, mmfault);
946	} else {	971	} else {