From 0d088ad70cb43e54661163971095409c76a79f51 Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Mon, 3 May 2021 23:17:16 +0530 Subject: gpu: nvgpu: wait for stalling interrupts to complete during TSG unbind preempt Some of the engine stalling interrupts can block the context save off the engine if not handled during fifo.preempt_tsg. They need to be handled while polling for engine ctxsw status. Bug 200711183 Bug 200726848 Change-Id: Ie45d76d9d1d8be3ffb842670843507f2d9aea6d0 Signed-off-by: Sagar Kamble Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2521971 (cherry picked from commit I7418a9e0354013b81fbefd8c0cab5068404fc44e) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2523938 Reviewed-by: svc-mobile-coverity Reviewed-by: Deepak Nibade Reviewed-by: Bibek Basu Reviewed-by: mobile promotions Tested-by: mobile promotions GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 24 +++++++++++++++++------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 7 ++++--- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a') diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index e91830f8..049b8da2 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -2981,7 +2981,7 @@ static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g) } int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, - unsigned int id_type) + unsigned int id_type, bool preempt_retries_left) { struct nvgpu_timeout timeout; u32 delay = GR_IDLE_CHECK_DEFAULT; @@ -3037,7 +3037,8 @@ void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch) RC_TYPE_PREEMPT_TIMEOUT); } -int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) +int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg, + bool preempt_retries_left) { int ret; unsigned int id_type; @@ -3049,8 +3050,17 @@ int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL; - /* wait for preempt */ - ret = g->ops.fifo.is_preempt_pending(g, id, id_type); + /* + * Poll for preempt done. if stalling interrupts are pending + * while preempt is in progress we poll for stalling interrupts + * to finish based on return value from this function and + * retry preempt again. + * If HW is hung, on the last retry instance we try to identify + * the engines hung and set the runlist reset_eng_bitmask + * and mark preemption completion. + */ + ret = g->ops.fifo.is_preempt_pending(g, id, id_type, + preempt_retries_left); return ret; } @@ -3072,7 +3082,7 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch) mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - ret = __locked_fifo_preempt(g, ch->chid, false); + ret = __locked_fifo_preempt(g, ch->chid, false, false); if (!mutex_ret) { nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); @@ -3112,7 +3122,7 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg) mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - ret = __locked_fifo_preempt(g, tsg->tsgid, true); + ret = __locked_fifo_preempt(g, tsg->tsgid, true, false); if (!mutex_ret) { nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); @@ -3785,7 +3795,7 @@ static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch, gk20a_readl(g, fifo_preempt_r())); #endif if (wait_preempt) { - g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type); + g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type, false); } #ifdef TRACEPOINTS_ENABLED trace_gk20a_reschedule_preempted_next(ch->chid); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 26365cae..078236d0 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -1,7 +1,7 @@ /* * GK20A graphics fifo (gr host) * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -388,8 +388,9 @@ void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a); u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g); int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, - unsigned int id_type); -int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg); + unsigned int id_type, bool preempt_retries_left); +int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg, + bool preempt_retries_left); void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg); void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch); int gk20a_fifo_setup_ramfc(struct channel_gk20a *c, -- cgit v1.2.2