diff options
author | Debarshi Dutta <ddutta@nvidia.com> | 2019-04-30 05:41:31 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-05-09 17:42:33 -0400 |
commit | 6509bb49da19ba9b19e3df64e473b01d54fd310d (patch) | |
tree | b34d19c88fc122f369b1f22094d9a5e22c67df92 /drivers/gpu/nvgpu/common | |
parent | 4d8ad643d67ac4044f76976c4085a35fcc5d4095 (diff) |
gpu: nvgpu: protect recovery with engines_reset_mutex
Rename gr_reset_mutex to engines_reset_mutex and acquire it
before initiating recovery. Recovery running in parallel with
engine reset is not recommended.
On hitting engine reset, h/w drops the ctxsw_status to INVALID in
fifo_engine_status register. Also while the engine is held in reset
h/w passes busy/idle straight through. fifo_engine_status registers
are correct in that there is no context switch outstanding
as the CTXSW is aborted when reset is asserted.
Use deferred_reset_mutex to protect the deferred_reset_pending variable.
If deferred_reset_pending is true, then acquire engines_reset_mutex
and call gk20a_fifo_deferred_reset.
gk20a_fifo_deferred_reset would also check the value of
deferred_reset_pending before initiating the reset process.
Bug 2092051
Bug 2429295
Bug 2484211
Bug 1890287
Change-Id: I47de669a6203e0b2e9a8237ec4e4747339b9837c
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2022373
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry-picked from cb91bf1e13740023903282d1c2271d9154e940ba
in dev-main)
Reviewed-on: https://git-master.nvidia.com/r/2024901
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/fifo/channel.c | 19 |
1 file changed, 10 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index d30b8ded..4bea032a 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c | |||
@@ -308,6 +308,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | |||
308 | struct dbg_session_data *session_data, *tmp_s; | 308 | struct dbg_session_data *session_data, *tmp_s; |
309 | struct dbg_session_channel_data *ch_data, *tmp; | 309 | struct dbg_session_channel_data *ch_data, *tmp; |
310 | int err; | 310 | int err; |
311 | bool deferred_reset_pending; | ||
311 | 312 | ||
312 | nvgpu_log_fn(g, " "); | 313 | nvgpu_log_fn(g, " "); |
313 | 314 | ||
@@ -381,17 +382,17 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | |||
381 | 382 | ||
382 | /* if engine reset was deferred, perform it now */ | 383 | /* if engine reset was deferred, perform it now */ |
383 | nvgpu_mutex_acquire(&f->deferred_reset_mutex); | 384 | nvgpu_mutex_acquire(&f->deferred_reset_mutex); |
384 | if (g->fifo.deferred_reset_pending) { | 385 | deferred_reset_pending = g->fifo.deferred_reset_pending; |
386 | nvgpu_mutex_release(&f->deferred_reset_mutex); | ||
387 | |||
388 | if (deferred_reset_pending) { | ||
385 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" | 389 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" |
386 | " deferred, running now"); | 390 | " deferred, running now"); |
387 | /* if lock is already taken, a reset is taking place | 391 | nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); |
388 | so no need to repeat */ | 392 | gk20a_fifo_deferred_reset(g, ch); |
389 | if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { | 393 | nvgpu_mutex_release(&g->fifo.engines_reset_mutex); |
390 | gk20a_fifo_deferred_reset(g, ch); | ||
391 | nvgpu_mutex_release(&g->fifo.gr_reset_mutex); | ||
392 | } | ||
393 | } | 394 | } |
394 | nvgpu_mutex_release(&f->deferred_reset_mutex); | 395 | |
395 | 396 | ||
396 | if (!gk20a_channel_as_bound(ch)) { | 397 | if (!gk20a_channel_as_bound(ch)) { |
397 | goto unbind; | 398 | goto unbind; |