diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2016-07-01 03:05:27 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-07-04 04:02:00 -0400 |
commit | 1002f40a3bb54db6e40be77b836437ccb2f3aa96 (patch) | |
tree | 0a7ce238c17a82194ea7025bdc7d64a2ab4492e3 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | 1e01a49fdc139b8cdf5164b4a6767d22ef4ad1d3 (diff) |
gpu: nvgpu: cancel job clean up before aborting channel
When we abort the channel, the job clean-up worker may still be
running. The worker can then race with the abort and sometimes
cause the panic below:
[ 245.483566] Unable to handle kernel paging request at virtual address 800000000
...
[ 245.548991] PC is at gk20a_channel_abort_clean_up+0xb8/0x140
[ 245.554683] LR is at gk20a_channel_abort_clean_up+0xac/0x140
...
[ 247.301860] [<ffffffc000479390>] gk20a_channel_abort_clean_up+0xb8/0x140
[ 247.312853] [<ffffffc0004794d4>] gk20a_channel_abort+0xbc/0xc8
[ 247.322970] [<ffffffc0004794f8>] gk20a_disable_channel+0x18/0x30
[ 247.333267] [<ffffffc000479628>] gk20a_free_channel+0x118/0x584
[ 247.343473] [<ffffffc000479aa0>] gk20a_channel_close+0xc/0x14
[ 247.353479] [<ffffffc000479b80>] gk20a_channel_release+0xd8/0x104
Fix this by cancelling the job clean-up worker before aborting
the channel.
Bug 1777281
Change-Id: Ic24c7c03b27cfb5cd164a52efdb1e2813a41a10a
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1174416
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 6c7ff551..18f2e896 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -69,6 +69,8 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch); | |||
69 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch); | 69 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch); |
70 | 70 | ||
71 | static void gk20a_channel_clean_up_jobs(struct work_struct *work); | 71 | static void gk20a_channel_clean_up_jobs(struct work_struct *work); |
72 | static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, | ||
73 | bool wait_for_completion); | ||
72 | 74 | ||
73 | /* allocate GPU channel */ | 75 | /* allocate GPU channel */ |
74 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) | 76 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) |
@@ -460,6 +462,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
460 | struct channel_gk20a_job *job, *n; | 462 | struct channel_gk20a_job *job, *n; |
461 | bool released_job_semaphore = false; | 463 | bool released_job_semaphore = false; |
462 | 464 | ||
465 | gk20a_channel_cancel_job_clean_up(ch, true); | ||
466 | |||
463 | /* ensure no fences are pending */ | 467 | /* ensure no fences are pending */ |
464 | mutex_lock(&ch->sync_lock); | 468 | mutex_lock(&ch->sync_lock); |
465 | if (ch->sync) | 469 | if (ch->sync) |