diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2016-11-25 05:25:19 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-11-30 12:19:31 -0500 |
commit | 53a9eceab74738a750e693301c5e5998aa18e89a (patch) | |
tree | 8b30c1329e6dfe1a2e4b7302ecedf6d94805f2ac /drivers/gpu | |
parent | bc5a2580494fbdc6474968a964595c494d47eb32 (diff) |
gpu: nvgpu: fix deadlock between clean up and timeout worker
In case a job completes right around the timeout boundary,
it is possible that we launch both the clean up worker and
the timeout worker for the same job.
Then in the clean up worker we try to cancel the timeout
worker, and in the timeout worker we try to wait for the clean
up to finish, which leads to a deadlock with the stacks below:
stack 1 (timeout worker, waiting for the clean up worker to be cancelled):
[<ffffffc0000bb484>] cancel_delayed_work_sync+0x10/0x18
[<ffffffc0004f820c>] gk20a_channel_cancel_job_clean_up+0x20/0x44
[<ffffffc0004fc794>] gk20a_channel_abort_clean_up+0x34/0x31c
[<ffffffc0004fcb30>] gk20a_channel_abort+0xb4/0xc0
[<ffffffc0004f3d18>] gk20a_fifo_recover_ch+0x9c/0xec
[<ffffffc0004f3f04>] gk20a_fifo_force_reset_ch+0xdc/0xf8
[<ffffffc0004fa8c4>] gk20a_channel_timeout_handler+0xf8/0x128
stack 2 (clean up worker, waiting for the timeout worker to be cancelled):
[<ffffffc0000bb484>] cancel_delayed_work_sync+0x10/0x18
[<ffffffc0004f82c4>] gk20a_channel_timeout_stop+0x40/0x60
[<ffffffc0004fc488>] gk20a_channel_clean_up_jobs+0x7c/0x238
To fix this, cancel the timeout worker in
gk20a_channel_update() itself instead of cancelling it in
gk20a_channel_clean_up_jobs().
Bug 200246829
Change-Id: Idef9de3cae29668f4e25beb564422cf2e3736182
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1259963
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 4 |
1 files changed, 1 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 8b5a1a9e..e487e079 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2218,7 +2218,6 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2218 | platform = gk20a_get_platform(g->dev); | 2218 | platform = gk20a_get_platform(g->dev); |
2219 | 2219 | ||
2220 | gk20a_channel_cancel_job_clean_up(c, false); | 2220 | gk20a_channel_cancel_job_clean_up(c, false); |
2221 | gk20a_channel_timeout_stop(c); | ||
2222 | 2221 | ||
2223 | while (1) { | 2222 | while (1) { |
2224 | bool completed; | 2223 | bool completed; |
@@ -2244,8 +2243,6 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2244 | break; | 2243 | break; |
2245 | } | 2244 | } |
2246 | 2245 | ||
2247 | gk20a_channel_timeout_stop(c); | ||
2248 | |||
2249 | WARN_ON(!c->sync); | 2246 | WARN_ON(!c->sync); |
2250 | 2247 | ||
2251 | if (c->sync) { | 2248 | if (c->sync) { |
@@ -2317,6 +2314,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) | |||
2317 | } | 2314 | } |
2318 | 2315 | ||
2319 | trace_gk20a_channel_update(c->hw_chid); | 2316 | trace_gk20a_channel_update(c->hw_chid); |
2317 | gk20a_channel_timeout_stop(c); | ||
2320 | gk20a_channel_schedule_job_clean_up(c); | 2318 | gk20a_channel_schedule_job_clean_up(c); |
2321 | 2319 | ||
2322 | gk20a_channel_put(c); | 2320 | gk20a_channel_put(c); |