diff options
author | Vedashree Vidwans <vvidwans@nvidia.com> | 2019-08-23 17:28:21 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-08-27 23:12:03 -0400 |
commit | 6500ce7581b7fa2360657a8e58d327e0cf311f25 (patch) | |
tree | ae97824bcd01e524393241cb6e4e2123e7b62654 /drivers/gpu | |
parent | eab9d968e5d66857cfb2c7f833d3ced0c56191d4 (diff) |
gpu: nvgpu: fix race for channel sync read/write
The CTS test dEQP-VK.api.object_management.max_concurrent.device_group
crashes with an invalid userspace memory access.
Currently, nvgpu_submit_prepare_syncs() races with
gk20a_channel_clean_up_jobs(), and this race condition is exposed when
aggressive_sync_destroy_thresh is set to a non-zero value.
nvgpu_submit_prepare_syncs() takes a reference on c->sync to submit the
job and releases the channel sync_lock immediately. Meanwhile,
gk20a_channel_worker_process() triggers gk20a_channel_clean_up_jobs(),
which destroys the referenced c->sync pointer.
The channel sync is deleted by gk20a_channel_clean_up_jobs() only if
aggressive_sync_destroy_thresh is non-zero.
So, gk20a_channel_clean_up_jobs() and nvgpu_submit_prepare_syncs() will
race only in this scenario.
Hence, if the aggressive_sync_destroy_thresh value is non-zero, this patch
protects the channel's sync pointer by holding the channel sync_lock
for the complete execution of nvgpu_submit_prepare_syncs().
Bug 2613870
Change-Id: I6f3d48aff361d1cb38c30d2ce5de276d0c55fb6f
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2180550
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/fifo/submit.c | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 599539cd..4effc663 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c | |||
@@ -59,13 +59,11 @@ static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c, | |||
59 | c->sync = nvgpu_channel_sync_create(c, false); | 59 | c->sync = nvgpu_channel_sync_create(c, false); |
60 | if (!c->sync) { | 60 | if (!c->sync) { |
61 | err = -ENOMEM; | 61 | err = -ENOMEM; |
62 | nvgpu_mutex_release(&c->sync_lock); | ||
63 | goto fail; | 62 | goto fail; |
64 | } | 63 | } |
65 | new_sync_created = true; | 64 | new_sync_created = true; |
66 | } | 65 | } |
67 | nvgpu_atomic_inc(&c->sync->refcount); | 66 | nvgpu_atomic_inc(&c->sync->refcount); |
68 | nvgpu_mutex_release(&c->sync_lock); | ||
69 | } | 67 | } |
70 | 68 | ||
71 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | 69 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { |
@@ -151,6 +149,9 @@ static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c, | |||
151 | goto clean_up_incr_cmd; | 149 | goto clean_up_incr_cmd; |
152 | } | 150 | } |
153 | 151 | ||
152 | if (g->aggressive_sync_destroy_thresh) { | ||
153 | nvgpu_mutex_release(&c->sync_lock); | ||
154 | } | ||
154 | return 0; | 155 | return 0; |
155 | 156 | ||
156 | clean_up_incr_cmd: | 157 | clean_up_incr_cmd: |
@@ -169,6 +170,9 @@ clean_up_wait_cmd: | |||
169 | job->wait_cmd = NULL; | 170 | job->wait_cmd = NULL; |
170 | } | 171 | } |
171 | fail: | 172 | fail: |
173 | if (g->aggressive_sync_destroy_thresh) { | ||
174 | nvgpu_mutex_release(&c->sync_lock); | ||
175 | } | ||
172 | *wait_cmd = NULL; | 176 | *wait_cmd = NULL; |
173 | return err; | 177 | return err; |
174 | } | 178 | } |