summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vedashree Vidwans <vvidwans@nvidia.com> 2019-08-23 17:28:21 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2019-08-27 23:12:03 -0400
commit: 6500ce7581b7fa2360657a8e58d327e0cf311f25 (patch)
tree: ae97824bcd01e524393241cb6e4e2123e7b62654
parent: eab9d968e5d66857cfb2c7f833d3ced0c56191d4 (diff)
gpu: nvgpu: fix race for channel sync read/write
The CTS test dEQP-VK.api.object_management.max_concurrent.device_group crashes with an invalid userspace memory access.

Currently, nvgpu_submit_prepare_syncs() races with gk20a_channel_clean_up_jobs(), and this race condition is exposed when aggressive_sync_destroy_thresh is set to a non-zero value. nvgpu_submit_prepare_syncs() takes a reference on c->sync in order to submit a job and then releases the channel's sync_lock immediately. Meanwhile, gk20a_channel_worker_process() triggers gk20a_channel_clean_up_jobs(), which destroys the c->sync object that nvgpu_submit_prepare_syncs() has just referenced and is still using.

The channel sync is deleted by gk20a_channel_clean_up_jobs() only if aggressive_sync_destroy_thresh is non-zero, so gk20a_channel_clean_up_jobs() and nvgpu_submit_prepare_syncs() can race only in this scenario. Hence, if aggressive_sync_destroy_thresh is non-zero, this patch protects the channel's sync pointer by holding the channel's sync_lock for the complete execution of nvgpu_submit_prepare_syncs(); the lock is now released on both the success path and the failure path at the end of the function.

Bug 2613870

Change-Id: I6f3d48aff361d1cb38c30d2ce5de276d0c55fb6f
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2180550
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/common/fifo/submit.c | 8
1 file changed, 6 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 599539cd..4effc663 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -59,13 +59,11 @@ static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
59 c->sync = nvgpu_channel_sync_create(c, false); 59 c->sync = nvgpu_channel_sync_create(c, false);
60 if (!c->sync) { 60 if (!c->sync) {
61 err = -ENOMEM; 61 err = -ENOMEM;
62 nvgpu_mutex_release(&c->sync_lock);
63 goto fail; 62 goto fail;
64 } 63 }
65 new_sync_created = true; 64 new_sync_created = true;
66 } 65 }
67 nvgpu_atomic_inc(&c->sync->refcount); 66 nvgpu_atomic_inc(&c->sync->refcount);
68 nvgpu_mutex_release(&c->sync_lock);
69 } 67 }
70 68
71 if (g->ops.fifo.resetup_ramfc && new_sync_created) { 69 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
@@ -151,6 +149,9 @@ static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
151 goto clean_up_incr_cmd; 149 goto clean_up_incr_cmd;
152 } 150 }
153 151
152 if (g->aggressive_sync_destroy_thresh) {
153 nvgpu_mutex_release(&c->sync_lock);
154 }
154 return 0; 155 return 0;
155 156
156clean_up_incr_cmd: 157clean_up_incr_cmd:
@@ -169,6 +170,9 @@ clean_up_wait_cmd:
169 job->wait_cmd = NULL; 170 job->wait_cmd = NULL;
170 } 171 }
171fail: 172fail:
173 if (g->aggressive_sync_destroy_thresh) {
174 nvgpu_mutex_release(&c->sync_lock);
175 }
172 *wait_cmd = NULL; 176 *wait_cmd = NULL;
173 return err; 177 return err;
174} 178}