summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2018-10-19 15:08:46 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2019-02-13 16:19:37 -0500
commit220860d04383489a8e75684802a2ced1323831df (patch)
tree123e92d8b1781afa6de32bc2615ae5835b389f4d
parent18643ac1357a845d204d6dabd98359a0ab0509a7 (diff)
gpu: nvgpu: rename has_timedout and make it thread safe
Currently the has_timedout variable is protected by a wmb at the places where it is set, but there is no corresponding rmb wherever the has_timedout variable is read. This is error-prone under concurrent execution. This change is intended to fix that issue. Rename the has_timedout variable of the channel struct to ch_timedout. Also, to avoid needing an rmb every time ch_timedout is read, a spinlock (ch_timedout_lock) is added to protect the ch_timedout variable under concurrent execution. Bug 2404865 Bug 2092051 Change-Id: I0bee9f50af0a48720aa8b54cbc3af97ef9f6df00 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1930935 Signed-off-by: Debarshi Dutta <ddutta@nvidia.com> (cherry picked from commit 1f54ea09e3445d9ca3cf7a69b4967849cc9defc8 in dev-kernel) Reviewed-on: https://git-master.nvidia.com/r/2016975 GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: Bibek Basu <bbasu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/common/fifo/channel.c26
-rw-r--r--drivers/gpu/nvgpu/common/fifo/submit.c4
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c8
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/channel.h6
-rw-r--r--drivers/gpu/nvgpu/os/linux/cde.c5
-rw-r--r--drivers/gpu/nvgpu/os/linux/ioctl_channel.c17
-rw-r--r--drivers/gpu/nvgpu/os/linux/ioctl_tsg.c2
-rw-r--r--drivers/gpu/nvgpu/vgpu/fifo_vgpu.c8
-rw-r--r--drivers/gpu/nvgpu/vgpu/vgpu.c2
9 files changed, 54 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 29720886..b5ae42d4 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -212,6 +212,24 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
212 gk20a_channel_update(ch); 212 gk20a_channel_update(ch);
213} 213}
214 214
215void gk20a_channel_set_timedout(struct channel_gk20a *ch)
216{
217 nvgpu_spinlock_acquire(&ch->ch_timedout_lock);
218 ch->ch_timedout = true;
219 nvgpu_spinlock_release(&ch->ch_timedout_lock);
220}
221
222bool gk20a_channel_check_timedout(struct channel_gk20a *ch)
223{
224 bool ch_timedout_status;
225
226 nvgpu_spinlock_acquire(&ch->ch_timedout_lock);
227 ch_timedout_status = ch->ch_timedout;
228 nvgpu_spinlock_release(&ch->ch_timedout_lock);
229
230 return ch_timedout_status;
231}
232
215void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) 233void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
216{ 234{
217 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); 235 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
@@ -223,7 +241,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
223 } 241 }
224 242
225 /* make sure new kickoffs are prevented */ 243 /* make sure new kickoffs are prevented */
226 ch->has_timedout = true; 244 gk20a_channel_set_timedout(ch);
227 245
228 ch->g->ops.fifo.disable_channel(ch); 246 ch->g->ops.fifo.disable_channel(ch);
229 247
@@ -425,7 +443,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
425 * Set user managed syncpoint to safe state 443 * Set user managed syncpoint to safe state
426 * But it's already done if channel has timedout 444 * But it's already done if channel has timedout
427 */ 445 */
428 if (ch->has_timedout) { 446 if (gk20a_channel_check_timedout(ch)) {
429 nvgpu_channel_sync_destroy(ch->user_sync, false); 447 nvgpu_channel_sync_destroy(ch->user_sync, false);
430 } else { 448 } else {
431 nvgpu_channel_sync_destroy(ch->user_sync, true); 449 nvgpu_channel_sync_destroy(ch->user_sync, true);
@@ -711,7 +729,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
711 /* set gr host default timeout */ 729 /* set gr host default timeout */
712 ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); 730 ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
713 ch->timeout_debug_dump = true; 731 ch->timeout_debug_dump = true;
714 ch->has_timedout = false; 732 ch->ch_timedout = false;
715 733
716 /* init kernel watchdog timeout */ 734 /* init kernel watchdog timeout */
717 ch->timeout.enabled = true; 735 ch->timeout.enabled = true;
@@ -2196,6 +2214,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2196 c->referenceable = false; 2214 c->referenceable = false;
2197 nvgpu_cond_init(&c->ref_count_dec_wq); 2215 nvgpu_cond_init(&c->ref_count_dec_wq);
2198 2216
2217 nvgpu_spinlock_init(&c->ch_timedout_lock);
2218
2199#if GK20A_CHANNEL_REFCOUNT_TRACKING 2219#if GK20A_CHANNEL_REFCOUNT_TRACKING
2200 nvgpu_spinlock_init(&c->ref_actions_lock); 2220 nvgpu_spinlock_init(&c->ref_actions_lock);
2201#endif 2221#endif
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 5e218fbc..599539cd 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -336,7 +336,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
336 return -ENODEV; 336 return -ENODEV;
337 } 337 }
338 338
339 if (c->has_timedout) { 339 if (gk20a_channel_check_timedout(c)) {
340 return -ETIMEDOUT; 340 return -ETIMEDOUT;
341 } 341 }
342 342
@@ -504,7 +504,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
504 } 504 }
505 } 505 }
506 506
507 if (c->has_timedout) { 507 if (gk20a_channel_check_timedout(c)) {
508 err = -ETIMEDOUT; 508 err = -ETIMEDOUT;
509 goto clean_up; 509 goto clean_up;
510 } 510 }
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 06db0bb0..d4e386bd 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1484,8 +1484,8 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
1484{ 1484{
1485 if (refch) { 1485 if (refch) {
1486 /* mark channel as faulted */ 1486 /* mark channel as faulted */
1487 refch->has_timedout = true; 1487 gk20a_channel_set_timedout(refch);
1488 nvgpu_smp_wmb(); 1488
1489 /* unblock pending waits */ 1489 /* unblock pending waits */
1490 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); 1490 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
1491 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); 1491 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
@@ -1568,7 +1568,7 @@ void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
1568 nvgpu_rwsem_down_read(&tsg->ch_list_lock); 1568 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1569 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) { 1569 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1570 if (gk20a_channel_get(ch)) { 1570 if (gk20a_channel_get(ch)) {
1571 ch->has_timedout = true; 1571 gk20a_channel_set_timedout(ch);
1572 if (ch->g->ops.fifo.ch_abort_clean_up) { 1572 if (ch->g->ops.fifo.ch_abort_clean_up) {
1573 ch->g->ops.fifo.ch_abort_clean_up(ch); 1573 ch->g->ops.fifo.ch_abort_clean_up(ch);
1574 } 1574 }
@@ -2181,7 +2181,7 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
2181 2181
2182 /* If one channel in TSG times out, we disable all channels */ 2182 /* If one channel in TSG times out, we disable all channels */
2183 nvgpu_rwsem_down_write(&tsg->ch_list_lock); 2183 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2184 tsg_timedout = ch->has_timedout; 2184 tsg_timedout = gk20a_channel_check_timedout(ch);
2185 nvgpu_rwsem_up_write(&tsg->ch_list_lock); 2185 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2186 2186
2187 /* Disable TSG and examine status before unbinding channel */ 2187 /* Disable TSG and examine status before unbinding channel */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 1851b9e2..0a956c66 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -298,6 +298,8 @@ struct channel_gk20a {
298 298
299 struct nvgpu_mem ctx_header; 299 struct nvgpu_mem ctx_header;
300 300
301 struct nvgpu_spinlock ch_timedout_lock;
302 bool ch_timedout;
301 /* Any operating system specific data. */ 303 /* Any operating system specific data. */
302 void *os_priv; 304 void *os_priv;
303 305
@@ -313,7 +315,6 @@ struct channel_gk20a {
313 u32 runlist_id; 315 u32 runlist_id;
314 316
315 bool mmu_nack_handled; 317 bool mmu_nack_handled;
316 bool has_timedout;
317 bool referenceable; 318 bool referenceable;
318 bool vpr; 319 bool vpr;
319 bool deterministic; 320 bool deterministic;
@@ -464,4 +465,7 @@ static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
464} 465}
465#endif 466#endif
466 467
468void gk20a_channel_set_timedout(struct channel_gk20a *ch);
469bool gk20a_channel_check_timedout(struct channel_gk20a *ch);
470
467#endif 471#endif
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index a15ef560..7b2cba7d 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -1272,7 +1272,7 @@ __releases(&cde_app->mutex)
1272 nvgpu_log_info(g, "double finish cde context %p on channel %p", 1272 nvgpu_log_info(g, "double finish cde context %p on channel %p",
1273 cde_ctx, ch); 1273 cde_ctx, ch);
1274 1274
1275 if (ch->has_timedout) { 1275 if (gk20a_channel_check_timedout(ch)) {
1276 if (cde_ctx->is_temporary) { 1276 if (cde_ctx->is_temporary) {
1277 nvgpu_warn(g, 1277 nvgpu_warn(g,
1278 "cde: channel had timed out" 1278 "cde: channel had timed out"
@@ -1299,8 +1299,9 @@ __releases(&cde_app->mutex)
1299 msecs_to_jiffies(CTX_DELETE_TIME)); 1299 msecs_to_jiffies(CTX_DELETE_TIME));
1300 } 1300 }
1301 1301
1302 if (!ch->has_timedout) 1302 if (!gk20a_channel_check_timedout(ch)) {
1303 gk20a_cde_ctx_release(cde_ctx); 1303 gk20a_cde_ctx_release(cde_ctx);
1304 }
1304} 1305}
1305 1306
1306static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) 1307static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 45d49474..d0d4b1af 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -636,8 +636,9 @@ static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
636 int ret = 0; 636 int ret = 0;
637 637
638 /* do not wait if channel has timed out */ 638 /* do not wait if channel has timed out */
639 if (ch->has_timedout) 639 if (gk20a_channel_check_timedout(ch)) {
640 return -ETIMEDOUT; 640 return -ETIMEDOUT;
641 }
641 642
642 dmabuf = dma_buf_get(id); 643 dmabuf = dma_buf_get(id);
643 if (IS_ERR(dmabuf)) { 644 if (IS_ERR(dmabuf)) {
@@ -656,7 +657,8 @@ static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
656 657
657 ret = NVGPU_COND_WAIT_INTERRUPTIBLE( 658 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
658 &ch->semaphore_wq, 659 &ch->semaphore_wq,
659 *semaphore == payload || ch->has_timedout, 660 *semaphore == payload ||
661 gk20a_channel_check_timedout(ch),
660 timeout); 662 timeout);
661 663
662 dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); 664 dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
@@ -680,8 +682,9 @@ static int gk20a_channel_wait(struct channel_gk20a *ch,
680 682
681 nvgpu_log_fn(g, " "); 683 nvgpu_log_fn(g, " ");
682 684
683 if (ch->has_timedout) 685 if (gk20a_channel_check_timedout(ch)) {
684 return -ETIMEDOUT; 686 return -ETIMEDOUT;
687 }
685 688
686 switch (args->type) { 689 switch (args->type) {
687 case NVGPU_WAIT_TYPE_NOTIFIER: 690 case NVGPU_WAIT_TYPE_NOTIFIER:
@@ -716,7 +719,8 @@ static int gk20a_channel_wait(struct channel_gk20a *ch,
716 * calling this ioctl */ 719 * calling this ioctl */
717 remain = NVGPU_COND_WAIT_INTERRUPTIBLE( 720 remain = NVGPU_COND_WAIT_INTERRUPTIBLE(
718 &ch->notifier_wq, 721 &ch->notifier_wq,
719 notif->status == 0 || ch->has_timedout, 722 notif->status == 0 ||
723 gk20a_channel_check_timedout(ch),
720 args->timeout); 724 args->timeout);
721 725
722 if (remain == 0 && notif->status != 0) { 726 if (remain == 0 && notif->status != 0) {
@@ -786,8 +790,9 @@ static int gk20a_ioctl_channel_submit_gpfifo(
786 profile = gk20a_fifo_profile_acquire(ch->g); 790 profile = gk20a_fifo_profile_acquire(ch->g);
787 gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); 791 gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
788 792
789 if (ch->has_timedout) 793 if (gk20a_channel_check_timedout(ch)) {
790 return -ETIMEDOUT; 794 return -ETIMEDOUT;
795 }
791 796
792 nvgpu_get_fence_args(&args->fence, &fence); 797 nvgpu_get_fence_args(&args->fence, &fence);
793 submit_flags = 798 submit_flags =
@@ -1249,7 +1254,7 @@ long gk20a_channel_ioctl(struct file *filp,
1249 } 1254 }
1250 case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: 1255 case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
1251 ((struct nvgpu_get_param_args *)buf)->value = 1256 ((struct nvgpu_get_param_args *)buf)->value =
1252 ch->has_timedout; 1257 gk20a_channel_check_timedout(ch);
1253 break; 1258 break;
1254 case NVGPU_IOCTL_CHANNEL_ENABLE: 1259 case NVGPU_IOCTL_CHANNEL_ENABLE:
1255 err = gk20a_busy(ch->g); 1260 err = gk20a_busy(ch->g);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
index b0cdf5e5..c5079bd6 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -141,7 +141,7 @@ static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
141 * Mark the channel timedout since channel unbound from TSG 141 * Mark the channel timedout since channel unbound from TSG
142 * has no context of its own so it can't serve any job 142 * has no context of its own so it can't serve any job
143 */ 143 */
144 ch->has_timedout = true; 144 gk20a_channel_set_timedout(ch);
145 145
146out: 146out:
147 gk20a_channel_put(ch); 147 gk20a_channel_put(ch);
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 234f6fd4..4055d5af 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -651,7 +651,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
651 if (gk20a_channel_get(ch_tsg)) { 651 if (gk20a_channel_get(ch_tsg)) {
652 g->ops.fifo.set_error_notifier(ch_tsg, 652 g->ops.fifo.set_error_notifier(ch_tsg,
653 err_code); 653 err_code);
654 ch_tsg->has_timedout = true; 654 gk20a_channel_set_timedout(ch_tsg);
655 gk20a_channel_put(ch_tsg); 655 gk20a_channel_put(ch_tsg);
656 } 656 }
657 } 657 }
@@ -659,7 +659,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
659 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 659 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
660 } else { 660 } else {
661 g->ops.fifo.set_error_notifier(ch, err_code); 661 g->ops.fifo.set_error_notifier(ch, err_code);
662 ch->has_timedout = true; 662 gk20a_channel_set_timedout(ch);
663 } 663 }
664 664
665 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET; 665 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET;
@@ -685,8 +685,8 @@ static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
685 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); 685 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
686 686
687 /* mark channel as faulted */ 687 /* mark channel as faulted */
688 ch->has_timedout = true; 688 gk20a_channel_set_timedout(ch);
689 nvgpu_smp_wmb(); 689
690 /* unblock pending waits */ 690 /* unblock pending waits */
691 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); 691 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
692 nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); 692 nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 07361afe..c17a16df 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -126,7 +126,7 @@ static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid)
126 return; 126 return;
127 } 127 }
128 128
129 ch->has_timedout = true; 129 gk20a_channel_set_timedout(ch);
130 g->ops.fifo.ch_abort_clean_up(ch); 130 g->ops.fifo.ch_abort_clean_up(ch);
131 gk20a_channel_put(ch); 131 gk20a_channel_put(ch);
132} 132}