 drivers/gpu/nvgpu/common/linux/driver_common.c   |   3 -
 drivers/gpu/nvgpu/common/linux/module.c          |   1 -
 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c |   3 -
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c     | 223 +-
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h     |   8 -
 drivers/gpu/nvgpu/gk20a/gk20a.h                  |   4 -
 6 files changed, 1 insertion(+), 241 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 62596d49..c3663117 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -77,9 +77,6 @@ static void nvgpu_init_vars(struct gk20a *g)
 	dma_set_mask(dev, DMA_BIT_MASK(34));
 	dma_set_coherent_mask(dev, DMA_BIT_MASK(34));
 
-	nvgpu_init_list_node(&g->pending_sema_waits);
-	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
-
 	nvgpu_init_list_node(&g->profiler_objects);
 
 	nvgpu_init_list_node(&g->boardobj_head);
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 84cc3399..d79de211 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -1052,7 +1052,6 @@ void gk20a_driver_start_unload(struct gk20a *g)
 	gk20a_wait_for_idle(g);
 
 	nvgpu_wait_for_deferred_interrupts(g);
-	gk20a_channel_cancel_pending_sema_waits(g);
 
 	if (l->nonstall_work_queue) {
 		cancel_work_sync(&l->nonstall_fn_work);
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
index 6f040603..c3d95b4a 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
@@ -80,9 +80,6 @@ static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
 	l->regs_saved = l->regs;
 	l->bar1_saved = l->bar1;
 
-	nvgpu_init_list_node(&g->pending_sema_waits);
-	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
-
 	g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
 	g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
 	g->has_syncpoints = platform->has_syncpoints;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index e896eb88..20460c80 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -379,127 +379,6 @@ struct gk20a_channel_semaphore {
 	struct sync_timeline *timeline;
 };
 
-#ifdef CONFIG_SYNC
-struct wait_fence_work {
-	struct sync_fence_waiter waiter;
-	struct sync_fence *fence;
-	struct channel_gk20a *ch;
-	struct nvgpu_semaphore *sema;
-	struct gk20a *g;
-	struct nvgpu_list_node entry;
-};
-
-static inline struct wait_fence_work *
-wait_fence_work_from_entry(struct nvgpu_list_node *node)
-{
-	return (struct wait_fence_work *)
-		((uintptr_t)node - offsetof(struct wait_fence_work, entry));
-};
-
-/*
- * Keep track of all the pending waits on semaphores that exist for a GPU. This
- * has to be done because the waits on fences backed by semaphores are
- * asynchronous so it's impossible to otherwise know when they will fire. During
- * driver cleanup this list can be checked and all existing waits can be
- * canceled.
- */
-static void gk20a_add_pending_sema_wait(struct gk20a *g,
-		struct wait_fence_work *work)
-{
-	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
-	nvgpu_list_add(&work->entry, &g->pending_sema_waits);
-	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
-}
-
-/*
- * Copy the list head from the pending wait list to the passed list and
- * then delete the entire pending list.
- */
-static void gk20a_start_sema_wait_cancel(struct gk20a *g,
-		struct nvgpu_list_node *list)
-{
-	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
-	nvgpu_list_replace_init(&g->pending_sema_waits, list);
-	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
-}
-
-/*
- * During shutdown this should be called to make sure that any pending sema
- * waits are canceled. This is a fairly delicate and tricky bit of code. Here's
- * how it works.
- *
- * Every time a semaphore wait is initiated in SW the wait_fence_work struct is
- * added to the pending_sema_waits list. When the semaphore launcher code runs
- * it checks the pending_sema_waits list. If this list is non-empty that means
- * that the wait_fence_work struct must be present and can be removed.
- *
- * When the driver shuts down one of the steps is to cancel pending sema waits.
- * To do this the entire list of pending sema waits is removed (and stored in a
- * separate local list). So now, if the semaphore launcher code runs it will see
- * that the pending_sema_waits list is empty and knows that it no longer owns
- * the wait_fence_work struct.
- */
-void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
-{
-	struct wait_fence_work *work;
-	struct nvgpu_list_node local_pending_sema_waits;
-
-	gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits);
-
-	while (!nvgpu_list_empty(&local_pending_sema_waits)) {
-		int ret;
-
-		work = nvgpu_list_first_entry(&local_pending_sema_waits,
-					      wait_fence_work,
-					      entry);
-
-		nvgpu_list_del(&work->entry);
-
-		/*
-		 * Only nvgpu_kfree() work if the cancel is successful.
-		 * Otherwise it's in use by the
-		 * gk20a_channel_semaphore_launcher() code.
-		 */
-		ret = sync_fence_cancel_async(work->fence, &work->waiter);
-		if (ret == 0)
-			nvgpu_kfree(g, work);
-	}
-}
-
-static void gk20a_channel_semaphore_launcher(
-		struct sync_fence *fence,
-		struct sync_fence_waiter *waiter)
-{
-	int err;
-	struct wait_fence_work *w =
-		container_of(waiter, struct wait_fence_work, waiter);
-	struct gk20a *g = w->g;
-
-	/*
-	 * This spinlock must protect a _very_ small critical section -
-	 * otherwise it's possible that the deterministic submit path suffers.
-	 */
-	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
-	if (!nvgpu_list_empty(&g->pending_sema_waits))
-		nvgpu_list_del(&w->entry);
-	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
-
-	gk20a_dbg_info("waiting for pre fence %p '%s'",
-			fence, fence->name);
-	err = sync_fence_wait(fence, -1);
-	if (err < 0)
-		nvgpu_err(g, "error waiting pre-fence: %d", err);
-
-	gk20a_dbg_info(
-		"wait completed (%d) for fence %p '%s', triggering gpu work",
-		err, fence, fence->name);
-	sync_fence_put(fence);
-	nvgpu_semaphore_release(w->sema, w->ch->hw_sema);
-	nvgpu_semaphore_put(w->sema);
-	nvgpu_kfree(g, w);
-}
-#endif
-
 static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 		struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
 		u32 offset, bool acquire, bool wfi)
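
Note on the block removed above: it implements a hand-off protocol rather than plain locking. Waiters are queued on pending_sema_waits; at shutdown the canceller splices the whole list onto a local head in one locked step, after which the asynchronous launcher callback can tell who owns an entry by checking, under the same lock, whether the shared list is still non-empty. Below is a minimal, self-contained userspace C sketch of that splice-and-cancel pattern. The list helpers, struct pending_wait, add_pending_wait() and cancel_pending_waits() are illustrative stand-ins for nvgpu's nvgpu_list_* and raw-spinlock APIs, not nvgpu code.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal intrusive list, standing in for nvgpu_list_node. */
struct list_node { struct list_node *prev, *next; };

static void list_init(struct list_node *h) { h->prev = h->next = h; }
static int  list_empty(const struct list_node *h) { return h->next == h; }

static void list_add(struct list_node *n, struct list_node *h)
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

static void list_del(struct list_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

/* Like nvgpu_list_replace_init(): move the whole chain to 'to', empty 'from'. */
static void list_replace_init(struct list_node *from, struct list_node *to)
{
	if (list_empty(from)) {
		list_init(to);
		return;
	}
	to->next = from->next;
	to->prev = from->prev;
	to->next->prev = to;
	to->prev->next = to;
	list_init(from);
}

/* A pending wait, standing in for wait_fence_work. */
struct pending_wait {
	int id;
	struct list_node entry;
};

/* Recover the containing struct from its embedded node (container_of idiom). */
static struct pending_wait *wait_from_entry(struct list_node *n)
{
	return (struct pending_wait *)
		((char *)n - offsetof(struct pending_wait, entry));
}

static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_node pending_waits = { &pending_waits, &pending_waits };

static void add_pending_wait(struct pending_wait *w)
{
	pthread_mutex_lock(&pending_lock);
	list_add(&w->entry, &pending_waits);
	pthread_mutex_unlock(&pending_lock);
}

/* Shutdown path: take ownership of every pending wait in one locked step. */
static void cancel_pending_waits(void)
{
	struct list_node local;

	pthread_mutex_lock(&pending_lock);
	list_replace_init(&pending_waits, &local);
	pthread_mutex_unlock(&pending_lock);

	/* The shared list is now empty, so this thread owns all of 'local'. */
	while (!list_empty(&local)) {
		struct pending_wait *w = wait_from_entry(local.next);

		list_del(&w->entry);
		printf("cancelled wait %d\n", w->id);
		free(w);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct pending_wait *w = malloc(sizeof(*w));

		if (!w)
			return 1;
		w->id = i;
		add_pending_wait(w);
	}
	cancel_pending_waits();
	return 0;
}

In the real driver the same idea is split across gk20a_start_sema_wait_cancel() and the nvgpu_list_empty() ownership check in gk20a_channel_semaphore_launcher().
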
@@ -638,102 +517,6 @@ put_fence:
 	sync_fence_put(sync_fence);
 	return err;
 }
-
-static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd,
-		       struct priv_cmd_entry *wait_cmd,
-		       struct gk20a_fence *fence_out,
-		       struct sync_timeline *timeline)
-{
-	const int wait_cmd_size = 8;
-	struct sync_fence *sync_fence;
-	struct wait_fence_work *w = NULL;
-	int err, status;
-
-	sync_fence = sync_fence_fdget(fd);
-	if (!sync_fence)
-		return -EINVAL;
-
-	/* If the fence has signaled there is no reason to wait on it. */
-	status = atomic_read(&sync_fence->status);
-	if (status == 0) {
-		sync_fence_put(sync_fence);
-		return 0;
-	}
-
-	err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd);
-	if (err) {
-		nvgpu_err(c->g,
-			"not enough priv cmd buffer space");
-		goto clean_up_sync_fence;
-	}
-
-	w = nvgpu_kzalloc(c->g, sizeof(*w));
-	if (!w) {
-		err = -ENOMEM;
-		goto clean_up_priv_cmd;
-	}
-
-	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
-	w->fence = sync_fence;
-	w->g = c->g;
-	w->ch = c;
-	w->sema = nvgpu_semaphore_alloc(c);
-	if (!w->sema) {
-		nvgpu_err(c->g, "ran out of semaphores");
-		err = -ENOMEM;
-		goto clean_up_worker;
-	}
-
-	/* worker takes one reference */
-	nvgpu_semaphore_get(w->sema);
-	nvgpu_semaphore_incr(w->sema, c->hw_sema);
-
-	/* GPU unblocked when the semaphore value increments. */
-	add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false);
-
-	/*
-	 * We need to create the fence before adding the waiter to ensure
-	 * that we properly clean up in the event the sync_fence has
-	 * already signaled
-	 */
-	err = gk20a_fence_from_semaphore(c->g, fence_out, timeline,
-			w->sema, &c->semaphore_wq, false);
-	if (err)
-		goto clean_up_sema;
-
-	err = sync_fence_wait_async(sync_fence, &w->waiter);
-	gk20a_add_pending_sema_wait(c->g, w);
-
-	/*
-	 * If the sync_fence has already signaled then the above wait_async
-	 * will not get scheduled; the fence completed just after doing the
-	 * status check above before allocs and waiter init, and won the race.
-	 * This causes the waiter to be skipped, so let's release the semaphore
-	 * here and put the refs taken for the worker.
-	 */
-	if (err == 1) {
-		sync_fence_put(sync_fence);
-		nvgpu_semaphore_release(w->sema, c->hw_sema);
-		nvgpu_semaphore_put(w->sema);
-	}
-
-	return 0;
-
-clean_up_sema:
-	/*
-	 * Release the refs to the semaphore, including
-	 * the one for the worker since it will never run.
-	 */
-	nvgpu_semaphore_put(w->sema);
-	nvgpu_semaphore_put(w->sema);
-clean_up_worker:
-	nvgpu_kfree(c->g, w);
-clean_up_priv_cmd:
-	gk20a_free_priv_cmdbuf(c, wait_cmd);
-clean_up_sync_fence:
-	sync_fence_put(sync_fence);
-	return err;
-}
 #endif
 
 static int gk20a_channel_semaphore_wait_fd(
@@ -745,12 +528,8 @@ static int gk20a_channel_semaphore_wait_fd(
 		container_of(s, struct gk20a_channel_semaphore, ops);
 	struct channel_gk20a *c = sema->c;
 #ifdef CONFIG_SYNC
-	int err;
 
-	err = semaphore_wait_fd_native(c, fd, entry);
-	if (err)
-		err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline);
-	return err;
+	return semaphore_wait_fd_native(c, fd, entry);
 #else
 	nvgpu_err(c->g,
 		"trying to use sync fds with CONFIG_SYNC disabled");
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index f4f54145..dd0213dc 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -114,12 +114,4 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c,
 					bool user_managed);
 bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
 
-#ifdef CONFIG_SYNC
-void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
-#else
-static inline void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
-{
-}
-#endif
-
 #endif
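
The header removal above also retires the usual kernel idiom for config-gated functions: declare the real symbol when the option is enabled, and supply a static inline no-op otherwise so call sites compile without #ifdef clutter. A generic sketch of the idiom, with CONFIG_FOO, struct foo and foo_cleanup() as placeholder names rather than nvgpu identifiers:

/* foo.h -- hypothetical example of a config-gated API */
struct foo;

#ifdef CONFIG_FOO
void foo_cleanup(struct foo *f);
#else
/* No-op stub: callers may invoke foo_cleanup() unconditionally. */
static inline void foo_cleanup(struct foo *f)
{
}
#endif
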
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index dd9cb3ce..c1824b07 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1306,10 +1306,6 @@ struct gk20a {
 	 */
 	struct nvgpu_semaphore_sea *sema_sea;
 
-	/* List of pending SW semaphore waits. */
-	struct nvgpu_list_node pending_sema_waits;
-	struct nvgpu_raw_spinlock pending_sema_waits_lock;
-
 	/* held while manipulating # of debug/profiler sessions present */
 	/* also prevents debug sessions from attaching until released */
 	struct nvgpu_mutex dbg_sessions_lock;