diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/driver_common.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 223 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 |
6 files changed, 1 insertion, 241 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c index 62596d49..c3663117 100644 --- a/drivers/gpu/nvgpu/common/linux/driver_common.c +++ b/drivers/gpu/nvgpu/common/linux/driver_common.c | |||
@@ -77,9 +77,6 @@ static void nvgpu_init_vars(struct gk20a *g) | |||
77 | dma_set_mask(dev, DMA_BIT_MASK(34)); | 77 | dma_set_mask(dev, DMA_BIT_MASK(34)); |
78 | dma_set_coherent_mask(dev, DMA_BIT_MASK(34)); | 78 | dma_set_coherent_mask(dev, DMA_BIT_MASK(34)); |
79 | 79 | ||
80 | nvgpu_init_list_node(&g->pending_sema_waits); | ||
81 | nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock); | ||
82 | |||
83 | nvgpu_init_list_node(&g->profiler_objects); | 80 | nvgpu_init_list_node(&g->profiler_objects); |
84 | 81 | ||
85 | nvgpu_init_list_node(&g->boardobj_head); | 82 | nvgpu_init_list_node(&g->boardobj_head); |
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 84cc3399..d79de211 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -1052,7 +1052,6 @@ void gk20a_driver_start_unload(struct gk20a *g) | |||
1052 | gk20a_wait_for_idle(g); | 1052 | gk20a_wait_for_idle(g); |
1053 | 1053 | ||
1054 | nvgpu_wait_for_deferred_interrupts(g); | 1054 | nvgpu_wait_for_deferred_interrupts(g); |
1055 | gk20a_channel_cancel_pending_sema_waits(g); | ||
1056 | 1055 | ||
1057 | if (l->nonstall_work_queue) { | 1056 | if (l->nonstall_work_queue) { |
1058 | cancel_work_sync(&l->nonstall_fn_work); | 1057 | cancel_work_sync(&l->nonstall_fn_work); |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c index 6f040603..c3d95b4a 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c | |||
@@ -80,9 +80,6 @@ static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) | |||
80 | l->regs_saved = l->regs; | 80 | l->regs_saved = l->regs; |
81 | l->bar1_saved = l->bar1; | 81 | l->bar1_saved = l->bar1; |
82 | 82 | ||
83 | nvgpu_init_list_node(&g->pending_sema_waits); | ||
84 | nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock); | ||
85 | |||
86 | g->aggressive_sync_destroy = platform->aggressive_sync_destroy; | 83 | g->aggressive_sync_destroy = platform->aggressive_sync_destroy; |
87 | g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; | 84 | g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; |
88 | g->has_syncpoints = platform->has_syncpoints; | 85 | g->has_syncpoints = platform->has_syncpoints; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index e896eb88..20460c80 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -379,127 +379,6 @@ struct gk20a_channel_semaphore { | |||
379 | struct sync_timeline *timeline; | 379 | struct sync_timeline *timeline; |
380 | }; | 380 | }; |
381 | 381 | ||
382 | #ifdef CONFIG_SYNC | ||
383 | struct wait_fence_work { | ||
384 | struct sync_fence_waiter waiter; | ||
385 | struct sync_fence *fence; | ||
386 | struct channel_gk20a *ch; | ||
387 | struct nvgpu_semaphore *sema; | ||
388 | struct gk20a *g; | ||
389 | struct nvgpu_list_node entry; | ||
390 | }; | ||
391 | |||
392 | static inline struct wait_fence_work * | ||
393 | wait_fence_work_from_entry(struct nvgpu_list_node *node) | ||
394 | { | ||
395 | return (struct wait_fence_work *) | ||
396 | ((uintptr_t)node - offsetof(struct wait_fence_work, entry)); | ||
397 | }; | ||
398 | |||
399 | /* | ||
400 | * Keep track of all the pending waits on semaphores that exist for a GPU. This | ||
401 | * has to be done because the waits on fences backed by semaphores are | ||
402 | * asynchronous so it's impossible to otherwise know when they will fire. During | ||
403 | * driver cleanup this list can be checked and all existing waits can be | ||
404 | * canceled. | ||
405 | */ | ||
406 | static void gk20a_add_pending_sema_wait(struct gk20a *g, | ||
407 | struct wait_fence_work *work) | ||
408 | { | ||
409 | nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock); | ||
410 | nvgpu_list_add(&work->entry, &g->pending_sema_waits); | ||
411 | nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock); | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Copy the list head from the pending wait list to the passed list and | ||
416 | * then delete the entire pending list. | ||
417 | */ | ||
418 | static void gk20a_start_sema_wait_cancel(struct gk20a *g, | ||
419 | struct nvgpu_list_node *list) | ||
420 | { | ||
421 | nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock); | ||
422 | nvgpu_list_replace_init(&g->pending_sema_waits, list); | ||
423 | nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock); | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * During shutdown this should be called to make sure that any pending sema | ||
428 | * waits are canceled. This is a fairly delicate and tricky bit of code. Here's | ||
429 | * how it works. | ||
430 | * | ||
431 | * Every time a semaphore wait is initiated in SW the wait_fence_work struct is | ||
432 | * added to the pending_sema_waits list. When the semaphore launcher code runs | ||
433 | * it checks the pending_sema_waits list. If this list is non-empty that means | ||
434 | * that the wait_fence_work struct must be present and can be removed. | ||
435 | * | ||
436 | * When the driver shuts down one of the steps is to cancel pending sema waits. | ||
437 | * To do this the entire list of pending sema waits is removed (and stored in a | ||
438 | * separate local list). So now, if the semaphore launcher code runs it will see | ||
439 | * that the pending_sema_waits list is empty and knows that it no longer owns | ||
440 | * the wait_fence_work struct. | ||
441 | */ | ||
442 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g) | ||
443 | { | ||
444 | struct wait_fence_work *work; | ||
445 | struct nvgpu_list_node local_pending_sema_waits; | ||
446 | |||
447 | gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits); | ||
448 | |||
449 | while (!nvgpu_list_empty(&local_pending_sema_waits)) { | ||
450 | int ret; | ||
451 | |||
452 | work = nvgpu_list_first_entry(&local_pending_sema_waits, | ||
453 | wait_fence_work, | ||
454 | entry); | ||
455 | |||
456 | nvgpu_list_del(&work->entry); | ||
457 | |||
458 | /* | ||
459 | * Only nvgpu_kfree() work if the cancel is successful. | ||
460 | * Otherwise it's in use by the | ||
461 | * gk20a_channel_semaphore_launcher() code. | ||
462 | */ | ||
463 | ret = sync_fence_cancel_async(work->fence, &work->waiter); | ||
464 | if (ret == 0) | ||
465 | nvgpu_kfree(g, work); | ||
466 | } | ||
467 | } | ||
468 | |||
469 | static void gk20a_channel_semaphore_launcher( | ||
470 | struct sync_fence *fence, | ||
471 | struct sync_fence_waiter *waiter) | ||
472 | { | ||
473 | int err; | ||
474 | struct wait_fence_work *w = | ||
475 | container_of(waiter, struct wait_fence_work, waiter); | ||
476 | struct gk20a *g = w->g; | ||
477 | |||
478 | /* | ||
479 | * This spinlock must protect a _very_ small critical section - | ||
480 | * otherwise it's possible that the deterministic submit path suffers. | ||
481 | */ | ||
482 | nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock); | ||
483 | if (!nvgpu_list_empty(&g->pending_sema_waits)) | ||
484 | nvgpu_list_del(&w->entry); | ||
485 | nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock); | ||
486 | |||
487 | gk20a_dbg_info("waiting for pre fence %p '%s'", | ||
488 | fence, fence->name); | ||
489 | err = sync_fence_wait(fence, -1); | ||
490 | if (err < 0) | ||
491 | nvgpu_err(g, "error waiting pre-fence: %d", err); | ||
492 | |||
493 | gk20a_dbg_info( | ||
494 | "wait completed (%d) for fence %p '%s', triggering gpu work", | ||
495 | err, fence, fence->name); | ||
496 | sync_fence_put(fence); | ||
497 | nvgpu_semaphore_release(w->sema, w->ch->hw_sema); | ||
498 | nvgpu_semaphore_put(w->sema); | ||
499 | nvgpu_kfree(g, w); | ||
500 | } | ||
501 | #endif | ||
502 | |||
503 | static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | 382 | static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, |
504 | struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd, | 383 | struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd, |
505 | u32 offset, bool acquire, bool wfi) | 384 | u32 offset, bool acquire, bool wfi) |
@@ -638,102 +517,6 @@ put_fence: | |||
638 | sync_fence_put(sync_fence); | 517 | sync_fence_put(sync_fence); |
639 | return err; | 518 | return err; |
640 | } | 519 | } |
641 | |||
642 | static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd, | ||
643 | struct priv_cmd_entry *wait_cmd, | ||
644 | struct gk20a_fence *fence_out, | ||
645 | struct sync_timeline *timeline) | ||
646 | { | ||
647 | const int wait_cmd_size = 8; | ||
648 | struct sync_fence *sync_fence; | ||
649 | struct wait_fence_work *w = NULL; | ||
650 | int err, status; | ||
651 | |||
652 | sync_fence = sync_fence_fdget(fd); | ||
653 | if (!sync_fence) | ||
654 | return -EINVAL; | ||
655 | |||
656 | /* If the fence has signaled there is no reason to wait on it. */ | ||
657 | status = atomic_read(&sync_fence->status); | ||
658 | if (status == 0) { | ||
659 | sync_fence_put(sync_fence); | ||
660 | return 0; | ||
661 | } | ||
662 | |||
663 | err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd); | ||
664 | if (err) { | ||
665 | nvgpu_err(c->g, | ||
666 | "not enough priv cmd buffer space"); | ||
667 | goto clean_up_sync_fence; | ||
668 | } | ||
669 | |||
670 | w = nvgpu_kzalloc(c->g, sizeof(*w)); | ||
671 | if (!w) { | ||
672 | err = -ENOMEM; | ||
673 | goto clean_up_priv_cmd; | ||
674 | } | ||
675 | |||
676 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); | ||
677 | w->fence = sync_fence; | ||
678 | w->g = c->g; | ||
679 | w->ch = c; | ||
680 | w->sema = nvgpu_semaphore_alloc(c); | ||
681 | if (!w->sema) { | ||
682 | nvgpu_err(c->g, "ran out of semaphores"); | ||
683 | err = -ENOMEM; | ||
684 | goto clean_up_worker; | ||
685 | } | ||
686 | |||
687 | /* worker takes one reference */ | ||
688 | nvgpu_semaphore_get(w->sema); | ||
689 | nvgpu_semaphore_incr(w->sema, c->hw_sema); | ||
690 | |||
691 | /* GPU unblocked when the semaphore value increments. */ | ||
692 | add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false); | ||
693 | |||
694 | /* | ||
695 | * We need to create the fence before adding the waiter to ensure | ||
696 | * that we properly clean up in the event the sync_fence has | ||
697 | * already signaled | ||
698 | */ | ||
699 | err = gk20a_fence_from_semaphore(c->g, fence_out, timeline, | ||
700 | w->sema, &c->semaphore_wq, false); | ||
701 | if (err) | ||
702 | goto clean_up_sema; | ||
703 | |||
704 | err = sync_fence_wait_async(sync_fence, &w->waiter); | ||
705 | gk20a_add_pending_sema_wait(c->g, w); | ||
706 | |||
707 | /* | ||
708 | * If the sync_fence has already signaled then the above wait_async | ||
709 | * will not get scheduled; the fence completed just after doing the | ||
710 | * status check above before allocs and waiter init, and won the race. | ||
711 | * This causes the waiter to be skipped, so let's release the semaphore | ||
712 | * here and put the refs taken for the worker. | ||
713 | */ | ||
714 | if (err == 1) { | ||
715 | sync_fence_put(sync_fence); | ||
716 | nvgpu_semaphore_release(w->sema, c->hw_sema); | ||
717 | nvgpu_semaphore_put(w->sema); | ||
718 | } | ||
719 | |||
720 | return 0; | ||
721 | |||
722 | clean_up_sema: | ||
723 | /* | ||
724 | * Release the refs to the semaphore, including | ||
725 | * the one for the worker since it will never run. | ||
726 | */ | ||
727 | nvgpu_semaphore_put(w->sema); | ||
728 | nvgpu_semaphore_put(w->sema); | ||
729 | clean_up_worker: | ||
730 | nvgpu_kfree(c->g, w); | ||
731 | clean_up_priv_cmd: | ||
732 | gk20a_free_priv_cmdbuf(c, wait_cmd); | ||
733 | clean_up_sync_fence: | ||
734 | sync_fence_put(sync_fence); | ||
735 | return err; | ||
736 | } | ||
737 | #endif | 520 | #endif |
738 | 521 | ||
739 | static int gk20a_channel_semaphore_wait_fd( | 522 | static int gk20a_channel_semaphore_wait_fd( |
@@ -745,12 +528,8 @@ static int gk20a_channel_semaphore_wait_fd( | |||
745 | container_of(s, struct gk20a_channel_semaphore, ops); | 528 | container_of(s, struct gk20a_channel_semaphore, ops); |
746 | struct channel_gk20a *c = sema->c; | 529 | struct channel_gk20a *c = sema->c; |
747 | #ifdef CONFIG_SYNC | 530 | #ifdef CONFIG_SYNC |
748 | int err; | ||
749 | 531 | ||
750 | err = semaphore_wait_fd_native(c, fd, entry); | 532 | return semaphore_wait_fd_native(c, fd, entry); |
751 | if (err) | ||
752 | err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline); | ||
753 | return err; | ||
754 | #else | 533 | #else |
755 | nvgpu_err(c->g, | 534 | nvgpu_err(c->g, |
756 | "trying to use sync fds with CONFIG_SYNC disabled"); | 535 | "trying to use sync fds with CONFIG_SYNC disabled"); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index f4f54145..dd0213dc 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -114,12 +114,4 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c, | |||
114 | bool user_managed); | 114 | bool user_managed); |
115 | bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); | 115 | bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); |
116 | 116 | ||
117 | #ifdef CONFIG_SYNC | ||
118 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); | ||
119 | #else | ||
120 | static inline void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g) | ||
121 | { | ||
122 | } | ||
123 | #endif | ||
124 | |||
125 | #endif | 117 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index dd9cb3ce..c1824b07 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1306,10 +1306,6 @@ struct gk20a { | |||
1306 | */ | 1306 | */ |
1307 | struct nvgpu_semaphore_sea *sema_sea; | 1307 | struct nvgpu_semaphore_sea *sema_sea; |
1308 | 1308 | ||
1309 | /* List of pending SW semaphore waits. */ | ||
1310 | struct nvgpu_list_node pending_sema_waits; | ||
1311 | struct nvgpu_raw_spinlock pending_sema_waits_lock; | ||
1312 | |||
1313 | /* held while manipulating # of debug/profiler sessions present */ | 1309 | /* held while manipulating # of debug/profiler sessions present */ |
1314 | /* also prevents debug sessions from attaching until released */ | 1310 | /* also prevents debug sessions from attaching until released */ |
1315 | struct nvgpu_mutex dbg_sessions_lock; | 1311 | struct nvgpu_mutex dbg_sessions_lock; |