diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 88 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/nvgpu_common.c | 3 |
4 files changed, 95 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index ba8fbc98..c3c6fbb8 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -16,6 +16,8 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/gk20a.h> | 18 | #include <linux/gk20a.h> |
19 | |||
20 | #include <linux/list.h> | ||
19 | #include <linux/version.h> | 21 | #include <linux/version.h> |
20 | 22 | ||
21 | #include "channel_sync_gk20a.h" | 23 | #include "channel_sync_gk20a.h" |
@@ -396,10 +398,82 @@ struct gk20a_channel_semaphore { | |||
396 | #ifdef CONFIG_SYNC | 398 | #ifdef CONFIG_SYNC |
397 | struct wait_fence_work { | 399 | struct wait_fence_work { |
398 | struct sync_fence_waiter waiter; | 400 | struct sync_fence_waiter waiter; |
401 | struct sync_fence *fence; | ||
399 | struct channel_gk20a *ch; | 402 | struct channel_gk20a *ch; |
400 | struct gk20a_semaphore *sema; | 403 | struct gk20a_semaphore *sema; |
404 | struct gk20a *g; | ||
405 | struct list_head entry; | ||
401 | }; | 406 | }; |
402 | 407 | ||
408 | /* | ||
409 | * Keep track of all the pending waits on semaphores that exist for a GPU. This | ||
410 | * has to be done because the waits on fences backed by semaphores are | ||
411 | * asynchronous so it's impossible to otherwise know when they will fire. During | ||
412 | * driver cleanup this list can be checked and all existing waits can be | ||
413 | * canceled. | ||
414 | */ | ||
415 | static void gk20a_add_pending_sema_wait(struct gk20a *g, | ||
416 | struct wait_fence_work *work) | ||
417 | { | ||
418 | raw_spin_lock(&g->pending_sema_waits_lock); | ||
419 | list_add(&work->entry, &g->pending_sema_waits); | ||
420 | raw_spin_unlock(&g->pending_sema_waits_lock); | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * Move every entry of the pending wait list onto the passed list and | ||
425 | * then reinitialize the pending list as empty. | ||
426 | */ | ||
427 | static void gk20a_start_sema_wait_cancel(struct gk20a *g, | ||
428 | struct list_head *list) | ||
429 | { | ||
430 | raw_spin_lock(&g->pending_sema_waits_lock); | ||
431 | list_replace_init(&g->pending_sema_waits, list); | ||
432 | raw_spin_unlock(&g->pending_sema_waits_lock); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * During shutdown this should be called to make sure that any pending sema | ||
437 | * waits are canceled. This is a fairly delicate and tricky bit of code. Here's | ||
438 | * how it works. | ||
439 | * | ||
440 | * Every time a semaphore wait is initiated in SW the wait_fence_work struct is | ||
441 | * added to the pending_sema_waits list. When the semaphore launcher code runs | ||
442 | * it checks the pending_sema_waits list. If this list is non-empty that means | ||
443 | * that the wait_fence_work struct must be present and can be removed. | ||
444 | * | ||
445 | * When the driver shuts down one of the steps is to cancel pending sema waits. | ||
446 | * To do this the entire list of pending sema waits is removed (and stored in a | ||
447 | * separate local list). So now, if the semaphore launcher code runs it will see | ||
448 | * that the pending_sema_waits list is empty and knows that it no longer owns | ||
449 | * the wait_fence_work struct. | ||
450 | */ | ||
451 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g) | ||
452 | { | ||
453 | struct wait_fence_work *work; | ||
454 | struct list_head local_pending_sema_waits; | ||
455 | |||
456 | gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits); | ||
457 | |||
458 | while (!list_empty(&local_pending_sema_waits)) { | ||
459 | int ret; | ||
460 | |||
461 | work = list_first_entry(&local_pending_sema_waits, | ||
462 | struct wait_fence_work, | ||
463 | entry); | ||
464 | |||
465 | list_del_init(&work->entry); | ||
466 | |||
467 | /* | ||
468 | * Only kfree() work if the cancel is successful. Otherwise it's | ||
469 | * in use by the gk20a_channel_semaphore_launcher() code. | ||
470 | */ | ||
471 | ret = sync_fence_cancel_async(work->fence, &work->waiter); | ||
472 | if (ret == 0) | ||
473 | kfree(work); | ||
474 | } | ||
475 | } | ||
476 | |||
403 | static void gk20a_channel_semaphore_launcher( | 477 | static void gk20a_channel_semaphore_launcher( |
404 | struct sync_fence *fence, | 478 | struct sync_fence *fence, |
405 | struct sync_fence_waiter *waiter) | 479 | struct sync_fence_waiter *waiter) |
@@ -407,7 +481,16 @@ static void gk20a_channel_semaphore_launcher( | |||
407 | int err; | 481 | int err; |
408 | struct wait_fence_work *w = | 482 | struct wait_fence_work *w = |
409 | container_of(waiter, struct wait_fence_work, waiter); | 483 | container_of(waiter, struct wait_fence_work, waiter); |
410 | struct gk20a *g = w->ch->g; | 484 | struct gk20a *g = w->g; |
485 | |||
486 | /* | ||
487 | * This spinlock must protect a _very_ small critical section - | ||
488 | * otherwise it's possible that the deterministic submit path suffers. | ||
489 | */ | ||
490 | raw_spin_lock(&g->pending_sema_waits_lock); | ||
491 | if (!list_empty(&g->pending_sema_waits)) | ||
492 | list_del_init(&w->entry); | ||
493 | raw_spin_unlock(&g->pending_sema_waits_lock); | ||
411 | 494 | ||
412 | gk20a_dbg_info("waiting for pre fence %p '%s'", | 495 | gk20a_dbg_info("waiting for pre fence %p '%s'", |
413 | fence, fence->name); | 496 | fence, fence->name); |
@@ -631,6 +714,8 @@ static int gk20a_channel_semaphore_wait_fd( | |||
631 | } | 714 | } |
632 | 715 | ||
633 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); | 716 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); |
717 | w->fence = sync_fence; | ||
718 | w->g = c->g; | ||
634 | w->ch = c; | 719 | w->ch = c; |
635 | w->sema = gk20a_semaphore_alloc(c); | 720 | w->sema = gk20a_semaphore_alloc(c); |
636 | if (!w->sema) { | 721 | if (!w->sema) { |
@@ -657,6 +742,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
657 | goto clean_up_sema; | 742 | goto clean_up_sema; |
658 | 743 | ||
659 | ret = sync_fence_wait_async(sync_fence, &w->waiter); | 744 | ret = sync_fence_wait_async(sync_fence, &w->waiter); |
745 | gk20a_add_pending_sema_wait(c->g, w); | ||
660 | 746 | ||
661 | /* | 747 | /* |
662 | * If the sync_fence has already signaled then the above async_wait | 748 | * If the sync_fence has already signaled then the above async_wait |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 5e75dd9b..063a5457 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -25,6 +25,7 @@ struct priv_cmd_entry; | |||
25 | struct channel_gk20a; | 25 | struct channel_gk20a; |
26 | struct gk20a_semaphore; | 26 | struct gk20a_semaphore; |
27 | struct gk20a_fence; | 27 | struct gk20a_fence; |
28 | struct gk20a; | ||
28 | 29 | ||
29 | struct gk20a_channel_sync { | 30 | struct gk20a_channel_sync { |
30 | atomic_t refcount; | 31 | atomic_t refcount; |
@@ -102,5 +103,6 @@ struct gk20a_channel_sync { | |||
102 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); | 103 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); |
103 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); | 104 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); |
104 | bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); | 105 | bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); |
106 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); | ||
105 | 107 | ||
106 | #endif | 108 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a4cbb4b2..987dd517 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -874,6 +874,10 @@ struct gk20a { | |||
874 | */ | 874 | */ |
875 | struct gk20a_semaphore_sea *sema_sea; | 875 | struct gk20a_semaphore_sea *sema_sea; |
876 | 876 | ||
877 | /* List of pending SW semaphore waits. */ | ||
878 | struct list_head pending_sema_waits; | ||
879 | raw_spinlock_t pending_sema_waits_lock; | ||
880 | |||
877 | /* held while manipulating # of debug/profiler sessions present */ | 881 | /* held while manipulating # of debug/profiler sessions present */ |
878 | /* also prevents debug sessions from attaching until released */ | 882 | /* also prevents debug sessions from attaching until released */ |
879 | struct mutex dbg_sessions_lock; | 883 | struct mutex dbg_sessions_lock; |
diff --git a/drivers/gpu/nvgpu/nvgpu_common.c b/drivers/gpu/nvgpu/nvgpu_common.c index d50f2beb..4f0e883f 100644 --- a/drivers/gpu/nvgpu/nvgpu_common.c +++ b/drivers/gpu/nvgpu/nvgpu_common.c | |||
@@ -51,6 +51,8 @@ static void nvgpu_init_vars(struct gk20a *g) | |||
51 | g->dev->dma_parms = &g->dma_parms; | 51 | g->dev->dma_parms = &g->dma_parms; |
52 | dma_set_max_seg_size(g->dev, UINT_MAX); | 52 | dma_set_max_seg_size(g->dev, UINT_MAX); |
53 | 53 | ||
54 | INIT_LIST_HEAD(&g->pending_sema_waits); | ||
55 | raw_spin_lock_init(&g->pending_sema_waits_lock); | ||
54 | } | 56 | } |
55 | 57 | ||
56 | static void nvgpu_init_timeout(struct gk20a *g) | 58 | static void nvgpu_init_timeout(struct gk20a *g) |
@@ -219,4 +221,3 @@ const struct firmware *nvgpu_request_firmware(struct gk20a *g, | |||
219 | 221 | ||
220 | return fw; | 222 | return fw; |
221 | } | 223 | } |
222 | |||