Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
 -rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 233 ++++++++++++++-------
 1 file changed, 176 insertions(+), 57 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index d2d8c094..9c8911e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -424,28 +424,52 @@ static void gk20a_channel_semaphore_launcher(
 }
 #endif
 
-static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd,
-		u64 sema, u32 payload, bool acquire, bool wfi)
+static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
+			 struct gk20a_semaphore *s, struct priv_cmd_entry *cmd,
+			 int cmd_size, bool acquire, bool wfi)
 {
 	u32 off = cmd->off;
+	u64 va;
+
+	/*
+	 * RO for acquire (since we just need to read the mem) and RW for
+	 * release since we will need to write back to the semaphore memory.
+	 */
+	va = acquire ? gk20a_semaphore_gpu_ro_va(s) :
+		       gk20a_semaphore_gpu_rw_va(s);
+
+	/*
+	 * If the op is not an acquire (so therefor a release) we should
+	 * incr the underlying sema next_value.
+	 */
+	if (!acquire)
+		gk20a_semaphore_incr(s);
+
 	/* semaphore_a */
 	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004);
 	/* offset_upper */
-	gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff);
+	gk20a_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
 	/* semaphore_b */
 	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005);
 	/* offset */
-	gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff);
-	/* semaphore_c */
-	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
-	/* payload */
-	gk20a_mem_wr32(g, cmd->mem, off++, payload);
+	gk20a_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);
+
 	if (acquire) {
+		/* semaphore_c */
+		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
+		/* payload */
+		gk20a_mem_wr32(g, cmd->mem, off++,
+			       gk20a_semaphore_get_value(s));
 		/* semaphore_d */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
 		/* operation: acq_geq, switch_en */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
 	} else {
+		/* semaphore_c */
+		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
+		/* payload */
+		gk20a_mem_wr32(g, cmd->mem, off++,
+			       gk20a_semaphore_get_value(s));
 		/* semaphore_d */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
 		/* operation: release, wfi */
@@ -456,7 +480,6 @@ static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd,
 		/* ignored */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0);
 	}
-	return off - cmd->off;
 }
 
 static int gk20a_channel_semaphore_wait_syncpt(
@@ -471,6 +494,76 @@ static int gk20a_channel_semaphore_wait_syncpt(
 	return -ENODEV;
 }
 
+/*
+ * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18.
+ * But since there's no API for getting the underlying sync_pts we have to do
+ * some conditional compilation.
+ */
+#ifdef CONFIG_SYNC
+static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
+	struct sync_pt *pt;
+
+	pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
+	return gk20a_sync_pt_inst_get_sema(pt);
+#else
+	return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt);
+#endif
+}
+
+/*
+ * Attempt a fast path for waiting on a sync_fence. Basically if the passed
+ * sync_fence is backed by a gk20a_semaphore then there's no reason to go
+ * through the rigmarole of setting up a separate semaphore which waits on an
+ * interrupt from the GPU and then triggers a worker thread to execute a SW
+ * based semaphore release. Instead just have the GPU wait on the same semaphore
+ * that is going to be incremented by the GPU.
+ *
+ * This function returns 2 possible values: -ENODEV or 0 on success. In the case
+ * of -ENODEV the fastpath cannot be taken due to the fence not being backed by
+ * a GPU semaphore.
+ */
+static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
+					 struct sync_fence *fence,
+					 struct priv_cmd_entry **wait_cmd,
+					 struct gk20a_semaphore **fp_sema)
+{
+	struct gk20a_semaphore *sema;
+	int err;
+
+	if (!gk20a_is_sema_backed_sync_fence(fence))
+		return -ENODEV;
+
+	sema = sema_from_sync_fence(fence);
+
+	/*
+	 * If there's no underlying sema then that means the underlying sema has
+	 * already signaled.
+	 */
+	if (!sema) {
+		*fp_sema = NULL;
+		return 0;
+	}
+
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
+	if (err)
+		return err;
+
+	gk20a_semaphore_get(sema);
+	BUG_ON(!atomic_read(&sema->value));
+	add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false);
+
+	/*
+	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
+	 * fence with the underlying semaphore.
+	 */
+	*fp_sema = sema;
+
+	return 0;
+}
+#endif
+
 static int gk20a_channel_semaphore_wait_fd(
 	struct gk20a_channel_sync *s, int fd,
 	struct priv_cmd_entry **entry,
@@ -480,69 +573,107 @@ static int gk20a_channel_semaphore_wait_fd(
 		container_of(s, struct gk20a_channel_semaphore, ops);
 	struct channel_gk20a *c = sema->c;
 #ifdef CONFIG_SYNC
+	struct gk20a_semaphore *fp_sema;
 	struct sync_fence *sync_fence;
 	struct priv_cmd_entry *wait_cmd = NULL;
-	struct wait_fence_work *w;
-	int written;
-	int err, ret;
-	u64 va;
+	struct wait_fence_work *w = NULL;
+	int err, ret, status;
 
 	sync_fence = gk20a_sync_fence_fdget(fd);
 	if (!sync_fence)
 		return -EINVAL;
 
+	ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
+	if (ret == 0) {
+		if (fp_sema)
+			*fence = gk20a_fence_from_semaphore(sema->timeline,
+							    fp_sema,
+							    &c->semaphore_wq,
+							    NULL, false);
+		else
+			/*
+			 * Allocate an empty fence. It will instantly return
+			 * from gk20a_fence_wait().
+			 */
+			*fence = gk20a_alloc_fence(NULL, NULL, false);
+
+		sync_fence_put(sync_fence);
+		goto skip_slow_path;
+	}
+
+	/* If the fence has signaled there is no reason to wait on it. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
+	status = sync_fence->status;
+#else
+	status = atomic_read(&sync_fence->status);
+#endif
+	if (status) {
+		sync_fence_put(sync_fence);
+		goto skip_slow_path;
+	}
+
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+	if (err) {
+		gk20a_err(dev_from_gk20a(c->g),
+			  "not enough priv cmd buffer space");
+		sync_fence_put(sync_fence);
+		return -ENOMEM;
+	}
+
 	w = kzalloc(sizeof(*w), GFP_KERNEL);
 	if (!w) {
 		err = -ENOMEM;
-		goto fail;
+		goto fail_free_cmdbuf;
 	}
+
 	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
 	w->ch = c;
-	w->sema = gk20a_semaphore_alloc(sema->pool);
+	w->sema = gk20a_semaphore_alloc(c);
 	if (!w->sema) {
 		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
 		err = -ENOMEM;
-		goto fail;
+		goto fail_free_worker;
 	}
 
 	/* worker takes one reference */
 	gk20a_semaphore_get(w->sema);
+	gk20a_semaphore_incr(w->sema);
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
-	if (err) {
-		gk20a_err(dev_from_gk20a(c->g),
-			"not enough priv cmd buffer space");
-		goto fail;
-	}
-
-	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
-	/* GPU unblocked when when the semaphore value becomes 1. */
-	written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false);
+	/* GPU unblocked when the semaphore value increments. */
+	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
 
-	WARN_ON(written != wait_cmd->size);
 	ret = sync_fence_wait_async(sync_fence, &w->waiter);
 
 	/*
 	 * If the sync_fence has already signaled then the above async_wait
 	 * will never trigger. This causes the semaphore release op to never
 	 * happen which, in turn, hangs the GPU. That's bad. So let's just
-	 * do the semaphore_release right now.
+	 * do the gk20a_semaphore_release() right now.
 	 */
-	if (ret == 1)
+	if (ret == 1) {
+		sync_fence_put(sync_fence);
 		gk20a_semaphore_release(w->sema);
+		gk20a_semaphore_put(w->sema);
+	}
 
 	/* XXX - this fixes an actual bug, we need to hold a ref to this
 	   semaphore while the job is in flight. */
 	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
 					    &c->semaphore_wq,
 					    NULL, false);
+
+skip_slow_path:
 	*entry = wait_cmd;
 	return 0;
-fail:
+
+fail_free_worker:
 	if (w && w->sema)
 		gk20a_semaphore_put(w->sema);
 	kfree(w);
 	sync_fence_put(sync_fence);
+fail_free_cmdbuf:
+	if (wait_cmd)
+		gk20a_free_priv_cmdbuf(c, wait_cmd);
 	return err;
 #else
 	gk20a_err(dev_from_gk20a(c->g),
@@ -558,9 +689,7 @@ static int __gk20a_channel_semaphore_incr(
 		struct gk20a_fence **fence,
 		bool need_sync_fence)
 {
-	u64 va;
 	int incr_cmd_size;
-	int written;
 	struct priv_cmd_entry *incr_cmd = NULL;
 	struct gk20a_channel_semaphore *sp =
 		container_of(s, struct gk20a_channel_semaphore, ops);
@@ -568,7 +697,7 @@ static int __gk20a_channel_semaphore_incr(
 	struct gk20a_semaphore *semaphore;
 	int err = 0;
 
-	semaphore = gk20a_semaphore_alloc(sp->pool);
+	semaphore = gk20a_semaphore_alloc(c);
 	if (!semaphore) {
 		gk20a_err(dev_from_gk20a(c->g),
 			"ran out of semaphores");
@@ -585,9 +714,7 @@ static int __gk20a_channel_semaphore_incr(
 	}
 
 	/* Release the completion semaphore. */
-	va = gk20a_semaphore_gpu_va(semaphore, c->vm);
-	written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd);
-	WARN_ON(written != incr_cmd_size);
+	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
 
 	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
 					    &c->semaphore_wq,
@@ -615,8 +742,10 @@ static int gk20a_channel_semaphore_incr(
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
-	return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
-			NULL, entry, fence, need_sync_fence);
+	return __gk20a_channel_semaphore_incr(s,
+			false /* no wfi */,
+			NULL,
+			entry, fence, need_sync_fence);
 }
 
 static int gk20a_channel_semaphore_incr_user(
@@ -679,17 +808,16 @@ static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
 		container_of(s, struct gk20a_channel_semaphore, ops);
 	if (sema->timeline)
 		gk20a_sync_timeline_destroy(sema->timeline);
-	if (sema->pool) {
-		gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm);
-		gk20a_semaphore_pool_put(sema->pool);
-	}
+
+	/* The sema pool is cleaned up by the VM destroy. */
+	sema->pool = NULL;
+
 	kfree(sema);
 }
 
 static struct gk20a_channel_sync *
 gk20a_channel_semaphore_create(struct channel_gk20a *c)
 {
-	int err;
 	int asid = -1;
 	struct gk20a_channel_semaphore *sema;
 	char pool_name[20];
@@ -706,21 +834,15 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 		asid = c->vm->as_share->id;
 
 	sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
-	sema->pool = gk20a_semaphore_pool_alloc(c->g, pool_name, 1024);
-	if (!sema->pool)
-		goto clean_up;
-
-	/* Map the semaphore pool to the channel vm. Map as read-write to the
-	 * owner channel (all other channels should map as read only!). */
-	err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none);
-	if (err)
-		goto clean_up;
+	sema->pool = c->vm->sema_pool;
 
 #ifdef CONFIG_SYNC
 	sema->timeline = gk20a_sync_timeline_create(
 		"gk20a_ch%d_as%d", c->hw_chid, asid);
-	if (!sema->timeline)
-		goto clean_up;
+	if (!sema->timeline) {
+		gk20a_channel_semaphore_destroy(&sema->ops);
+		return NULL;
+	}
 #endif
 	atomic_set(&sema->ops.refcount, 0);
 	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
@@ -734,9 +856,6 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 	sema->ops.destroy = gk20a_channel_semaphore_destroy;
 
 	return &sema->ops;
-clean_up:
-	gk20a_channel_semaphore_destroy(&sema->ops);
-	return NULL;
 }
 
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
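A minimal sketch (not part of the patch) of what the reworked add_sema_cmd() emits for an acquire, read straight off the first hunk: eight 32-bit words pushed one at a time through gk20a_mem_wr32(). The helper below is hypothetical and only lays the stream out flat; the method headers (0x20010004..0x20010007), the VA split, the gk20a_semaphore_get_value() payload, and the acq_geq|switch_en operation word are taken verbatim from the diff.

	/* Hypothetical helper, for illustration only. */
	static inline void sketch_sema_acquire_cmds(u32 *out, u64 va, u32 payload)
	{
		out[0] = 0x20010004;              /* semaphore_a */
		out[1] = (u32)(va >> 32) & 0xff;  /* offset_upper: sema VA bits 39:32 */
		out[2] = 0x20010005;              /* semaphore_b */
		out[3] = (u32)(va & 0xffffffff);  /* offset: sema VA bits 31:0 */
		out[4] = 0x20010006;              /* semaphore_c */
		out[5] = payload;                 /* gk20a_semaphore_get_value(s) */
		out[6] = 0x20010007;              /* semaphore_d */
		out[7] = 0x4 | (0x1 << 12);       /* operation: acq_geq, switch_en */
	}

The release path differs only in bumping the semaphore's next value first via gk20a_semaphore_incr() and in the final operation word (release, optionally wfi). That is why the payload is now gk20a_semaphore_get_value(s) rather than the hard-coded 1 the removed code passed in.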
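The fast path's return contract, condensed from the hunks above into a bare caller skeleton (the control flow is the patch's; the skeleton itself is illustrative):

	ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
	if (ret == 0) {
		if (fp_sema) {
			/* wait_cmd now holds an 8-word acquire on the very
			 * semaphore the GPU will increment; wrap fp_sema in a
			 * gk20a_fence so the reference survives the job. */
		} else {
			/* The backing semaphore already signaled: hand back an
			 * empty fence that gk20a_fence_wait() returns from
			 * immediately. */
		}
	} else {
		/* -ENODEV (fence not semaphore-backed) or a cmdbuf allocation
		 * error: fall through to the slow path, i.e. a SW-released
		 * semaphore driven by sync_fence_wait_async(). */
	}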