diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 233 |
1 files changed, 176 insertions, 57 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index d2d8c094..9c8911e9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -424,28 +424,52 @@ static void gk20a_channel_semaphore_launcher( | |||
424 | } | 424 | } |
425 | #endif | 425 | #endif |
426 | 426 | ||
427 | static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, | 427 | static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, |
428 | u64 sema, u32 payload, bool acquire, bool wfi) | 428 | struct gk20a_semaphore *s, struct priv_cmd_entry *cmd, |
429 | int cmd_size, bool acquire, bool wfi) | ||
429 | { | 430 | { |
430 | u32 off = cmd->off; | 431 | u32 off = cmd->off; |
432 | u64 va; | ||
433 | |||
434 | /* | ||
435 | * RO for acquire (since we just need to read the mem) and RW for | ||
436 | * release since we will need to write back to the semaphore memory. | ||
437 | */ | ||
438 | va = acquire ? gk20a_semaphore_gpu_ro_va(s) : | ||
439 | gk20a_semaphore_gpu_rw_va(s); | ||
440 | |||
441 | /* | ||
442 | * If the op is not an acquire (so therefore a release) we should | |||
443 | * incr the underlying sema next_value. | ||
444 | */ | ||
445 | if (!acquire) | ||
446 | gk20a_semaphore_incr(s); | ||
447 | |||
431 | /* semaphore_a */ | 448 | /* semaphore_a */ |
432 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004); | 449 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004); |
433 | /* offset_upper */ | 450 | /* offset_upper */ |
434 | gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff); | 451 | gk20a_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff); |
435 | /* semaphore_b */ | 452 | /* semaphore_b */ |
436 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005); | 453 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005); |
437 | /* offset */ | 454 | /* offset */ |
438 | gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff); | 455 | gk20a_mem_wr32(g, cmd->mem, off++, va & 0xffffffff); |
439 | /* semaphore_c */ | 456 | |
440 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006); | ||
441 | /* payload */ | ||
442 | gk20a_mem_wr32(g, cmd->mem, off++, payload); | ||
443 | if (acquire) { | 457 | if (acquire) { |
458 | /* semaphore_c */ | ||
459 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006); | ||
460 | /* payload */ | ||
461 | gk20a_mem_wr32(g, cmd->mem, off++, | ||
462 | gk20a_semaphore_get_value(s)); | ||
444 | /* semaphore_d */ | 463 | /* semaphore_d */ |
445 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); | 464 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); |
446 | /* operation: acq_geq, switch_en */ | 465 | /* operation: acq_geq, switch_en */ |
447 | gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); | 466 | gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); |
448 | } else { | 467 | } else { |
468 | /* semaphore_c */ | ||
469 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006); | ||
470 | /* payload */ | ||
471 | gk20a_mem_wr32(g, cmd->mem, off++, | ||
472 | gk20a_semaphore_get_value(s)); | ||
449 | /* semaphore_d */ | 473 | /* semaphore_d */ |
450 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); | 474 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); |
451 | /* operation: release, wfi */ | 475 | /* operation: release, wfi */ |
@@ -456,7 +480,6 @@ static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, | |||
456 | /* ignored */ | 480 | /* ignored */ |
457 | gk20a_mem_wr32(g, cmd->mem, off++, 0); | 481 | gk20a_mem_wr32(g, cmd->mem, off++, 0); |
458 | } | 482 | } |
459 | return off - cmd->off; | ||
460 | } | 483 | } |
461 | 484 | ||
462 | static int gk20a_channel_semaphore_wait_syncpt( | 485 | static int gk20a_channel_semaphore_wait_syncpt( |
@@ -471,6 +494,76 @@ static int gk20a_channel_semaphore_wait_syncpt( | |||
471 | return -ENODEV; | 494 | return -ENODEV; |
472 | } | 495 | } |
473 | 496 | ||
497 | /* | ||
498 | * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18. | ||
499 | * But since there's no API for getting the underlying sync_pts we have to do | ||
500 | * some conditional compilation. | ||
501 | */ | ||
502 | #ifdef CONFIG_SYNC | ||
503 | static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f) | ||
504 | { | ||
505 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
506 | struct sync_pt *pt; | ||
507 | |||
508 | pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list); | ||
509 | return gk20a_sync_pt_inst_get_sema(pt); | ||
510 | #else | ||
511 | return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt); | ||
512 | #endif | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Attempt a fast path for waiting on a sync_fence. Basically if the passed | ||
517 | * sync_fence is backed by a gk20a_semaphore then there's no reason to go | ||
518 | * through the rigmarole of setting up a separate semaphore which waits on an | ||
519 | * interrupt from the GPU and then triggers a worker thread to execute a SW | ||
520 | * based semaphore release. Instead just have the GPU wait on the same semaphore | ||
521 | * that is going to be incremented by the GPU. | ||
522 | * | ||
523 | * This function returns 0 on success or -ENODEV when the fastpath cannot be | |||
524 | * taken due to the fence not being backed by a GPU semaphore. A failure of | |||
525 | * gk20a_channel_alloc_priv_cmdbuf() is also passed back to the caller. | |||
526 | */ | ||
527 | static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, | ||
528 | struct sync_fence *fence, | ||
529 | struct priv_cmd_entry **wait_cmd, | ||
530 | struct gk20a_semaphore **fp_sema) | ||
531 | { | ||
532 | struct gk20a_semaphore *sema; | ||
533 | int err; | ||
534 | |||
535 | if (!gk20a_is_sema_backed_sync_fence(fence)) | ||
536 | return -ENODEV; | ||
537 | |||
538 | sema = sema_from_sync_fence(fence); | ||
539 | |||
540 | /* | ||
541 | * If there's no underlying sema then that means the underlying sema has | ||
542 | * already signaled. | ||
543 | */ | ||
544 | if (!sema) { | ||
545 | *fp_sema = NULL; | ||
546 | return 0; | ||
547 | } | ||
548 | |||
549 | err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd); | ||
550 | if (err) | ||
551 | return err; | ||
552 | |||
553 | gk20a_semaphore_get(sema); | ||
554 | BUG_ON(!atomic_read(&sema->value)); | ||
555 | add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false); | ||
556 | |||
557 | /* | ||
558 | * Make sure that gk20a_channel_semaphore_wait_fd() can create another | ||
559 | * fence with the underlying semaphore. | ||
560 | */ | ||
561 | *fp_sema = sema; | ||
562 | |||
563 | return 0; | ||
564 | } | ||
565 | #endif | ||
566 | |||
474 | static int gk20a_channel_semaphore_wait_fd( | 567 | static int gk20a_channel_semaphore_wait_fd( |
475 | struct gk20a_channel_sync *s, int fd, | 568 | struct gk20a_channel_sync *s, int fd, |
476 | struct priv_cmd_entry **entry, | 569 | struct priv_cmd_entry **entry, |
@@ -480,69 +573,107 @@ static int gk20a_channel_semaphore_wait_fd( | |||
480 | container_of(s, struct gk20a_channel_semaphore, ops); | 573 | container_of(s, struct gk20a_channel_semaphore, ops); |
481 | struct channel_gk20a *c = sema->c; | 574 | struct channel_gk20a *c = sema->c; |
482 | #ifdef CONFIG_SYNC | 575 | #ifdef CONFIG_SYNC |
576 | struct gk20a_semaphore *fp_sema; | ||
483 | struct sync_fence *sync_fence; | 577 | struct sync_fence *sync_fence; |
484 | struct priv_cmd_entry *wait_cmd = NULL; | 578 | struct priv_cmd_entry *wait_cmd = NULL; |
485 | struct wait_fence_work *w; | 579 | struct wait_fence_work *w = NULL; |
486 | int written; | 580 | int err, ret, status; |
487 | int err, ret; | ||
488 | u64 va; | ||
489 | 581 | ||
490 | sync_fence = gk20a_sync_fence_fdget(fd); | 582 | sync_fence = gk20a_sync_fence_fdget(fd); |
491 | if (!sync_fence) | 583 | if (!sync_fence) |
492 | return -EINVAL; | 584 | return -EINVAL; |
493 | 585 | ||
586 | ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema); | ||
587 | if (ret == 0) { | ||
588 | if (fp_sema) | ||
589 | *fence = gk20a_fence_from_semaphore(sema->timeline, | ||
590 | fp_sema, | ||
591 | &c->semaphore_wq, | ||
592 | NULL, false); | ||
593 | else | ||
594 | /* | ||
595 | * Allocate an empty fence. It will instantly return | ||
596 | * from gk20a_fence_wait(). | ||
597 | */ | ||
598 | *fence = gk20a_alloc_fence(NULL, NULL, false); | ||
599 | |||
600 | sync_fence_put(sync_fence); | ||
601 | goto skip_slow_path; | ||
602 | } | ||
603 | |||
604 | /* If the fence has signaled there is no reason to wait on it. */ | ||
605 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
606 | status = sync_fence->status; | ||
607 | #else | ||
608 | status = atomic_read(&sync_fence->status); | ||
609 | #endif | ||
610 | if (status) { | ||
611 | sync_fence_put(sync_fence); | ||
612 | goto skip_slow_path; | ||
613 | } | ||
614 | |||
615 | err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd); | ||
616 | if (err) { | ||
617 | gk20a_err(dev_from_gk20a(c->g), | ||
618 | "not enough priv cmd buffer space"); | ||
619 | sync_fence_put(sync_fence); | ||
620 | return -ENOMEM; | ||
621 | } | ||
622 | |||
494 | w = kzalloc(sizeof(*w), GFP_KERNEL); | 623 | w = kzalloc(sizeof(*w), GFP_KERNEL); |
495 | if (!w) { | 624 | if (!w) { |
496 | err = -ENOMEM; | 625 | err = -ENOMEM; |
497 | goto fail; | 626 | goto fail_free_cmdbuf; |
498 | } | 627 | } |
628 | |||
499 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); | 629 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); |
500 | w->ch = c; | 630 | w->ch = c; |
501 | w->sema = gk20a_semaphore_alloc(sema->pool); | 631 | w->sema = gk20a_semaphore_alloc(c); |
502 | if (!w->sema) { | 632 | if (!w->sema) { |
503 | gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores"); | 633 | gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores"); |
504 | err = -ENOMEM; | 634 | err = -ENOMEM; |
505 | goto fail; | 635 | goto fail_free_worker; |
506 | } | 636 | } |
507 | 637 | ||
508 | /* worker takes one reference */ | 638 | /* worker takes one reference */ |
509 | gk20a_semaphore_get(w->sema); | 639 | gk20a_semaphore_get(w->sema); |
640 | gk20a_semaphore_incr(w->sema); | ||
510 | 641 | ||
511 | err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd); | 642 | /* GPU unblocked when the semaphore value increments. */ |
512 | if (err) { | 643 | add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); |
513 | gk20a_err(dev_from_gk20a(c->g), | ||
514 | "not enough priv cmd buffer space"); | ||
515 | goto fail; | ||
516 | } | ||
517 | |||
518 | va = gk20a_semaphore_gpu_va(w->sema, c->vm); | ||
519 | /* GPU unblocked when the semaphore value becomes 1. */ | ||
520 | written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false); | ||
521 | 644 | ||
522 | WARN_ON(written != wait_cmd->size); | ||
523 | ret = sync_fence_wait_async(sync_fence, &w->waiter); | 645 | ret = sync_fence_wait_async(sync_fence, &w->waiter); |
524 | 646 | ||
525 | /* | 647 | /* |
526 | * If the sync_fence has already signaled then the above async_wait | 648 | * If the sync_fence has already signaled then the above async_wait |
527 | * will never trigger. This causes the semaphore release op to never | 649 | * will never trigger. This causes the semaphore release op to never |
528 | * happen which, in turn, hangs the GPU. That's bad. So let's just | 650 | * happen which, in turn, hangs the GPU. That's bad. So let's just |
529 | * do the semaphore_release right now. | 651 | * do the gk20a_semaphore_release() right now. |
530 | */ | 652 | */ |
531 | if (ret == 1) | 653 | if (ret == 1) { |
654 | sync_fence_put(sync_fence); | ||
532 | gk20a_semaphore_release(w->sema); | 655 | gk20a_semaphore_release(w->sema); |
656 | gk20a_semaphore_put(w->sema); | ||
657 | } | ||
533 | 658 | ||
534 | /* XXX - this fixes an actual bug, we need to hold a ref to this | 659 | /* XXX - this fixes an actual bug, we need to hold a ref to this |
535 | semaphore while the job is in flight. */ | 660 | semaphore while the job is in flight. */ |
536 | *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema, | 661 | *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema, |
537 | &c->semaphore_wq, | 662 | &c->semaphore_wq, |
538 | NULL, false); | 663 | NULL, false); |
664 | |||
665 | skip_slow_path: | ||
539 | *entry = wait_cmd; | 666 | *entry = wait_cmd; |
540 | return 0; | 667 | return 0; |
541 | fail: | 668 | |
669 | fail_free_worker: | ||
542 | if (w && w->sema) | 670 | if (w && w->sema) |
543 | gk20a_semaphore_put(w->sema); | 671 | gk20a_semaphore_put(w->sema); |
544 | kfree(w); | 672 | kfree(w); |
545 | sync_fence_put(sync_fence); | 673 | sync_fence_put(sync_fence); |
674 | fail_free_cmdbuf: | ||
675 | if (wait_cmd) | ||
676 | gk20a_free_priv_cmdbuf(c, wait_cmd); | ||
546 | return err; | 677 | return err; |
547 | #else | 678 | #else |
548 | gk20a_err(dev_from_gk20a(c->g), | 679 | gk20a_err(dev_from_gk20a(c->g), |
@@ -558,9 +689,7 @@ static int __gk20a_channel_semaphore_incr( | |||
558 | struct gk20a_fence **fence, | 689 | struct gk20a_fence **fence, |
559 | bool need_sync_fence) | 690 | bool need_sync_fence) |
560 | { | 691 | { |
561 | u64 va; | ||
562 | int incr_cmd_size; | 692 | int incr_cmd_size; |
563 | int written; | ||
564 | struct priv_cmd_entry *incr_cmd = NULL; | 693 | struct priv_cmd_entry *incr_cmd = NULL; |
565 | struct gk20a_channel_semaphore *sp = | 694 | struct gk20a_channel_semaphore *sp = |
566 | container_of(s, struct gk20a_channel_semaphore, ops); | 695 | container_of(s, struct gk20a_channel_semaphore, ops); |
@@ -568,7 +697,7 @@ static int __gk20a_channel_semaphore_incr( | |||
568 | struct gk20a_semaphore *semaphore; | 697 | struct gk20a_semaphore *semaphore; |
569 | int err = 0; | 698 | int err = 0; |
570 | 699 | ||
571 | semaphore = gk20a_semaphore_alloc(sp->pool); | 700 | semaphore = gk20a_semaphore_alloc(c); |
572 | if (!semaphore) { | 701 | if (!semaphore) { |
573 | gk20a_err(dev_from_gk20a(c->g), | 702 | gk20a_err(dev_from_gk20a(c->g), |
574 | "ran out of semaphores"); | 703 | "ran out of semaphores"); |
@@ -585,9 +714,7 @@ static int __gk20a_channel_semaphore_incr( | |||
585 | } | 714 | } |
586 | 715 | ||
587 | /* Release the completion semaphore. */ | 716 | /* Release the completion semaphore. */ |
588 | va = gk20a_semaphore_gpu_va(semaphore, c->vm); | 717 | add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd); |
589 | written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd); | ||
590 | WARN_ON(written != incr_cmd_size); | ||
591 | 718 | ||
592 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, | 719 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, |
593 | &c->semaphore_wq, | 720 | &c->semaphore_wq, |
@@ -615,8 +742,10 @@ static int gk20a_channel_semaphore_incr( | |||
615 | { | 742 | { |
616 | /* Don't put wfi cmd to this one since we're not returning | 743 | /* Don't put wfi cmd to this one since we're not returning |
617 | * a fence to user space. */ | 744 | * a fence to user space. */ |
618 | return __gk20a_channel_semaphore_incr(s, false /* no wfi */, | 745 | return __gk20a_channel_semaphore_incr(s, |
619 | NULL, entry, fence, need_sync_fence); | 746 | false /* no wfi */, |
747 | NULL, | ||
748 | entry, fence, need_sync_fence); | ||
620 | } | 749 | } |
621 | 750 | ||
622 | static int gk20a_channel_semaphore_incr_user( | 751 | static int gk20a_channel_semaphore_incr_user( |
@@ -679,17 +808,16 @@ static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) | |||
679 | container_of(s, struct gk20a_channel_semaphore, ops); | 808 | container_of(s, struct gk20a_channel_semaphore, ops); |
680 | if (sema->timeline) | 809 | if (sema->timeline) |
681 | gk20a_sync_timeline_destroy(sema->timeline); | 810 | gk20a_sync_timeline_destroy(sema->timeline); |
682 | if (sema->pool) { | 811 | |
683 | gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm); | 812 | /* The sema pool is cleaned up by the VM destroy. */ |
684 | gk20a_semaphore_pool_put(sema->pool); | 813 | sema->pool = NULL; |
685 | } | 814 | |
686 | kfree(sema); | 815 | kfree(sema); |
687 | } | 816 | } |
688 | 817 | ||
689 | static struct gk20a_channel_sync * | 818 | static struct gk20a_channel_sync * |
690 | gk20a_channel_semaphore_create(struct channel_gk20a *c) | 819 | gk20a_channel_semaphore_create(struct channel_gk20a *c) |
691 | { | 820 | { |
692 | int err; | ||
693 | int asid = -1; | 821 | int asid = -1; |
694 | struct gk20a_channel_semaphore *sema; | 822 | struct gk20a_channel_semaphore *sema; |
695 | char pool_name[20]; | 823 | char pool_name[20]; |
@@ -706,21 +834,15 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) | |||
706 | asid = c->vm->as_share->id; | 834 | asid = c->vm->as_share->id; |
707 | 835 | ||
708 | sprintf(pool_name, "semaphore_pool-%d", c->hw_chid); | 836 | sprintf(pool_name, "semaphore_pool-%d", c->hw_chid); |
709 | sema->pool = gk20a_semaphore_pool_alloc(c->g, pool_name, 1024); | 837 | sema->pool = c->vm->sema_pool; |
710 | if (!sema->pool) | ||
711 | goto clean_up; | ||
712 | |||
713 | /* Map the semaphore pool to the channel vm. Map as read-write to the | ||
714 | * owner channel (all other channels should map as read only!). */ | ||
715 | err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none); | ||
716 | if (err) | ||
717 | goto clean_up; | ||
718 | 838 | ||
719 | #ifdef CONFIG_SYNC | 839 | #ifdef CONFIG_SYNC |
720 | sema->timeline = gk20a_sync_timeline_create( | 840 | sema->timeline = gk20a_sync_timeline_create( |
721 | "gk20a_ch%d_as%d", c->hw_chid, asid); | 841 | "gk20a_ch%d_as%d", c->hw_chid, asid); |
722 | if (!sema->timeline) | 842 | if (!sema->timeline) { |
723 | goto clean_up; | 843 | gk20a_channel_semaphore_destroy(&sema->ops); |
844 | return NULL; | ||
845 | } | ||
724 | #endif | 846 | #endif |
725 | atomic_set(&sema->ops.refcount, 0); | 847 | atomic_set(&sema->ops.refcount, 0); |
726 | sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; | 848 | sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; |
@@ -734,9 +856,6 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) | |||
734 | sema->ops.destroy = gk20a_channel_semaphore_destroy; | 856 | sema->ops.destroy = gk20a_channel_semaphore_destroy; |
735 | 857 | ||
736 | return &sema->ops; | 858 | return &sema->ops; |
737 | clean_up: | ||
738 | gk20a_channel_semaphore_destroy(&sema->ops); | ||
739 | return NULL; | ||
740 | } | 859 | } |
741 | 860 | ||
742 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync) | 861 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync) |