Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')

-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c      |   3
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.h      |   2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 233
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fence_gk20a.c        |   4
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.h              |   5
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c           |  82
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.h           |   5
-rw-r--r--   drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c    | 435
-rw-r--r--   drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h    | 303

9 files changed, 847 insertions, 225 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 3f9b0432..6c7ff551 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1002,6 +1002,9 @@ unbind:
 
 	mutex_unlock(&g->dbg_sessions_lock);
 
+	/* Make sure that when the ch is re-opened it will get a new HW sema. */
+	ch->hw_sema = NULL;
+
 	/* make sure we catch accesses of unopened channels in case
 	 * there's non-refcounted channel pointers hanging around */
 	ch->g = NULL;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index acd272b4..c5a1bd24 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -108,6 +108,8 @@ struct channel_gk20a {
 	atomic_t ref_count;
 	wait_queue_head_t ref_count_dec_wq;
 
+	struct gk20a_semaphore_int *hw_sema;
+
 	int hw_chid;
 	bool wdt_enabled;
 	bool bound;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index d2d8c094..9c8911e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -424,28 +424,52 @@ static void gk20a_channel_semaphore_launcher(
 }
 #endif
 
-static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd,
-		u64 sema, u32 payload, bool acquire, bool wfi)
+static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
+		struct gk20a_semaphore *s, struct priv_cmd_entry *cmd,
+		int cmd_size, bool acquire, bool wfi)
 {
 	u32 off = cmd->off;
+	u64 va;
+
+	/*
+	 * RO for acquire (since we just need to read the mem) and RW for
+	 * release since we will need to write back to the semaphore memory.
+	 */
+	va = acquire ? gk20a_semaphore_gpu_ro_va(s) :
+		       gk20a_semaphore_gpu_rw_va(s);
+
+	/*
+	 * If the op is not an acquire (and therefore a release) we should
+	 * increment the underlying sema's next_value.
+	 */
+	if (!acquire)
+		gk20a_semaphore_incr(s);
+
 	/* semaphore_a */
 	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004);
 	/* offset_upper */
-	gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff);
+	gk20a_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
 	/* semaphore_b */
 	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005);
 	/* offset */
-	gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff);
-	/* semaphore_c */
-	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
-	/* payload */
-	gk20a_mem_wr32(g, cmd->mem, off++, payload);
+	gk20a_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);
+
 	if (acquire) {
+		/* semaphore_c */
+		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
+		/* payload */
+		gk20a_mem_wr32(g, cmd->mem, off++,
+			       gk20a_semaphore_get_value(s));
 		/* semaphore_d */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
 		/* operation: acq_geq, switch_en */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
 	} else {
+		/* semaphore_c */
+		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
+		/* payload */
+		gk20a_mem_wr32(g, cmd->mem, off++,
+			       gk20a_semaphore_get_value(s));
 		/* semaphore_d */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
 		/* operation: release, wfi */
@@ -456,7 +480,6 @@ static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd,
 		/* ignored */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0);
 	}
-	return off - cmd->off;
 }
 
 static int gk20a_channel_semaphore_wait_syncpt(
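
For reference, add_sema_cmd() above emits four method/data pairs. A minimal sketch of the words an acquire produces, where va and payload are placeholders for the semaphore's GPU VA and the value returned by gk20a_semaphore_get_value():

	/* Illustrative only - mirrors the acquire case of add_sema_cmd(). */
	u32 acquire_cmds[8] = {
		0x20010004,               /* semaphore_a */
		(u32)((va >> 32) & 0xff), /* offset_upper */
		0x20010005,               /* semaphore_b */
		(u32)(va & 0xffffffff),   /* offset */
		0x20010006,               /* semaphore_c */
		payload,                  /* value to compare against */
		0x20010007,               /* semaphore_d */
		0x4 | (0x1 << 12),        /* operation: acq_geq, switch_en */
	};

The GPU stalls until the 32-bit value at va is greater than or equal to payload (acq_geq), with channel switching enabled while it waits (switch_en).
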
@@ -471,6 +494,76 @@ static int gk20a_channel_semaphore_wait_syncpt(
 	return -ENODEV;
 }
 
+/*
+ * UGHHH - the sync_fence underlying implementation changed between 3.10 and
+ * 3.18, and since there's no API for getting the underlying sync_pts we have
+ * to do some conditional compilation.
+ */
+#ifdef CONFIG_SYNC
+static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
+	struct sync_pt *pt;
+
+	pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
+	return gk20a_sync_pt_inst_get_sema(pt);
+#else
+	return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt);
+#endif
+}
+
+/*
+ * Attempt a fast path for waiting on a sync_fence. Basically if the passed
+ * sync_fence is backed by a gk20a_semaphore then there's no reason to go
+ * through the rigmarole of setting up a separate semaphore which waits on an
+ * interrupt from the GPU and then triggers a worker thread to execute a SW
+ * based semaphore release. Instead just have the GPU wait on the same semaphore
+ * that is going to be incremented by the GPU.
+ *
+ * Returns 0 on success, -ENODEV if the fast path cannot be taken because the
+ * fence is not backed by a GPU semaphore, or a negative error code if
+ * allocating the wait command fails.
+ */
+static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
+		struct sync_fence *fence,
+		struct priv_cmd_entry **wait_cmd,
+		struct gk20a_semaphore **fp_sema)
+{
+	struct gk20a_semaphore *sema;
+	int err;
+
+	if (!gk20a_is_sema_backed_sync_fence(fence))
+		return -ENODEV;
+
+	sema = sema_from_sync_fence(fence);
+
+	/*
+	 * If there's no underlying sema then the fence in question has
+	 * already signaled.
+	 */
+	if (!sema) {
+		*fp_sema = NULL;
+		return 0;
+	}
+
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
+	if (err)
+		return err;
+
+	gk20a_semaphore_get(sema);
+	BUG_ON(!atomic_read(&sema->value));
+	add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false);
+
+	/*
+	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
+	 * fence with the underlying semaphore.
+	 */
+	*fp_sema = sema;
+
+	return 0;
+}
+#endif
+
 static int gk20a_channel_semaphore_wait_fd(
 	struct gk20a_channel_sync *s, int fd,
 	struct priv_cmd_entry **entry,
@@ -480,69 +573,107 @@ static int gk20a_channel_semaphore_wait_fd(
 		container_of(s, struct gk20a_channel_semaphore, ops);
 	struct channel_gk20a *c = sema->c;
 #ifdef CONFIG_SYNC
+	struct gk20a_semaphore *fp_sema;
 	struct sync_fence *sync_fence;
 	struct priv_cmd_entry *wait_cmd = NULL;
-	struct wait_fence_work *w;
-	int written;
-	int err, ret;
-	u64 va;
+	struct wait_fence_work *w = NULL;
+	int err, ret, status;
 
 	sync_fence = gk20a_sync_fence_fdget(fd);
 	if (!sync_fence)
 		return -EINVAL;
 
+	ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
+	if (ret == 0) {
+		if (fp_sema)
+			*fence = gk20a_fence_from_semaphore(sema->timeline,
+					fp_sema,
+					&c->semaphore_wq,
+					NULL, false);
+		else
+			/*
+			 * Allocate an empty fence. It will instantly return
+			 * from gk20a_fence_wait().
+			 */
+			*fence = gk20a_alloc_fence(NULL, NULL, false);
+
+		sync_fence_put(sync_fence);
+		goto skip_slow_path;
+	}
+
+	/* If the fence has signaled there is no reason to wait on it. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
+	status = sync_fence->status;
+#else
+	status = atomic_read(&sync_fence->status);
+#endif
+	if (status) {
+		sync_fence_put(sync_fence);
+		goto skip_slow_path;
+	}
+
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+	if (err) {
+		gk20a_err(dev_from_gk20a(c->g),
+			  "not enough priv cmd buffer space");
+		sync_fence_put(sync_fence);
+		return -ENOMEM;
+	}
+
 	w = kzalloc(sizeof(*w), GFP_KERNEL);
 	if (!w) {
 		err = -ENOMEM;
-		goto fail;
+		goto fail_free_cmdbuf;
 	}
+
 	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
 	w->ch = c;
-	w->sema = gk20a_semaphore_alloc(sema->pool);
+	w->sema = gk20a_semaphore_alloc(c);
 	if (!w->sema) {
 		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
 		err = -ENOMEM;
-		goto fail;
+		goto fail_free_worker;
 	}
 
 	/* worker takes one reference */
 	gk20a_semaphore_get(w->sema);
+	gk20a_semaphore_incr(w->sema);
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
-	if (err) {
-		gk20a_err(dev_from_gk20a(c->g),
-			"not enough priv cmd buffer space");
-		goto fail;
-	}
-
-	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
-	/* GPU unblocked when when the semaphore value becomes 1. */
-	written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false);
+	/* GPU unblocked when the semaphore value increments. */
+	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
 
-	WARN_ON(written != wait_cmd->size);
 	ret = sync_fence_wait_async(sync_fence, &w->waiter);
 
 	/*
 	 * If the sync_fence has already signaled then the above async_wait
 	 * will never trigger. This causes the semaphore release op to never
 	 * happen which, in turn, hangs the GPU. That's bad. So let's just
	 * do the gk20a_semaphore_release() right now.
 	 */
-	if (ret == 1)
+	if (ret == 1) {
+		sync_fence_put(sync_fence);
 		gk20a_semaphore_release(w->sema);
+		gk20a_semaphore_put(w->sema);
+	}
 
 	/* XXX - this fixes an actual bug, we need to hold a ref to this
 	   semaphore while the job is in flight. */
 	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
 					    &c->semaphore_wq,
 					    NULL, false);
+
+skip_slow_path:
 	*entry = wait_cmd;
 	return 0;
-fail:
+
+fail_free_worker:
 	if (w && w->sema)
 		gk20a_semaphore_put(w->sema);
 	kfree(w);
 	sync_fence_put(sync_fence);
+fail_free_cmdbuf:
+	if (wait_cmd)
+		gk20a_free_priv_cmdbuf(c, wait_cmd);
 	return err;
 #else
 	gk20a_err(dev_from_gk20a(c->g),
@@ -558,9 +689,7 @@ static int __gk20a_channel_semaphore_incr(
 		struct gk20a_fence **fence,
 		bool need_sync_fence)
 {
-	u64 va;
 	int incr_cmd_size;
-	int written;
 	struct priv_cmd_entry *incr_cmd = NULL;
 	struct gk20a_channel_semaphore *sp =
 		container_of(s, struct gk20a_channel_semaphore, ops);
@@ -568,7 +697,7 @@ static int __gk20a_channel_semaphore_incr(
 	struct gk20a_semaphore *semaphore;
 	int err = 0;
 
-	semaphore = gk20a_semaphore_alloc(sp->pool);
+	semaphore = gk20a_semaphore_alloc(c);
 	if (!semaphore) {
 		gk20a_err(dev_from_gk20a(c->g),
 			  "ran out of semaphores");
@@ -585,9 +714,7 @@ static int __gk20a_channel_semaphore_incr(
 	}
 
 	/* Release the completion semaphore. */
-	va = gk20a_semaphore_gpu_va(semaphore, c->vm);
-	written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd);
-	WARN_ON(written != incr_cmd_size);
+	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
 
 	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
 					    &c->semaphore_wq,
@@ -615,8 +742,10 @@ static int gk20a_channel_semaphore_incr(
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
-	return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
-			NULL, entry, fence, need_sync_fence);
+	return __gk20a_channel_semaphore_incr(s,
+			false /* no wfi */,
+			NULL,
+			entry, fence, need_sync_fence);
 }
 
 static int gk20a_channel_semaphore_incr_user(
@@ -679,17 +808,16 @@ static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
 		container_of(s, struct gk20a_channel_semaphore, ops);
 	if (sema->timeline)
 		gk20a_sync_timeline_destroy(sema->timeline);
-	if (sema->pool) {
-		gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm);
-		gk20a_semaphore_pool_put(sema->pool);
-	}
+
+	/* The sema pool is cleaned up by the VM destroy. */
+	sema->pool = NULL;
+
 	kfree(sema);
 }
 
 static struct gk20a_channel_sync *
 gk20a_channel_semaphore_create(struct channel_gk20a *c)
 {
-	int err;
 	int asid = -1;
 	struct gk20a_channel_semaphore *sema;
 	char pool_name[20];
@@ -706,21 +834,15 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 		asid = c->vm->as_share->id;
 
 	sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
-	sema->pool = gk20a_semaphore_pool_alloc(c->g, pool_name, 1024);
-	if (!sema->pool)
-		goto clean_up;
-
-	/* Map the semaphore pool to the channel vm. Map as read-write to the
-	 * owner channel (all other channels should map as read only!). */
-	err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none);
-	if (err)
-		goto clean_up;
+	sema->pool = c->vm->sema_pool;
 
 #ifdef CONFIG_SYNC
 	sema->timeline = gk20a_sync_timeline_create(
 		"gk20a_ch%d_as%d", c->hw_chid, asid);
-	if (!sema->timeline)
-		goto clean_up;
+	if (!sema->timeline) {
+		gk20a_channel_semaphore_destroy(&sema->ops);
+		return NULL;
+	}
 #endif
 	atomic_set(&sema->ops.refcount, 0);
 	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
@@ -734,9 +856,6 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 	sema->ops.destroy = gk20a_channel_semaphore_destroy;
 
 	return &sema->ops;
-clean_up:
-	gk20a_channel_semaphore_destroy(&sema->ops);
-	return NULL;
 }
 
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
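
The net effect of the changes in this file is that gk20a_channel_semaphore_wait_fd() now has three outcomes instead of one. A condensed sketch of the control flow - the helper names here are hypothetical stand-ins for the logic above, not driver functions:

	/* Sketch: how a wait on a sync_fence fd is resolved after this patch. */
	static int wait_fd_flow(struct channel_gk20a *c, struct sync_fence *f)
	{
		if (fence_is_sema_backed(f))       /* fast path: GPU acquires */
			return emit_gpu_acquire(c, f); /* the producer's own sema */

		if (fence_already_signaled(f))     /* nothing to wait for */
			return 0;

		/* Slow path: allocate a fresh semaphore, have the GPU acquire
		 * it, and release it from a SW callback when f signals. */
		return setup_sw_release_worker(c, f);
	}
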
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 23522882..fbbaa2a7 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -155,8 +155,8 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 
 #ifdef CONFIG_SYNC
 	sync_fence = gk20a_sync_fence_create(timeline, semaphore,
 			dependency, "f-gk20a-0x%04x",
-			semaphore->offset & 0xffff);
+			gk20a_semaphore_gpu_ro_va(semaphore));
 	if (!sync_fence)
 		return NULL;
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 5ab09ac3..7bd9775e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -738,6 +738,11 @@ struct gk20a {
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
 
+	/*
+	 * The sea of semaphore pools - one pool per address space.
+	 */
+	struct gk20a_semaphore_sea *sema_sea;
+
 	/* held while manipulating # of debug/profiler sessions present */
 	/* also prevents debug sessions from attaching until released */
 	struct mutex dbg_sessions_lock;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3b21e843..9299266f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -3213,6 +3213,17 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct rb_node *node;
 
 	gk20a_dbg_fn("");
+
+	/*
+	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
+	 * pool involves unmapping a GMMU mapping which means acquiring the
+	 * update_gmmu_lock.
+	 */
+	if (!gk20a_platform_has_syncpoints(gk20a_from_vm(vm)->dev)) {
+		gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
+		gk20a_semaphore_pool_put(vm->sema_pool);
+	}
+
 	mutex_lock(&vm->update_gmmu_lock);
 
 	/* TBD: add a flag here for the unmap code to recognize teardown
@@ -3286,6 +3297,65 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
+/*
+ * Initialize a semaphore pool. Just return successfully if we do not need
+ * semaphores (i.e. when sync-pts are active).
+ */
+int gk20a_init_sema_pool(struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_sea *sema_sea;
+	struct mm_gk20a *mm = vm->mm;
+	struct gk20a *g = mm->g;
+	int err;
+
+	/*
+	 * Don't waste the memory on semaphores if we don't need them.
+	 */
+	if (gk20a_platform_has_syncpoints(g->dev))
+		return 0;
+
+	if (vm->sema_pool)
+		return 0;
+
+	sema_sea = gk20a_semaphore_sea_create(g);
+	if (!sema_sea)
+		return -ENOMEM;
+
+	vm->sema_pool = gk20a_semaphore_pool_alloc(sema_sea);
+	if (!vm->sema_pool) {
+		gk20a_vm_put(vm);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
+	 * do a fixed alloc in the kernel VM so that all channels have the same
+	 * RO address range for the semaphores.
+	 *
+	 * !!! TODO: cleanup.
+	 */
+	sema_sea->gpu_va = gk20a_balloc_fixed(&vm->vma[gmmu_page_size_kernel],
+					      vm->va_limit -
+					      mm->channel.kernel_size,
+					      512 * PAGE_SIZE);
+	if (!sema_sea->gpu_va) {
+		gk20a_bfree(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+		gk20a_vm_put(vm);
+		return -ENOMEM;
+	}
+
+	err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
+	if (err) {
+		gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
+		gk20a_bfree(&vm->vma[gmmu_page_size_small],
+			    vm->sema_pool->gpu_va);
+		gk20a_vm_put(vm);
+		return err;
+	}
+
+	return 0;
+}
+
 int gk20a_init_vm(struct mm_gk20a *mm,
 	struct vm_gk20a *vm,
 	u32 big_page_size,
@@ -3317,9 +3386,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	vm->big_pages = big_pages;
 
 	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
-
 	vm->userspace_managed = userspace_managed;
-
 	vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
 			vm->big_page_size);
 
@@ -3465,6 +3532,17 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	kref_init(&vm->ref);
 	INIT_LIST_HEAD(&vm->reserved_va_list);
 
+	/*
+	 * This is only necessary for channel address spaces. The best way to
+	 * distinguish channel address spaces from other address spaces is by
+	 * size - if the address space is 4GB or less, it's not a channel.
+	 */
+	if (vm->va_limit > SZ_4G) {
+		err = gk20a_init_sema_pool(vm);
+		if (err)
+			goto clean_up_big_allocator;
+	}
+
 	return 0;
 
 clean_up_big_allocator:
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index db74a5ca..7bb4d011 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -287,6 +287,11 @@ struct vm_gk20a {
 	/* if non-NULL, kref_put will use this batch when
 	   unmapping. Must hold vm->update_gmmu_lock. */
 	struct vm_gk20a_mapping_batch *kref_put_batch;
+
+	/*
+	 * Each address space needs to have a semaphore pool.
+	 */
+	struct gk20a_semaphore_pool *sema_pool;
 };
 
 struct gk20a;
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 3b17bfcb..aa375b24 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -15,63 +15,284 @@
  * more details.
  */
 
-#include "semaphore_gk20a.h"
+#define pr_fmt(fmt) "gpu_sema: " fmt
+
 #include <linux/dma-mapping.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
+
+#include <asm/pgtable.h>
+
 #include "gk20a.h"
 #include "mm_gk20a.h"
+#include "semaphore_gk20a.h"
+
+#define __lock_sema_sea(s)						\
+	do {								\
+		mutex_lock(&s->sea_lock);				\
+	} while (0)
 
-static const int SEMAPHORE_SIZE = 16;
+#define __unlock_sema_sea(s)						\
+	do {								\
+		mutex_unlock(&s->sea_lock);				\
+	} while (0)
 
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct gk20a *g,
-				const char *unique_name, size_t capacity)
+/*
+ * Return the sema_sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
+{
+	return g->sema_sea;
+}
+
+static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
+{
+	int ret = 0;
+	struct gk20a *gk20a = sea->gk20a;
+
+	__lock_sema_sea(sea);
+
+	ret = gk20a_gmmu_alloc_attr(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
+				    PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+				    &sea->sea_mem);
+	if (ret)
+		goto out;
+
+	sea->ro_sg_table = sea->sea_mem.sgt;
+	sea->size = SEMAPHORE_POOL_COUNT;
+	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
+
+out:
+	__unlock_sema_sea(sea);
+	return ret;
+}
+
+/*
+ * Create the semaphore sea. Only create it once - subsequent calls to this
+ * will return the originally created sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
+{
+	if (g->sema_sea)
+		return g->sema_sea;
+
+	g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
+	if (!g->sema_sea)
+		return NULL;
+
+	g->sema_sea->size = 0;
+	g->sema_sea->page_count = 0;
+	g->sema_sea->gk20a = g;
+	INIT_LIST_HEAD(&g->sema_sea->pool_list);
+	mutex_init(&g->sema_sea->sea_lock);
+
+	if (__gk20a_semaphore_sea_grow(g->sema_sea))
+		goto cleanup;
+
+	return g->sema_sea;
+
+cleanup:
+	kfree(g->sema_sea);
+	g->sema_sea = NULL;
+	return NULL;
+}
+
+static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
+{
+	unsigned long idx = find_first_zero_bit(bitmap, len);
+
+	if (idx == len)
+		return -ENOSPC;
+
+	set_bit(idx, bitmap);
+
+	return (int)idx;
+}
+
+/*
+ * Allocate a pool from the sea.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+				struct gk20a_semaphore_sea *sea)
 {
 	struct gk20a_semaphore_pool *p;
+	int page_idx;
+	int err = 0;
+
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
+
+	__lock_sema_sea(sea);
+
+	page_idx = __semaphore_bitmap_alloc(sea->pools_alloced,
+					    SEMAPHORE_POOL_COUNT);
+	if (page_idx < 0) {
+		err = page_idx;
+		goto fail;
+	}
 
+	p->page = sea->sea_mem.pages[page_idx];
+	p->ro_sg_table = sea->ro_sg_table;
+	p->page_idx = page_idx;
+	p->sema_sea = sea;
+	INIT_LIST_HEAD(&p->hw_semas);
 	kref_init(&p->ref);
-	INIT_LIST_HEAD(&p->maps);
-	mutex_init(&p->maps_mutex);
-	p->g = g;
-
-	/* Alloc one 4k page of semaphore per channel. */
-	if (gk20a_gmmu_alloc(g, roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE),
-			     &p->mem))
-		goto clean_up;
-
-	/* Sacrifice one semaphore in the name of returning error codes. */
-	if (gk20a_allocator_init(&p->alloc, unique_name,
-				 SEMAPHORE_SIZE, p->mem.size - SEMAPHORE_SIZE,
-				 SEMAPHORE_SIZE))
-		goto clean_up;
-
-	gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->mem.cpu_va,
-		       (u64)sg_dma_address(p->mem.sgt->sgl),
-		       (u64)sg_phys(p->mem.sgt->sgl));
+	mutex_init(&p->pool_lock);
+
+	sea->page_count++;
+	list_add(&p->pool_list_entry, &sea->pool_list);
+	__unlock_sema_sea(sea);
+
 	return p;
 
-clean_up:
-	if (p->mem.size)
-		gk20a_gmmu_free(p->g, &p->mem);
+fail:
+	__unlock_sema_sea(sea);
 	kfree(p);
-	return NULL;
+	return ERR_PTR(err);
+}
+
+/*
+ * Map a pool into the passed vm's address space. This handles both the fixed
+ * global RO mapping and the non-fixed private RW mapping.
+ */
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
+			     struct vm_gk20a *vm)
+{
+	int ents, err = 0;
+	u64 addr;
+
+	p->cpu_va = vmap(&p->page, 1, 0,
+			 pgprot_writecombine(PAGE_KERNEL));
+
+	/* First do the RW mapping. */
+	p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
+	if (!p->rw_sg_table)
+		return -ENOMEM;
+
+	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
+					PAGE_SIZE, GFP_KERNEL);
+	if (err) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	/* Add IOMMU mapping... */
+	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+			  DMA_BIDIRECTIONAL);
+	if (ents != 1) {
+		err = -ENOMEM;
+		goto fail_free_sgt;
+	}
+
+	/* Map into the GPU... Doesn't need to be fixed. */
+	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
+				   0, gk20a_mem_flag_none, false);
+	if (!p->gpu_va) {
+		err = -ENOMEM;
+		goto fail_unmap_sgt;
+	}
+
+	/*
+	 * And now the global mapping. Take the sea lock so that we don't race
+	 * with a concurrent remap.
+	 */
+	__lock_sema_sea(p->sema_sea);
+
+	BUG_ON(p->mapped);
+	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
+				    p->sema_sea->gpu_va, p->sema_sea->map_size,
+				    0,
+				    gk20a_mem_flag_read_only,
+				    false);
+	if (!addr) {
+		err = -ENOMEM;
+		BUG();
+		goto fail_unlock;
+	}
+	p->gpu_va_ro = addr;
+	p->mapped = 1;
+
+	__unlock_sema_sea(p->sema_sea);
+
+	return 0;
+
+fail_unlock:
+	__unlock_sema_sea(p->sema_sea);
+fail_unmap_sgt:
+	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+		     DMA_BIDIRECTIONAL);
+fail_free_sgt:
+	sg_free_table(p->rw_sg_table);
+fail:
+	kfree(p->rw_sg_table);
+	p->rw_sg_table = NULL;
+	return err;
 }
 
+/*
+ * Unmap a semaphore_pool.
+ */
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
+				struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_int *hw_sema;
+
+	kunmap(p->cpu_va);
+
+	/* First the global RO mapping... */
+	__lock_sema_sea(p->sema_sea);
+	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
+			 p->sema_sea->map_size, gk20a_mem_flag_none);
+	p->ro_sg_table = NULL;
+	__unlock_sema_sea(p->sema_sea);
+
+	/* And now the private RW mapping. */
+	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
+	p->gpu_va = 0;
+
+	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+		     DMA_BIDIRECTIONAL);
+
+	sg_free_table(p->rw_sg_table);
+	kfree(p->rw_sg_table);
+	p->rw_sg_table = NULL;
+
+	gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx);
+	list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
+		/*
+		 * Make sure the mem addresses are all NULL so if this gets
+		 * reused we will fault.
+		 */
+		hw_sema->value = NULL;
+}
+
+/*
+ * Completely free a semaphore_pool. You should make sure this pool is not
+ * mapped, otherwise there's going to be a memory leak.
+ */
 static void gk20a_semaphore_pool_free(struct kref *ref)
 {
 	struct gk20a_semaphore_pool *p =
 		container_of(ref, struct gk20a_semaphore_pool, ref);
-	mutex_lock(&p->maps_mutex);
-	WARN_ON(!list_empty(&p->maps));
-	mutex_unlock(&p->maps_mutex);
-	gk20a_gmmu_free(p->g, &p->mem);
-	gk20a_allocator_destroy(&p->alloc);
+	struct gk20a_semaphore_sea *s = p->sema_sea;
+	struct gk20a_semaphore_int *hw_sema, *tmp;
+
+	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+
+	__lock_sema_sea(s);
+	list_del(&p->pool_list_entry);
+	clear_bit(p->page_idx, s->pools_alloced);
+	s->page_count--;
+	__unlock_sema_sea(s);
+
+	list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
+		kfree(hw_sema);
+
 	kfree(p);
 }
 
-static void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
 {
 	kref_get(&p->ref);
 }
@@ -81,104 +302,96 @@ void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
 	kref_put(&p->ref, gk20a_semaphore_pool_free);
 }
 
-static struct gk20a_semaphore_pool_map *
-gk20a_semaphore_pool_find_map_locked(struct gk20a_semaphore_pool *p,
-				     struct vm_gk20a *vm)
+/*
+ * Get the address for a semaphore_pool - if global is true then return the
+ * global RO address instead of the RW address owned by the semaphore's VM.
+ */
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
 {
-	struct gk20a_semaphore_pool_map *map, *found = NULL;
-	list_for_each_entry(map, &p->maps, list) {
-		if (map->vm == vm) {
-			found = map;
-			break;
-		}
-	}
-	return found;
+	if (!global)
+		return p->gpu_va;
+
+	return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
 }
 
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
-			     struct vm_gk20a *vm,
-			     enum gk20a_mem_rw_flag rw_flag)
+static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
 {
-	struct gk20a_semaphore_pool_map *map;
+	int hw_sema_idx;
+	int ret = 0;
+	struct gk20a_semaphore_int *hw_sema;
+	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
-	if (!map)
-		return -ENOMEM;
-	map->vm = vm;
-	map->rw_flag = rw_flag;
-	map->gpu_va = gk20a_gmmu_map(vm, &p->mem.sgt, p->mem.size,
-				     0/*uncached*/, rw_flag,
-				     false);
-	if (!map->gpu_va) {
-		kfree(map);
-		return -ENOMEM;
-	}
-	gk20a_vm_get(vm);
+	BUG_ON(!p);
 
-	mutex_lock(&p->maps_mutex);
-	WARN_ON(gk20a_semaphore_pool_find_map_locked(p, vm));
-	list_add(&map->list, &p->maps);
-	mutex_unlock(&p->maps_mutex);
-	return 0;
-}
+	mutex_lock(&p->pool_lock);
 
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
-				struct vm_gk20a *vm)
-{
-	struct gk20a_semaphore_pool_map *map;
-	WARN_ON(!vm);
-
-	mutex_lock(&p->maps_mutex);
-	map = gk20a_semaphore_pool_find_map_locked(p, vm);
-	if (map) {
-		gk20a_gmmu_unmap(vm, map->gpu_va, p->mem.size, map->rw_flag);
-		gk20a_vm_put(vm);
-		list_del(&map->list);
-		kfree(map);
+	/* Find an available HW semaphore. */
+	hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
+					       PAGE_SIZE / SEMAPHORE_SIZE);
+	if (hw_sema_idx < 0) {
+		ret = hw_sema_idx;
+		goto fail;
 	}
-	mutex_unlock(&p->maps_mutex);
-}
 
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p,
-				struct vm_gk20a *vm)
-{
-	struct gk20a_semaphore_pool_map *map;
-	u64 gpu_va = 0;
+	hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
+	if (!hw_sema) {
+		ret = -ENOMEM;
+		goto fail_free_idx;
+	}
 
-	mutex_lock(&p->maps_mutex);
-	map = gk20a_semaphore_pool_find_map_locked(p, vm);
-	if (map)
-		gpu_va = map->gpu_va;
-	mutex_unlock(&p->maps_mutex);
+	ch->hw_sema = hw_sema;
+	hw_sema->ch = ch;
+	hw_sema->p = p;
+	hw_sema->idx = hw_sema_idx;
+	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
+	atomic_set(&hw_sema->next_value, 0);
+	hw_sema->value = p->cpu_va + hw_sema->offset;
+	writel(0, hw_sema->value);
 
-	return gpu_va;
+	list_add(&hw_sema->hw_sema_list, &p->hw_semas);
+
+	mutex_unlock(&p->pool_lock);
+
+	return 0;
+
+fail_free_idx:
+	clear_bit(hw_sema_idx, p->semas_alloced);
+fail:
+	mutex_unlock(&p->pool_lock);
+	return ret;
 }
 
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
+/*
+ * Allocate a semaphore from the passed channel's per-VM pool.
+ *
+ * Since semaphores are ref-counted there's no explicit free for external code
+ * to use. When the ref-count hits 0 the internal free will happen.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
 {
 	struct gk20a_semaphore *s;
+	int ret;
+
+	if (!ch->hw_sema) {
+		ret = __gk20a_init_hw_sema(ch);
+		if (ret)
+			return ERR_PTR(ret);
+	}
 
 	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return NULL;
 
-	s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
-	if (!s->offset) {
-		gk20a_err(dev_from_gk20a(pool->g),
-			  "failed to allocate semaphore");
-		kfree(s);
-		return NULL;
-	}
+	kref_init(&s->ref);
+	s->hw_sema = ch->hw_sema;
+	atomic_set(&s->value, 0);
 
-	gk20a_semaphore_pool_get(pool);
-	s->pool = pool;
+	/*
+	 * Take a ref on the pool so that we can keep this pool alive for
+	 * as long as this semaphore is alive.
+	 */
+	gk20a_semaphore_pool_get(s->hw_sema->p);
 
-	kref_init(&s->ref);
-	/* Initially acquired. */
-	gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 0);
-	gk20a_dbg_info("created semaphore offset=%d, value=%d",
-		       s->offset,
-		       gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset));
 	return s;
 }
 
@@ -187,8 +400,8 @@ static void gk20a_semaphore_free(struct kref *ref)
 	struct gk20a_semaphore *s =
 		container_of(ref, struct gk20a_semaphore, ref);
 
-	gk20a_bfree(&s->pool->alloc, s->offset);
-	gk20a_semaphore_pool_put(s->pool);
+	gk20a_semaphore_pool_put(s->hw_sema->p);
+
 	kfree(s);
 }
 
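
To make the addressing in __gk20a_semaphore_pool_gpu_va() and __gk20a_init_hw_sema() concrete: each pool owns one page inside the sea's single contiguous RO mapping, and each HW semaphore occupies one 16-byte slot in that page. A hypothetical worked example, assuming the sea's fixed RO mapping starts at 0x8000000000:

	/* Pool with page_idx == 2, HW sema with idx == 5 (offset == 16 * 5): */
	u64 pool_ro = 0x8000000000ULL + (PAGE_SIZE * 2); /* gpu_va_ro + PAGE_SIZE * page_idx */
	u64 sema_ro = pool_ro + 80;                      /* ... + hw_sema->offset */

The RW address of the same semaphore is pool->gpu_va + hw_sema->offset; since the private RW mapping covers only the pool's own page, no page_idx term is needed there.
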
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index 1f12e262..58081b56 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -15,17 +15,128 @@
 #define SEMAPHORE_GK20A_H
 
 #include <linux/kref.h>
-#include "gk20a_allocator.h"
+#include <linux/list.h>
+#include <linux/delay.h>
+
+#include "gk20a.h"
 #include "mm_gk20a.h"
+#include "channel_gk20a.h"
+
+/*
+ * Max number of channels that can be used is 512. This of course needs to be
+ * fixed to be dynamic but still fast.
+ */
+#define SEMAPHORE_POOL_COUNT		512
+#define SEMAPHORE_SIZE			16
+#define SEMAPHORE_SEA_GROWTH_RATE	32
+
+struct gk20a_semaphore_sea;
+
+/*
+ * Underlying semaphore data structure. This semaphore can be shared amongst
+ * other semaphore instances.
+ */
+struct gk20a_semaphore_int {
+	int idx;			/* Semaphore index. */
+	u32 offset;			/* Offset into the pool. */
+	atomic_t next_value;		/* Next available value. */
+	u32 *value;			/* Current value (access w/ readl()). */
+	u32 nr_incrs;			/* Number of increments programmed. */
+	struct gk20a_semaphore_pool *p;	/* Pool that owns this sema. */
+	struct channel_gk20a *ch;	/* Channel that owns this sema. */
+	struct list_head hw_sema_list;	/* List of HW semaphores. */
+};
+
+/*
+ * A semaphore which the rest of the driver actually uses. This consists of a
+ * pointer to a real semaphore and a value to wait for. This allows one physical
+ * semaphore to be shared among an essentially infinite number of submits.
+ */
+struct gk20a_semaphore {
+	struct gk20a_semaphore_int *hw_sema;
 
-/* A memory pool for holding semaphores. */
+	atomic_t value;
+	int incremented;
+
+	struct kref ref;
+};
+
+/*
+ * A semaphore pool. Each address space will own exactly one of these.
+ */
 struct gk20a_semaphore_pool {
-	struct mem_desc mem;
-	struct gk20a *g;
-	struct list_head maps;
-	struct mutex maps_mutex;
+	struct page *page;			/* This pool's page of memory. */
+	struct list_head pool_list_entry;	/* Node for list of pools. */
+	void *cpu_va;				/* CPU access to the pool. */
+	u64 gpu_va;				/* GPU access to the pool (RW). */
+	u64 gpu_va_ro;				/* GPU access to the pool (RO). */
+	int page_idx;				/* Index into sea bitmap. */
+
+	struct list_head hw_semas;		/* List of HW semas. */
+	DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
+
+	struct gk20a_semaphore_sea *sema_sea;	/* Sea that owns this pool. */
+
+	struct mutex pool_lock;
+
+	/*
+	 * This is the address space's personal RW table. Other channels will
+	 * ultimately map this page as RO.
+	 */
+	struct sg_table *rw_sg_table;
+
+	/*
+	 * This is to keep track of whether the pool has had its sg_table
+	 * updated during sea resizing.
+	 */
+	struct sg_table *ro_sg_table;
+
+	int mapped;
+
+	/*
+	 * Sometimes a channel can be released before other channels are
+	 * done waiting on it. This ref count ensures that the pool doesn't
+	 * go away until all semaphores using this pool are cleaned up first.
+	 */
 	struct kref ref;
-	struct gk20a_allocator alloc;
+};
+
+/*
+ * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
+ * channels can share a VM, each channel gets its own HW semaphore from the
+ * pool. Channels then allocate regular semaphores - basically just a value that
+ * signifies when a particular job is done.
+ */
+struct gk20a_semaphore_sea {
+	struct list_head pool_list;	/* List of pools in this sea. */
+	struct gk20a *gk20a;
+
+	size_t size;			/* Number of pages available. */
+	u64 gpu_va;			/* GPU virtual address of sema sea. */
+	u64 map_size;			/* Size of the mapping. */
+
+	/*
+	 * TODO:
+	 * List of pages that we use to back the pools. The number of pages
+	 * can grow dynamically since allocating 512 pages for all channels at
+	 * once would be a tremendous waste.
+	 */
+	int page_count;			/* Pages allocated to pools. */
+
+	struct sg_table *ro_sg_table;
+	/*
+	struct page *pages[SEMAPHORE_POOL_COUNT];
+	*/
+
+	struct mem_desc sea_mem;
+
+	/*
+	 * Can't use a regular allocator here since the full range of pools are
+	 * not always allocated. Instead just use a bitmap.
+	 */
+	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
+
+	struct mutex sea_lock;		/* Lock alloc/free calls. */
 };
 
 enum gk20a_mem_rw_flag {
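
Taken together, the structures above form a strict ownership hierarchy, summarized here for orientation:

	gk20a (g->sema_sea)
	  -> gk20a_semaphore_sea           one per GPU
	       -> gk20a_semaphore_pool     one per VM (vm->sema_pool)
	            -> gk20a_semaphore_int one per channel (ch->hw_sema)
	                 -> gk20a_semaphore one per job/fence, refcounted
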
@@ -34,64 +145,150 @@ enum gk20a_mem_rw_flag {
 	gk20a_mem_flag_write_only = 2,
 };
 
-/* A semaphore pool can be mapped to multiple GPU address spaces. */
-struct gk20a_semaphore_pool_map {
-	u64 gpu_va;
-	enum gk20a_mem_rw_flag rw_flag;
-	struct vm_gk20a *vm;
-	struct list_head list;
-};
+/*
+ * Semaphore sea functions.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a);
+int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea,
+			    struct vm_gk20a *vm);
+void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea,
+			       struct vm_gk20a *vm);
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g);
+
+/*
+ * Semaphore pool functions.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+				struct gk20a_semaphore_sea *sea);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool,
+			     struct vm_gk20a *vm);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool,
+				struct vm_gk20a *vm);
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global);
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p);
+
+/*
+ * Semaphore functions.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch);
+void gk20a_semaphore_put(struct gk20a_semaphore *s);
+void gk20a_semaphore_get(struct gk20a_semaphore *s);
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+		s->hw_sema->offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s)
+{
+	return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+		s->hw_sema->offset;
+}
+
+static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema)
+{
+	return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) +
+		hw_sema->offset;
+}
+
+/*
+ * TODO: handle wrap around... Hmm, how to do this?
+ */
+static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s)
+{
+	u32 sema_val = readl(s->hw_sema->value);
 
-/* A semaphore that lives inside a semaphore pool. */
-struct gk20a_semaphore {
-	struct gk20a_semaphore_pool *pool;
 	/*
-	 * value exists within the pool's memory at the specified offset.
-	 * 0=acquired, 1=released.
+	 * If the underlying semaphore value is greater than or equal to
+	 * the value of the semaphore then the semaphore has been signaled
+	 * (a.k.a. released).
 	 */
-	u32 offset; /* byte offset within pool */
-	struct kref ref;
-};
+	return sema_val >= atomic_read(&s->value);
+}
 
-/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
-struct gk20a_semaphore_pool *
-gk20a_semaphore_pool_alloc(struct gk20a *, const char *unique_name,
-			   size_t capacity);
-void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
-			     struct vm_gk20a *,
-			     enum gk20a_mem_rw_flag);
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *,
-				struct vm_gk20a *);
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *,
-				struct vm_gk20a *);
-
-/* Allocate a semaphore from the semaphore pool. The newly allocated
- * semaphore will be in acquired state (value=0). */
-struct gk20a_semaphore *
-gk20a_semaphore_alloc(struct gk20a_semaphore_pool *);
-void gk20a_semaphore_put(struct gk20a_semaphore *);
-void gk20a_semaphore_get(struct gk20a_semaphore *);
-
-static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
-					 struct vm_gk20a *vm)
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
 {
-	return gk20a_semaphore_pool_gpu_va(s->pool, vm) + s->offset;
+	return !gk20a_semaphore_is_released(s);
 }
 
-static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s)
 {
-	u32 v = gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset);
+	return readl(s->hw_sema->value);
+}
 
-	/* When often block on value reaching a certain threshold. We must make
-	 * sure that if we get unblocked, we haven't read anything too early. */
-	smp_rmb();
-	return v == 0;
+static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s)
+{
+	return atomic_read(&s->value);
 }
 
+static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s)
+{
+	return atomic_read(&s->hw_sema->next_value);
+}
+
+/*
+ * Note - if you call this then any prior semaphores will also be released.
+ */
 static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
 {
-	smp_wmb();
-	gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 1);
+	u32 current_val;
+	u32 val = gk20a_semaphore_get_value(s);
+	int attempts = 0;
+
+	/*
+	 * Wait until the sema value is 1 less than the write value. That
+	 * way this function is essentially an increment.
+	 *
+	 * TODO: tune the wait a little better.
+	 */
+	while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) {
+		msleep(100);
+		attempts += 1;
+		if (attempts > 100) {
+			WARN(1, "Stall on sema release!");
+			return;
+		}
+	}
+
+	/*
+	 * If the semaphore has already passed the value we would write then
+	 * this is really just a NO-OP.
+	 */
+	if (current_val >= val)
+		return;
+
+	writel(val, s->hw_sema->value);
+}
+
+/*
+ * Configure a software based increment on this semaphore. This is useful for
+ * when we want the GPU to wait on a SW event before processing a channel.
+ * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
+ * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which
+ * then allows the GPU to continue.
+ *
+ * Also used to prep a semaphore for an INCR by the GPU.
+ */
+static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
+{
+	BUG_ON(s->incremented);
+
+	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
+	s->incremented = 1;
 }
 #endif
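
Putting the inline helpers together, a minimal sketch of the SW pre-fence flow that gk20a_semaphore_incr() enables - this assumes a bound channel ch and elides the pushbuffer work done by add_sema_cmd() in channel_sync_gk20a.c:

	struct gk20a_semaphore *s = gk20a_semaphore_alloc(ch);

	gk20a_semaphore_incr(s);   /* reserve the next value on ch's HW sema */
	/* ... emit a GPU acquire on gk20a_semaphore_get_value(s), submit ... */

	/* Later, when the SW pre-fence signals: */
	gk20a_semaphore_release(s);   /* write the value; the GPU acquire passes */
	gk20a_semaphore_put(s);       /* drop the ref; the last put frees it */
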