author     Konsta Holtta <kholtta@nvidia.com>                   2018-06-25 05:35:42 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-06-27 21:40:16 -0400
commit     7998233b77a343d002b699d5f348bbeb243e16f5 (patch)
tree       aa24afcc414be8fbccf6991804f69946e2b72525 /drivers/gpu/nvgpu/os/linux
parent     2ac6fb4253fa815ed17f09a01141b938c826dac9 (diff)
gpu: nvgpu: move submit code to common
To finish OS unification of the submit path, move the
gk20a_submit_channel_gpfifo* functions to a file that is also
accessible outside the Linux-specific code.
Also change the prefix of the submit functions from gk20a_ to nvgpu_.
Jira NVGPU-705
Change-Id: I8ca355d1eb69771fb016c7a21fc7f102ca7967d7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760421
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
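
For orientation, the sketch below shows what an in-kernel call site looks like after the rename. The function name and argument list follow the nvgpu_submit_channel_gpfifo_kernel() calls in cde.c and ce2.c further down and the prototype removed from os/linux/channel.h; the surrounding helper is hypothetical and only illustrates the calling convention, not code from this change.

/*
 * Illustrative only: a hypothetical in-kernel caller after the
 * gk20a_ -> nvgpu_ rename. The prototype mirrors the declaration
 * removed from os/linux/channel.h; error handling is a sketch.
 */
#include <nvgpu/channel.h>

static int example_submit_one_entry(struct channel_gk20a *ch,
		struct nvgpu_gpfifo_entry *entry, u32 flags,
		struct nvgpu_channel_fence *fence)
{
	struct gk20a_fence *fence_out = NULL;
	int err;

	/* was: gk20a_submit_channel_gpfifo_kernel(...) */
	err = nvgpu_submit_channel_gpfifo_kernel(ch, entry, 1, flags,
			fence, &fence_out);
	if (err)
		return err;

	/* the caller waits on or releases fence_out as needed */
	gk20a_fence_put(fence_out);
	return 0;
}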
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/cde.c              3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ce2.c              3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.c        551
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.h         15
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_channel.c    5
5 files changed, 9 insertions(+), 568 deletions(-)
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 052a1d21..39b7d1f5 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -32,6 +32,7 @@
32 | #include <nvgpu/bug.h> | 32 | #include <nvgpu/bug.h> |
33 | #include <nvgpu/firmware.h> | 33 | #include <nvgpu/firmware.h> |
34 | #include <nvgpu/os_sched.h> | 34 | #include <nvgpu/os_sched.h> |
35 | #include <nvgpu/channel.h> | ||
35 | 36 | ||
36 | #include <nvgpu/linux/vm.h> | 37 | #include <nvgpu/linux/vm.h> |
37 | 38 | ||
@@ -783,7 +784,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
783 | return -ENOSYS; | 784 | return -ENOSYS; |
784 | } | 785 | } |
785 | 786 | ||
786 | return gk20a_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo, | 787 | return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo, |
787 | num_entries, flags, fence, fence_out); | 788 | num_entries, flags, fence, fence_out); |
788 | } | 789 | } |
789 | 790 | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c
index 8f20091b..0b43c0d1 100644
--- a/drivers/gpu/nvgpu/os/linux/ce2.c
+++ b/drivers/gpu/nvgpu/os/linux/ce2.c
@@ -15,6 +15,7 @@
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <nvgpu/types.h> | 17 | #include <nvgpu/types.h> |
18 | #include <nvgpu/channel.h> | ||
18 | 19 | ||
19 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | 20 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> |
20 | 21 | ||
@@ -130,7 +131,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
130 | 131 | ||
131 | nvgpu_smp_wmb(); | 132 | nvgpu_smp_wmb(); |
132 | 133 | ||
133 | ret = gk20a_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, | 134 | ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, |
134 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out); | 135 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out); |
135 | 136 | ||
136 | if (!ret) { | 137 | if (!ret) { |
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
index 391950af..fef44f2b 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.c
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -16,7 +16,6 @@
16 | 16 | ||
17 | #include <nvgpu/enabled.h> | 17 | #include <nvgpu/enabled.h> |
18 | #include <nvgpu/debug.h> | 18 | #include <nvgpu/debug.h> |
19 | #include <nvgpu/ltc.h> | ||
20 | #include <nvgpu/error_notifier.h> | 19 | #include <nvgpu/error_notifier.h> |
21 | #include <nvgpu/os_sched.h> | 20 | #include <nvgpu/os_sched.h> |
22 | 21 | ||
@@ -489,11 +488,9 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
489 | dma_buf_vunmap(dmabuf, mem); | 488 | dma_buf_vunmap(dmabuf, mem); |
490 | } | 489 | } |
491 | } | 490 | } |
492 | #endif | ||
493 | 491 | ||
494 | static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) | 492 | void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) |
495 | { | 493 | { |
496 | #ifdef CONFIG_DEBUG_FS | ||
497 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; | 494 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; |
498 | u32 n = c->gpfifo.entry_num; | 495 | u32 n = c->gpfifo.entry_num; |
499 | u32 start = c->gpfifo.put; | 496 | u32 start = c->gpfifo.put; |
@@ -507,549 +504,5 @@ static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
507 | 504 | ||
508 | for (i = 0; i < count; i++) | 505 | for (i = 0; i < count; i++) |
509 | trace_write_pushbuffer(c, &gp[(start + i) % n]); | 506 | trace_write_pushbuffer(c, &gp[(start + i) % n]); |
510 | #endif | ||
511 | } | ||
512 | |||
513 | /* | ||
514 | * Handle the submit synchronization - pre-fences and post-fences. | ||
515 | */ | ||
516 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | ||
517 | struct nvgpu_channel_fence *fence, | ||
518 | struct channel_gk20a_job *job, | ||
519 | struct priv_cmd_entry **wait_cmd, | ||
520 | struct priv_cmd_entry **incr_cmd, | ||
521 | struct gk20a_fence **post_fence, | ||
522 | bool register_irq, | ||
523 | u32 flags) | ||
524 | { | ||
525 | struct gk20a *g = c->g; | ||
526 | bool need_sync_fence = false; | ||
527 | bool new_sync_created = false; | ||
528 | int wait_fence_fd = -1; | ||
529 | int err = 0; | ||
530 | bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); | ||
531 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
532 | |||
533 | if (g->aggressive_sync_destroy_thresh) { | ||
534 | nvgpu_mutex_acquire(&c->sync_lock); | ||
535 | if (!c->sync) { | ||
536 | c->sync = gk20a_channel_sync_create(c, false); | ||
537 | if (!c->sync) { | ||
538 | err = -ENOMEM; | ||
539 | nvgpu_mutex_release(&c->sync_lock); | ||
540 | goto fail; | ||
541 | } | ||
542 | new_sync_created = true; | ||
543 | } | ||
544 | nvgpu_atomic_inc(&c->sync->refcount); | ||
545 | nvgpu_mutex_release(&c->sync_lock); | ||
546 | } | ||
547 | |||
548 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | ||
549 | err = g->ops.fifo.resetup_ramfc(c); | ||
550 | if (err) | ||
551 | goto fail; | ||
552 | } | ||
553 | |||
554 | /* | ||
555 | * Optionally insert syncpt/semaphore wait in the beginning of gpfifo | ||
556 | * submission when user requested and the wait hasn't expired. | ||
557 | */ | ||
558 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { | ||
559 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
560 | |||
561 | if (!pre_alloc_enabled) | ||
562 | job->wait_cmd = nvgpu_kzalloc(g, | ||
563 | sizeof(struct priv_cmd_entry)); | ||
564 | |||
565 | if (!job->wait_cmd) { | ||
566 | err = -ENOMEM; | ||
567 | goto fail; | ||
568 | } | ||
569 | |||
570 | if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
571 | wait_fence_fd = fence->id; | ||
572 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | ||
573 | job->wait_cmd, max_wait_cmds); | ||
574 | } else { | ||
575 | err = c->sync->wait_syncpt(c->sync, fence->id, | ||
576 | fence->value, | ||
577 | job->wait_cmd); | ||
578 | } | ||
579 | |||
580 | if (err) | ||
581 | goto clean_up_wait_cmd; | ||
582 | |||
583 | if (job->wait_cmd->valid) | ||
584 | *wait_cmd = job->wait_cmd; | ||
585 | } | ||
586 | |||
587 | if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && | ||
588 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) | ||
589 | need_sync_fence = true; | ||
590 | |||
591 | /* | ||
592 | * Always generate an increment at the end of a GPFIFO submission. This | ||
593 | * is used to keep track of method completion for idle railgating. The | ||
594 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | ||
595 | */ | ||
596 | job->post_fence = gk20a_alloc_fence(c); | ||
597 | if (!job->post_fence) { | ||
598 | err = -ENOMEM; | ||
599 | goto clean_up_wait_cmd; | ||
600 | } | ||
601 | if (!pre_alloc_enabled) | ||
602 | job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); | ||
603 | |||
604 | if (!job->incr_cmd) { | ||
605 | err = -ENOMEM; | ||
606 | goto clean_up_post_fence; | ||
607 | } | ||
608 | |||
609 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
610 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, | ||
611 | job->post_fence, need_wfi, need_sync_fence, | ||
612 | register_irq); | ||
613 | else | ||
614 | err = c->sync->incr(c->sync, job->incr_cmd, | ||
615 | job->post_fence, need_sync_fence, | ||
616 | register_irq); | ||
617 | if (!err) { | ||
618 | *incr_cmd = job->incr_cmd; | ||
619 | *post_fence = job->post_fence; | ||
620 | } else | ||
621 | goto clean_up_incr_cmd; | ||
622 | |||
623 | return 0; | ||
624 | |||
625 | clean_up_incr_cmd: | ||
626 | free_priv_cmdbuf(c, job->incr_cmd); | ||
627 | if (!pre_alloc_enabled) | ||
628 | job->incr_cmd = NULL; | ||
629 | clean_up_post_fence: | ||
630 | gk20a_fence_put(job->post_fence); | ||
631 | job->post_fence = NULL; | ||
632 | clean_up_wait_cmd: | ||
633 | if (job->wait_cmd) | ||
634 | free_priv_cmdbuf(c, job->wait_cmd); | ||
635 | if (!pre_alloc_enabled) | ||
636 | job->wait_cmd = NULL; | ||
637 | fail: | ||
638 | *wait_cmd = NULL; | ||
639 | return err; | ||
640 | } | ||
641 | |||
642 | static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
643 | struct priv_cmd_entry *cmd) | ||
644 | { | ||
645 | struct gk20a *g = c->g; | ||
646 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
647 | struct nvgpu_gpfifo_entry x = { | ||
648 | .entry0 = u64_lo32(cmd->gva), | ||
649 | .entry1 = u64_hi32(cmd->gva) | | ||
650 | pbdma_gp_entry1_length_f(cmd->size) | ||
651 | }; | ||
652 | |||
653 | nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
654 | &x, sizeof(x)); | ||
655 | |||
656 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
657 | trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, | ||
658 | (u32 *)cmd->mem->cpu_va + cmd->off); | ||
659 | |||
660 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
661 | } | ||
662 | |||
663 | static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, | ||
664 | struct nvgpu_gpfifo_userdata userdata, | ||
665 | u32 num_entries) | ||
666 | { | ||
667 | struct gk20a *g = c->g; | ||
668 | struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; | ||
669 | u32 gpfifo_size = c->gpfifo.entry_num; | ||
670 | u32 len = num_entries; | ||
671 | u32 start = c->gpfifo.put; | ||
672 | u32 end = start + len; /* exclusive */ | ||
673 | int err; | ||
674 | |||
675 | if (end > gpfifo_size) { | ||
676 | /* wrap-around */ | ||
677 | int length0 = gpfifo_size - start; | ||
678 | int length1 = len - length0; | ||
679 | |||
680 | err = g->os_channel.copy_user_gpfifo( | ||
681 | gpfifo_cpu + start, userdata, | ||
682 | 0, length0); | ||
683 | if (err) | ||
684 | return err; | ||
685 | |||
686 | err = g->os_channel.copy_user_gpfifo( | ||
687 | gpfifo_cpu, userdata, | ||
688 | length0, length1); | ||
689 | if (err) | ||
690 | return err; | ||
691 | } else { | ||
692 | err = g->os_channel.copy_user_gpfifo( | ||
693 | gpfifo_cpu + start, userdata, | ||
694 | 0, len); | ||
695 | if (err) | ||
696 | return err; | ||
697 | } | ||
698 | |||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c, | ||
703 | struct nvgpu_gpfifo_entry *src, u32 num_entries) | ||
704 | { | ||
705 | struct gk20a *g = c->g; | ||
706 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
707 | /* in bytes */ | ||
708 | u32 gpfifo_size = | ||
709 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | ||
710 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | ||
711 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
712 | u32 end = start + len; /* exclusive */ | ||
713 | |||
714 | if (end > gpfifo_size) { | ||
715 | /* wrap-around */ | ||
716 | int length0 = gpfifo_size - start; | ||
717 | int length1 = len - length0; | ||
718 | struct nvgpu_gpfifo_entry *src2 = src + length0; | ||
719 | |||
720 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0); | ||
721 | nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1); | ||
722 | } else { | ||
723 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len); | ||
724 | } | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
729 | * splitting into two memcpys to handle wrap-around. | ||
730 | */ | ||
731 | static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c, | ||
732 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
733 | struct nvgpu_gpfifo_userdata userdata, | ||
734 | u32 num_entries) | ||
735 | { | ||
736 | struct gk20a *g = c->g; | ||
737 | int err; | ||
738 | |||
739 | if (!kern_gpfifo && !c->gpfifo.pipe) { | ||
740 | /* | ||
741 | * This path (from userspace to sysmem) is special in order to | ||
742 | * avoid two copies unnecessarily (from user to pipe, then from | ||
743 | * pipe to gpu sysmem buffer). | ||
744 | */ | ||
745 | err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, | ||
746 | num_entries); | ||
747 | if (err) | ||
748 | return err; | ||
749 | } else if (!kern_gpfifo) { | ||
750 | /* from userspace to vidmem, use the common path */ | ||
751 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, | ||
752 | 0, num_entries); | ||
753 | if (err) | ||
754 | return err; | ||
755 | |||
756 | nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, | ||
757 | num_entries); | ||
758 | } else { | ||
759 | /* from kernel to either sysmem or vidmem, don't need | ||
760 | * copy_user_gpfifo so use the common path */ | ||
761 | nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); | ||
762 | } | ||
763 | |||
764 | trace_write_pushbuffers(c, num_entries); | ||
765 | |||
766 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
767 | (c->gpfifo.entry_num - 1); | ||
768 | |||
769 | return 0; | ||
770 | } | ||
771 | |||
772 | static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | ||
773 | struct nvgpu_gpfifo_entry *gpfifo, | ||
774 | struct nvgpu_gpfifo_userdata userdata, | ||
775 | u32 num_entries, | ||
776 | u32 flags, | ||
777 | struct nvgpu_channel_fence *fence, | ||
778 | struct gk20a_fence **fence_out, | ||
779 | struct fifo_profile_gk20a *profile) | ||
780 | { | ||
781 | struct gk20a *g = c->g; | ||
782 | struct priv_cmd_entry *wait_cmd = NULL; | ||
783 | struct priv_cmd_entry *incr_cmd = NULL; | ||
784 | struct gk20a_fence *post_fence = NULL; | ||
785 | struct channel_gk20a_job *job = NULL; | ||
786 | /* we might need two extra gpfifo entries - one for pre fence | ||
787 | * and one for post fence. */ | ||
788 | const int extra_entries = 2; | ||
789 | bool skip_buffer_refcounting = (flags & | ||
790 | NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); | ||
791 | int err = 0; | ||
792 | bool need_job_tracking; | ||
793 | bool need_deferred_cleanup = false; | ||
794 | |||
795 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
796 | return -ENODEV; | ||
797 | |||
798 | if (c->has_timedout) | ||
799 | return -ETIMEDOUT; | ||
800 | |||
801 | if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) | ||
802 | return -ENOMEM; | ||
803 | |||
804 | /* fifo not large enough for request. Return error immediately. | ||
805 | * Kernel can insert gpfifo entries before and after user gpfifos. | ||
806 | * So, add extra_entries in user request. Also, HW with fifo size N | ||
807 | * can accept only N-1 entreis and so the below condition */ | ||
808 | if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { | ||
809 | nvgpu_err(g, "not enough gpfifo space allocated"); | ||
810 | return -ENOMEM; | ||
811 | } | ||
812 | |||
813 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | ||
814 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | ||
815 | !fence) | ||
816 | return -EINVAL; | ||
817 | |||
818 | /* an address space needs to have been bound at this point. */ | ||
819 | if (!gk20a_channel_as_bound(c)) { | ||
820 | nvgpu_err(g, | ||
821 | "not bound to an address space at time of gpfifo" | ||
822 | " submission."); | ||
823 | return -EINVAL; | ||
824 | } | ||
825 | |||
826 | gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); | ||
827 | |||
828 | /* update debug settings */ | ||
829 | nvgpu_ltc_sync_enabled(g); | ||
830 | |||
831 | nvgpu_log_info(g, "channel %d", c->chid); | ||
832 | |||
833 | /* | ||
834 | * Job tracking is necessary for any of the following conditions: | ||
835 | * - pre- or post-fence functionality | ||
836 | * - channel wdt | ||
837 | * - GPU rail-gating with non-deterministic channels | ||
838 | * - buffer refcounting | ||
839 | * | ||
840 | * If none of the conditions are met, then job tracking is not | ||
841 | * required and a fast submit can be done (ie. only need to write | ||
842 | * out userspace GPFIFO entries and update GP_PUT). | ||
843 | */ | ||
844 | need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || | ||
845 | (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || | ||
846 | c->timeout.enabled || | ||
847 | (g->can_railgate && !c->deterministic) || | ||
848 | !skip_buffer_refcounting; | ||
849 | |||
850 | if (need_job_tracking) { | ||
851 | bool need_sync_framework = false; | ||
852 | |||
853 | /* | ||
854 | * If the channel is to have deterministic latency and | ||
855 | * job tracking is required, the channel must have | ||
856 | * pre-allocated resources. Otherwise, we fail the submit here | ||
857 | */ | ||
858 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) | ||
859 | return -EINVAL; | ||
860 | |||
861 | need_sync_framework = | ||
862 | gk20a_channel_sync_needs_sync_framework(g) || | ||
863 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && | ||
864 | flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); | ||
865 | |||
866 | /* | ||
867 | * Deferred clean-up is necessary for any of the following | ||
868 | * conditions: | ||
869 | * - channel's deterministic flag is not set | ||
870 | * - dependency on sync framework, which could make the | ||
871 | * behavior of the clean-up operation non-deterministic | ||
872 | * (should not be performed in the submit path) | ||
873 | * - channel wdt | ||
874 | * - GPU rail-gating with non-deterministic channels | ||
875 | * - buffer refcounting | ||
876 | * | ||
877 | * If none of the conditions are met, then deferred clean-up | ||
878 | * is not required, and we clean-up one job-tracking | ||
879 | * resource in the submit path. | ||
880 | */ | ||
881 | need_deferred_cleanup = !c->deterministic || | ||
882 | need_sync_framework || | ||
883 | c->timeout.enabled || | ||
884 | (g->can_railgate && | ||
885 | !c->deterministic) || | ||
886 | !skip_buffer_refcounting; | ||
887 | |||
888 | /* | ||
889 | * For deterministic channels, we don't allow deferred clean_up | ||
890 | * processing to occur. In cases we hit this, we fail the submit | ||
891 | */ | ||
892 | if (c->deterministic && need_deferred_cleanup) | ||
893 | return -EINVAL; | ||
894 | |||
895 | if (!c->deterministic) { | ||
896 | /* | ||
897 | * Get a power ref unless this is a deterministic | ||
898 | * channel that holds them during the channel lifetime. | ||
899 | * This one is released by gk20a_channel_clean_up_jobs, | ||
900 | * via syncpt or sema interrupt, whichever is used. | ||
901 | */ | ||
902 | err = gk20a_busy(g); | ||
903 | if (err) { | ||
904 | nvgpu_err(g, | ||
905 | "failed to host gk20a to submit gpfifo"); | ||
906 | nvgpu_print_current(g, NULL, NVGPU_ERROR); | ||
907 | return err; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | if (!need_deferred_cleanup) { | ||
912 | /* clean up a single job */ | ||
913 | gk20a_channel_clean_up_jobs(c, false); | ||
914 | } | ||
915 | } | ||
916 | |||
917 | |||
918 | /* Grab access to HW to deal with do_idle */ | ||
919 | if (c->deterministic) | ||
920 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
921 | |||
922 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
923 | /* | ||
924 | * Nope - this channel has dropped its own power ref. As | ||
925 | * deterministic submits don't hold power on per each submitted | ||
926 | * job like normal ones do, the GPU might railgate any time now | ||
927 | * and thus submit is disallowed. | ||
928 | */ | ||
929 | err = -EINVAL; | ||
930 | goto clean_up; | ||
931 | } | ||
932 | |||
933 | trace_gk20a_channel_submit_gpfifo(g->name, | ||
934 | c->chid, | ||
935 | num_entries, | ||
936 | flags, | ||
937 | fence ? fence->id : 0, | ||
938 | fence ? fence->value : 0); | ||
939 | |||
940 | nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", | ||
941 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
942 | |||
943 | /* | ||
944 | * Make sure we have enough space for gpfifo entries. Check cached | ||
945 | * values first and then read from HW. If no space, return EAGAIN | ||
946 | * and let userpace decide to re-try request or not. | ||
947 | */ | ||
948 | if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { | ||
949 | if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { | ||
950 | err = -EAGAIN; | ||
951 | goto clean_up; | ||
952 | } | ||
953 | } | ||
954 | |||
955 | if (c->has_timedout) { | ||
956 | err = -ETIMEDOUT; | ||
957 | goto clean_up; | ||
958 | } | ||
959 | |||
960 | if (need_job_tracking) { | ||
961 | err = channel_gk20a_alloc_job(c, &job); | ||
962 | if (err) | ||
963 | goto clean_up; | ||
964 | |||
965 | err = gk20a_submit_prepare_syncs(c, fence, job, | ||
966 | &wait_cmd, &incr_cmd, | ||
967 | &post_fence, | ||
968 | need_deferred_cleanup, | ||
969 | flags); | ||
970 | if (err) | ||
971 | goto clean_up_job; | ||
972 | } | ||
973 | |||
974 | gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); | ||
975 | |||
976 | if (wait_cmd) | ||
977 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | ||
978 | |||
979 | err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, | ||
980 | num_entries); | ||
981 | if (err) | ||
982 | goto clean_up_job; | ||
983 | |||
984 | /* | ||
985 | * And here's where we add the incr_cmd we generated earlier. It should | ||
986 | * always run! | ||
987 | */ | ||
988 | if (incr_cmd) | ||
989 | gk20a_submit_append_priv_cmdbuf(c, incr_cmd); | ||
990 | |||
991 | if (fence_out) | ||
992 | *fence_out = gk20a_fence_get(post_fence); | ||
993 | |||
994 | if (need_job_tracking) | ||
995 | /* TODO! Check for errors... */ | ||
996 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | ||
997 | gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); | ||
998 | |||
999 | g->ops.fifo.userd_gp_put(g, c); | ||
1000 | |||
1001 | /* No hw access beyond this point */ | ||
1002 | if (c->deterministic) | ||
1003 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1004 | |||
1005 | trace_gk20a_channel_submitted_gpfifo(g->name, | ||
1006 | c->chid, | ||
1007 | num_entries, | ||
1008 | flags, | ||
1009 | post_fence ? post_fence->syncpt_id : 0, | ||
1010 | post_fence ? post_fence->syncpt_value : 0); | ||
1011 | |||
1012 | nvgpu_log_info(g, "post-submit put %d, get %d, size %d", | ||
1013 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
1014 | |||
1015 | gk20a_fifo_profile_snapshot(profile, PROFILE_END); | ||
1016 | |||
1017 | nvgpu_log_fn(g, "done"); | ||
1018 | return err; | ||
1019 | |||
1020 | clean_up_job: | ||
1021 | channel_gk20a_free_job(c, job); | ||
1022 | clean_up: | ||
1023 | nvgpu_log_fn(g, "fail"); | ||
1024 | gk20a_fence_put(post_fence); | ||
1025 | if (c->deterministic) | ||
1026 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1027 | else if (need_deferred_cleanup) | ||
1028 | gk20a_idle(g); | ||
1029 | |||
1030 | return err; | ||
1031 | } | ||
1032 | |||
1033 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
1034 | struct nvgpu_gpfifo_userdata userdata, | ||
1035 | u32 num_entries, | ||
1036 | u32 flags, | ||
1037 | struct nvgpu_channel_fence *fence, | ||
1038 | struct gk20a_fence **fence_out, | ||
1039 | struct fifo_profile_gk20a *profile) | ||
1040 | { | ||
1041 | return gk20a_submit_channel_gpfifo(c, NULL, userdata, num_entries, | ||
1042 | flags, fence, fence_out, profile); | ||
1043 | } | ||
1044 | |||
1045 | int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
1046 | struct nvgpu_gpfifo_entry *gpfifo, | ||
1047 | u32 num_entries, | ||
1048 | u32 flags, | ||
1049 | struct nvgpu_channel_fence *fence, | ||
1050 | struct gk20a_fence **fence_out) | ||
1051 | { | ||
1052 | struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; | ||
1053 | return gk20a_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, | ||
1054 | flags, fence, fence_out, NULL); | ||
1055 | } | 507 | } |
508 | #endif | ||
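
The gpfifo append helpers moved out of this file (nvgpu_submit_append_gpfifo_common() and friends, shown deleted above) copy entries into a power-of-two ring and split the copy in two when a write wraps past the end. The sketch below restates that pattern generically with memcpy(); the names and the standalone form are illustrative, not code from the patch.

/*
 * Illustrative sketch, not part of the patch: the wrap-around split
 * copy used by the moved submit code, on a power-of-two ring.
 */
#include <string.h>

struct entry { unsigned int entry0, entry1; };

static void ring_append(struct entry *ring, unsigned int ring_len,
			unsigned int *put, const struct entry *src,
			unsigned int n)
{
	unsigned int start = *put;

	if (start + n > ring_len) {
		/* wrap-around: fill the tail of the ring, then the start */
		unsigned int len0 = ring_len - start;

		memcpy(ring + start, src, len0 * sizeof(*src));
		memcpy(ring, src + len0, (n - len0) * sizeof(*src));
	} else {
		memcpy(ring + start, src, n * sizeof(*src));
	}

	/* advance put; ring_len must be a power of two for this mask */
	*put = (start + n) & (ring_len - 1);
}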
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h
index 43fa492b..87231a79 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.h
+++ b/drivers/gpu/nvgpu/os/linux/channel.h
@@ -84,19 +84,4 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
84 | int runlist_id, | 84 | int runlist_id, |
85 | bool is_privileged_channel); | 85 | bool is_privileged_channel); |
86 | 86 | ||
87 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
88 | struct nvgpu_gpfifo_userdata userdata, | ||
89 | u32 num_entries, | ||
90 | u32 flags, | ||
91 | struct nvgpu_channel_fence *fence, | ||
92 | struct gk20a_fence **fence_out, | ||
93 | struct fifo_profile_gk20a *profile); | ||
94 | |||
95 | int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
96 | struct nvgpu_gpfifo_entry *gpfifo, | ||
97 | u32 num_entries, | ||
98 | u32 flags, | ||
99 | struct nvgpu_channel_fence *fence, | ||
100 | struct gk20a_fence **fence_out); | ||
101 | |||
102 | #endif /* __NVGPU_CHANNEL_H__ */ | 87 | #endif /* __NVGPU_CHANNEL_H__ */ |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index fa6a02d6..7b003b76 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -34,6 +34,7 @@
34 | #include <nvgpu/barrier.h> | 34 | #include <nvgpu/barrier.h> |
35 | #include <nvgpu/nvhost.h> | 35 | #include <nvgpu/nvhost.h> |
36 | #include <nvgpu/os_sched.h> | 36 | #include <nvgpu/os_sched.h> |
37 | #include <nvgpu/channel.h> | ||
37 | 38 | ||
38 | #include "gk20a/gk20a.h" | 39 | #include "gk20a/gk20a.h" |
39 | #include "gk20a/dbg_gpu_gk20a.h" | 40 | #include "gk20a/dbg_gpu_gk20a.h" |
@@ -799,11 +800,11 @@ static int gk20a_ioctl_channel_submit_gpfifo(
799 | return fd; | 800 | return fd; |
800 | } | 801 | } |
801 | 802 | ||
802 | userdata.entries = (struct nvgpu_gpfifo_entry __user*) | 803 | userdata.entries = (struct nvgpu_gpfifo_entry __user *) |
803 | (uintptr_t)args->gpfifo; | 804 | (uintptr_t)args->gpfifo; |
804 | userdata.context = NULL; | 805 | userdata.context = NULL; |
805 | 806 | ||
806 | ret = gk20a_submit_channel_gpfifo_user(ch, | 807 | ret = nvgpu_submit_channel_gpfifo_user(ch, |
807 | userdata, args->num_entries, | 808 | userdata, args->num_entries, |
808 | submit_flags, &fence, &fence_out, profile); | 809 | submit_flags, &fence, &fence_out, profile); |
809 | 810 | ||