diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-05-18 04:35:29 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-20 15:25:45 -0400 |
commit | 819f32bdf119ebf4d758a901c9a22c2c4a7d167a (patch) | |
tree | 0dd9760b4788cb3e287750c62a43e89366eb38b5 /drivers/gpu | |
parent | 0ca69a482d15ccf22bacccc4964606969df144f6 (diff) |
gpu: nvgpu: abstract away ioctl gpfifo read
The biggest remaining Linuxism in the submit path is the set of
copy_from_user() calls that read the gpfifo entries into the HW-visible
buffer. Abstract away the copy of one such segment starting at some
offset and keep the wraparound logic and vidmem proxy in the core submit
path.
Jira NVGPU-705
Change-Id: I0c6438045c695e5e3f5da4fbc0c92d2c6e7f32cb
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1730480
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 11 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/channel.c | 99 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/channel.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 8 |
4 files changed, 80 insertions, 40 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 3b8c5cd2..9061236e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -148,6 +148,14 @@ enum gk20a_cbc_op { | |||
148 | enum nvgpu_unit; | 148 | enum nvgpu_unit; |
149 | 149 | ||
150 | enum nvgpu_flush_op; | 150 | enum nvgpu_flush_op; |
151 | |||
152 | struct _resmgr_context; | ||
153 | |||
154 | struct nvgpu_gpfifo_userdata { | ||
155 | struct nvgpu_gpfifo_entry __user *entries; | ||
156 | struct _resmgr_context *context; | ||
157 | }; | ||
158 | |||
151 | /* | 159 | /* |
152 | * gpu_ops should only contain function pointers! Non-function pointer members | 160 | * gpu_ops should only contain function pointers! Non-function pointer members |
153 | * should go in struct gk20a or be implemented with the boolean flag API defined | 161 | * should go in struct gk20a or be implemented with the boolean flag API defined |
@@ -1488,6 +1496,9 @@ struct gk20a { | |||
1488 | struct channel_gk20a *ch, const char *fmt, ...); | 1496 | struct channel_gk20a *ch, const char *fmt, ...); |
1489 | void (*signal_os_fence_framework)(struct channel_gk20a *ch); | 1497 | void (*signal_os_fence_framework)(struct channel_gk20a *ch); |
1490 | void (*destroy_os_fence_framework)(struct channel_gk20a *ch); | 1498 | void (*destroy_os_fence_framework)(struct channel_gk20a *ch); |
1499 | int (*copy_user_gpfifo)(struct nvgpu_gpfifo_entry *dest, | ||
1500 | struct nvgpu_gpfifo_userdata userdata, | ||
1501 | u32 start, u32 length); | ||
1491 | } os_channel; | 1502 | } os_channel; |
1492 | 1503 | ||
1493 | struct gk20a_scale_profile *scale_profile; | 1504 | struct gk20a_scale_profile *scale_profile; |
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c index a84c5a1c..dd2b17ee 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.c +++ b/drivers/gpu/nvgpu/os/linux/channel.c | |||
@@ -372,6 +372,18 @@ static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) | |||
372 | return (fence_framework->timeline != NULL); | 372 | return (fence_framework->timeline != NULL); |
373 | } | 373 | } |
374 | 374 | ||
375 | static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest, | ||
376 | struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length) | ||
377 | { | ||
378 | struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries; | ||
379 | unsigned long n; | ||
380 | |||
381 | n = copy_from_user(dest, user_gpfifo + start, | ||
382 | length * sizeof(struct nvgpu_gpfifo_entry)); | ||
383 | |||
384 | return n == 0 ? 0 : -EFAULT; | ||
385 | } | ||
386 | |||
375 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) | 387 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) |
376 | { | 388 | { |
377 | struct gk20a *g = &l->g; | 389 | struct gk20a *g = &l->g; |
@@ -403,6 +415,9 @@ int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) | |||
403 | g->os_channel.destroy_os_fence_framework = | 415 | g->os_channel.destroy_os_fence_framework = |
404 | nvgpu_channel_destroy_os_fence_framework; | 416 | nvgpu_channel_destroy_os_fence_framework; |
405 | 417 | ||
418 | g->os_channel.copy_user_gpfifo = | ||
419 | nvgpu_channel_copy_user_gpfifo; | ||
420 | |||
406 | return 0; | 421 | return 0; |
407 | 422 | ||
408 | err_clean: | 423 | err_clean: |
@@ -673,20 +688,20 @@ static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | |||
673 | */ | 688 | */ |
674 | static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, | 689 | static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, |
675 | struct nvgpu_gpfifo_entry *kern_gpfifo, | 690 | struct nvgpu_gpfifo_entry *kern_gpfifo, |
676 | struct nvgpu_gpfifo_entry __user *user_gpfifo, | 691 | struct nvgpu_gpfifo_userdata userdata, |
677 | u32 num_entries) | 692 | u32 num_entries) |
678 | { | 693 | { |
679 | /* byte offsets */ | 694 | struct gk20a *g = c->g; |
680 | u32 gpfifo_size = | 695 | u32 gpfifo_size = c->gpfifo.entry_num; |
681 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | 696 | u32 len = num_entries; |
682 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | 697 | u32 start = c->gpfifo.put; |
683 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
684 | u32 end = start + len; /* exclusive */ | 698 | u32 end = start + len; /* exclusive */ |
685 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | 699 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; |
686 | struct nvgpu_gpfifo_entry *cpu_src; | 700 | struct nvgpu_gpfifo_entry *cpu_src; |
687 | int err; | 701 | int err; |
688 | 702 | ||
689 | if (user_gpfifo && !c->gpfifo.pipe) { | 703 | if (!kern_gpfifo && !c->gpfifo.pipe) { |
704 | struct nvgpu_gpfifo_entry *gpfifo_cpu = gpfifo_mem->cpu_va; | ||
690 | /* | 705 | /* |
691 | * This path (from userspace to sysmem) is special in order to | 706 | * This path (from userspace to sysmem) is special in order to |
692 | * avoid two copies unnecessarily (from user to pipe, then from | 707 | * avoid two copies unnecessarily (from user to pipe, then from |
@@ -696,37 +711,45 @@ static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, | |||
696 | /* wrap-around */ | 711 | /* wrap-around */ |
697 | int length0 = gpfifo_size - start; | 712 | int length0 = gpfifo_size - start; |
698 | int length1 = len - length0; | 713 | int length1 = len - length0; |
699 | void __user *user2 = (u8 __user *)user_gpfifo + length0; | ||
700 | 714 | ||
701 | err = copy_from_user(gpfifo_mem->cpu_va + start, | 715 | err = g->os_channel.copy_user_gpfifo( |
702 | user_gpfifo, length0); | 716 | gpfifo_cpu + start, userdata, |
717 | 0, length0); | ||
703 | if (err) | 718 | if (err) |
704 | return err; | 719 | return err; |
705 | 720 | ||
706 | err = copy_from_user(gpfifo_mem->cpu_va, | 721 | err = g->os_channel.copy_user_gpfifo( |
707 | user2, length1); | 722 | gpfifo_cpu, userdata, |
723 | length0, length1); | ||
708 | if (err) | 724 | if (err) |
709 | return err; | 725 | return err; |
726 | |||
727 | trace_write_pushbuffer_range(c, gpfifo_cpu, NULL, | ||
728 | start, length0); | ||
729 | trace_write_pushbuffer_range(c, gpfifo_cpu, NULL, | ||
730 | 0, length1); | ||
710 | } else { | 731 | } else { |
711 | err = copy_from_user(gpfifo_mem->cpu_va + start, | 732 | err = g->os_channel.copy_user_gpfifo( |
712 | user_gpfifo, len); | 733 | gpfifo_cpu + start, userdata, |
734 | 0, len); | ||
713 | if (err) | 735 | if (err) |
714 | return err; | 736 | return err; |
715 | } | ||
716 | 737 | ||
717 | trace_write_pushbuffer_range(c, NULL, user_gpfifo, | 738 | trace_write_pushbuffer_range(c, gpfifo_cpu, NULL, |
718 | 0, num_entries); | 739 | start, len); |
740 | } | ||
719 | goto out; | 741 | goto out; |
720 | } else if (user_gpfifo) { | 742 | } else if (!kern_gpfifo) { |
721 | /* from userspace to vidmem, use the common copy path below */ | 743 | /* from userspace to vidmem, use the common copy path below */ |
722 | err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len); | 744 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, |
745 | 0, len); | ||
723 | if (err) | 746 | if (err) |
724 | return err; | 747 | return err; |
725 | 748 | ||
726 | cpu_src = c->gpfifo.pipe; | 749 | cpu_src = c->gpfifo.pipe; |
727 | } else { | 750 | } else { |
728 | /* from kernel to either sysmem or vidmem, don't need | 751 | /* from kernel to either sysmem or vidmem, don't need |
729 | * copy_from_user so use the common path below */ | 752 | * copy_user_gpfifo so use the common path below */ |
730 | cpu_src = kern_gpfifo; | 753 | cpu_src = kern_gpfifo; |
731 | } | 754 | } |
732 | 755 | ||
@@ -734,13 +757,18 @@ static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, | |||
734 | /* wrap-around */ | 757 | /* wrap-around */ |
735 | int length0 = gpfifo_size - start; | 758 | int length0 = gpfifo_size - start; |
736 | int length1 = len - length0; | 759 | int length1 = len - length0; |
737 | void *src2 = (u8 *)cpu_src + length0; | 760 | struct nvgpu_gpfifo_entry *src2 = cpu_src + length0; |
761 | int s_bytes = start * sizeof(struct nvgpu_gpfifo_entry); | ||
762 | int l0_bytes = length0 * sizeof(struct nvgpu_gpfifo_entry); | ||
763 | int l1_bytes = length1 * sizeof(struct nvgpu_gpfifo_entry); | ||
738 | 764 | ||
739 | nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0); | 765 | nvgpu_mem_wr_n(c->g, gpfifo_mem, s_bytes, cpu_src, l0_bytes); |
740 | nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1); | 766 | nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, l1_bytes); |
741 | } else { | 767 | } else { |
742 | nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len); | 768 | nvgpu_mem_wr_n(c->g, gpfifo_mem, |
743 | 769 | start * sizeof(struct nvgpu_gpfifo_entry), | |
770 | cpu_src, | ||
771 | len * sizeof(struct nvgpu_gpfifo_entry)); | ||
744 | } | 772 | } |
745 | 773 | ||
746 | trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); | 774 | trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); |
@@ -754,7 +782,7 @@ out: | |||
754 | 782 | ||
755 | static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | 783 | static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, |
756 | struct nvgpu_gpfifo_entry *gpfifo, | 784 | struct nvgpu_gpfifo_entry *gpfifo, |
757 | struct nvgpu_submit_gpfifo_args *args, | 785 | struct nvgpu_gpfifo_userdata userdata, |
758 | u32 num_entries, | 786 | u32 num_entries, |
759 | u32 flags, | 787 | u32 flags, |
760 | struct nvgpu_channel_fence *fence, | 788 | struct nvgpu_channel_fence *fence, |
@@ -774,8 +802,6 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
774 | int err = 0; | 802 | int err = 0; |
775 | bool need_job_tracking; | 803 | bool need_job_tracking; |
776 | bool need_deferred_cleanup = false; | 804 | bool need_deferred_cleanup = false; |
777 | struct nvgpu_gpfifo_entry __user *user_gpfifo = args ? | ||
778 | (struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL; | ||
779 | 805 | ||
780 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | 806 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) |
781 | return -ENODEV; | 807 | return -ENODEV; |
@@ -795,9 +821,6 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
795 | return -ENOMEM; | 821 | return -ENOMEM; |
796 | } | 822 | } |
797 | 823 | ||
798 | if (!gpfifo && !args) | ||
799 | return -EINVAL; | ||
800 | |||
801 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | 824 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | |
802 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | 825 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && |
803 | !fence) | 826 | !fence) |
@@ -964,9 +987,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
964 | if (wait_cmd) | 987 | if (wait_cmd) |
965 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | 988 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); |
966 | 989 | ||
967 | if (gpfifo || user_gpfifo) | 990 | err = gk20a_submit_append_gpfifo(c, gpfifo, userdata, |
968 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, | 991 | num_entries); |
969 | num_entries); | ||
970 | if (err) | 992 | if (err) |
971 | goto clean_up_job; | 993 | goto clean_up_job; |
972 | 994 | ||
@@ -1020,14 +1042,14 @@ clean_up: | |||
1020 | } | 1042 | } |
1021 | 1043 | ||
1022 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | 1044 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, |
1023 | struct nvgpu_submit_gpfifo_args *args, | 1045 | struct nvgpu_gpfifo_userdata userdata, |
1024 | u32 num_entries, | 1046 | u32 num_entries, |
1025 | u32 flags, | 1047 | u32 flags, |
1026 | struct nvgpu_channel_fence *fence, | 1048 | struct nvgpu_channel_fence *fence, |
1027 | struct gk20a_fence **fence_out, | 1049 | struct gk20a_fence **fence_out, |
1028 | struct fifo_profile_gk20a *profile) | 1050 | struct fifo_profile_gk20a *profile) |
1029 | { | 1051 | { |
1030 | return gk20a_submit_channel_gpfifo(c, NULL, args, num_entries, | 1052 | return gk20a_submit_channel_gpfifo(c, NULL, userdata, num_entries, |
1031 | flags, fence, fence_out, profile); | 1053 | flags, fence, fence_out, profile); |
1032 | } | 1054 | } |
1033 | 1055 | ||
@@ -1038,6 +1060,7 @@ int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | |||
1038 | struct nvgpu_channel_fence *fence, | 1060 | struct nvgpu_channel_fence *fence, |
1039 | struct gk20a_fence **fence_out) | 1061 | struct gk20a_fence **fence_out) |
1040 | { | 1062 | { |
1041 | return gk20a_submit_channel_gpfifo(c, gpfifo, NULL, num_entries, flags, | 1063 | struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; |
1042 | fence, fence_out, NULL); | 1064 | return gk20a_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, |
1065 | flags, fence, fence_out, NULL); | ||
1043 | } | 1066 | } |
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h index 689ad8bf..43fa492b 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.h +++ b/drivers/gpu/nvgpu/os/linux/channel.h | |||
@@ -85,7 +85,7 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | |||
85 | bool is_privileged_channel); | 85 | bool is_privileged_channel); |
86 | 86 | ||
87 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | 87 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, |
88 | struct nvgpu_submit_gpfifo_args *args, | 88 | struct nvgpu_gpfifo_userdata userdata, |
89 | u32 num_entries, | 89 | u32 num_entries, |
90 | u32 flags, | 90 | u32 flags, |
91 | struct nvgpu_channel_fence *fence, | 91 | struct nvgpu_channel_fence *fence, |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index ee4ef237..fa6a02d6 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c | |||
@@ -776,6 +776,7 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
776 | u32 submit_flags = 0; | 776 | u32 submit_flags = 0; |
777 | int fd = -1; | 777 | int fd = -1; |
778 | struct gk20a *g = ch->g; | 778 | struct gk20a *g = ch->g; |
779 | struct nvgpu_gpfifo_userdata userdata; | ||
779 | 780 | ||
780 | int ret = 0; | 781 | int ret = 0; |
781 | nvgpu_log_fn(g, " "); | 782 | nvgpu_log_fn(g, " "); |
@@ -798,7 +799,12 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
798 | return fd; | 799 | return fd; |
799 | } | 800 | } |
800 | 801 | ||
801 | ret = gk20a_submit_channel_gpfifo_user(ch, args, args->num_entries, | 802 | userdata.entries = (struct nvgpu_gpfifo_entry __user*) |
803 | (uintptr_t)args->gpfifo; | ||
804 | userdata.context = NULL; | ||
805 | |||
806 | ret = gk20a_submit_channel_gpfifo_user(ch, | ||
807 | userdata, args->num_entries, | ||
802 | submit_flags, &fence, &fence_out, profile); | 808 | submit_flags, &fence, &fence_out, profile); |
803 | 809 | ||
804 | if (ret) { | 810 | if (ret) { |