summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com> 2018-05-21 10:31:10 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-06-20 15:25:49 -0400
commit: 84536b929be7e56dd8221ba8d541fd4cbcd149ed (patch)
tree: dd52bafa84fcf297861a8f3356f2f7651ea35792 /drivers
parent: 819f32bdf119ebf4d758a901c9a22c2c4a7d167a (diff)
gpu: nvgpu: split gk20a_submit_append_gpfifo
In gk20a_submit_channel_gpfifo the gpfifo entries can come from a kernel buffer or from userspace. To simplify the logic in gk20a_submit_append_gpfifo, extract out a function that copies the entries directly from userspace to the gpu memory for performance, and another function that copies from a kernel buffer to the gpu memory. The latter is used for kernel submits and when the gpfifo pipe exists, which would mean that the gpfifo memory is in vidmem and is thus not directly accessible with a kernel virtual pointer.

While this function is being changed a lot, also rename it to start with nvgpu_ instead of gk20a_.

Additionally, simplify pushbuffer debug tracing by always using the kernel memory for the prints. Tracing when the gpfifo memory has been allocated in vidmem is no longer supported; sysmem is almost always used in practice anyway.

Jira NVGPU-705

Change-Id: Icab843a379a75fb46054dee157a0a54ff9fbba59
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1730481
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/os/linux/channel.c | 182
1 file changed, 85 insertions, 97 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
index dd2b17ee..37e36cb9 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.c
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -491,45 +491,22 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
491} 491}
492#endif 492#endif
493 493
494static void trace_write_pushbuffer_range(struct channel_gk20a *c, 494static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
495 struct nvgpu_gpfifo_entry *g,
496 struct nvgpu_gpfifo_entry __user *user_gpfifo,
497 int offset,
498 int count)
499{ 495{
500#ifdef CONFIG_DEBUG_FS 496#ifdef CONFIG_DEBUG_FS
501 u32 size; 497 struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va;
502 int i; 498 u32 n = c->gpfifo.entry_num;
503 struct nvgpu_gpfifo_entry *gp; 499 u32 start = c->gpfifo.put;
504 bool gpfifo_allocated = false; 500 u32 i;
505 501
506 if (!gk20a_debug_trace_cmdbuf) 502 if (!gk20a_debug_trace_cmdbuf)
507 return; 503 return;
508 504
509 if (!g && !user_gpfifo) 505 if (!gp)
510 return; 506 return;
511 507
512 if (!g) { 508 for (i = 0; i < count; i++)
513 size = count * sizeof(struct nvgpu_gpfifo_entry); 509 trace_write_pushbuffer(c, &gp[(start + i) % n]);
514 if (size) {
515 g = nvgpu_big_malloc(c->g, size);
516 if (!g)
517 return;
518
519 if (copy_from_user(g, user_gpfifo, size)) {
520 nvgpu_big_free(c->g, g);
521 return;
522 }
523 }
524 gpfifo_allocated = true;
525 }
526
527 gp = g + offset;
528 for (i = 0; i < count; i++, gp++)
529 trace_write_pushbuffer(c, gp);
530
531 if (gpfifo_allocated)
532 nvgpu_big_free(c->g, g);
533#endif 510#endif
534} 511}
535 512
@@ -682,98 +659,109 @@ static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
682 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); 659 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
683} 660}
684 661
685/* 662static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
686 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
687 * splitting into two memcpys to handle wrap-around.
688 */
689static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
690 struct nvgpu_gpfifo_entry *kern_gpfifo,
691 struct nvgpu_gpfifo_userdata userdata, 663 struct nvgpu_gpfifo_userdata userdata,
692 u32 num_entries) 664 u32 num_entries)
693{ 665{
694 struct gk20a *g = c->g; 666 struct gk20a *g = c->g;
667 struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
695 u32 gpfifo_size = c->gpfifo.entry_num; 668 u32 gpfifo_size = c->gpfifo.entry_num;
696 u32 len = num_entries; 669 u32 len = num_entries;
697 u32 start = c->gpfifo.put; 670 u32 start = c->gpfifo.put;
698 u32 end = start + len; /* exclusive */ 671 u32 end = start + len; /* exclusive */
672 int err;
673
674 if (end > gpfifo_size) {
675 /* wrap-around */
676 int length0 = gpfifo_size - start;
677 int length1 = len - length0;
678
679 err = g->os_channel.copy_user_gpfifo(
680 gpfifo_cpu + start, userdata,
681 0, length0);
682 if (err)
683 return err;
684
685 err = g->os_channel.copy_user_gpfifo(
686 gpfifo_cpu, userdata,
687 length0, length1);
688 if (err)
689 return err;
690 } else {
691 err = g->os_channel.copy_user_gpfifo(
692 gpfifo_cpu + start, userdata,
693 0, len);
694 if (err)
695 return err;
696 }
697
698 return 0;
699}
700
701static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
702 struct nvgpu_gpfifo_entry *src, u32 num_entries)
703{
704 struct gk20a *g = c->g;
699 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; 705 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
700 struct nvgpu_gpfifo_entry *cpu_src; 706 /* in bytes */
707 u32 gpfifo_size =
708 c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
709 u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
710 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
711 u32 end = start + len; /* exclusive */
712
713 if (end > gpfifo_size) {
714 /* wrap-around */
715 int length0 = gpfifo_size - start;
716 int length1 = len - length0;
717 struct nvgpu_gpfifo_entry *src2 = src + length0;
718
719 nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
720 nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
721 } else {
722 nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
723 }
724}
725
726/*
727 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
728 * splitting into two memcpys to handle wrap-around.
729 */
730static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c,
731 struct nvgpu_gpfifo_entry *kern_gpfifo,
732 struct nvgpu_gpfifo_userdata userdata,
733 u32 num_entries)
734{
735 struct gk20a *g = c->g;
701 int err; 736 int err;
702 737
703 if (!kern_gpfifo && !c->gpfifo.pipe) { 738 if (!kern_gpfifo && !c->gpfifo.pipe) {
704 struct nvgpu_gpfifo_entry *gpfifo_cpu = gpfifo_mem->cpu_va;
705 /* 739 /*
706 * This path (from userspace to sysmem) is special in order to 740 * This path (from userspace to sysmem) is special in order to
707 * avoid two copies unnecessarily (from user to pipe, then from 741 * avoid two copies unnecessarily (from user to pipe, then from
708 * pipe to gpu sysmem buffer). 742 * pipe to gpu sysmem buffer).
709 */ 743 */
710 if (end > gpfifo_size) { 744 err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
711 /* wrap-around */ 745 num_entries);
712 int length0 = gpfifo_size - start; 746 if (err)
713 int length1 = len - length0; 747 return err;
714
715 err = g->os_channel.copy_user_gpfifo(
716 gpfifo_cpu + start, userdata,
717 0, length0);
718 if (err)
719 return err;
720
721 err = g->os_channel.copy_user_gpfifo(
722 gpfifo_cpu, userdata,
723 length0, length1);
724 if (err)
725 return err;
726
727 trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
728 start, length0);
729 trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
730 0, length1);
731 } else {
732 err = g->os_channel.copy_user_gpfifo(
733 gpfifo_cpu + start, userdata,
734 0, len);
735 if (err)
736 return err;
737
738 trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
739 start, len);
740 }
741 goto out;
742 } else if (!kern_gpfifo) { 748 } else if (!kern_gpfifo) {
743 /* from userspace to vidmem, use the common copy path below */ 749 /* from userspace to vidmem, use the common path */
744 err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, 750 err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata,
745 0, len); 751 0, num_entries);
746 if (err) 752 if (err)
747 return err; 753 return err;
748 754
749 cpu_src = c->gpfifo.pipe; 755 nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
756 num_entries);
750 } else { 757 } else {
751 /* from kernel to either sysmem or vidmem, don't need 758 /* from kernel to either sysmem or vidmem, don't need
752 * copy_user_gpfifo so use the common path below */ 759 * copy_user_gpfifo so use the common path */
753 cpu_src = kern_gpfifo; 760 nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
754 }
755
756 if (end > gpfifo_size) {
757 /* wrap-around */
758 int length0 = gpfifo_size - start;
759 int length1 = len - length0;
760 struct nvgpu_gpfifo_entry *src2 = cpu_src + length0;
761 int s_bytes = start * sizeof(struct nvgpu_gpfifo_entry);
762 int l0_bytes = length0 * sizeof(struct nvgpu_gpfifo_entry);
763 int l1_bytes = length1 * sizeof(struct nvgpu_gpfifo_entry);
764
765 nvgpu_mem_wr_n(c->g, gpfifo_mem, s_bytes, cpu_src, l0_bytes);
766 nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, l1_bytes);
767 } else {
768 nvgpu_mem_wr_n(c->g, gpfifo_mem,
769 start * sizeof(struct nvgpu_gpfifo_entry),
770 cpu_src,
771 len * sizeof(struct nvgpu_gpfifo_entry));
772 } 761 }
773 762
774 trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); 763 trace_write_pushbuffers(c, num_entries);
775 764
776out:
777 c->gpfifo.put = (c->gpfifo.put + num_entries) & 765 c->gpfifo.put = (c->gpfifo.put + num_entries) &
778 (c->gpfifo.entry_num - 1); 766 (c->gpfifo.entry_num - 1);
779 767
@@ -987,7 +975,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
987 if (wait_cmd) 975 if (wait_cmd)
988 gk20a_submit_append_priv_cmdbuf(c, wait_cmd); 976 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
989 977
990 err = gk20a_submit_append_gpfifo(c, gpfifo, userdata, 978 err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
991 num_entries); 979 num_entries);
992 if (err) 980 if (err)
993 goto clean_up_job; 981 goto clean_up_job;