diff options
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux')
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/channel.c | 182 |
1 files changed, 85 insertions, 97 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c index dd2b17ee..37e36cb9 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.c +++ b/drivers/gpu/nvgpu/os/linux/channel.c | |||
@@ -491,45 +491,22 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, | |||
491 | } | 491 | } |
492 | #endif | 492 | #endif |
493 | 493 | ||
494 | static void trace_write_pushbuffer_range(struct channel_gk20a *c, | 494 | static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) |
495 | struct nvgpu_gpfifo_entry *g, | ||
496 | struct nvgpu_gpfifo_entry __user *user_gpfifo, | ||
497 | int offset, | ||
498 | int count) | ||
499 | { | 495 | { |
500 | #ifdef CONFIG_DEBUG_FS | 496 | #ifdef CONFIG_DEBUG_FS |
501 | u32 size; | 497 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; |
502 | int i; | 498 | u32 n = c->gpfifo.entry_num; |
503 | struct nvgpu_gpfifo_entry *gp; | 499 | u32 start = c->gpfifo.put; |
504 | bool gpfifo_allocated = false; | 500 | u32 i; |
505 | 501 | ||
506 | if (!gk20a_debug_trace_cmdbuf) | 502 | if (!gk20a_debug_trace_cmdbuf) |
507 | return; | 503 | return; |
508 | 504 | ||
509 | if (!g && !user_gpfifo) | 505 | if (!gp) |
510 | return; | 506 | return; |
511 | 507 | ||
512 | if (!g) { | 508 | for (i = 0; i < count; i++) |
513 | size = count * sizeof(struct nvgpu_gpfifo_entry); | 509 | trace_write_pushbuffer(c, &gp[(start + i) % n]); |
514 | if (size) { | ||
515 | g = nvgpu_big_malloc(c->g, size); | ||
516 | if (!g) | ||
517 | return; | ||
518 | |||
519 | if (copy_from_user(g, user_gpfifo, size)) { | ||
520 | nvgpu_big_free(c->g, g); | ||
521 | return; | ||
522 | } | ||
523 | } | ||
524 | gpfifo_allocated = true; | ||
525 | } | ||
526 | |||
527 | gp = g + offset; | ||
528 | for (i = 0; i < count; i++, gp++) | ||
529 | trace_write_pushbuffer(c, gp); | ||
530 | |||
531 | if (gpfifo_allocated) | ||
532 | nvgpu_big_free(c->g, g); | ||
533 | #endif | 510 | #endif |
534 | } | 511 | } |
535 | 512 | ||
@@ -682,98 +659,109 @@ static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | |||
682 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | 659 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); |
683 | } | 660 | } |
684 | 661 | ||
685 | /* | 662 | static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, |
686 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
687 | * splitting into two memcpys to handle wrap-around. | ||
688 | */ | ||
689 | static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, | ||
690 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
691 | struct nvgpu_gpfifo_userdata userdata, | 663 | struct nvgpu_gpfifo_userdata userdata, |
692 | u32 num_entries) | 664 | u32 num_entries) |
693 | { | 665 | { |
694 | struct gk20a *g = c->g; | 666 | struct gk20a *g = c->g; |
667 | struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; | ||
695 | u32 gpfifo_size = c->gpfifo.entry_num; | 668 | u32 gpfifo_size = c->gpfifo.entry_num; |
696 | u32 len = num_entries; | 669 | u32 len = num_entries; |
697 | u32 start = c->gpfifo.put; | 670 | u32 start = c->gpfifo.put; |
698 | u32 end = start + len; /* exclusive */ | 671 | u32 end = start + len; /* exclusive */ |
672 | int err; | ||
673 | |||
674 | if (end > gpfifo_size) { | ||
675 | /* wrap-around */ | ||
676 | int length0 = gpfifo_size - start; | ||
677 | int length1 = len - length0; | ||
678 | |||
679 | err = g->os_channel.copy_user_gpfifo( | ||
680 | gpfifo_cpu + start, userdata, | ||
681 | 0, length0); | ||
682 | if (err) | ||
683 | return err; | ||
684 | |||
685 | err = g->os_channel.copy_user_gpfifo( | ||
686 | gpfifo_cpu, userdata, | ||
687 | length0, length1); | ||
688 | if (err) | ||
689 | return err; | ||
690 | } else { | ||
691 | err = g->os_channel.copy_user_gpfifo( | ||
692 | gpfifo_cpu + start, userdata, | ||
693 | 0, len); | ||
694 | if (err) | ||
695 | return err; | ||
696 | } | ||
697 | |||
698 | return 0; | ||
699 | } | ||
700 | |||
701 | static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c, | ||
702 | struct nvgpu_gpfifo_entry *src, u32 num_entries) | ||
703 | { | ||
704 | struct gk20a *g = c->g; | ||
699 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | 705 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; |
700 | struct nvgpu_gpfifo_entry *cpu_src; | 706 | /* in bytes */ |
707 | u32 gpfifo_size = | ||
708 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | ||
709 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | ||
710 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
711 | u32 end = start + len; /* exclusive */ | ||
712 | |||
713 | if (end > gpfifo_size) { | ||
714 | /* wrap-around */ | ||
715 | int length0 = gpfifo_size - start; | ||
716 | int length1 = len - length0; | ||
717 | struct nvgpu_gpfifo_entry *src2 = src + length0; | ||
718 | |||
719 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0); | ||
720 | nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1); | ||
721 | } else { | ||
722 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len); | ||
723 | } | ||
724 | } | ||
725 | |||
726 | /* | ||
727 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
728 | * splitting into two memcpys to handle wrap-around. | ||
729 | */ | ||
730 | static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c, | ||
731 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
732 | struct nvgpu_gpfifo_userdata userdata, | ||
733 | u32 num_entries) | ||
734 | { | ||
735 | struct gk20a *g = c->g; | ||
701 | int err; | 736 | int err; |
702 | 737 | ||
703 | if (!kern_gpfifo && !c->gpfifo.pipe) { | 738 | if (!kern_gpfifo && !c->gpfifo.pipe) { |
704 | struct nvgpu_gpfifo_entry *gpfifo_cpu = gpfifo_mem->cpu_va; | ||
705 | /* | 739 | /* |
706 | * This path (from userspace to sysmem) is special in order to | 740 | * This path (from userspace to sysmem) is special in order to |
707 | * avoid two copies unnecessarily (from user to pipe, then from | 741 | * avoid two copies unnecessarily (from user to pipe, then from |
708 | * pipe to gpu sysmem buffer). | 742 | * pipe to gpu sysmem buffer). |
709 | */ | 743 | */ |
710 | if (end > gpfifo_size) { | 744 | err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, |
711 | /* wrap-around */ | 745 | num_entries); |
712 | int length0 = gpfifo_size - start; | 746 | if (err) |
713 | int length1 = len - length0; | 747 | return err; |
714 | |||
715 | err = g->os_channel.copy_user_gpfifo( | ||
716 | gpfifo_cpu + start, userdata, | ||
717 | 0, length0); | ||
718 | if (err) | ||
719 | return err; | ||
720 | |||
721 | err = g->os_channel.copy_user_gpfifo( | ||
722 | gpfifo_cpu, userdata, | ||
723 | length0, length1); | ||
724 | if (err) | ||
725 | return err; | ||
726 | |||
727 | trace_write_pushbuffer_range(c, gpfifo_cpu, NULL, | ||
728 | start, length0); | ||
729 | trace_write_pushbuffer_range(c, gpfifo_cpu, NULL, | ||
730 | 0, length1); | ||
731 | } else { | ||
732 | err = g->os_channel.copy_user_gpfifo( | ||
733 | gpfifo_cpu + start, userdata, | ||
734 | 0, len); | ||
735 | if (err) | ||
736 | return err; | ||
737 | |||
738 | trace_write_pushbuffer_range(c, gpfifo_cpu, NULL, | ||
739 | start, len); | ||
740 | } | ||
741 | goto out; | ||
742 | } else if (!kern_gpfifo) { | 748 | } else if (!kern_gpfifo) { |
743 | /* from userspace to vidmem, use the common copy path below */ | 749 | /* from userspace to vidmem, use the common path */ |
744 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, | 750 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, |
745 | 0, len); | 751 | 0, num_entries); |
746 | if (err) | 752 | if (err) |
747 | return err; | 753 | return err; |
748 | 754 | ||
749 | cpu_src = c->gpfifo.pipe; | 755 | nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, |
756 | num_entries); | ||
750 | } else { | 757 | } else { |
751 | /* from kernel to either sysmem or vidmem, don't need | 758 | /* from kernel to either sysmem or vidmem, don't need |
752 | * copy_user_gpfifo so use the common path below */ | 759 | * copy_user_gpfifo so use the common path */ |
753 | cpu_src = kern_gpfifo; | 760 | nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); |
754 | } | ||
755 | |||
756 | if (end > gpfifo_size) { | ||
757 | /* wrap-around */ | ||
758 | int length0 = gpfifo_size - start; | ||
759 | int length1 = len - length0; | ||
760 | struct nvgpu_gpfifo_entry *src2 = cpu_src + length0; | ||
761 | int s_bytes = start * sizeof(struct nvgpu_gpfifo_entry); | ||
762 | int l0_bytes = length0 * sizeof(struct nvgpu_gpfifo_entry); | ||
763 | int l1_bytes = length1 * sizeof(struct nvgpu_gpfifo_entry); | ||
764 | |||
765 | nvgpu_mem_wr_n(c->g, gpfifo_mem, s_bytes, cpu_src, l0_bytes); | ||
766 | nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, l1_bytes); | ||
767 | } else { | ||
768 | nvgpu_mem_wr_n(c->g, gpfifo_mem, | ||
769 | start * sizeof(struct nvgpu_gpfifo_entry), | ||
770 | cpu_src, | ||
771 | len * sizeof(struct nvgpu_gpfifo_entry)); | ||
772 | } | 761 | } |
773 | 762 | ||
774 | trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); | 763 | trace_write_pushbuffers(c, num_entries); |
775 | 764 | ||
776 | out: | ||
777 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | 765 | c->gpfifo.put = (c->gpfifo.put + num_entries) & |
778 | (c->gpfifo.entry_num - 1); | 766 | (c->gpfifo.entry_num - 1); |
779 | 767 | ||
@@ -987,7 +975,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
987 | if (wait_cmd) | 975 | if (wait_cmd) |
988 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | 976 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); |
989 | 977 | ||
990 | err = gk20a_submit_append_gpfifo(c, gpfifo, userdata, | 978 | err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, |
991 | num_entries); | 979 | num_entries); |
992 | if (err) | 980 | if (err) |
993 | goto clean_up_job; | 981 | goto clean_up_job; |