author     Deepak Nibade <dnibade@nvidia.com>                   2016-07-28 05:07:18 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2016-09-01 12:10:20 -0400
commit     f79639f61858c377cf1f3facfc0ce631f787f0e6 (patch)
tree       188d4033c93fcf0e5b819c074dc436b9b36f448e /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     aa7f4bf251ee6346bf300f3793002eb4a7f05562 (diff)
gpu: nvgpu: clear whole vidmem on first allocation
We currently clear vidmem pages in gk20a_gmmu_alloc_attr_vid_at(), i.e. in the
allocation path of each buffer. But since the buffer allocation path can be
latency critical, clear the whole of vidmem once, before the first user
allocation in gk20a_vidmem_buf_alloc(), and then clear a buffer's pages when
the buffer is released. This way, vidmem pages are guaranteed to be already
cleared in the buffer allocation path.

At a later stage, clearing of pages can be removed from the free path and
moved to a separate worker as well.

At this point, the first allocation has the overhead of clearing the whole of
vidmem, which takes about 380 ms; this should improve once clocks are raised.
Also, this is a one-time latency, and subsequent allocations have no clearing
overhead at all.

Add API gk20a_vidmem_clear_all() to clear the whole of vidmem.

We have WPR buffers allocated during boot-up at a fixed address in vidmem. To
avoid overwriting these buffers in gk20a_vidmem_clear_all(), clear all of
vidmem except for the bootstrap allocator carveout.

Add new API gk20a_gmmu_clear_vidmem_mem() to clear one mem_desc.

Jira DNVGPU-84

Change-Id: I5661700585c6241a6a1ddeb5b7c068d3d2aed4b3
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1194301
(cherry picked from commit 950ab61a04290ea405968d8b0d03e3bd044ce83d)
Reviewed-on: http://git-master/r/1193158
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
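As a quick illustration of the carveout arithmetic described above, here is a
minimal standalone C sketch of how two cleared regions bracket the bootstrap
(WPR) carveout. The layout values and local names are hypothetical and only
mirror the mm->vidmem.* fields that gk20a_vidmem_clear_all() reads in the diff
below; this is not part of the patch.

/*
 * Illustrative only: compute the two memset regions that together cover
 * all of vidmem except the bootstrap (WPR) carveout.  Layout values are
 * hypothetical; names mirror the mm->vidmem.* fields in the patch.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t base           = 0x000000000ULL;  /* vidmem start (assumed 0 here) */
        uint64_t size           = 0x100000000ULL;  /* 4 GiB of vidmem               */
        uint64_t bootstrap_base = 0x0ff000000ULL;  /* carveout start (hypothetical) */
        uint64_t bootstrap_size = 0x000800000ULL;  /* 8 MiB carveout (hypothetical) */

        /* Region 1: from the start of vidmem up to the carveout. */
        uint64_t r1_base = base;
        uint64_t r1_len  = bootstrap_base - base;

        /* Region 2: from the end of the carveout to the end of vidmem. */
        uint64_t r2_base = bootstrap_base + bootstrap_size;
        uint64_t r2_len  = (base + size) - r2_base;

        printf("clear region 1: base 0x%" PRIx64 ", length 0x%" PRIx64 "\n",
               r1_base, r1_len);
        printf("clear region 2: base 0x%" PRIx64 ", length 0x%" PRIx64 "\n",
               r2_base, r2_len);
        return 0;
}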
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  179
1 file changed, 131 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a5158e7c..65157ccd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -774,6 +774,70 @@ static void gk20a_init_pramin(struct mm_gk20a *mm)
         mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static int gk20a_vidmem_clear_all(struct gk20a *g)
+{
+        struct mm_gk20a *mm = &g->mm;
+        struct gk20a_fence *gk20a_fence_out = NULL;
+        u64 region2_base = 0;
+        int err = 0;
+
+        if (mm->vidmem.ce_ctx_id == ~0)
+                return -EINVAL;
+
+        err = gk20a_ce_execute_ops(g->dev,
+                        mm->vidmem.ce_ctx_id,
+                        0,
+                        mm->vidmem.base,
+                        mm->vidmem.bootstrap_base - mm->vidmem.base,
+                        0x00000000,
+                        NVGPU_CE_DST_LOCATION_LOCAL_FB,
+                        NVGPU_CE_MEMSET,
+                        NULL,
+                        0,
+                        NULL);
+        if (err) {
+                gk20a_err(g->dev,
+                        "Failed to clear vidmem region 1 : %d", err);
+                return err;
+        }
+
+        region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;
+
+        err = gk20a_ce_execute_ops(g->dev,
+                        mm->vidmem.ce_ctx_id,
+                        0,
+                        region2_base,
+                        mm->vidmem.size - region2_base,
+                        0x00000000,
+                        NVGPU_CE_DST_LOCATION_LOCAL_FB,
+                        NVGPU_CE_MEMSET,
+                        NULL,
+                        0,
+                        &gk20a_fence_out);
+        if (err) {
+                gk20a_err(g->dev,
+                        "Failed to clear vidmem region 2 : %d", err);
+                return err;
+        }
+
+        if (gk20a_fence_out) {
+                err = gk20a_fence_wait(gk20a_fence_out,
+                                gk20a_get_gr_idle_timeout(g));
+                gk20a_fence_put(gk20a_fence_out);
+                if (err) {
+                        gk20a_err(g->dev,
+                                "fence wait failed for CE execute ops");
+                        return err;
+                }
+        }
+
+        mm->vidmem.cleared = true;
+
+        return 0;
+}
+#endif
+
 static int gk20a_init_vidmem(struct mm_gk20a *mm)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
@@ -813,7 +877,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
         gk20a_alloc_fixed(&g->mm.vidmem.allocator,
                         bootstrap_base, bootstrap_size);
 
-        mm->vidmem.size = size;
+        mm->vidmem.base = base;
+        mm->vidmem.size = size - base;
+        mm->vidmem.bootstrap_base = bootstrap_base;
+        mm->vidmem.bootstrap_size = bootstrap_size;
 
         gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
 
@@ -2027,7 +2094,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
         struct gk20a_vidmem_buf *buf;
-        int err, fd;
+        int err = 0, fd;
 
         gk20a_dbg_fn("");
 
@@ -2037,6 +2104,14 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
 
         buf->g = g;
 
+        if (!g->mm.vidmem.cleared) {
+                err = gk20a_vidmem_clear_all(g);
+                if (err) {
+                        gk20a_err(g->dev, "failed to clear whole vidmem");
+                        goto err_kfree;
+                }
+        }
+
         err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
         if (err)
                 goto err_kfree;
@@ -2743,6 +2818,59 @@ static void gk20a_gmmu_free_attr_sys(struct gk20a *g, enum dma_attr attr,
         mem->aperture = APERTURE_INVALID;
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem)
+{
+        struct gk20a_fence *gk20a_fence_out = NULL;
+        struct gk20a_fence *gk20a_last_fence = NULL;
+        struct gk20a_page_alloc *alloc = NULL;
+        struct page_alloc_chunk *chunk = NULL;
+        int err = 0;
+
+        if (g->mm.vidmem.ce_ctx_id == ~0)
+                return -EINVAL;
+
+        alloc = (struct gk20a_page_alloc *)
+                        g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+
+        list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
+                if (gk20a_last_fence)
+                        gk20a_fence_put(gk20a_last_fence);
+
+                err = gk20a_ce_execute_ops(g->dev,
+                                g->mm.vidmem.ce_ctx_id,
+                                0,
+                                chunk->base,
+                                chunk->length,
+                                0x00000000,
+                                NVGPU_CE_DST_LOCATION_LOCAL_FB,
+                                NVGPU_CE_MEMSET,
+                                NULL,
+                                0,
+                                &gk20a_fence_out);
+
+                if (err) {
+                        gk20a_err(g->dev,
+                                "Failed gk20a_ce_execute_ops[%d]", err);
+                        return err;
+                }
+
+                gk20a_last_fence = gk20a_fence_out;
+        }
+
+        if (gk20a_last_fence) {
+                err = gk20a_fence_wait(gk20a_last_fence,
+                                gk20a_get_gr_idle_timeout(g));
+                gk20a_fence_put(gk20a_last_fence);
+                if (err)
+                        gk20a_err(g->dev,
+                                "fence wait failed for CE execute ops");
+        }
+
+        return err;
+}
+#endif
+
 int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem)
 {
         return gk20a_gmmu_alloc_attr_vid(g, 0, size, mem);
@@ -2803,56 +2931,10 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
         mem->size = size;
         mem->aperture = APERTURE_VIDMEM;
 
-        if (g->mm.vidmem.ce_ctx_id != ~0) {
-                struct gk20a_fence *gk20a_fence_out = NULL;
-                struct gk20a_fence *gk20a_last_fence = NULL;
-                struct gk20a_page_alloc *alloc = NULL;
-                struct page_alloc_chunk *chunk = NULL;
-
-                alloc = (struct gk20a_page_alloc *)
-                                g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
-
-                list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
-                        if (gk20a_last_fence)
-                                gk20a_fence_put(gk20a_last_fence);
-
-                        err = gk20a_ce_execute_ops(g->dev,
-                                        g->mm.vidmem.ce_ctx_id,
-                                        0,
-                                        chunk->base,
-                                        chunk->length,
-                                        0x00000000,
-                                        NVGPU_CE_DST_LOCATION_LOCAL_FB,
-                                        NVGPU_CE_MEMSET,
-                                        NULL,
-                                        0,
-                                        &gk20a_fence_out);
-
-                        if (err) {
-                                gk20a_err(g->dev,
-                                        "Failed gk20a_ce_execute_ops[%d]", err);
-                                goto fail_free_table;
-                        }
-
-                        gk20a_last_fence = gk20a_fence_out;
-                }
-
-                if (gk20a_last_fence) {
-                        err = gk20a_fence_wait(gk20a_last_fence,
-                                        gk20a_get_gr_idle_timeout(g));
-                        gk20a_fence_put(gk20a_last_fence);
-                        if (err)
-                                gk20a_err(g->dev,
-                                        "Failed to get the fence_out from CE execute ops");
-                }
-        }
-
         gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
 
         return 0;
 
-fail_free_table:
-        sg_free_table(mem->sgt);
 fail_kfree:
         kfree(mem->sgt);
 fail_physfree:
@@ -2867,6 +2949,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
                 struct mem_desc *mem)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
+        gk20a_gmmu_clear_vidmem_mem(g, mem);
         gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl));
         gk20a_free_sgtable(&mem->sgt);
         mem->size = 0;