summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c179
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h5
2 files changed, 136 insertions, 48 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a5158e7c..65157ccd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -774,6 +774,70 @@ static void gk20a_init_pramin(struct mm_gk20a *mm)
774 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; 774 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
775} 775}
776 776
#if defined(CONFIG_GK20A_VIDMEM)
/*
 * Zero-fill the entire vidmem aperture, skipping the bootstrap region.
 *
 * The clear is split into two CE memset operations: region 1 runs from the
 * aperture base up to the bootstrap region, region 2 from the end of the
 * bootstrap region onward. Only the second op requests a fence; waiting on
 * it is sufficient because CE ops on one context complete in order.
 *
 * Returns 0 on success, -EINVAL when no CE context is available, or the
 * error from the CE submission / fence wait. On success vidmem.cleared is
 * set so callers do not re-clear.
 */
static int gk20a_vidmem_clear_all(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct gk20a_fence *fence = NULL;
	u64 region2_base;
	int err;

	/* Without a copy-engine context we have no way to issue memsets. */
	if (mm->vidmem.ce_ctx_id == ~0)
		return -EINVAL;

	/* Region 1: [base, bootstrap_base). No fence needed yet. */
	err = gk20a_ce_execute_ops(g->dev,
			mm->vidmem.ce_ctx_id,
			0,
			mm->vidmem.base,
			mm->vidmem.bootstrap_base - mm->vidmem.base,
			0x00000000,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			NULL,
			0,
			NULL);
	if (err) {
		gk20a_err(g->dev,
			"Failed to clear vidmem region 1 : %d", err);
		return err;
	}

	region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;

	/*
	 * NOTE(review): vidmem.size is assigned (size - base) at init, i.e.
	 * a length, while region2_base is an absolute address — confirm the
	 * length computation below cannot underflow. TODO verify against
	 * gk20a_init_vidmem.
	 */
	err = gk20a_ce_execute_ops(g->dev,
			mm->vidmem.ce_ctx_id,
			0,
			region2_base,
			mm->vidmem.size - region2_base,
			0x00000000,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			NULL,
			0,
			&fence);
	if (err) {
		gk20a_err(g->dev,
			"Failed to clear vidmem region 2 : %d", err);
		return err;
	}

	/* Block until the final memset has retired, then drop our ref. */
	if (fence) {
		err = gk20a_fence_wait(fence,
				gk20a_get_gr_idle_timeout(g));
		gk20a_fence_put(fence);
		if (err) {
			gk20a_err(g->dev,
				"fence wait failed for CE execute ops");
			return err;
		}
	}

	mm->vidmem.cleared = true;

	return 0;
}
#endif
840
777static int gk20a_init_vidmem(struct mm_gk20a *mm) 841static int gk20a_init_vidmem(struct mm_gk20a *mm)
778{ 842{
779#if defined(CONFIG_GK20A_VIDMEM) 843#if defined(CONFIG_GK20A_VIDMEM)
@@ -813,7 +877,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
813 gk20a_alloc_fixed(&g->mm.vidmem.allocator, 877 gk20a_alloc_fixed(&g->mm.vidmem.allocator,
814 bootstrap_base, bootstrap_size); 878 bootstrap_base, bootstrap_size);
815 879
816 mm->vidmem.size = size; 880 mm->vidmem.base = base;
881 mm->vidmem.size = size - base;
882 mm->vidmem.bootstrap_base = bootstrap_base;
883 mm->vidmem.bootstrap_size = bootstrap_size;
817 884
818 gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); 885 gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
819 886
@@ -2027,7 +2094,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
2027{ 2094{
2028#if defined(CONFIG_GK20A_VIDMEM) 2095#if defined(CONFIG_GK20A_VIDMEM)
2029 struct gk20a_vidmem_buf *buf; 2096 struct gk20a_vidmem_buf *buf;
2030 int err, fd; 2097 int err = 0, fd;
2031 2098
2032 gk20a_dbg_fn(""); 2099 gk20a_dbg_fn("");
2033 2100
@@ -2037,6 +2104,14 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
2037 2104
2038 buf->g = g; 2105 buf->g = g;
2039 2106
2107 if (!g->mm.vidmem.cleared) {
2108 err = gk20a_vidmem_clear_all(g);
2109 if (err) {
2110 gk20a_err(g->dev, "failed to clear whole vidmem");
2111 goto err_kfree;
2112 }
2113 }
2114
2040 err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem); 2115 err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
2041 if (err) 2116 if (err)
2042 goto err_kfree; 2117 goto err_kfree;
@@ -2743,6 +2818,59 @@ static void gk20a_gmmu_free_attr_sys(struct gk20a *g, enum dma_attr attr,
2743 mem->aperture = APERTURE_INVALID; 2818 mem->aperture = APERTURE_INVALID;
2744} 2819}
2745 2820
#if defined(CONFIG_GK20A_VIDMEM)
/*
 * Zero a vidmem buffer before it is returned to the allocator.
 *
 * The buffer may be physically discontiguous: walk every chunk of the page
 * allocation backing @mem and issue one CE memset per chunk. Each iteration
 * drops the previous chunk's fence reference before submitting the next op,
 * so only the last fence is kept; waiting on it covers all earlier ops
 * since they were queued on the same CE context.
 *
 * Returns 0 on success, -EINVAL when no CE context exists, or the error
 * from the CE submission / fence wait.
 */
static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem)
{
	struct gk20a_fence *last = NULL;
	struct gk20a_fence *fence = NULL;
	struct gk20a_page_alloc *alloc;
	struct page_alloc_chunk *chunk;
	int err = 0;

	if (g->mm.vidmem.ce_ctx_id == ~0)
		return -EINVAL;

	/*
	 * NOTE(review): get_iova_addr's return value is reinterpreted as a
	 * gk20a_page_alloc pointer here — presumably the vidmem allocator
	 * stores the alloc handle as the "address"; verify against the
	 * allocator implementation.
	 */
	alloc = (struct gk20a_page_alloc *)
		g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);

	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
		/* Release the fence from the previous chunk's memset. */
		if (last)
			gk20a_fence_put(last);

		err = gk20a_ce_execute_ops(g->dev,
				g->mm.vidmem.ce_ctx_id,
				0,
				chunk->base,
				chunk->length,
				0x00000000,
				NVGPU_CE_DST_LOCATION_LOCAL_FB,
				NVGPU_CE_MEMSET,
				NULL,
				0,
				&fence);
		if (err) {
			gk20a_err(g->dev,
				"Failed gk20a_ce_execute_ops[%d]", err);
			return err;
		}

		last = fence;
	}

	/* Waiting on the last fence implies all prior memsets completed. */
	if (last) {
		err = gk20a_fence_wait(last,
				gk20a_get_gr_idle_timeout(g));
		gk20a_fence_put(last);
		if (err)
			gk20a_err(g->dev,
				"fence wait failed for CE execute ops");
	}

	return err;
}
#endif
2873
2746int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem) 2874int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem)
2747{ 2875{
2748 return gk20a_gmmu_alloc_attr_vid(g, 0, size, mem); 2876 return gk20a_gmmu_alloc_attr_vid(g, 0, size, mem);
@@ -2803,56 +2931,10 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
2803 mem->size = size; 2931 mem->size = size;
2804 mem->aperture = APERTURE_VIDMEM; 2932 mem->aperture = APERTURE_VIDMEM;
2805 2933
2806 if (g->mm.vidmem.ce_ctx_id != ~0) {
2807 struct gk20a_fence *gk20a_fence_out = NULL;
2808 struct gk20a_fence *gk20a_last_fence = NULL;
2809 struct gk20a_page_alloc *alloc = NULL;
2810 struct page_alloc_chunk *chunk = NULL;
2811
2812 alloc = (struct gk20a_page_alloc *)
2813 g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
2814
2815 list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
2816 if (gk20a_last_fence)
2817 gk20a_fence_put(gk20a_last_fence);
2818
2819 err = gk20a_ce_execute_ops(g->dev,
2820 g->mm.vidmem.ce_ctx_id,
2821 0,
2822 chunk->base,
2823 chunk->length,
2824 0x00000000,
2825 NVGPU_CE_DST_LOCATION_LOCAL_FB,
2826 NVGPU_CE_MEMSET,
2827 NULL,
2828 0,
2829 &gk20a_fence_out);
2830
2831 if (err) {
2832 gk20a_err(g->dev,
2833 "Failed gk20a_ce_execute_ops[%d]", err);
2834 goto fail_free_table;
2835 }
2836
2837 gk20a_last_fence = gk20a_fence_out;
2838 }
2839
2840 if (gk20a_last_fence) {
2841 err = gk20a_fence_wait(gk20a_last_fence,
2842 gk20a_get_gr_idle_timeout(g));
2843 gk20a_fence_put(gk20a_last_fence);
2844 if (err)
2845 gk20a_err(g->dev,
2846 "Failed to get the fence_out from CE execute ops");
2847 }
2848 }
2849
2850 gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); 2934 gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
2851 2935
2852 return 0; 2936 return 0;
2853 2937
2854fail_free_table:
2855 sg_free_table(mem->sgt);
2856fail_kfree: 2938fail_kfree:
2857 kfree(mem->sgt); 2939 kfree(mem->sgt);
2858fail_physfree: 2940fail_physfree:
@@ -2867,6 +2949,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
2867 struct mem_desc *mem) 2949 struct mem_desc *mem)
2868{ 2950{
2869#if defined(CONFIG_GK20A_VIDMEM) 2951#if defined(CONFIG_GK20A_VIDMEM)
2952 gk20a_gmmu_clear_vidmem_mem(g, mem);
2870 gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl)); 2953 gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl));
2871 gk20a_free_sgtable(&mem->sgt); 2954 gk20a_free_sgtable(&mem->sgt);
2872 mem->size = 0; 2955 mem->size = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index e4d7d741..c6360955 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -405,8 +405,13 @@ struct mm_gk20a {
405 405
406 struct { 406 struct {
407 size_t size; 407 size_t size;
408 u64 base;
409 size_t bootstrap_size;
410 u64 bootstrap_base;
411
408 struct gk20a_allocator allocator; 412 struct gk20a_allocator allocator;
409 struct gk20a_allocator bootstrap_allocator; 413 struct gk20a_allocator bootstrap_allocator;
414
410 u32 ce_ctx_id; 415 u32 ce_ctx_id;
411 bool cleared; 416 bool cleared;
412 } vidmem; 417 } vidmem;