diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 179 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 5 |
2 files changed, 136 insertions, 48 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a5158e7c..65157ccd 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -774,6 +774,70 @@ static void gk20a_init_pramin(struct mm_gk20a *mm) | |||
774 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | 774 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; |
775 | } | 775 | } |
776 | 776 | ||
777 | #if defined(CONFIG_GK20A_VIDMEM) | ||
778 | static int gk20a_vidmem_clear_all(struct gk20a *g) | ||
779 | { | ||
780 | struct mm_gk20a *mm = &g->mm; | ||
781 | struct gk20a_fence *gk20a_fence_out = NULL; | ||
782 | u64 region2_base = 0; | ||
783 | int err = 0; | ||
784 | |||
785 | if (mm->vidmem.ce_ctx_id == ~0) | ||
786 | return -EINVAL; | ||
787 | |||
788 | err = gk20a_ce_execute_ops(g->dev, | ||
789 | mm->vidmem.ce_ctx_id, | ||
790 | 0, | ||
791 | mm->vidmem.base, | ||
792 | mm->vidmem.bootstrap_base - mm->vidmem.base, | ||
793 | 0x00000000, | ||
794 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
795 | NVGPU_CE_MEMSET, | ||
796 | NULL, | ||
797 | 0, | ||
798 | NULL); | ||
799 | if (err) { | ||
800 | gk20a_err(g->dev, | ||
801 | "Failed to clear vidmem region 1 : %d", err); | ||
802 | return err; | ||
803 | } | ||
804 | |||
805 | region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size; | ||
806 | |||
807 | err = gk20a_ce_execute_ops(g->dev, | ||
808 | mm->vidmem.ce_ctx_id, | ||
809 | 0, | ||
810 | region2_base, | ||
811 | mm->vidmem.size - region2_base, | ||
812 | 0x00000000, | ||
813 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
814 | NVGPU_CE_MEMSET, | ||
815 | NULL, | ||
816 | 0, | ||
817 | &gk20a_fence_out); | ||
818 | if (err) { | ||
819 | gk20a_err(g->dev, | ||
820 | "Failed to clear vidmem region 2 : %d", err); | ||
821 | return err; | ||
822 | } | ||
823 | |||
824 | if (gk20a_fence_out) { | ||
825 | err = gk20a_fence_wait(gk20a_fence_out, | ||
826 | gk20a_get_gr_idle_timeout(g)); | ||
827 | gk20a_fence_put(gk20a_fence_out); | ||
828 | if (err) { | ||
829 | gk20a_err(g->dev, | ||
830 | "fence wait failed for CE execute ops"); | ||
831 | return err; | ||
832 | } | ||
833 | } | ||
834 | |||
835 | mm->vidmem.cleared = true; | ||
836 | |||
837 | return 0; | ||
838 | } | ||
839 | #endif | ||
840 | |||
777 | static int gk20a_init_vidmem(struct mm_gk20a *mm) | 841 | static int gk20a_init_vidmem(struct mm_gk20a *mm) |
778 | { | 842 | { |
779 | #if defined(CONFIG_GK20A_VIDMEM) | 843 | #if defined(CONFIG_GK20A_VIDMEM) |
@@ -813,7 +877,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
813 | gk20a_alloc_fixed(&g->mm.vidmem.allocator, | 877 | gk20a_alloc_fixed(&g->mm.vidmem.allocator, |
814 | bootstrap_base, bootstrap_size); | 878 | bootstrap_base, bootstrap_size); |
815 | 879 | ||
816 | mm->vidmem.size = size; | 880 | mm->vidmem.base = base; |
881 | mm->vidmem.size = size - base; | ||
882 | mm->vidmem.bootstrap_base = bootstrap_base; | ||
883 | mm->vidmem.bootstrap_size = bootstrap_size; | ||
817 | 884 | ||
818 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | 885 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); |
819 | 886 | ||
@@ -2027,7 +2094,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) | |||
2027 | { | 2094 | { |
2028 | #if defined(CONFIG_GK20A_VIDMEM) | 2095 | #if defined(CONFIG_GK20A_VIDMEM) |
2029 | struct gk20a_vidmem_buf *buf; | 2096 | struct gk20a_vidmem_buf *buf; |
2030 | int err, fd; | 2097 | int err = 0, fd; |
2031 | 2098 | ||
2032 | gk20a_dbg_fn(""); | 2099 | gk20a_dbg_fn(""); |
2033 | 2100 | ||
@@ -2037,6 +2104,14 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) | |||
2037 | 2104 | ||
2038 | buf->g = g; | 2105 | buf->g = g; |
2039 | 2106 | ||
2107 | if (!g->mm.vidmem.cleared) { | ||
2108 | err = gk20a_vidmem_clear_all(g); | ||
2109 | if (err) { | ||
2110 | gk20a_err(g->dev, "failed to clear whole vidmem"); | ||
2111 | goto err_kfree; | ||
2112 | } | ||
2113 | } | ||
2114 | |||
2040 | err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem); | 2115 | err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem); |
2041 | if (err) | 2116 | if (err) |
2042 | goto err_kfree; | 2117 | goto err_kfree; |
@@ -2743,6 +2818,59 @@ static void gk20a_gmmu_free_attr_sys(struct gk20a *g, enum dma_attr attr, | |||
2743 | mem->aperture = APERTURE_INVALID; | 2818 | mem->aperture = APERTURE_INVALID; |
2744 | } | 2819 | } |
2745 | 2820 | ||
2821 | #if defined(CONFIG_GK20A_VIDMEM) | ||
2822 | static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem) | ||
2823 | { | ||
2824 | struct gk20a_fence *gk20a_fence_out = NULL; | ||
2825 | struct gk20a_fence *gk20a_last_fence = NULL; | ||
2826 | struct gk20a_page_alloc *alloc = NULL; | ||
2827 | struct page_alloc_chunk *chunk = NULL; | ||
2828 | int err = 0; | ||
2829 | |||
2830 | if (g->mm.vidmem.ce_ctx_id == ~0) | ||
2831 | return -EINVAL; | ||
2832 | |||
2833 | alloc = (struct gk20a_page_alloc *) | ||
2834 | g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
2835 | |||
2836 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | ||
2837 | if (gk20a_last_fence) | ||
2838 | gk20a_fence_put(gk20a_last_fence); | ||
2839 | |||
2840 | err = gk20a_ce_execute_ops(g->dev, | ||
2841 | g->mm.vidmem.ce_ctx_id, | ||
2842 | 0, | ||
2843 | chunk->base, | ||
2844 | chunk->length, | ||
2845 | 0x00000000, | ||
2846 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
2847 | NVGPU_CE_MEMSET, | ||
2848 | NULL, | ||
2849 | 0, | ||
2850 | &gk20a_fence_out); | ||
2851 | |||
2852 | if (err) { | ||
2853 | gk20a_err(g->dev, | ||
2854 | "Failed gk20a_ce_execute_ops[%d]", err); | ||
2855 | return err; | ||
2856 | } | ||
2857 | |||
2858 | gk20a_last_fence = gk20a_fence_out; | ||
2859 | } | ||
2860 | |||
2861 | if (gk20a_last_fence) { | ||
2862 | err = gk20a_fence_wait(gk20a_last_fence, | ||
2863 | gk20a_get_gr_idle_timeout(g)); | ||
2864 | gk20a_fence_put(gk20a_last_fence); | ||
2865 | if (err) | ||
2866 | gk20a_err(g->dev, | ||
2867 | "fence wait failed for CE execute ops"); | ||
2868 | } | ||
2869 | |||
2870 | return err; | ||
2871 | } | ||
2872 | #endif | ||
2873 | |||
2746 | int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem) | 2874 | int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem) |
2747 | { | 2875 | { |
2748 | return gk20a_gmmu_alloc_attr_vid(g, 0, size, mem); | 2876 | return gk20a_gmmu_alloc_attr_vid(g, 0, size, mem); |
@@ -2803,56 +2931,10 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2803 | mem->size = size; | 2931 | mem->size = size; |
2804 | mem->aperture = APERTURE_VIDMEM; | 2932 | mem->aperture = APERTURE_VIDMEM; |
2805 | 2933 | ||
2806 | if (g->mm.vidmem.ce_ctx_id != ~0) { | ||
2807 | struct gk20a_fence *gk20a_fence_out = NULL; | ||
2808 | struct gk20a_fence *gk20a_last_fence = NULL; | ||
2809 | struct gk20a_page_alloc *alloc = NULL; | ||
2810 | struct page_alloc_chunk *chunk = NULL; | ||
2811 | |||
2812 | alloc = (struct gk20a_page_alloc *) | ||
2813 | g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
2814 | |||
2815 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | ||
2816 | if (gk20a_last_fence) | ||
2817 | gk20a_fence_put(gk20a_last_fence); | ||
2818 | |||
2819 | err = gk20a_ce_execute_ops(g->dev, | ||
2820 | g->mm.vidmem.ce_ctx_id, | ||
2821 | 0, | ||
2822 | chunk->base, | ||
2823 | chunk->length, | ||
2824 | 0x00000000, | ||
2825 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
2826 | NVGPU_CE_MEMSET, | ||
2827 | NULL, | ||
2828 | 0, | ||
2829 | &gk20a_fence_out); | ||
2830 | |||
2831 | if (err) { | ||
2832 | gk20a_err(g->dev, | ||
2833 | "Failed gk20a_ce_execute_ops[%d]", err); | ||
2834 | goto fail_free_table; | ||
2835 | } | ||
2836 | |||
2837 | gk20a_last_fence = gk20a_fence_out; | ||
2838 | } | ||
2839 | |||
2840 | if (gk20a_last_fence) { | ||
2841 | err = gk20a_fence_wait(gk20a_last_fence, | ||
2842 | gk20a_get_gr_idle_timeout(g)); | ||
2843 | gk20a_fence_put(gk20a_last_fence); | ||
2844 | if (err) | ||
2845 | gk20a_err(g->dev, | ||
2846 | "Failed to get the fence_out from CE execute ops"); | ||
2847 | } | ||
2848 | } | ||
2849 | |||
2850 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); | 2934 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); |
2851 | 2935 | ||
2852 | return 0; | 2936 | return 0; |
2853 | 2937 | ||
2854 | fail_free_table: | ||
2855 | sg_free_table(mem->sgt); | ||
2856 | fail_kfree: | 2938 | fail_kfree: |
2857 | kfree(mem->sgt); | 2939 | kfree(mem->sgt); |
2858 | fail_physfree: | 2940 | fail_physfree: |
@@ -2867,6 +2949,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
2867 | struct mem_desc *mem) | 2949 | struct mem_desc *mem) |
2868 | { | 2950 | { |
2869 | #if defined(CONFIG_GK20A_VIDMEM) | 2951 | #if defined(CONFIG_GK20A_VIDMEM) |
2952 | gk20a_gmmu_clear_vidmem_mem(g, mem); | ||
2870 | gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl)); | 2953 | gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl)); |
2871 | gk20a_free_sgtable(&mem->sgt); | 2954 | gk20a_free_sgtable(&mem->sgt); |
2872 | mem->size = 0; | 2955 | mem->size = 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index e4d7d741..c6360955 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -405,8 +405,13 @@ struct mm_gk20a { | |||
405 | 405 | ||
406 | struct { | 406 | struct { |
407 | size_t size; | 407 | size_t size; |
408 | u64 base; | ||
409 | size_t bootstrap_size; | ||
410 | u64 bootstrap_base; | ||
411 | |||
408 | struct gk20a_allocator allocator; | 412 | struct gk20a_allocator allocator; |
409 | struct gk20a_allocator bootstrap_allocator; | 413 | struct gk20a_allocator bootstrap_allocator; |
414 | |||
410 | u32 ce_ctx_id; | 415 | u32 ce_ctx_id; |
411 | bool cleared; | 416 | bool cleared; |
412 | } vidmem; | 417 | } vidmem; |