diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 52 |
1 files changed, 39 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 555353e2..634ae86a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -58,6 +58,34 @@ | |||
58 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); | 58 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | static inline void | ||
62 | set_vidmem_page_alloc(struct scatterlist *sgl, u64 addr) | ||
63 | { | ||
64 | /* set bit 0 to indicate vidmem allocation */ | ||
65 | sg_dma_address(sgl) = (addr | 1ULL); | ||
66 | } | ||
67 | |||
68 | static inline bool | ||
69 | is_vidmem_page_alloc(u64 addr) | ||
70 | { | ||
71 | return !!(addr & 1ULL); | ||
72 | } | ||
73 | |||
74 | static inline struct gk20a_page_alloc * | ||
75 | get_vidmem_page_alloc(struct scatterlist *sgl) | ||
76 | { | ||
77 | u64 addr; | ||
78 | |||
79 | addr = sg_dma_address(sgl); | ||
80 | |||
81 | if (is_vidmem_page_alloc(addr)) | ||
82 | addr = addr & ~1ULL; | ||
83 | else | ||
84 | WARN_ON(1); | ||
85 | |||
86 | return (struct gk20a_page_alloc *)(uintptr_t)addr; | ||
87 | } | ||
88 | |||
61 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | 89 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) |
62 | { | 90 | { |
63 | void *cpu_va; | 91 | void *cpu_va; |
@@ -149,8 +177,7 @@ static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, | |||
149 | struct page_alloc_chunk *chunk = NULL; | 177 | struct page_alloc_chunk *chunk = NULL; |
150 | u32 byteoff, start_reg, until_end, n; | 178 | u32 byteoff, start_reg, until_end, n; |
151 | 179 | ||
152 | alloc = (struct gk20a_page_alloc *)(uintptr_t) | 180 | alloc = get_vidmem_page_alloc(mem->sgt->sgl); |
153 | sg_dma_address(mem->sgt->sgl); | ||
154 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | 181 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { |
155 | if (offset >= chunk->length) | 182 | if (offset >= chunk->length) |
156 | offset -= chunk->length; | 183 | offset -= chunk->length; |
@@ -2197,8 +2224,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, | |||
2197 | u64 buf_addr; | 2224 | u64 buf_addr; |
2198 | 2225 | ||
2199 | if (aperture == APERTURE_VIDMEM) { | 2226 | if (aperture == APERTURE_VIDMEM) { |
2200 | struct gk20a_page_alloc *alloc = (struct gk20a_page_alloc *) | 2227 | struct gk20a_page_alloc *alloc = get_vidmem_page_alloc(sgl); |
2201 | (uintptr_t)sg_dma_address(sgl); | ||
2202 | struct page_alloc_chunk *chunk = NULL; | 2228 | struct page_alloc_chunk *chunk = NULL; |
2203 | 2229 | ||
2204 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | 2230 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { |
@@ -2914,8 +2940,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem) | |||
2914 | if (g->mm.vidmem.ce_ctx_id == ~0) | 2940 | if (g->mm.vidmem.ce_ctx_id == ~0) |
2915 | return -EINVAL; | 2941 | return -EINVAL; |
2916 | 2942 | ||
2917 | alloc = (struct gk20a_page_alloc *)(uintptr_t) | 2943 | alloc = get_vidmem_page_alloc(mem->sgt->sgl); |
2918 | sg_dma_address(mem->sgt->sgl); | ||
2919 | 2944 | ||
2920 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | 2945 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { |
2921 | if (gk20a_last_fence) | 2946 | if (gk20a_last_fence) |
@@ -3038,7 +3063,7 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
3038 | if (err) | 3063 | if (err) |
3039 | goto fail_kfree; | 3064 | goto fail_kfree; |
3040 | 3065 | ||
3041 | sg_dma_address(mem->sgt->sgl) = addr; | 3066 | set_vidmem_page_alloc(mem->sgt->sgl, addr); |
3042 | sg_set_page(mem->sgt->sgl, NULL, size, 0); | 3067 | sg_set_page(mem->sgt->sgl, NULL, size, 0); |
3043 | 3068 | ||
3044 | mem->size = size; | 3069 | mem->size = size; |
@@ -3082,7 +3107,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
3082 | } else { | 3107 | } else { |
3083 | gk20a_memset(g, mem, 0, 0, mem->size); | 3108 | gk20a_memset(g, mem, 0, 0, mem->size); |
3084 | gk20a_free(mem->allocator, | 3109 | gk20a_free(mem->allocator, |
3085 | sg_dma_address(mem->sgt->sgl)); | 3110 | (u64)get_vidmem_page_alloc(mem->sgt->sgl)); |
3086 | gk20a_free_sgtable(&mem->sgt); | 3111 | gk20a_free_sgtable(&mem->sgt); |
3087 | 3112 | ||
3088 | mem->size = 0; | 3113 | mem->size = 0; |
@@ -3120,8 +3145,7 @@ u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem, | |||
3120 | u64 addr; | 3145 | u64 addr; |
3121 | 3146 | ||
3122 | if (mem->aperture == APERTURE_VIDMEM) { | 3147 | if (mem->aperture == APERTURE_VIDMEM) { |
3123 | alloc = (struct gk20a_page_alloc *)(uintptr_t) | 3148 | alloc = get_vidmem_page_alloc(mem->sgt->sgl); |
3124 | sg_dma_address(mem->sgt->sgl); | ||
3125 | 3149 | ||
3126 | /* This API should not be used with > 1 chunks */ | 3150 | /* This API should not be used with > 1 chunks */ |
3127 | WARN_ON(alloc->nr_chunks != 1); | 3151 | WARN_ON(alloc->nr_chunks != 1); |
@@ -3159,7 +3183,7 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) | |||
3159 | while ((mem = get_pending_mem_desc(mm)) != NULL) { | 3183 | while ((mem = get_pending_mem_desc(mm)) != NULL) { |
3160 | gk20a_gmmu_clear_vidmem_mem(g, mem); | 3184 | gk20a_gmmu_clear_vidmem_mem(g, mem); |
3161 | gk20a_free(mem->allocator, | 3185 | gk20a_free(mem->allocator, |
3162 | sg_dma_address(mem->sgt->sgl)); | 3186 | (u64)get_vidmem_page_alloc(mem->sgt->sgl)); |
3163 | gk20a_free_sgtable(&mem->sgt); | 3187 | gk20a_free_sgtable(&mem->sgt); |
3164 | 3188 | ||
3165 | WARN_ON(atomic_dec_return(&mm->vidmem.clears_pending) < 0); | 3189 | WARN_ON(atomic_dec_return(&mm->vidmem.clears_pending) < 0); |
@@ -3400,6 +3424,9 @@ void gk20a_free_sgtable(struct sg_table **sgt) | |||
3400 | 3424 | ||
3401 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) | 3425 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) |
3402 | { | 3426 | { |
3427 | /* ensure it is not vidmem allocation */ | ||
3428 | WARN_ON(is_vidmem_page_alloc((u64)iova)); | ||
3429 | |||
3403 | if (device_is_iommuable(dev_from_gk20a(g)) && | 3430 | if (device_is_iommuable(dev_from_gk20a(g)) && |
3404 | g->ops.mm.get_physical_addr_bits) | 3431 | g->ops.mm.get_physical_addr_bits) |
3405 | return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g); | 3432 | return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g); |
@@ -3747,8 +3774,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3747 | pgsz_idx, gpu_va, gpu_end-1, iova); | 3774 | pgsz_idx, gpu_va, gpu_end-1, iova); |
3748 | 3775 | ||
3749 | if (sgt) { | 3776 | if (sgt) { |
3750 | alloc = (struct gk20a_page_alloc *)(uintptr_t) | 3777 | alloc = get_vidmem_page_alloc(sgt->sgl); |
3751 | sg_dma_address(sgt->sgl); | ||
3752 | 3778 | ||
3753 | list_for_each_entry(chunk, &alloc->alloc_chunks, | 3779 | list_for_each_entry(chunk, &alloc->alloc_chunks, |
3754 | list_entry) { | 3780 | list_entry) { |