summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2014-12-05 03:56:08 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:08:24 -0400
commit593c7a3f3027cf95e1b35f9acb8b3d0dc45f24e4 (patch)
tree2125b47ccc62f47e973a82aa0b7a9e85f8d3c6c2 /drivers
parentce3afaaaf6092f46b2db0f8835e4d4b9b46ef1a4 (diff)
gpu: nvgpu: cache cde compbits buf mappings
don't unmap compbits_buf explicitly from system vm early but store it in the dmabuf's private data, and unmap it later when all user mappings to that buffer have disappeared. Bug 1546619 Change-Id: I333235a0ea74c48503608afac31f5e9f1eb4b99b Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/661949 (cherry picked from commit ed2177e25d9e5facfb38786b818330798a14b9bb) Reviewed-on: http://git-master/r/661835 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c32
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c71
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h3
3 files changed, 91 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 57283343..49a1c1a8 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Color decompression engine support 2 * Color decompression engine support
3 * 3 *
4 * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. 4 * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -979,17 +979,23 @@ __releases(&cde_app->mutex)
979 map_size = compbits_buf->size - map_offset; 979 map_size = compbits_buf->size - map_offset;
980 } 980 }
981 981
982 /* map the destination buffer */ 982 /* map the destination buffer, if not cached yet */
983 get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */ 983 /* races protected by the cde app mutex above */
984 map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0, 984 map_vaddr = gk20a_vm_cde_mapped(cde_ctx->vm, compbits_buf);
985 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
986 compbits_kind, NULL, true,
987 gk20a_mem_flag_none,
988 map_offset, map_size);
989 if (!map_vaddr) { 985 if (!map_vaddr) {
990 dma_buf_put(compbits_buf); 986 /* take a ref for gk20a_vm_map, pair is in (cached) unmap */
991 err = -EINVAL; 987 get_dma_buf(compbits_buf);
992 goto exit_unlock; 988 map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0,
989 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
990 compbits_kind, NULL, true,
991 gk20a_mem_flag_none,
992 map_offset, map_size);
993 if (!map_vaddr) {
994 dma_buf_put(compbits_buf);
995 err = -EINVAL;
996 goto exit_unlock;
997 }
998 gk20a_vm_mark_cde_mapped(cde_ctx->vm, compbits_buf, map_vaddr);
993 } 999 }
994 1000
995 /* store source buffer compression tags */ 1001 /* store source buffer compression tags */
@@ -1052,9 +1058,7 @@ __releases(&cde_app->mutex)
1052 1058
1053exit_unlock: 1059exit_unlock:
1054 1060
1055 /* unmap the buffers - channel holds references to them now */ 1061 /* leave map_vaddr mapped - released when unmapped from userspace */
1056 if (map_vaddr)
1057 gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
1058 1062
1059 mutex_unlock(&g->cde_app.mutex); 1063 mutex_unlock(&g->cde_app.mutex);
1060 return err; 1064 return err;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d8bd3e70..79bfb687 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -117,6 +117,11 @@ struct gk20a_dmabuf_priv {
117 int pin_count; 117 int pin_count;
118 118
119 struct list_head states; 119 struct list_head states;
120
121 /* cached cde compbits buf */
122 struct vm_gk20a *cde_vm;
123 u64 cde_map_vaddr;
124 int map_count;
120}; 125};
121 126
122static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); 127static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
@@ -198,6 +203,60 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
198 mutex_unlock(&priv->lock); 203 mutex_unlock(&priv->lock);
199} 204}
200 205
206/* CDE compbits buf caching: keep compbit buffer mapped during user mappings.
207 * Call these four only after dma_buf has a drvdata allocated */
208
209u64 gk20a_vm_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf)
210{
211 struct device *dev = dev_from_vm(vm);
212 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
213 u64 map_vaddr;
214
215 mutex_lock(&priv->lock);
216 map_vaddr = priv->cde_map_vaddr;
217 mutex_unlock(&priv->lock);
218
219 return map_vaddr;
220}
221
222void gk20a_vm_mark_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf,
223 u64 map_vaddr)
224{
225 struct device *dev = dev_from_vm(vm);
226 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
227
228 mutex_lock(&priv->lock);
229 priv->cde_vm = vm;
230 priv->cde_map_vaddr = map_vaddr;
231 mutex_unlock(&priv->lock);
232}
233
234static void gk20a_vm_inc_maps(struct vm_gk20a *vm, struct dma_buf *dmabuf)
235{
236 struct device *dev = dev_from_vm(vm);
237 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
238
239 mutex_lock(&priv->lock);
240 priv->map_count++;
241 mutex_unlock(&priv->lock);
242}
243
244static void gk20a_vm_dec_maps(struct vm_gk20a *vm, struct dma_buf *dmabuf,
245 struct vm_gk20a **cde_vm, u64 *cde_map_vaddr)
246{
247 struct device *dev = dev_from_vm(vm);
248 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
249
250 mutex_lock(&priv->lock);
251 if (--priv->map_count == 0) {
252 *cde_vm = priv->cde_vm;
253 *cde_map_vaddr = priv->cde_map_vaddr;
254 priv->cde_vm = NULL;
255 priv->cde_map_vaddr = 0;
256 }
257 mutex_unlock(&priv->lock);
258}
259
201void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, 260void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
202 struct gk20a_comptags *comptags) 261 struct gk20a_comptags *comptags)
203{ 262{
@@ -751,6 +810,8 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
751 struct device *d = dev_from_vm(vm); 810 struct device *d = dev_from_vm(vm);
752 int retries; 811 int retries;
753 struct mapped_buffer_node *mapped_buffer; 812 struct mapped_buffer_node *mapped_buffer;
813 struct vm_gk20a *cde_vm = NULL;
814 u64 cde_map_vaddr = 0;
754 815
755 mutex_lock(&vm->update_gmmu_lock); 816 mutex_lock(&vm->update_gmmu_lock);
756 817
@@ -783,9 +844,15 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
783 mapped_buffer->user_mapped--; 844 mapped_buffer->user_mapped--;
784 if (mapped_buffer->user_mapped == 0) 845 if (mapped_buffer->user_mapped == 0)
785 vm->num_user_mapped_buffers--; 846 vm->num_user_mapped_buffers--;
847
848 gk20a_vm_dec_maps(vm, mapped_buffer->dmabuf, &cde_vm, &cde_map_vaddr);
849
786 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); 850 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
787 851
788 mutex_unlock(&vm->update_gmmu_lock); 852 mutex_unlock(&vm->update_gmmu_lock);
853
854 if (cde_map_vaddr)
855 gk20a_vm_unmap(cde_vm, cde_map_vaddr);
789} 856}
790 857
791u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, 858u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
@@ -2599,7 +2666,9 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
2599 mapping_size); 2666 mapping_size);
2600 2667
2601 *offset_align = ret_va; 2668 *offset_align = ret_va;
2602 if (!ret_va) { 2669 if (ret_va) {
2670 gk20a_vm_inc_maps(vm, dmabuf);
2671 } else {
2603 dma_buf_put(dmabuf); 2672 dma_buf_put(dmabuf);
2604 err = -EINVAL; 2673 err = -EINVAL;
2605 } 2674 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 42c164be..8f0f736b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -554,6 +554,9 @@ void gk20a_deinit_vm(struct vm_gk20a *vm);
554int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset); 554int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset);
555void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, 555void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
556 struct gk20a_comptags *comptags); 556 struct gk20a_comptags *comptags);
557u64 gk20a_vm_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf);
558void gk20a_vm_mark_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf,
559 u64 map_vaddr);
557dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); 560dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
558 561
559int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); 562int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);