diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2014-12-05 03:56:08 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:08:24 -0400 |
commit | 593c7a3f3027cf95e1b35f9acb8b3d0dc45f24e4 (patch) | |
tree | 2125b47ccc62f47e973a82aa0b7a9e85f8d3c6c2 /drivers/gpu | |
parent | ce3afaaaf6092f46b2db0f8835e4d4b9b46ef1a4 (diff) |
gpu: nvgpu: cache cde compbits buf mappings
Don't unmap compbits_buf explicitly from the system vm early; instead, store it in
the dmabuf's private data and unmap it later, once all user mappings to
that buffer have disappeared.
Bug 1546619
Change-Id: I333235a0ea74c48503608afac31f5e9f1eb4b99b
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/661949
(cherry picked from commit ed2177e25d9e5facfb38786b818330798a14b9bb)
Reviewed-on: http://git-master/r/661835
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 32 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 71 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 |
3 files changed, 91 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 57283343..49a1c1a8 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Color decompression engine support | 2 | * Color decompression engine support |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. | 4 | * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -979,17 +979,23 @@ __releases(&cde_app->mutex) | |||
979 | map_size = compbits_buf->size - map_offset; | 979 | map_size = compbits_buf->size - map_offset; |
980 | } | 980 | } |
981 | 981 | ||
982 | /* map the destination buffer */ | 982 | /* map the destination buffer, if not cached yet */ |
983 | get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */ | 983 | /* races protected by the cde app mutex above */ |
984 | map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0, | 984 | map_vaddr = gk20a_vm_cde_mapped(cde_ctx->vm, compbits_buf); |
985 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
986 | compbits_kind, NULL, true, | ||
987 | gk20a_mem_flag_none, | ||
988 | map_offset, map_size); | ||
989 | if (!map_vaddr) { | 985 | if (!map_vaddr) { |
990 | dma_buf_put(compbits_buf); | 986 | /* take a ref for gk20a_vm_map, pair is in (cached) unmap */ |
991 | err = -EINVAL; | 987 | get_dma_buf(compbits_buf); |
992 | goto exit_unlock; | 988 | map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0, |
989 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
990 | compbits_kind, NULL, true, | ||
991 | gk20a_mem_flag_none, | ||
992 | map_offset, map_size); | ||
993 | if (!map_vaddr) { | ||
994 | dma_buf_put(compbits_buf); | ||
995 | err = -EINVAL; | ||
996 | goto exit_unlock; | ||
997 | } | ||
998 | gk20a_vm_mark_cde_mapped(cde_ctx->vm, compbits_buf, map_vaddr); | ||
993 | } | 999 | } |
994 | 1000 | ||
995 | /* store source buffer compression tags */ | 1001 | /* store source buffer compression tags */ |
@@ -1052,9 +1058,7 @@ __releases(&cde_app->mutex) | |||
1052 | 1058 | ||
1053 | exit_unlock: | 1059 | exit_unlock: |
1054 | 1060 | ||
1055 | /* unmap the buffers - channel holds references to them now */ | 1061 | /* leave map_vaddr mapped - released when unmapped from userspace */ |
1056 | if (map_vaddr) | ||
1057 | gk20a_vm_unmap(cde_ctx->vm, map_vaddr); | ||
1058 | 1062 | ||
1059 | mutex_unlock(&g->cde_app.mutex); | 1063 | mutex_unlock(&g->cde_app.mutex); |
1060 | return err; | 1064 | return err; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index d8bd3e70..79bfb687 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -117,6 +117,11 @@ struct gk20a_dmabuf_priv { | |||
117 | int pin_count; | 117 | int pin_count; |
118 | 118 | ||
119 | struct list_head states; | 119 | struct list_head states; |
120 | |||
121 | /* cached cde compbits buf */ | ||
122 | struct vm_gk20a *cde_vm; | ||
123 | u64 cde_map_vaddr; | ||
124 | int map_count; | ||
120 | }; | 125 | }; |
121 | 126 | ||
122 | static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); | 127 | static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); |
@@ -198,6 +203,60 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | |||
198 | mutex_unlock(&priv->lock); | 203 | mutex_unlock(&priv->lock); |
199 | } | 204 | } |
200 | 205 | ||
206 | /* CDE compbits buf caching: keep compbit buffer mapped during user mappings. | ||
207 | * Call these four only after dma_buf has a drvdata allocated */ | ||
208 | |||
209 | u64 gk20a_vm_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf) | ||
210 | { | ||
211 | struct device *dev = dev_from_vm(vm); | ||
212 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
213 | u64 map_vaddr; | ||
214 | |||
215 | mutex_lock(&priv->lock); | ||
216 | map_vaddr = priv->cde_map_vaddr; | ||
217 | mutex_unlock(&priv->lock); | ||
218 | |||
219 | return map_vaddr; | ||
220 | } | ||
221 | |||
222 | void gk20a_vm_mark_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf, | ||
223 | u64 map_vaddr) | ||
224 | { | ||
225 | struct device *dev = dev_from_vm(vm); | ||
226 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
227 | |||
228 | mutex_lock(&priv->lock); | ||
229 | priv->cde_vm = vm; | ||
230 | priv->cde_map_vaddr = map_vaddr; | ||
231 | mutex_unlock(&priv->lock); | ||
232 | } | ||
233 | |||
234 | static void gk20a_vm_inc_maps(struct vm_gk20a *vm, struct dma_buf *dmabuf) | ||
235 | { | ||
236 | struct device *dev = dev_from_vm(vm); | ||
237 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
238 | |||
239 | mutex_lock(&priv->lock); | ||
240 | priv->map_count++; | ||
241 | mutex_unlock(&priv->lock); | ||
242 | } | ||
243 | |||
244 | static void gk20a_vm_dec_maps(struct vm_gk20a *vm, struct dma_buf *dmabuf, | ||
245 | struct vm_gk20a **cde_vm, u64 *cde_map_vaddr) | ||
246 | { | ||
247 | struct device *dev = dev_from_vm(vm); | ||
248 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
249 | |||
250 | mutex_lock(&priv->lock); | ||
251 | if (--priv->map_count == 0) { | ||
252 | *cde_vm = priv->cde_vm; | ||
253 | *cde_map_vaddr = priv->cde_map_vaddr; | ||
254 | priv->cde_vm = NULL; | ||
255 | priv->cde_map_vaddr = 0; | ||
256 | } | ||
257 | mutex_unlock(&priv->lock); | ||
258 | } | ||
259 | |||
201 | void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, | 260 | void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, |
202 | struct gk20a_comptags *comptags) | 261 | struct gk20a_comptags *comptags) |
203 | { | 262 | { |
@@ -751,6 +810,8 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) | |||
751 | struct device *d = dev_from_vm(vm); | 810 | struct device *d = dev_from_vm(vm); |
752 | int retries; | 811 | int retries; |
753 | struct mapped_buffer_node *mapped_buffer; | 812 | struct mapped_buffer_node *mapped_buffer; |
813 | struct vm_gk20a *cde_vm = NULL; | ||
814 | u64 cde_map_vaddr = 0; | ||
754 | 815 | ||
755 | mutex_lock(&vm->update_gmmu_lock); | 816 | mutex_lock(&vm->update_gmmu_lock); |
756 | 817 | ||
@@ -783,9 +844,15 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) | |||
783 | mapped_buffer->user_mapped--; | 844 | mapped_buffer->user_mapped--; |
784 | if (mapped_buffer->user_mapped == 0) | 845 | if (mapped_buffer->user_mapped == 0) |
785 | vm->num_user_mapped_buffers--; | 846 | vm->num_user_mapped_buffers--; |
847 | |||
848 | gk20a_vm_dec_maps(vm, mapped_buffer->dmabuf, &cde_vm, &cde_map_vaddr); | ||
849 | |||
786 | kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); | 850 | kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); |
787 | 851 | ||
788 | mutex_unlock(&vm->update_gmmu_lock); | 852 | mutex_unlock(&vm->update_gmmu_lock); |
853 | |||
854 | if (cde_map_vaddr) | ||
855 | gk20a_vm_unmap(cde_vm, cde_map_vaddr); | ||
789 | } | 856 | } |
790 | 857 | ||
791 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | 858 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, |
@@ -2599,7 +2666,9 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm, | |||
2599 | mapping_size); | 2666 | mapping_size); |
2600 | 2667 | ||
2601 | *offset_align = ret_va; | 2668 | *offset_align = ret_va; |
2602 | if (!ret_va) { | 2669 | if (ret_va) { |
2670 | gk20a_vm_inc_maps(vm, dmabuf); | ||
2671 | } else { | ||
2603 | dma_buf_put(dmabuf); | 2672 | dma_buf_put(dmabuf); |
2604 | err = -EINVAL; | 2673 | err = -EINVAL; |
2605 | } | 2674 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 42c164be..8f0f736b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -554,6 +554,9 @@ void gk20a_deinit_vm(struct vm_gk20a *vm); | |||
554 | int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset); | 554 | int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset); |
555 | void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, | 555 | void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, |
556 | struct gk20a_comptags *comptags); | 556 | struct gk20a_comptags *comptags); |
557 | u64 gk20a_vm_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf); | ||
558 | void gk20a_vm_mark_cde_mapped(struct vm_gk20a *vm, struct dma_buf *dmabuf, | ||
559 | u64 map_vaddr); | ||
557 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); | 560 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); |
558 | 561 | ||
559 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | 562 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); |