diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2017-08-22 09:14:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-15 18:45:45 -0400 |
commit | 5d09c908b0679f060bc89ae70eef681a6783ebbc (patch) | |
tree | 244177eb4425e765b9a043e66533ca624f0d3cbd | |
parent | 2b7e8a2c2a5df041c9a434804d0f3f6d9df82737 (diff) |
gpu: nvgpu: Direct GMMU PTE kind control
Allow userspace to control directly the PTE kind for the mappings by
supplying NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL for MAP_BUFFER_EX.
In particular, in this mode, the userspace will tell the kernel
whether the kind is compressible, and if so, what the
incompressible fallback kind is. By supplying only the compressible kind,
the userspace can require that the map kind will not be demoted to the
incompressible fallback kind in case of comptag allocation failure.
Also add a GPU characteristics flag
NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL to signal whether direct
kind control is supported.
Fix indentation of nvgpu_as_map_buffer_ex_args header comment.
Bug 1705731
Change-Id: I317ab474ae53b78eb8fdd31bd6bca0541fcba9a4
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1543462
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/cde.c | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ioctl_as.c | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 76 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm_priv.h | 32 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 49 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 38 |
8 files changed, 205 insertions, 31 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c index 5b0fb910..0c52271a 100644 --- a/drivers/gpu/nvgpu/common/linux/cde.c +++ b/drivers/gpu/nvgpu/common/linux/cde.c | |||
@@ -975,7 +975,7 @@ __releases(&l->cde_app->mutex) | |||
975 | u64 big_page_mask = 0; | 975 | u64 big_page_mask = 0; |
976 | u32 flags; | 976 | u32 flags; |
977 | int err, i; | 977 | int err, i; |
978 | const s32 compbits_kind = 0; | 978 | const s16 compbits_kind = 0; |
979 | 979 | ||
980 | gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | 980 | gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", |
981 | compbits_byte_offset, scatterbuffer_byte_offset); | 981 | compbits_byte_offset, scatterbuffer_byte_offset); |
@@ -1038,8 +1038,11 @@ __releases(&l->cde_app->mutex) | |||
1038 | /* map the destination buffer */ | 1038 | /* map the destination buffer */ |
1039 | get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ | 1039 | get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ |
1040 | map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, | 1040 | map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, |
1041 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | 1041 | NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE | |
1042 | compbits_kind, true, | 1042 | NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL, |
1043 | NV_KIND_INVALID, | ||
1044 | compbits_kind, /* incompressible kind */ | ||
1045 | true, | ||
1043 | gk20a_mem_flag_none, | 1046 | gk20a_mem_flag_none, |
1044 | map_offset, map_size, | 1047 | map_offset, map_size, |
1045 | NULL); | 1048 | NULL); |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c index d4242955..cfc4e7ef 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c | |||
@@ -79,11 +79,22 @@ static int gk20a_as_ioctl_map_buffer_ex( | |||
79 | struct gk20a_as_share *as_share, | 79 | struct gk20a_as_share *as_share, |
80 | struct nvgpu_as_map_buffer_ex_args *args) | 80 | struct nvgpu_as_map_buffer_ex_args *args) |
81 | { | 81 | { |
82 | s16 compressible_kind; | ||
83 | s16 incompressible_kind; | ||
84 | |||
82 | gk20a_dbg_fn(""); | 85 | gk20a_dbg_fn(""); |
83 | 86 | ||
87 | if (args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
88 | compressible_kind = args->compr_kind; | ||
89 | incompressible_kind = args->incompr_kind; | ||
90 | } else { | ||
91 | compressible_kind = args->kind; | ||
92 | incompressible_kind = NV_KIND_INVALID; | ||
93 | } | ||
94 | |||
84 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, | 95 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, |
85 | &args->offset, args->flags, | 96 | &args->offset, args->flags, |
86 | args->kind, | 97 | compressible_kind, incompressible_kind, |
87 | args->buffer_offset, | 98 | args->buffer_offset, |
88 | args->mapping_size, | 99 | args->mapping_size, |
89 | NULL); | 100 | NULL); |
@@ -97,6 +108,7 @@ static int gk20a_as_ioctl_map_buffer( | |||
97 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, | 108 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, |
98 | &args->o_a.offset, | 109 | &args->o_a.offset, |
99 | args->flags, NV_KIND_DEFAULT, | 110 | args->flags, NV_KIND_DEFAULT, |
111 | NV_KIND_DEFAULT, | ||
100 | 0, 0, NULL); | 112 | 0, 0, NULL); |
101 | /* args->o_a.offset will be set if !err */ | 113 | /* args->o_a.offset will be set if !err */ |
102 | } | 114 | } |
@@ -158,6 +170,9 @@ static int gk20a_as_ioctl_map_buffer_batch( | |||
158 | } | 170 | } |
159 | 171 | ||
160 | for (i = 0; i < args->num_maps; ++i) { | 172 | for (i = 0; i < args->num_maps; ++i) { |
173 | s16 compressible_kind; | ||
174 | s16 incompressible_kind; | ||
175 | |||
161 | struct nvgpu_as_map_buffer_ex_args map_args; | 176 | struct nvgpu_as_map_buffer_ex_args map_args; |
162 | memset(&map_args, 0, sizeof(map_args)); | 177 | memset(&map_args, 0, sizeof(map_args)); |
163 | 178 | ||
@@ -167,10 +182,19 @@ static int gk20a_as_ioctl_map_buffer_batch( | |||
167 | break; | 182 | break; |
168 | } | 183 | } |
169 | 184 | ||
185 | if (map_args.flags & | ||
186 | NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
187 | compressible_kind = map_args.compr_kind; | ||
188 | incompressible_kind = map_args.incompr_kind; | ||
189 | } else { | ||
190 | compressible_kind = map_args.kind; | ||
191 | incompressible_kind = NV_KIND_INVALID; | ||
192 | } | ||
193 | |||
170 | err = nvgpu_vm_map_buffer( | 194 | err = nvgpu_vm_map_buffer( |
171 | as_share->vm, map_args.dmabuf_fd, | 195 | as_share->vm, map_args.dmabuf_fd, |
172 | &map_args.offset, map_args.flags, | 196 | &map_args.offset, map_args.flags, |
173 | map_args.kind, | 197 | compressible_kind, incompressible_kind, |
174 | map_args.buffer_offset, | 198 | map_args.buffer_offset, |
175 | map_args.mapping_size, | 199 | map_args.mapping_size, |
176 | &batch); | 200 | &batch); |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 58e2da13..86d8bec9 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -177,11 +177,46 @@ static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
177 | return mapped_buffer->addr; | 177 | return mapped_buffer->addr; |
178 | } | 178 | } |
179 | 179 | ||
180 | static int setup_bfr_kind_fields(struct buffer_attrs *bfr, s16 compr_kind, | ||
181 | s16 incompr_kind, u32 flags) | ||
182 | { | ||
183 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
184 | /* were we supplied with a kind in either parameter? */ | ||
185 | if ((compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE) && | ||
186 | (incompr_kind < 0 || incompr_kind >= NV_KIND_ATTR_SIZE)) | ||
187 | return -EINVAL; | ||
188 | |||
189 | if (compr_kind != NV_KIND_INVALID) { | ||
190 | bfr->use_kind_v = true; | ||
191 | bfr->kind_v = (u8)compr_kind; | ||
192 | } | ||
193 | |||
194 | if (incompr_kind != NV_KIND_INVALID) { | ||
195 | bfr->use_uc_kind_v = true; | ||
196 | bfr->uc_kind_v = (u8)incompr_kind; | ||
197 | } | ||
198 | } else { | ||
199 | if (compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE) | ||
200 | return -EINVAL; | ||
201 | |||
202 | bfr->use_kind_v = true; | ||
203 | bfr->kind_v = (u8)compr_kind; | ||
204 | |||
205 | /* | ||
206 | * Note: setup_buffer_kind_and_compression() will | ||
207 | * figure out uc_kind_v or return an error | ||
208 | */ | ||
209 | } | ||
210 | |||
211 | return 0; | ||
212 | } | ||
213 | |||
180 | u64 nvgpu_vm_map(struct vm_gk20a *vm, | 214 | u64 nvgpu_vm_map(struct vm_gk20a *vm, |
181 | struct dma_buf *dmabuf, | 215 | struct dma_buf *dmabuf, |
182 | u64 offset_align, | 216 | u64 offset_align, |
183 | u32 flags, | 217 | u32 flags, |
184 | int kind, | 218 | s16 compr_kind, |
219 | s16 incompr_kind, | ||
185 | bool user_mapped, | 220 | bool user_mapped, |
186 | int rw_flag, | 221 | int rw_flag, |
187 | u64 buffer_offset, | 222 | u64 buffer_offset, |
@@ -203,6 +238,22 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
203 | u32 ctag_offset; | 238 | u32 ctag_offset; |
204 | enum nvgpu_aperture aperture; | 239 | enum nvgpu_aperture aperture; |
205 | 240 | ||
241 | /* | ||
242 | * The kind used as part of the key for map caching. HW may | ||
243 | * actually be programmed with the fallback kind in case the | ||
244 | * key kind is compressible but we're out of comptags. | ||
245 | */ | ||
246 | s16 map_key_kind; | ||
247 | |||
248 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
249 | if (compr_kind != NV_KIND_INVALID) | ||
250 | map_key_kind = compr_kind; | ||
251 | else | ||
252 | map_key_kind = incompr_kind; | ||
253 | } else { | ||
254 | map_key_kind = compr_kind; | ||
255 | } | ||
256 | |||
206 | if (user_mapped && vm->userspace_managed && | 257 | if (user_mapped && vm->userspace_managed && |
207 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | 258 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { |
208 | nvgpu_err(g, "non-fixed-offset mapping not available on " | 259 | nvgpu_err(g, "non-fixed-offset mapping not available on " |
@@ -216,7 +267,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
216 | if (!vm->userspace_managed) { | 267 | if (!vm->userspace_managed) { |
217 | map_offset = __nvgpu_vm_find_mapping( | 268 | map_offset = __nvgpu_vm_find_mapping( |
218 | vm, dmabuf, offset_align, | 269 | vm, dmabuf, offset_align, |
219 | flags, kind, | 270 | flags, map_key_kind, |
220 | user_mapped, rw_flag); | 271 | user_mapped, rw_flag); |
221 | if (map_offset) { | 272 | if (map_offset) { |
222 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 273 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
@@ -239,12 +290,10 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
239 | goto clean_up; | 290 | goto clean_up; |
240 | } | 291 | } |
241 | 292 | ||
242 | if (kind >= NV_KIND_ATTR_SIZE) { | 293 | err = setup_bfr_kind_fields(&bfr, compr_kind, incompr_kind, flags); |
243 | err = -EINVAL; | 294 | if (err) |
244 | goto clean_up; | 295 | goto clean_up; |
245 | } else { | 296 | |
246 | bfr.kind_v = (u8)kind; | ||
247 | } | ||
248 | bfr.size = dmabuf->size; | 297 | bfr.size = dmabuf->size; |
249 | sgl = bfr.sgt->sgl; | 298 | sgl = bfr.sgt->sgl; |
250 | 299 | ||
@@ -306,10 +355,15 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
306 | err = gk20a_alloc_comptags(g, dev, dmabuf, | 355 | err = gk20a_alloc_comptags(g, dev, dmabuf, |
307 | ctag_allocator, | 356 | ctag_allocator, |
308 | bfr.ctag_lines); | 357 | bfr.ctag_lines); |
309 | if (err) { | 358 | if (unlikely(err)) { |
310 | /* ok to fall back here if we ran out */ | ||
311 | /* TBD: we can partially alloc ctags as well... */ | 359 | /* TBD: we can partially alloc ctags as well... */ |
312 | bfr.kind_v = bfr.uc_kind_v; | 360 | if (bfr.use_uc_kind_v) { |
361 | /* no comptags, but fallback kind available */ | ||
362 | bfr.kind_v = bfr.uc_kind_v; | ||
363 | } else { | ||
364 | nvgpu_err(g, "comptag alloc failed and no fallback kind specified"); | ||
365 | goto clean_up; | ||
366 | } | ||
313 | } else { | 367 | } else { |
314 | gk20a_get_comptags(dev, | 368 | gk20a_get_comptags(dev, |
315 | dmabuf, &comptags); | 369 | dmabuf, &comptags); |
@@ -371,7 +425,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
371 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; | 425 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; |
372 | mapped_buffer->vm = vm; | 426 | mapped_buffer->vm = vm; |
373 | mapped_buffer->flags = flags; | 427 | mapped_buffer->flags = flags; |
374 | mapped_buffer->kind = kind; | 428 | mapped_buffer->kind = map_key_kind; |
375 | mapped_buffer->va_allocated = va_allocated; | 429 | mapped_buffer->va_allocated = va_allocated; |
376 | mapped_buffer->user_mapped = user_mapped ? 1 : 0; | 430 | mapped_buffer->user_mapped = user_mapped ? 1 : 0; |
377 | mapped_buffer->own_mem_ref = user_mapped; | 431 | mapped_buffer->own_mem_ref = user_mapped; |
diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h index 4f6b10bb..1eadf1d0 100644 --- a/drivers/gpu/nvgpu/common/linux/vm_priv.h +++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h | |||
@@ -34,7 +34,9 @@ struct buffer_attrs { | |||
34 | u32 ctag_allocated_lines; | 34 | u32 ctag_allocated_lines; |
35 | int pgsz_idx; | 35 | int pgsz_idx; |
36 | u8 kind_v; | 36 | u8 kind_v; |
37 | bool use_kind_v; | ||
37 | u8 uc_kind_v; | 38 | u8 uc_kind_v; |
39 | bool use_uc_kind_v; | ||
38 | bool ctag_user_mappable; | 40 | bool ctag_user_mappable; |
39 | }; | 41 | }; |
40 | 42 | ||
@@ -42,19 +44,43 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
42 | struct dma_buf *dmabuf, | 44 | struct dma_buf *dmabuf, |
43 | u64 offset_align, | 45 | u64 offset_align, |
44 | u32 flags, | 46 | u32 flags, |
45 | int kind, | 47 | |
48 | /* | ||
49 | * compressible kind if | ||
50 | * NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is | ||
51 | * specified, otherwise just the kind | ||
52 | */ | ||
53 | s16 compr_kind, | ||
54 | |||
55 | /* | ||
56 | * incompressible kind if | ||
57 | * NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is | ||
58 | * specified, otherwise ignored | ||
59 | */ | ||
60 | s16 incompr_kind, | ||
61 | |||
46 | bool user_mapped, | 62 | bool user_mapped, |
47 | int rw_flag, | 63 | int rw_flag, |
48 | u64 buffer_offset, | 64 | u64 buffer_offset, |
49 | u64 mapping_size, | 65 | u64 mapping_size, |
50 | struct vm_gk20a_mapping_batch *mapping_batch); | 66 | struct vm_gk20a_mapping_batch *mapping_batch); |
51 | 67 | ||
52 | /* Note: batch may be NULL if map op is not part of a batch */ | 68 | /* |
69 | * Notes: | ||
70 | * - Batch may be NULL if map op is not part of a batch. | ||
71 | * - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is set, | ||
72 | * compr_kind and incompr_kind work as explained in nvgpu.h. | ||
73 | * - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is NOT set, | ||
74 | * compr_kind holds the kind and kernel will figure out whether | ||
75 | * it is a compressible or incompressible kind. If compressible, kernel will | ||
76 | * also figure out the incompressible counterpart or return an error. | ||
77 | */ | ||
53 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | 78 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, |
54 | int dmabuf_fd, | 79 | int dmabuf_fd, |
55 | u64 *offset_align, | 80 | u64 *offset_align, |
56 | u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ | 81 | u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ |
57 | int kind, | 82 | s16 compr_kind, |
83 | s16 incompr_kind, | ||
58 | u64 buffer_offset, | 84 | u64 buffer_offset, |
59 | u64 mapping_size, | 85 | u64 mapping_size, |
60 | struct vm_gk20a_mapping_batch *batch); | 86 | struct vm_gk20a_mapping_batch *batch); |
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 19433df9..2b37a62a 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -1929,6 +1929,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1929 | 0, | 1929 | 0, |
1930 | 0, | 1930 | 0, |
1931 | 0, | 1931 | 0, |
1932 | 0, | ||
1932 | args->mapping_size, | 1933 | args->mapping_size, |
1933 | NULL); | 1934 | NULL); |
1934 | if (err) | 1935 | if (err) |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 455fa238..e9948c16 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -411,7 +411,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
411 | gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes(); | 411 | gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes(); |
412 | } | 412 | } |
413 | 413 | ||
414 | gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS; | 414 | gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS | |
415 | NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL; | ||
415 | 416 | ||
416 | if (IS_ENABLED(CONFIG_SYNC)) | 417 | if (IS_ENABLED(CONFIG_SYNC)) |
417 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; | 418 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a6507d2d..97b7aa80 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -822,14 +822,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset, | |||
822 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 822 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
823 | } | 823 | } |
824 | 824 | ||
825 | int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | 825 | static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr, |
826 | u32 flags, | 826 | bool *pkind_compressible) |
827 | struct buffer_attrs *bfr, | ||
828 | enum gmmu_pgsz_gk20a pgsz_idx) | ||
829 | { | 827 | { |
830 | bool kind_compressible; | ||
831 | struct gk20a *g = gk20a_from_vm(vm); | 828 | struct gk20a *g = gk20a_from_vm(vm); |
832 | int ctag_granularity = g->ops.fb.compression_page_size(g); | 829 | bool kind_compressible; |
833 | 830 | ||
834 | if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) | 831 | if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) |
835 | bfr->kind_v = gmmu_pte_kind_pitch_v(); | 832 | bfr->kind_v = gmmu_pte_kind_pitch_v(); |
@@ -840,7 +837,7 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | |||
840 | } | 837 | } |
841 | 838 | ||
842 | bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); | 839 | bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); |
843 | /* find a suitable uncompressed kind if it becomes necessary later */ | 840 | /* find a suitable incompressible kind if it becomes necessary later */ |
844 | kind_compressible = gk20a_kind_is_compressible(bfr->kind_v); | 841 | kind_compressible = gk20a_kind_is_compressible(bfr->kind_v); |
845 | if (kind_compressible) { | 842 | if (kind_compressible) { |
846 | bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v); | 843 | bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v); |
@@ -852,6 +849,36 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | |||
852 | return -EINVAL; | 849 | return -EINVAL; |
853 | } | 850 | } |
854 | } | 851 | } |
852 | |||
853 | *pkind_compressible = kind_compressible; | ||
854 | return 0; | ||
855 | } | ||
856 | |||
857 | int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | ||
858 | u32 flags, | ||
859 | struct buffer_attrs *bfr, | ||
860 | enum gmmu_pgsz_gk20a pgsz_idx) | ||
861 | { | ||
862 | bool kind_compressible; | ||
863 | struct gk20a *g = gk20a_from_vm(vm); | ||
864 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
865 | |||
866 | if (!bfr->use_kind_v) | ||
867 | bfr->kind_v = gmmu_pte_kind_invalid_v(); | ||
868 | if (!bfr->use_uc_kind_v) | ||
869 | bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); | ||
870 | |||
871 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { | ||
872 | kind_compressible = (bfr->kind_v != gmmu_pte_kind_invalid_v()); | ||
873 | if (!kind_compressible) | ||
874 | bfr->kind_v = bfr->uc_kind_v; | ||
875 | } else { | ||
876 | int err = setup_kind_legacy(vm, bfr, &kind_compressible); | ||
877 | |||
878 | if (err) | ||
879 | return err; | ||
880 | } | ||
881 | |||
855 | /* comptags only supported for suitable kinds, 128KB pagesize */ | 882 | /* comptags only supported for suitable kinds, 128KB pagesize */ |
856 | if (kind_compressible && | 883 | if (kind_compressible && |
857 | vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) { | 884 | vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) { |
@@ -865,6 +892,9 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | |||
865 | else | 892 | else |
866 | bfr->ctag_lines = 0; | 893 | bfr->ctag_lines = 0; |
867 | 894 | ||
895 | bfr->use_kind_v = (bfr->kind_v != gmmu_pte_kind_invalid_v()); | ||
896 | bfr->use_uc_kind_v = (bfr->uc_kind_v != gmmu_pte_kind_invalid_v()); | ||
897 | |||
868 | return 0; | 898 | return 0; |
869 | } | 899 | } |
870 | 900 | ||
@@ -1649,7 +1679,8 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
1649 | int dmabuf_fd, | 1679 | int dmabuf_fd, |
1650 | u64 *offset_align, | 1680 | u64 *offset_align, |
1651 | u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ | 1681 | u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ |
1652 | int kind, | 1682 | s16 compr_kind, |
1683 | s16 incompr_kind, | ||
1653 | u64 buffer_offset, | 1684 | u64 buffer_offset, |
1654 | u64 mapping_size, | 1685 | u64 mapping_size, |
1655 | struct vm_gk20a_mapping_batch *batch) | 1686 | struct vm_gk20a_mapping_batch *batch) |
@@ -1690,7 +1721,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
1690 | } | 1721 | } |
1691 | 1722 | ||
1692 | ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, | 1723 | ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, |
1693 | flags, kind, true, | 1724 | flags, compr_kind, incompr_kind, true, |
1694 | gk20a_mem_flag_none, | 1725 | gk20a_mem_flag_none, |
1695 | buffer_offset, | 1726 | buffer_offset, |
1696 | mapping_size, | 1727 | mapping_size, |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 5b1d606a..9c883a93 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -146,6 +146,9 @@ struct nvgpu_gpu_zbc_query_table_args { | |||
146 | #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20) | 146 | #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20) |
147 | /* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */ | 147 | /* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */ |
148 | #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) | 148 | #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) |
149 | /* Direct PTE kind control is supported (map_buffer_ex) */ | ||
150 | #define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23) | ||
151 | |||
149 | 152 | ||
150 | struct nvgpu_gpu_characteristics { | 153 | struct nvgpu_gpu_characteristics { |
151 | __u32 arch; | 154 | __u32 arch; |
@@ -1751,6 +1754,7 @@ struct nvgpu_as_map_buffer_args { | |||
1751 | #define NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT (1 << 4) | 1754 | #define NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT (1 << 4) |
1752 | #define NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE (1 << 5) | 1755 | #define NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE (1 << 5) |
1753 | #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6) | 1756 | #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6) |
1757 | #define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL (1 << 8) | ||
1754 | __u32 reserved; /* in */ | 1758 | __u32 reserved; /* in */ |
1755 | __u32 dmabuf_fd; /* in */ | 1759 | __u32 dmabuf_fd; /* in */ |
1756 | __u32 page_size; /* inout, 0:= best fit to buffer */ | 1760 | __u32 page_size; /* inout, 0:= best fit to buffer */ |
@@ -1760,7 +1764,7 @@ struct nvgpu_as_map_buffer_args { | |||
1760 | } o_a; | 1764 | } o_a; |
1761 | }; | 1765 | }; |
1762 | 1766 | ||
1763 | /* | 1767 | /* |
1764 | * Mapping dmabuf fds into an address space: | 1768 | * Mapping dmabuf fds into an address space: |
1765 | * | 1769 | * |
1766 | * The caller requests a mapping to a particular page 'kind'. | 1770 | * The caller requests a mapping to a particular page 'kind'. |
@@ -1772,7 +1776,37 @@ struct nvgpu_as_map_buffer_args { | |||
1772 | struct nvgpu_as_map_buffer_ex_args { | 1776 | struct nvgpu_as_map_buffer_ex_args { |
1773 | __u32 flags; /* in/out */ | 1777 | __u32 flags; /* in/out */ |
1774 | #define NV_KIND_DEFAULT -1 | 1778 | #define NV_KIND_DEFAULT -1 |
1775 | __s32 kind; /* in (-1 represents default) */ | 1779 | union { |
1780 | /* | ||
1781 | * Used if NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL | ||
1782 | * is not set. | ||
1783 | */ | ||
1784 | __s32 kind; /* in (-1 represents default) */ | ||
1785 | |||
1786 | /* | ||
1787 | * If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is | ||
1788 | * set, this is used, instead. The rules are: | ||
1789 | * | ||
1790 | * - If both compr_kind and incompr_kind are set | ||
1791 | * (i.e., value is other than NV_KIND_INVALID), | ||
1792 | * kernel attempts to use compr_kind first. | ||
1793 | * | ||
1794 | * - If compr_kind is set, kernel attempts to allocate | ||
1795 | * comptags for the buffer. If successful, | ||
1796 | * compr_kind is used as the PTE kind. | ||
1797 | * | ||
1798 | * - If incompr_kind is set, kernel uses incompr_kind | ||
1799 | * as the PTE kind. Comptags are not allocated. | ||
1800 | * | ||
1801 | * - If neither compr_kind nor incompr_kind is set, the | ||
1802 | * map call will fail. | ||
1803 | */ | ||
1804 | #define NV_KIND_INVALID -1 | ||
1805 | struct { | ||
1806 | __s16 compr_kind; | ||
1807 | __s16 incompr_kind; | ||
1808 | }; | ||
1809 | }; | ||
1776 | __u32 dmabuf_fd; /* in */ | 1810 | __u32 dmabuf_fd; /* in */ |
1777 | __u32 page_size; /* inout, 0:= best fit to buffer */ | 1811 | __u32 page_size; /* inout, 0:= best fit to buffer */ |
1778 | 1812 | ||