Diffstat (limited to 'drivers')
 drivers/gpu/nvgpu/gk20a/as_gk20a.c      |  91
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c     |   3
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |   7
 drivers/gpu/nvgpu/gk20a/gk20a.c         |   2
 drivers/gpu/nvgpu/gk20a/gk20a.h         |   6
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 111
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  42
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c        |   8
 8 files changed, 227 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index eb18fa65..feb22ea8 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -151,8 +151,8 @@ static int gk20a_as_ioctl_map_buffer_ex(
 				&args->offset, args->flags,
 				args->kind,
 				args->buffer_offset,
-				args->mapping_size
-				);
+				args->mapping_size,
+				NULL);
 }
 
 static int gk20a_as_ioctl_map_buffer(
@@ -163,7 +163,7 @@ static int gk20a_as_ioctl_map_buffer(
 	return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd,
 				   &args->o_a.offset,
 				   args->flags, NV_KIND_DEFAULT,
-				   0, 0);
+				   0, 0, NULL);
 	/* args->o_a.offset will be set if !err */
 }
 
@@ -172,7 +172,86 @@ static int gk20a_as_ioctl_unmap_buffer(
 		struct nvgpu_as_unmap_buffer_args *args)
 {
 	gk20a_dbg_fn("");
-	return gk20a_vm_unmap_buffer(as_share->vm, args->offset);
+	return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL);
+}
+
+static int gk20a_as_ioctl_map_buffer_batch(
+	struct gk20a_as_share *as_share,
+	struct nvgpu_as_map_buffer_batch_args *args)
+{
+	struct gk20a *g = as_share->vm->mm->g;
+	u32 i;
+	int err = 0;
+
+	struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
+		(struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
+		args->unmaps;
+	struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
+		(struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
+		args->maps;
+
+	struct vm_gk20a_mapping_batch batch;
+
+	gk20a_dbg_fn("");
+
+	if (args->num_unmaps > g->gpu_characteristics.map_buffer_batch_limit ||
+	    args->num_maps > g->gpu_characteristics.map_buffer_batch_limit)
+		return -EINVAL;
+
+	gk20a_vm_mapping_batch_start(&batch);
+
+	for (i = 0; i < args->num_unmaps; ++i) {
+		struct nvgpu_as_unmap_buffer_args unmap_args;
+
+		if (copy_from_user(&unmap_args, &user_unmap_args[i],
+				   sizeof(unmap_args))) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset,
+					    &batch);
+		if (err)
+			break;
+	}
+
+	if (err) {
+		gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+
+		args->num_unmaps = i;
+		args->num_maps = 0;
+		return err;
+	}
+
+	for (i = 0; i < args->num_maps; ++i) {
+		struct nvgpu_as_map_buffer_ex_args map_args;
+		memset(&map_args, 0, sizeof(map_args));
+
+		if (copy_from_user(&map_args, &user_map_args[i],
+				   sizeof(map_args))) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = gk20a_vm_map_buffer(
+			as_share->vm, map_args.dmabuf_fd,
+			&map_args.offset, map_args.flags,
+			map_args.kind,
+			map_args.buffer_offset,
+			map_args.mapping_size,
+			&batch);
+		if (err)
+			break;
+	}
+
+	gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+
+	if (err)
+		args->num_maps = i;
+	/* note: args->num_unmaps will be unmodified, which is ok
+	 * since all unmaps are done */
+
+	return err;
 }
 
 static int gk20a_as_ioctl_get_va_regions(
@@ -360,6 +439,10 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = gk20a_as_ioctl_map_buffer_compbits(as_share,
 			(struct nvgpu_as_map_buffer_compbits_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
+		err = gk20a_as_ioctl_map_buffer_batch(as_share,
+			(struct nvgpu_as_map_buffer_batch_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
 		err = -ENOTTY;
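
For context, here is how user space might drive the new batch ioctl. This is an illustrative sketch only, not part of the patch: the uapi header name, the exact layout of struct nvgpu_as_map_buffer_batch_args, and all values (as_fd, the VAs, the dmabuf fd) are assumptions; only the field and ioctl names used by the handler above come from the patch.

/* Hypothetical user-space usage: two unmaps and one map in one ioctl. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* uapi header; name assumed */

static int remap_buffers(int as_fd, uint64_t old_va0, uint64_t old_va1,
			 int dmabuf_fd)
{
	struct nvgpu_as_unmap_buffer_args unmaps[2];
	struct nvgpu_as_map_buffer_ex_args maps[1];
	struct nvgpu_as_map_buffer_batch_args batch_args;

	memset(unmaps, 0, sizeof(unmaps));
	memset(maps, 0, sizeof(maps));
	memset(&batch_args, 0, sizeof(batch_args));

	unmaps[0].offset = old_va0;
	unmaps[1].offset = old_va1;
	maps[0].dmabuf_fd = dmabuf_fd;

	batch_args.unmaps = (uintptr_t)unmaps;	/* user pointers as u64 */
	batch_args.maps = (uintptr_t)maps;
	batch_args.num_unmaps = 2;
	batch_args.num_maps = 1;

	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, &batch_args);
}

On failure the handler writes back how far it got in num_unmaps/num_maps, so a caller can tell which operations actually completed.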
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index b4fdfb44..7f212eca 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -952,7 +952,8 @@ __releases(&cde_app->mutex)
 				    NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				    compbits_kind, NULL, true,
 				    gk20a_mem_flag_none,
-				    map_offset, map_size);
+				    map_offset, map_size,
+				    NULL);
 	if (!map_vaddr) {
 		dma_buf_put(compbits_buf);
 		err = -EINVAL;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 217f0056..1e247859 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -828,7 +828,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			0,
 			0,
 			0,
-			args->mapping_size);
+			args->mapping_size,
+			NULL);
 	if (err)
 		return err;
 
@@ -839,7 +840,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	virt_addr_hi = u64_hi32(args->offset);
 	/* but check anyway */
 	if (args->offset + virt_size > SZ_4G) {
-		gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+		gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
 		return -EINVAL;
 	}
 
@@ -881,7 +882,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 			perf_pmasys_mem_block_valid_false_f() |
 			perf_pmasys_mem_block_target_f(0));
 
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
 
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index d5208e0d..5a25eecf 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2033,6 +2033,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->cbc_cache_line_size = g->gr.cacheline_size;
 	gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
 
+	gpu->map_buffer_batch_limit = 256;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 9a183e44..fc2ed643 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -349,14 +349,16 @@ struct gpu_ops {
 				u32 flags,
 				int rw_flag,
 				bool clear_ctags,
-				bool sparse);
+				bool sparse,
+				struct vm_gk20a_mapping_batch *batch);
 		void (*gmmu_unmap)(struct vm_gk20a *vm,
 				u64 vaddr,
 				u64 size,
 				int pgsz_idx,
 				bool va_allocated,
 				int rw_flag,
-				bool sparse);
+				bool sparse,
+				struct vm_gk20a_mapping_batch *batch);
 		void (*vm_remove)(struct vm_gk20a *vm);
 		int (*vm_alloc_share)(struct gk20a_as_share *as_share,
 				u32 flags);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 197e2b81..f3512f90 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -787,7 +787,34 @@ static void gk20a_vm_unmap_locked_kref(struct kref *ref)
 {
 	struct mapped_buffer_node *mapped_buffer =
 		container_of(ref, struct mapped_buffer_node, ref);
-	gk20a_vm_unmap_locked(mapped_buffer);
+	gk20a_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
+}
+
+void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	memset(mapping_batch, 0, sizeof(*mapping_batch));
+	mapping_batch->gpu_l2_flushed = false;
+	mapping_batch->need_tlb_invalidate = false;
+}
+
+void gk20a_vm_mapping_batch_finish_locked(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	/* hanging kref_put batch pointer? */
+	WARN_ON(vm->kref_put_batch == mapping_batch);
+
+	if (mapping_batch->need_tlb_invalidate) {
+		struct gk20a *g = gk20a_from_vm(vm);
+		g->ops.mm.tlb_invalidate(vm);
+	}
+}
+
+void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm,
+	struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	mutex_lock(&vm->update_gmmu_lock);
+	gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch);
+	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
@@ -795,19 +822,25 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
 			 int num_buffers)
 {
 	int i;
+	struct vm_gk20a_mapping_batch batch;
 
 	mutex_lock(&vm->update_gmmu_lock);
+	gk20a_vm_mapping_batch_start(&batch);
+	vm->kref_put_batch = &batch;
 
 	for (i = 0; i < num_buffers; ++i)
 		kref_put(&mapped_buffers[i]->ref,
 			 gk20a_vm_unmap_locked_kref);
 
+	vm->kref_put_batch = NULL;
+	gk20a_vm_mapping_batch_finish_locked(vm, &batch);
 	mutex_unlock(&vm->update_gmmu_lock);
 
 	nvgpu_free(mapped_buffers);
 }
 
-static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
+static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
+				struct vm_gk20a_mapping_batch *batch)
 {
 	struct device *d = dev_from_vm(vm);
 	int retries = 10000; /* 50 ms */
@@ -840,7 +873,10 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 	mapped_buffer->user_mapped--;
 	if (mapped_buffer->user_mapped == 0)
 		vm->num_user_mapped_buffers--;
+
+	vm->kref_put_batch = batch;
 	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+	vm->kref_put_batch = NULL;
 
 	mutex_unlock(&vm->update_gmmu_lock);
 }
@@ -1131,7 +1167,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			u32 flags,
 			int rw_flag,
 			bool clear_ctags,
-			bool sparse)
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
 	bool allocated = false;
@@ -1177,7 +1214,10 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		goto fail_validate;
 	}
 
-	g->ops.mm.tlb_invalidate(vm);
+	if (!batch)
+		g->ops.mm.tlb_invalidate(vm);
+	else
+		batch->need_tlb_invalidate = true;
 
 	return map_offset;
 fail_validate:
@@ -1194,7 +1234,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			int pgsz_idx,
 			bool va_allocated,
 			int rw_flag,
-			bool sparse)
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
@@ -1230,9 +1271,16 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	 * for gmmu ptes. note the positioning of this relative to any smmu
 	 * unmapping (below). */
 
-	gk20a_mm_l2_flush(g, true);
-
-	g->ops.mm.tlb_invalidate(vm);
+	if (!batch) {
+		gk20a_mm_l2_flush(g, true);
+		g->ops.mm.tlb_invalidate(vm);
+	} else {
+		if (!batch->gpu_l2_flushed) {
+			gk20a_mm_l2_flush(g, true);
+			batch->gpu_l2_flushed = true;
+		}
+		batch->need_tlb_invalidate = true;
+	}
 }
 
 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
@@ -1308,7 +1356,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			bool user_mapped,
 			int rw_flag,
 			u64 buffer_offset,
-			u64 mapping_size)
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
@@ -1509,7 +1558,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 						bfr.ctag_offset,
 						flags, rw_flag,
 						clear_ctags,
-						false);
+						false,
+						batch);
 	if (!map_offset)
 		goto clean_up;
 
@@ -1727,8 +1777,9 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 			0, /* ctag_offset */
 			NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 			gk20a_mem_flag_read_only,
-			false,
-			false);
+			false, /* clear_ctags */
+			false, /* sparse */
+			NULL); /* mapping_batch handle */
 
 	if (!mapped_buffer->ctag_map_win_addr) {
 		mutex_unlock(&vm->update_gmmu_lock);
@@ -1764,7 +1815,10 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 				0, /* page size index = 0 i.e. SZ_4K */
 				0, /* kind */
 				0, /* ctag_offset */
-				flags, rw_flag, false, false);
+				flags, rw_flag,
+				false, /* clear_ctags */
+				false, /* sparse */
+				NULL); /* mapping_batch handle */
 	mutex_unlock(&vm->update_gmmu_lock);
 	if (!vaddr) {
 		gk20a_err(dev_from_vm(vm), "failed to allocate va space");
@@ -1930,7 +1984,8 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 			0, /* page size 4K */
 			true, /*va_allocated */
 			rw_flag,
-			false);
+			false,
+			NULL);
 	mutex_unlock(&vm->update_gmmu_lock);
 }
 
@@ -2378,7 +2433,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 }
 
 /* NOTE! mapped_buffers lock must be held */
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+			   struct vm_gk20a_mapping_batch *batch)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
@@ -2392,7 +2448,8 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 			0, /* page size 4k */
 			true, /* va allocated */
 			gk20a_mem_flag_none,
-			false); /* not sparse */
+			false, /* not sparse */
+			batch); /* batch handle */
 	}
 
 	g->ops.mm.gmmu_unmap(vm,
@@ -2402,7 +2459,8 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 			mapped_buffer->va_allocated,
 			gk20a_mem_flag_none,
 			mapped_buffer->va_node ?
-			  mapped_buffer->va_node->sparse : false);
+			  mapped_buffer->va_node->sparse : false,
+			batch);
 
 	gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
 		  vm_aspace_id(vm),
@@ -2479,7 +2537,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	while (node) {
 		mapped_buffer =
 			container_of(node, struct mapped_buffer_node, node);
-		gk20a_vm_unmap_locked(mapped_buffer);
+		gk20a_vm_unmap_locked(mapped_buffer, NULL);
 		node = rb_first(&vm->mapped_buffers);
 	}
 
@@ -2776,7 +2834,8 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 					 args->flags,
 					 gk20a_mem_flag_none,
 					 false,
-					 true);
+					 true,
+					 NULL);
 	if (!map_offset) {
 		mutex_unlock(&vm->update_gmmu_lock);
 		gk20a_bfree(vma, vaddr_start);
@@ -2841,7 +2900,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 			va_node->pgsz_idx,
 			true,
 			gk20a_mem_flag_none,
-			true);
+			true,
+			NULL);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -2960,7 +3020,8 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
 			int kind,
 			u64 buffer_offset,
-			u64 mapping_size)
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
 	struct dma_buf *dmabuf;
@@ -2986,7 +3047,8 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 			flags, kind, NULL, true,
 			gk20a_mem_flag_none,
 			buffer_offset,
-			mapping_size);
+			mapping_size,
+			batch);
 
 	*offset_align = ret_va;
 	if (!ret_va) {
@@ -2997,11 +3059,12 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 	return err;
 }
 
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset)
+int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+			  struct vm_gk20a_mapping_batch *batch)
 {
 	gk20a_dbg_fn("");
 
-	gk20a_vm_unmap_user(vm, offset);
+	gk20a_vm_unmap_user(vm, offset, batch);
 	return 0;
 }
 
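
What the batch buys: with a non-NULL batch, gk20a_locked_gmmu_unmap() flushes L2 at most once per batch (on the first unmap that needs it) and defers the TLB invalidate to the batch-finish call, so N unmaps cost one flush and one invalidate instead of N of each. A minimal in-kernel sketch of the intended calling pattern, modeled on gk20a_as_ioctl_map_buffer_batch() above; the helper name and the offsets/num_offsets parameters are hypothetical:

/* Sketch: unmap a set of GPU VAs with one L2 flush + one TLB invalidate. */
static int unmap_many(struct vm_gk20a *vm, const u64 *offsets,
		      u32 num_offsets)
{
	struct vm_gk20a_mapping_batch batch;
	u32 i;
	int err = 0;

	gk20a_vm_mapping_batch_start(&batch);

	for (i = 0; i < num_offsets; ++i) {
		/* later unmaps only mark the batch; no per-op flush */
		err = gk20a_vm_unmap_buffer(vm, offsets[i], &batch);
		if (err)
			break;
	}

	/* always finish: issues the single deferred TLB invalidate, if any */
	gk20a_vm_mapping_batch_finish(vm, &batch);
	return err;
}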
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 1e97e859..ee99c821 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -225,6 +225,13 @@ struct gk20a_mmu_level {
 	size_t entry_size;
 };
 
+/* map/unmap batch state */
+struct vm_gk20a_mapping_batch
+{
+	bool gpu_l2_flushed;
+	bool need_tlb_invalidate;
+};
+
 struct vm_gk20a {
 	struct mm_gk20a *mm;
 	struct gk20a_as_share *as_share; /* as_share this represents */
@@ -257,6 +264,10 @@ struct vm_gk20a {
 	u64 handle;
 #endif
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes];
+
+	/* if non-NULL, kref_put will use this batch when
+	   unmapping. Must hold vm->update_gmmu_lock. */
+	struct vm_gk20a_mapping_batch *kref_put_batch;
 };
 
 struct gk20a;
@@ -486,7 +497,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			u32 flags,
 			int rw_flag,
 			bool clear_ctags,
-			bool sparse);
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch);
 
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 vaddr,
@@ -499,7 +511,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			int pgsz_idx,
 			bool va_allocated,
 			int rw_flag,
-			bool sparse);
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch);
 
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
@@ -514,7 +527,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			bool user_mapped,
 			int rw_flag,
 			u64 buffer_offset,
-			u64 mapping_size);
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *mapping_batch);
 
 int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 			u64 mapping_gva,
@@ -532,7 +546,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+			   struct vm_gk20a_mapping_batch *batch);
 
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
@@ -576,13 +591,25 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 			struct nvgpu_as_free_space_args *args);
 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 			  struct channel_gk20a *ch);
+
+/* batching eliminates redundant cache flushes and invalidates */
+void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
+void gk20a_vm_mapping_batch_finish(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+/* called when holding vm->update_gmmu_lock */
+void gk20a_vm_mapping_batch_finish_locked(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+
+
+/* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
 			u64 *offset_align,
 			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
 			int kind,
 			u64 buffer_offset,
-			u64 mapping_size);
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch);
 
587 | int gk20a_init_vm(struct mm_gk20a *mm, | 614 | int gk20a_init_vm(struct mm_gk20a *mm, |
588 | struct vm_gk20a *vm, | 615 | struct vm_gk20a *vm, |
@@ -592,7 +619,10 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
592 | bool big_pages, | 619 | bool big_pages, |
593 | char *name); | 620 | char *name); |
594 | void gk20a_deinit_vm(struct vm_gk20a *vm); | 621 | void gk20a_deinit_vm(struct vm_gk20a *vm); |
595 | int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset); | 622 | |
623 | /* Note: batch may be NULL if unmap op is not part of a batch */ | ||
624 | int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, | ||
625 | struct vm_gk20a_mapping_batch *batch); | ||
596 | void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, | 626 | void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, |
597 | struct gk20a_comptags *comptags); | 627 | struct gk20a_comptags *comptags); |
598 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); | 628 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); |
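
The vm->kref_put_batch field declared above exists because gk20a_vm_unmap_locked_kref() is invoked through kref_put() and cannot take a batch argument directly, so the batch is routed through the vm under update_gmmu_lock. A sketch of that contract, mirroring gk20a_vm_put_buffers() and gk20a_vm_unmap_user() in mm_gk20a.c; the helper is hypothetical and would have to live where gk20a_vm_unmap_locked_kref() is visible:

/* Sketch: route one kref_put-driven unmap through a batch. */
static void put_one_batched(struct vm_gk20a *vm,
			    struct mapped_buffer_node *mapped_buffer)
{
	struct vm_gk20a_mapping_batch batch;

	mutex_lock(&vm->update_gmmu_lock);
	gk20a_vm_mapping_batch_start(&batch);

	vm->kref_put_batch = &batch;	/* unmap_locked_kref reads this */
	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
	vm->kref_put_batch = NULL;	/* clear before finish; the WARN_ON in
					 * batch_finish_locked catches a
					 * still-hanging pointer */

	gk20a_vm_mapping_batch_finish_locked(vm, &batch);
	mutex_unlock(&vm->update_gmmu_lock);
}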
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 855aac0d..be1fa47d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -66,7 +66,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 				u32 flags,
 				int rw_flag,
 				bool clear_ctags,
-				bool sparse)
+				bool sparse,
+				struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
 	struct device *d = dev_from_vm(vm);
@@ -130,7 +131,8 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 				int pgsz_idx,
 				bool va_allocated,
 				int rw_flag,
-				bool sparse)
+				bool sparse,
+				struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
@@ -182,7 +184,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	while (node) {
 		mapped_buffer =
 			container_of(node, struct mapped_buffer_node, node);
-		gk20a_vm_unmap_locked(mapped_buffer);
+		gk20a_vm_unmap_locked(mapped_buffer, NULL);
 		node = rb_first(&vm->mapped_buffers);
 	}
 