author     Sami Kiminki <skiminki@nvidia.com>        2015-04-20 11:12:22 -0400
committer  Terje Bergstrom <tbergstrom@nvidia.com>   2015-06-30 11:35:23 -0400
commit     e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch)
tree       e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/gk20a/mm_gk20a.h
parent     ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff)
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially
transforms some per-map/unmap overhead into per-batch overhead,
namely the gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU
TLB invalidates. With a batch size of 64, batching has been measured
to yield a >20x speed-up in low-level fixed-address mapping
microbenchmarks; a usage sketch follows the sign-offs below.
Bug 1614735
Bug 1623949
Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
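
For illustration only (not part of the patch): a minimal caller-side sketch
of the intended batch usage, assuming a vm pointer and an array of dmabuf
fds are already at hand. The map_fds_batched() helper, its zero
flag/kind/offset/size arguments, and the error handling are placeholders,
not code from this change; only the batch API and the gk20a_vm_map_buffer()
signature come from the diff below.

/* Hypothetical sketch: amortize gk20a_busy()/gk20a_idle() calls,
 * GPU L2 flushes, and TLB invalidates over a whole batch of maps. */
static int map_fds_batched(struct vm_gk20a *vm, int *fds, int count,
                           u64 *offsets)
{
        struct vm_gk20a_mapping_batch batch;
        int i, err = 0;

        gk20a_vm_mapping_batch_start(&batch);

        for (i = 0; i < count; i++) {
                /* flags, kind, offset, and size are placeholder values */
                err = gk20a_vm_map_buffer(vm, fds[i], &offsets[i],
                                          0 /* flags */, 0 /* kind */,
                                          0 /* buffer_offset */,
                                          0 /* mapping_size */,
                                          &batch);
                if (err)
                        break;
        }

        /* one L2 flush / TLB invalidate for the whole batch,
         * instead of one per mapping */
        gk20a_vm_mapping_batch_finish(vm, &batch);
        return err;
}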
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 42 ++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 1e97e859..ee99c821 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -225,6 +225,13 @@ struct gk20a_mmu_level {
 	size_t entry_size;
 };
 
+/* map/unmap batch state */
+struct vm_gk20a_mapping_batch
+{
+	bool gpu_l2_flushed;
+	bool need_tlb_invalidate;
+};
+
 struct vm_gk20a {
 	struct mm_gk20a *mm;
 	struct gk20a_as_share *as_share; /* as_share this represents */
@@ -257,6 +264,10 @@ struct vm_gk20a {
 	u64 handle;
 #endif
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes];
+
+	/* if non-NULL, kref_put will use this batch when
+	   unmapping. Must hold vm->update_gmmu_lock. */
+	struct vm_gk20a_mapping_batch *kref_put_batch;
 };
 
 struct gk20a;
@@ -486,7 +497,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			u32 flags,
 			int rw_flag,
 			bool clear_ctags,
-			bool sparse);
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch);
 
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 vaddr,
@@ -499,7 +511,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			int pgsz_idx,
 			bool va_allocated,
 			int rw_flag,
-			bool sparse);
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch);
 
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
@@ -514,7 +527,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			bool user_mapped,
 			int rw_flag,
 			u64 buffer_offset,
-			u64 mapping_size);
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *mapping_batch);
 
 int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 			u64 mapping_gva,
@@ -532,7 +546,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+			struct vm_gk20a_mapping_batch *batch);
 
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
@@ -576,13 +591,25 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 			struct nvgpu_as_free_space_args *args);
 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 			struct channel_gk20a *ch);
+
+/* batching eliminates redundant cache flushes and invalidates */
+void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
+void gk20a_vm_mapping_batch_finish(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+/* called when holding vm->update_gmmu_lock */
+void gk20a_vm_mapping_batch_finish_locked(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+
+
+/* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
 			u64 *offset_align,
 			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
 			int kind,
 			u64 buffer_offset,
-			u64 mapping_size);
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch);
 
 int gk20a_init_vm(struct mm_gk20a *mm,
 			struct vm_gk20a *vm,
@@ -592,7 +619,10 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 			bool big_pages,
 			char *name);
 void gk20a_deinit_vm(struct vm_gk20a *vm);
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset);
+
+/* Note: batch may be NULL if unmap op is not part of a batch */
+int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+			struct vm_gk20a_mapping_batch *batch);
 void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 			struct gk20a_comptags *comptags);
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
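
Again for illustration only: the two flags in struct vm_gk20a_mapping_batch
suggest how the batch defers work. A per-operation step would flush the GPU
L2 at most once per batch and merely record that a TLB invalidate is needed;
the finish call then performs a single invalidate covering every operation
in the batch. The helpers gk20a_mm_l2_flush() and gk20a_mm_tlb_invalidate()
are assumptions about the .c side, not content of this header diff, and the
two sketch_* functions below are hypothetical.

/* Hypothetical sketch of one batched unmap step and the finish step. */
static void sketch_unmap_step(struct gk20a *g,
                              struct vm_gk20a_mapping_batch *batch)
{
        if (!batch->gpu_l2_flushed) {
                gk20a_mm_l2_flush(g, true);     /* assumed existing helper */
                batch->gpu_l2_flushed = true;   /* at most once per batch */
        }
        batch->need_tlb_invalidate = true;      /* deferred to finish */
}

static void sketch_batch_finish(struct vm_gk20a *vm,
                                struct vm_gk20a_mapping_batch *batch)
{
        /* one TLB invalidate covers every map/unmap in the batch */
        if (batch->need_tlb_invalidate)
                gk20a_mm_tlb_invalidate(vm);    /* assumed helper name */
}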