path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
author	Sami Kiminki <skiminki@nvidia.com>	2015-04-20 11:12:22 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-06-30 11:35:23 -0400
commit	e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch)
tree	e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/gk20a/mm_gk20a.h
parent	ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff)
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially helps
transform some per-map/unmap overhead to per-batch overhead, namely
gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates.
Batching with size 64 has been measured to yield >20x speed-up in
low-level fixed-address mapping microbenchmarks.

Bug 1614735
Bug 1623949

Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
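As a rough caller-side sketch (not code from this patch), the batch interface added below is started once, threaded through a series of map calls, and then finished, so the gk20a_busy()/gk20a_idle() transitions, L2 flush, and TLB invalidate are paid once per batch instead of once per mapping. The helper name, the dmabuf_fds array, and all zero arguments are hypothetical placeholders.

/*
 * Illustrative sketch only, based on the prototypes added in this
 * header. 'vm', 'dmabuf_fds', 'count', and the zero arguments are
 * placeholders; real callers pass their own flags, kind, offsets,
 * and sizes, and do proper error handling.
 */
static int map_buffers_batched(struct vm_gk20a *vm,
			       const int *dmabuf_fds, int count)
{
	struct vm_gk20a_mapping_batch batch;
	u64 offset_align;
	int i, err = 0;

	gk20a_vm_mapping_batch_start(&batch);

	for (i = 0; i < count; i++) {
		offset_align = 0;
		err = gk20a_vm_map_buffer(vm, dmabuf_fds[i], &offset_align,
					  0 /* flags */, 0 /* kind */,
					  0 /* buffer_offset */,
					  0 /* mapping_size */,
					  &batch);
		if (err)
			break;
	}

	/* Flush/invalidate once for the whole batch, as needed. */
	gk20a_vm_mapping_batch_finish(vm, &batch);

	return err;
}

Passing a NULL batch keeps the old per-call behaviour, which is why the prototypes below note that batch may be NULL when the operation is not part of a batch.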
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	42
1 file changed, 36 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 1e97e859..ee99c821 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -225,6 +225,13 @@ struct gk20a_mmu_level {
 	size_t entry_size;
 };
 
+/* map/unmap batch state */
+struct vm_gk20a_mapping_batch
+{
+	bool gpu_l2_flushed;
+	bool need_tlb_invalidate;
+};
+
 struct vm_gk20a {
 	struct mm_gk20a *mm;
 	struct gk20a_as_share *as_share; /* as_share this represents */
@@ -257,6 +264,10 @@ struct vm_gk20a {
 	u64 handle;
 #endif
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes];
+
+	/* if non-NULL, kref_put will use this batch when
+	   unmapping. Must hold vm->update_gmmu_lock. */
+	struct vm_gk20a_mapping_batch *kref_put_batch;
 };
 
 struct gk20a;
@@ -486,7 +497,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			u32 flags,
 			int rw_flag,
 			bool clear_ctags,
-			bool sparse);
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch);
 
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 vaddr,
@@ -499,7 +511,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			int pgsz_idx,
 			bool va_allocated,
 			int rw_flag,
-			bool sparse);
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch);
 
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
@@ -514,7 +527,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			bool user_mapped,
 			int rw_flag,
 			u64 buffer_offset,
-			u64 mapping_size);
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *mapping_batch);
 
 int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 			u64 mapping_gva,
@@ -532,7 +546,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+			   struct vm_gk20a_mapping_batch *batch);
 
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
@@ -576,13 +591,25 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_free_space_args *args);
 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 			  struct channel_gk20a *ch);
+
+/* batching eliminates redundant cache flushes and invalidates */
+void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
+void gk20a_vm_mapping_batch_finish(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+/* called when holding vm->update_gmmu_lock */
+void gk20a_vm_mapping_batch_finish_locked(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+
+
+/* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
 			u64 *offset_align,
 			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
 			int kind,
 			u64 buffer_offset,
-			u64 mapping_size);
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch);
 
 int gk20a_init_vm(struct mm_gk20a *mm,
 		struct vm_gk20a *vm,
@@ -592,7 +619,10 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		bool big_pages,
 		char *name);
 void gk20a_deinit_vm(struct vm_gk20a *vm);
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset);
+
+/* Note: batch may be NULL if unmap op is not part of a batch */
+int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+			  struct vm_gk20a_mapping_batch *batch);
 void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 			struct gk20a_comptags *comptags);
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);