diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-07-11 09:02:57 -0400 |
---|---|---|
committer | Vijayakumar Subbu <vsubbu@nvidia.com> | 2016-07-21 08:54:26 -0400 |
commit | 83071083d779b67ad73172675a6dfa34ed19b414 (patch) | |
tree | 19040ddfa5bd3a959a99647a2a2f0c1a08ae3d8f /drivers/gpu/nvgpu | |
parent | 306dee6cbb29599b301d68c449e76b5249f143d6 (diff) |
gpu: nvgpu: add vidmem manager
Use the nvgpu-internal buddy allocator for video memory allocations
instead of nvmap. This allows better integration with the copy engine,
BAR1 mapping to userspace, etc.
Jira DNVGPU-38
Change-Id: I9fd67b76cd39721e4cd8e525ad0ed76f497e8b99
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1181151
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 108 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 8 |
3 files changed, 58 insertions, 60 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index e2f2d9e9..a69cce0c 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -224,7 +224,7 @@ static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags | |||
224 | { | 224 | { |
225 | /* there is no local memory available, | 225 | /* there is no local memory available, |
226 | don't allow local memory related CE flags */ | 226 | don't allow local memory related CE flags */ |
227 | if (!g->mm.vidmem_size) { | 227 | if (!g->mm.vidmem.size) { |
228 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | 228 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | |
229 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | 229 | NVGPU_CE_DST_LOCATION_LOCAL_FB); |
230 | } | 230 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 7b2174bc..bf3d990c 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -699,6 +699,14 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block) | |||
699 | gk20a_vm_remove_support_nofree(vm); | 699 | gk20a_vm_remove_support_nofree(vm); |
700 | } | 700 | } |
701 | 701 | ||
702 | static void gk20a_vidmem_destroy(struct gk20a *g) | ||
703 | { | ||
704 | #if defined(CONFIG_GK20A_VIDMEM) | ||
705 | if (gk20a_alloc_initialized(&g->mm.vidmem.allocator)) | ||
706 | gk20a_alloc_destroy(&g->mm.vidmem.allocator); | ||
707 | #endif | ||
708 | } | ||
709 | |||
702 | static void gk20a_remove_mm_support(struct mm_gk20a *mm) | 710 | static void gk20a_remove_mm_support(struct mm_gk20a *mm) |
703 | { | 711 | { |
704 | struct gk20a *g = gk20a_from_mm(mm); | 712 | struct gk20a *g = gk20a_from_mm(mm); |
@@ -711,13 +719,15 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) | |||
711 | gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); | 719 | gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); |
712 | gk20a_vm_remove_support_nofree(&mm->cde.vm); | 720 | gk20a_vm_remove_support_nofree(&mm->cde.vm); |
713 | 721 | ||
714 | if (mm->ce_vidmem_ctx_id != ~0) | 722 | if (mm->vidmem.ce_ctx_id != ~0) |
715 | gk20a_ce_delete_context(g->dev, mm->ce_vidmem_ctx_id ); | 723 | gk20a_ce_delete_context(g->dev, mm->vidmem.ce_ctx_id); |
716 | 724 | ||
717 | mm->ce_vidmem_ctx_id = ~0; | 725 | mm->vidmem.ce_ctx_id = ~0; |
718 | 726 | ||
719 | if (platform->has_ce) | 727 | if (platform->has_ce) |
720 | gk20a_vm_remove_support_nofree(&mm->ce.vm); | 728 | gk20a_vm_remove_support_nofree(&mm->ce.vm); |
729 | |||
730 | gk20a_vidmem_destroy(g); | ||
721 | } | 731 | } |
722 | 732 | ||
723 | static int gk20a_alloc_sysmem_flush(struct gk20a *g) | 733 | static int gk20a_alloc_sysmem_flush(struct gk20a *g) |
@@ -744,14 +754,15 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
744 | if (!size) | 754 | if (!size) |
745 | return 0; | 755 | return 0; |
746 | 756 | ||
747 | err = nvmap_register_vidmem_carveout(&mm->vidmem_dev, SZ_4K, size); | 757 | err = gk20a_buddy_allocator_init(&g->mm.vidmem.allocator, "vidmem", |
758 | SZ_4K, size, SZ_4K, 0); | ||
748 | if (err) { | 759 | if (err) { |
749 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", | 760 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", |
750 | size, err); | 761 | size, err); |
751 | return err; | 762 | return err; |
752 | } | 763 | } |
753 | 764 | ||
754 | mm->vidmem_size = size; | 765 | mm->vidmem.size = size; |
755 | 766 | ||
756 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | 767 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); |
757 | 768 | ||
@@ -785,7 +796,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
785 | 796 | ||
786 | gk20a_init_pramin(mm); | 797 | gk20a_init_pramin(mm); |
787 | 798 | ||
788 | mm->ce_vidmem_ctx_id = ~0; | 799 | mm->vidmem.ce_ctx_id = ~0; |
789 | 800 | ||
790 | err = gk20a_init_vidmem(mm); | 801 | err = gk20a_init_vidmem(mm); |
791 | if (err) | 802 | if (err) |
@@ -902,8 +913,8 @@ int gk20a_init_mm_support(struct gk20a *g) | |||
902 | void gk20a_init_mm_ce_context(struct gk20a *g) | 913 | void gk20a_init_mm_ce_context(struct gk20a *g) |
903 | { | 914 | { |
904 | #if defined(CONFIG_GK20A_VIDMEM) | 915 | #if defined(CONFIG_GK20A_VIDMEM) |
905 | if (g->mm.vidmem_size && (g->mm.ce_vidmem_ctx_id == ~0)) { | 916 | if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == ~0)) { |
906 | g->mm.ce_vidmem_ctx_id = | 917 | g->mm.vidmem.ce_ctx_id = |
907 | gk20a_ce_create_context_with_cb(g->dev, | 918 | gk20a_ce_create_context_with_cb(g->dev, |
908 | gk20a_fifo_get_fast_ce_runlist_id(g), | 919 | gk20a_fifo_get_fast_ce_runlist_id(g), |
909 | -1, | 920 | -1, |
@@ -911,7 +922,7 @@ void gk20a_init_mm_ce_context(struct gk20a *g) | |||
911 | -1, | 922 | -1, |
912 | NULL); | 923 | NULL); |
913 | 924 | ||
914 | if (g->mm.ce_vidmem_ctx_id == ~0) | 925 | if (g->mm.vidmem.ce_ctx_id == ~0) |
915 | gk20a_err(g->dev, | 926 | gk20a_err(g->dev, |
916 | "Failed to allocate CE context for vidmem page clearing support"); | 927 | "Failed to allocate CE context for vidmem page clearing support"); |
917 | } | 928 | } |
@@ -2518,51 +2529,55 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2518 | size_t size, struct mem_desc *mem, dma_addr_t at) | 2529 | size_t size, struct mem_desc *mem, dma_addr_t at) |
2519 | { | 2530 | { |
2520 | #if defined(CONFIG_GK20A_VIDMEM) | 2531 | #if defined(CONFIG_GK20A_VIDMEM) |
2521 | struct device *d = &g->mm.vidmem_dev; | 2532 | u64 addr; |
2522 | int err; | 2533 | int err; |
2523 | dma_addr_t iova; | ||
2524 | bool need_pramin_access = true; | 2534 | bool need_pramin_access = true; |
2525 | DEFINE_DMA_ATTRS(attrs); | ||
2526 | 2535 | ||
2527 | gk20a_dbg_fn(""); | 2536 | gk20a_dbg_fn(""); |
2528 | 2537 | ||
2529 | if (at) { | 2538 | if (!gk20a_alloc_initialized(&g->mm.vidmem.allocator)) |
2530 | void *va; | 2539 | return -ENOSYS; |
2531 | 2540 | ||
2532 | dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, &attrs); | 2541 | /* we don't support dma attributes here, except that kernel mappings |
2533 | va = dma_mark_declared_memory_occupied(d, at, size, | 2542 | * are not done anyway */ |
2534 | &attrs); | 2543 | WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING); |
2535 | 2544 | ||
2536 | if (IS_ERR(va)) | 2545 | if (at) { |
2537 | return PTR_ERR(va); | 2546 | addr = gk20a_alloc_fixed(&g->mm.vidmem.allocator, at, size); |
2547 | if (!addr) | ||
2548 | return -ENOMEM; | ||
2538 | 2549 | ||
2539 | iova = at; | ||
2540 | mem->fixed = true; | 2550 | mem->fixed = true; |
2541 | } else { | 2551 | } else { |
2542 | dma_set_attr(attr, &attrs); | 2552 | addr = gk20a_alloc(&g->mm.vidmem.allocator, size); |
2543 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | 2553 | if (!addr) |
2544 | /* cpuva has no meaning here, the following returns null */ | ||
2545 | dma_alloc_attrs(d, size, &iova, GFP_KERNEL, &attrs); | ||
2546 | |||
2547 | if (iova == DMA_ERROR_CODE) | ||
2548 | return -ENOMEM; | 2554 | return -ENOMEM; |
2549 | 2555 | ||
2550 | mem->fixed = false; | 2556 | mem->fixed = false; |
2551 | } | 2557 | } |
2552 | 2558 | ||
2553 | err = gk20a_get_sgtable(d, &mem->sgt, NULL, iova, size); | 2559 | mem->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); |
2560 | if (!mem->sgt) { | ||
2561 | err = -ENOMEM; | ||
2562 | goto fail_physfree; | ||
2563 | } | ||
2564 | |||
2565 | err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL); | ||
2554 | if (err) | 2566 | if (err) |
2555 | goto fail_free; | 2567 | goto fail_kfree; |
2568 | |||
2569 | sg_dma_address(mem->sgt->sgl) = addr; | ||
2570 | sg_set_page(mem->sgt->sgl, NULL, size, 0); | ||
2556 | 2571 | ||
2557 | mem->size = size; | 2572 | mem->size = size; |
2558 | mem->aperture = APERTURE_VIDMEM; | 2573 | mem->aperture = APERTURE_VIDMEM; |
2559 | 2574 | ||
2560 | if (g->mm.ce_vidmem_ctx_id != ~0) { | 2575 | if (g->mm.vidmem.ce_ctx_id != ~0) { |
2561 | struct gk20a_fence *gk20a_fence_out = NULL; | 2576 | struct gk20a_fence *gk20a_fence_out = NULL; |
2562 | u64 dst_bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | 2577 | u64 dst_bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); |
2563 | 2578 | ||
2564 | err = gk20a_ce_execute_ops(g->dev, | 2579 | err = gk20a_ce_execute_ops(g->dev, |
2565 | g->mm.ce_vidmem_ctx_id, | 2580 | g->mm.vidmem.ce_ctx_id, |
2566 | 0, | 2581 | 0, |
2567 | dst_bufbase, | 2582 | dst_bufbase, |
2568 | (u64)size, | 2583 | (u64)size, |
@@ -2590,18 +2605,14 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2590 | if (need_pramin_access) | 2605 | if (need_pramin_access) |
2591 | gk20a_memset(g, mem, 0, 0, size); | 2606 | gk20a_memset(g, mem, 0, 0, size); |
2592 | 2607 | ||
2593 | gk20a_dbg_fn("done"); | 2608 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); |
2594 | 2609 | ||
2595 | return 0; | 2610 | return 0; |
2596 | 2611 | ||
2597 | fail_free: | 2612 | fail_kfree: |
2598 | if (at) { | 2613 | kfree(mem->sgt); |
2599 | dma_mark_declared_memory_unoccupied(d, iova, mem->size, | 2614 | fail_physfree: |
2600 | &attrs); | 2615 | gk20a_free(&g->mm.vidmem.allocator, addr); |
2601 | } else { | ||
2602 | dma_free_attrs(d, size, NULL, iova, &attrs); | ||
2603 | } | ||
2604 | |||
2605 | return err; | 2616 | return err; |
2606 | #else | 2617 | #else |
2607 | return -ENOSYS; | 2618 | return -ENOSYS; |
@@ -2612,23 +2623,8 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
2612 | struct mem_desc *mem) | 2623 | struct mem_desc *mem) |
2613 | { | 2624 | { |
2614 | #if defined(CONFIG_GK20A_VIDMEM) | 2625 | #if defined(CONFIG_GK20A_VIDMEM) |
2615 | struct device *d = &g->mm.vidmem_dev; | 2626 | gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl)); |
2616 | DEFINE_DMA_ATTRS(attrs); | 2627 | gk20a_free_sgtable(&mem->sgt); |
2617 | |||
2618 | if (mem->fixed) { | ||
2619 | dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, &attrs); | ||
2620 | dma_mark_declared_memory_unoccupied(d, | ||
2621 | sg_dma_address(mem->sgt->sgl), mem->size, | ||
2622 | &attrs); | ||
2623 | } else { | ||
2624 | dma_set_attr(attr, &attrs); | ||
2625 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2626 | dma_free_attrs(d, mem->size, NULL, | ||
2627 | sg_dma_address(mem->sgt->sgl), | ||
2628 | &attrs); | ||
2629 | gk20a_free_sgtable(&mem->sgt); | ||
2630 | } | ||
2631 | |||
2632 | mem->size = 0; | 2628 | mem->size = 0; |
2633 | mem->aperture = APERTURE_INVALID; | 2629 | mem->aperture = APERTURE_INVALID; |
2634 | #endif | 2630 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 184c1f71..d7503948 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -391,9 +391,11 @@ struct mm_gk20a { | |||
391 | bool force_pramin; /* via debugfs */ | 391 | bool force_pramin; /* via debugfs */ |
392 | #endif | 392 | #endif |
393 | 393 | ||
394 | size_t vidmem_size; | 394 | struct { |
395 | struct device vidmem_dev; | 395 | size_t size; |
396 | u32 ce_vidmem_ctx_id; | 396 | struct gk20a_allocator allocator; |
397 | u32 ce_ctx_id; | ||
398 | } vidmem; | ||
397 | }; | 399 | }; |
398 | 400 | ||
399 | int gk20a_mm_init(struct mm_gk20a *mm); | 401 | int gk20a_mm_init(struct mm_gk20a *mm); |