diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 210 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/page_allocator_priv.h | 1 |
2 files changed, 150 insertions, 61 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 7c731890..dde798cf 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/lcm.h> | 29 | #include <linux/lcm.h> |
30 | #include <uapi/linux/nvgpu.h> | 30 | #include <uapi/linux/nvgpu.h> |
31 | #include <trace/events/gk20a.h> | 31 | #include <trace/events/gk20a.h> |
32 | #include <gk20a/page_allocator_priv.h> | ||
32 | 33 | ||
33 | #include "gk20a.h" | 34 | #include "gk20a.h" |
34 | #include "mm_gk20a.h" | 35 | #include "mm_gk20a.h" |
@@ -84,10 +85,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | |||
84 | mem->cpu_va = NULL; | 85 | mem->cpu_va = NULL; |
85 | } | 86 | } |
86 | 87 | ||
/*
 * gk20a_mem_get_vidmem_addr() - base address of a single-chunk vidmem buffer.
 *
 * @g:   not referenced anywhere in this function body.
 * @mem: buffer descriptor; may be NULL.
 *
 * Returns the base of the buffer's only chunk when @mem is non-NULL, its
 * aperture is APERTURE_VIDMEM and its page allocation holds exactly one
 * chunk. Returns 0 in every other case (NULL @mem, non-vidmem aperture, or
 * an allocation whose nr_chunks is not 1).
 *
 * NOTE(review): 0 doubles as the failure value, so a caller cannot tell a
 * failure apart from a chunk whose base is 0 - confirm that 0 is never a
 * valid chunk base for the vidmem allocator.
 * NOTE(review): mem->sgt and mem->sgt->sgl are dereferenced without a NULL
 * check - presumably every vidmem mem_desc has a populated sg table; verify
 * against the allocation path.
 */
88 | static u64 gk20a_mem_get_vidmem_addr(struct gk20a *g, struct mem_desc *mem) | |
89 | { | |
90 | struct gk20a_page_alloc *alloc; | |
91 | struct page_alloc_chunk *chunk; | |
92 | |
93 | if (mem && mem->aperture == APERTURE_VIDMEM) { | |
/*
 * The value read from the DMA-address field of the first scatterlist entry
 * is reinterpreted here as a pointer to the allocation descriptor; it is not
 * used as a bus address.
 */
94 | alloc = (struct gk20a_page_alloc *) | |
95 | sg_dma_address(mem->sgt->sgl); | |
96 | |
97 | /* This API should not be used with > 1 chunks */ | |
98 | if (alloc->nr_chunks != 1) | |
99 | return 0; | |
100 | |
/* Exactly one chunk is present, so the first list entry is the whole buffer. */
101 | chunk = list_first_entry(&alloc->alloc_chunks, | |
102 | struct page_alloc_chunk, list_entry); | |
103 | return chunk->base; | |
104 | } | |
105 | |
/* NULL @mem or a non-vidmem aperture: no vidmem address to report. */
106 | return 0; | |
107 | } | |
108 | |||
87 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ | 109 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ |
88 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | 110 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) |
89 | { | 111 | { |
90 | u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | 112 | u64 bufbase = gk20a_mem_get_vidmem_addr(g, mem); |
91 | u64 addr = bufbase + w * sizeof(u32); | 113 | u64 addr = bufbase + w * sizeof(u32); |
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | 114 | u32 hi = (u32)((addr & ~(u64)0xfffff) |
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | 115 | >> bus_bar0_window_target_bar0_window_base_shift_v()); |
@@ -765,9 +787,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
765 | return 0; | 787 | return 0; |
766 | 788 | ||
767 | err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem", | 789 | err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem", |
768 | SZ_4K, size - SZ_4K, SZ_4K, | 790 | SZ_4K, size - SZ_4K, SZ_4K, 0); |
769 | GPU_ALLOC_FORCE_CONTIG | | ||
770 | GPU_ALLOC_NO_SCATTER_GATHER); | ||
771 | if (err) { | 791 | if (err) { |
772 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", | 792 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", |
773 | size, err); | 793 | size, err); |
@@ -2721,7 +2741,6 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2721 | #if defined(CONFIG_GK20A_VIDMEM) | 2741 | #if defined(CONFIG_GK20A_VIDMEM) |
2722 | u64 addr; | 2742 | u64 addr; |
2723 | int err; | 2743 | int err; |
2724 | bool need_pramin_access = true; | ||
2725 | 2744 | ||
2726 | gk20a_dbg_fn(""); | 2745 | gk20a_dbg_fn(""); |
2727 | 2746 | ||
@@ -2764,13 +2783,22 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2764 | 2783 | ||
2765 | if (g->mm.vidmem.ce_ctx_id != ~0) { | 2784 | if (g->mm.vidmem.ce_ctx_id != ~0) { |
2766 | struct gk20a_fence *gk20a_fence_out = NULL; | 2785 | struct gk20a_fence *gk20a_fence_out = NULL; |
2767 | u64 dst_bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | 2786 | struct gk20a_fence *gk20a_last_fence = NULL; |
2787 | struct gk20a_page_alloc *alloc = NULL; | ||
2788 | struct page_alloc_chunk *chunk = NULL; | ||
2768 | 2789 | ||
2769 | err = gk20a_ce_execute_ops(g->dev, | 2790 | alloc = (struct gk20a_page_alloc *) |
2791 | g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
2792 | |||
2793 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | ||
2794 | if (gk20a_last_fence) | ||
2795 | gk20a_fence_put(gk20a_last_fence); | ||
2796 | |||
2797 | err = gk20a_ce_execute_ops(g->dev, | ||
2770 | g->mm.vidmem.ce_ctx_id, | 2798 | g->mm.vidmem.ce_ctx_id, |
2771 | 0, | 2799 | 0, |
2772 | dst_bufbase, | 2800 | chunk->base, |
2773 | (u64)size, | 2801 | chunk->length, |
2774 | 0x00000000, | 2802 | 0x00000000, |
2775 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | 2803 | NVGPU_CE_DST_LOCATION_LOCAL_FB, |
2776 | NVGPU_CE_MEMSET, | 2804 | NVGPU_CE_MEMSET, |
@@ -2778,27 +2806,31 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2778 | 0, | 2806 | 0, |
2779 | &gk20a_fence_out); | 2807 | &gk20a_fence_out); |
2780 | 2808 | ||
2781 | if (!err) { | 2809 | if (err) { |
2782 | if (gk20a_fence_out) { | 2810 | gk20a_err(g->dev, |
2783 | err = gk20a_fence_wait(gk20a_fence_out, gk20a_get_gr_idle_timeout(g)); | 2811 | "Failed gk20a_ce_execute_ops[%d]", err); |
2784 | gk20a_fence_put(gk20a_fence_out); | 2812 | goto fail_free_table; |
2785 | if (err) | ||
2786 | gk20a_err(g->dev, | ||
2787 | "Failed to get the fence_out from CE execute ops"); | ||
2788 | else | ||
2789 | need_pramin_access = false; | ||
2790 | } | 2813 | } |
2791 | } else | ||
2792 | gk20a_err(g->dev, "Failed gk20a_ce_execute_ops[%d]",err); | ||
2793 | } | ||
2794 | 2814 | ||
2795 | if (need_pramin_access) | 2815 | gk20a_last_fence = gk20a_fence_out; |
2796 | gk20a_memset(g, mem, 0, 0, size); | 2816 | } |
2817 | |||
2818 | if (gk20a_last_fence) { | ||
2819 | err = gk20a_fence_wait(gk20a_last_fence, | ||
2820 | gk20a_get_gr_idle_timeout(g)); | ||
2821 | gk20a_fence_put(gk20a_last_fence); | ||
2822 | if (err) | ||
2823 | gk20a_err(g->dev, | ||
2824 | "Failed to get the fence_out from CE execute ops"); | ||
2825 | } | ||
2826 | } | ||
2797 | 2827 | ||
2798 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); | 2828 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); |
2799 | 2829 | ||
2800 | return 0; | 2830 | return 0; |
2801 | 2831 | ||
2832 | fail_free_table: | ||
2833 | sg_free_table(mem->sgt); | ||
2802 | fail_kfree: | 2834 | fail_kfree: |
2803 | kfree(mem->sgt); | 2835 | kfree(mem->sgt); |
2804 | fail_physfree: | 2836 | fail_physfree: |
@@ -3381,13 +3413,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3381 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | 3413 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; |
3382 | int err; | 3414 | int err; |
3383 | struct scatterlist *sgl = NULL; | 3415 | struct scatterlist *sgl = NULL; |
3384 | 3416 | struct gk20a_page_alloc *alloc = NULL; | |
3385 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d", | 3417 | struct page_alloc_chunk *chunk = NULL; |
3386 | pgsz_idx, | 3418 | u64 length; |
3387 | sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0) | ||
3388 | : 0ULL, | ||
3389 | buffer_offset, | ||
3390 | sgt ? sgt->nents : 0); | ||
3391 | 3419 | ||
3392 | /* note: here we need to map kernel to small, since the | 3420 | /* note: here we need to map kernel to small, since the |
3393 | * low-level mmu code assumes 0 is small and 1 is big pages */ | 3421 | * low-level mmu code assumes 0 is small and 1 is big pages */ |
@@ -3397,45 +3425,105 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3397 | if (space_to_skip & (page_size - 1)) | 3425 | if (space_to_skip & (page_size - 1)) |
3398 | return -EINVAL; | 3426 | return -EINVAL; |
3399 | 3427 | ||
3400 | if (sgt) { | 3428 | err = map_gmmu_pages(g, &vm->pdb); |
3401 | iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | 3429 | if (err) { |
3402 | if (!vm->mm->bypass_smmu && iova) { | 3430 | gk20a_err(dev_from_vm(vm), |
3403 | iova += space_to_skip; | 3431 | "couldn't map ptes for update as=%d", |
3432 | vm_aspace_id(vm)); | ||
3433 | return err; | ||
3434 | } | ||
3435 | |||
3436 | if (aperture == APERTURE_VIDMEM) { | ||
3437 | gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx", | ||
3438 | pgsz_idx, gpu_va, gpu_end-1, iova); | ||
3439 | |||
3440 | if (sgt) { | ||
3441 | alloc = (struct gk20a_page_alloc *) | ||
3442 | g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | ||
3443 | |||
3444 | list_for_each_entry(chunk, &alloc->alloc_chunks, | ||
3445 | list_entry) { | ||
3446 | if (space_to_skip && | ||
3447 | space_to_skip > chunk->length) { | ||
3448 | space_to_skip -= chunk->length; | ||
3449 | } else { | ||
3450 | iova = chunk->base + space_to_skip; | ||
3451 | length = chunk->length - space_to_skip; | ||
3452 | space_to_skip = 0; | ||
3453 | |||
3454 | err = update_gmmu_level_locked(vm, | ||
3455 | &vm->pdb, pgsz_idx, | ||
3456 | &sgl, | ||
3457 | &space_to_skip, | ||
3458 | &iova, | ||
3459 | gpu_va, gpu_va + length, | ||
3460 | kind_v, &ctag, | ||
3461 | cacheable, unmapped_pte, | ||
3462 | rw_flag, sparse, 0, priv, | ||
3463 | aperture); | ||
3464 | |||
3465 | /* need to set explicit zero here */ | ||
3466 | space_to_skip = 0; | ||
3467 | gpu_va += length; | ||
3468 | } | ||
3469 | } | ||
3404 | } else { | 3470 | } else { |
3405 | sgl = sgt->sgl; | 3471 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, |
3406 | 3472 | &sgl, | |
3407 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | 3473 | &space_to_skip, |
3408 | (u64)sg_phys(sgl), | 3474 | &iova, |
3409 | sgl->length); | 3475 | gpu_va, gpu_end, |
3410 | while (space_to_skip && sgl && | 3476 | kind_v, &ctag, |
3411 | space_to_skip + page_size > sgl->length) { | 3477 | cacheable, unmapped_pte, rw_flag, |
3412 | space_to_skip -= sgl->length; | 3478 | sparse, 0, priv, |
3413 | sgl = sg_next(sgl); | 3479 | aperture); |
3480 | } | ||
3481 | } else { | ||
3482 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d", | ||
3483 | pgsz_idx, | ||
3484 | sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0) | ||
3485 | : 0ULL, | ||
3486 | buffer_offset, | ||
3487 | sgt ? sgt->nents : 0); | ||
3488 | |||
3489 | gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", | ||
3490 | pgsz_idx, gpu_va, gpu_end-1, iova); | ||
3491 | |||
3492 | if (sgt) { | ||
3493 | iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | ||
3494 | if (!vm->mm->bypass_smmu && iova) { | ||
3495 | iova += space_to_skip; | ||
3496 | } else { | ||
3497 | sgl = sgt->sgl; | ||
3498 | |||
3414 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | 3499 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", |
3415 | (u64)sg_phys(sgl), | 3500 | (u64)sg_phys(sgl), |
3416 | sgl->length); | 3501 | sgl->length); |
3502 | |||
3503 | while (space_to_skip && sgl && | ||
3504 | space_to_skip + page_size > sgl->length) { | ||
3505 | space_to_skip -= sgl->length; | ||
3506 | sgl = sg_next(sgl); | ||
3507 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
3508 | (u64)sg_phys(sgl), | ||
3509 | sgl->length); | ||
3510 | } | ||
3511 | |||
3512 | iova = sg_phys(sgl) + space_to_skip; | ||
3417 | } | 3513 | } |
3418 | iova = sg_phys(sgl) + space_to_skip; | ||
3419 | } | 3514 | } |
3420 | } | ||
3421 | 3515 | ||
3422 | gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", | 3516 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, |
3423 | pgsz_idx, gpu_va, gpu_end-1, iova); | 3517 | &sgl, |
3424 | err = map_gmmu_pages(g, &vm->pdb); | 3518 | &space_to_skip, |
3425 | if (err) { | 3519 | &iova, |
3426 | gk20a_err(dev_from_vm(vm), | 3520 | gpu_va, gpu_end, |
3427 | "couldn't map ptes for update as=%d", | 3521 | kind_v, &ctag, |
3428 | vm_aspace_id(vm)); | 3522 | cacheable, unmapped_pte, rw_flag, |
3429 | return err; | 3523 | sparse, 0, priv, |
3524 | aperture); | ||
3430 | } | 3525 | } |
3431 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | 3526 | |
3432 | &sgl, | ||
3433 | &space_to_skip, | ||
3434 | &iova, | ||
3435 | gpu_va, gpu_end, | ||
3436 | kind_v, &ctag, | ||
3437 | cacheable, unmapped_pte, rw_flag, sparse, 0, priv, | ||
3438 | aperture); | ||
3439 | unmap_gmmu_pages(g, &vm->pdb); | 3527 | unmap_gmmu_pages(g, &vm->pdb); |
3440 | 3528 | ||
3441 | smp_mb(); | 3529 | smp_mb(); |
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h index bce5b75e..3d4e3c43 100644 --- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h +++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h | |||
@@ -19,6 +19,7 @@ | |||
19 | 19 | ||
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/rbtree.h> | 21 | #include <linux/rbtree.h> |
22 | #include <gk20a/gk20a_allocator.h> | ||
22 | 23 | ||
23 | #include "gk20a_allocator.h" | 24 | #include "gk20a_allocator.h" |
24 | 25 | ||