summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c 151
1 files changed, 112 insertions, 39 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b12091bf..3021cb02 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -358,9 +358,12 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
358 mutex_init(&mm->l2_op_lock); 358 mutex_init(&mm->l2_op_lock);
359 359
360 /*TBD: make channel vm size configurable */ 360 /*TBD: make channel vm size configurable */
361 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; 361 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
362 mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
362 363
363 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); 364 gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
365 (int)(mm->channel.user_size >> 20),
366 (int)(mm->channel.kernel_size >> 20));
364 367
365 err = gk20a_init_bar1_vm(mm); 368 err = gk20a_init_bar1_vm(mm);
366 if (err) 369 if (err)
@@ -1052,7 +1055,7 @@ static void gmmu_select_page_size(struct vm_gk20a *vm,
1052{ 1055{
1053 int i; 1056 int i;
1054 /* choose the biggest first (top->bottom) */ 1057 /* choose the biggest first (top->bottom) */
1055 for (i = gmmu_nr_page_sizes-1; i >= 0; i--) 1058 for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
1056 if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) { 1059 if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
1057 bfr->pgsz_idx = i; 1060 bfr->pgsz_idx = i;
1058 break; 1061 break;
@@ -1438,8 +1441,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1438 * the alignment determined by gmmu_select_page_size(). 1441 * the alignment determined by gmmu_select_page_size().
1439 */ 1442 */
1440 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { 1443 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1441 int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ? 1444 int pgsz_idx =
1442 gmmu_page_size_big : gmmu_page_size_small; 1445 __nv_gmmu_va_is_big_page_region(vm, offset_align) ?
1446 gmmu_page_size_big : gmmu_page_size_small;
1443 if (pgsz_idx > bfr.pgsz_idx) { 1447 if (pgsz_idx > bfr.pgsz_idx) {
1444 gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", 1448 gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
1445 offset_align, bfr.pgsz_idx, pgsz_idx); 1449 offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -1816,7 +1820,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1816 *sgt, /* sg table */ 1820 *sgt, /* sg table */
1817 0, /* sg offset */ 1821 0, /* sg offset */
1818 size, 1822 size,
1819 0, /* page size index = 0 i.e. SZ_4K */ 1823 gmmu_page_size_kernel,
1820 0, /* kind */ 1824 0, /* kind */
1821 0, /* ctag_offset */ 1825 0, /* ctag_offset */
1822 flags, rw_flag, 1826 flags, rw_flag,
@@ -1991,7 +1995,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1991 g->ops.mm.gmmu_unmap(vm, 1995 g->ops.mm.gmmu_unmap(vm,
1992 vaddr, 1996 vaddr,
1993 size, 1997 size,
1994 0, /* page size 4K */ 1998 gmmu_page_size_kernel,
1995 true, /*va_allocated */ 1999 true, /*va_allocated */
1996 rw_flag, 2000 rw_flag,
1997 false, 2001 false,
@@ -2395,6 +2399,11 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
2395 buffer_offset, 2399 buffer_offset,
2396 sgt ? sgt->nents : 0); 2400 sgt ? sgt->nents : 0);
2397 2401
2402 /* note: here we need to map kernel to small, since the
2403 * low-level mmu code assumes 0 is small and 1 is big pages */
2404 if (pgsz_idx == gmmu_page_size_kernel)
2405 pgsz_idx = gmmu_page_size_small;
2406
2398 if (space_to_skip & (page_size - 1)) 2407 if (space_to_skip & (page_size - 1))
2399 return -EINVAL; 2408 return -EINVAL;
2400 2409
@@ -2618,17 +2627,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2618 struct vm_gk20a *vm, 2627 struct vm_gk20a *vm,
2619 u32 big_page_size, 2628 u32 big_page_size,
2620 u64 low_hole, 2629 u64 low_hole,
2630 u64 kernel_reserved,
2621 u64 aperture_size, 2631 u64 aperture_size,
2622 bool big_pages, 2632 bool big_pages,
2623 char *name) 2633 char *name)
2624{ 2634{
2625 int err, i; 2635 int err, i;
2626 char alloc_name[32]; 2636 char alloc_name[32];
2627 u64 small_vma_size, large_vma_size; 2637 u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
2638 kernel_vma_start, kernel_vma_limit;
2628 u32 pde_lo, pde_hi; 2639 u32 pde_lo, pde_hi;
2629 2640
2630 /* note: keep the page sizes sorted lowest to highest here */ 2641 /* note: this must match gmmu_pgsz_gk20a enum */
2631 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; 2642 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
2643
2644 WARN_ON(kernel_reserved + low_hole > aperture_size);
2645 if (kernel_reserved > aperture_size)
2646 return -ENOMEM;
2632 2647
2633 vm->mm = mm; 2648 vm->mm = mm;
2634 2649
@@ -2650,6 +2665,9 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2650 gk20a_dbg_info("big page-size (%dKB)", 2665 gk20a_dbg_info("big page-size (%dKB)",
2651 vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); 2666 vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
2652 2667
2668 gk20a_dbg_info("kernel page-size (%dKB)",
2669 vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
2670
2653 pde_range_from_vaddr_range(vm, 2671 pde_range_from_vaddr_range(vm,
2654 0, vm->va_limit-1, 2672 0, vm->va_limit-1,
2655 &pde_lo, &pde_hi); 2673 &pde_lo, &pde_hi);
@@ -2668,36 +2686,65 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2668 if (err) 2686 if (err)
2669 goto clean_up_pdes; 2687 goto clean_up_pdes;
2670 2688
2671 /* First 16GB of the address space goes towards small pages. What ever 2689 /* setup vma limits */
2672 * remains is allocated to large pages. */ 2690 small_vma_start = low_hole;
2673 small_vma_size = vm->va_limit; 2691
2674 if (big_pages) { 2692 if (big_pages) {
2675 small_vma_size = __nv_gmmu_va_small_page_limit(); 2693 /* First 16GB of the address space goes towards small
2676 large_vma_size = vm->va_limit - small_vma_size; 2694 * pages. What ever remains is allocated to large
2695 * pages. */
2696 small_vma_limit = __nv_gmmu_va_small_page_limit();
2697 large_vma_start = small_vma_limit;
2698 large_vma_limit = vm->va_limit - kernel_reserved;
2699 } else {
2700 small_vma_limit = vm->va_limit - kernel_reserved;
2701 large_vma_start = 0;
2702 large_vma_limit = 0;
2677 } 2703 }
2678 2704
2679 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, 2705 kernel_vma_start = vm->va_limit - kernel_reserved;
2680 vm->gmmu_page_sizes[gmmu_page_size_small]>>10); 2706 kernel_vma_limit = vm->va_limit;
2681 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
2682 vm, alloc_name,
2683 vm->va_start,
2684 small_vma_size - vm->va_start,
2685 SZ_4K,
2686 GPU_BALLOC_MAX_ORDER,
2687 GPU_BALLOC_GVA_SPACE);
2688 if (err)
2689 goto clean_up_ptes;
2690 2707
2691 if (big_pages) { 2708 gk20a_dbg_info(
2709 "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
2710 small_vma_start, small_vma_limit,
2711 large_vma_start, large_vma_limit,
2712 kernel_vma_start, kernel_vma_limit);
2713
2714 /* check that starts do not exceed limits */
2715 WARN_ON(small_vma_start > small_vma_limit);
2716 WARN_ON(large_vma_start > large_vma_limit);
2717 /* kernel_vma must also be non-zero */
2718 WARN_ON(kernel_vma_start >= kernel_vma_limit);
2719
2720 if (small_vma_start > small_vma_limit ||
2721 large_vma_start > large_vma_limit ||
2722 kernel_vma_start >= kernel_vma_limit) {
2723 err = -EINVAL;
2724 goto clean_up_pdes;
2725 }
2726
2727 if (small_vma_start < small_vma_limit) {
2728 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
2729 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
2730 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
2731 vm, alloc_name,
2732 small_vma_start,
2733 small_vma_limit - small_vma_start,
2734 SZ_4K,
2735 GPU_BALLOC_MAX_ORDER,
2736 GPU_BALLOC_GVA_SPACE);
2737 if (err)
2738 goto clean_up_ptes;
2739 }
2740
2741 if (large_vma_start < large_vma_limit) {
2692 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", 2742 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
2693 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); 2743 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
2694 /*
2695 * Big page VMA starts at the end of the small page VMA.
2696 */
2697 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], 2744 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
2698 vm, alloc_name, 2745 vm, alloc_name,
2699 small_vma_size, 2746 large_vma_start,
2700 large_vma_size, 2747 large_vma_limit - large_vma_start,
2701 big_page_size, 2748 big_page_size,
2702 GPU_BALLOC_MAX_ORDER, 2749 GPU_BALLOC_MAX_ORDER,
2703 GPU_BALLOC_GVA_SPACE); 2750 GPU_BALLOC_GVA_SPACE);
@@ -2705,6 +2752,21 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2705 goto clean_up_small_allocator; 2752 goto clean_up_small_allocator;
2706 } 2753 }
2707 2754
2755 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
2756 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
2757 /*
2758 * kernel reserved VMA is at the end of the aperture
2759 */
2760 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel],
2761 vm, alloc_name,
2762 kernel_vma_start,
2763 kernel_vma_limit - kernel_vma_start,
2764 SZ_4K,
2765 GPU_BALLOC_MAX_ORDER,
2766 GPU_BALLOC_GVA_SPACE);
2767 if (err)
2768 goto clean_up_big_allocator;
2769
2708 vm->mapped_buffers = RB_ROOT; 2770 vm->mapped_buffers = RB_ROOT;
2709 2771
2710 mutex_init(&vm->update_gmmu_lock); 2772 mutex_init(&vm->update_gmmu_lock);
@@ -2713,8 +2775,12 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2713 2775
2714 return 0; 2776 return 0;
2715 2777
2778clean_up_big_allocator:
2779 if (large_vma_start < large_vma_limit)
2780 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2716clean_up_small_allocator: 2781clean_up_small_allocator:
2717 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); 2782 if (small_vma_start < small_vma_limit)
2783 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2718clean_up_ptes: 2784clean_up_ptes:
2719 free_gmmu_pages(vm, &vm->pdb); 2785 free_gmmu_pages(vm, &vm->pdb);
2720clean_up_pdes: 2786clean_up_pdes:
@@ -2756,7 +2822,9 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
2756 snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); 2822 snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
2757 2823
2758 err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, 2824 err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
2759 mm->channel.size, !mm->disable_bigpage, name); 2825 mm->channel.kernel_size,
2826 mm->channel.user_size + mm->channel.kernel_size,
2827 !mm->disable_bigpage, name);
2760 2828
2761 return err; 2829 return err;
2762} 2830}
@@ -2886,7 +2954,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2886 args->pages, args->offset); 2954 args->pages, args->offset);
2887 2955
2888 /* determine pagesz idx */ 2956 /* determine pagesz idx */
2889 pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? 2957 pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
2890 gmmu_page_size_big : gmmu_page_size_small; 2958 gmmu_page_size_big : gmmu_page_size_small;
2891 2959
2892 vma = &vm->vma[pgsz_idx]; 2960 vma = &vm->vma[pgsz_idx];
@@ -3086,9 +3154,11 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
3086 3154
3087void gk20a_deinit_vm(struct vm_gk20a *vm) 3155void gk20a_deinit_vm(struct vm_gk20a *vm)
3088{ 3156{
3089 if (vm->big_pages) 3157 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]);
3158 if (vm->vma[gmmu_page_size_big].init)
3090 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); 3159 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
3091 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); 3160 if (vm->vma[gmmu_page_size_small].init)
3161 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
3092 3162
3093 gk20a_vm_free_entries(vm, &vm->pdb, 0); 3163 gk20a_vm_free_entries(vm, &vm->pdb, 0);
3094} 3164}
@@ -3127,6 +3197,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
3127 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; 3197 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
3128 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); 3198 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
3129 gk20a_init_vm(mm, vm, big_page_size, SZ_4K, 3199 gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
3200 mm->bar1.aperture_size - SZ_4K,
3130 mm->bar1.aperture_size, false, "bar1"); 3201 mm->bar1.aperture_size, false, "bar1");
3131 3202
3132 err = gk20a_alloc_inst_block(g, inst_block); 3203 err = gk20a_alloc_inst_block(g, inst_block);
@@ -3154,7 +3225,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
3154 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); 3225 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
3155 3226
3156 gk20a_init_vm(mm, vm, big_page_size, 3227 gk20a_init_vm(mm, vm, big_page_size,
3157 SZ_4K * 16, GK20A_PMU_VA_SIZE, false, "system"); 3228 SZ_4K * 16, GK20A_PMU_VA_SIZE,
3229 GK20A_PMU_VA_SIZE * 2, false,
3230 "system");
3158 3231
3159 err = gk20a_alloc_inst_block(g, inst_block); 3232 err = gk20a_alloc_inst_block(g, inst_block);
3160 if (err) 3233 if (err)