path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author	Sami Kiminki <skiminki@nvidia.com>	2015-05-22 13:48:22 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-09-07 15:37:15 -0400
commit	eade809c265ada214c6c47e1ad9db1706c868da0 (patch)
tree	a132050d7a4c2193f338e0082d258e1eac958cc1 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent	57034b22ca17b217b683941dcebc0d69587d7d5e (diff)
gpu: nvgpu: Separate kernel and user GPU VA regions
Separate the kernel and userspace regions in the GPU virtual address
space. Do this by reserving the last part of the GPU VA aperture for the
kernel, and extend the GPU VA aperture accordingly for regular address
spaces. This prevents the kernel from polluting the userspace-visible GPU
VA regions and thus makes the success of fixed-address mapping more
predictable.

Bug 200077571

Change-Id: I63f0e73d4c815a4a9fa4a9ce568709974690ef0f
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/747191
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
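For reference, the address-space split introduced by this patch is plain arithmetic: the user region sits at the bottom of the aperture (small pages first, then big pages where enabled) and the top kernel_reserved bytes are held back for kernel-internal mappings. The standalone C sketch below mirrors the [start, limit) range computation that gk20a_init_vm() performs in the diff; the struct and helper names and the 16 GB small-page cutoff (standing in for __nv_gmmu_va_small_page_limit()) are illustrative only and not part of the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the [start, limit) ranges set up in
 * gk20a_init_vm() in the diff below; not driver code. */
struct va_layout {
	uint64_t small_start, small_limit;   /* 4K-page user VMA */
	uint64_t large_start, large_limit;   /* big-page user VMA */
	uint64_t kernel_start, kernel_limit; /* reserved for the kernel */
};

/* stand-in for __nv_gmmu_va_small_page_limit() */
#define SMALL_PAGE_LIMIT (16ULL << 30)

static int compute_va_layout(uint64_t aperture_size, uint64_t low_hole,
			     uint64_t kernel_reserved, bool big_pages,
			     struct va_layout *l)
{
	uint64_t va_limit = aperture_size;

	if (kernel_reserved + low_hole > aperture_size)
		return -1;

	l->small_start = low_hole;
	if (big_pages) {
		/* small pages up to 16GB, big pages for the rest of user VA */
		l->small_limit = SMALL_PAGE_LIMIT;
		l->large_start = l->small_limit;
		l->large_limit = va_limit - kernel_reserved;
	} else {
		l->small_limit = va_limit - kernel_reserved;
		l->large_start = l->large_limit = 0;
	}

	/* the kernel region occupies the top of the aperture */
	l->kernel_start = va_limit - kernel_reserved;
	l->kernel_limit = va_limit;
	return 0;
}

int main(void)
{
	struct va_layout l;

	/* example: 128GB aperture, 64KB low hole, 4GB reserved for kernel */
	if (compute_va_layout(128ULL << 30, 64 << 10, 4ULL << 30, true, &l))
		return 1;

	printf("small  [0x%llx, 0x%llx)\n", (unsigned long long)l.small_start,
	       (unsigned long long)l.small_limit);
	printf("large  [0x%llx, 0x%llx)\n", (unsigned long long)l.large_start,
	       (unsigned long long)l.large_limit);
	printf("kernel [0x%llx, 0x%llx)\n", (unsigned long long)l.kernel_start,
	       (unsigned long long)l.kernel_limit);
	return 0;
}

Run with the example values, the sketch prints the same kind of small/large/kernel ranges that the new gk20a_dbg_info() call in gk20a_init_vm() logs below.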
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	151
1 file changed, 112 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b12091bf..3021cb02 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -358,9 +358,12 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	mutex_init(&mm->l2_op_lock);
 
 	/*TBD: make channel vm size configurable */
-	mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
+	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
+	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
-	gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
+	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
+		       (int)(mm->channel.user_size >> 20),
+		       (int)(mm->channel.kernel_size >> 20));
 
 	err = gk20a_init_bar1_vm(mm);
 	if (err)
@@ -1052,7 +1055,7 @@ static void gmmu_select_page_size(struct vm_gk20a *vm,
 {
 	int i;
 	/* choose the biggest first (top->bottom) */
-	for (i = gmmu_nr_page_sizes-1; i >= 0; i--)
+	for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
 		if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
 			bfr->pgsz_idx = i;
 			break;
@@ -1438,8 +1441,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	 * the alignment determined by gmmu_select_page_size().
 	 */
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ?
-				gmmu_page_size_big : gmmu_page_size_small;
+		int pgsz_idx =
+			__nv_gmmu_va_is_big_page_region(vm, offset_align) ?
+			gmmu_page_size_big : gmmu_page_size_small;
 		if (pgsz_idx > bfr.pgsz_idx) {
 			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
 				  offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -1816,7 +1820,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 				*sgt, /* sg table */
 				0, /* sg offset */
 				size,
-				0, /* page size index = 0 i.e. SZ_4K */
+				gmmu_page_size_kernel,
 				0, /* kind */
 				0, /* ctag_offset */
 				flags, rw_flag,
@@ -1991,7 +1995,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 	g->ops.mm.gmmu_unmap(vm,
 			vaddr,
 			size,
-			0, /* page size 4K */
+			gmmu_page_size_kernel,
 			true, /*va_allocated */
 			rw_flag,
 			false,
@@ -2395,6 +2399,11 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			   buffer_offset,
 			   sgt ? sgt->nents : 0);
 
+	/* note: here we need to map kernel to small, since the
+	 * low-level mmu code assumes 0 is small and 1 is big pages */
+	if (pgsz_idx == gmmu_page_size_kernel)
+		pgsz_idx = gmmu_page_size_small;
+
 	if (space_to_skip & (page_size - 1))
 		return -EINVAL;
 
@@ -2618,17 +2627,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 			struct vm_gk20a *vm,
 			u32 big_page_size,
 			u64 low_hole,
+			u64 kernel_reserved,
 			u64 aperture_size,
 			bool big_pages,
 			char *name)
 {
 	int err, i;
 	char alloc_name[32];
-	u64 small_vma_size, large_vma_size;
+	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
+		kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
 
-	/* note: keep the page sizes sorted lowest to highest here */
-	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
+	/* note: this must match gmmu_pgsz_gk20a enum */
+	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
+
+	WARN_ON(kernel_reserved + low_hole > aperture_size);
+	if (kernel_reserved > aperture_size)
+		return -ENOMEM;
 
 	vm->mm = mm;
 
@@ -2650,6 +2665,9 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	gk20a_dbg_info("big page-size (%dKB)",
 			vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
 
+	gk20a_dbg_info("kernel page-size (%dKB)",
+			vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+
 	pde_range_from_vaddr_range(vm,
 				   0, vm->va_limit-1,
 				   &pde_lo, &pde_hi);
@@ -2668,36 +2686,65 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (err)
 		goto clean_up_pdes;
 
-	/* First 16GB of the address space goes towards small pages. What ever
-	 * remains is allocated to large pages. */
-	small_vma_size = vm->va_limit;
+	/* setup vma limits */
+	small_vma_start = low_hole;
+
 	if (big_pages) {
-		small_vma_size = __nv_gmmu_va_small_page_limit();
-		large_vma_size = vm->va_limit - small_vma_size;
+		/* First 16GB of the address space goes towards small
+		 * pages. What ever remains is allocated to large
+		 * pages. */
+		small_vma_limit = __nv_gmmu_va_small_page_limit();
+		large_vma_start = small_vma_limit;
+		large_vma_limit = vm->va_limit - kernel_reserved;
+	} else {
+		small_vma_limit = vm->va_limit - kernel_reserved;
+		large_vma_start = 0;
+		large_vma_limit = 0;
 	}
 
-	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
-		 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-				     vm, alloc_name,
-				     vm->va_start,
-				     small_vma_size - vm->va_start,
-				     SZ_4K,
-				     GPU_BALLOC_MAX_ORDER,
-				     GPU_BALLOC_GVA_SPACE);
-	if (err)
-		goto clean_up_ptes;
+	kernel_vma_start = vm->va_limit - kernel_reserved;
+	kernel_vma_limit = vm->va_limit;
 
-	if (big_pages) {
+	gk20a_dbg_info(
+		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		small_vma_start, small_vma_limit,
+		large_vma_start, large_vma_limit,
+		kernel_vma_start, kernel_vma_limit);
+
+	/* check that starts do not exceed limits */
+	WARN_ON(small_vma_start > small_vma_limit);
+	WARN_ON(large_vma_start > large_vma_limit);
+	/* kernel_vma must also be non-zero */
+	WARN_ON(kernel_vma_start >= kernel_vma_limit);
+
+	if (small_vma_start > small_vma_limit ||
+	    large_vma_start > large_vma_limit ||
+	    kernel_vma_start >= kernel_vma_limit) {
+		err = -EINVAL;
+		goto clean_up_pdes;
+	}
+
+	if (small_vma_start < small_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
+			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
+		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+					     vm, alloc_name,
+					     small_vma_start,
+					     small_vma_limit - small_vma_start,
+					     SZ_4K,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_ptes;
+	}
+
+	if (large_vma_start < large_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
-			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-		/*
-		 * Big page VMA starts at the end of the small page VMA.
-		 */
+			 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
 		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
 					     vm, alloc_name,
-					     small_vma_size,
-					     large_vma_size,
+					     large_vma_start,
+					     large_vma_limit - large_vma_start,
 					     big_page_size,
 					     GPU_BALLOC_MAX_ORDER,
 					     GPU_BALLOC_GVA_SPACE);
@@ -2705,6 +2752,21 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 			goto clean_up_small_allocator;
 	}
 
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
+		 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	/*
+	 * kernel reserved VMA is at the end of the aperture
+	 */
+	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel],
+				     vm, alloc_name,
+				     kernel_vma_start,
+				     kernel_vma_limit - kernel_vma_start,
+				     SZ_4K,
+				     GPU_BALLOC_MAX_ORDER,
+				     GPU_BALLOC_GVA_SPACE);
+	if (err)
+		goto clean_up_big_allocator;
+
 	vm->mapped_buffers = RB_ROOT;
 
 	mutex_init(&vm->update_gmmu_lock);
@@ -2713,8 +2775,12 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	return 0;
 
+clean_up_big_allocator:
+	if (large_vma_start < large_vma_limit)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 clean_up_small_allocator:
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (small_vma_start < small_vma_limit)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_ptes:
 	free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -2756,7 +2822,9 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
 
 	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
-			    mm->channel.size, !mm->disable_bigpage, name);
+			    mm->channel.kernel_size,
+			    mm->channel.user_size + mm->channel.kernel_size,
+			    !mm->disable_bigpage, name);
 
 	return err;
 }
@@ -2886,7 +2954,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		   args->pages, args->offset);
 
 	/* determine pagesz idx */
-	pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
+	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
 			gmmu_page_size_big : gmmu_page_size_small;
 
 	vma = &vm->vma[pgsz_idx];
@@ -3086,9 +3154,11 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	if (vm->big_pages)
+	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]);
+	if (vm->vma[gmmu_page_size_big].init)
 		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (vm->vma[gmmu_page_size_small].init)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 }
@@ -3127,6 +3197,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
 	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
+		      mm->bar1.aperture_size - SZ_4K,
 		      mm->bar1.aperture_size, false, "bar1");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
@@ -3154,7 +3225,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
 	gk20a_init_vm(mm, vm, big_page_size,
-		      SZ_4K * 16, GK20A_PMU_VA_SIZE, false, "system");
+		      SZ_4K * 16, GK20A_PMU_VA_SIZE,
+		      GK20A_PMU_VA_SIZE * 2, false,
+		      "system");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)