diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2015-05-22 13:48:22 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-09-07 15:37:15 -0400 |
commit | eade809c265ada214c6c47e1ad9db1706c868da0 (patch) | |
tree | a132050d7a4c2193f338e0082d258e1eac958cc1 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |
parent | 57034b22ca17b217b683941dcebc0d69587d7d5e (diff) |
gpu: nvgpu: Separate kernel and user GPU VA regions
Separate the kernel and userspace regions in the GPU virtual address
space. Do this by reserving the last part of the GPU VA aperture for
the kernel, and extending the GPU VA aperture accordingly for regular
address spaces. This prevents the kernel from polluting the
userspace-visible GPU VA regions, and thus makes the success of
fixed-address mappings more predictable.
Bug 200077571
Change-Id: I63f0e73d4c815a4a9fa4a9ce568709974690ef0f
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/747191
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 151 |
1 files changed, 112 insertions, 39 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b12091bf..3021cb02 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -358,9 +358,12 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
358 | mutex_init(&mm->l2_op_lock); | 358 | mutex_init(&mm->l2_op_lock); |
359 | 359 | ||
360 | /*TBD: make channel vm size configurable */ | 360 | /*TBD: make channel vm size configurable */ |
361 | mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; | 361 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE; |
362 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | ||
362 | 363 | ||
363 | gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); | 364 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", |
365 | (int)(mm->channel.user_size >> 20), | ||
366 | (int)(mm->channel.kernel_size >> 20)); | ||
364 | 367 | ||
365 | err = gk20a_init_bar1_vm(mm); | 368 | err = gk20a_init_bar1_vm(mm); |
366 | if (err) | 369 | if (err) |
@@ -1052,7 +1055,7 @@ static void gmmu_select_page_size(struct vm_gk20a *vm, | |||
1052 | { | 1055 | { |
1053 | int i; | 1056 | int i; |
1054 | /* choose the biggest first (top->bottom) */ | 1057 | /* choose the biggest first (top->bottom) */ |
1055 | for (i = gmmu_nr_page_sizes-1; i >= 0; i--) | 1058 | for (i = gmmu_page_size_kernel - 1; i >= 0; i--) |
1056 | if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) { | 1059 | if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) { |
1057 | bfr->pgsz_idx = i; | 1060 | bfr->pgsz_idx = i; |
1058 | break; | 1061 | break; |
@@ -1438,8 +1441,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1438 | * the alignment determined by gmmu_select_page_size(). | 1441 | * the alignment determined by gmmu_select_page_size(). |
1439 | */ | 1442 | */ |
1440 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | 1443 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { |
1441 | int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ? | 1444 | int pgsz_idx = |
1442 | gmmu_page_size_big : gmmu_page_size_small; | 1445 | __nv_gmmu_va_is_big_page_region(vm, offset_align) ? |
1446 | gmmu_page_size_big : gmmu_page_size_small; | ||
1443 | if (pgsz_idx > bfr.pgsz_idx) { | 1447 | if (pgsz_idx > bfr.pgsz_idx) { |
1444 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", | 1448 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", |
1445 | offset_align, bfr.pgsz_idx, pgsz_idx); | 1449 | offset_align, bfr.pgsz_idx, pgsz_idx); |
@@ -1816,7 +1820,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1816 | *sgt, /* sg table */ | 1820 | *sgt, /* sg table */ |
1817 | 0, /* sg offset */ | 1821 | 0, /* sg offset */ |
1818 | size, | 1822 | size, |
1819 | 0, /* page size index = 0 i.e. SZ_4K */ | 1823 | gmmu_page_size_kernel, |
1820 | 0, /* kind */ | 1824 | 0, /* kind */ |
1821 | 0, /* ctag_offset */ | 1825 | 0, /* ctag_offset */ |
1822 | flags, rw_flag, | 1826 | flags, rw_flag, |
@@ -1991,7 +1995,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, | |||
1991 | g->ops.mm.gmmu_unmap(vm, | 1995 | g->ops.mm.gmmu_unmap(vm, |
1992 | vaddr, | 1996 | vaddr, |
1993 | size, | 1997 | size, |
1994 | 0, /* page size 4K */ | 1998 | gmmu_page_size_kernel, |
1995 | true, /*va_allocated */ | 1999 | true, /*va_allocated */ |
1996 | rw_flag, | 2000 | rw_flag, |
1997 | false, | 2001 | false, |
@@ -2395,6 +2399,11 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
2395 | buffer_offset, | 2399 | buffer_offset, |
2396 | sgt ? sgt->nents : 0); | 2400 | sgt ? sgt->nents : 0); |
2397 | 2401 | ||
2402 | /* note: here we need to map kernel to small, since the | ||
2403 | * low-level mmu code assumes 0 is small and 1 is big pages */ | ||
2404 | if (pgsz_idx == gmmu_page_size_kernel) | ||
2405 | pgsz_idx = gmmu_page_size_small; | ||
2406 | |||
2398 | if (space_to_skip & (page_size - 1)) | 2407 | if (space_to_skip & (page_size - 1)) |
2399 | return -EINVAL; | 2408 | return -EINVAL; |
2400 | 2409 | ||
@@ -2618,17 +2627,23 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2618 | struct vm_gk20a *vm, | 2627 | struct vm_gk20a *vm, |
2619 | u32 big_page_size, | 2628 | u32 big_page_size, |
2620 | u64 low_hole, | 2629 | u64 low_hole, |
2630 | u64 kernel_reserved, | ||
2621 | u64 aperture_size, | 2631 | u64 aperture_size, |
2622 | bool big_pages, | 2632 | bool big_pages, |
2623 | char *name) | 2633 | char *name) |
2624 | { | 2634 | { |
2625 | int err, i; | 2635 | int err, i; |
2626 | char alloc_name[32]; | 2636 | char alloc_name[32]; |
2627 | u64 small_vma_size, large_vma_size; | 2637 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, |
2638 | kernel_vma_start, kernel_vma_limit; | ||
2628 | u32 pde_lo, pde_hi; | 2639 | u32 pde_lo, pde_hi; |
2629 | 2640 | ||
2630 | /* note: keep the page sizes sorted lowest to highest here */ | 2641 | /* note: this must match gmmu_pgsz_gk20a enum */ |
2631 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; | 2642 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; |
2643 | |||
2644 | WARN_ON(kernel_reserved + low_hole > aperture_size); | ||
2645 | if (kernel_reserved > aperture_size) | ||
2646 | return -ENOMEM; | ||
2632 | 2647 | ||
2633 | vm->mm = mm; | 2648 | vm->mm = mm; |
2634 | 2649 | ||
@@ -2650,6 +2665,9 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2650 | gk20a_dbg_info("big page-size (%dKB)", | 2665 | gk20a_dbg_info("big page-size (%dKB)", |
2651 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); | 2666 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); |
2652 | 2667 | ||
2668 | gk20a_dbg_info("kernel page-size (%dKB)", | ||
2669 | vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | ||
2670 | |||
2653 | pde_range_from_vaddr_range(vm, | 2671 | pde_range_from_vaddr_range(vm, |
2654 | 0, vm->va_limit-1, | 2672 | 0, vm->va_limit-1, |
2655 | &pde_lo, &pde_hi); | 2673 | &pde_lo, &pde_hi); |
@@ -2668,36 +2686,65 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2668 | if (err) | 2686 | if (err) |
2669 | goto clean_up_pdes; | 2687 | goto clean_up_pdes; |
2670 | 2688 | ||
2671 | /* First 16GB of the address space goes towards small pages. What ever | 2689 | /* setup vma limits */ |
2672 | * remains is allocated to large pages. */ | 2690 | small_vma_start = low_hole; |
2673 | small_vma_size = vm->va_limit; | 2691 | |
2674 | if (big_pages) { | 2692 | if (big_pages) { |
2675 | small_vma_size = __nv_gmmu_va_small_page_limit(); | 2693 | /* First 16GB of the address space goes towards small |
2676 | large_vma_size = vm->va_limit - small_vma_size; | 2694 | * pages. What ever remains is allocated to large |
2695 | * pages. */ | ||
2696 | small_vma_limit = __nv_gmmu_va_small_page_limit(); | ||
2697 | large_vma_start = small_vma_limit; | ||
2698 | large_vma_limit = vm->va_limit - kernel_reserved; | ||
2699 | } else { | ||
2700 | small_vma_limit = vm->va_limit - kernel_reserved; | ||
2701 | large_vma_start = 0; | ||
2702 | large_vma_limit = 0; | ||
2677 | } | 2703 | } |
2678 | 2704 | ||
2679 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 2705 | kernel_vma_start = vm->va_limit - kernel_reserved; |
2680 | vm->gmmu_page_sizes[gmmu_page_size_small]>>10); | 2706 | kernel_vma_limit = vm->va_limit; |
2681 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | ||
2682 | vm, alloc_name, | ||
2683 | vm->va_start, | ||
2684 | small_vma_size - vm->va_start, | ||
2685 | SZ_4K, | ||
2686 | GPU_BALLOC_MAX_ORDER, | ||
2687 | GPU_BALLOC_GVA_SPACE); | ||
2688 | if (err) | ||
2689 | goto clean_up_ptes; | ||
2690 | 2707 | ||
2691 | if (big_pages) { | 2708 | gk20a_dbg_info( |
2709 | "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", | ||
2710 | small_vma_start, small_vma_limit, | ||
2711 | large_vma_start, large_vma_limit, | ||
2712 | kernel_vma_start, kernel_vma_limit); | ||
2713 | |||
2714 | /* check that starts do not exceed limits */ | ||
2715 | WARN_ON(small_vma_start > small_vma_limit); | ||
2716 | WARN_ON(large_vma_start > large_vma_limit); | ||
2717 | /* kernel_vma must also be non-zero */ | ||
2718 | WARN_ON(kernel_vma_start >= kernel_vma_limit); | ||
2719 | |||
2720 | if (small_vma_start > small_vma_limit || | ||
2721 | large_vma_start > large_vma_limit || | ||
2722 | kernel_vma_start >= kernel_vma_limit) { | ||
2723 | err = -EINVAL; | ||
2724 | goto clean_up_pdes; | ||
2725 | } | ||
2726 | |||
2727 | if (small_vma_start < small_vma_limit) { | ||
2728 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | ||
2729 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | ||
2730 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | ||
2731 | vm, alloc_name, | ||
2732 | small_vma_start, | ||
2733 | small_vma_limit - small_vma_start, | ||
2734 | SZ_4K, | ||
2735 | GPU_BALLOC_MAX_ORDER, | ||
2736 | GPU_BALLOC_GVA_SPACE); | ||
2737 | if (err) | ||
2738 | goto clean_up_ptes; | ||
2739 | } | ||
2740 | |||
2741 | if (large_vma_start < large_vma_limit) { | ||
2692 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 2742 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
2693 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); | 2743 | name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); |
2694 | /* | ||
2695 | * Big page VMA starts at the end of the small page VMA. | ||
2696 | */ | ||
2697 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 2744 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
2698 | vm, alloc_name, | 2745 | vm, alloc_name, |
2699 | small_vma_size, | 2746 | large_vma_start, |
2700 | large_vma_size, | 2747 | large_vma_limit - large_vma_start, |
2701 | big_page_size, | 2748 | big_page_size, |
2702 | GPU_BALLOC_MAX_ORDER, | 2749 | GPU_BALLOC_MAX_ORDER, |
2703 | GPU_BALLOC_GVA_SPACE); | 2750 | GPU_BALLOC_GVA_SPACE); |
@@ -2705,6 +2752,21 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2705 | goto clean_up_small_allocator; | 2752 | goto clean_up_small_allocator; |
2706 | } | 2753 | } |
2707 | 2754 | ||
2755 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys", | ||
2756 | name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | ||
2757 | /* | ||
2758 | * kernel reserved VMA is at the end of the aperture | ||
2759 | */ | ||
2760 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], | ||
2761 | vm, alloc_name, | ||
2762 | kernel_vma_start, | ||
2763 | kernel_vma_limit - kernel_vma_start, | ||
2764 | SZ_4K, | ||
2765 | GPU_BALLOC_MAX_ORDER, | ||
2766 | GPU_BALLOC_GVA_SPACE); | ||
2767 | if (err) | ||
2768 | goto clean_up_big_allocator; | ||
2769 | |||
2708 | vm->mapped_buffers = RB_ROOT; | 2770 | vm->mapped_buffers = RB_ROOT; |
2709 | 2771 | ||
2710 | mutex_init(&vm->update_gmmu_lock); | 2772 | mutex_init(&vm->update_gmmu_lock); |
@@ -2713,8 +2775,12 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2713 | 2775 | ||
2714 | return 0; | 2776 | return 0; |
2715 | 2777 | ||
2778 | clean_up_big_allocator: | ||
2779 | if (large_vma_start < large_vma_limit) | ||
2780 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | ||
2716 | clean_up_small_allocator: | 2781 | clean_up_small_allocator: |
2717 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 2782 | if (small_vma_start < small_vma_limit) |
2783 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
2718 | clean_up_ptes: | 2784 | clean_up_ptes: |
2719 | free_gmmu_pages(vm, &vm->pdb); | 2785 | free_gmmu_pages(vm, &vm->pdb); |
2720 | clean_up_pdes: | 2786 | clean_up_pdes: |
@@ -2756,7 +2822,9 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size) | |||
2756 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); | 2822 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); |
2757 | 2823 | ||
2758 | err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, | 2824 | err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, |
2759 | mm->channel.size, !mm->disable_bigpage, name); | 2825 | mm->channel.kernel_size, |
2826 | mm->channel.user_size + mm->channel.kernel_size, | ||
2827 | !mm->disable_bigpage, name); | ||
2760 | 2828 | ||
2761 | return err; | 2829 | return err; |
2762 | } | 2830 | } |
@@ -2886,7 +2954,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2886 | args->pages, args->offset); | 2954 | args->pages, args->offset); |
2887 | 2955 | ||
2888 | /* determine pagesz idx */ | 2956 | /* determine pagesz idx */ |
2889 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? | 2957 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? |
2890 | gmmu_page_size_big : gmmu_page_size_small; | 2958 | gmmu_page_size_big : gmmu_page_size_small; |
2891 | 2959 | ||
2892 | vma = &vm->vma[pgsz_idx]; | 2960 | vma = &vm->vma[pgsz_idx]; |
@@ -3086,9 +3154,11 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, | |||
3086 | 3154 | ||
3087 | void gk20a_deinit_vm(struct vm_gk20a *vm) | 3155 | void gk20a_deinit_vm(struct vm_gk20a *vm) |
3088 | { | 3156 | { |
3089 | if (vm->big_pages) | 3157 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); |
3158 | if (vm->vma[gmmu_page_size_big].init) | ||
3090 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 3159 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); |
3091 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 3160 | if (vm->vma[gmmu_page_size_small].init) |
3161 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
3092 | 3162 | ||
3093 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 3163 | gk20a_vm_free_entries(vm, &vm->pdb, 0); |
3094 | } | 3164 | } |
@@ -3127,6 +3197,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
3127 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | 3197 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; |
3128 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | 3198 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); |
3129 | gk20a_init_vm(mm, vm, big_page_size, SZ_4K, | 3199 | gk20a_init_vm(mm, vm, big_page_size, SZ_4K, |
3200 | mm->bar1.aperture_size - SZ_4K, | ||
3130 | mm->bar1.aperture_size, false, "bar1"); | 3201 | mm->bar1.aperture_size, false, "bar1"); |
3131 | 3202 | ||
3132 | err = gk20a_alloc_inst_block(g, inst_block); | 3203 | err = gk20a_alloc_inst_block(g, inst_block); |
@@ -3154,7 +3225,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
3154 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | 3225 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); |
3155 | 3226 | ||
3156 | gk20a_init_vm(mm, vm, big_page_size, | 3227 | gk20a_init_vm(mm, vm, big_page_size, |
3157 | SZ_4K * 16, GK20A_PMU_VA_SIZE, false, "system"); | 3228 | SZ_4K * 16, GK20A_PMU_VA_SIZE, |
3229 | GK20A_PMU_VA_SIZE * 2, false, | ||
3230 | "system"); | ||
3158 | 3231 | ||
3159 | err = gk20a_alloc_inst_block(g, inst_block); | 3232 | err = gk20a_alloc_inst_block(g, inst_block); |
3160 | if (err) | 3233 | if (err) |