diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2015-05-22 13:48:22 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-09-07 15:37:15 -0400 |
commit | eade809c265ada214c6c47e1ad9db1706c868da0 (patch) | |
tree | a132050d7a4c2193f338e0082d258e1eac958cc1 /drivers | |
parent | 57034b22ca17b217b683941dcebc0d69587d7d5e (diff) |
gpu: nvgpu: Separate kernel and user GPU VA regions
Separate the kernel and userspace regions in the GPU virtual address
space. Do this by reserving the last part of the GPU VA aperture for
the kernel, and extending the GPU VA aperture accordingly for regular address
spaces. This prevents the kernel polluting the userspace-visible GPU
VA regions, and thus, makes the success of fixed-address mapping more
predictable.
Bug 200077571
Change-Id: I63f0e73d4c815a4a9fa4a9ce568709974690ef0f
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/747191
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/as_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 151 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 47 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 11 |
4 files changed, 141 insertions, 70 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index feb22ea8..87b32add 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -262,7 +262,7 @@ static int gk20a_as_ioctl_get_va_regions( | |||
262 | unsigned int write_entries; | 262 | unsigned int write_entries; |
263 | struct nvgpu_as_va_region __user *user_region_ptr; | 263 | struct nvgpu_as_va_region __user *user_region_ptr; |
264 | struct vm_gk20a *vm = as_share->vm; | 264 | struct vm_gk20a *vm = as_share->vm; |
265 | int page_sizes = gmmu_nr_page_sizes; | 265 | int page_sizes = gmmu_page_size_kernel; |
266 | 266 | ||
267 | gk20a_dbg_fn(""); | 267 | gk20a_dbg_fn(""); |
268 | 268 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b12091bf..3021cb02 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -358,9 +358,12 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
358 | mutex_init(&mm->l2_op_lock); | 358 | mutex_init(&mm->l2_op_lock); |
359 | 359 | ||
360 | /*TBD: make channel vm size configurable */ | 360 | /*TBD: make channel vm size configurable */ |
361 | mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; | 361 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE; |
362 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | ||
362 | 363 | ||
363 | gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); | 364 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", |
365 | (int)(mm->channel.user_size >> 20), | ||
366 | (int)(mm->channel.kernel_size >> 20)); | ||
364 | 367 | ||
365 | err = gk20a_init_bar1_vm(mm); | 368 | err = gk20a_init_bar1_vm(mm); |
366 | if (err) | 369 | if (err) |
@@ -1052,7 +1055,7 @@ static void gmmu_select_page_size(struct vm_gk20a *vm, | |||
1052 | { | 1055 | { |
1053 | int i; | 1056 | int i; |
1054 | /* choose the biggest first (top->bottom) */ | 1057 | /* choose the biggest first (top->bottom) */ |
1055 | for (i = gmmu_nr_page_sizes-1; i >= 0; i--) | 1058 | for (i = gmmu_page_size_kernel - 1; i >= 0; i--) |
1056 | if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) { | 1059 | if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) { |
1057 | bfr->pgsz_idx = i; | 1060 | bfr->pgsz_idx = i; |
1058 | break; | 1061 | break; |
@@ -1438,8 +1441,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1438 | * the alignment determined by gmmu_select_page_size(). | 1441 | * the alignment determined by gmmu_select_page_size(). |
1439 | */ | 1442 | */ |
1440 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | 1443 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { |
1441 | int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ? | 1444 | int pgsz_idx = |
1442 | gmmu_page_size_big : gmmu_page_size_small; | 1445 | __nv_gmmu_va_is_big_page_region(vm, offset_align) ? |
1446 | gmmu_page_size_big : gmmu_page_size_small; | ||
1443 | if (pgsz_idx > bfr.pgsz_idx) { | 1447 | if (pgsz_idx > bfr.pgsz_idx) { |
1444 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", | 1448 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", |
1445 | offset_align, bfr.pgsz_idx, pgsz_idx); | 1449 | offset_align, bfr.pgsz_idx, pgsz_idx); |
@@ -1816,7 +1820,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1816 | *sgt, /* sg table */ | 1820 | *sgt, /* sg table */ |
1817 | 0, /* sg offset */ | 1821 | 0, /* sg offset */ |
1818 | size, | 1822 | size, |
1819 | 0, /* page size index = 0 i.e. SZ_4K */ | 1823 | gmmu_page_size_kernel, |
1820 | 0, /* kind */ | 1824 | 0, /* kind */ |
1821 | 0, /* ctag_offset */ | 1825 | 0, /* ctag_offset */ |
1822 | flags, rw_flag, | 1826 | flags, rw_flag, |
@@ -1991,7 +1995,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, | |||
1991 | g->ops.mm.gmmu_unmap(vm, | 1995 | g->ops.mm.gmmu_unmap(vm, |
1992 | vaddr, | 1996 | vaddr, |
1993 | size, | 1997 | size, |
1994 | 0, /* page size 4K */ | 1998 | gmmu_page_size_kernel, |
1995 | true, /*va_allocated */ | 1999 | true, /*va_allocated */ |
1996 | rw_flag, | 2000 | rw_flag, |
1997 | false, | 2001 | false, |
@@ -2395,6 +2399,11 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
2395 | buffer_offset, | 2399 | buffer_offset, |
2396 | sgt ? sgt->nents : 0); | 2400 | sgt ? sgt->nents : 0); |
2397 | 2401 | ||
2402 | /* note: here we need to map kernel to small, since the | ||
2403 | * low-level mmu code assumes 0 is small and 1 is big pages */ | ||
2404 | if (pgsz_idx == gmmu_page_size_kernel) | ||
2405 | pgsz_idx = gmmu_page_size_small; | ||
2406 | |||
2398 | if (space_to_skip & (page_size - 1)) | 2407 | if (space_to_skip & (page_size - 1)) |
2399 | return -EINVAL; | 2408 | return -EINVAL; |
2400 | 2409 | ||
@@ -2618,17 +2627,23 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2618 | struct vm_gk20a *vm, | 2627 | struct vm_gk20a *vm, |
2619 | u32 big_page_size, | 2628 | u32 big_page_size, |
2620 | u64 low_hole, | 2629 | u64 low_hole, |
2630 | u64 kernel_reserved, | ||
2621 | u64 aperture_size, | 2631 | u64 aperture_size, |
2622 | bool big_pages, | 2632 | bool big_pages, |
2623 | char *name) | 2633 | char *name) |
2624 | { | 2634 | { |
2625 | int err, i; | 2635 | int err, i; |
2626 | char alloc_name[32]; | 2636 | char alloc_name[32]; |
2627 | u64 small_vma_size, large_vma_size; | 2637 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, |
2638 | kernel_vma_start, kernel_vma_limit; | ||
2628 | u32 pde_lo, pde_hi; | 2639 | u32 pde_lo, pde_hi; |
2629 | 2640 | ||
2630 | /* note: keep the page sizes sorted lowest to highest here */ | 2641 | /* note: this must match gmmu_pgsz_gk20a enum */ |
2631 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; | 2642 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; |
2643 | |||
2644 | WARN_ON(kernel_reserved + low_hole > aperture_size); | ||
2645 | if (kernel_reserved > aperture_size) | ||
2646 | return -ENOMEM; | ||
2632 | 2647 | ||
2633 | vm->mm = mm; | 2648 | vm->mm = mm; |
2634 | 2649 | ||
@@ -2650,6 +2665,9 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2650 | gk20a_dbg_info("big page-size (%dKB)", | 2665 | gk20a_dbg_info("big page-size (%dKB)", |
2651 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); | 2666 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); |
2652 | 2667 | ||
2668 | gk20a_dbg_info("kernel page-size (%dKB)", | ||
2669 | vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | ||
2670 | |||
2653 | pde_range_from_vaddr_range(vm, | 2671 | pde_range_from_vaddr_range(vm, |
2654 | 0, vm->va_limit-1, | 2672 | 0, vm->va_limit-1, |
2655 | &pde_lo, &pde_hi); | 2673 | &pde_lo, &pde_hi); |
@@ -2668,36 +2686,65 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2668 | if (err) | 2686 | if (err) |
2669 | goto clean_up_pdes; | 2687 | goto clean_up_pdes; |
2670 | 2688 | ||
2671 | /* First 16GB of the address space goes towards small pages. What ever | 2689 | /* setup vma limits */ |
2672 | * remains is allocated to large pages. */ | 2690 | small_vma_start = low_hole; |
2673 | small_vma_size = vm->va_limit; | 2691 | |
2674 | if (big_pages) { | 2692 | if (big_pages) { |
2675 | small_vma_size = __nv_gmmu_va_small_page_limit(); | 2693 | /* First 16GB of the address space goes towards small |
2676 | large_vma_size = vm->va_limit - small_vma_size; | 2694 | * pages. What ever remains is allocated to large |
2695 | * pages. */ | ||
2696 | small_vma_limit = __nv_gmmu_va_small_page_limit(); | ||
2697 | large_vma_start = small_vma_limit; | ||
2698 | large_vma_limit = vm->va_limit - kernel_reserved; | ||
2699 | } else { | ||
2700 | small_vma_limit = vm->va_limit - kernel_reserved; | ||
2701 | large_vma_start = 0; | ||
2702 | large_vma_limit = 0; | ||
2677 | } | 2703 | } |
2678 | 2704 | ||
2679 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 2705 | kernel_vma_start = vm->va_limit - kernel_reserved; |
2680 | vm->gmmu_page_sizes[gmmu_page_size_small]>>10); | 2706 | kernel_vma_limit = vm->va_limit; |
2681 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | ||
2682 | vm, alloc_name, | ||
2683 | vm->va_start, | ||
2684 | small_vma_size - vm->va_start, | ||
2685 | SZ_4K, | ||
2686 | GPU_BALLOC_MAX_ORDER, | ||
2687 | GPU_BALLOC_GVA_SPACE); | ||
2688 | if (err) | ||
2689 | goto clean_up_ptes; | ||
2690 | 2707 | ||
2691 | if (big_pages) { | 2708 | gk20a_dbg_info( |
2709 | "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", | ||
2710 | small_vma_start, small_vma_limit, | ||
2711 | large_vma_start, large_vma_limit, | ||
2712 | kernel_vma_start, kernel_vma_limit); | ||
2713 | |||
2714 | /* check that starts do not exceed limits */ | ||
2715 | WARN_ON(small_vma_start > small_vma_limit); | ||
2716 | WARN_ON(large_vma_start > large_vma_limit); | ||
2717 | /* kernel_vma must also be non-zero */ | ||
2718 | WARN_ON(kernel_vma_start >= kernel_vma_limit); | ||
2719 | |||
2720 | if (small_vma_start > small_vma_limit || | ||
2721 | large_vma_start > large_vma_limit || | ||
2722 | kernel_vma_start >= kernel_vma_limit) { | ||
2723 | err = -EINVAL; | ||
2724 | goto clean_up_pdes; | ||
2725 | } | ||
2726 | |||
2727 | if (small_vma_start < small_vma_limit) { | ||
2728 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | ||
2729 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | ||
2730 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | ||
2731 | vm, alloc_name, | ||
2732 | small_vma_start, | ||
2733 | small_vma_limit - small_vma_start, | ||
2734 | SZ_4K, | ||
2735 | GPU_BALLOC_MAX_ORDER, | ||
2736 | GPU_BALLOC_GVA_SPACE); | ||
2737 | if (err) | ||
2738 | goto clean_up_ptes; | ||
2739 | } | ||
2740 | |||
2741 | if (large_vma_start < large_vma_limit) { | ||
2692 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 2742 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
2693 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); | 2743 | name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); |
2694 | /* | ||
2695 | * Big page VMA starts at the end of the small page VMA. | ||
2696 | */ | ||
2697 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 2744 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
2698 | vm, alloc_name, | 2745 | vm, alloc_name, |
2699 | small_vma_size, | 2746 | large_vma_start, |
2700 | large_vma_size, | 2747 | large_vma_limit - large_vma_start, |
2701 | big_page_size, | 2748 | big_page_size, |
2702 | GPU_BALLOC_MAX_ORDER, | 2749 | GPU_BALLOC_MAX_ORDER, |
2703 | GPU_BALLOC_GVA_SPACE); | 2750 | GPU_BALLOC_GVA_SPACE); |
@@ -2705,6 +2752,21 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2705 | goto clean_up_small_allocator; | 2752 | goto clean_up_small_allocator; |
2706 | } | 2753 | } |
2707 | 2754 | ||
2755 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys", | ||
2756 | name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | ||
2757 | /* | ||
2758 | * kernel reserved VMA is at the end of the aperture | ||
2759 | */ | ||
2760 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], | ||
2761 | vm, alloc_name, | ||
2762 | kernel_vma_start, | ||
2763 | kernel_vma_limit - kernel_vma_start, | ||
2764 | SZ_4K, | ||
2765 | GPU_BALLOC_MAX_ORDER, | ||
2766 | GPU_BALLOC_GVA_SPACE); | ||
2767 | if (err) | ||
2768 | goto clean_up_big_allocator; | ||
2769 | |||
2708 | vm->mapped_buffers = RB_ROOT; | 2770 | vm->mapped_buffers = RB_ROOT; |
2709 | 2771 | ||
2710 | mutex_init(&vm->update_gmmu_lock); | 2772 | mutex_init(&vm->update_gmmu_lock); |
@@ -2713,8 +2775,12 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2713 | 2775 | ||
2714 | return 0; | 2776 | return 0; |
2715 | 2777 | ||
2778 | clean_up_big_allocator: | ||
2779 | if (large_vma_start < large_vma_limit) | ||
2780 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | ||
2716 | clean_up_small_allocator: | 2781 | clean_up_small_allocator: |
2717 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 2782 | if (small_vma_start < small_vma_limit) |
2783 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
2718 | clean_up_ptes: | 2784 | clean_up_ptes: |
2719 | free_gmmu_pages(vm, &vm->pdb); | 2785 | free_gmmu_pages(vm, &vm->pdb); |
2720 | clean_up_pdes: | 2786 | clean_up_pdes: |
@@ -2756,7 +2822,9 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size) | |||
2756 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); | 2822 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); |
2757 | 2823 | ||
2758 | err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, | 2824 | err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, |
2759 | mm->channel.size, !mm->disable_bigpage, name); | 2825 | mm->channel.kernel_size, |
2826 | mm->channel.user_size + mm->channel.kernel_size, | ||
2827 | !mm->disable_bigpage, name); | ||
2760 | 2828 | ||
2761 | return err; | 2829 | return err; |
2762 | } | 2830 | } |
@@ -2886,7 +2954,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2886 | args->pages, args->offset); | 2954 | args->pages, args->offset); |
2887 | 2955 | ||
2888 | /* determine pagesz idx */ | 2956 | /* determine pagesz idx */ |
2889 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? | 2957 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? |
2890 | gmmu_page_size_big : gmmu_page_size_small; | 2958 | gmmu_page_size_big : gmmu_page_size_small; |
2891 | 2959 | ||
2892 | vma = &vm->vma[pgsz_idx]; | 2960 | vma = &vm->vma[pgsz_idx]; |
@@ -3086,9 +3154,11 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, | |||
3086 | 3154 | ||
3087 | void gk20a_deinit_vm(struct vm_gk20a *vm) | 3155 | void gk20a_deinit_vm(struct vm_gk20a *vm) |
3088 | { | 3156 | { |
3089 | if (vm->big_pages) | 3157 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); |
3158 | if (vm->vma[gmmu_page_size_big].init) | ||
3090 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 3159 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); |
3091 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 3160 | if (vm->vma[gmmu_page_size_small].init) |
3161 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
3092 | 3162 | ||
3093 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 3163 | gk20a_vm_free_entries(vm, &vm->pdb, 0); |
3094 | } | 3164 | } |
@@ -3127,6 +3197,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
3127 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | 3197 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; |
3128 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | 3198 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); |
3129 | gk20a_init_vm(mm, vm, big_page_size, SZ_4K, | 3199 | gk20a_init_vm(mm, vm, big_page_size, SZ_4K, |
3200 | mm->bar1.aperture_size - SZ_4K, | ||
3130 | mm->bar1.aperture_size, false, "bar1"); | 3201 | mm->bar1.aperture_size, false, "bar1"); |
3131 | 3202 | ||
3132 | err = gk20a_alloc_inst_block(g, inst_block); | 3203 | err = gk20a_alloc_inst_block(g, inst_block); |
@@ -3154,7 +3225,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
3154 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | 3225 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); |
3155 | 3226 | ||
3156 | gk20a_init_vm(mm, vm, big_page_size, | 3227 | gk20a_init_vm(mm, vm, big_page_size, |
3157 | SZ_4K * 16, GK20A_PMU_VA_SIZE, false, "system"); | 3228 | SZ_4K * 16, GK20A_PMU_VA_SIZE, |
3229 | GK20A_PMU_VA_SIZE * 2, false, | ||
3230 | "system"); | ||
3158 | 3231 | ||
3159 | err = gk20a_alloc_inst_block(g, inst_block); | 3232 | err = gk20a_alloc_inst_block(g, inst_block); |
3160 | if (err) | 3233 | if (err) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 5c6c285a..6786e3c2 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -127,9 +127,10 @@ struct gk20a_buffer_state { | |||
127 | }; | 127 | }; |
128 | 128 | ||
129 | enum gmmu_pgsz_gk20a { | 129 | enum gmmu_pgsz_gk20a { |
130 | gmmu_page_size_small = 0, | 130 | gmmu_page_size_small = 0, |
131 | gmmu_page_size_big = 1, | 131 | gmmu_page_size_big = 1, |
132 | gmmu_nr_page_sizes = 2 | 132 | gmmu_page_size_kernel = 2, |
133 | gmmu_nr_page_sizes = 3, | ||
133 | }; | 134 | }; |
134 | 135 | ||
135 | struct gk20a_comptags { | 136 | struct gk20a_comptags { |
@@ -284,8 +285,10 @@ void gk20a_mm_l2_invalidate(struct gk20a *g); | |||
284 | struct mm_gk20a { | 285 | struct mm_gk20a { |
285 | struct gk20a *g; | 286 | struct gk20a *g; |
286 | 287 | ||
288 | /* GPU VA default sizes address spaces for channels */ | ||
287 | struct { | 289 | struct { |
288 | u64 size; | 290 | u64 user_size; /* userspace-visible GPU VA region */ |
291 | u64 kernel_size; /* kernel-only GPU VA region */ | ||
289 | } channel; | 292 | } channel; |
290 | 293 | ||
291 | struct { | 294 | struct { |
@@ -340,26 +343,15 @@ static inline int bar1_aperture_size_mb_gk20a(void) | |||
340 | { | 343 | { |
341 | return 16; /* 16MB is more than enough atm. */ | 344 | return 16; /* 16MB is more than enough atm. */ |
342 | } | 345 | } |
343 | /* max address bits */ | ||
344 | static inline int max_physaddr_bits_gk20a(void) | ||
345 | { | ||
346 | return 40;/*"old" sys physaddr, meaningful? */ | ||
347 | } | ||
348 | static inline int max_vid_physaddr_bits_gk20a(void) | ||
349 | { | ||
350 | /* "vid phys" is asid/smmu phys?, | ||
351 | * i.e. is this the real sys physaddr? */ | ||
352 | return 37; | ||
353 | } | ||
354 | static inline int max_vaddr_bits_gk20a(void) | ||
355 | { | ||
356 | return 40; /* chopped for area? */ | ||
357 | } | ||
358 | 346 | ||
359 | /* | 347 | /*The maximum GPU VA range supported */ |
360 | * Amount of the GVA space we actually use is smaller than the available space. | 348 | #define NV_GMMU_VA_RANGE 38 |
361 | */ | 349 | |
362 | #define NV_GMMU_VA_RANGE 40 | 350 | /* The default userspace-visible GPU VA size */ |
351 | #define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) | ||
352 | |||
353 | /* The default kernel-reserved GPU VA size */ | ||
354 | #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) | ||
363 | 355 | ||
364 | /* | 356 | /* |
365 | * The bottom 16GB of the space are used for small pages, the remaining high | 357 | * The bottom 16GB of the space are used for small pages, the remaining high |
@@ -370,12 +362,14 @@ static inline u64 __nv_gmmu_va_small_page_limit(void) | |||
370 | return ((u64)SZ_1G * 16); | 362 | return ((u64)SZ_1G * 16); |
371 | } | 363 | } |
372 | 364 | ||
373 | static inline int __nv_gmmu_va_is_upper(struct vm_gk20a *vm, u64 addr) | 365 | static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) |
374 | { | 366 | { |
375 | if (!vm->big_pages) | 367 | if (!vm->big_pages) |
376 | return 0; | 368 | return 0; |
377 | 369 | ||
378 | return addr >= __nv_gmmu_va_small_page_limit(); | 370 | return addr >= vm->vma[gmmu_page_size_big].base && |
371 | addr < vm->vma[gmmu_page_size_big].base + | ||
372 | vm->vma[gmmu_page_size_big].length; | ||
379 | } | 373 | } |
380 | 374 | ||
381 | /* | 375 | /* |
@@ -391,7 +385,7 @@ static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, | |||
391 | * As a result, even though the allocator supports mixed address spaces | 385 | * As a result, even though the allocator supports mixed address spaces |
392 | * the address spaces must be treated as separate for now. | 386 | * the address spaces must be treated as separate for now. |
393 | */ | 387 | */ |
394 | if (__nv_gmmu_va_is_upper(vm, base)) | 388 | if (__nv_gmmu_va_is_big_page_region(vm, base)) |
395 | return gmmu_page_size_big; | 389 | return gmmu_page_size_big; |
396 | else | 390 | else |
397 | return gmmu_page_size_small; | 391 | return gmmu_page_size_small; |
@@ -617,6 +611,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
617 | struct vm_gk20a *vm, | 611 | struct vm_gk20a *vm, |
618 | u32 big_page_size, | 612 | u32 big_page_size, |
619 | u64 low_hole, | 613 | u64 low_hole, |
614 | u64 kernel_reserved, | ||
620 | u64 aperture_size, | 615 | u64 aperture_size, |
621 | bool big_pages, | 616 | bool big_pages, |
622 | char *name); | 617 | char *name); |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index c73037b6..2b23c4e6 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -34,9 +34,12 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g) | |||
34 | mm->g = g; | 34 | mm->g = g; |
35 | 35 | ||
36 | /*TBD: make channel vm size configurable */ | 36 | /*TBD: make channel vm size configurable */ |
37 | mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; | 37 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE; |
38 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | ||
38 | 39 | ||
39 | gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); | 40 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", |
41 | (int)(mm->channel.user_size >> 20), | ||
42 | (int)(mm->channel.kernel_size >> 20)); | ||
40 | 43 | ||
41 | /* gk20a_init_gpu_characteristics expects this to be populated */ | 44 | /* gk20a_init_gpu_characteristics expects this to be populated */ |
42 | vm->big_page_size = big_page_size; | 45 | vm->big_page_size = big_page_size; |
@@ -276,8 +279,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
276 | vm->big_page_size = big_page_size; | 279 | vm->big_page_size = big_page_size; |
277 | 280 | ||
278 | vm->va_start = big_page_size << 10; /* create a one pde hole */ | 281 | vm->va_start = big_page_size << 10; /* create a one pde hole */ |
279 | vm->va_limit = mm->channel.size; /* note this means channel.size is | 282 | vm->va_limit = mm->channel.user_size; /* note this means channel.size |
280 | really just the max */ | 283 | is really just the max */ |
281 | 284 | ||
282 | msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; | 285 | msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; |
283 | msg.handle = platform->virt_handle; | 286 | msg.handle = platform->virt_handle; |