-rw-r--r--	drivers/gpu/nvgpu/common/mm/bitmap_allocator.c	|   5
-rw-r--r--	drivers/gpu/nvgpu/common/mm/buddy_allocator.c	|  66
-rw-r--r--	drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c	|   5
-rw-r--r--	drivers/gpu/nvgpu/common/mm/page_allocator.c	|  11
-rw-r--r--	drivers/gpu/nvgpu/gk20a/as_gk20a.c		|   2
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c		| 282
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h		|  60
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/allocator.h	|  14
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c		| 115
9 files changed, 318 insertions(+), 242 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 6f267c85..5042980f 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -55,8 +55,11 @@ static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
 	return ba->base + ba->length;
 }
 
+/*
+ * @page_size is ignored.
+ */
 static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
-				    u64 base, u64 len)
+				    u64 base, u64 len, u32 page_size)
 {
 	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
 	u64 blks, offs, ret;
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 39a53801..eee0b634 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -484,8 +484,9 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
 	bud = list_first_entry(balloc_get_order_list(a, order),
 			       struct nvgpu_buddy, buddy_entry);
 
-	if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
-	    bud->pte_size != pte_size)
+	if (pte_size != BALLOC_PTE_SIZE_ANY &&
+	    pte_size != bud->pte_size &&
+	    bud->pte_size != BALLOC_PTE_SIZE_ANY)
 		return NULL;
 
 	return bud;
@@ -643,7 +644,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
  * necessary for this buddy to exist as well.
  */
 static struct nvgpu_buddy *__balloc_make_fixed_buddy(
-	struct nvgpu_buddy_allocator *a, u64 base, u64 order)
+	struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size)
 {
 	struct nvgpu_buddy *bud = NULL;
 	struct list_head *order_list;
@@ -664,6 +665,20 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 		order_list = balloc_get_order_list(a, cur_order);
 		list_for_each_entry(bud, order_list, buddy_entry) {
 			if (bud->start == cur_base) {
+				/*
+				 * Make sure page size matches if it's smaller
+				 * than a PDE sized buddy.
+				 */
+				if (bud->order <= a->pte_blk_order &&
+				    bud->pte_size != BALLOC_PTE_SIZE_ANY &&
+				    bud->pte_size != pte_size) {
+					/* Welp, that's the end of that. */
+					alloc_dbg(balloc_owner(a),
+						  "Fixed buddy PTE "
+						  "size mismatch!\n");
+					return NULL;
+				}
+
 				found = 1;
 				break;
 			}
@@ -683,7 +698,10 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 
 	/* Split this buddy as necessary until we get the target buddy. */
 	while (bud->start != base || bud->order != order) {
-		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
+		if (balloc_split_buddy(a, bud, pte_size)) {
+			alloc_dbg(balloc_owner(a),
+				  "split buddy failed? {0x%llx, %llu}\n",
+				  bud->start, bud->order);
 			balloc_coalesce(a, bud);
 			return NULL;
 		}
@@ -700,7 +718,7 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 
 static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
 				   struct nvgpu_fixed_alloc *falloc,
-				   u64 base, u64 len)
+				   u64 base, u64 len, int pte_size)
 {
 	u64 shifted_base, inc_base;
 	u64 align_order;
@@ -731,7 +749,7 @@ static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
 
 		bud = __balloc_make_fixed_buddy(a,
 			balloc_base_unshift(a, inc_base),
-			align_order);
+			align_order, pte_size);
 		if (!bud) {
 			alloc_dbg(balloc_owner(a),
 				  "Fixed buddy failed: {0x%llx, %llu}!\n",
@@ -817,17 +835,8 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
 		return 0;
 	}
 
-	/*
-	 * For now pass the base address of the allocator's region to
-	 * __get_pte_size(). This ensures we get the right page size for
-	 * the alloc but we don't have to know what the real address is
-	 * going to be quite yet.
-	 *
-	 * TODO: once userspace supports a unified address space pass 0 for
-	 * the base. This will make only 'len' affect the PTE size.
-	 */
 	if (a->flags & GPU_ALLOC_GVA_SPACE)
-		pte_size = __get_pte_size(a->vm, a->base, len);
+		pte_size = __get_pte_size(a->vm, 0, len);
 	else
 		pte_size = BALLOC_PTE_SIZE_ANY;
 
@@ -858,8 +867,9 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
  * Requires @__a to be locked.
  */
 static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
-				      u64 base, u64 len)
+				      u64 base, u64 len, u32 page_size)
 {
+	int pte_size = BALLOC_PTE_SIZE_ANY;
 	u64 ret, real_bytes = 0;
 	struct nvgpu_buddy *bud;
 	struct nvgpu_fixed_alloc *falloc = NULL;
@@ -874,6 +884,16 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
 	if (len == 0)
 		goto fail;
 
+	/* Check that the page size is valid. */
+	if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) {
+		if (page_size == a->vm->big_page_size)
+			pte_size = gmmu_page_size_big;
+		else if (page_size == SZ_4K)
+			pte_size = gmmu_page_size_small;
+		else
+			goto fail;
+	}
+
 	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
 	if (!falloc)
 		goto fail;
@@ -889,7 +909,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
 		goto fail_unlock;
 	}
 
-	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
+	ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size);
 	if (!ret) {
 		alloc_dbg(balloc_owner(a),
 			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
@@ -927,13 +947,13 @@ fail:
  * Please do not use this function unless _absolutely_ necessary.
  */
 static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
-				    u64 base, u64 len)
+				    u64 base, u64 len, u32 page_size)
 {
 	u64 alloc;
 	struct nvgpu_buddy_allocator *a = __a->priv;
 
 	alloc_lock(__a);
-	alloc = __nvgpu_balloc_fixed_buddy(__a, base, len);
+	alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size);
 	a->alloc_made = 1;
 	alloc_unlock(__a);
 
@@ -1034,7 +1054,7 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
 	}
 
 	/* Should not be possible to fail... */
-	addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length);
+	addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0);
 	if (!addr) {
 		err = -ENOMEM;
 		pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
@@ -1310,6 +1330,10 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	alloc_dbg(__a, " base 0x%llx\n", a->base);
 	alloc_dbg(__a, " size 0x%llx\n", a->length);
 	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
+	if (flags & GPU_ALLOC_GVA_SPACE)
+		alloc_dbg(balloc_owner(a),
+			  " pde_size 0x%llx\n",
+			  balloc_order_to_len(a, a->pte_blk_order));
 	alloc_dbg(__a, " max_order %llu\n", a->max_order);
 	alloc_dbg(__a, " flags 0x%llx\n", a->flags);
 
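
Note: the pte_size plumbing above can be sanity-checked in isolation. What follows is a compilable sketch of the page-size validation block that the patch adds to __nvgpu_balloc_fixed_buddy(), pulled out as a pure function; the constant values and the error convention here are assumptions for illustration, not the driver's exact definitions.

	#include <stdint.h>

	#define SZ_4K			4096u
	#define BALLOC_PTE_SIZE_ANY	(-1)	/* assumed value, illustrative */

	enum gmmu_pgsz { gmmu_page_size_small = 0, gmmu_page_size_big = 1 };

	/*
	 * Mirrors the "check that the page size is valid" hunk: returns 0 and
	 * fills *pte_size on success, -1 when the caller passed a page size
	 * the VM cannot honor (which makes the fixed alloc fail).
	 */
	static int pick_pte_size(int gva_space, int big_pages,
				 uint32_t big_page_size, uint32_t page_size,
				 int *pte_size)
	{
		*pte_size = BALLOC_PTE_SIZE_ANY;

		if (!gva_space || !big_pages)
			return 0;	/* no PTE-size constraint to enforce */

		if (page_size == big_page_size)
			*pte_size = gmmu_page_size_big;
		else if (page_size == SZ_4K)
			*pte_size = gmmu_page_size_small;
		else
			return -1;	/* neither 4K nor the big page size */
		return 0;
	}

The resulting pte_size then rides along into __balloc_make_fixed_buddy(), which (per the hunk at line 665) rejects any existing buddy at or below the PDE block order whose recorded PTE size conflicts.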
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index ebd779c0..cf8c4569 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -77,10 +77,11 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
 	a->ops->free(a, addr);
 }
 
-u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
+u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
+		      u32 page_size)
 {
 	if (a->ops->alloc_fixed)
-		return a->ops->alloc_fixed(a, base, len);
+		return a->ops->alloc_fixed(a, base, len, page_size);
 
 	return 0;
 }
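
The wrapper stays a thin, optional-op dispatcher. A self-contained sketch of that pattern with stub types (the real structs carry far more state; names here are illustrative only):

	#include <stdint.h>

	struct nvgpu_allocator;

	struct nvgpu_allocator_ops {
		uint64_t (*alloc_fixed)(struct nvgpu_allocator *a,
					uint64_t base, uint64_t len,
					uint32_t page_size);
	};

	struct nvgpu_allocator {
		const struct nvgpu_allocator_ops *ops;
	};

	/* As in nvgpu_alloc_fixed(): the op is optional, 0 signals failure. */
	static uint64_t alloc_fixed(struct nvgpu_allocator *a, uint64_t base,
				    uint64_t len, uint32_t page_size)
	{
		if (a->ops->alloc_fixed)
			return a->ops->alloc_fixed(a, base, len, page_size);
		return 0;
	}

Callers that have no GVA page size to report (carveouts, vidmem, instance blocks) pass 0, as the call sites updated throughout this patch show.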
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index c61b2238..96f8f242 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -648,7 +648,7 @@ done:
 }
 
 static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
-	struct nvgpu_page_allocator *a, u64 base, u64 length)
+	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
 	struct nvgpu_page_alloc *alloc;
 	struct page_alloc_chunk *c;
@@ -658,7 +658,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 	if (!alloc || !c)
 		goto fail;
 
-	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length);
+	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
 	if (!alloc->base) {
 		WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
 		goto fail;
@@ -680,8 +680,11 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
+/*
+ * @page_size is ignored.
+ */
 static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
-				  u64 base, u64 len)
+				  u64 base, u64 len, u32 page_size)
 {
 	struct nvgpu_page_allocator *a = page_allocator(__a);
 	struct nvgpu_page_alloc *alloc = NULL;
@@ -694,7 +697,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
 	alloc_lock(__a);
 
-	alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len);
+	alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
 	if (IS_ERR(alloc)) {
 		alloc_unlock(__a);
 		return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 07601d42..adf0297b 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -281,7 +281,7 @@ static int gk20a_as_ioctl_get_va_regions(
 		struct nvgpu_as_va_region region;
 		struct nvgpu_allocator *vma =
 			nvgpu_alloc_initialized(&vm->fixed) ?
-			&vm->fixed : &vm->vma[i];
+			&vm->fixed : vm->vma[i];
 
 		memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
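
The dropped `&` is the visible edge of the structural change in this patch: vm->vma[] becomes an array of pointers so that several page-size indices can alias one backing allocator. A sketch of the wiring gk20a_init_vm() performs (stub struct; only the aliasing logic is taken from the patch, the rest is invented for illustration):

	struct nvgpu_allocator { const char *name; };

	struct vm_sketch {
		/* indices must match gmmu_pgsz_gk20a: small, big, kernel */
		struct nvgpu_allocator *vma[3];
		struct nvgpu_allocator kernel;
		struct nvgpu_allocator user;
	};

	static void wire_vmas(struct vm_sketch *vm, int has_user_area)
	{
		/* Small and big pages now draw from one "user" space... */
		vm->vma[0] = has_user_area ? &vm->user : &vm->kernel;
		vm->vma[1] = has_user_area ? &vm->user : &vm->kernel;
		/* ...while the kernel index always maps to vm->kernel. */
		vm->vma[2] = &vm->kernel;
	}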
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdbaef79..83bbcb54 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	mutex_init(&mm->l2_op_lock);
 
 	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
+	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
+		NV_MM_DEFAULT_KERNEL_SIZE;
 	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
 	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 		       enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
-	struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+	struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
 	u64 offset;
 	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 		     u64 offset, u64 size,
 		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
-	struct nvgpu_allocator *vma = &vm->vma[pgsz_idx];
+	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
 		       vma->name, offset, size);
@@ -1790,13 +1791,7 @@ struct buffer_attrs {
 static void gmmu_select_page_size(struct vm_gk20a *vm,
 				  struct buffer_attrs *bfr)
 {
-	int i;
-	/* choose the biggest first (top->bottom) */
-	for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
-		if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
-			bfr->pgsz_idx = i;
-			break;
-		}
+	bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
 }
 
 static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	 * the alignment determined by gmmu_select_page_size().
 	 */
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx =
-			__nv_gmmu_va_is_big_page_region(vm, offset_align) ?
-			gmmu_page_size_big : gmmu_page_size_small;
+		int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
 		if (pgsz_idx > bfr.pgsz_idx) {
 			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
 				  offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
 	u64 addr = 0;
 
 	if (at)
-		addr = nvgpu_alloc_fixed(allocator, at, size);
+		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
 	else
 		addr = nvgpu_alloc(allocator, size);
 
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	 *
 	 * !!! TODO: cleanup.
 	 */
-	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
+	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
 					     vm->va_limit -
 					     mm->channel.kernel_size,
-					     512 * PAGE_SIZE);
+					     512 * PAGE_SIZE,
+					     SZ_4K);
 	if (!sema_sea->gpu_va) {
-		nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
 		gk20a_vm_put(vm);
 		return -ENOMEM;
 	}
@@ -4273,14 +4267,78 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
 	if (err) {
 		gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
-		nvgpu_free(&vm->vma[gmmu_page_size_small],
+		nvgpu_free(vm->vma[gmmu_page_size_small],
 			   vm->sema_pool->gpu_va);
 		gk20a_vm_put(vm);
 	}
 
 	return 0;
 }
 
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+	if (base & mask || size & mask)
+		return 0;
+	return 1;
+}
+
+/*
+ * Attempt to find a reserved memory area to determine PTE size for the passed
+ * mapping. If no reserved area can be found use small pages but drop a warning.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size)
+{
+	struct vm_reserved_va_node *node;
+
+	node = addr_to_reservation(vm, base);
+	if (!node)
+		return gmmu_page_size_small;
+
+	return node->pgsz_idx;
+}
+
+/**
+ * gk20a_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *	       the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
 int gk20a_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
 		  u32 big_page_size,
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 {
 	int err, i;
 	char alloc_name[32];
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
 	struct gk20a *g = mm->g;
 
 	/* note: this must match gmmu_pgsz_gk20a enum */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
 
-	WARN_ON(kernel_reserved + low_hole > aperture_size);
-	if (kernel_reserved > aperture_size)
+	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
 		return -ENOMEM;
 
 	vm->mm = mm;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	vm->va_start = low_hole;
 	vm->va_limit = aperture_size;
 	vm->big_pages = big_pages;
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	gk20a_dbg_info("small page-size (%dKB)",
 		       vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-
-	gk20a_dbg_info("big page-size (%dKB)",
-		       vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-
+	gk20a_dbg_info("big page-size (%dKB) (%s)\n",
+		       vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name);
 	gk20a_dbg_info("kernel page-size (%dKB)",
 		       vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
 
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_pdes;
 
 	/* setup vma limits */
-	small_vma_start = low_hole;
-
-	if (big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. What ever remains is allocated to large
-		 * pages. */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - kernel_reserved;
-	} else {
-		small_vma_limit = vm->va_limit - kernel_reserved;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = low_hole;
+	user_vma_limit = vm->va_limit - kernel_reserved;
 
 	kernel_vma_start = vm->va_limit - kernel_reserved;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	/*
+	 * A "user" area only makes sense for the GVA spaces. For VMs where
+	 * there is no "user" area user_vma_start will be equal to
+	 * user_vma_limit (i.e. a 0-sized space). In such a situation the
+	 * kernel area must be non-zero in length.
+	 */
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_pdes;
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	 * Attempt to make a separate VM for fixed allocations.
 	 */
 	if (g->separate_fixed_allocs &&
-	    small_vma_start < small_vma_limit) {
-		if (g->separate_fixed_allocs >= small_vma_limit)
+	    user_vma_start < user_vma_limit) {
+		if (g->separate_fixed_allocs >= user_vma_limit)
 			goto clean_up_pdes;
 
 		snprintf(alloc_name, sizeof(alloc_name),
@@ -4398,7 +4446,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 		err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
 						   vm, alloc_name,
-						   small_vma_start,
+						   user_vma_start,
 						   g->separate_fixed_allocs,
 						   SZ_4K,
 						   GPU_BALLOC_MAX_ORDER,
@@ -4407,47 +4455,41 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 			goto clean_up_ptes;
 
 		/* Make sure to update the user vma size. */
-		small_vma_start = g->separate_fixed_allocs;
-	}
-
-	if (small_vma_start < small_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
-			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_small],
-			vm, alloc_name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
-			SZ_4K,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_ptes;
+		user_vma_start = g->separate_fixed_allocs;
 	}
 
-	if (large_vma_start < large_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
-			 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, alloc_name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
+	if (user_vma_start < user_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
+
+		err = __nvgpu_buddy_allocator_init(g, &vm->user,
+						   vm, alloc_name,
+						   user_vma_start,
+						   user_vma_limit -
+						   user_vma_start,
+						   SZ_4K,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
 		if (err)
-			goto clean_up_small_allocator;
+			goto clean_up_ptes;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
-	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
-		 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-	/*
-	 * kernel reserved VMA is at the end of the aperture
-	 */
-	err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel],
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
+
+	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
 					   vm, alloc_name,
 					   kernel_vma_start,
 					   kernel_vma_limit - kernel_vma_start,
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 					   GPU_BALLOC_MAX_ORDER,
 					   GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (vm->va_limit > SZ_4G) {
 		err = gk20a_init_sema_pool(vm);
 		if (err)
-			goto clean_up_big_allocator;
+			goto clean_up_user_allocator;
 	}
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_ptes:
 	free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 	vm->as_share = as_share;
 	vm->enable_ctag = true;
 
-	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
+	snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+	err = gk20a_init_vm(mm, vm, big_page_size,
+			    big_page_size << 10,
 			    mm->channel.kernel_size,
 			    mm->channel.user_size + mm->channel.kernel_size,
 			    !mm->disable_bigpage, userspace_managed, name);
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	vma = &vm->vma[pgsz_idx];
+	vma = vm->vma[pgsz_idx];
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
 		if (nvgpu_alloc_initialized(&vm->fixed))
 			vma = &vm->fixed;
 		vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
 						(u64)args->pages *
-						(u64)args->page_size);
+						(u64)args->page_size,
+						args->page_size);
 	} else {
 		vaddr_start = nvgpu_alloc(vma,
 					  (u64)args->pages *
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		  args->pages, args->offset);
 
 	/* determine pagesz idx */
-	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
-		gmmu_page_size_big : gmmu_page_size_small;
+	pgsz_idx = __get_pte_size(vm, args->offset,
+				  args->page_size * args->pages);
 
 	if (nvgpu_alloc_initialized(&vm->fixed))
 		vma = &vm->fixed;
 	else
-		vma = &vm->vma[pgsz_idx];
+		vma = vm->vma[pgsz_idx];
 	nvgpu_free(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
 	if (nvgpu_alloc_initialized(&vm->fixed))
 		nvgpu_alloc_destroy(&vm->fixed);
 
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
-		      mm->bar1.aperture_size - SZ_4K,
-		      mm->bar1.aperture_size, false, false, "bar1");
+	gk20a_init_vm(mm, vm,
+		      big_page_size,
+		      SZ_4K,				/* Low hole */
+		      mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
+		      mm->bar1.aperture_size,
+		      true, false,
+		      "bar1");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct mem_desc *inst_block = &mm->pmu.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+	u32 low_hole, aperture_size;
+
+	/*
+	 * No user region - so we will pass that as zero sized.
+	 */
+	low_hole = SZ_4K * 16;
+	aperture_size = GK20A_PMU_VA_SIZE * 2;
 
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
 	gk20a_init_vm(mm, vm, big_page_size,
-		      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-		      GK20A_PMU_VA_SIZE * 2, false, false,
+		      low_hole,
+		      aperture_size - low_hole,
+		      aperture_size,
+		      true,
+		      false,
 		      "system");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
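
Whether a VM actually keeps big pages now hinges on gk20a_big_pages_possible(): both the base and size of each VMA must be multiples of big_page_size << 10, which the patch treats as the big-page PDE granularity (the same quantity the buddy allocator tracks as pte_blk_order). A standalone sketch with worked values; the 128K big-page figure below is just an example, not a claim about any particular chip:

	#include <stdint.h>
	#include <stdio.h>

	static int big_pages_possible(uint32_t big_page_size, uint64_t base,
				      uint64_t size)
	{
		/* One big-page PDE spans big_page_size * 1024 bytes of VA. */
		uint64_t mask = ((uint64_t)big_page_size << 10) - 1;

		return !(base & mask) && !(size & mask);
	}

	int main(void)
	{
		/* 128K big pages => bounds must be 128M aligned. */
		printf("%d\n", big_pages_possible(128 << 10, 0, 1ULL << 30));    /* 1 */
		printf("%d\n", big_pages_possible(128 << 10, 4096, 1ULL << 30)); /* 0 */
		return 0;
	}

This is why bar1 and the system VM can now pass big_pages = true above: the init path quietly falls back to small pages when the aperture bounds cannot satisfy the alignment.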
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 5ef8ae25..394d1d25 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -270,11 +270,13 @@ struct vm_gk20a {
 
 	struct gk20a_mm_entry pdb;
 
-	struct nvgpu_allocator vma[gmmu_nr_page_sizes];
-
 	/* If necessary, split fixed from non-fixed. */
 	struct nvgpu_allocator fixed;
 
+	struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
+	struct nvgpu_allocator kernel;
+	struct nvgpu_allocator user;
+
 	struct rb_root mapped_buffers;
 
 	struct list_head reserved_va_list;
@@ -425,7 +427,7 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 	return 16; /* 16MB is more than enough atm. */
 }
 
-/*The maximum GPU VA range supported */
+/* The maximum GPU VA range supported */
 #define NV_GMMU_VA_RANGE	38
 
 /* The default userspace-visible GPU VA size */
@@ -434,43 +436,39 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 /* The default kernel-reserved GPU VA size */
 #define NV_MM_DEFAULT_KERNEL_SIZE	(1ULL << 32)
 
-/*
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-static inline u64 __nv_gmmu_va_small_page_limit(void)
-{
-	return ((u64)SZ_1G * 16);
-}
-
-static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
-{
-	struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big];
-
-	if (!vm->big_pages)
-		return 0;
-
-	return addr >= nvgpu_alloc_base(a) &&
-		addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a);
-}
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size);
 
 /*
  * This determines the PTE size for a given alloc. Used by both the GVA space
  * allocator and the mm core code so that agreement can be reached on how to
  * map allocations.
+ *
+ * The page size of a buffer is this:
+ *
+ *   o  If the VM doesn't support large pages then obviously small pages
+ *      must be used.
+ *   o  If the base address is non-zero (fixed address map):
+ *      - Attempt to find a reserved memory area and use the page size
+ *        based on that.
+ *      - If no reserved page size is available, default to small pages.
+ *   o  If the base is zero:
+ *      - If the size is greater than or equal to the big page size, use big
+ *        pages.
+ *      - Otherwise use small pages.
  */
 static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
 						  u64 base, u64 size)
 {
-	/*
-	 * Currently userspace is not ready for a true unified address space.
-	 * As a result, even though the allocator supports mixed address spaces
-	 * the address spaces must be treated as separate for now.
-	 */
-	if (__nv_gmmu_va_is_big_page_region(vm, base))
-		return gmmu_page_size_big;
-	else
+	if (!vm->big_pages)
 		return gmmu_page_size_small;
+
+	if (base)
+		return __get_pte_size_fixed_map(vm, base, size);
+
+	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
+		return gmmu_page_size_big;
+	return gmmu_page_size_small;
 }
 
 /*
@@ -797,6 +795,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
 
 void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
 
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
+
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
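
Reduced to a pure function, the new __get_pte_size() policy is easy to sanity-check. A sketch under the assumption that the reserved-area lookup finds nothing (which, per the comment in the hunk above, defaults to small pages):

	#include <assert.h>
	#include <stdint.h>

	enum gmmu_pgsz { small_pg, big_pg };

	static enum gmmu_pgsz pte_size(int vm_big_pages, uint64_t base,
				       uint64_t size, uint64_t big_page_size)
	{
		if (!vm_big_pages)
			return small_pg;
		if (base)
			return small_pg; /* stub: no reservation found */
		return size >= big_page_size ? big_pg : small_pg;
	}

	int main(void)
	{
		/* 64K big pages: only zero-base allocs >= 64K go big. */
		assert(pte_size(1, 0, 128 << 10, 64 << 10) == big_pg);
		assert(pte_size(1, 0, 4 << 10, 64 << 10) == small_pg);
		assert(pte_size(0, 0, 1 << 20, 64 << 10) == small_pg);
		return 0;
	}

Note the design shift: the old helper answered "is this address inside the big-page region?", which only made sense with split small/large VMAs; the new one decides page size from the VM's capabilities and the allocation's own base/size, which is what a unified user VMA requires.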
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index dee9b562..d5a90c87 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -41,11 +41,15 @@ struct nvgpu_allocator_ops {
 	 * regular and fixed allocations then free_fixed() does not need to
 	 * be implemented. This behavior exists for legacy reasons and should
 	 * not be propagated to new allocators.
+	 *
+	 * For allocators where the @page_size field is not applicable it can
+	 * be left as 0. Otherwise a valid page size should be passed (4k or
+	 * what the large page size is).
 	 */
 	u64 (*alloc_fixed)(struct nvgpu_allocator *allocator,
-			   u64 base, u64 len);
+			   u64 base, u64 len, u32 page_size);
 	void (*free_fixed)(struct nvgpu_allocator *allocator,
 			   u64 base, u64 len);
 
 	/*
 	 * Allow allocators to reserve space for carveouts.
@@ -213,7 +217,8 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
 u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len);
 void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr);
 
-u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
+u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len,
+		      u32 page_size);
 void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
 
 int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
@@ -298,5 +303,8 @@ void nvgpu_alloc_debugfs_init(struct device *dev);
 	} while (0)
 
 #endif
+#define balloc_pr(alloctor, format, arg...)		\
+	pr_info("%-25s %25s() " format,			\
+		alloctor->name, __func__, ##arg)
 
 #endif /* NVGPU_ALLOCATOR_H */
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 66c9344b..a21a020d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->fixed))
+		nvgpu_alloc_destroy(&vm->fixed);
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	char name[32];
 	int err, i;
 	const bool userspace_managed =
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->mm = mm;
 	vm->as_share = as_share;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	for (i = 0; i < gmmu_nr_page_sizes; i++)
 		vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
 
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->handle = p->handle;
 
 	/* setup vma limits */
-	small_vma_start = vm->va_start;
-
-	if (vm->big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. The kernel reserved pages are at the end.
-		 * What ever remains is allocated to large pages.
-		 */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - mm->channel.kernel_size;
-	} else {
-		small_vma_limit = vm->va_limit - mm->channel.kernel_size;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = vm->va_start;
+	user_vma_limit = vm->va_limit - mm->channel.kernel_size;
 
 	kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_share;
 	}
 
-	if (small_vma_start < small_vma_limit) {
+	if (user_vma_start < user_vma_limit) {
 		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
 
 		err = __nvgpu_buddy_allocator_init(
 			g,
-			&vm->vma[gmmu_page_size_small],
+			vm->vma[gmmu_page_size_small],
 			vm, name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
+			user_vma_start,
+			user_vma_limit - user_vma_start,
 			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_ALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_share;
-	}
-
-	if (large_vma_start < large_vma_limit) {
-		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-			 gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_small_allocator;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
 	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
 		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
+
 	/*
 	 * kernel reserved VMA is at the end of the aperture
 	 */
 	err = __nvgpu_buddy_allocator_init(
 		g,
-		&vm->vma[gmmu_page_size_kernel],
+		vm->vma[gmmu_page_size_kernel],
 		vm, name,
 		kernel_vma_start,
 		kernel_vma_limit - kernel_vma_start,
 		SZ_4K,
 		GPU_BALLOC_MAX_ORDER,
 		GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = vgpu_get_handle(g);