author     Alex Waterman <alexw@nvidia.com>                     2015-06-17 13:31:08 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-01-31 19:23:07 -0500
commit     d630f1d99f60b1c2ec87506a2738bac4d1895b07 (patch)
tree       5b9cad58f585424a64e7b675d503a87bbcada254 /drivers
parent     793791ebb7ddbb34f0aaf3e300b24ed24aa76661 (diff)
gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to make the small page allocator and
the large page allocator into pointers (where they used to be just
structs), and then assign each of those pointers to the same actual
allocator, since the buddy allocator has supported mixed page sizes since
its inception. For the rest of the driver some changes had to be made in
order to actually support mixed pages in a single address space.

1. Unifying the allocation page size determination

Since the allocation and map operations happen at distinct times, both
mapping and allocation of GVA space must agree on page size. This is
because the allocator has to keep allocations of different page sizes in
separate PDEs to avoid the necessity of supporting mixed PDEs. To this end
a function __get_pte_size() was introduced which is used both by the
balloc code and the core GPU MM code. It determines page size based only
on the length of the mapping/allocation.

2. Fixed address allocation + page size

Similar to regular mappings/GVA allocations, fixed address mapping page
size determination had to be modified. In the past the address of the
mapping determined page size, since the address space split was by address
(low addresses were small pages, high addresses large pages). Since that
is no longer the case, the page size field in the reserve memory ioctl is
now honored by the mapping code. When, for instance, CUDA makes a memory
reservation it specifies small or large pages. When CUDA then requests
mappings within that address range, the page size is looked up in the
reserved memory struct.

Fixed address reservations were also modified to always allocate at PDE
granularity (64M or 128M, depending on the large page size). This prevents
non-fixed allocations from ending up in the same PDE and causing kernel
panics or GMMU faults.

3. The rest...

The rest of the changes are just byproducts of the above. Lots of places
required minor updates to use a pointer to the GVA allocator struct
instead of the struct itself.

Lastly, this change is not truly complete. More work remains to be done in
order to fully remove the notion that there was such a thing as separate
address spaces for different page sizes. Basically, after this patch what
remains is cleanup and proper documentation.

Bug 1396644
Bug 1729947

Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
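For reference, the page-size decision described in item (1) reduces to the
following condensed form (a reading aid only, condensed from the
__get_pte_size() / __get_pte_size_fixed_map() hunks in mm_gk20a.h further
down; it is not additional code in the patch):

        /* Condensed from the mm_gk20a.h hunk below; reading aid only. */
        static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
                                                          u64 base, u64 size)
        {
                if (!vm->big_pages)
                        return gmmu_page_size_small;

                /* Fixed-address map: use the reserved area's page size. */
                if (base)
                        return __get_pte_size_fixed_map(vm, base, size);

                /* Regular map: only the length matters. */
                if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
                        return gmmu_page_size_big;

                return gmmu_page_size_small;
        }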
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator.c    5
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c     66
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c     5
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c      11
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c                2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                282
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h                60
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/allocator.h       14
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c                  115
9 files changed, 318 insertions, 242 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 6f267c85..5042980f 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -55,8 +55,11 @@ static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
55 return ba->base + ba->length; 55 return ba->base + ba->length;
56} 56}
57 57
58/*
59 * @page_size is ignored.
60 */
58static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a, 61static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
59 u64 base, u64 len) 62 u64 base, u64 len, u32 page_size)
60{ 63{
61 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); 64 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
62 u64 blks, offs, ret; 65 u64 blks, offs, ret;
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 39a53801..eee0b634 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -484,8 +484,9 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
484 bud = list_first_entry(balloc_get_order_list(a, order), 484 bud = list_first_entry(balloc_get_order_list(a, order),
485 struct nvgpu_buddy, buddy_entry); 485 struct nvgpu_buddy, buddy_entry);
486 486
487 if (bud->pte_size != BALLOC_PTE_SIZE_ANY && 487 if (pte_size != BALLOC_PTE_SIZE_ANY &&
488 bud->pte_size != pte_size) 488 pte_size != bud->pte_size &&
489 bud->pte_size != BALLOC_PTE_SIZE_ANY)
489 return NULL; 490 return NULL;
490 491
491 return bud; 492 return bud;
@@ -643,7 +644,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
643 * necessary for this buddy to exist as well. 644 * necessary for this buddy to exist as well.
644 */ 645 */
645static struct nvgpu_buddy *__balloc_make_fixed_buddy( 646static struct nvgpu_buddy *__balloc_make_fixed_buddy(
646 struct nvgpu_buddy_allocator *a, u64 base, u64 order) 647 struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size)
647{ 648{
648 struct nvgpu_buddy *bud = NULL; 649 struct nvgpu_buddy *bud = NULL;
649 struct list_head *order_list; 650 struct list_head *order_list;
@@ -664,6 +665,20 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
664 order_list = balloc_get_order_list(a, cur_order); 665 order_list = balloc_get_order_list(a, cur_order);
665 list_for_each_entry(bud, order_list, buddy_entry) { 666 list_for_each_entry(bud, order_list, buddy_entry) {
666 if (bud->start == cur_base) { 667 if (bud->start == cur_base) {
668 /*
669 * Make sure page size matches if it's smaller
670 * than a PDE sized buddy.
671 */
672 if (bud->order <= a->pte_blk_order &&
673 bud->pte_size != BALLOC_PTE_SIZE_ANY &&
674 bud->pte_size != pte_size) {
675 /* Welp, that's the end of that. */
676 alloc_dbg(balloc_owner(a),
677 "Fixed buddy PTE "
678 "size mismatch!\n");
679 return NULL;
680 }
681
667 found = 1; 682 found = 1;
668 break; 683 break;
669 } 684 }
@@ -683,7 +698,10 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
683 698
684 /* Split this buddy as necessary until we get the target buddy. */ 699 /* Split this buddy as necessary until we get the target buddy. */
685 while (bud->start != base || bud->order != order) { 700 while (bud->start != base || bud->order != order) {
686 if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { 701 if (balloc_split_buddy(a, bud, pte_size)) {
702 alloc_dbg(balloc_owner(a),
703 "split buddy failed? {0x%llx, %llu}\n",
704 bud->start, bud->order);
687 balloc_coalesce(a, bud); 705 balloc_coalesce(a, bud);
688 return NULL; 706 return NULL;
689 } 707 }
@@ -700,7 +718,7 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
700 718
701static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, 719static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
702 struct nvgpu_fixed_alloc *falloc, 720 struct nvgpu_fixed_alloc *falloc,
703 u64 base, u64 len) 721 u64 base, u64 len, int pte_size)
704{ 722{
705 u64 shifted_base, inc_base; 723 u64 shifted_base, inc_base;
706 u64 align_order; 724 u64 align_order;
@@ -731,7 +749,7 @@ static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
731 749
732 bud = __balloc_make_fixed_buddy(a, 750 bud = __balloc_make_fixed_buddy(a,
733 balloc_base_unshift(a, inc_base), 751 balloc_base_unshift(a, inc_base),
734 align_order); 752 align_order, pte_size);
735 if (!bud) { 753 if (!bud) {
736 alloc_dbg(balloc_owner(a), 754 alloc_dbg(balloc_owner(a),
737 "Fixed buddy failed: {0x%llx, %llu}!\n", 755 "Fixed buddy failed: {0x%llx, %llu}!\n",
@@ -817,17 +835,8 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
817 return 0; 835 return 0;
818 } 836 }
819 837
820 /*
821 * For now pass the base address of the allocator's region to
822 * __get_pte_size(). This ensures we get the right page size for
823 * the alloc but we don't have to know what the real address is
824 * going to be quite yet.
825 *
826 * TODO: once userspace supports a unified address space pass 0 for
827 * the base. This will make only 'len' affect the PTE size.
828 */
829 if (a->flags & GPU_ALLOC_GVA_SPACE) 838 if (a->flags & GPU_ALLOC_GVA_SPACE)
830 pte_size = __get_pte_size(a->vm, a->base, len); 839 pte_size = __get_pte_size(a->vm, 0, len);
831 else 840 else
832 pte_size = BALLOC_PTE_SIZE_ANY; 841 pte_size = BALLOC_PTE_SIZE_ANY;
833 842
@@ -858,8 +867,9 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
858 * Requires @__a to be locked. 867 * Requires @__a to be locked.
859 */ 868 */
860static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, 869static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
861 u64 base, u64 len) 870 u64 base, u64 len, u32 page_size)
862{ 871{
872 int pte_size = BALLOC_PTE_SIZE_ANY;
863 u64 ret, real_bytes = 0; 873 u64 ret, real_bytes = 0;
864 struct nvgpu_buddy *bud; 874 struct nvgpu_buddy *bud;
865 struct nvgpu_fixed_alloc *falloc = NULL; 875 struct nvgpu_fixed_alloc *falloc = NULL;
@@ -874,6 +884,16 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
874 if (len == 0) 884 if (len == 0)
875 goto fail; 885 goto fail;
876 886
887 /* Check that the page size is valid. */
888 if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) {
889 if (page_size == a->vm->big_page_size)
890 pte_size = gmmu_page_size_big;
891 else if (page_size == SZ_4K)
892 pte_size = gmmu_page_size_small;
893 else
894 goto fail;
895 }
896
877 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); 897 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
878 if (!falloc) 898 if (!falloc)
879 goto fail; 899 goto fail;
@@ -889,7 +909,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
889 goto fail_unlock; 909 goto fail_unlock;
890 } 910 }
891 911
892 ret = __balloc_do_alloc_fixed(a, falloc, base, len); 912 ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size);
893 if (!ret) { 913 if (!ret) {
894 alloc_dbg(balloc_owner(a), 914 alloc_dbg(balloc_owner(a),
895 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", 915 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
@@ -927,13 +947,13 @@ fail:
927 * Please do not use this function unless _absolutely_ necessary. 947 * Please do not use this function unless _absolutely_ necessary.
928 */ 948 */
929static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, 949static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
930 u64 base, u64 len) 950 u64 base, u64 len, u32 page_size)
931{ 951{
932 u64 alloc; 952 u64 alloc;
933 struct nvgpu_buddy_allocator *a = __a->priv; 953 struct nvgpu_buddy_allocator *a = __a->priv;
934 954
935 alloc_lock(__a); 955 alloc_lock(__a);
936 alloc = __nvgpu_balloc_fixed_buddy(__a, base, len); 956 alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size);
937 a->alloc_made = 1; 957 a->alloc_made = 1;
938 alloc_unlock(__a); 958 alloc_unlock(__a);
939 959
@@ -1034,7 +1054,7 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
1034 } 1054 }
1035 1055
1036 /* Should not be possible to fail... */ 1056 /* Should not be possible to fail... */
1037 addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length); 1057 addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0);
1038 if (!addr) { 1058 if (!addr) {
1039 err = -ENOMEM; 1059 err = -ENOMEM;
1040 pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); 1060 pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
@@ -1310,6 +1330,10 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
1310 alloc_dbg(__a, " base 0x%llx\n", a->base); 1330 alloc_dbg(__a, " base 0x%llx\n", a->base);
1311 alloc_dbg(__a, " size 0x%llx\n", a->length); 1331 alloc_dbg(__a, " size 0x%llx\n", a->length);
1312 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); 1332 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
1333 if (flags & GPU_ALLOC_GVA_SPACE)
1334 alloc_dbg(balloc_owner(a),
1335 " pde_size 0x%llx\n",
1336 balloc_order_to_len(a, a->pte_blk_order));
1313 alloc_dbg(__a, " max_order %llu\n", a->max_order); 1337 alloc_dbg(__a, " max_order %llu\n", a->max_order);
1314 alloc_dbg(__a, " flags 0x%llx\n", a->flags); 1338 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
1315 1339
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index ebd779c0..cf8c4569 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -77,10 +77,11 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
77 a->ops->free(a, addr); 77 a->ops->free(a, addr);
78} 78}
79 79
80u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len) 80u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
81 u32 page_size)
81{ 82{
82 if (a->ops->alloc_fixed) 83 if (a->ops->alloc_fixed)
83 return a->ops->alloc_fixed(a, base, len); 84 return a->ops->alloc_fixed(a, base, len, page_size);
84 85
85 return 0; 86 return 0;
86} 87}
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index c61b2238..96f8f242 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -648,7 +648,7 @@ done:
648} 648}
649 649
650static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( 650static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
651 struct nvgpu_page_allocator *a, u64 base, u64 length) 651 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
652{ 652{
653 struct nvgpu_page_alloc *alloc; 653 struct nvgpu_page_alloc *alloc;
654 struct page_alloc_chunk *c; 654 struct page_alloc_chunk *c;
@@ -658,7 +658,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
658 if (!alloc || !c) 658 if (!alloc || !c)
659 goto fail; 659 goto fail;
660 660
661 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length); 661 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
662 if (!alloc->base) { 662 if (!alloc->base) {
663 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); 663 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
664 goto fail; 664 goto fail;
@@ -680,8 +680,11 @@ fail:
680 return ERR_PTR(-ENOMEM); 680 return ERR_PTR(-ENOMEM);
681} 681}
682 682
683/*
684 * @page_size is ignored.
685 */
683static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, 686static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
684 u64 base, u64 len) 687 u64 base, u64 len, u32 page_size)
685{ 688{
686 struct nvgpu_page_allocator *a = page_allocator(__a); 689 struct nvgpu_page_allocator *a = page_allocator(__a);
687 struct nvgpu_page_alloc *alloc = NULL; 690 struct nvgpu_page_alloc *alloc = NULL;
@@ -694,7 +697,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
694 697
695 alloc_lock(__a); 698 alloc_lock(__a);
696 699
697 alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len); 700 alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
698 if (IS_ERR(alloc)) { 701 if (IS_ERR(alloc)) {
699 alloc_unlock(__a); 702 alloc_unlock(__a);
700 return 0; 703 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 07601d42..adf0297b 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -281,7 +281,7 @@ static int gk20a_as_ioctl_get_va_regions(
281 struct nvgpu_as_va_region region; 281 struct nvgpu_as_va_region region;
282 struct nvgpu_allocator *vma = 282 struct nvgpu_allocator *vma =
283 nvgpu_alloc_initialized(&vm->fixed) ? 283 nvgpu_alloc_initialized(&vm->fixed) ?
284 &vm->fixed : &vm->vma[i]; 284 &vm->fixed : vm->vma[i];
285 285
286 memset(&region, 0, sizeof(struct nvgpu_as_va_region)); 286 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
287 287
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdbaef79..83bbcb54 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
1001 mutex_init(&mm->l2_op_lock); 1001 mutex_init(&mm->l2_op_lock);
1002 1002
1003 /*TBD: make channel vm size configurable */ 1003 /*TBD: make channel vm size configurable */
1004 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE; 1004 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
1005 NV_MM_DEFAULT_KERNEL_SIZE;
1005 mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; 1006 mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
1006 1007
1007 gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", 1008 gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
1626 enum gmmu_pgsz_gk20a gmmu_pgsz_idx) 1627 enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
1627 1628
1628{ 1629{
1629 struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx]; 1630 struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
1630 u64 offset; 1631 u64 offset;
1631 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; 1632 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
1632 1633
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
1663 u64 offset, u64 size, 1664 u64 offset, u64 size,
1664 enum gmmu_pgsz_gk20a pgsz_idx) 1665 enum gmmu_pgsz_gk20a pgsz_idx)
1665{ 1666{
1666 struct nvgpu_allocator *vma = &vm->vma[pgsz_idx]; 1667 struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
1667 1668
1668 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", 1669 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
1669 vma->name, offset, size); 1670 vma->name, offset, size);
@@ -1790,13 +1791,7 @@ struct buffer_attrs {
1790static void gmmu_select_page_size(struct vm_gk20a *vm, 1791static void gmmu_select_page_size(struct vm_gk20a *vm,
1791 struct buffer_attrs *bfr) 1792 struct buffer_attrs *bfr)
1792{ 1793{
1793 int i; 1794 bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
1794 /* choose the biggest first (top->bottom) */
1795 for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
1796 if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
1797 bfr->pgsz_idx = i;
1798 break;
1799 }
1800} 1795}
1801 1796
1802static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, 1797static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
2497 * the alignment determined by gmmu_select_page_size(). 2492 * the alignment determined by gmmu_select_page_size().
2498 */ 2493 */
2499 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { 2494 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
2500 int pgsz_idx = 2495 int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
2501 __nv_gmmu_va_is_big_page_region(vm, offset_align) ?
2502 gmmu_page_size_big : gmmu_page_size_small;
2503 if (pgsz_idx > bfr.pgsz_idx) { 2496 if (pgsz_idx > bfr.pgsz_idx) {
2504 gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", 2497 gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
2505 offset_align, bfr.pgsz_idx, pgsz_idx); 2498 offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
3149 u64 addr = 0; 3142 u64 addr = 0;
3150 3143
3151 if (at) 3144 if (at)
3152 addr = nvgpu_alloc_fixed(allocator, at, size); 3145 addr = nvgpu_alloc_fixed(allocator, at, size, 0);
3153 else 3146 else
3154 addr = nvgpu_alloc(allocator, size); 3147 addr = nvgpu_alloc(allocator, size);
3155 3148
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
4260 * 4253 *
4261 * !!! TODO: cleanup. 4254 * !!! TODO: cleanup.
4262 */ 4255 */
4263 sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel], 4256 sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
4264 vm->va_limit - 4257 vm->va_limit -
4265 mm->channel.kernel_size, 4258 mm->channel.kernel_size,
4266 512 * PAGE_SIZE); 4259 512 * PAGE_SIZE,
4260 SZ_4K);
4267 if (!sema_sea->gpu_va) { 4261 if (!sema_sea->gpu_va) {
4268 nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); 4262 nvgpu_free(&vm->kernel, sema_sea->gpu_va);
4269 gk20a_vm_put(vm); 4263 gk20a_vm_put(vm);
4270 return -ENOMEM; 4264 return -ENOMEM;
4271 } 4265 }
@@ -4273,14 +4267,78 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
4273 err = gk20a_semaphore_pool_map(vm->sema_pool, vm); 4267 err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
4274 if (err) { 4268 if (err) {
4275 gk20a_semaphore_pool_unmap(vm->sema_pool, vm); 4269 gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
4276 nvgpu_free(&vm->vma[gmmu_page_size_small], 4270 nvgpu_free(vm->vma[gmmu_page_size_small],
4277 vm->sema_pool->gpu_va); 4271 vm->sema_pool->gpu_va);
4278 gk20a_vm_put(vm); 4272 gk20a_vm_put(vm);
4279 } 4273 }
4280 4274
4281 return 0; 4275 return 0;
4282} 4276}
4283 4277
4278/*
4279 * Determine if the passed address space can support big pages or not.
4280 */
4281int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
4282{
4283 u64 mask = ((u64)vm->big_page_size << 10) - 1;
4284
4285 if (base & mask || size & mask)
4286 return 0;
4287 return 1;
4288}
4289
4290/*
4291 * Attempt to find a reserved memory area to determine PTE size for the passed
4292 * mapping. If no reserved area can be found use small pages but drop a warning.
4293 */
4294enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
4295 u64 base, u64 size)
4296{
4297 struct vm_reserved_va_node *node;
4298
4299 node = addr_to_reservation(vm, base);
4300 if (!node)
4301 return gmmu_page_size_small;
4302
4303 return node->pgsz_idx;
4304}
4305
4306/**
4307 * gk20a_init_vm() - Initialize an address space.
4308 *
4309 * @mm - Parent MM.
4310 * @vm - The VM to init.
4311 * @big_page_size - Size of big pages associated with this VM.
4312 * @low_hole - The size of the low hole (unaddressable memory at the bottom of
4313 * the address space).
4314 * @kernel_reserved - Space reserved for kernel only allocations.
4315 * @aperture_size - Total size of the aperture.
4316 * @big_pages - Ignored. Will be set based on other passed params.
4317 * @name - Name of the address space.
4318 *
4319 * This function initializes an address space according to the following map:
4320 *
4321 * +--+ 0x0
4322 * | |
4323 * +--+ @low_hole
4324 * | |
4325 * ~ ~ This is the "user" section.
4326 * | |
4327 * +--+ @aperture_size - @kernel_reserved
4328 * | |
4329 * ~ ~ This is the "kernel" section.
4330 * | |
4331 * +--+ @aperture_size
4332 *
4333 * The user section is therefore whatever is left over after the @low_hole and
4334 * @kernel_reserved memory have been portioned out. The @kernel_reserved is
4335 * always present at the top of the memory space and the @low_hole is always at
4336 * the bottom.
4337 *
4338 * For certain address spaces a "user" section makes no sense (bar1, etc) so in
4339 * such cases the @kernel_reserved and @low_hole should sum to exactly
4340 * @aperture_size.
4341 */
4284int gk20a_init_vm(struct mm_gk20a *mm, 4342int gk20a_init_vm(struct mm_gk20a *mm,
4285 struct vm_gk20a *vm, 4343 struct vm_gk20a *vm,
4286 u32 big_page_size, 4344 u32 big_page_size,
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4293{ 4351{
4294 int err, i; 4352 int err, i;
4295 char alloc_name[32]; 4353 char alloc_name[32];
4296 u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, 4354 u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
4297 kernel_vma_start, kernel_vma_limit;
4298 u32 pde_lo, pde_hi; 4355 u32 pde_lo, pde_hi;
4299 struct gk20a *g = mm->g; 4356 struct gk20a *g = mm->g;
4300 4357
4301 /* note: this must match gmmu_pgsz_gk20a enum */ 4358 /* note: this must match gmmu_pgsz_gk20a enum */
4302 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; 4359 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
4303 4360
4304 WARN_ON(kernel_reserved + low_hole > aperture_size); 4361 if (WARN_ON(kernel_reserved + low_hole > aperture_size))
4305 if (kernel_reserved > aperture_size)
4306 return -ENOMEM; 4362 return -ENOMEM;
4307 4363
4308 vm->mm = mm; 4364 vm->mm = mm;
4309 4365
4366 /* Set up vma pointers. */
4367 vm->vma[0] = &vm->user;
4368 vm->vma[1] = &vm->user;
4369 vm->vma[2] = &vm->kernel;
4370
4310 vm->va_start = low_hole; 4371 vm->va_start = low_hole;
4311 vm->va_limit = aperture_size; 4372 vm->va_limit = aperture_size;
4312 vm->big_pages = big_pages; 4373 vm->big_pages = big_pages;
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4321 4382
4322 gk20a_dbg_info("small page-size (%dKB)", 4383 gk20a_dbg_info("small page-size (%dKB)",
4323 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); 4384 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
4324 4385 gk20a_dbg_info("big page-size (%dKB) (%s)\n",
4325 gk20a_dbg_info("big page-size (%dKB)", 4386 vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name);
4326 vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
4327
4328 gk20a_dbg_info("kernel page-size (%dKB)", 4387 gk20a_dbg_info("kernel page-size (%dKB)",
4329 vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); 4388 vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
4330 4389
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4348 goto clean_up_pdes; 4407 goto clean_up_pdes;
4349 4408
4350 /* setup vma limits */ 4409 /* setup vma limits */
4351 small_vma_start = low_hole; 4410 user_vma_start = low_hole;
4352 4411 user_vma_limit = vm->va_limit - kernel_reserved;
4353 if (big_pages) {
4354 /* First 16GB of the address space goes towards small
4355 * pages. What ever remains is allocated to large
4356 * pages. */
4357 small_vma_limit = __nv_gmmu_va_small_page_limit();
4358 large_vma_start = small_vma_limit;
4359 large_vma_limit = vm->va_limit - kernel_reserved;
4360 } else {
4361 small_vma_limit = vm->va_limit - kernel_reserved;
4362 large_vma_start = 0;
4363 large_vma_limit = 0;
4364 }
4365 4412
4366 kernel_vma_start = vm->va_limit - kernel_reserved; 4413 kernel_vma_start = vm->va_limit - kernel_reserved;
4367 kernel_vma_limit = vm->va_limit; 4414 kernel_vma_limit = vm->va_limit;
4368 4415
4369 gk20a_dbg_info( 4416 gk20a_dbg_info(
4370 "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", 4417 "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
4371 small_vma_start, small_vma_limit, 4418 user_vma_start, user_vma_limit,
4372 large_vma_start, large_vma_limit,
4373 kernel_vma_start, kernel_vma_limit); 4419 kernel_vma_start, kernel_vma_limit);
4374 4420
4375 /* check that starts do not exceed limits */ 4421 WARN_ON(user_vma_start > user_vma_limit);
4376 WARN_ON(small_vma_start > small_vma_limit);
4377 WARN_ON(large_vma_start > large_vma_limit);
4378 /* kernel_vma must also be non-zero */
4379 WARN_ON(kernel_vma_start >= kernel_vma_limit); 4422 WARN_ON(kernel_vma_start >= kernel_vma_limit);
4380 4423
4381 if (small_vma_start > small_vma_limit || 4424 /*
4382 large_vma_start > large_vma_limit || 4425 * A "user" area only makes sense for the GVA spaces. For VMs where
4426 * there is no "user" area user_vma_start will be equal to
4427 * user_vma_limit (i.e a 0 sized space). In such a situation the kernel
4428 * area must be non-zero in length.
4429 */
4430 if (user_vma_start > user_vma_limit ||
4383 kernel_vma_start >= kernel_vma_limit) { 4431 kernel_vma_start >= kernel_vma_limit) {
4384 err = -EINVAL; 4432 err = -EINVAL;
4385 goto clean_up_pdes; 4433 goto clean_up_pdes;
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4389 * Attempt to make a separate VM for fixed allocations. 4437 * Attempt to make a separate VM for fixed allocations.
4390 */ 4438 */
4391 if (g->separate_fixed_allocs && 4439 if (g->separate_fixed_allocs &&
4392 small_vma_start < small_vma_limit) { 4440 user_vma_start < user_vma_limit) {
4393 if (g->separate_fixed_allocs >= small_vma_limit) 4441 if (g->separate_fixed_allocs >= user_vma_limit)
4394 goto clean_up_pdes; 4442 goto clean_up_pdes;
4395 4443
4396 snprintf(alloc_name, sizeof(alloc_name), 4444 snprintf(alloc_name, sizeof(alloc_name),
@@ -4398,7 +4446,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4398 4446
4399 err = __nvgpu_buddy_allocator_init(g, &vm->fixed, 4447 err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
4400 vm, alloc_name, 4448 vm, alloc_name,
4401 small_vma_start, 4449 user_vma_start,
4402 g->separate_fixed_allocs, 4450 g->separate_fixed_allocs,
4403 SZ_4K, 4451 SZ_4K,
4404 GPU_BALLOC_MAX_ORDER, 4452 GPU_BALLOC_MAX_ORDER,
@@ -4407,47 +4455,41 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4407 goto clean_up_ptes; 4455 goto clean_up_ptes;
4408 4456
4409 /* Make sure to update the user vma size. */ 4457 /* Make sure to update the user vma size. */
4410 small_vma_start = g->separate_fixed_allocs; 4458 user_vma_start = g->separate_fixed_allocs;
4411 }
4412
4413 if (small_vma_start < small_vma_limit) {
4414 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
4415 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
4416 err = __nvgpu_buddy_allocator_init(
4417 g,
4418 &vm->vma[gmmu_page_size_small],
4419 vm, alloc_name,
4420 small_vma_start,
4421 small_vma_limit - small_vma_start,
4422 SZ_4K,
4423 GPU_BALLOC_MAX_ORDER,
4424 GPU_ALLOC_GVA_SPACE);
4425 if (err)
4426 goto clean_up_ptes;
4427 } 4459 }
4428 4460
4429 if (large_vma_start < large_vma_limit) { 4461 if (user_vma_start < user_vma_limit) {
4430 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", 4462 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
4431 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); 4463 if (!gk20a_big_pages_possible(vm, user_vma_start,
4432 err = __nvgpu_buddy_allocator_init( 4464 user_vma_limit - user_vma_start))
4433 g, 4465 vm->big_pages = false;
4434 &vm->vma[gmmu_page_size_big], 4466
4435 vm, alloc_name, 4467 err = __nvgpu_buddy_allocator_init(g, &vm->user,
4436 large_vma_start, 4468 vm, alloc_name,
4437 large_vma_limit - large_vma_start, 4469 user_vma_start,
4438 big_page_size, 4470 user_vma_limit -
4439 GPU_BALLOC_MAX_ORDER, 4471 user_vma_start,
4440 GPU_ALLOC_GVA_SPACE); 4472 SZ_4K,
4473 GPU_BALLOC_MAX_ORDER,
4474 GPU_ALLOC_GVA_SPACE);
4441 if (err) 4475 if (err)
4442 goto clean_up_small_allocator; 4476 goto clean_up_ptes;
4477 } else {
4478 /*
4479 * Make these allocator pointers point to the kernel allocator
4480 * since we still use the legacy notion of page size to choose
4481 * the allocator.
4482 */
4483 vm->vma[0] = &vm->kernel;
4484 vm->vma[1] = &vm->kernel;
4443 } 4485 }
4444 4486
4445 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys", 4487 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
4446 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); 4488 if (!gk20a_big_pages_possible(vm, kernel_vma_start,
4447 /* 4489 kernel_vma_limit - kernel_vma_start))
4448 * kernel reserved VMA is at the end of the aperture 4490 vm->big_pages = false;
4449 */ 4491
4450 err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], 4492 err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
4451 vm, alloc_name, 4493 vm, alloc_name,
4452 kernel_vma_start, 4494 kernel_vma_start,
4453 kernel_vma_limit - kernel_vma_start, 4495 kernel_vma_limit - kernel_vma_start,
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4455 GPU_BALLOC_MAX_ORDER, 4497 GPU_BALLOC_MAX_ORDER,
4456 GPU_ALLOC_GVA_SPACE); 4498 GPU_ALLOC_GVA_SPACE);
4457 if (err) 4499 if (err)
4458 goto clean_up_big_allocator; 4500 goto clean_up_user_allocator;
4459 4501
4460 vm->mapped_buffers = RB_ROOT; 4502 vm->mapped_buffers = RB_ROOT;
4461 4503
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4471 if (vm->va_limit > SZ_4G) { 4513 if (vm->va_limit > SZ_4G) {
4472 err = gk20a_init_sema_pool(vm); 4514 err = gk20a_init_sema_pool(vm);
4473 if (err) 4515 if (err)
4474 goto clean_up_big_allocator; 4516 goto clean_up_user_allocator;
4475 } 4517 }
4476 4518
4477 return 0; 4519 return 0;
4478 4520
4479clean_up_big_allocator: 4521clean_up_user_allocator:
4480 if (large_vma_start < large_vma_limit) 4522 if (user_vma_start < user_vma_limit)
4481 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); 4523 nvgpu_alloc_destroy(&vm->user);
4482clean_up_small_allocator:
4483 if (small_vma_start < small_vma_limit)
4484 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
4485clean_up_ptes: 4524clean_up_ptes:
4486 free_gmmu_pages(vm, &vm->pdb); 4525 free_gmmu_pages(vm, &vm->pdb);
4487clean_up_pdes: 4526clean_up_pdes:
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
4523 vm->as_share = as_share; 4562 vm->as_share = as_share;
4524 vm->enable_ctag = true; 4563 vm->enable_ctag = true;
4525 4564
4526 snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); 4565 snprintf(name, sizeof(name), "as_%d", as_share->id);
4527 4566
4528 err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, 4567 err = gk20a_init_vm(mm, vm, big_page_size,
4568 big_page_size << 10,
4529 mm->channel.kernel_size, 4569 mm->channel.kernel_size,
4530 mm->channel.user_size + mm->channel.kernel_size, 4570 mm->channel.user_size + mm->channel.kernel_size,
4531 !mm->disable_bigpage, userspace_managed, name); 4571 !mm->disable_bigpage, userspace_managed, name);
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4586 goto clean_up; 4626 goto clean_up;
4587 } 4627 }
4588 4628
4589 vma = &vm->vma[pgsz_idx]; 4629 vma = vm->vma[pgsz_idx];
4590 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { 4630 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
4591 if (nvgpu_alloc_initialized(&vm->fixed)) 4631 if (nvgpu_alloc_initialized(&vm->fixed))
4592 vma = &vm->fixed; 4632 vma = &vm->fixed;
4593 vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset, 4633 vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
4594 (u64)args->pages * 4634 (u64)args->pages *
4595 (u64)args->page_size); 4635 (u64)args->page_size,
4636 args->page_size);
4596 } else { 4637 } else {
4597 vaddr_start = nvgpu_alloc(vma, 4638 vaddr_start = nvgpu_alloc(vma,
4598 (u64)args->pages * 4639 (u64)args->pages *
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
4662 args->pages, args->offset); 4703 args->pages, args->offset);
4663 4704
4664 /* determine pagesz idx */ 4705 /* determine pagesz idx */
4665 pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? 4706 pgsz_idx = __get_pte_size(vm, args->offset,
4666 gmmu_page_size_big : gmmu_page_size_small; 4707 args->page_size * args->pages);
4667 4708
4668 if (nvgpu_alloc_initialized(&vm->fixed)) 4709 if (nvgpu_alloc_initialized(&vm->fixed))
4669 vma = &vm->fixed; 4710 vma = &vm->fixed;
4670 else 4711 else
4671 vma = &vm->vma[pgsz_idx]; 4712 vma = vm->vma[pgsz_idx];
4672 nvgpu_free(vma, args->offset); 4713 nvgpu_free(vma, args->offset);
4673 4714
4674 mutex_lock(&vm->update_gmmu_lock); 4715 mutex_lock(&vm->update_gmmu_lock);
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
4853 4894
4854void gk20a_deinit_vm(struct vm_gk20a *vm) 4895void gk20a_deinit_vm(struct vm_gk20a *vm)
4855{ 4896{
4856 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); 4897 if (nvgpu_alloc_initialized(&vm->kernel))
4857 if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) 4898 nvgpu_alloc_destroy(&vm->kernel);
4858 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); 4899 if (nvgpu_alloc_initialized(&vm->user))
4859 if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) 4900 nvgpu_alloc_destroy(&vm->user);
4860 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
4861 if (nvgpu_alloc_initialized(&vm->fixed)) 4901 if (nvgpu_alloc_initialized(&vm->fixed))
4862 nvgpu_alloc_destroy(&vm->fixed); 4902 nvgpu_alloc_destroy(&vm->fixed);
4863 4903
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
4908 4948
4909 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; 4949 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
4910 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); 4950 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
4911 gk20a_init_vm(mm, vm, big_page_size, SZ_4K, 4951 gk20a_init_vm(mm, vm,
4912 mm->bar1.aperture_size - SZ_4K, 4952 big_page_size,
4913 mm->bar1.aperture_size, false, false, "bar1"); 4953 SZ_4K, /* Low hole */
4954 mm->bar1.aperture_size - SZ_4K, /* Kernel reserved. */
4955 mm->bar1.aperture_size,
4956 true, false,
4957 "bar1");
4914 4958
4915 err = gk20a_alloc_inst_block(g, inst_block); 4959 err = gk20a_alloc_inst_block(g, inst_block);
4916 if (err) 4960 if (err)
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
4932 struct gk20a *g = gk20a_from_mm(mm); 4976 struct gk20a *g = gk20a_from_mm(mm);
4933 struct mem_desc *inst_block = &mm->pmu.inst_block; 4977 struct mem_desc *inst_block = &mm->pmu.inst_block;
4934 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; 4978 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
4979 u32 low_hole, aperture_size;
4980
4981 /*
4982 * No user region - so we will pass that as zero sized.
4983 */
4984 low_hole = SZ_4K * 16;
4985 aperture_size = GK20A_PMU_VA_SIZE * 2;
4935 4986
4936 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; 4987 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
4937 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); 4988 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
4938 4989
4939 gk20a_init_vm(mm, vm, big_page_size, 4990 gk20a_init_vm(mm, vm, big_page_size,
4940 SZ_4K * 16, GK20A_PMU_VA_SIZE, 4991 low_hole,
4941 GK20A_PMU_VA_SIZE * 2, false, false, 4992 aperture_size - low_hole,
4993 aperture_size,
4994 true,
4995 false,
4942 "system"); 4996 "system");
4943 4997
4944 err = gk20a_alloc_inst_block(g, inst_block); 4998 err = gk20a_alloc_inst_block(g, inst_block);
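As a reading aid for the new gk20a_big_pages_possible() check in the hunks
above: assuming a 64KB big page size, the PDE spans 64KB << 10 = 64MB, so a
VMA can only keep big pages enabled if its base and size are both 64MB
aligned. A minimal sketch under that assumption (the helper name below is
illustrative only, not part of the patch):

        /*
         * Sketch only: with a 64KB big page size the PDE spans
         * 64KB << 10 = 64MB, so base and size must both be 64MB aligned
         * for big pages to remain enabled on the VMA. The real check is
         * gk20a_big_pages_possible() above.
         */
        static int can_use_big_pages_64k(u64 base, u64 size)
        {
                const u64 pde_mask = ((u64)SZ_64K << 10) - 1;   /* 64MB - 1 */

                return !(base & pde_mask) && !(size & pde_mask);
        }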
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 5ef8ae25..394d1d25 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -270,11 +270,13 @@ struct vm_gk20a {
270 270
271 struct gk20a_mm_entry pdb; 271 struct gk20a_mm_entry pdb;
272 272
273 struct nvgpu_allocator vma[gmmu_nr_page_sizes];
274
275 /* If necessary, split fixed from non-fixed. */ 273 /* If necessary, split fixed from non-fixed. */
276 struct nvgpu_allocator fixed; 274 struct nvgpu_allocator fixed;
277 275
276 struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
277 struct nvgpu_allocator kernel;
278 struct nvgpu_allocator user;
279
278 struct rb_root mapped_buffers; 280 struct rb_root mapped_buffers;
279 281
280 struct list_head reserved_va_list; 282 struct list_head reserved_va_list;
@@ -425,7 +427,7 @@ static inline int bar1_aperture_size_mb_gk20a(void)
425 return 16; /* 16MB is more than enough atm. */ 427 return 16; /* 16MB is more than enough atm. */
426} 428}
427 429
428/*The maximum GPU VA range supported */ 430/* The maximum GPU VA range supported */
429#define NV_GMMU_VA_RANGE 38 431#define NV_GMMU_VA_RANGE 38
430 432
431/* The default userspace-visible GPU VA size */ 433/* The default userspace-visible GPU VA size */
@@ -434,43 +436,39 @@ static inline int bar1_aperture_size_mb_gk20a(void)
434/* The default kernel-reserved GPU VA size */ 436/* The default kernel-reserved GPU VA size */
435#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) 437#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
436 438
437/* 439enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
438 * The bottom 16GB of the space are used for small pages, the remaining high 440 u64 base, u64 size);
439 * memory is for large pages.
440 */
441static inline u64 __nv_gmmu_va_small_page_limit(void)
442{
443 return ((u64)SZ_1G * 16);
444}
445
446static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
447{
448 struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big];
449
450 if (!vm->big_pages)
451 return 0;
452
453 return addr >= nvgpu_alloc_base(a) &&
454 addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a);
455}
456 441
457/* 442/*
458 * This determines the PTE size for a given alloc. Used by both the GVA space 443 * This determines the PTE size for a given alloc. Used by both the GVA space
459 * allocator and the mm core code so that agreement can be reached on how to 444 * allocator and the mm core code so that agreement can be reached on how to
460 * map allocations. 445 * map allocations.
446 *
447 * The page size of a buffer is this:
448 *
449 * o If the VM doesn't support large pages then obviously small pages
450 * must be used.
451 * o If the base address is non-zero (fixed address map):
452 * - Attempt to find a reserved memory area and use the page size
453 * based on that.
454 * - If no reserved page size is available, default to small pages.
455 * o If the base is zero:
456 * - If the size is greater than or equal to the big page size, use big
457 * pages.
458 * - Otherwise use small pages.
461 */ 459 */
462static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, 460static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
463 u64 base, u64 size) 461 u64 base, u64 size)
464{ 462{
465 /* 463 if (!vm->big_pages)
466 * Currently userspace is not ready for a true unified address space.
467 * As a result, even though the allocator supports mixed address spaces
468 * the address spaces must be treated as separate for now.
469 */
470 if (__nv_gmmu_va_is_big_page_region(vm, base))
471 return gmmu_page_size_big;
472 else
473 return gmmu_page_size_small; 464 return gmmu_page_size_small;
465
466 if (base)
467 return __get_pte_size_fixed_map(vm, base, size);
468
469 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
470 return gmmu_page_size_big;
471 return gmmu_page_size_small;
474} 472}
475 473
476/* 474/*
@@ -797,6 +795,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
797 795
798void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); 796void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
799 797
798int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
799
800extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; 800extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
801extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; 801extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
802 802
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index dee9b562..d5a90c87 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -41,11 +41,15 @@ struct nvgpu_allocator_ops {
41 * regular and fixed allocations then free_fixed() does not need to 41 * regular and fixed allocations then free_fixed() does not need to
42 * be implemented. This behavior exists for legacy reasons and should 42 * be implemented. This behavior exists for legacy reasons and should
43 * not be propagated to new allocators. 43 * not be propagated to new allocators.
44 *
45 * For allocators where the @page_size field is not applicable it can
46 * be left as 0. Otherwise a valid page size should be passed (4k or
47 * what the large page size is).
44 */ 48 */
45 u64 (*alloc_fixed)(struct nvgpu_allocator *allocator, 49 u64 (*alloc_fixed)(struct nvgpu_allocator *allocator,
46 u64 base, u64 len); 50 u64 base, u64 len, u32 page_size);
47 void (*free_fixed)(struct nvgpu_allocator *allocator, 51 void (*free_fixed)(struct nvgpu_allocator *allocator,
48 u64 base, u64 len); 52 u64 base, u64 len);
49 53
50 /* 54 /*
51 * Allow allocators to reserve space for carveouts. 55 * Allow allocators to reserve space for carveouts.
@@ -213,7 +217,8 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
213u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len); 217u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len);
214void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr); 218void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr);
215 219
216u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); 220u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len,
221 u32 page_size);
217void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); 222void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
218 223
219int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, 224int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
@@ -298,5 +303,8 @@ void nvgpu_alloc_debugfs_init(struct device *dev);
298 } while (0) 303 } while (0)
299 304
300#endif 305#endif
306#define balloc_pr(alloctor, format, arg...) \
307 pr_info("%-25s %25s() " format, \
308 alloctor->name, __func__, ##arg)
301 309
302#endif /* NVGPU_ALLOCATOR_H */ 310#endif /* NVGPU_ALLOCATOR_H */
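A brief usage sketch for the extended fixed-alloc interface above: GVA-space
callers now pass a real page size (SZ_4K or the VM's big page size), while
allocators that ignore the argument, such as the bitmap and page allocators,
are simply given 0. The caller below is hypothetical and only illustrates
the new signature:

        /* Hypothetical caller; "vma" is any GVA-space nvgpu_allocator. */
        static u64 reserve_fixed_small(struct nvgpu_allocator *vma,
                                       u64 base, u64 len)
        {
                /* Request small (4K) PTEs for this fixed GVA range. */
                return nvgpu_alloc_fixed(vma, base, len, SZ_4K);
        }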
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 66c9344b..a21a020d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
227 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); 227 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
228 WARN_ON(err || msg.ret); 228 WARN_ON(err || msg.ret);
229 229
230 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); 230 if (nvgpu_alloc_initialized(&vm->kernel))
231 if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) 231 nvgpu_alloc_destroy(&vm->kernel);
232 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); 232 if (nvgpu_alloc_initialized(&vm->user))
233 if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) 233 nvgpu_alloc_destroy(&vm->user);
234 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); 234 if (nvgpu_alloc_initialized(&vm->fixed))
235 nvgpu_alloc_destroy(&vm->fixed);
235 236
236 mutex_unlock(&vm->update_gmmu_lock); 237 mutex_unlock(&vm->update_gmmu_lock);
237 238
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
273 struct tegra_vgpu_as_share_params *p = &msg.params.as_share; 274 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
274 struct mm_gk20a *mm = &g->mm; 275 struct mm_gk20a *mm = &g->mm;
275 struct vm_gk20a *vm; 276 struct vm_gk20a *vm;
276 u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, 277 u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
277 kernel_vma_start, kernel_vma_limit;
278 char name[32]; 278 char name[32];
279 int err, i; 279 int err, i;
280 const bool userspace_managed = 280 const bool userspace_managed =
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
306 vm->mm = mm; 306 vm->mm = mm;
307 vm->as_share = as_share; 307 vm->as_share = as_share;
308 308
309 /* Set up vma pointers. */
310 vm->vma[0] = &vm->user;
311 vm->vma[1] = &vm->user;
312 vm->vma[2] = &vm->kernel;
313
309 for (i = 0; i < gmmu_nr_page_sizes; i++) 314 for (i = 0; i < gmmu_nr_page_sizes; i++)
310 vm->gmmu_page_sizes[i] = gmmu_page_sizes[i]; 315 vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
311 316
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
328 vm->handle = p->handle; 333 vm->handle = p->handle;
329 334
330 /* setup vma limits */ 335 /* setup vma limits */
331 small_vma_start = vm->va_start; 336 user_vma_start = vm->va_start;
332 337 user_vma_limit = vm->va_limit - mm->channel.kernel_size;
333 if (vm->big_pages) {
334 /* First 16GB of the address space goes towards small
335 * pages. The kernel reserved pages are at the end.
336 * What ever remains is allocated to large pages.
337 */
338 small_vma_limit = __nv_gmmu_va_small_page_limit();
339 large_vma_start = small_vma_limit;
340 large_vma_limit = vm->va_limit - mm->channel.kernel_size;
341 } else {
342 small_vma_limit = vm->va_limit - mm->channel.kernel_size;
343 large_vma_start = 0;
344 large_vma_limit = 0;
345 }
346 338
347 kernel_vma_start = vm->va_limit - mm->channel.kernel_size; 339 kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
348 kernel_vma_limit = vm->va_limit; 340 kernel_vma_limit = vm->va_limit;
349 341
350 gk20a_dbg_info( 342 gk20a_dbg_info(
351 "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", 343 "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
352 small_vma_start, small_vma_limit, 344 user_vma_start, user_vma_limit,
353 large_vma_start, large_vma_limit,
354 kernel_vma_start, kernel_vma_limit); 345 kernel_vma_start, kernel_vma_limit);
355 346
356 /* check that starts do not exceed limits */ 347 WARN_ON(user_vma_start > user_vma_limit);
357 WARN_ON(small_vma_start > small_vma_limit);
358 WARN_ON(large_vma_start > large_vma_limit);
359 /* kernel_vma must also be non-zero */
360 WARN_ON(kernel_vma_start >= kernel_vma_limit); 348 WARN_ON(kernel_vma_start >= kernel_vma_limit);
361 349
362 if (small_vma_start > small_vma_limit || 350 if (user_vma_start > user_vma_limit ||
363 large_vma_start > large_vma_limit ||
364 kernel_vma_start >= kernel_vma_limit) { 351 kernel_vma_start >= kernel_vma_limit) {
365 err = -EINVAL; 352 err = -EINVAL;
366 goto clean_up_share; 353 goto clean_up_share;
367 } 354 }
368 355
369 if (small_vma_start < small_vma_limit) { 356 if (user_vma_start < user_vma_limit) {
370 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 357 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
371 gmmu_page_sizes[gmmu_page_size_small] >> 10); 358 gmmu_page_sizes[gmmu_page_size_small] >> 10);
359 if (!gk20a_big_pages_possible(vm, user_vma_start,
360 user_vma_limit - user_vma_start))
361 vm->big_pages = false;
372 362
373 err = __nvgpu_buddy_allocator_init( 363 err = __nvgpu_buddy_allocator_init(
374 g, 364 g,
375 &vm->vma[gmmu_page_size_small], 365 vm->vma[gmmu_page_size_small],
376 vm, name, 366 vm, name,
377 small_vma_start, 367 user_vma_start,
378 small_vma_limit - small_vma_start, 368 user_vma_limit - user_vma_start,
379 SZ_4K, 369 SZ_4K,
380 GPU_BALLOC_MAX_ORDER, 370 GPU_BALLOC_MAX_ORDER,
381 GPU_ALLOC_GVA_SPACE); 371 GPU_ALLOC_GVA_SPACE);
382 if (err) 372 if (err)
383 goto clean_up_share; 373 goto clean_up_share;
384 } 374 } else {
385 375 /*
386 if (large_vma_start < large_vma_limit) { 376 * Make these allocator pointers point to the kernel allocator
387 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 377 * since we still use the legacy notion of page size to choose
388 gmmu_page_sizes[gmmu_page_size_big] >> 10); 378 * the allocator.
389 err = __nvgpu_buddy_allocator_init( 379 */
390 g, 380 vm->vma[0] = &vm->kernel;
391 &vm->vma[gmmu_page_size_big], 381 vm->vma[1] = &vm->kernel;
392 vm, name,
393 large_vma_start,
394 large_vma_limit - large_vma_start,
395 big_page_size,
396 GPU_BALLOC_MAX_ORDER,
397 GPU_ALLOC_GVA_SPACE);
398 if (err)
399 goto clean_up_small_allocator;
400 } 382 }
401 383
402 snprintf(name, sizeof(name), "gk20a_as_%dKB-sys", 384 snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
403 gmmu_page_sizes[gmmu_page_size_kernel] >> 10); 385 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
386 if (!gk20a_big_pages_possible(vm, kernel_vma_start,
387 kernel_vma_limit - kernel_vma_start))
388 vm->big_pages = false;
389
404 /* 390 /*
405 * kernel reserved VMA is at the end of the aperture 391 * kernel reserved VMA is at the end of the aperture
406 */ 392 */
407 err = __nvgpu_buddy_allocator_init( 393 err = __nvgpu_buddy_allocator_init(
408 g, 394 g,
409 &vm->vma[gmmu_page_size_kernel], 395 vm->vma[gmmu_page_size_kernel],
410 vm, name, 396 vm, name,
411 kernel_vma_start, 397 kernel_vma_start,
412 kernel_vma_limit - kernel_vma_start, 398 kernel_vma_limit - kernel_vma_start,
413 SZ_4K, 399 SZ_4K,
414 GPU_BALLOC_MAX_ORDER, 400 GPU_BALLOC_MAX_ORDER,
415 GPU_ALLOC_GVA_SPACE); 401 GPU_ALLOC_GVA_SPACE);
416 if (err) 402 if (err)
417 goto clean_up_big_allocator; 403 goto clean_up_user_allocator;
418 404
419 vm->mapped_buffers = RB_ROOT; 405 vm->mapped_buffers = RB_ROOT;
420 406
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
426 412
427 return 0; 413 return 0;
428 414
429clean_up_big_allocator: 415clean_up_user_allocator:
430 if (large_vma_start < large_vma_limit) 416 if (user_vma_start < user_vma_limit)
431 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); 417 nvgpu_alloc_destroy(&vm->user);
432clean_up_small_allocator:
433 if (small_vma_start < small_vma_limit)
434 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
435clean_up_share: 418clean_up_share:
436 msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; 419 msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
437 msg.handle = vgpu_get_handle(g); 420 msg.handle = vgpu_get_handle(g);