diff options
author | Alex Waterman <alexw@nvidia.com> | 2015-06-17 13:31:08 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-31 19:23:07 -0500 |
commit | d630f1d99f60b1c2ec87506a2738bac4d1895b07 (patch) | |
tree | 5b9cad58f585424a64e7b675d503a87bbcada254 /drivers/gpu/nvgpu | |
parent | 793791ebb7ddbb34f0aaf3e300b24ed24aa76661 (diff) |
gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to make the small page allocator
and the large page allocator into pointers (where they used to be just
structs). Then assign each of those pointers to the same actual
allocator since the buddy allocator has supported mixed page sizes
since its inception.
For the rest of the driver some changes had to be made in order to
actually support mixed pages in a single address space.
1. Unifying the allocation page size determination
Since the allocation and map operations happen at distinct
times both mapping and allocation of GVA space must agree
on page size. This is because the allocation has to separate
allocations into separate PDEs to avoid the necessity of
supporting mixed PDEs.
To this end a function __get_pte_size() was introduced which
is used both by the balloc code and the core GPU MM code. It
determines page size based only on the length of the mapping/
allocation.
2. Fixed address allocation + page size
Similar to regular mappings/GVA allocations fixed address
mapping page size determination had to be modified. In the
past the address of the mapping determined page size since
the address space split was by address (low addresses were
small pages, high addresses large pages). Since that is no
longer the case the page size field in the reserve memory
ioctl is now honored by the mapping code. When, for instance,
CUDA makes a memory reservation it specifies small or large
pages. When CUDA requests mappings to be made within that
address range the page size is then looked up in the reserved
memory struct.
Fixed address reservations were also modified to now always
allocate at a PDE granularity (64M or 128M depending on
large page size). This prevents non-fixed allocations from
ending up in the same PDE and causing kernel panics or GMMU
faults.
3. The rest...
The rest of the changes are just by-products of the above.
Lots of places required minor updates to use a pointer to
the GVA allocator struct instead of the struct itself.
Lastly, this change is not truly complete. More work remains to be
done in order to fully remove the notion that there was such a thing
as separate address spaces for different page sizes. Basically after
this patch what remains is cleanup and proper documentation.
Bug 1396644
Bug 1729947
Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 66 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/page_allocator.c | 11 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/as_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 282 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 60 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/allocator.h | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 115 |
9 files changed, 318 insertions, 242 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c index 6f267c85..5042980f 100644 --- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | |||
@@ -55,8 +55,11 @@ static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a) | |||
55 | return ba->base + ba->length; | 55 | return ba->base + ba->length; |
56 | } | 56 | } |
57 | 57 | ||
58 | /* | ||
59 | * @page_size is ignored. | ||
60 | */ | ||
58 | static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a, | 61 | static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a, |
59 | u64 base, u64 len) | 62 | u64 base, u64 len, u32 page_size) |
60 | { | 63 | { |
61 | struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); | 64 | struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); |
62 | u64 blks, offs, ret; | 65 | u64 blks, offs, ret; |
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index 39a53801..eee0b634 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c | |||
@@ -484,8 +484,9 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, | |||
484 | bud = list_first_entry(balloc_get_order_list(a, order), | 484 | bud = list_first_entry(balloc_get_order_list(a, order), |
485 | struct nvgpu_buddy, buddy_entry); | 485 | struct nvgpu_buddy, buddy_entry); |
486 | 486 | ||
487 | if (bud->pte_size != BALLOC_PTE_SIZE_ANY && | 487 | if (pte_size != BALLOC_PTE_SIZE_ANY && |
488 | bud->pte_size != pte_size) | 488 | pte_size != bud->pte_size && |
489 | bud->pte_size != BALLOC_PTE_SIZE_ANY) | ||
489 | return NULL; | 490 | return NULL; |
490 | 491 | ||
491 | return bud; | 492 | return bud; |
@@ -643,7 +644,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a, | |||
643 | * necessary for this buddy to exist as well. | 644 | * necessary for this buddy to exist as well. |
644 | */ | 645 | */ |
645 | static struct nvgpu_buddy *__balloc_make_fixed_buddy( | 646 | static struct nvgpu_buddy *__balloc_make_fixed_buddy( |
646 | struct nvgpu_buddy_allocator *a, u64 base, u64 order) | 647 | struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size) |
647 | { | 648 | { |
648 | struct nvgpu_buddy *bud = NULL; | 649 | struct nvgpu_buddy *bud = NULL; |
649 | struct list_head *order_list; | 650 | struct list_head *order_list; |
@@ -664,6 +665,20 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( | |||
664 | order_list = balloc_get_order_list(a, cur_order); | 665 | order_list = balloc_get_order_list(a, cur_order); |
665 | list_for_each_entry(bud, order_list, buddy_entry) { | 666 | list_for_each_entry(bud, order_list, buddy_entry) { |
666 | if (bud->start == cur_base) { | 667 | if (bud->start == cur_base) { |
668 | /* | ||
669 | * Make sure page size matches if it's smaller | ||
670 | * than a PDE sized buddy. | ||
671 | */ | ||
672 | if (bud->order <= a->pte_blk_order && | ||
673 | bud->pte_size != BALLOC_PTE_SIZE_ANY && | ||
674 | bud->pte_size != pte_size) { | ||
675 | /* Welp, that's the end of that. */ | ||
676 | alloc_dbg(balloc_owner(a), | ||
677 | "Fixed buddy PTE " | ||
678 | "size mismatch!\n"); | ||
679 | return NULL; | ||
680 | } | ||
681 | |||
667 | found = 1; | 682 | found = 1; |
668 | break; | 683 | break; |
669 | } | 684 | } |
@@ -683,7 +698,10 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( | |||
683 | 698 | ||
684 | /* Split this buddy as necessary until we get the target buddy. */ | 699 | /* Split this buddy as necessary until we get the target buddy. */ |
685 | while (bud->start != base || bud->order != order) { | 700 | while (bud->start != base || bud->order != order) { |
686 | if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { | 701 | if (balloc_split_buddy(a, bud, pte_size)) { |
702 | alloc_dbg(balloc_owner(a), | ||
703 | "split buddy failed? {0x%llx, %llu}\n", | ||
704 | bud->start, bud->order); | ||
687 | balloc_coalesce(a, bud); | 705 | balloc_coalesce(a, bud); |
688 | return NULL; | 706 | return NULL; |
689 | } | 707 | } |
@@ -700,7 +718,7 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( | |||
700 | 718 | ||
701 | static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, | 719 | static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, |
702 | struct nvgpu_fixed_alloc *falloc, | 720 | struct nvgpu_fixed_alloc *falloc, |
703 | u64 base, u64 len) | 721 | u64 base, u64 len, int pte_size) |
704 | { | 722 | { |
705 | u64 shifted_base, inc_base; | 723 | u64 shifted_base, inc_base; |
706 | u64 align_order; | 724 | u64 align_order; |
@@ -731,7 +749,7 @@ static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, | |||
731 | 749 | ||
732 | bud = __balloc_make_fixed_buddy(a, | 750 | bud = __balloc_make_fixed_buddy(a, |
733 | balloc_base_unshift(a, inc_base), | 751 | balloc_base_unshift(a, inc_base), |
734 | align_order); | 752 | align_order, pte_size); |
735 | if (!bud) { | 753 | if (!bud) { |
736 | alloc_dbg(balloc_owner(a), | 754 | alloc_dbg(balloc_owner(a), |
737 | "Fixed buddy failed: {0x%llx, %llu}!\n", | 755 | "Fixed buddy failed: {0x%llx, %llu}!\n", |
@@ -817,17 +835,8 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len) | |||
817 | return 0; | 835 | return 0; |
818 | } | 836 | } |
819 | 837 | ||
820 | /* | ||
821 | * For now pass the base address of the allocator's region to | ||
822 | * __get_pte_size(). This ensures we get the right page size for | ||
823 | * the alloc but we don't have to know what the real address is | ||
824 | * going to be quite yet. | ||
825 | * | ||
826 | * TODO: once userspace supports a unified address space pass 0 for | ||
827 | * the base. This will make only 'len' affect the PTE size. | ||
828 | */ | ||
829 | if (a->flags & GPU_ALLOC_GVA_SPACE) | 838 | if (a->flags & GPU_ALLOC_GVA_SPACE) |
830 | pte_size = __get_pte_size(a->vm, a->base, len); | 839 | pte_size = __get_pte_size(a->vm, 0, len); |
831 | else | 840 | else |
832 | pte_size = BALLOC_PTE_SIZE_ANY; | 841 | pte_size = BALLOC_PTE_SIZE_ANY; |
833 | 842 | ||
@@ -858,8 +867,9 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len) | |||
858 | * Requires @__a to be locked. | 867 | * Requires @__a to be locked. |
859 | */ | 868 | */ |
860 | static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | 869 | static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, |
861 | u64 base, u64 len) | 870 | u64 base, u64 len, u32 page_size) |
862 | { | 871 | { |
872 | int pte_size = BALLOC_PTE_SIZE_ANY; | ||
863 | u64 ret, real_bytes = 0; | 873 | u64 ret, real_bytes = 0; |
864 | struct nvgpu_buddy *bud; | 874 | struct nvgpu_buddy *bud; |
865 | struct nvgpu_fixed_alloc *falloc = NULL; | 875 | struct nvgpu_fixed_alloc *falloc = NULL; |
@@ -874,6 +884,16 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | |||
874 | if (len == 0) | 884 | if (len == 0) |
875 | goto fail; | 885 | goto fail; |
876 | 886 | ||
887 | /* Check that the page size is valid. */ | ||
888 | if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) { | ||
889 | if (page_size == a->vm->big_page_size) | ||
890 | pte_size = gmmu_page_size_big; | ||
891 | else if (page_size == SZ_4K) | ||
892 | pte_size = gmmu_page_size_small; | ||
893 | else | ||
894 | goto fail; | ||
895 | } | ||
896 | |||
877 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); | 897 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); |
878 | if (!falloc) | 898 | if (!falloc) |
879 | goto fail; | 899 | goto fail; |
@@ -889,7 +909,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | |||
889 | goto fail_unlock; | 909 | goto fail_unlock; |
890 | } | 910 | } |
891 | 911 | ||
892 | ret = __balloc_do_alloc_fixed(a, falloc, base, len); | 912 | ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size); |
893 | if (!ret) { | 913 | if (!ret) { |
894 | alloc_dbg(balloc_owner(a), | 914 | alloc_dbg(balloc_owner(a), |
895 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", | 915 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", |
@@ -927,13 +947,13 @@ fail: | |||
927 | * Please do not use this function unless _absolutely_ necessary. | 947 | * Please do not use this function unless _absolutely_ necessary. |
928 | */ | 948 | */ |
929 | static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | 949 | static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, |
930 | u64 base, u64 len) | 950 | u64 base, u64 len, u32 page_size) |
931 | { | 951 | { |
932 | u64 alloc; | 952 | u64 alloc; |
933 | struct nvgpu_buddy_allocator *a = __a->priv; | 953 | struct nvgpu_buddy_allocator *a = __a->priv; |
934 | 954 | ||
935 | alloc_lock(__a); | 955 | alloc_lock(__a); |
936 | alloc = __nvgpu_balloc_fixed_buddy(__a, base, len); | 956 | alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size); |
937 | a->alloc_made = 1; | 957 | a->alloc_made = 1; |
938 | alloc_unlock(__a); | 958 | alloc_unlock(__a); |
939 | 959 | ||
@@ -1034,7 +1054,7 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a, | |||
1034 | } | 1054 | } |
1035 | 1055 | ||
1036 | /* Should not be possible to fail... */ | 1056 | /* Should not be possible to fail... */ |
1037 | addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length); | 1057 | addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0); |
1038 | if (!addr) { | 1058 | if (!addr) { |
1039 | err = -ENOMEM; | 1059 | err = -ENOMEM; |
1040 | pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); | 1060 | pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); |
@@ -1310,6 +1330,10 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
1310 | alloc_dbg(__a, " base 0x%llx\n", a->base); | 1330 | alloc_dbg(__a, " base 0x%llx\n", a->base); |
1311 | alloc_dbg(__a, " size 0x%llx\n", a->length); | 1331 | alloc_dbg(__a, " size 0x%llx\n", a->length); |
1312 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | 1332 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); |
1333 | if (flags & GPU_ALLOC_GVA_SPACE) | ||
1334 | alloc_dbg(balloc_owner(a), | ||
1335 | " pde_size 0x%llx\n", | ||
1336 | balloc_order_to_len(a, a->pte_blk_order)); | ||
1313 | alloc_dbg(__a, " max_order %llu\n", a->max_order); | 1337 | alloc_dbg(__a, " max_order %llu\n", a->max_order); |
1314 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | 1338 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); |
1315 | 1339 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c index ebd779c0..cf8c4569 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | |||
@@ -77,10 +77,11 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr) | |||
77 | a->ops->free(a, addr); | 77 | a->ops->free(a, addr); |
78 | } | 78 | } |
79 | 79 | ||
80 | u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len) | 80 | u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len, |
81 | u32 page_size) | ||
81 | { | 82 | { |
82 | if (a->ops->alloc_fixed) | 83 | if (a->ops->alloc_fixed) |
83 | return a->ops->alloc_fixed(a, base, len); | 84 | return a->ops->alloc_fixed(a, base, len, page_size); |
84 | 85 | ||
85 | return 0; | 86 | return 0; |
86 | } | 87 | } |
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c index c61b2238..96f8f242 100644 --- a/drivers/gpu/nvgpu/common/mm/page_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c | |||
@@ -648,7 +648,7 @@ done: | |||
648 | } | 648 | } |
649 | 649 | ||
650 | static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | 650 | static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( |
651 | struct nvgpu_page_allocator *a, u64 base, u64 length) | 651 | struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) |
652 | { | 652 | { |
653 | struct nvgpu_page_alloc *alloc; | 653 | struct nvgpu_page_alloc *alloc; |
654 | struct page_alloc_chunk *c; | 654 | struct page_alloc_chunk *c; |
@@ -658,7 +658,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
658 | if (!alloc || !c) | 658 | if (!alloc || !c) |
659 | goto fail; | 659 | goto fail; |
660 | 660 | ||
661 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length); | 661 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); |
662 | if (!alloc->base) { | 662 | if (!alloc->base) { |
663 | WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); | 663 | WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); |
664 | goto fail; | 664 | goto fail; |
@@ -680,8 +680,11 @@ fail: | |||
680 | return ERR_PTR(-ENOMEM); | 680 | return ERR_PTR(-ENOMEM); |
681 | } | 681 | } |
682 | 682 | ||
683 | /* | ||
684 | * @page_size is ignored. | ||
685 | */ | ||
683 | static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | 686 | static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, |
684 | u64 base, u64 len) | 687 | u64 base, u64 len, u32 page_size) |
685 | { | 688 | { |
686 | struct nvgpu_page_allocator *a = page_allocator(__a); | 689 | struct nvgpu_page_allocator *a = page_allocator(__a); |
687 | struct nvgpu_page_alloc *alloc = NULL; | 690 | struct nvgpu_page_alloc *alloc = NULL; |
@@ -694,7 +697,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | |||
694 | 697 | ||
695 | alloc_lock(__a); | 698 | alloc_lock(__a); |
696 | 699 | ||
697 | alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len); | 700 | alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0); |
698 | if (IS_ERR(alloc)) { | 701 | if (IS_ERR(alloc)) { |
699 | alloc_unlock(__a); | 702 | alloc_unlock(__a); |
700 | return 0; | 703 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 07601d42..adf0297b 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -281,7 +281,7 @@ static int gk20a_as_ioctl_get_va_regions( | |||
281 | struct nvgpu_as_va_region region; | 281 | struct nvgpu_as_va_region region; |
282 | struct nvgpu_allocator *vma = | 282 | struct nvgpu_allocator *vma = |
283 | nvgpu_alloc_initialized(&vm->fixed) ? | 283 | nvgpu_alloc_initialized(&vm->fixed) ? |
284 | &vm->fixed : &vm->vma[i]; | 284 | &vm->fixed : vm->vma[i]; |
285 | 285 | ||
286 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); | 286 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); |
287 | 287 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index cdbaef79..83bbcb54 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
1001 | mutex_init(&mm->l2_op_lock); | 1001 | mutex_init(&mm->l2_op_lock); |
1002 | 1002 | ||
1003 | /*TBD: make channel vm size configurable */ | 1003 | /*TBD: make channel vm size configurable */ |
1004 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE; | 1004 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - |
1005 | NV_MM_DEFAULT_KERNEL_SIZE; | ||
1005 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | 1006 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; |
1006 | 1007 | ||
1007 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", | 1008 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", |
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | |||
1626 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 1627 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
1627 | 1628 | ||
1628 | { | 1629 | { |
1629 | struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | 1630 | struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx]; |
1630 | u64 offset; | 1631 | u64 offset; |
1631 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | 1632 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; |
1632 | 1633 | ||
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, | |||
1663 | u64 offset, u64 size, | 1664 | u64 offset, u64 size, |
1664 | enum gmmu_pgsz_gk20a pgsz_idx) | 1665 | enum gmmu_pgsz_gk20a pgsz_idx) |
1665 | { | 1666 | { |
1666 | struct nvgpu_allocator *vma = &vm->vma[pgsz_idx]; | 1667 | struct nvgpu_allocator *vma = vm->vma[pgsz_idx]; |
1667 | 1668 | ||
1668 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", | 1669 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", |
1669 | vma->name, offset, size); | 1670 | vma->name, offset, size); |
@@ -1790,13 +1791,7 @@ struct buffer_attrs { | |||
1790 | static void gmmu_select_page_size(struct vm_gk20a *vm, | 1791 | static void gmmu_select_page_size(struct vm_gk20a *vm, |
1791 | struct buffer_attrs *bfr) | 1792 | struct buffer_attrs *bfr) |
1792 | { | 1793 | { |
1793 | int i; | 1794 | bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size); |
1794 | /* choose the biggest first (top->bottom) */ | ||
1795 | for (i = gmmu_page_size_kernel - 1; i >= 0; i--) | ||
1796 | if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) { | ||
1797 | bfr->pgsz_idx = i; | ||
1798 | break; | ||
1799 | } | ||
1800 | } | 1795 | } |
1801 | 1796 | ||
1802 | static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | 1797 | static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, |
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
2497 | * the alignment determined by gmmu_select_page_size(). | 2492 | * the alignment determined by gmmu_select_page_size(). |
2498 | */ | 2493 | */ |
2499 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | 2494 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { |
2500 | int pgsz_idx = | 2495 | int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size); |
2501 | __nv_gmmu_va_is_big_page_region(vm, offset_align) ? | ||
2502 | gmmu_page_size_big : gmmu_page_size_small; | ||
2503 | if (pgsz_idx > bfr.pgsz_idx) { | 2496 | if (pgsz_idx > bfr.pgsz_idx) { |
2504 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", | 2497 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", |
2505 | offset_align, bfr.pgsz_idx, pgsz_idx); | 2498 | offset_align, bfr.pgsz_idx, pgsz_idx); |
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at, | |||
3149 | u64 addr = 0; | 3142 | u64 addr = 0; |
3150 | 3143 | ||
3151 | if (at) | 3144 | if (at) |
3152 | addr = nvgpu_alloc_fixed(allocator, at, size); | 3145 | addr = nvgpu_alloc_fixed(allocator, at, size, 0); |
3153 | else | 3146 | else |
3154 | addr = nvgpu_alloc(allocator, size); | 3147 | addr = nvgpu_alloc(allocator, size); |
3155 | 3148 | ||
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) | |||
4260 | * | 4253 | * |
4261 | * !!! TODO: cleanup. | 4254 | * !!! TODO: cleanup. |
4262 | */ | 4255 | */ |
4263 | sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel], | 4256 | sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel, |
4264 | vm->va_limit - | 4257 | vm->va_limit - |
4265 | mm->channel.kernel_size, | 4258 | mm->channel.kernel_size, |
4266 | 512 * PAGE_SIZE); | 4259 | 512 * PAGE_SIZE, |
4260 | SZ_4K); | ||
4267 | if (!sema_sea->gpu_va) { | 4261 | if (!sema_sea->gpu_va) { |
4268 | nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); | 4262 | nvgpu_free(&vm->kernel, sema_sea->gpu_va); |
4269 | gk20a_vm_put(vm); | 4263 | gk20a_vm_put(vm); |
4270 | return -ENOMEM; | 4264 | return -ENOMEM; |
4271 | } | 4265 | } |
@@ -4273,14 +4267,78 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) | |||
4273 | err = gk20a_semaphore_pool_map(vm->sema_pool, vm); | 4267 | err = gk20a_semaphore_pool_map(vm->sema_pool, vm); |
4274 | if (err) { | 4268 | if (err) { |
4275 | gk20a_semaphore_pool_unmap(vm->sema_pool, vm); | 4269 | gk20a_semaphore_pool_unmap(vm->sema_pool, vm); |
4276 | nvgpu_free(&vm->vma[gmmu_page_size_small], | 4270 | nvgpu_free(vm->vma[gmmu_page_size_small], |
4277 | vm->sema_pool->gpu_va); | 4271 | vm->sema_pool->gpu_va); |
4278 | gk20a_vm_put(vm); | 4272 | gk20a_vm_put(vm); |
4279 | } | 4273 | } |
4280 | 4274 | ||
4281 | return 0; | 4275 | return 0; |
4282 | } | 4276 | } |
4283 | 4277 | ||
4278 | /* | ||
4279 | * Determine if the passed address space can support big pages or not. | ||
4280 | */ | ||
4281 | int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size) | ||
4282 | { | ||
4283 | u64 mask = ((u64)vm->big_page_size << 10) - 1; | ||
4284 | |||
4285 | if (base & mask || size & mask) | ||
4286 | return 0; | ||
4287 | return 1; | ||
4288 | } | ||
4289 | |||
4290 | /* | ||
4291 | * Attempt to find a reserved memory area to determine PTE size for the passed | ||
4292 | * mapping. If no reserved area can be found use small pages but drop a warning. | ||
4293 | */ | ||
4294 | enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | ||
4295 | u64 base, u64 size) | ||
4296 | { | ||
4297 | struct vm_reserved_va_node *node; | ||
4298 | |||
4299 | node = addr_to_reservation(vm, base); | ||
4300 | if (!node) | ||
4301 | return gmmu_page_size_small; | ||
4302 | |||
4303 | return node->pgsz_idx; | ||
4304 | } | ||
4305 | |||
4306 | /** | ||
4307 | * gk20a_init_vm() - Initialize an address space. | ||
4308 | * | ||
4309 | * @mm - Parent MM. | ||
4310 | * @vm - The VM to init. | ||
4311 | * @big_page_size - Size of big pages associated with this VM. | ||
4312 | * @low_hole - The size of the low hole (unaddressable memory at the bottom of | ||
4313 | * the address space. | ||
4314 | * @kernel_reserved - Space reserved for kernel only allocations. | ||
4315 | * @aperture_size - Total size of the aperture. | ||
4316 | * @big_pages - Ignored. Will be set based on other passed params. | ||
4317 | * @name - Name of the address space. | ||
4318 | * | ||
4319 | * This function initializes an address space according to the following map: | ||
4320 | * | ||
4321 | * +--+ 0x0 | ||
4322 | * | | | ||
4323 | * +--+ @low_hole | ||
4324 | * | | | ||
4325 | * ~ ~ This is the "user" section. | ||
4326 | * | | | ||
4327 | * +--+ @aperture_size - @kernel_reserved | ||
4328 | * | | | ||
4329 | * ~ ~ This is the "kernel" section. | ||
4330 | * | | | ||
4331 | * +--+ @aperture_size | ||
4332 | * | ||
4333 | * The user section is therefor what ever is left over after the @low_hole and | ||
4334 | * @kernel_reserved memory have been portioned out. The @kernel_reserved is | ||
4335 | * always persent at the top of the memory space and the @low_hole is always at | ||
4336 | * the bottom. | ||
4337 | * | ||
4338 | * For certain address spaces a "user" section makes no sense (bar1, etc) so in | ||
4339 | * such cases the @kernel_reserved and @low_hole should sum to exactly | ||
4340 | * @aperture_size. | ||
4341 | */ | ||
4284 | int gk20a_init_vm(struct mm_gk20a *mm, | 4342 | int gk20a_init_vm(struct mm_gk20a *mm, |
4285 | struct vm_gk20a *vm, | 4343 | struct vm_gk20a *vm, |
4286 | u32 big_page_size, | 4344 | u32 big_page_size, |
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4293 | { | 4351 | { |
4294 | int err, i; | 4352 | int err, i; |
4295 | char alloc_name[32]; | 4353 | char alloc_name[32]; |
4296 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, | 4354 | u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit; |
4297 | kernel_vma_start, kernel_vma_limit; | ||
4298 | u32 pde_lo, pde_hi; | 4355 | u32 pde_lo, pde_hi; |
4299 | struct gk20a *g = mm->g; | 4356 | struct gk20a *g = mm->g; |
4300 | 4357 | ||
4301 | /* note: this must match gmmu_pgsz_gk20a enum */ | 4358 | /* note: this must match gmmu_pgsz_gk20a enum */ |
4302 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; | 4359 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; |
4303 | 4360 | ||
4304 | WARN_ON(kernel_reserved + low_hole > aperture_size); | 4361 | if (WARN_ON(kernel_reserved + low_hole > aperture_size)) |
4305 | if (kernel_reserved > aperture_size) | ||
4306 | return -ENOMEM; | 4362 | return -ENOMEM; |
4307 | 4363 | ||
4308 | vm->mm = mm; | 4364 | vm->mm = mm; |
4309 | 4365 | ||
4366 | /* Set up vma pointers. */ | ||
4367 | vm->vma[0] = &vm->user; | ||
4368 | vm->vma[1] = &vm->user; | ||
4369 | vm->vma[2] = &vm->kernel; | ||
4370 | |||
4310 | vm->va_start = low_hole; | 4371 | vm->va_start = low_hole; |
4311 | vm->va_limit = aperture_size; | 4372 | vm->va_limit = aperture_size; |
4312 | vm->big_pages = big_pages; | 4373 | vm->big_pages = big_pages; |
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4321 | 4382 | ||
4322 | gk20a_dbg_info("small page-size (%dKB)", | 4383 | gk20a_dbg_info("small page-size (%dKB)", |
4323 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | 4384 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); |
4324 | 4385 | gk20a_dbg_info("big page-size (%dKB) (%s)\n", | |
4325 | gk20a_dbg_info("big page-size (%dKB)", | 4386 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name); |
4326 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); | ||
4327 | |||
4328 | gk20a_dbg_info("kernel page-size (%dKB)", | 4387 | gk20a_dbg_info("kernel page-size (%dKB)", |
4329 | vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | 4388 | vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); |
4330 | 4389 | ||
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4348 | goto clean_up_pdes; | 4407 | goto clean_up_pdes; |
4349 | 4408 | ||
4350 | /* setup vma limits */ | 4409 | /* setup vma limits */ |
4351 | small_vma_start = low_hole; | 4410 | user_vma_start = low_hole; |
4352 | 4411 | user_vma_limit = vm->va_limit - kernel_reserved; | |
4353 | if (big_pages) { | ||
4354 | /* First 16GB of the address space goes towards small | ||
4355 | * pages. What ever remains is allocated to large | ||
4356 | * pages. */ | ||
4357 | small_vma_limit = __nv_gmmu_va_small_page_limit(); | ||
4358 | large_vma_start = small_vma_limit; | ||
4359 | large_vma_limit = vm->va_limit - kernel_reserved; | ||
4360 | } else { | ||
4361 | small_vma_limit = vm->va_limit - kernel_reserved; | ||
4362 | large_vma_start = 0; | ||
4363 | large_vma_limit = 0; | ||
4364 | } | ||
4365 | 4412 | ||
4366 | kernel_vma_start = vm->va_limit - kernel_reserved; | 4413 | kernel_vma_start = vm->va_limit - kernel_reserved; |
4367 | kernel_vma_limit = vm->va_limit; | 4414 | kernel_vma_limit = vm->va_limit; |
4368 | 4415 | ||
4369 | gk20a_dbg_info( | 4416 | gk20a_dbg_info( |
4370 | "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", | 4417 | "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", |
4371 | small_vma_start, small_vma_limit, | 4418 | user_vma_start, user_vma_limit, |
4372 | large_vma_start, large_vma_limit, | ||
4373 | kernel_vma_start, kernel_vma_limit); | 4419 | kernel_vma_start, kernel_vma_limit); |
4374 | 4420 | ||
4375 | /* check that starts do not exceed limits */ | 4421 | WARN_ON(user_vma_start > user_vma_limit); |
4376 | WARN_ON(small_vma_start > small_vma_limit); | ||
4377 | WARN_ON(large_vma_start > large_vma_limit); | ||
4378 | /* kernel_vma must also be non-zero */ | ||
4379 | WARN_ON(kernel_vma_start >= kernel_vma_limit); | 4422 | WARN_ON(kernel_vma_start >= kernel_vma_limit); |
4380 | 4423 | ||
4381 | if (small_vma_start > small_vma_limit || | 4424 | /* |
4382 | large_vma_start > large_vma_limit || | 4425 | * A "user" area only makes sense for the GVA spaces. For VMs where |
4426 | * there is no "user" area user_vma_start will be equal to | ||
4427 | * user_vma_limit (i.e a 0 sized space). In such a situation the kernel | ||
4428 | * area must be non-zero in length. | ||
4429 | */ | ||
4430 | if (user_vma_start > user_vma_limit || | ||
4383 | kernel_vma_start >= kernel_vma_limit) { | 4431 | kernel_vma_start >= kernel_vma_limit) { |
4384 | err = -EINVAL; | 4432 | err = -EINVAL; |
4385 | goto clean_up_pdes; | 4433 | goto clean_up_pdes; |
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4389 | * Attempt to make a separate VM for fixed allocations. | 4437 | * Attempt to make a separate VM for fixed allocations. |
4390 | */ | 4438 | */ |
4391 | if (g->separate_fixed_allocs && | 4439 | if (g->separate_fixed_allocs && |
4392 | small_vma_start < small_vma_limit) { | 4440 | user_vma_start < user_vma_limit) { |
4393 | if (g->separate_fixed_allocs >= small_vma_limit) | 4441 | if (g->separate_fixed_allocs >= user_vma_limit) |
4394 | goto clean_up_pdes; | 4442 | goto clean_up_pdes; |
4395 | 4443 | ||
4396 | snprintf(alloc_name, sizeof(alloc_name), | 4444 | snprintf(alloc_name, sizeof(alloc_name), |
@@ -4398,7 +4446,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4398 | 4446 | ||
4399 | err = __nvgpu_buddy_allocator_init(g, &vm->fixed, | 4447 | err = __nvgpu_buddy_allocator_init(g, &vm->fixed, |
4400 | vm, alloc_name, | 4448 | vm, alloc_name, |
4401 | small_vma_start, | 4449 | user_vma_start, |
4402 | g->separate_fixed_allocs, | 4450 | g->separate_fixed_allocs, |
4403 | SZ_4K, | 4451 | SZ_4K, |
4404 | GPU_BALLOC_MAX_ORDER, | 4452 | GPU_BALLOC_MAX_ORDER, |
@@ -4407,47 +4455,41 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4407 | goto clean_up_ptes; | 4455 | goto clean_up_ptes; |
4408 | 4456 | ||
4409 | /* Make sure to update the user vma size. */ | 4457 | /* Make sure to update the user vma size. */ |
4410 | small_vma_start = g->separate_fixed_allocs; | 4458 | user_vma_start = g->separate_fixed_allocs; |
4411 | } | ||
4412 | |||
4413 | if (small_vma_start < small_vma_limit) { | ||
4414 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | ||
4415 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | ||
4416 | err = __nvgpu_buddy_allocator_init( | ||
4417 | g, | ||
4418 | &vm->vma[gmmu_page_size_small], | ||
4419 | vm, alloc_name, | ||
4420 | small_vma_start, | ||
4421 | small_vma_limit - small_vma_start, | ||
4422 | SZ_4K, | ||
4423 | GPU_BALLOC_MAX_ORDER, | ||
4424 | GPU_ALLOC_GVA_SPACE); | ||
4425 | if (err) | ||
4426 | goto clean_up_ptes; | ||
4427 | } | 4459 | } |
4428 | 4460 | ||
4429 | if (large_vma_start < large_vma_limit) { | 4461 | if (user_vma_start < user_vma_limit) { |
4430 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 4462 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name); |
4431 | name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); | 4463 | if (!gk20a_big_pages_possible(vm, user_vma_start, |
4432 | err = __nvgpu_buddy_allocator_init( | 4464 | user_vma_limit - user_vma_start)) |
4433 | g, | 4465 | vm->big_pages = false; |
4434 | &vm->vma[gmmu_page_size_big], | 4466 | |
4435 | vm, alloc_name, | 4467 | err = __nvgpu_buddy_allocator_init(g, &vm->user, |
4436 | large_vma_start, | 4468 | vm, alloc_name, |
4437 | large_vma_limit - large_vma_start, | 4469 | user_vma_start, |
4438 | big_page_size, | 4470 | user_vma_limit - |
4439 | GPU_BALLOC_MAX_ORDER, | 4471 | user_vma_start, |
4440 | GPU_ALLOC_GVA_SPACE); | 4472 | SZ_4K, |
4473 | GPU_BALLOC_MAX_ORDER, | ||
4474 | GPU_ALLOC_GVA_SPACE); | ||
4441 | if (err) | 4475 | if (err) |
4442 | goto clean_up_small_allocator; | 4476 | goto clean_up_ptes; |
4477 | } else { | ||
4478 | /* | ||
4479 | * Make these allocator pointers point to the kernel allocator | ||
4480 | * since we still use the legacy notion of page size to choose | ||
4481 | * the allocator. | ||
4482 | */ | ||
4483 | vm->vma[0] = &vm->kernel; | ||
4484 | vm->vma[1] = &vm->kernel; | ||
4443 | } | 4485 | } |
4444 | 4486 | ||
4445 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys", | 4487 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name); |
4446 | name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | 4488 | if (!gk20a_big_pages_possible(vm, kernel_vma_start, |
4447 | /* | 4489 | kernel_vma_limit - kernel_vma_start)) |
4448 | * kernel reserved VMA is at the end of the aperture | 4490 | vm->big_pages = false; |
4449 | */ | 4491 | |
4450 | err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], | 4492 | err = __nvgpu_buddy_allocator_init(g, &vm->kernel, |
4451 | vm, alloc_name, | 4493 | vm, alloc_name, |
4452 | kernel_vma_start, | 4494 | kernel_vma_start, |
4453 | kernel_vma_limit - kernel_vma_start, | 4495 | kernel_vma_limit - kernel_vma_start, |
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4455 | GPU_BALLOC_MAX_ORDER, | 4497 | GPU_BALLOC_MAX_ORDER, |
4456 | GPU_ALLOC_GVA_SPACE); | 4498 | GPU_ALLOC_GVA_SPACE); |
4457 | if (err) | 4499 | if (err) |
4458 | goto clean_up_big_allocator; | 4500 | goto clean_up_user_allocator; |
4459 | 4501 | ||
4460 | vm->mapped_buffers = RB_ROOT; | 4502 | vm->mapped_buffers = RB_ROOT; |
4461 | 4503 | ||
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4471 | if (vm->va_limit > SZ_4G) { | 4513 | if (vm->va_limit > SZ_4G) { |
4472 | err = gk20a_init_sema_pool(vm); | 4514 | err = gk20a_init_sema_pool(vm); |
4473 | if (err) | 4515 | if (err) |
4474 | goto clean_up_big_allocator; | 4516 | goto clean_up_user_allocator; |
4475 | } | 4517 | } |
4476 | 4518 | ||
4477 | return 0; | 4519 | return 0; |
4478 | 4520 | ||
4479 | clean_up_big_allocator: | 4521 | clean_up_user_allocator: |
4480 | if (large_vma_start < large_vma_limit) | 4522 | if (user_vma_start < user_vma_limit) |
4481 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); | 4523 | nvgpu_alloc_destroy(&vm->user); |
4482 | clean_up_small_allocator: | ||
4483 | if (small_vma_start < small_vma_limit) | ||
4484 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); | ||
4485 | clean_up_ptes: | 4524 | clean_up_ptes: |
4486 | free_gmmu_pages(vm, &vm->pdb); | 4525 | free_gmmu_pages(vm, &vm->pdb); |
4487 | clean_up_pdes: | 4526 | clean_up_pdes: |
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size, | |||
4523 | vm->as_share = as_share; | 4562 | vm->as_share = as_share; |
4524 | vm->enable_ctag = true; | 4563 | vm->enable_ctag = true; |
4525 | 4564 | ||
4526 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); | 4565 | snprintf(name, sizeof(name), "as_%d", as_share->id); |
4527 | 4566 | ||
4528 | err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, | 4567 | err = gk20a_init_vm(mm, vm, big_page_size, |
4568 | big_page_size << 10, | ||
4529 | mm->channel.kernel_size, | 4569 | mm->channel.kernel_size, |
4530 | mm->channel.user_size + mm->channel.kernel_size, | 4570 | mm->channel.user_size + mm->channel.kernel_size, |
4531 | !mm->disable_bigpage, userspace_managed, name); | 4571 | !mm->disable_bigpage, userspace_managed, name); |
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
4586 | goto clean_up; | 4626 | goto clean_up; |
4587 | } | 4627 | } |
4588 | 4628 | ||
4589 | vma = &vm->vma[pgsz_idx]; | 4629 | vma = vm->vma[pgsz_idx]; |
4590 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { | 4630 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { |
4591 | if (nvgpu_alloc_initialized(&vm->fixed)) | 4631 | if (nvgpu_alloc_initialized(&vm->fixed)) |
4592 | vma = &vm->fixed; | 4632 | vma = &vm->fixed; |
4593 | vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset, | 4633 | vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset, |
4594 | (u64)args->pages * | 4634 | (u64)args->pages * |
4595 | (u64)args->page_size); | 4635 | (u64)args->page_size, |
4636 | args->page_size); | ||
4596 | } else { | 4637 | } else { |
4597 | vaddr_start = nvgpu_alloc(vma, | 4638 | vaddr_start = nvgpu_alloc(vma, |
4598 | (u64)args->pages * | 4639 | (u64)args->pages * |
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
4662 | args->pages, args->offset); | 4703 | args->pages, args->offset); |
4663 | 4704 | ||
4664 | /* determine pagesz idx */ | 4705 | /* determine pagesz idx */ |
4665 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? | 4706 | pgsz_idx = __get_pte_size(vm, args->offset, |
4666 | gmmu_page_size_big : gmmu_page_size_small; | 4707 | args->page_size * args->pages); |
4667 | 4708 | ||
4668 | if (nvgpu_alloc_initialized(&vm->fixed)) | 4709 | if (nvgpu_alloc_initialized(&vm->fixed)) |
4669 | vma = &vm->fixed; | 4710 | vma = &vm->fixed; |
4670 | else | 4711 | else |
4671 | vma = &vm->vma[pgsz_idx]; | 4712 | vma = vm->vma[pgsz_idx]; |
4672 | nvgpu_free(vma, args->offset); | 4713 | nvgpu_free(vma, args->offset); |
4673 | 4714 | ||
4674 | mutex_lock(&vm->update_gmmu_lock); | 4715 | mutex_lock(&vm->update_gmmu_lock); |
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, | |||
4853 | 4894 | ||
4854 | void gk20a_deinit_vm(struct vm_gk20a *vm) | 4895 | void gk20a_deinit_vm(struct vm_gk20a *vm) |
4855 | { | 4896 | { |
4856 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); | 4897 | if (nvgpu_alloc_initialized(&vm->kernel)) |
4857 | if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) | 4898 | nvgpu_alloc_destroy(&vm->kernel); |
4858 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); | 4899 | if (nvgpu_alloc_initialized(&vm->user)) |
4859 | if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) | 4900 | nvgpu_alloc_destroy(&vm->user); |
4860 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); | ||
4861 | if (nvgpu_alloc_initialized(&vm->fixed)) | 4901 | if (nvgpu_alloc_initialized(&vm->fixed)) |
4862 | nvgpu_alloc_destroy(&vm->fixed); | 4902 | nvgpu_alloc_destroy(&vm->fixed); |
4863 | 4903 | ||
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
4908 | 4948 | ||
4909 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | 4949 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; |
4910 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | 4950 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); |
4911 | gk20a_init_vm(mm, vm, big_page_size, SZ_4K, | 4951 | gk20a_init_vm(mm, vm, |
4912 | mm->bar1.aperture_size - SZ_4K, | 4952 | big_page_size, |
4913 | mm->bar1.aperture_size, false, false, "bar1"); | 4953 | SZ_4K, /* Low hole */ |
4954 | mm->bar1.aperture_size - SZ_4K, /* Kernel reserved. */ | ||
4955 | mm->bar1.aperture_size, | ||
4956 | true, false, | ||
4957 | "bar1"); | ||
4914 | 4958 | ||
4915 | err = gk20a_alloc_inst_block(g, inst_block); | 4959 | err = gk20a_alloc_inst_block(g, inst_block); |
4916 | if (err) | 4960 | if (err) |
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
4932 | struct gk20a *g = gk20a_from_mm(mm); | 4976 | struct gk20a *g = gk20a_from_mm(mm); |
4933 | struct mem_desc *inst_block = &mm->pmu.inst_block; | 4977 | struct mem_desc *inst_block = &mm->pmu.inst_block; |
4934 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | 4978 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; |
4979 | u32 low_hole, aperture_size; | ||
4980 | |||
4981 | /* | ||
4982 | * No user region - so we will pass that as zero sized. | ||
4983 | */ | ||
4984 | low_hole = SZ_4K * 16; | ||
4985 | aperture_size = GK20A_PMU_VA_SIZE * 2; | ||
4935 | 4986 | ||
4936 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | 4987 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; |
4937 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | 4988 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); |
4938 | 4989 | ||
4939 | gk20a_init_vm(mm, vm, big_page_size, | 4990 | gk20a_init_vm(mm, vm, big_page_size, |
4940 | SZ_4K * 16, GK20A_PMU_VA_SIZE, | 4991 | low_hole, |
4941 | GK20A_PMU_VA_SIZE * 2, false, false, | 4992 | aperture_size - low_hole, |
4993 | aperture_size, | ||
4994 | true, | ||
4995 | false, | ||
4942 | "system"); | 4996 | "system"); |
4943 | 4997 | ||
4944 | err = gk20a_alloc_inst_block(g, inst_block); | 4998 | err = gk20a_alloc_inst_block(g, inst_block); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 5ef8ae25..394d1d25 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -270,11 +270,13 @@ struct vm_gk20a { | |||
270 | 270 | ||
271 | struct gk20a_mm_entry pdb; | 271 | struct gk20a_mm_entry pdb; |
272 | 272 | ||
273 | struct nvgpu_allocator vma[gmmu_nr_page_sizes]; | ||
274 | |||
275 | /* If necessary, split fixed from non-fixed. */ | 273 | /* If necessary, split fixed from non-fixed. */ |
276 | struct nvgpu_allocator fixed; | 274 | struct nvgpu_allocator fixed; |
277 | 275 | ||
276 | struct nvgpu_allocator *vma[gmmu_nr_page_sizes]; | ||
277 | struct nvgpu_allocator kernel; | ||
278 | struct nvgpu_allocator user; | ||
279 | |||
278 | struct rb_root mapped_buffers; | 280 | struct rb_root mapped_buffers; |
279 | 281 | ||
280 | struct list_head reserved_va_list; | 282 | struct list_head reserved_va_list; |
@@ -425,7 +427,7 @@ static inline int bar1_aperture_size_mb_gk20a(void) | |||
425 | return 16; /* 16MB is more than enough atm. */ | 427 | return 16; /* 16MB is more than enough atm. */ |
426 | } | 428 | } |
427 | 429 | ||
428 | /*The maximum GPU VA range supported */ | 430 | /* The maximum GPU VA range supported */ |
429 | #define NV_GMMU_VA_RANGE 38 | 431 | #define NV_GMMU_VA_RANGE 38 |
430 | 432 | ||
431 | /* The default userspace-visible GPU VA size */ | 433 | /* The default userspace-visible GPU VA size */ |
@@ -434,43 +436,39 @@ static inline int bar1_aperture_size_mb_gk20a(void) | |||
434 | /* The default kernel-reserved GPU VA size */ | 436 | /* The default kernel-reserved GPU VA size */ |
435 | #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) | 437 | #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) |
436 | 438 | ||
437 | /* | 439 | enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, |
438 | * The bottom 16GB of the space are used for small pages, the remaining high | 440 | u64 base, u64 size); |
439 | * memory is for large pages. | ||
440 | */ | ||
441 | static inline u64 __nv_gmmu_va_small_page_limit(void) | ||
442 | { | ||
443 | return ((u64)SZ_1G * 16); | ||
444 | } | ||
445 | |||
446 | static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) | ||
447 | { | ||
448 | struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big]; | ||
449 | |||
450 | if (!vm->big_pages) | ||
451 | return 0; | ||
452 | |||
453 | return addr >= nvgpu_alloc_base(a) && | ||
454 | addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a); | ||
455 | } | ||
456 | 441 | ||
457 | /* | 442 | /* |
458 | * This determines the PTE size for a given alloc. Used by both the GVA space | 443 | * This determines the PTE size for a given alloc. Used by both the GVA space |
459 | * allocator and the mm core code so that agreement can be reached on how to | 444 | * allocator and the mm core code so that agreement can be reached on how to |
460 | * map allocations. | 445 | * map allocations. |
446 | * | ||
447 | * The page size of a buffer is this: | ||
448 | * | ||
449 | * o If the VM doesn't support large pages then obviously small pages | ||
450 | * must be used. | ||
451 | * o If the base address is non-zero (fixed address map): | ||
452 | * - Attempt to find a reserved memory area and use the page size | ||
453 | * based on that. | ||
454 | * - If no reserved page size is available, default to small pages. | ||
455 | * o If the base is zero: | ||
456 | * - If the size is greater than or equal to the big page size, use big | ||
457 | * pages. | ||
458 | * - Otherwise use small pages. | ||
461 | */ | 459 | */ |
462 | static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, | 460 | static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, |
463 | u64 base, u64 size) | 461 | u64 base, u64 size) |
464 | { | 462 | { |
465 | /* | 463 | if (!vm->big_pages) |
466 | * Currently userspace is not ready for a true unified address space. | ||
467 | * As a result, even though the allocator supports mixed address spaces | ||
468 | * the address spaces must be treated as separate for now. | ||
469 | */ | ||
470 | if (__nv_gmmu_va_is_big_page_region(vm, base)) | ||
471 | return gmmu_page_size_big; | ||
472 | else | ||
473 | return gmmu_page_size_small; | 464 | return gmmu_page_size_small; |
465 | |||
466 | if (base) | ||
467 | return __get_pte_size_fixed_map(vm, base, size); | ||
468 | |||
469 | if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) | ||
470 | return gmmu_page_size_big; | ||
471 | return gmmu_page_size_small; | ||
474 | } | 472 | } |
475 | 473 | ||
476 | /* | 474 | /* |
@@ -797,6 +795,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, | |||
797 | 795 | ||
798 | void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); | 796 | void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); |
799 | 797 | ||
798 | int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); | ||
799 | |||
800 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; | 800 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; |
801 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; | 801 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; |
802 | 802 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h index dee9b562..d5a90c87 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h | |||
@@ -41,11 +41,15 @@ struct nvgpu_allocator_ops { | |||
41 | * regular and fixed allocations then free_fixed() does not need to | 41 | * regular and fixed allocations then free_fixed() does not need to |
42 | * be implemented. This behavior exists for legacy reasons and should | 42 | * be implemented. This behavior exists for legacy reasons and should |
43 | * not be propagated to new allocators. | 43 | * not be propagated to new allocators. |
44 | * | ||
45 | * For allocators where the @page_size field is not applicable it can | ||
46 | * be left as 0. Otherwise a valid page size should be passed (4k or | ||
47 | * what the large page size is). | ||
44 | */ | 48 | */ |
45 | u64 (*alloc_fixed)(struct nvgpu_allocator *allocator, | 49 | u64 (*alloc_fixed)(struct nvgpu_allocator *allocator, |
46 | u64 base, u64 len); | 50 | u64 base, u64 len, u32 page_size); |
47 | void (*free_fixed)(struct nvgpu_allocator *allocator, | 51 | void (*free_fixed)(struct nvgpu_allocator *allocator, |
48 | u64 base, u64 len); | 52 | u64 base, u64 len); |
49 | 53 | ||
50 | /* | 54 | /* |
51 | * Allow allocators to reserve space for carveouts. | 55 | * Allow allocators to reserve space for carveouts. |
@@ -213,7 +217,8 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, | |||
213 | u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len); | 217 | u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len); |
214 | void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr); | 218 | void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr); |
215 | 219 | ||
216 | u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); | 220 | u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len, |
221 | u32 page_size); | ||
217 | void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); | 222 | void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); |
218 | 223 | ||
219 | int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, | 224 | int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, |
@@ -298,5 +303,8 @@ void nvgpu_alloc_debugfs_init(struct device *dev); | |||
298 | } while (0) | 303 | } while (0) |
299 | 304 | ||
300 | #endif | 305 | #endif |
306 | #define balloc_pr(alloctor, format, arg...) \ | ||
307 | pr_info("%-25s %25s() " format, \ | ||
308 | alloctor->name, __func__, ##arg) | ||
301 | 309 | ||
302 | #endif /* NVGPU_ALLOCATOR_H */ | 310 | #endif /* NVGPU_ALLOCATOR_H */ |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 66c9344b..a21a020d 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) | |||
227 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 227 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
228 | WARN_ON(err || msg.ret); | 228 | WARN_ON(err || msg.ret); |
229 | 229 | ||
230 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); | 230 | if (nvgpu_alloc_initialized(&vm->kernel)) |
231 | if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) | 231 | nvgpu_alloc_destroy(&vm->kernel); |
232 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); | 232 | if (nvgpu_alloc_initialized(&vm->user)) |
233 | if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) | 233 | nvgpu_alloc_destroy(&vm->user); |
234 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); | 234 | if (nvgpu_alloc_initialized(&vm->fixed)) |
235 | nvgpu_alloc_destroy(&vm->fixed); | ||
235 | 236 | ||
236 | mutex_unlock(&vm->update_gmmu_lock); | 237 | mutex_unlock(&vm->update_gmmu_lock); |
237 | 238 | ||
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
273 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | 274 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; |
274 | struct mm_gk20a *mm = &g->mm; | 275 | struct mm_gk20a *mm = &g->mm; |
275 | struct vm_gk20a *vm; | 276 | struct vm_gk20a *vm; |
276 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, | 277 | u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit; |
277 | kernel_vma_start, kernel_vma_limit; | ||
278 | char name[32]; | 278 | char name[32]; |
279 | int err, i; | 279 | int err, i; |
280 | const bool userspace_managed = | 280 | const bool userspace_managed = |
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
306 | vm->mm = mm; | 306 | vm->mm = mm; |
307 | vm->as_share = as_share; | 307 | vm->as_share = as_share; |
308 | 308 | ||
309 | /* Set up vma pointers. */ | ||
310 | vm->vma[0] = &vm->user; | ||
311 | vm->vma[1] = &vm->user; | ||
312 | vm->vma[2] = &vm->kernel; | ||
313 | |||
309 | for (i = 0; i < gmmu_nr_page_sizes; i++) | 314 | for (i = 0; i < gmmu_nr_page_sizes; i++) |
310 | vm->gmmu_page_sizes[i] = gmmu_page_sizes[i]; | 315 | vm->gmmu_page_sizes[i] = gmmu_page_sizes[i]; |
311 | 316 | ||
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
328 | vm->handle = p->handle; | 333 | vm->handle = p->handle; |
329 | 334 | ||
330 | /* setup vma limits */ | 335 | /* setup vma limits */ |
331 | small_vma_start = vm->va_start; | 336 | user_vma_start = vm->va_start; |
332 | 337 | user_vma_limit = vm->va_limit - mm->channel.kernel_size; | |
333 | if (vm->big_pages) { | ||
334 | /* First 16GB of the address space goes towards small | ||
335 | * pages. The kernel reserved pages are at the end. | ||
336 | * What ever remains is allocated to large pages. | ||
337 | */ | ||
338 | small_vma_limit = __nv_gmmu_va_small_page_limit(); | ||
339 | large_vma_start = small_vma_limit; | ||
340 | large_vma_limit = vm->va_limit - mm->channel.kernel_size; | ||
341 | } else { | ||
342 | small_vma_limit = vm->va_limit - mm->channel.kernel_size; | ||
343 | large_vma_start = 0; | ||
344 | large_vma_limit = 0; | ||
345 | } | ||
346 | 338 | ||
347 | kernel_vma_start = vm->va_limit - mm->channel.kernel_size; | 339 | kernel_vma_start = vm->va_limit - mm->channel.kernel_size; |
348 | kernel_vma_limit = vm->va_limit; | 340 | kernel_vma_limit = vm->va_limit; |
349 | 341 | ||
350 | gk20a_dbg_info( | 342 | gk20a_dbg_info( |
351 | "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", | 343 | "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", |
352 | small_vma_start, small_vma_limit, | 344 | user_vma_start, user_vma_limit, |
353 | large_vma_start, large_vma_limit, | ||
354 | kernel_vma_start, kernel_vma_limit); | 345 | kernel_vma_start, kernel_vma_limit); |
355 | 346 | ||
356 | /* check that starts do not exceed limits */ | 347 | WARN_ON(user_vma_start > user_vma_limit); |
357 | WARN_ON(small_vma_start > small_vma_limit); | ||
358 | WARN_ON(large_vma_start > large_vma_limit); | ||
359 | /* kernel_vma must also be non-zero */ | ||
360 | WARN_ON(kernel_vma_start >= kernel_vma_limit); | 348 | WARN_ON(kernel_vma_start >= kernel_vma_limit); |
361 | 349 | ||
362 | if (small_vma_start > small_vma_limit || | 350 | if (user_vma_start > user_vma_limit || |
363 | large_vma_start > large_vma_limit || | ||
364 | kernel_vma_start >= kernel_vma_limit) { | 351 | kernel_vma_start >= kernel_vma_limit) { |
365 | err = -EINVAL; | 352 | err = -EINVAL; |
366 | goto clean_up_share; | 353 | goto clean_up_share; |
367 | } | 354 | } |
368 | 355 | ||
369 | if (small_vma_start < small_vma_limit) { | 356 | if (user_vma_start < user_vma_limit) { |
370 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | 357 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, |
371 | gmmu_page_sizes[gmmu_page_size_small] >> 10); | 358 | gmmu_page_sizes[gmmu_page_size_small] >> 10); |
359 | if (!gk20a_big_pages_possible(vm, user_vma_start, | ||
360 | user_vma_limit - user_vma_start)) | ||
361 | vm->big_pages = false; | ||
372 | 362 | ||
373 | err = __nvgpu_buddy_allocator_init( | 363 | err = __nvgpu_buddy_allocator_init( |
374 | g, | 364 | g, |
375 | &vm->vma[gmmu_page_size_small], | 365 | vm->vma[gmmu_page_size_small], |
376 | vm, name, | 366 | vm, name, |
377 | small_vma_start, | 367 | user_vma_start, |
378 | small_vma_limit - small_vma_start, | 368 | user_vma_limit - user_vma_start, |
379 | SZ_4K, | 369 | SZ_4K, |
380 | GPU_BALLOC_MAX_ORDER, | 370 | GPU_BALLOC_MAX_ORDER, |
381 | GPU_ALLOC_GVA_SPACE); | 371 | GPU_ALLOC_GVA_SPACE); |
382 | if (err) | 372 | if (err) |
383 | goto clean_up_share; | 373 | goto clean_up_share; |
384 | } | 374 | } else { |
385 | 375 | /* | |
386 | if (large_vma_start < large_vma_limit) { | 376 | * Make these allocator pointers point to the kernel allocator |
387 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | 377 | * since we still use the legacy notion of page size to choose |
388 | gmmu_page_sizes[gmmu_page_size_big] >> 10); | 378 | * the allocator. |
389 | err = __nvgpu_buddy_allocator_init( | 379 | */ |
390 | g, | 380 | vm->vma[0] = &vm->kernel; |
391 | &vm->vma[gmmu_page_size_big], | 381 | vm->vma[1] = &vm->kernel; |
392 | vm, name, | ||
393 | large_vma_start, | ||
394 | large_vma_limit - large_vma_start, | ||
395 | big_page_size, | ||
396 | GPU_BALLOC_MAX_ORDER, | ||
397 | GPU_ALLOC_GVA_SPACE); | ||
398 | if (err) | ||
399 | goto clean_up_small_allocator; | ||
400 | } | 382 | } |
401 | 383 | ||
402 | snprintf(name, sizeof(name), "gk20a_as_%dKB-sys", | 384 | snprintf(name, sizeof(name), "gk20a_as_%dKB-sys", |
403 | gmmu_page_sizes[gmmu_page_size_kernel] >> 10); | 385 | gmmu_page_sizes[gmmu_page_size_kernel] >> 10); |
386 | if (!gk20a_big_pages_possible(vm, kernel_vma_start, | ||
387 | kernel_vma_limit - kernel_vma_start)) | ||
388 | vm->big_pages = false; | ||
389 | |||
404 | /* | 390 | /* |
405 | * kernel reserved VMA is at the end of the aperture | 391 | * kernel reserved VMA is at the end of the aperture |
406 | */ | 392 | */ |
407 | err = __nvgpu_buddy_allocator_init( | 393 | err = __nvgpu_buddy_allocator_init( |
408 | g, | 394 | g, |
409 | &vm->vma[gmmu_page_size_kernel], | 395 | vm->vma[gmmu_page_size_kernel], |
410 | vm, name, | 396 | vm, name, |
411 | kernel_vma_start, | 397 | kernel_vma_start, |
412 | kernel_vma_limit - kernel_vma_start, | 398 | kernel_vma_limit - kernel_vma_start, |
413 | SZ_4K, | 399 | SZ_4K, |
414 | GPU_BALLOC_MAX_ORDER, | 400 | GPU_BALLOC_MAX_ORDER, |
415 | GPU_ALLOC_GVA_SPACE); | 401 | GPU_ALLOC_GVA_SPACE); |
416 | if (err) | 402 | if (err) |
417 | goto clean_up_big_allocator; | 403 | goto clean_up_user_allocator; |
418 | 404 | ||
419 | vm->mapped_buffers = RB_ROOT; | 405 | vm->mapped_buffers = RB_ROOT; |
420 | 406 | ||
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
426 | 412 | ||
427 | return 0; | 413 | return 0; |
428 | 414 | ||
429 | clean_up_big_allocator: | 415 | clean_up_user_allocator: |
430 | if (large_vma_start < large_vma_limit) | 416 | if (user_vma_start < user_vma_limit) |
431 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); | 417 | nvgpu_alloc_destroy(&vm->user); |
432 | clean_up_small_allocator: | ||
433 | if (small_vma_start < small_vma_limit) | ||
434 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); | ||
435 | clean_up_share: | 418 | clean_up_share: |
436 | msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; | 419 | msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; |
437 | msg.handle = vgpu_get_handle(g); | 420 | msg.handle = vgpu_get_handle(g); |