diff options
author | Alex Waterman <alexw@nvidia.com> | 2015-06-17 13:31:08 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-31 19:23:07 -0500 |
commit | d630f1d99f60b1c2ec87506a2738bac4d1895b07 (patch) | |
tree | 5b9cad58f585424a64e7b675d503a87bbcada254 /drivers/gpu/nvgpu/common/mm/buddy_allocator.c | |
parent | 793791ebb7ddbb34f0aaf3e300b24ed24aa76661 (diff) |
gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to make the small page allocator
and the large page allocator into pointers (where they used to be just
structs). Then assign each of those pointers to the same actual
allocator since the buddy allocator has supported mixed page sizes
since its inception.
For the rest of the driver some changes had to be made in order to
actually support mixed pages in a single address space.
1. Unifying the allocation page size determination
Since the allocation and map operations happen at distinct
times both mapping and allocation of GVA space must agree
on page size. This is because the allocation has to separate
allocations into separate PDEs to avoid the necessity of
supporting mixed PDEs.
To this end a function __get_pte_size() was introduced which
is used both by the balloc code and the core GPU MM code. It
determines page size based only on the length of the mapping/
allocation.
2. Fixed address allocation + page size
Similar to regular mappings/GVA allocations, fixed address
mapping page size determination had to be modified. In the
past the address of the mapping determined page size since
the address space split was by address (low addresses were
small pages, high addresses large pages). Since that is no
longer the case the page size field in the reserve memory
ioctl is now honored by the mapping code. When, for instance,
CUDA makes a memory reservation it specifies small or large
pages. When CUDA requests mappings to be made within that
address range the page size is then looked up in the reserved
memory struct.
Fixed address reservations were also modified to now always
allocate at a PDE granularity (64M or 128M depending on
large page size). This prevents non-fixed allocations from
ending up in the same PDE and causing kernel panics or GMMU
faults.
3. The rest...
The rest of the changes are just by products of the above.
Lots of places required minor updates to use a pointer to
the GVA allocator struct instead of the struct itself.
Lastly, this change is not truly complete. More work remains to be
done in order to fully remove the notion that there was such a thing
as separate address spaces for different page sizes. Basically after
this patch what remains is cleanup and proper documentation.
Bug 1396644
Bug 1729947
Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/buddy_allocator.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 66 |
1 file changed, 45 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index 39a53801..eee0b634 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c | |||
@@ -484,8 +484,9 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, | |||
484 | bud = list_first_entry(balloc_get_order_list(a, order), | 484 | bud = list_first_entry(balloc_get_order_list(a, order), |
485 | struct nvgpu_buddy, buddy_entry); | 485 | struct nvgpu_buddy, buddy_entry); |
486 | 486 | ||
487 | if (bud->pte_size != BALLOC_PTE_SIZE_ANY && | 487 | if (pte_size != BALLOC_PTE_SIZE_ANY && |
488 | bud->pte_size != pte_size) | 488 | pte_size != bud->pte_size && |
489 | bud->pte_size != BALLOC_PTE_SIZE_ANY) | ||
489 | return NULL; | 490 | return NULL; |
490 | 491 | ||
491 | return bud; | 492 | return bud; |
@@ -643,7 +644,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a, | |||
643 | * necessary for this buddy to exist as well. | 644 | * necessary for this buddy to exist as well. |
644 | */ | 645 | */ |
645 | static struct nvgpu_buddy *__balloc_make_fixed_buddy( | 646 | static struct nvgpu_buddy *__balloc_make_fixed_buddy( |
646 | struct nvgpu_buddy_allocator *a, u64 base, u64 order) | 647 | struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size) |
647 | { | 648 | { |
648 | struct nvgpu_buddy *bud = NULL; | 649 | struct nvgpu_buddy *bud = NULL; |
649 | struct list_head *order_list; | 650 | struct list_head *order_list; |
@@ -664,6 +665,20 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( | |||
664 | order_list = balloc_get_order_list(a, cur_order); | 665 | order_list = balloc_get_order_list(a, cur_order); |
665 | list_for_each_entry(bud, order_list, buddy_entry) { | 666 | list_for_each_entry(bud, order_list, buddy_entry) { |
666 | if (bud->start == cur_base) { | 667 | if (bud->start == cur_base) { |
668 | /* | ||
669 | * Make sure page size matches if it's smaller | ||
670 | * than a PDE sized buddy. | ||
671 | */ | ||
672 | if (bud->order <= a->pte_blk_order && | ||
673 | bud->pte_size != BALLOC_PTE_SIZE_ANY && | ||
674 | bud->pte_size != pte_size) { | ||
675 | /* Welp, that's the end of that. */ | ||
676 | alloc_dbg(balloc_owner(a), | ||
677 | "Fixed buddy PTE " | ||
678 | "size mismatch!\n"); | ||
679 | return NULL; | ||
680 | } | ||
681 | |||
667 | found = 1; | 682 | found = 1; |
668 | break; | 683 | break; |
669 | } | 684 | } |
@@ -683,7 +698,10 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( | |||
683 | 698 | ||
684 | /* Split this buddy as necessary until we get the target buddy. */ | 699 | /* Split this buddy as necessary until we get the target buddy. */ |
685 | while (bud->start != base || bud->order != order) { | 700 | while (bud->start != base || bud->order != order) { |
686 | if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { | 701 | if (balloc_split_buddy(a, bud, pte_size)) { |
702 | alloc_dbg(balloc_owner(a), | ||
703 | "split buddy failed? {0x%llx, %llu}\n", | ||
704 | bud->start, bud->order); | ||
687 | balloc_coalesce(a, bud); | 705 | balloc_coalesce(a, bud); |
688 | return NULL; | 706 | return NULL; |
689 | } | 707 | } |
@@ -700,7 +718,7 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( | |||
700 | 718 | ||
701 | static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, | 719 | static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, |
702 | struct nvgpu_fixed_alloc *falloc, | 720 | struct nvgpu_fixed_alloc *falloc, |
703 | u64 base, u64 len) | 721 | u64 base, u64 len, int pte_size) |
704 | { | 722 | { |
705 | u64 shifted_base, inc_base; | 723 | u64 shifted_base, inc_base; |
706 | u64 align_order; | 724 | u64 align_order; |
@@ -731,7 +749,7 @@ static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, | |||
731 | 749 | ||
732 | bud = __balloc_make_fixed_buddy(a, | 750 | bud = __balloc_make_fixed_buddy(a, |
733 | balloc_base_unshift(a, inc_base), | 751 | balloc_base_unshift(a, inc_base), |
734 | align_order); | 752 | align_order, pte_size); |
735 | if (!bud) { | 753 | if (!bud) { |
736 | alloc_dbg(balloc_owner(a), | 754 | alloc_dbg(balloc_owner(a), |
737 | "Fixed buddy failed: {0x%llx, %llu}!\n", | 755 | "Fixed buddy failed: {0x%llx, %llu}!\n", |
@@ -817,17 +835,8 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len) | |||
817 | return 0; | 835 | return 0; |
818 | } | 836 | } |
819 | 837 | ||
820 | /* | ||
821 | * For now pass the base address of the allocator's region to | ||
822 | * __get_pte_size(). This ensures we get the right page size for | ||
823 | * the alloc but we don't have to know what the real address is | ||
824 | * going to be quite yet. | ||
825 | * | ||
826 | * TODO: once userspace supports a unified address space pass 0 for | ||
827 | * the base. This will make only 'len' affect the PTE size. | ||
828 | */ | ||
829 | if (a->flags & GPU_ALLOC_GVA_SPACE) | 838 | if (a->flags & GPU_ALLOC_GVA_SPACE) |
830 | pte_size = __get_pte_size(a->vm, a->base, len); | 839 | pte_size = __get_pte_size(a->vm, 0, len); |
831 | else | 840 | else |
832 | pte_size = BALLOC_PTE_SIZE_ANY; | 841 | pte_size = BALLOC_PTE_SIZE_ANY; |
833 | 842 | ||
@@ -858,8 +867,9 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len) | |||
858 | * Requires @__a to be locked. | 867 | * Requires @__a to be locked. |
859 | */ | 868 | */ |
860 | static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | 869 | static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, |
861 | u64 base, u64 len) | 870 | u64 base, u64 len, u32 page_size) |
862 | { | 871 | { |
872 | int pte_size = BALLOC_PTE_SIZE_ANY; | ||
863 | u64 ret, real_bytes = 0; | 873 | u64 ret, real_bytes = 0; |
864 | struct nvgpu_buddy *bud; | 874 | struct nvgpu_buddy *bud; |
865 | struct nvgpu_fixed_alloc *falloc = NULL; | 875 | struct nvgpu_fixed_alloc *falloc = NULL; |
@@ -874,6 +884,16 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | |||
874 | if (len == 0) | 884 | if (len == 0) |
875 | goto fail; | 885 | goto fail; |
876 | 886 | ||
887 | /* Check that the page size is valid. */ | ||
888 | if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) { | ||
889 | if (page_size == a->vm->big_page_size) | ||
890 | pte_size = gmmu_page_size_big; | ||
891 | else if (page_size == SZ_4K) | ||
892 | pte_size = gmmu_page_size_small; | ||
893 | else | ||
894 | goto fail; | ||
895 | } | ||
896 | |||
877 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); | 897 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); |
878 | if (!falloc) | 898 | if (!falloc) |
879 | goto fail; | 899 | goto fail; |
@@ -889,7 +909,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | |||
889 | goto fail_unlock; | 909 | goto fail_unlock; |
890 | } | 910 | } |
891 | 911 | ||
892 | ret = __balloc_do_alloc_fixed(a, falloc, base, len); | 912 | ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size); |
893 | if (!ret) { | 913 | if (!ret) { |
894 | alloc_dbg(balloc_owner(a), | 914 | alloc_dbg(balloc_owner(a), |
895 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", | 915 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", |
@@ -927,13 +947,13 @@ fail: | |||
927 | * Please do not use this function unless _absolutely_ necessary. | 947 | * Please do not use this function unless _absolutely_ necessary. |
928 | */ | 948 | */ |
929 | static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, | 949 | static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, |
930 | u64 base, u64 len) | 950 | u64 base, u64 len, u32 page_size) |
931 | { | 951 | { |
932 | u64 alloc; | 952 | u64 alloc; |
933 | struct nvgpu_buddy_allocator *a = __a->priv; | 953 | struct nvgpu_buddy_allocator *a = __a->priv; |
934 | 954 | ||
935 | alloc_lock(__a); | 955 | alloc_lock(__a); |
936 | alloc = __nvgpu_balloc_fixed_buddy(__a, base, len); | 956 | alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size); |
937 | a->alloc_made = 1; | 957 | a->alloc_made = 1; |
938 | alloc_unlock(__a); | 958 | alloc_unlock(__a); |
939 | 959 | ||
@@ -1034,7 +1054,7 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a, | |||
1034 | } | 1054 | } |
1035 | 1055 | ||
1036 | /* Should not be possible to fail... */ | 1056 | /* Should not be possible to fail... */ |
1037 | addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length); | 1057 | addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0); |
1038 | if (!addr) { | 1058 | if (!addr) { |
1039 | err = -ENOMEM; | 1059 | err = -ENOMEM; |
1040 | pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); | 1060 | pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); |
@@ -1310,6 +1330,10 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
1310 | alloc_dbg(__a, " base 0x%llx\n", a->base); | 1330 | alloc_dbg(__a, " base 0x%llx\n", a->base); |
1311 | alloc_dbg(__a, " size 0x%llx\n", a->length); | 1331 | alloc_dbg(__a, " size 0x%llx\n", a->length); |
1312 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | 1332 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); |
1333 | if (flags & GPU_ALLOC_GVA_SPACE) | ||
1334 | alloc_dbg(balloc_owner(a), | ||
1335 | " pde_size 0x%llx\n", | ||
1336 | balloc_order_to_len(a, a->pte_blk_order)); | ||
1313 | alloc_dbg(__a, " max_order %llu\n", a->max_order); | 1337 | alloc_dbg(__a, " max_order %llu\n", a->max_order); |
1314 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | 1338 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); |
1315 | 1339 | ||