 drivers/gpu/nvgpu/common/mm/bitmap_allocator.c |   5
 drivers/gpu/nvgpu/common/mm/buddy_allocator.c  |  66
 drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c  |   5
 drivers/gpu/nvgpu/common/mm/page_allocator.c   |  11
 drivers/gpu/nvgpu/gk20a/as_gk20a.c             |   2
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c             | 282
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h             |  60
 drivers/gpu/nvgpu/include/nvgpu/allocator.h    |  14
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c               | 115
 9 files changed, 318 insertions(+), 242 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 6f267c85..5042980f 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -55,8 +55,11 @@ static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
         return ba->base + ba->length;
 }
 
+/*
+ * @page_size is ignored.
+ */
 static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
-                                    u64 base, u64 len)
+                                    u64 base, u64 len, u32 page_size)
 {
         struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
         u64 blks, offs, ret;
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 39a53801..eee0b634 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -484,8 +484,9 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
         bud = list_first_entry(balloc_get_order_list(a, order),
                                struct nvgpu_buddy, buddy_entry);
 
-        if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
-            bud->pte_size != pte_size)
+        if (pte_size != BALLOC_PTE_SIZE_ANY &&
+            pte_size != bud->pte_size &&
+            bud->pte_size != BALLOC_PTE_SIZE_ANY)
                 return NULL;
 
         return bud;
@@ -643,7 +644,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
  * necessary for this buddy to exist as well.
  */
 static struct nvgpu_buddy *__balloc_make_fixed_buddy(
-        struct nvgpu_buddy_allocator *a, u64 base, u64 order)
+        struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size)
 {
         struct nvgpu_buddy *bud = NULL;
         struct list_head *order_list;
@@ -664,6 +665,20 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
                 order_list = balloc_get_order_list(a, cur_order);
                 list_for_each_entry(bud, order_list, buddy_entry) {
                         if (bud->start == cur_base) {
+                                /*
+                                 * Make sure page size matches if it's smaller
+                                 * than a PDE sized buddy.
+                                 */
+                                if (bud->order <= a->pte_blk_order &&
+                                    bud->pte_size != BALLOC_PTE_SIZE_ANY &&
+                                    bud->pte_size != pte_size) {
+                                        /* Welp, that's the end of that. */
+                                        alloc_dbg(balloc_owner(a),
+                                                  "Fixed buddy PTE "
+                                                  "size mismatch!\n");
+                                        return NULL;
+                                }
+
                                 found = 1;
                                 break;
                         }
@@ -683,7 +698,10 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 
         /* Split this buddy as necessary until we get the target buddy. */
         while (bud->start != base || bud->order != order) {
-                if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
+                if (balloc_split_buddy(a, bud, pte_size)) {
+                        alloc_dbg(balloc_owner(a),
+                                  "split buddy failed? {0x%llx, %llu}\n",
+                                  bud->start, bud->order);
                         balloc_coalesce(a, bud);
                         return NULL;
                 }
@@ -700,7 +718,7 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 
 static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
                                    struct nvgpu_fixed_alloc *falloc,
-                                   u64 base, u64 len)
+                                   u64 base, u64 len, int pte_size)
 {
         u64 shifted_base, inc_base;
         u64 align_order;
@@ -731,7 +749,7 @@ static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
 
                 bud = __balloc_make_fixed_buddy(a,
                                         balloc_base_unshift(a, inc_base),
-                                        align_order);
+                                        align_order, pte_size);
                 if (!bud) {
                         alloc_dbg(balloc_owner(a),
                                   "Fixed buddy failed: {0x%llx, %llu}!\n",
@@ -817,17 +835,8 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
                 return 0;
         }
 
-        /*
-         * For now pass the base address of the allocator's region to
-         * __get_pte_size(). This ensures we get the right page size for
-         * the alloc but we don't have to know what the real address is
-         * going to be quite yet.
-         *
-         * TODO: once userspace supports a unified address space pass 0 for
-         * the base. This will make only 'len' affect the PTE size.
-         */
         if (a->flags & GPU_ALLOC_GVA_SPACE)
-                pte_size = __get_pte_size(a->vm, a->base, len);
+                pte_size = __get_pte_size(a->vm, 0, len);
         else
                 pte_size = BALLOC_PTE_SIZE_ANY;
 
@@ -858,8 +867,9 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
  * Requires @__a to be locked.
  */
 static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
-                                      u64 base, u64 len)
+                                      u64 base, u64 len, u32 page_size)
 {
+        int pte_size = BALLOC_PTE_SIZE_ANY;
         u64 ret, real_bytes = 0;
         struct nvgpu_buddy *bud;
         struct nvgpu_fixed_alloc *falloc = NULL;
@@ -874,6 +884,16 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
         if (len == 0)
                 goto fail;
 
+        /* Check that the page size is valid. */
+        if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) {
+                if (page_size == a->vm->big_page_size)
+                        pte_size = gmmu_page_size_big;
+                else if (page_size == SZ_4K)
+                        pte_size = gmmu_page_size_small;
+                else
+                        goto fail;
+        }
+
         falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
         if (!falloc)
                 goto fail;
@@ -889,7 +909,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
                 goto fail_unlock;
         }
 
-        ret = __balloc_do_alloc_fixed(a, falloc, base, len);
+        ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size);
         if (!ret) {
                 alloc_dbg(balloc_owner(a),
                           "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
@@ -927,13 +947,13 @@ fail:
  * Please do not use this function unless _absolutely_ necessary.
  */
 static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
-                                    u64 base, u64 len)
+                                    u64 base, u64 len, u32 page_size)
 {
         u64 alloc;
         struct nvgpu_buddy_allocator *a = __a->priv;
 
         alloc_lock(__a);
-        alloc = __nvgpu_balloc_fixed_buddy(__a, base, len);
+        alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size);
         a->alloc_made = 1;
         alloc_unlock(__a);
 
@@ -1034,7 +1054,7 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
         }
 
         /* Should not be possible to fail... */
-        addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length);
+        addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0);
         if (!addr) {
                 err = -ENOMEM;
                 pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
@@ -1310,6 +1330,10 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
         alloc_dbg(__a, "  base      0x%llx\n", a->base);
         alloc_dbg(__a, "  size      0x%llx\n", a->length);
         alloc_dbg(__a, "  blk_size  0x%llx\n", a->blk_size);
+        if (flags & GPU_ALLOC_GVA_SPACE)
+                alloc_dbg(balloc_owner(a),
+                          "  pde_size  0x%llx\n",
+                          balloc_order_to_len(a, a->pte_blk_order));
         alloc_dbg(__a, "  max_order %llu\n", a->max_order);
         alloc_dbg(__a, "  flags     0x%llx\n", a->flags);
 
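The net effect of the buddy-allocator changes: fixed allocations now carry an
explicit page size, and __nvgpu_balloc_fixed_buddy() fails any request whose
page size is neither 4K nor the VM's big page size when the allocator manages
a GVA space with big pages enabled. A minimal standalone sketch of that
validation, using stand-in constants rather than the real nvgpu headers:

```c
#include <stdio.h>

/* Stand-ins for the nvgpu constants used by the hunk above. */
#define SZ_4K               0x1000u
#define BALLOC_PTE_SIZE_ANY (-1)
enum { gmmu_page_size_small, gmmu_page_size_big };

/*
 * Mirrors the "Check that the page size is valid" block added to
 * __nvgpu_balloc_fixed_buddy(): map the caller's page_size to a PTE
 * size, or report failure (-2) for anything else.
 */
static int resolve_pte_size(int gva_space, int big_pages,
                            unsigned int big_page_size,
                            unsigned int page_size)
{
        int pte_size = BALLOC_PTE_SIZE_ANY;

        if (gva_space && big_pages) {
                if (page_size == big_page_size)
                        pte_size = gmmu_page_size_big;
                else if (page_size == SZ_4K)
                        pte_size = gmmu_page_size_small;
                else
                        return -2;      /* the alloc would goto fail */
        }
        return pte_size;
}

int main(void)
{
        /* GVA space, big pages on, 128KB big page size. */
        printf("%d\n", resolve_pte_size(1, 1, 0x20000, 0x20000)); /* 1: big   */
        printf("%d\n", resolve_pte_size(1, 1, 0x20000, SZ_4K));   /* 0: small */
        printf("%d\n", resolve_pte_size(1, 1, 0x20000, 0x10000)); /* -2: fail */
        printf("%d\n", resolve_pte_size(0, 0, 0x20000, 0));       /* -1: any  */
        return 0;
}
```

Rejecting bad sizes up front keeps a fixed alloc from producing buddies whose
PTE size disagrees with the mapping that will eventually cover them.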
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index ebd779c0..cf8c4569 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -77,10 +77,11 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
         a->ops->free(a, addr);
 }
 
-u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
+u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
+                      u32 page_size)
 {
         if (a->ops->alloc_fixed)
-                return a->ops->alloc_fixed(a, base, len);
+                return a->ops->alloc_fixed(a, base, len, page_size);
 
         return 0;
 }
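The dispatcher change is mechanical: nvgpu_alloc_fixed() grows a page_size
argument and forwards it to whichever backend implements alloc_fixed(). A
runnable toy of the same shape; the struct and ops types here are hypothetical
stand-ins for the nvgpu ones:

```c
#include <stdio.h>

struct nvgpu_allocator;

struct nvgpu_allocator_ops {
        unsigned long long (*alloc_fixed)(struct nvgpu_allocator *a,
                                          unsigned long long base,
                                          unsigned long long len,
                                          unsigned int page_size);
};

struct nvgpu_allocator {
        const struct nvgpu_allocator_ops *ops;
};

/* Same shape as the patched dispatcher: optional op, page_size forwarded. */
static unsigned long long nvgpu_alloc_fixed(struct nvgpu_allocator *a,
                                            unsigned long long base,
                                            unsigned long long len,
                                            unsigned int page_size)
{
        if (a->ops->alloc_fixed)
                return a->ops->alloc_fixed(a, base, len, page_size);
        return 0;
}

/* Trivial backend that pretends the requested range was free. */
static unsigned long long toy_alloc_fixed(struct nvgpu_allocator *a,
                                          unsigned long long base,
                                          unsigned long long len,
                                          unsigned int page_size)
{
        (void)a; (void)len; (void)page_size;
        return base;
}

int main(void)
{
        const struct nvgpu_allocator_ops ops = { .alloc_fixed = toy_alloc_fixed };
        struct nvgpu_allocator a = { .ops = &ops };

        /* page_size == 0 is the documented "not applicable" value. */
        printf("0x%llx\n", nvgpu_alloc_fixed(&a, 0x100000ULL, 0x4000ULL, 0));
        return 0;
}
```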
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index c61b2238..96f8f242 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -648,7 +648,7 @@ done:
 }
 
 static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
-        struct nvgpu_page_allocator *a, u64 base, u64 length)
+        struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
         struct nvgpu_page_alloc *alloc;
         struct page_alloc_chunk *c;
@@ -658,7 +658,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
         if (!alloc || !c)
                 goto fail;
 
-        alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length);
+        alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
         if (!alloc->base) {
                 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
                 goto fail;
@@ -680,8 +680,11 @@ fail:
         return ERR_PTR(-ENOMEM);
 }
 
+/*
+ * @page_size is ignored.
+ */
 static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
-                                  u64 base, u64 len)
+                                  u64 base, u64 len, u32 page_size)
 {
         struct nvgpu_page_allocator *a = page_allocator(__a);
         struct nvgpu_page_alloc *alloc = NULL;
@@ -694,7 +697,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
         alloc_lock(__a);
 
-        alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len);
+        alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
         if (IS_ERR(alloc)) {
                 alloc_unlock(__a);
                 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 07601d42..adf0297b 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -281,7 +281,7 @@ static int gk20a_as_ioctl_get_va_regions(
                 struct nvgpu_as_va_region region;
                 struct nvgpu_allocator *vma =
                         nvgpu_alloc_initialized(&vm->fixed) ?
-                        &vm->fixed : &vm->vma[i];
+                        &vm->fixed : vm->vma[i];
 
                 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdbaef79..83bbcb54 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
         mutex_init(&mm->l2_op_lock);
 
         /*TBD: make channel vm size configurable */
-        mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
+        mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
+                NV_MM_DEFAULT_KERNEL_SIZE;
         mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
         gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                       enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
-        struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+        struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
         u64 offset;
         u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
                      u64 offset, u64 size,
                      enum gmmu_pgsz_gk20a pgsz_idx)
 {
-        struct nvgpu_allocator *vma = &vm->vma[pgsz_idx];
+        struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
 
         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
                        vma->name, offset, size);
@@ -1790,13 +1791,7 @@ struct buffer_attrs {
 static void gmmu_select_page_size(struct vm_gk20a *vm,
                                   struct buffer_attrs *bfr)
 {
-        int i;
-        /* choose the biggest first (top->bottom) */
-        for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
-                if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
-                        bfr->pgsz_idx = i;
-                        break;
-                }
+        bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
 }
 
 static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
          * the alignment determined by gmmu_select_page_size().
          */
         if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-                int pgsz_idx =
-                        __nv_gmmu_va_is_big_page_region(vm, offset_align) ?
-                        gmmu_page_size_big : gmmu_page_size_small;
+                int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
                 if (pgsz_idx > bfr.pgsz_idx) {
                         gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
                                   offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
         u64 addr = 0;
 
         if (at)
-                addr = nvgpu_alloc_fixed(allocator, at, size);
+                addr = nvgpu_alloc_fixed(allocator, at, size, 0);
         else
                 addr = nvgpu_alloc(allocator, size);
 
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
          *
          * !!! TODO: cleanup.
          */
-        sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
-                                             vm->va_limit -
-                                             mm->channel.kernel_size,
-                                             512 * PAGE_SIZE);
+        sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
+                                             vm->va_limit -
+                                             mm->channel.kernel_size,
+                                             512 * PAGE_SIZE,
+                                             SZ_4K);
         if (!sema_sea->gpu_va) {
-                nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+                nvgpu_free(&vm->kernel, sema_sea->gpu_va);
                 gk20a_vm_put(vm);
                 return -ENOMEM;
         }
@@ -4273,14 +4267,78 @@
         err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
         if (err) {
                 gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
-                nvgpu_free(&vm->vma[gmmu_page_size_small],
+                nvgpu_free(vm->vma[gmmu_page_size_small],
                            vm->sema_pool->gpu_va);
                 gk20a_vm_put(vm);
         }
 
         return 0;
 }
 
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+        u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+        if (base & mask || size & mask)
+                return 0;
+        return 1;
+}
+
+/*
+ * Attempt to find a reserved memory area to determine PTE size for the passed
+ * mapping. If no reserved area can be found use small pages but drop a warning.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+                                              u64 base, u64 size)
+{
+        struct vm_reserved_va_node *node;
+
+        node = addr_to_reservation(vm, base);
+        if (!node)
+                return gmmu_page_size_small;
+
+        return node->pgsz_idx;
+}
+
+/**
+ * gk20a_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *             the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
 int gk20a_init_vm(struct mm_gk20a *mm,
                   struct vm_gk20a *vm,
                   u32 big_page_size,
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 {
         int err, i;
         char alloc_name[32];
-        u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-                kernel_vma_start, kernel_vma_limit;
+        u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
         u32 pde_lo, pde_hi;
         struct gk20a *g = mm->g;
 
         /* note: this must match gmmu_pgsz_gk20a enum */
         u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
 
-        WARN_ON(kernel_reserved + low_hole > aperture_size);
-        if (kernel_reserved > aperture_size)
+        if (WARN_ON(kernel_reserved + low_hole > aperture_size))
                 return -ENOMEM;
 
         vm->mm = mm;
 
+        /* Set up vma pointers. */
+        vm->vma[0] = &vm->user;
+        vm->vma[1] = &vm->user;
+        vm->vma[2] = &vm->kernel;
+
         vm->va_start = low_hole;
         vm->va_limit = aperture_size;
         vm->big_pages = big_pages;
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
         gk20a_dbg_info("small page-size (%dKB)",
                        vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-
-        gk20a_dbg_info("big page-size (%dKB)",
-                       vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-
+        gk20a_dbg_info("big page-size (%dKB) (%s)\n",
+                       vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name);
         gk20a_dbg_info("kernel page-size (%dKB)",
                        vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
 
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                 goto clean_up_pdes;
 
         /* setup vma limits */
-        small_vma_start = low_hole;
-
-        if (big_pages) {
-                /* First 16GB of the address space goes towards small
-                 * pages. What ever remains is allocated to large
-                 * pages. */
-                small_vma_limit = __nv_gmmu_va_small_page_limit();
-                large_vma_start = small_vma_limit;
-                large_vma_limit = vm->va_limit - kernel_reserved;
-        } else {
-                small_vma_limit = vm->va_limit - kernel_reserved;
-                large_vma_start = 0;
-                large_vma_limit = 0;
-        }
+        user_vma_start = low_hole;
+        user_vma_limit = vm->va_limit - kernel_reserved;
 
         kernel_vma_start = vm->va_limit - kernel_reserved;
         kernel_vma_limit = vm->va_limit;
 
         gk20a_dbg_info(
-                "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-                small_vma_start, small_vma_limit,
-                large_vma_start, large_vma_limit,
+                "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+                user_vma_start, user_vma_limit,
                 kernel_vma_start, kernel_vma_limit);
 
-        /* check that starts do not exceed limits */
-        WARN_ON(small_vma_start > small_vma_limit);
-        WARN_ON(large_vma_start > large_vma_limit);
-        /* kernel_vma must also be non-zero */
+        WARN_ON(user_vma_start > user_vma_limit);
         WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-        if (small_vma_start > small_vma_limit ||
-            large_vma_start > large_vma_limit ||
+        /*
+         * A "user" area only makes sense for the GVA spaces. For VMs where
+         * there is no "user" area user_vma_start will be equal to
+         * user_vma_limit (i.e. a 0-sized space). In such a situation the
+         * kernel area must be non-zero in length.
+         */
+        if (user_vma_start > user_vma_limit ||
             kernel_vma_start >= kernel_vma_limit) {
                 err = -EINVAL;
                 goto clean_up_pdes;
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
          * Attempt to make a separate VM for fixed allocations.
          */
         if (g->separate_fixed_allocs &&
-            small_vma_start < small_vma_limit) {
-                if (g->separate_fixed_allocs >= small_vma_limit)
+            user_vma_start < user_vma_limit) {
+                if (g->separate_fixed_allocs >= user_vma_limit)
                         goto clean_up_pdes;
 
                 snprintf(alloc_name, sizeof(alloc_name),
@@ -4398,7 +4446,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
                 err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
                                                    vm, alloc_name,
-                                                   small_vma_start,
+                                                   user_vma_start,
                                                    g->separate_fixed_allocs,
                                                    SZ_4K,
                                                    GPU_BALLOC_MAX_ORDER,
@@ -4407,47 +4455,41 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                         goto clean_up_ptes;
 
                 /* Make sure to update the user vma size. */
-                small_vma_start = g->separate_fixed_allocs;
-        }
-
-        if (small_vma_start < small_vma_limit) {
-                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
-                         vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-                err = __nvgpu_buddy_allocator_init(
-                        g,
-                        &vm->vma[gmmu_page_size_small],
-                        vm, alloc_name,
-                        small_vma_start,
-                        small_vma_limit - small_vma_start,
-                        SZ_4K,
-                        GPU_BALLOC_MAX_ORDER,
-                        GPU_ALLOC_GVA_SPACE);
-                if (err)
-                        goto clean_up_ptes;
+                user_vma_start = g->separate_fixed_allocs;
         }
 
-        if (large_vma_start < large_vma_limit) {
-                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
-                         name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-                err = __nvgpu_buddy_allocator_init(
-                        g,
-                        &vm->vma[gmmu_page_size_big],
-                        vm, alloc_name,
-                        large_vma_start,
-                        large_vma_limit - large_vma_start,
-                        big_page_size,
-                        GPU_BALLOC_MAX_ORDER,
-                        GPU_ALLOC_GVA_SPACE);
+        if (user_vma_start < user_vma_limit) {
+                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+                if (!gk20a_big_pages_possible(vm, user_vma_start,
+                                              user_vma_limit - user_vma_start))
+                        vm->big_pages = false;
+
+                err = __nvgpu_buddy_allocator_init(g, &vm->user,
+                                                   vm, alloc_name,
+                                                   user_vma_start,
+                                                   user_vma_limit -
+                                                   user_vma_start,
+                                                   SZ_4K,
+                                                   GPU_BALLOC_MAX_ORDER,
+                                                   GPU_ALLOC_GVA_SPACE);
                 if (err)
-                        goto clean_up_small_allocator;
+                        goto clean_up_ptes;
+        } else {
+                /*
+                 * Make these allocator pointers point to the kernel allocator
+                 * since we still use the legacy notion of page size to choose
+                 * the allocator.
+                 */
+                vm->vma[0] = &vm->kernel;
+                vm->vma[1] = &vm->kernel;
         }
 
-        snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
-                 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-        /*
-         * kernel reserved VMA is at the end of the aperture
-         */
-        err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel],
+        snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+        if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+                                      kernel_vma_limit - kernel_vma_start))
+                vm->big_pages = false;
+
+        err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
                                            vm, alloc_name,
                                            kernel_vma_start,
                                            kernel_vma_limit - kernel_vma_start,
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                                            GPU_BALLOC_MAX_ORDER,
                                            GPU_ALLOC_GVA_SPACE);
         if (err)
-                goto clean_up_big_allocator;
+                goto clean_up_user_allocator;
 
         vm->mapped_buffers = RB_ROOT;
 
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
         if (vm->va_limit > SZ_4G) {
                 err = gk20a_init_sema_pool(vm);
                 if (err)
-                        goto clean_up_big_allocator;
+                        goto clean_up_user_allocator;
         }
 
         return 0;
 
-clean_up_big_allocator:
-        if (large_vma_start < large_vma_limit)
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-        if (small_vma_start < small_vma_limit)
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+        if (user_vma_start < user_vma_limit)
+                nvgpu_alloc_destroy(&vm->user);
 clean_up_ptes:
         free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
         vm->as_share = as_share;
         vm->enable_ctag = true;
 
-        snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
+        snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-        err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+        err = gk20a_init_vm(mm, vm, big_page_size,
+                            big_page_size << 10,
                             mm->channel.kernel_size,
                             mm->channel.user_size + mm->channel.kernel_size,
                             !mm->disable_bigpage, userspace_managed, name);
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                 goto clean_up;
         }
 
-        vma = &vm->vma[pgsz_idx];
+        vma = vm->vma[pgsz_idx];
         if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
                 if (nvgpu_alloc_initialized(&vm->fixed))
                         vma = &vm->fixed;
                 vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
                                                 (u64)args->pages *
-                                                (u64)args->page_size);
+                                                (u64)args->page_size,
+                                                args->page_size);
         } else {
                 vaddr_start = nvgpu_alloc(vma,
                                           (u64)args->pages *
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                    args->pages, args->offset);
 
         /* determine pagesz idx */
-        pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
-                        gmmu_page_size_big : gmmu_page_size_small;
+        pgsz_idx = __get_pte_size(vm, args->offset,
+                                  args->page_size * args->pages);
 
         if (nvgpu_alloc_initialized(&vm->fixed))
                 vma = &vm->fixed;
         else
-                vma = &vm->vma[pgsz_idx];
+                vma = vm->vma[pgsz_idx];
         nvgpu_free(vma, args->offset);
 
         mutex_lock(&vm->update_gmmu_lock);
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-        nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-        if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-        if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+        if (nvgpu_alloc_initialized(&vm->kernel))
+                nvgpu_alloc_destroy(&vm->kernel);
+        if (nvgpu_alloc_initialized(&vm->user))
+                nvgpu_alloc_destroy(&vm->user);
         if (nvgpu_alloc_initialized(&vm->fixed))
                 nvgpu_alloc_destroy(&vm->fixed);
 
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
         gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-        gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
-                      mm->bar1.aperture_size - SZ_4K,
-                      mm->bar1.aperture_size, false, false, "bar1");
+        gk20a_init_vm(mm, vm,
+                      big_page_size,
+                      SZ_4K,                            /* Low hole */
+                      mm->bar1.aperture_size - SZ_4K,   /* Kernel reserved. */
+                      mm->bar1.aperture_size,
+                      true, false,
+                      "bar1");
 
         err = gk20a_alloc_inst_block(g, inst_block);
         if (err)
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
         struct gk20a *g = gk20a_from_mm(mm);
         struct mem_desc *inst_block = &mm->pmu.inst_block;
         u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+        u32 low_hole, aperture_size;
+
+        /*
+         * No user region - so we will pass that as zero sized.
+         */
+        low_hole = SZ_4K * 16;
+        aperture_size = GK20A_PMU_VA_SIZE * 2;
 
         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
         gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
         gk20a_init_vm(mm, vm, big_page_size,
-                      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-                      GK20A_PMU_VA_SIZE * 2, false, false,
+                      low_hole,
+                      aperture_size - low_hole,
+                      aperture_size,
+                      true,
+                      false,
                       "system");
 
         err = gk20a_alloc_inst_block(g, inst_block);
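A worked example for gk20a_big_pages_possible() above: the mask is
(big_page_size << 10) - 1, so base and size must both be multiples of
big_page_size * 1024 — presumably the VA range one PDE covers. With 128KB big
pages that granule is 128MB, which is why bar1 and the system VM can now pass
true for big_pages: their small low holes fail the alignment check and flip
vm->big_pages back to false. A standalone sketch, with illustrative values:

```c
#include <stdio.h>

/*
 * Mirrors gk20a_big_pages_possible(): base and size must be aligned to
 * big_page_size << 10 (1024 big pages' worth of VA).
 */
static int big_pages_possible(unsigned long long big_page_size,
                              unsigned long long base,
                              unsigned long long size)
{
        unsigned long long mask = (big_page_size << 10) - 1;

        return !(base & mask) && !(size & mask);
}

int main(void)
{
        unsigned long long bps = 128 << 10;     /* 128KB big pages */

        /* 128MB-aligned base and size: big pages are possible. */
        printf("%d\n", big_pages_possible(bps, 128ULL << 20, 256ULL << 20));

        /* bar1-style range starting at the 4K low hole: not aligned. */
        printf("%d\n", big_pages_possible(bps, 4096, (16ULL << 20) - 4096));
        return 0;
}
```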
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 5ef8ae25..394d1d25 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -270,11 +270,13 @@ struct vm_gk20a {
 
         struct gk20a_mm_entry pdb;
 
-        struct nvgpu_allocator vma[gmmu_nr_page_sizes];
-
         /* If necessary, split fixed from non-fixed. */
         struct nvgpu_allocator fixed;
 
+        struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
+        struct nvgpu_allocator kernel;
+        struct nvgpu_allocator user;
+
         struct rb_root mapped_buffers;
 
         struct list_head reserved_va_list;
@@ -425,7 +427,7 @@ static inline int bar1_aperture_size_mb_gk20a(void)
         return 16; /* 16MB is more than enough atm. */
 }
 
-/*The maximum GPU VA range supported */
+/* The maximum GPU VA range supported */
 #define NV_GMMU_VA_RANGE        38
 
 /* The default userspace-visible GPU VA size */
@@ -434,43 +436,39 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 /* The default kernel-reserved GPU VA size */
 #define NV_MM_DEFAULT_KERNEL_SIZE       (1ULL << 32)
 
-/*
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-static inline u64 __nv_gmmu_va_small_page_limit(void)
-{
-        return ((u64)SZ_1G * 16);
-}
-
-static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
-{
-        struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big];
-
-        if (!vm->big_pages)
-                return 0;
-
-        return addr >= nvgpu_alloc_base(a) &&
-                addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a);
-}
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+                                              u64 base, u64 size);
 
 /*
  * This determines the PTE size for a given alloc. Used by both the GVA space
  * allocator and the mm core code so that agreement can be reached on how to
  * map allocations.
+ *
+ * The page size of a buffer is this:
+ *
+ *   o  If the VM doesn't support large pages then obviously small pages
+ *      must be used.
+ *   o  If the base address is non-zero (fixed address map):
+ *      -  Attempt to find a reserved memory area and use the page size
+ *         based on that.
+ *      -  If no reserved page size is available, default to small pages.
+ *   o  If the base is zero:
+ *      -  If the size is greater than or equal to the big page size, use big
+ *         pages.
+ *      -  Otherwise use small pages.
  */
 static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
                                                   u64 base, u64 size)
 {
-        /*
-         * Currently userspace is not ready for a true unified address space.
-         * As a result, even though the allocator supports mixed address spaces
-         * the address spaces must be treated as separate for now.
-         */
-        if (__nv_gmmu_va_is_big_page_region(vm, base))
-                return gmmu_page_size_big;
-        else
+        if (!vm->big_pages)
                 return gmmu_page_size_small;
+
+        if (base)
+                return __get_pte_size_fixed_map(vm, base, size);
+
+        if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
+                return gmmu_page_size_big;
+        return gmmu_page_size_small;
 }
 
 /*
@@ -797,6 +795,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
 
 void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
 
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
+
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
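With nvgpu_buddy_balloc() now passing base == 0, only the length drives the
PTE size for non-fixed allocations. A runnable sketch of the decision tree in
the new __get_pte_size(); the fixed-map branch is stubbed to small pages here,
whereas the real code consults the reservation via __get_pte_size_fixed_map():

```c
#include <stdio.h>

enum gmmu_pgsz { SMALL, BIG };

static enum gmmu_pgsz pte_size(int big_pages,
                               unsigned long long big_page_size,
                               unsigned long long base,
                               unsigned long long size)
{
        if (!big_pages)
                return SMALL;
        if (base)
                return SMALL;   /* stub for __get_pte_size_fixed_map() */
        return size >= big_page_size ? BIG : SMALL;
}

int main(void)
{
        /* 128KB big pages: length alone decides. */
        printf("%d\n", pte_size(1, 128 << 10, 0, 1 << 20));  /* 1MB  -> BIG   */
        printf("%d\n", pte_size(1, 128 << 10, 0, 64 << 10)); /* 64KB -> SMALL */
        return 0;
}
```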
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index dee9b562..d5a90c87 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -41,11 +41,15 @@ struct nvgpu_allocator_ops {
          * regular and fixed allocations then free_fixed() does not need to
          * be implemented. This behavior exists for legacy reasons and should
          * not be propagated to new allocators.
+         *
+         * For allocators where the @page_size field is not applicable it can
+         * be left as 0. Otherwise a valid page size should be passed (4k or
+         * what the large page size is).
          */
         u64 (*alloc_fixed)(struct nvgpu_allocator *allocator,
-                           u64 base, u64 len);
+                           u64 base, u64 len, u32 page_size);
         void (*free_fixed)(struct nvgpu_allocator *allocator,
                            u64 base, u64 len);
 
         /*
          * Allow allocators to reserve space for carveouts.
@@ -213,7 +217,8 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
 u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len);
 void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr);
 
-u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
+u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len,
+                      u32 page_size);
 void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
 
 int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
@@ -298,5 +303,8 @@ void nvgpu_alloc_debugfs_init(struct device *dev);
         } while (0)
 
 #endif
+#define balloc_pr(alloctor, format, arg...)             \
+        pr_info("%-25s %25s() " format,                 \
+                alloctor->name, __func__, ##arg)
 
 #endif /* NVGPU_ALLOCATOR_H */
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 66c9344b..a21a020d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
         err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
         WARN_ON(err || msg.ret);
 
-        nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-        if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
-        if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
+        if (nvgpu_alloc_initialized(&vm->kernel))
+                nvgpu_alloc_destroy(&vm->kernel);
+        if (nvgpu_alloc_initialized(&vm->user))
+                nvgpu_alloc_destroy(&vm->user);
+        if (nvgpu_alloc_initialized(&vm->fixed))
+                nvgpu_alloc_destroy(&vm->fixed);
 
         mutex_unlock(&vm->update_gmmu_lock);
 
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
         struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
         struct mm_gk20a *mm = &g->mm;
         struct vm_gk20a *vm;
-        u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-                kernel_vma_start, kernel_vma_limit;
+        u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
         char name[32];
         int err, i;
         const bool userspace_managed =
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
         vm->mm = mm;
         vm->as_share = as_share;
 
+        /* Set up vma pointers. */
+        vm->vma[0] = &vm->user;
+        vm->vma[1] = &vm->user;
+        vm->vma[2] = &vm->kernel;
+
         for (i = 0; i < gmmu_nr_page_sizes; i++)
                 vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
 
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
         vm->handle = p->handle;
 
         /* setup vma limits */
-        small_vma_start = vm->va_start;
-
-        if (vm->big_pages) {
-                /* First 16GB of the address space goes towards small
-                 * pages. The kernel reserved pages are at the end.
-                 * What ever remains is allocated to large pages.
-                 */
-                small_vma_limit = __nv_gmmu_va_small_page_limit();
-                large_vma_start = small_vma_limit;
-                large_vma_limit = vm->va_limit - mm->channel.kernel_size;
-        } else {
-                small_vma_limit = vm->va_limit - mm->channel.kernel_size;
-                large_vma_start = 0;
-                large_vma_limit = 0;
-        }
+        user_vma_start = vm->va_start;
+        user_vma_limit = vm->va_limit - mm->channel.kernel_size;
 
         kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
         kernel_vma_limit = vm->va_limit;
 
         gk20a_dbg_info(
-                "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-                small_vma_start, small_vma_limit,
-                large_vma_start, large_vma_limit,
+                "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+                user_vma_start, user_vma_limit,
                 kernel_vma_start, kernel_vma_limit);
 
-        /* check that starts do not exceed limits */
-        WARN_ON(small_vma_start > small_vma_limit);
-        WARN_ON(large_vma_start > large_vma_limit);
-        /* kernel_vma must also be non-zero */
+        WARN_ON(user_vma_start > user_vma_limit);
         WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-        if (small_vma_start > small_vma_limit ||
-            large_vma_start > large_vma_limit ||
+        if (user_vma_start > user_vma_limit ||
             kernel_vma_start >= kernel_vma_limit) {
                 err = -EINVAL;
                 goto clean_up_share;
         }
 
-        if (small_vma_start < small_vma_limit) {
+        if (user_vma_start < user_vma_limit) {
                 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                          gmmu_page_sizes[gmmu_page_size_small] >> 10);
+                if (!gk20a_big_pages_possible(vm, user_vma_start,
+                                              user_vma_limit - user_vma_start))
+                        vm->big_pages = false;
 
                 err = __nvgpu_buddy_allocator_init(
                         g,
-                        &vm->vma[gmmu_page_size_small],
+                        vm->vma[gmmu_page_size_small],
                         vm, name,
-                        small_vma_start,
-                        small_vma_limit - small_vma_start,
+                        user_vma_start,
+                        user_vma_limit - user_vma_start,
                         SZ_4K,
                         GPU_BALLOC_MAX_ORDER,
                         GPU_ALLOC_GVA_SPACE);
                 if (err)
                         goto clean_up_share;
-        }
-
-        if (large_vma_start < large_vma_limit) {
-                snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-                         gmmu_page_sizes[gmmu_page_size_big] >> 10);
-                err = __nvgpu_buddy_allocator_init(
-                        g,
-                        &vm->vma[gmmu_page_size_big],
-                        vm, name,
-                        large_vma_start,
-                        large_vma_limit - large_vma_start,
-                        big_page_size,
-                        GPU_BALLOC_MAX_ORDER,
-                        GPU_ALLOC_GVA_SPACE);
-                if (err)
-                        goto clean_up_small_allocator;
+        } else {
+                /*
+                 * Make these allocator pointers point to the kernel allocator
+                 * since we still use the legacy notion of page size to choose
+                 * the allocator.
+                 */
+                vm->vma[0] = &vm->kernel;
+                vm->vma[1] = &vm->kernel;
         }
 
         snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
                  gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+        if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+                                      kernel_vma_limit - kernel_vma_start))
+                vm->big_pages = false;
+
         /*
          * kernel reserved VMA is at the end of the aperture
          */
         err = __nvgpu_buddy_allocator_init(
                 g,
-                &vm->vma[gmmu_page_size_kernel],
+                vm->vma[gmmu_page_size_kernel],
                 vm, name,
                 kernel_vma_start,
                 kernel_vma_limit - kernel_vma_start,
                 SZ_4K,
                 GPU_BALLOC_MAX_ORDER,
                 GPU_ALLOC_GVA_SPACE);
         if (err)
-                goto clean_up_big_allocator;
+                goto clean_up_user_allocator;
 
         vm->mapped_buffers = RB_ROOT;
 
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
         return 0;
 
-clean_up_big_allocator:
-        if (large_vma_start < large_vma_limit)
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-        if (small_vma_start < small_vma_limit)
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+        if (user_vma_start < user_vma_limit)
+                nvgpu_alloc_destroy(&vm->user);
 clean_up_share:
         msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
         msg.handle = vgpu_get_handle(g);
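One last note on the vm->vma[] change that runs through the whole patch: the
array now holds pointers, so the legacy small/big page-size indices can alias
the single unified user VMA, or the kernel VMA when a VM has no user area. A
toy illustration of that aliasing; the struct is a stand-in, not the real
nvgpu_allocator:

```c
#include <stdio.h>

struct alloc { const char *name; };

int main(void)
{
        struct alloc user   = { "gk20a_as_0" };
        struct alloc kernel = { "gk20a_as_0-sys" };
        struct alloc *vma[3];
        int has_user_area = 1;

        /* Same wiring as gk20a_init_vm()/vgpu_vm_alloc_share(). */
        vma[0] = has_user_area ? &user : &kernel;  /* small  */
        vma[1] = has_user_area ? &user : &kernel;  /* big    */
        vma[2] = &kernel;                          /* kernel */

        /* Legacy page-size indexing now lands in the unified VMA. */
        printf("%s %s %s\n", vma[0]->name, vma[1]->name, vma[2]->name);
        return 0;
}
```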