From 3cf92ec89ba8deac77d726f02d79cba7c0e73e4d Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 2 Jul 2018 17:14:27 -0700 Subject: gpu: nvgpu: Fix several issues with the buddy allocator The issues are: 1. Non-fixed allocs must take into account explicit PTE size requests. Previously the PTE size was determined from the allocation size which was incorrect. To do this, the PTE size is now plumbed through all GPU VA allocations. This is what the new alloc_pte() op does. 2. Fix buddy PTE size assignment. This changes a '<=' into a '<' in the buddy allocation logic. Effectively this is now leaving the PTE size for buddy blocks equal to the PDE block size as 'ANY'. This prevents a buddy block of PDE size which has yet to be allocated from having a specific PDE size. Without this it's possible to do a fixed alloc that fails unexpectedly due to mismatching PDE sizes. Consider two PDE block sized fixed allocs that are contained in one buddy twice the size of a PDE block. Let's call these fixed allocs S and B (small and big). Let's assume that two fixed allocs are done, each targeting S and B, in that order. With the current logic the first alloc, when we create the two buddies S and B, causes both S and B to have a PTE size of SMALL. Now when the second alloc happens we attempt to find a buddy B with a PTE size of either BIG or ANY. But we cannot because B already has size SMALL. This causes us to appear like we have a conflicting fixed alloc despite this not being the case. 3. Misc cleanups & bug fixes: - Clean up some MISRA issues - Delete an extraneous unlock that could have caused a deadlock. 
Bug 200105199 Change-Id: Ib5447ec6705a5a289ac0cf3d5e90c79b5d67582d Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1768582 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 108 ++++++++++++++------- drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h | 18 ++-- drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | 5 + drivers/gpu/nvgpu/common/mm/vm.c | 2 +- drivers/gpu/nvgpu/common/mm/vm_area.c | 24 +++-- 5 files changed, 106 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/nvgpu/common/mm') diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index a9f90069..b29045ba 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -64,6 +64,25 @@ static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, * easily PDE aligned so this hasn't been a problem. */ +static u32 nvgpu_balloc_page_size_to_pte_size(struct nvgpu_buddy_allocator *a, + u32 page_size) +{ + if ((a->flags & GPU_ALLOC_GVA_SPACE) == 0ULL) { + return BALLOC_PTE_SIZE_ANY; + } + + /* + * Make sure the page size is actually valid! + */ + if (page_size == a->vm->big_page_size) { + return BALLOC_PTE_SIZE_BIG; + } else if (page_size == SZ_4K) { + return BALLOC_PTE_SIZE_SMALL; + } else { + return BALLOC_PTE_SIZE_INVALID; + } +} + /* * Pick a suitable maximum order for this allocator. * @@ -142,7 +161,7 @@ static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a, * without cycling through the entire list. */ if (a->flags & GPU_ALLOC_GVA_SPACE && - b->pte_size == GMMU_PAGE_SIZE_BIG) { + b->pte_size == BALLOC_PTE_SIZE_BIG) { nvgpu_list_add_tail(&b->buddy_entry, list); } else { nvgpu_list_add(&b->buddy_entry, list); @@ -387,7 +406,7 @@ static void balloc_coalesce(struct nvgpu_buddy_allocator *a, * @a must be locked. 
*/ static int balloc_split_buddy(struct nvgpu_buddy_allocator *a, - struct nvgpu_buddy *b, int pte_size) + struct nvgpu_buddy *b, u32 pte_size) { struct nvgpu_buddy *left, *right; u64 half; @@ -415,9 +434,22 @@ static int balloc_split_buddy(struct nvgpu_buddy_allocator *a, left->parent = b; right->parent = b; - /* PTE considerations. */ + /* + * Potentially assign a PTE size to the new buddies. The obvious case is + * when we don't have a GPU VA space; just leave it alone. When we do + * have a GVA space we need to assign the passed PTE size to the buddy + * only if the buddy is less than the PDE block size. This is because if + * the buddy is less than the PDE block size then the buddy's parent + * may already have a PTE size. Thus we can only allocate this buddy to + * mappings with that PTE size (due to the large/small PTE separation + * requirement). + * + * When the buddy size is greater than or equal to the block size then + * we can leave the buddies PTE field alone since the PDE block has yet + * to be assigned a PTE size. + */ if (a->flags & GPU_ALLOC_GVA_SPACE && - left->order <= a->pte_blk_order) { + left->order < a->pte_blk_order) { left->pte_size = pte_size; right->pte_size = pte_size; } @@ -477,7 +509,7 @@ static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a, * Find a suitable buddy for the given order and PTE type (big or little). */ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, - u64 order, int pte_size) + u64 order, u32 pte_size) { struct nvgpu_buddy *bud; @@ -487,7 +519,7 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, } if (a->flags & GPU_ALLOC_GVA_SPACE && - pte_size == GMMU_PAGE_SIZE_BIG) { + pte_size == BALLOC_PTE_SIZE_BIG) { bud = nvgpu_list_last_entry(balloc_get_order_list(a, order), nvgpu_buddy, buddy_entry); } else { @@ -514,7 +546,7 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, * @a must be locked. 
*/ static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a, - u64 order, int pte_size) + u64 order, u32 pte_size) { u64 split_order; struct nvgpu_buddy *bud = NULL; @@ -637,7 +669,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a, * necessary for this buddy to exist as well. */ static struct nvgpu_buddy *__balloc_make_fixed_buddy( - struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size) + struct nvgpu_buddy_allocator *a, u64 base, u64 order, u32 pte_size) { struct nvgpu_buddy *bud = NULL; struct nvgpu_list_node *order_list; @@ -714,11 +746,19 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, struct nvgpu_fixed_alloc *falloc, - u64 base, u64 len, int pte_size) + u64 base, u64 len, u32 pte_size) { u64 shifted_base, inc_base; u64 align_order; + /* + * Ensure that we have a valid PTE size here (ANY is a valid size). If + * this is INVALID then we are going to experience imminent corruption + * in the lists that hold buddies. This leads to some very strange + * crashes. + */ + BUG_ON(pte_size == BALLOC_PTE_SIZE_INVALID); + shifted_base = balloc_base_shift(a, base); if (shifted_base == 0U) { align_order = __fls(len >> a->blk_shift); @@ -814,10 +854,11 @@ static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, /* * Allocate memory from the passed allocator. 
*/ -static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) +static u64 nvgpu_buddy_balloc_pte(struct nvgpu_allocator *na, u64 len, + u32 page_size) { u64 order, addr; - int pte_size; + u32 pte_size; struct nvgpu_buddy_allocator *a = na->priv; alloc_lock(na); @@ -830,22 +871,21 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) return 0; } - if (a->flags & GPU_ALLOC_GVA_SPACE) { - pte_size = __get_pte_size(a->vm, 0, len); - } else { - pte_size = BALLOC_PTE_SIZE_ANY; + pte_size = nvgpu_balloc_page_size_to_pte_size(a, page_size); + if (pte_size == BALLOC_PTE_SIZE_INVALID) { + return 0ULL; } addr = __balloc_do_alloc(a, order, pte_size); - if (addr) { + if (addr != 0ULL) { a->bytes_alloced += len; a->bytes_alloced_real += balloc_order_to_len(a, order); alloc_dbg(balloc_owner(a), "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s", addr, order, len, - pte_size == GMMU_PAGE_SIZE_BIG ? "big" : - pte_size == GMMU_PAGE_SIZE_SMALL ? "small" : + pte_size == BALLOC_PTE_SIZE_BIG ? "big" : + pte_size == BALLOC_PTE_SIZE_SMALL ? "small" : "NA/any"); } else { alloc_dbg(balloc_owner(a), "Alloc failed: no mem!"); @@ -858,13 +898,15 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) return addr; } -/* - * Requires @na to be locked. - */ +static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) +{ + return nvgpu_buddy_balloc_pte(na, len, BALLOC_PTE_SIZE_ANY); +} + static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, u64 base, u64 len, u32 page_size) { - int pte_size = BALLOC_PTE_SIZE_ANY; + u32 pte_size; u64 ret, real_bytes = 0; struct nvgpu_buddy *bud; struct nvgpu_fixed_alloc *falloc = NULL; @@ -879,15 +921,9 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, goto fail; } - /* Check that the page size is valid. 
*/ - if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) { - if (page_size == a->vm->big_page_size) { - pte_size = GMMU_PAGE_SIZE_BIG; - } else if (page_size == SZ_4K) { - pte_size = GMMU_PAGE_SIZE_SMALL; - } else { - goto fail; - } + pte_size = nvgpu_balloc_page_size_to_pte_size(a, page_size); + if (pte_size == BALLOC_PTE_SIZE_INVALID) { + goto fail; } falloc = nvgpu_kmalloc(nvgpu_alloc_to_gpu(na), sizeof(*falloc)); @@ -903,7 +939,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, alloc_dbg(balloc_owner(a), "Range not free: 0x%llx -> 0x%llx", base, base + len); - goto fail_unlock; + goto fail; } ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size); @@ -911,7 +947,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, alloc_dbg(balloc_owner(a), "Alloc-fixed failed ?? 0x%llx -> 0x%llx", base, base + len); - goto fail_unlock; + goto fail; } balloc_alloc_fixed(a, falloc); @@ -928,8 +964,6 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, return base; -fail_unlock: - alloc_unlock(na); fail: nvgpu_kfree(nvgpu_alloc_to_gpu(na), falloc); return 0; @@ -1051,7 +1085,8 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *na, } /* Should not be possible to fail... 
*/ - addr = __nvgpu_balloc_fixed_buddy(na, co->base, co->length, 0); + addr = __nvgpu_balloc_fixed_buddy(na, co->base, co->length, + BALLOC_PTE_SIZE_ANY); if (!addr) { err = -ENOMEM; nvgpu_warn(na->g, @@ -1206,6 +1241,7 @@ static void nvgpu_buddy_print_stats(struct nvgpu_allocator *na, static const struct nvgpu_allocator_ops buddy_ops = { .alloc = nvgpu_buddy_balloc, + .alloc_pte = nvgpu_buddy_balloc_pte, .free = nvgpu_buddy_bfree, .alloc_fixed = nvgpu_balloc_fixed_buddy, diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h index fe3926b9..a90530b6 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h @@ -46,18 +46,20 @@ struct nvgpu_buddy { u64 end; /* End address of this buddy. */ u64 order; /* Buddy order. */ -#define BALLOC_BUDDY_ALLOCED 0x1 -#define BALLOC_BUDDY_SPLIT 0x2 -#define BALLOC_BUDDY_IN_LIST 0x4 - int flags; /* List of associated flags. */ +#define BALLOC_BUDDY_ALLOCED 0x1U +#define BALLOC_BUDDY_SPLIT 0x2U +#define BALLOC_BUDDY_IN_LIST 0x4U + u32 flags; /* List of associated flags. */ /* * Size of the PDE this buddy is using. This allows for grouping like - * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a - * enum except for the BALLOC_PTE_SIZE_ANY specifier. + * sized allocations into the same PDE. 
*/ -#define BALLOC_PTE_SIZE_ANY -1 - int pte_size; +#define BALLOC_PTE_SIZE_ANY (~0U) +#define BALLOC_PTE_SIZE_INVALID 0U +#define BALLOC_PTE_SIZE_SMALL 1U +#define BALLOC_PTE_SIZE_BIG 2U + u32 pte_size; }; static inline struct nvgpu_buddy * diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c index 4057a599..ec0aa888 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c @@ -77,6 +77,11 @@ u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len) return a->ops->alloc(a, len); } +u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size) +{ + return a->ops->alloc_pte(a, len, page_size); +} + void nvgpu_free(struct nvgpu_allocator *a, u64 addr) { a->ops->free(a, addr); diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 3cb8ed60..57d9afb5 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -152,7 +152,7 @@ u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, u32 pgsz_idx) /* Be certain we round up to page_size if needed */ size = (size + ((u64)page_size - 1U)) & ~((u64)page_size - 1U); - addr = nvgpu_alloc(vma, size); + addr = nvgpu_alloc_pte(vma, size, page_size); if (!addr) { nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size); return 0; diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c index c2c0d569..d096de5d 100644 --- a/drivers/gpu/nvgpu/common/mm/vm_area.c +++ b/drivers/gpu/nvgpu/common/mm/vm_area.c @@ -99,11 +99,22 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, struct nvgpu_allocator *vma; struct nvgpu_vm_area *vm_area; u64 vaddr_start = 0; + u64 our_addr = *addr; u32 pgsz_idx = GMMU_PAGE_SIZE_SMALL; + /* + * If we have a fixed address then use the passed address in *addr. This + * corresponds to the o_a field in the IOCTL. 
But since we do not + * support specific alignments in the buddy allocator we ignore the + * field if it isn't a fixed offset. + */ + if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) { + our_addr = *addr; + } + nvgpu_log(g, gpu_dbg_map, - "ADD vm_area: pgsz=%#-8x pages=%-9u addr=%#-14llx flags=0x%x", - page_size, pages, *addr, flags); + "ADD vm_area: pgsz=%#-8x pages=%-9u a/o=%#-14llx flags=0x%x", + page_size, pages, our_addr, flags); for (; pgsz_idx < GMMU_NR_PAGE_SIZES; pgsz_idx++) { if (vm->gmmu_page_sizes[pgsz_idx] == page_size) { @@ -133,14 +144,15 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, vma = vm->vma[pgsz_idx]; if (flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) { - vaddr_start = nvgpu_alloc_fixed(vma, *addr, + vaddr_start = nvgpu_alloc_fixed(vma, our_addr, (u64)pages * (u64)page_size, page_size); } else { - vaddr_start = nvgpu_alloc(vma, - (u64)pages * - (u64)page_size); + vaddr_start = nvgpu_alloc_pte(vma, + (u64)pages * + (u64)page_size, + page_size); } if (!vaddr_start) { -- cgit v1.2.2