From d630f1d99f60b1c2ec87506a2738bac4d1895b07 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Wed, 17 Jun 2015 10:31:08 -0700
Subject: gpu: nvgpu: Unify the small and large page address spaces

The basic structure of this patch is to make the small page allocator
and the large page allocator into pointers (where they used to be just
structs). Then assign each of those pointers to the same actual
allocator since the buddy allocator has supported mixed page sizes
since its inception.

For the rest of the driver some changes had to be made in order to
actually support mixed pages in a single address space.

1. Unifying the allocation page size determination

   Since the allocation and map operations happen at distinct
   times, both mapping and allocation of GVA space must agree
   on page size. This is because the allocation has to separate
   allocations into separate PDEs to avoid the necessity of
   supporting mixed PDEs.

   To this end a function __get_pte_size() was introduced which
   is used both by the balloc code and the core GPU MM code. It
   determines page size based only on the length of the mapping/
   allocation.

2. Fixed address allocation + page size

   Similar to regular mappings/GVA allocations, fixed address
   mapping page size determination had to be modified. In the
   past the address of the mapping determined page size since
   the address space split was by address (low addresses were
   small pages, high addresses large pages). Since that is no
   longer the case, the page size field in the reserve memory
   ioctl is now honored by the mapping code. When, for instance,
   CUDA makes a memory reservation it specifies small or large
   pages. When CUDA requests mappings to be made within that
   address range the page size is then looked up in the reserved
   memory struct.

   Fixed address reservations were also modified to now always
   allocate at a PDE granularity (64M or 128M depending on
   large page size). This prevents non-fixed allocations from
   ending up in the same PDE and causing kernel panics or GMMU
   faults.

3. The rest...

   The rest of the changes are just by-products of the above.
   Lots of places required minor updates to use a pointer to
   the GVA allocator struct instead of the struct itself.

Lastly, this change is not truly complete. More work remains to be
done in order to fully remove the notion that there was such a thing
as separate address spaces for different page sizes. Basically after
this patch what remains is cleanup and proper documentation.

Bug 1396644
Bug 1729947

Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 60 +++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 5ef8ae25..394d1d25 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -270,11 +270,13 @@ struct vm_gk20a {
 
 	struct gk20a_mm_entry pdb;
 
-	struct nvgpu_allocator vma[gmmu_nr_page_sizes];
-
 	/* If necessary, split fixed from non-fixed. */
 	struct nvgpu_allocator fixed;
 
+	struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
+	struct nvgpu_allocator kernel;
+	struct nvgpu_allocator user;
+
 	struct rb_root mapped_buffers;
 
 	struct list_head reserved_va_list;
@@ -425,7 +427,7 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 	return 16; /* 16MB is more than enough atm. */
 }
 
-/*The maximum GPU VA range supported */
+/* The maximum GPU VA range supported */
 #define NV_GMMU_VA_RANGE          38
 
 /* The default userspace-visible GPU VA size */
@@ -434,43 +436,39 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 /* The default kernel-reserved GPU VA size */
 #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
 
-/*
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-static inline u64 __nv_gmmu_va_small_page_limit(void)
-{
-	return ((u64)SZ_1G * 16);
-}
-
-static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
-{
-	struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big];
-
-	if (!vm->big_pages)
-		return 0;
-
-	return addr >= nvgpu_alloc_base(a) &&
-		addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a);
-}
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size);
 
 /*
  * This determines the PTE size for a given alloc. Used by both the GVA space
  * allocator and the mm core code so that agreement can be reached on how to
  * map allocations.
+ *
+ * The page size of a buffer is this:
+ *
+ *   o  If the VM doesn't support large pages then obviously small pages
+ *      must be used.
+ *   o  If the base address is non-zero (fixed address map):
+ *      - Attempt to find a reserved memory area and use the page size
+ *        based on that.
+ *      - If no reserved page size is available, default to small pages.
+ *   o  If the base is zero:
+ *      - If the size is greater than or equal to the big page size, use big
+ *        pages.
+ *      - Otherwise use small pages.
  */
 static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
 						  u64 base, u64 size)
 {
-	/*
-	 * Currently userspace is not ready for a true unified address space.
-	 * As a result, even though the allocator supports mixed address spaces
-	 * the address spaces must be treated as separate for now.
-	 */
-	if (__nv_gmmu_va_is_big_page_region(vm, base))
-		return gmmu_page_size_big;
-	else
+	if (!vm->big_pages)
 		return gmmu_page_size_small;
+
+	if (base)
+		return __get_pte_size_fixed_map(vm, base, size);
+
+	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
+		return gmmu_page_size_big;
+	return gmmu_page_size_small;
 }
 
 /*
@@ -797,6 +795,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
 
 void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
 
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
+
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
-- 
cgit v1.2.2