-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	13
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	54
2 files changed, 56 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7d359ff4..5d1ff563 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1437,7 +1437,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
	 * the alignment determined by gmmu_select_page_size().
	 */
	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
+		int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ?
				gmmu_page_size_big : gmmu_page_size_small;
		if (pgsz_idx > bfr.pgsz_idx) {
			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
@@ -2441,6 +2441,13 @@ int gk20a_init_vm(struct mm_gk20a *mm,
	/* note: keep the page sizes sorted lowest to highest here */
	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };

+	/*
+	 * Linsim bug: seems like we can't have pushbuffers above 4GB. Easy WAR for sim
+	 * is to just limit the address space to 4GB.
+	 */
+	if (tegra_platform_is_linsim() && aperture_size > SZ_4G)
+		aperture_size = SZ_4G;
+
	vm->mm = mm;

	vm->va_start = low_hole;
@@ -2483,7 +2490,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
	 * remains is allocated to large pages. */
	small_vma_size = vm->va_limit;
	if (big_pages) {
-		small_vma_size = (u64)16 << 30;
+		small_vma_size = __nv_gmmu_va_small_page_limit();
		large_vma_size = vm->va_limit - small_vma_size;
	}

@@ -2698,7 +2705,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
			args->pages, args->offset);

	/* determine pagesz idx */
-	pgsz_idx = NV_GMMU_VA_IS_UPPER(args->offset) ?
+	pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
			gmmu_page_size_big : gmmu_page_size_small;

	start_page_nr = (u32)(args->offset >>
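
The two gk20a_init_vm() hunks above cap the GVA aperture at 4GB on linsim and
let __nv_gmmu_va_small_page_limit() pick the small/large VMA split. As a rough
sanity check of the resulting sizes, here is a minimal standalone sketch of
that arithmetic, assuming big pages are enabled and the silicon aperture is
the full 2^37 bytes implied by NV_GMMU_VA_RANGE (the local `linsim` flag is a
stand-in for tegra_platform_is_linsim(); this models the hunks, it is not
driver code):

#include <stdio.h>
#include <stdint.h>

#define SZ_1G (1ULL << 30)
#define SZ_4G (4 * SZ_1G)

int main(void)
{
	uint64_t aperture = 1ULL << 37;   /* 2^37 B = 128GB of GVA */
	int linsim = 1;                   /* toggle to model simulation */

	/* First hunk: the linsim WAR caps the address space at 4GB. */
	if (linsim && aperture > SZ_4G)
		aperture = SZ_4G;

	/* Second hunk: with big pages enabled, everything below the
	 * small-page limit is small-page VMA and the rest is large. */
	uint64_t small_vma = linsim ? 2 * SZ_1G : 16 * SZ_1G;
	uint64_t large_vma = aperture - small_vma;

	printf("small-page VMA: %llu GB\n",
	       (unsigned long long)(small_vma / SZ_1G));
	printf("large-page VMA: %llu GB\n",
	       (unsigned long long)(large_vma / SZ_1G));
	return 0;
}

With `linsim` set this prints 2GB/2GB, matching the "2GB for small pages, 2GB
for large pages" comment added in mm_gk20a.h; on silicon the split is
16GB/112GB.
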
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 24309abc..57f7a373 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -21,18 +21,11 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-attrs.h>
 #include <linux/iommu.h>
+#include <linux/tegra-soc.h>
 #include <asm/dma-iommu.h>
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"

-/*
- * Amount of the GVA space we actually use is smaller than the available space.
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-#define NV_GMMU_VA_RANGE	37ULL
-#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)SZ_1G * 16))
-
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
 #define __cpuc_flush_dcache_area __flush_dcache_area
@@ -344,6 +337,51 @@ static inline int max_vaddr_bits_gk20a(void)
	return 40; /* chopped for area? */
 }

+/*
+ * Amount of the GVA space we actually use is smaller than the available space.
+ */
+#define NV_GMMU_VA_RANGE	37
+
+/*
+ * The bottom 16GB of the space are used for small pages, the remaining high
+ * memory is for large pages. On simulation use 2GB for small pages, 2GB for
+ * large pages (if enabled).
+ */
+static inline u64 __nv_gmmu_va_small_page_limit(void)
+{
+	if (tegra_platform_is_linsim())
+		return ((u64)SZ_1G * 2);
+	else
+		return ((u64)SZ_1G * 16);
+}
+
+static inline int __nv_gmmu_va_is_upper(struct vm_gk20a *vm, u64 addr)
+{
+	if (!vm->big_pages)
+		return 0;
+
+	return addr >= __nv_gmmu_va_small_page_limit();
+}
+
+/*
+ * This determines the PTE size for a given alloc. Used by both the GVA space
+ * allocator and the mm core code so that agreement can be reached on how to
+ * map allocations.
+ */
+static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
+						  u64 base, u64 size)
+{
+	/*
+	 * Currently userspace is not ready for a true unified address space.
+	 * As a result, even though the allocator supports mixed address spaces
+	 * the address spaces must be treated as separate for now.
+	 */
+	if (__nv_gmmu_va_is_upper(vm, base))
+		return gmmu_page_size_big;
+	else
+		return gmmu_page_size_small;
+}
+
 #if 0 /*related to addr bits above, concern below TBD on which is accurate */
 #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
 		bus_bar1_block_ptr_s())
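
The new __get_pte_size() helper is the piece the GVA allocator and the map
path are meant to agree on. A minimal userspace sketch of the decision it
encodes, with vm_gk20a and the platform check mocked down to the two inputs
the split logic actually reads (the mock names below are illustrative, not
from the driver):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SZ_1G (1ULL << 30)

/* Mock of the two vm_gk20a inputs the split logic reads; the real
 * struct and tegra_platform_is_linsim() live in the driver. */
struct vm_mock {
	bool big_pages;
	bool linsim;
};

/* Mirrors __nv_gmmu_va_small_page_limit(): 2GB boundary on simulation,
 * 16GB on silicon. */
static uint64_t small_page_limit(const struct vm_mock *vm)
{
	return vm->linsim ? 2 * SZ_1G : 16 * SZ_1G;
}

/* Mirrors __nv_gmmu_va_is_upper(): with big pages disabled the whole
 * space is small-page territory, so no address counts as "upper". */
static bool va_is_upper(const struct vm_mock *vm, uint64_t addr)
{
	if (!vm->big_pages)
		return false;
	return addr >= small_page_limit(vm);
}

int main(void)
{
	struct vm_mock silicon = { .big_pages = true,  .linsim = false };
	struct vm_mock linsim  = { .big_pages = true,  .linsim = true  };
	struct vm_mock no_big  = { .big_pages = false, .linsim = false };

	/* 20GB sits above the 16GB silicon boundary: big pages. */
	printf("silicon @20GB: %s\n",
	       va_is_upper(&silicon, 20 * SZ_1G) ? "big" : "small");
	/* 3GB sits above the 2GB linsim boundary: big pages there too. */
	printf("linsim  @3GB:  %s\n",
	       va_is_upper(&linsim, 3 * SZ_1G) ? "big" : "small");
	/* Big pages disabled: always small, regardless of address. */
	printf("no-big  @20GB: %s\n",
	       va_is_upper(&no_big, 20 * SZ_1G) ? "big" : "small");
	return 0;
}

Note that with big_pages disabled nothing is "upper", so every allocation
falls back to small pages regardless of address. That vm-awareness is why the
gk20a_vm_map() and gk20a_vm_free_space() hunks switch from the address-only
NV_GMMU_VA_IS_UPPER() macro to __nv_gmmu_va_is_upper(vm, addr).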