diff options
author | Alex Waterman <alexw@nvidia.com> | 2015-04-16 14:46:22 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-05-11 11:52:09 -0400 |
commit | 0566aee853eb32f4f796499b6b00ddf0f1d7de34 (patch) | |
tree | e640f945d82c598645b0b2c5cbc6eb194444026d /drivers/gpu/nvgpu | |
parent | e206fdecb3e380adced41dd432779c848d18caa0 (diff) |
gpu: nvgpu: WAR for simulator bug
On linsim, when the push buffers are allowed to be allocated with small
pages above 4GB the simulator crashes. This patch ensures that for
linsim all small page allocations are forced to be below 4GB in the
GPU VA space. By doing so, the simulator no longer crashes.
This bug has come up because the GPU buddy allocator work generates
allocations at the top of the address space first. Thus push buffers
were located between 12GB and 16GB in the GPU VA space.
Change-Id: Iaef0af3fda3f37ac09a66b5e1179527d6fe08ccc
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/740728
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 54 |
2 files changed, 56 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 7d359ff4..5d1ff563 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1437,7 +1437,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1437 | * the alignment determined by gmmu_select_page_size(). | 1437 | * the alignment determined by gmmu_select_page_size(). |
1438 | */ | 1438 | */ |
1439 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | 1439 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { |
1440 | int pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ? | 1440 | int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ? |
1441 | gmmu_page_size_big : gmmu_page_size_small; | 1441 | gmmu_page_size_big : gmmu_page_size_small; |
1442 | if (pgsz_idx > bfr.pgsz_idx) { | 1442 | if (pgsz_idx > bfr.pgsz_idx) { |
1443 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", | 1443 | gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d", |
@@ -2441,6 +2441,13 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2441 | /* note: keep the page sizes sorted lowest to highest here */ | 2441 | /* note: keep the page sizes sorted lowest to highest here */ |
2442 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; | 2442 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; |
2443 | 2443 | ||
2444 | /* | ||
2445 | * Linsim bug: seems like we can't have pushbuffers above 4GB. Easy WAR for sim | ||
2446 | * is to just limit the address space to 4GB. | ||
2447 | */ | ||
2448 | if (tegra_platform_is_linsim() && aperture_size > SZ_4G) | ||
2449 | aperture_size = SZ_4G; | ||
2450 | |||
2444 | vm->mm = mm; | 2451 | vm->mm = mm; |
2445 | 2452 | ||
2446 | vm->va_start = low_hole; | 2453 | vm->va_start = low_hole; |
@@ -2483,7 +2490,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2483 | * remains is allocated to large pages. */ | 2490 | * remains is allocated to large pages. */ |
2484 | small_vma_size = vm->va_limit; | 2491 | small_vma_size = vm->va_limit; |
2485 | if (big_pages) { | 2492 | if (big_pages) { |
2486 | small_vma_size = (u64)16 << 30; | 2493 | small_vma_size = __nv_gmmu_va_small_page_limit(); |
2487 | large_vma_size = vm->va_limit - small_vma_size; | 2494 | large_vma_size = vm->va_limit - small_vma_size; |
2488 | } | 2495 | } |
2489 | 2496 | ||
@@ -2698,7 +2705,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2698 | args->pages, args->offset); | 2705 | args->pages, args->offset); |
2699 | 2706 | ||
2700 | /* determine pagesz idx */ | 2707 | /* determine pagesz idx */ |
2701 | pgsz_idx = NV_GMMU_VA_IS_UPPER(args->offset) ? | 2708 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? |
2702 | gmmu_page_size_big : gmmu_page_size_small; | 2709 | gmmu_page_size_big : gmmu_page_size_small; |
2703 | 2710 | ||
2704 | start_page_nr = (u32)(args->offset >> | 2711 | start_page_nr = (u32)(args->offset >> |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 24309abc..57f7a373 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -21,18 +21,11 @@ | |||
21 | #include <linux/scatterlist.h> | 21 | #include <linux/scatterlist.h> |
22 | #include <linux/dma-attrs.h> | 22 | #include <linux/dma-attrs.h> |
23 | #include <linux/iommu.h> | 23 | #include <linux/iommu.h> |
24 | #include <linux/tegra-soc.h> | ||
24 | #include <asm/dma-iommu.h> | 25 | #include <asm/dma-iommu.h> |
25 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
26 | #include "gk20a_allocator.h" | 27 | #include "gk20a_allocator.h" |
27 | 28 | ||
28 | /* | ||
29 | * Amount of the GVA space we actually use is smaller than the available space. | ||
30 | * The bottom 16GB of the space are used for small pages, the remaining high | ||
31 | * memory is for large pages. | ||
32 | */ | ||
33 | #define NV_GMMU_VA_RANGE 37ULL | ||
34 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)SZ_1G * 16)) | ||
35 | |||
36 | #ifdef CONFIG_ARM64 | 29 | #ifdef CONFIG_ARM64 |
37 | #define outer_flush_range(a, b) | 30 | #define outer_flush_range(a, b) |
38 | #define __cpuc_flush_dcache_area __flush_dcache_area | 31 | #define __cpuc_flush_dcache_area __flush_dcache_area |
@@ -344,6 +337,51 @@ static inline int max_vaddr_bits_gk20a(void) | |||
344 | return 40; /* chopped for area? */ | 337 | return 40; /* chopped for area? */ |
345 | } | 338 | } |
346 | 339 | ||
340 | /* | ||
341 | * Amount of the GVA space we actually use is smaller than the available space. | ||
342 | */ | ||
343 | #define NV_GMMU_VA_RANGE 37 | ||
344 | |||
345 | /* | ||
346 | * The bottom 16GB of the space are used for small pages, the remaining high | ||
347 | * memory is for large pages. On simulation use 2GB for small pages, 2GB for | ||
348 | * large pages (if enabled). | ||
349 | */ | ||
350 | static inline u64 __nv_gmmu_va_small_page_limit(void) | ||
351 | { | ||
352 | if (tegra_platform_is_linsim()) | ||
353 | return ((u64)SZ_1G * 2); | ||
354 | else | ||
355 | return ((u64)SZ_1G * 16); | ||
356 | } | ||
357 | |||
358 | static inline int __nv_gmmu_va_is_upper(struct vm_gk20a *vm, u64 addr) | ||
359 | { | ||
360 | if (!vm->big_pages) | ||
361 | return 0; | ||
362 | |||
363 | return addr >= __nv_gmmu_va_small_page_limit(); | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * This determines the PTE size for a given alloc. Used by both the GVA space | ||
368 | * allocator and the mm core code so that agreement can be reached on how to | ||
369 | * map allocations. | ||
370 | */ | ||
371 | static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, | ||
372 | u64 base, u64 size) | ||
373 | { | ||
374 | /* | ||
375 | * Currently userspace is not ready for a true unified address space. | ||
376 | * As a result, even though the allocator supports mixed address spaces | ||
377 | * the address spaces must be treated as separate for now. | ||
378 | */ | ||
379 | if (__nv_gmmu_va_is_upper(vm, base)) | ||
380 | return gmmu_page_size_big; | ||
381 | else | ||
382 | return gmmu_page_size_small; | ||
383 | } | ||
384 | |||
347 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | 385 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ |
348 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | 386 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ |
349 | bus_bar1_block_ptr_s()) | 387 | bus_bar1_block_ptr_s()) |