path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
author	Alex Waterman <alexw@nvidia.com>	2015-04-16 14:46:22 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-05-11 11:52:09 -0400
commit	0566aee853eb32f4f796499b6b00ddf0f1d7de34 (patch)
tree	e640f945d82c598645b0b2c5cbc6eb194444026d	/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
parent	e206fdecb3e380adced41dd432779c848d18caa0 (diff)
gpu: nvgpu: WAR for simulator bug
On linsim, when the push buffers are allowed to be allocated with small
pages above 4GB, the simulator crashes. This patch ensures that for linsim
all small page allocations are forced to be below 4GB in the GPU VA space.
By doing so the simulator no longer crashes.

This bug has come up because the GPU buddy allocator work generates
allocations at the top of the address space first. Thus push buffers were
located between 12GB and 16GB in the GPU VA space.

Change-Id: Iaef0af3fda3f37ac09a66b5e1179527d6fe08ccc
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/740728
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
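The decision logic introduced by this patch can be illustrated outside the kernel. The following is a minimal user-space sketch, not the driver code itself: tegra_platform_is_linsim(), struct vm_gk20a, and the gmmu_pgsz values are stand-ins for the real nvgpu definitions, and only the small-page-limit / page-size split mirrors the diff below. It shows why an allocation landing at 12GB in the GPU VA space is no longer treated as a small-page mapping on linsim.

/* Hypothetical stand-alone sketch of the VA split; builds with any C99 compiler. */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SZ_1G (1ULL << 30)

/* Stub: pretend we are running on the linsim simulator. */
static bool tegra_platform_is_linsim(void) { return true; }

/* Stand-in for the real struct vm_gk20a; only the big_pages flag matters here. */
struct vm_gk20a { bool big_pages; };

enum gmmu_pgsz_gk20a { gmmu_page_size_small, gmmu_page_size_big };

/* On linsim only the bottom 2GB of the GPU VA space takes small pages,
 * on silicon the bottom 16GB does. */
static uint64_t nv_gmmu_va_small_page_limit(void)
{
	return tegra_platform_is_linsim() ? SZ_1G * 2 : SZ_1G * 16;
}

/* Mirror of the __get_pte_size() decision: addresses above the small-page
 * limit are mapped with big pages when the VM supports them. */
static enum gmmu_pgsz_gk20a get_pte_size(const struct vm_gk20a *vm, uint64_t base)
{
	if (vm->big_pages && base >= nv_gmmu_va_small_page_limit())
		return gmmu_page_size_big;
	return gmmu_page_size_small;
}

int main(void)
{
	struct vm_gk20a vm = { .big_pages = true };

	/* A push buffer placed at 12GB would previously have been mapped with
	 * small pages above 4GB and crash the simulator; with the split it is
	 * classified as a big-page mapping instead. */
	uint64_t addrs[] = { 1 * SZ_1G, 12 * SZ_1G };

	for (int i = 0; i < 2; i++)
		printf("VA 0x%llx -> %s pages\n",
		       (unsigned long long)addrs[i],
		       get_pte_size(&vm, addrs[i]) == gmmu_page_size_big ?
				"big" : "small");
	return 0;
}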
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	54
1 file changed, 46 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 24309abc..57f7a373 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -21,18 +21,11 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-attrs.h>
 #include <linux/iommu.h>
+#include <linux/tegra-soc.h>
 #include <asm/dma-iommu.h>
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"
 
-/*
- * Amount of the GVA space we actually use is smaller than the available space.
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-#define NV_GMMU_VA_RANGE 37ULL
-#define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)SZ_1G * 16))
-
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
 #define __cpuc_flush_dcache_area __flush_dcache_area
@@ -344,6 +337,51 @@ static inline int max_vaddr_bits_gk20a(void)
 	return 40; /* chopped for area? */
 }
 
+/*
+ * Amount of the GVA space we actually use is smaller than the available space.
+ */
+#define NV_GMMU_VA_RANGE 37
+
+/*
+ * The bottom 16GB of the space are used for small pages, the remaining high
+ * memory is for large pages. On simulation use 2GB for small pages, 2GB for
+ * large pages (if enabled).
+ */
+static inline u64 __nv_gmmu_va_small_page_limit(void)
+{
+	if (tegra_platform_is_linsim())
+		return ((u64)SZ_1G * 2);
+	else
+		return ((u64)SZ_1G * 16);
+}
+
+static inline int __nv_gmmu_va_is_upper(struct vm_gk20a *vm, u64 addr)
+{
+	if (!vm->big_pages)
+		return 0;
+
+	return addr >= __nv_gmmu_va_small_page_limit();
+}
+
+/*
+ * This determines the PTE size for a given alloc. Used by both the GVA space
+ * allocator and the mm core code so that agreement can be reached on how to
+ * map allocations.
+ */
+static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
+						  u64 base, u64 size)
+{
+	/*
+	 * Currently userspace is not ready for a true unified address space.
+	 * As a result, even though the allocator supports mixed address spaces
+	 * the address spaces must be treated as separate for now.
+	 */
+	if (__nv_gmmu_va_is_upper(vm, base))
+		return gmmu_page_size_big;
+	else
+		return gmmu_page_size_small;
+}
+
 #if 0 /*related to addr bits above, concern below TBD on which is accurate */
 #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
 		bus_bar1_block_ptr_s())