-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	13
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	54
2 files changed, 56 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7d359ff4..5d1ff563 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1437,7 +1437,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
	 * the alignment determined by gmmu_select_page_size().
	 */
	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
+		int pgsz_idx = __nv_gmmu_va_is_upper(vm, offset_align) ?
				gmmu_page_size_big : gmmu_page_size_small;
		if (pgsz_idx > bfr.pgsz_idx) {
			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
@@ -2441,6 +2441,13 @@ int gk20a_init_vm(struct mm_gk20a *mm,
	/* note: keep the page sizes sorted lowest to highest here */
	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };

+	/*
+	 * Linsim bug: seems like we can't have pushbuffers above 4GB. Easy WAR for sim
+	 * is to just limit the address space to 4GB.
+	 */
+	if (tegra_platform_is_linsim() && aperture_size > SZ_4G)
+		aperture_size = SZ_4G;
+
	vm->mm = mm;

	vm->va_start = low_hole;
@@ -2483,7 +2490,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
	 * remains is allocated to large pages. */
	small_vma_size = vm->va_limit;
	if (big_pages) {
-		small_vma_size = (u64)16 << 30;
+		small_vma_size = __nv_gmmu_va_small_page_limit();
		large_vma_size = vm->va_limit - small_vma_size;
	}

@@ -2698,7 +2705,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
			args->pages, args->offset);

	/* determine pagesz idx */
-	pgsz_idx = NV_GMMU_VA_IS_UPPER(args->offset) ?
+	pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
			gmmu_page_size_big : gmmu_page_size_small;

	start_page_nr = (u32)(args->offset >>
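
The two gk20a_init_vm() hunks above cap the GVA aperture at 4GB on linsim and
let __nv_gmmu_va_small_page_limit() pick the small/large VMA split. As a rough
sanity check of the resulting sizes, here is a minimal standalone sketch of
that arithmetic, assuming big pages are enabled and the silicon aperture is
the full 2^37 bytes implied by NV_GMMU_VA_RANGE (the local `linsim` flag is a
stand-in for tegra_platform_is_linsim(); this models the hunks, it is not
driver code):

#include <stdio.h>
#include <stdint.h>

#define SZ_1G (1ULL << 30)
#define SZ_4G (4 * SZ_1G)

int main(void)
{
	uint64_t aperture = 1ULL << 37;   /* 2^37 B = 128GB of GVA */
	int linsim = 1;                   /* toggle to model simulation */

	/* First hunk: the linsim WAR caps the address space at 4GB. */
	if (linsim && aperture > SZ_4G)
		aperture = SZ_4G;

	/* Second hunk: with big pages enabled, everything below the
	 * small-page limit is small-page VMA and the rest is large. */
	uint64_t small_vma = linsim ? 2 * SZ_1G : 16 * SZ_1G;
	uint64_t large_vma = aperture - small_vma;

	printf("small-page VMA: %llu GB\n",
	       (unsigned long long)(small_vma / SZ_1G));
	printf("large-page VMA: %llu GB\n",
	       (unsigned long long)(large_vma / SZ_1G));
	return 0;
}

With `linsim` set this prints 2GB/2GB, matching the "2GB for small pages, 2GB
for large pages" comment added in mm_gk20a.h; on silicon the split is
16GB/112GB.
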
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 24309abc..57f7a373 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -21,18 +21,11 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-attrs.h>
 #include <linux/iommu.h>
+#include <linux/tegra-soc.h>
 #include <asm/dma-iommu.h>
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"

-/*
- * Amount of the GVA space we actually use is smaller than the available space.
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-#define NV_GMMU_VA_RANGE	37ULL
-#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)SZ_1G * 16))
-
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
 #define __cpuc_flush_dcache_area __flush_dcache_area
@@ -344,6 +337,51 @@ static inline int max_vaddr_bits_gk20a(void)
	return 40; /* chopped for area? */
 }

+/*
+ * Amount of the GVA space we actually use is smaller than the available space.
+ */
+#define NV_GMMU_VA_RANGE	37
+
+/*
+ * The bottom 16GB of the space are used for small pages, the remaining high
+ * memory is for large pages. On simulation use 2GB for small pages, 2GB for
+ * large pages (if enabled).
+ */
+static inline u64 __nv_gmmu_va_small_page_limit(void)
+{
+	if (tegra_platform_is_linsim())
+		return ((u64)SZ_1G * 2);
+	else
+		return ((u64)SZ_1G * 16);
+}
+
+static inline int __nv_gmmu_va_is_upper(struct vm_gk20a *vm, u64 addr)
+{
+	if (!vm->big_pages)
+		return 0;
+
+	return addr >= __nv_gmmu_va_small_page_limit();
+}
+
+/*
+ * This determines the PTE size for a given alloc. Used by both the GVA space
+ * allocator and the mm core code so that agreement can be reached on how to
+ * map allocations.
+ */
+static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
+						  u64 base, u64 size)
+{
+	/*
+	 * Currently userspace is not ready for a true unified address space.
+	 * As a result, even though the allocator supports mixed address spaces
+	 * the address spaces must be treated as separate for now.
+	 */
+	if (__nv_gmmu_va_is_upper(vm, base))
+		return gmmu_page_size_big;
+	else
+		return gmmu_page_size_small;
+}
+
 #if 0 /*related to addr bits above, concern below TBD on which is accurate */
 #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
 		bus_bar1_block_ptr_s())
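
The new __get_pte_size() helper is the piece the GVA allocator and the map
path are meant to agree on. A minimal userspace sketch of the decision it
encodes, with vm_gk20a and the platform check mocked down to the two inputs
the split logic actually reads (the mock names below are illustrative, not
from the driver):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SZ_1G (1ULL << 30)

/* Mock of the two vm_gk20a inputs the split logic reads; the real
 * struct and tegra_platform_is_linsim() live in the driver. */
struct vm_mock {
	bool big_pages;
	bool linsim;
};

/* Mirrors __nv_gmmu_va_small_page_limit(): 2GB boundary on simulation,
 * 16GB on silicon. */
static uint64_t small_page_limit(const struct vm_mock *vm)
{
	return vm->linsim ? 2 * SZ_1G : 16 * SZ_1G;
}

/* Mirrors __nv_gmmu_va_is_upper(): with big pages disabled the whole
 * space is small-page territory, so no address counts as "upper". */
static bool va_is_upper(const struct vm_mock *vm, uint64_t addr)
{
	if (!vm->big_pages)
		return false;
	return addr >= small_page_limit(vm);
}

int main(void)
{
	struct vm_mock silicon = { .big_pages = true,  .linsim = false };
	struct vm_mock linsim  = { .big_pages = true,  .linsim = true  };
	struct vm_mock no_big  = { .big_pages = false, .linsim = false };

	/* 20GB sits above the 16GB silicon boundary: big pages. */
	printf("silicon @20GB: %s\n",
	       va_is_upper(&silicon, 20 * SZ_1G) ? "big" : "small");
	/* 3GB sits above the 2GB linsim boundary: big pages there too. */
	printf("linsim  @3GB:  %s\n",
	       va_is_upper(&linsim, 3 * SZ_1G) ? "big" : "small");
	/* Big pages disabled: always small, regardless of address. */
	printf("no-big  @20GB: %s\n",
	       va_is_upper(&no_big, 20 * SZ_1G) ? "big" : "small");
	return 0;
}

Note that with big_pages disabled nothing is "upper", so every allocation
falls back to small pages regardless of address. That vm-awareness is why the
gk20a_vm_map() and gk20a_vm_free_space() hunks switch from the address-only
NV_GMMU_VA_IS_UPPER() macro to __nv_gmmu_va_is_upper(vm, addr).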