author	Alex Waterman <alexw@nvidia.com>	2014-12-11 14:33:52 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 21:02:38 -0400
commit	a99bbc5f6070a346006cf3f63e7f5f2120f30a2f (patch)
tree	d400d071aee0404aa1dca924b1e554c173d11735 /drivers/gpu
parent	edaf6188d54683bd955f1dc6673b3bb3ba18367e (diff)
gpu: nvgpu: make larger address space work
Implement several fixes for allowing the GVA address space to grow
larger than 32GB, and increase the address space to 128GB.

o Implement dynamic allocation of PDE backing pages. The memory to
  store the PDE entries was hard coded to 1 page. Now the number of
  pages necessary is computed dynamically based on the size of the
  address space and the size of large pages (sketched below).

o Fix an arithmetic problem in the gm20b sparse texture code that
  caused large address spaces to be truncated when sparse PDEs/PTEs
  were being filled in. This caused a kernel panic when freeing the
  address space, since much of the backing PTE memory had never been
  allocated.

o Change the address space split for large and small pages. Small
  pages now occupy the bottom 16GB of the address space; large pages
  are used for the rest. With a 128GB address space, this leaves
  112GB of large-page GVA available.

This patch exists to allow large (16GB) sparse textures to be
allocated without running into out-of-memory conditions and kernel
panics.

Bug 1574267

Change-Id: I7c59ee54bd573dfc53b58c346156df37a85dfc22
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/671204
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
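For reference, a minimal userspace sketch of the PDE page-order
computation from the first item above. The helper names and main() are
illustrative, not driver code; it assumes 8-byte PDEs packed 512 to a
4K page (matching the divisor the patch uses) and that each PDE covers
128MB with the 128KB large-page geometry.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's ilog2(): floor of log2. */
static uint32_t ilog2_u32(uint32_t x)
{
	uint32_t l = 0;

	while (x >>= 1)
		l++;
	return l;
}

/*
 * Each 4K backing page holds 512 eight-byte PDEs, so ceil(num_pdes / 512)
 * pages are needed; alloc_gmmu_pages() takes a power-of-two order, hence
 * the ilog2(), mirroring: pde_pages = ilog2((num_pdes + 511) / 512).
 */
static uint32_t pde_page_order(uint32_t num_pdes)
{
	return ilog2_u32((num_pdes + 511) / 512);
}

int main(void)
{
	/* A 128GB VA with 128MB covered per PDE gives 1024 PDEs:
	 * two backing pages, i.e. order 1. */
	printf("order = %u\n", pde_page_order(1024));
	return 0;
}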
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	45
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	11
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mm_gm20b.c	2
3 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 949237b1..09948a25 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1811,6 +1811,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		goto clean_up;
 	}
 
+	BUG_ON(!pte_kv_cur);
+
 	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 		if (likely(sgt)) {
@@ -2128,9 +2130,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		char *name)
 {
 	int err, i;
-	u32 num_pages, low_hole_pages;
+	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
-	u64 vma_size;
+	u64 small_vma_size, large_vma_size;
+	u32 pde_pages;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
@@ -2206,7 +2209,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 			name, vm->va_limit, vm->pdes.num_pdes);
 
 	/* allocate the page table directory */
-	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
+	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
+
+	gk20a_dbg(gpu_dbg_pte, "Allocating %d ** 2 PDE pages\n", pde_pages);
+	err = alloc_gmmu_pages(vm, pde_pages, &vm->pdes.ref,
 			&vm->pdes.sgt, &vm->pdes.size);
 	if (err)
 		goto clean_up_pdes;
@@ -2220,13 +2226,15 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl));
 	/* we could release vm->pdes.kv but it's only one page... */
 
-	/* low-half: alloc small pages */
-	/* high-half: alloc big pages */
-	vma_size = vm->va_limit;
-	if (big_pages)
-		vma_size /= 2;
+	/* First 16GB of the address space goes towards small pages. What ever
+	 * remains is allocated to large pages. */
+	small_vma_size = vm->va_limit;
+	if (big_pages) {
+		small_vma_size = (u64)16 << 30;
+		large_vma_size = vm->va_limit - small_vma_size;
+	}
 
-	num_pages = (u32)(vma_size >>
+	num_small_pages = (u32)(small_vma_size >>
 		ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
 
 	/* num_pages above is without regard to the low-side hole. */
@@ -2238,20 +2246,22 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
 			alloc_name,
 			low_hole_pages,		 /*start*/
-			num_pages - low_hole_pages);/* length*/
+			num_small_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_map_pde;
 
 	if (big_pages) {
-		num_pages = (u32)((vm->va_limit / 2) >>
+		u32 start = (u32)(small_vma_size >>
+			ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+		num_large_pages = (u32)(large_vma_size >>
 			ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
 
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
 			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
 		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
 				alloc_name,
-				num_pages, /* start */
-				num_pages); /* length */
+				start, /* start */
+				num_large_pages); /* length */
 		if (err)
 			goto clean_up_small_allocator;
 	}
@@ -2269,7 +2279,7 @@ clean_up_small_allocator:
 clean_up_map_pde:
 	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
 clean_up_ptes:
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
 		vm->pdes.size);
 clean_up_pdes:
 	kfree(vm->pdes.ptes[gmmu_page_size_small]);
@@ -2647,10 +2657,15 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
 
 static void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
+	u32 pde_pages;
+
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+
 	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+
+	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
 		vm->pdes.size);
 	kfree(vm->pdes.ptes[gmmu_page_size_small]);
 	kfree(vm->pdes.ptes[gmmu_page_size_big]);
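A worked example of the allocator ranges gk20a_init_vm() sets up under
the new split, as plain C rather than driver code. It assumes the
128GB VA, 4K small pages, and 128KB large pages described in the
commit message.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t va_limit = 128ULL << 30;		/* 128GB GVA */
	uint64_t small_vma_size = 16ULL << 30;		/* bottom 16GB */
	uint64_t large_vma_size = va_limit - small_vma_size;

	/* Small-page allocator: 16GB of 4K pages. */
	uint32_t num_small_pages = (uint32_t)(small_vma_size >> 12);

	/* Large-page allocator starts where the small pages end and
	 * covers the remaining 112GB in 128KB units. */
	uint32_t start = (uint32_t)(small_vma_size >> 17);
	uint32_t num_large_pages = (uint32_t)(large_vma_size >> 17);

	printf("small: %u pages, large: start=%u len=%u\n",
	       num_small_pages, start, num_large_pages);
	return 0;
}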
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 04f9446b..d39dcff0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -25,10 +25,13 @@
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"
 
-/* For now keep the size relatively small-ish compared to the full
- * 40b va. 32GB for now. It consists of two 16GB spaces. */
-#define NV_GMMU_VA_RANGE	35ULL
-#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
+/*
+ * Amount of the GVA space we actually use is smaller than the available space.
+ * The bottom 16GB of the space are used for small pages, the remaining high
+ * memory is for large pages.
+ */
+#define NV_GMMU_VA_RANGE	37ULL
+#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)SZ_1G * 16))
 
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
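As a quick sanity check of the new header values, a standalone sketch
(SZ_1G is redefined here for userspace; in the kernel it comes from
the sizes header):

#include <assert.h>
#include <stdint.h>

#define SZ_1G			(1ULL << 30)
#define NV_GMMU_VA_RANGE	37ULL
#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((uint64_t)SZ_1G * 16))

int main(void)
{
	/* 2^37 bytes = 128GB of GVA in total. */
	assert((1ULL << NV_GMMU_VA_RANGE) == 128 * SZ_1G);

	/* The small/large split sits at a fixed 16GB, no longer at the
	 * halfway point of the range. */
	assert(!NV_GMMU_VA_IS_UPPER(16 * SZ_1G - 1));	/* small-page VA */
	assert(NV_GMMU_VA_IS_UPPER(16 * SZ_1G));	/* large-page VA */
	return 0;
}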
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 5b1a9a04..1adff5ab 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -111,7 +111,7 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 	gk20a_dbg_fn("");
 
-	vaddr_hi = vaddr + pgsz * num_pages - 1;
+	vaddr_hi = vaddr + pgsz * (u64)num_pages - 1;
 	pde_range_from_vaddr_range(vm,
 			vaddr,
 			vaddr_hi,
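The one-character gm20b change above fixes a 32-bit overflow. A
standalone sketch of the failure mode, assuming pgsz and num_pages are
32-bit in the original code, with values chosen for a 16GB sparse
mapping placed at a 16GB offset:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t vaddr = 16ULL << 30;	/* mapping placed at 16GB */
	uint32_t pgsz = 128 << 10;	/* 128KB large pages */
	uint32_t num_pages = 1u << 17;	/* 2^17 pages = 16GB total */

	/* Broken: a 32-bit multiply; 2^17 * 2^17 = 2^34 wraps to 0, so
	 * vaddr_hi lands below vaddr and the sparse PDE/PTE range is
	 * silently truncated. */
	uint64_t vaddr_hi_bad = vaddr + pgsz * num_pages - 1;

	/* Fixed: promote one operand to 64 bits before multiplying. */
	uint64_t vaddr_hi_ok = vaddr + pgsz * (uint64_t)num_pages - 1;

	printf("bad vaddr_hi = %#llx\nok  vaddr_hi = %#llx\n",
	       (unsigned long long)vaddr_hi_bad,
	       (unsigned long long)vaddr_hi_ok);
	return 0;
}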