diff options
author | Alex Waterman <alexw@nvidia.com> | 2014-12-11 14:33:52 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:02:38 -0400 |
commit | a99bbc5f6070a346006cf3f63e7f5f2120f30a2f (patch) | |
tree | d400d071aee0404aa1dca924b1e554c173d11735 /drivers/gpu/nvgpu/gk20a | |
parent | edaf6188d54683bd955f1dc6673b3bb3ba18367e (diff) |
gpu: nvgpu: make larger address space work
Implement several fixes for allowing the GVA address space to grow
to larger than 32GB and increase the address space to 128GB.
o Implement dynamic allocation of PDE backing pages. The memory
to store the PDE entries was hard coded to 1 page. Now the
number of pages necessary is computed dynamically based on the
size of the address space and the size of large pages.
o Fix an arithmetic problem in the gm20b sparse texture code
that caused large address spaces to be truncated when sparse
PDEs/PTEs were being filled in. This caused a kernel panic
when freeing the address space since a lot of the backing
PTE memory was not allocated.
o Change the address space split for large and small pages. Small
pages now occupy the bottom 16GB of the address space. Large
pages are used for the rest of the address space. Now, with a
128GB address space, there are 112GB of large page GVA available.
This patch exists to allow large (16GB) sparse textures to be allocated
without running into out-of-memory issues and kernel panics.
Bug 1574267
Change-Id: I7c59ee54bd573dfc53b58c346156df37a85dfc22
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/671204
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 45 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 11 |
2 files changed, 37 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 949237b1..09948a25 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1811,6 +1811,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1811 | goto clean_up; | 1811 | goto clean_up; |
1812 | } | 1812 | } |
1813 | 1813 | ||
1814 | BUG_ON(!pte_kv_cur); | ||
1815 | |||
1814 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | 1816 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); |
1815 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | 1817 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { |
1816 | if (likely(sgt)) { | 1818 | if (likely(sgt)) { |
@@ -2128,9 +2130,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2128 | char *name) | 2130 | char *name) |
2129 | { | 2131 | { |
2130 | int err, i; | 2132 | int err, i; |
2131 | u32 num_pages, low_hole_pages; | 2133 | u32 num_small_pages, num_large_pages, low_hole_pages; |
2132 | char alloc_name[32]; | 2134 | char alloc_name[32]; |
2133 | u64 vma_size; | 2135 | u64 small_vma_size, large_vma_size; |
2136 | u32 pde_pages; | ||
2134 | 2137 | ||
2135 | /* note: keep the page sizes sorted lowest to highest here */ | 2138 | /* note: keep the page sizes sorted lowest to highest here */ |
2136 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; | 2139 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; |
@@ -2206,7 +2209,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2206 | name, vm->va_limit, vm->pdes.num_pdes); | 2209 | name, vm->va_limit, vm->pdes.num_pdes); |
2207 | 2210 | ||
2208 | /* allocate the page table directory */ | 2211 | /* allocate the page table directory */ |
2209 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | 2212 | pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512); |
2213 | |||
2214 | gk20a_dbg(gpu_dbg_pte, "Allocating %d ** 2 PDE pages\n", pde_pages); | ||
2215 | err = alloc_gmmu_pages(vm, pde_pages, &vm->pdes.ref, | ||
2210 | &vm->pdes.sgt, &vm->pdes.size); | 2216 | &vm->pdes.sgt, &vm->pdes.size); |
2211 | if (err) | 2217 | if (err) |
2212 | goto clean_up_pdes; | 2218 | goto clean_up_pdes; |
@@ -2220,13 +2226,15 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2220 | vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl)); | 2226 | vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl)); |
2221 | /* we could release vm->pdes.kv but it's only one page... */ | 2227 | /* we could release vm->pdes.kv but it's only one page... */ |
2222 | 2228 | ||
2223 | /* low-half: alloc small pages */ | 2229 | /* First 16GB of the address space goes towards small pages. Whatever |
2224 | /* high-half: alloc big pages */ | 2230 | * remains is allocated to large pages. */ |
2225 | vma_size = vm->va_limit; | 2231 | small_vma_size = vm->va_limit; |
2226 | if (big_pages) | 2232 | if (big_pages) { |
2227 | vma_size /= 2; | 2233 | small_vma_size = (u64)16 << 30; |
2234 | large_vma_size = vm->va_limit - small_vma_size; | ||
2235 | } | ||
2228 | 2236 | ||
2229 | num_pages = (u32)(vma_size >> | 2237 | num_small_pages = (u32)(small_vma_size >> |
2230 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | 2238 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); |
2231 | 2239 | ||
2232 | /* num_pages above is without regard to the low-side hole. */ | 2240 | /* num_pages above is without regard to the low-side hole. */ |
@@ -2238,20 +2246,22 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2238 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | 2246 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], |
2239 | alloc_name, | 2247 | alloc_name, |
2240 | low_hole_pages, /*start*/ | 2248 | low_hole_pages, /*start*/ |
2241 | num_pages - low_hole_pages);/* length*/ | 2249 | num_small_pages - low_hole_pages);/* length*/ |
2242 | if (err) | 2250 | if (err) |
2243 | goto clean_up_map_pde; | 2251 | goto clean_up_map_pde; |
2244 | 2252 | ||
2245 | if (big_pages) { | 2253 | if (big_pages) { |
2246 | num_pages = (u32)((vm->va_limit / 2) >> | 2254 | u32 start = (u32)(small_vma_size >> |
2255 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
2256 | num_large_pages = (u32)(large_vma_size >> | ||
2247 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | 2257 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); |
2248 | 2258 | ||
2249 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 2259 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
2250 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); | 2260 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); |
2251 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 2261 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
2252 | alloc_name, | 2262 | alloc_name, |
2253 | num_pages, /* start */ | 2263 | start, /* start */ |
2254 | num_pages); /* length */ | 2264 | num_large_pages); /* length */ |
2255 | if (err) | 2265 | if (err) |
2256 | goto clean_up_small_allocator; | 2266 | goto clean_up_small_allocator; |
2257 | } | 2267 | } |
@@ -2269,7 +2279,7 @@ clean_up_small_allocator: | |||
2269 | clean_up_map_pde: | 2279 | clean_up_map_pde: |
2270 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); | 2280 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); |
2271 | clean_up_ptes: | 2281 | clean_up_ptes: |
2272 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | 2282 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages, |
2273 | vm->pdes.size); | 2283 | vm->pdes.size); |
2274 | clean_up_pdes: | 2284 | clean_up_pdes: |
2275 | kfree(vm->pdes.ptes[gmmu_page_size_small]); | 2285 | kfree(vm->pdes.ptes[gmmu_page_size_small]); |
@@ -2647,10 +2657,15 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset) | |||
2647 | 2657 | ||
2648 | static void gk20a_deinit_vm(struct vm_gk20a *vm) | 2658 | static void gk20a_deinit_vm(struct vm_gk20a *vm) |
2649 | { | 2659 | { |
2660 | u32 pde_pages; | ||
2661 | |||
2650 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 2662 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); |
2651 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 2663 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); |
2664 | |||
2652 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); | 2665 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); |
2653 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | 2666 | |
2667 | pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512); | ||
2668 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages, | ||
2654 | vm->pdes.size); | 2669 | vm->pdes.size); |
2655 | kfree(vm->pdes.ptes[gmmu_page_size_small]); | 2670 | kfree(vm->pdes.ptes[gmmu_page_size_small]); |
2656 | kfree(vm->pdes.ptes[gmmu_page_size_big]); | 2671 | kfree(vm->pdes.ptes[gmmu_page_size_big]); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 04f9446b..d39dcff0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -25,10 +25,13 @@ | |||
25 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
26 | #include "gk20a_allocator.h" | 26 | #include "gk20a_allocator.h" |
27 | 27 | ||
28 | /* For now keep the size relatively small-ish compared to the full | 28 | /* |
29 | * 40b va. 32GB for now. It consists of two 16GB spaces. */ | 29 | * Amount of the GVA space we actually use is smaller than the available space. |
30 | #define NV_GMMU_VA_RANGE 35ULL | 30 | * The bottom 16GB of the space are used for small pages, the remaining high |
31 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) | 31 | * memory is for large pages. |
32 | */ | ||
33 | #define NV_GMMU_VA_RANGE 37ULL | ||
34 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)SZ_1G * 16)) | ||
32 | 35 | ||
33 | #ifdef CONFIG_ARM64 | 36 | #ifdef CONFIG_ARM64 |
34 | #define outer_flush_range(a, b) | 37 | #define outer_flush_range(a, b) |