diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 45 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 11 |
2 files changed, 37 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 949237b1..09948a25 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1811,6 +1811,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1811 | goto clean_up; | 1811 | goto clean_up; |
1812 | } | 1812 | } |
1813 | 1813 | ||
1814 | BUG_ON(!pte_kv_cur); | ||
1815 | |||
1814 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | 1816 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); |
1815 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | 1817 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { |
1816 | if (likely(sgt)) { | 1818 | if (likely(sgt)) { |
@@ -2128,9 +2130,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2128 | char *name) | 2130 | char *name) |
2129 | { | 2131 | { |
2130 | int err, i; | 2132 | int err, i; |
2131 | u32 num_pages, low_hole_pages; | 2133 | u32 num_small_pages, num_large_pages, low_hole_pages; |
2132 | char alloc_name[32]; | 2134 | char alloc_name[32]; |
2133 | u64 vma_size; | 2135 | u64 small_vma_size, large_vma_size; |
2136 | u32 pde_pages; | ||
2134 | 2137 | ||
2135 | /* note: keep the page sizes sorted lowest to highest here */ | 2138 | /* note: keep the page sizes sorted lowest to highest here */ |
2136 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; | 2139 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; |
@@ -2206,7 +2209,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2206 | name, vm->va_limit, vm->pdes.num_pdes); | 2209 | name, vm->va_limit, vm->pdes.num_pdes); |
2207 | 2210 | ||
2208 | /* allocate the page table directory */ | 2211 | /* allocate the page table directory */ |
2209 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | 2212 | pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512); |
2213 | |||
2214 | gk20a_dbg(gpu_dbg_pte, "Allocating %d ** 2 PDE pages\n", pde_pages); | ||
2215 | err = alloc_gmmu_pages(vm, pde_pages, &vm->pdes.ref, | ||
2210 | &vm->pdes.sgt, &vm->pdes.size); | 2216 | &vm->pdes.sgt, &vm->pdes.size); |
2211 | if (err) | 2217 | if (err) |
2212 | goto clean_up_pdes; | 2218 | goto clean_up_pdes; |
@@ -2220,13 +2226,15 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2220 | vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl)); | 2226 | vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl)); |
2221 | /* we could release vm->pdes.kv but it's only one page... */ | 2227 | /* we could release vm->pdes.kv but it's only one page... */ |
2222 | 2228 | ||
2223 | /* low-half: alloc small pages */ | 2229 | /* First 16GB of the address space goes towards small pages. Whatever |
2224 | /* high-half: alloc big pages */ | 2230 | * remains is allocated to large pages. */ |
2225 | vma_size = vm->va_limit; | 2231 | small_vma_size = vm->va_limit; |
2226 | if (big_pages) | 2232 | if (big_pages) { |
2227 | vma_size /= 2; | 2233 | small_vma_size = (u64)16 << 30; |
2234 | large_vma_size = vm->va_limit - small_vma_size; | ||
2235 | } | ||
2228 | 2236 | ||
2229 | num_pages = (u32)(vma_size >> | 2237 | num_small_pages = (u32)(small_vma_size >> |
2230 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | 2238 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); |
2231 | 2239 | ||
2232 | /* num_pages above is without regard to the low-side hole. */ | 2240 | /* num_pages above is without regard to the low-side hole. */ |
@@ -2238,20 +2246,22 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2238 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | 2246 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], |
2239 | alloc_name, | 2247 | alloc_name, |
2240 | low_hole_pages, /*start*/ | 2248 | low_hole_pages, /*start*/ |
2241 | num_pages - low_hole_pages);/* length*/ | 2249 | num_small_pages - low_hole_pages);/* length*/ |
2242 | if (err) | 2250 | if (err) |
2243 | goto clean_up_map_pde; | 2251 | goto clean_up_map_pde; |
2244 | 2252 | ||
2245 | if (big_pages) { | 2253 | if (big_pages) { |
2246 | num_pages = (u32)((vm->va_limit / 2) >> | 2254 | u32 start = (u32)(small_vma_size >> |
2255 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
2256 | num_large_pages = (u32)(large_vma_size >> | ||
2247 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | 2257 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); |
2248 | 2258 | ||
2249 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 2259 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
2250 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); | 2260 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); |
2251 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 2261 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
2252 | alloc_name, | 2262 | alloc_name, |
2253 | num_pages, /* start */ | 2263 | start, /* start */ |
2254 | num_pages); /* length */ | 2264 | num_large_pages); /* length */ |
2255 | if (err) | 2265 | if (err) |
2256 | goto clean_up_small_allocator; | 2266 | goto clean_up_small_allocator; |
2257 | } | 2267 | } |
@@ -2269,7 +2279,7 @@ clean_up_small_allocator: | |||
2269 | clean_up_map_pde: | 2279 | clean_up_map_pde: |
2270 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); | 2280 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); |
2271 | clean_up_ptes: | 2281 | clean_up_ptes: |
2272 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | 2282 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages, |
2273 | vm->pdes.size); | 2283 | vm->pdes.size); |
2274 | clean_up_pdes: | 2284 | clean_up_pdes: |
2275 | kfree(vm->pdes.ptes[gmmu_page_size_small]); | 2285 | kfree(vm->pdes.ptes[gmmu_page_size_small]); |
@@ -2647,10 +2657,15 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset) | |||
2647 | 2657 | ||
2648 | static void gk20a_deinit_vm(struct vm_gk20a *vm) | 2658 | static void gk20a_deinit_vm(struct vm_gk20a *vm) |
2649 | { | 2659 | { |
2660 | u32 pde_pages; | ||
2661 | |||
2650 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 2662 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); |
2651 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 2663 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); |
2664 | |||
2652 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); | 2665 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); |
2653 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | 2666 | |
2667 | pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512); | ||
2668 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages, | ||
2654 | vm->pdes.size); | 2669 | vm->pdes.size); |
2655 | kfree(vm->pdes.ptes[gmmu_page_size_small]); | 2670 | kfree(vm->pdes.ptes[gmmu_page_size_small]); |
2656 | kfree(vm->pdes.ptes[gmmu_page_size_big]); | 2671 | kfree(vm->pdes.ptes[gmmu_page_size_big]); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 04f9446b..d39dcff0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -25,10 +25,13 @@ | |||
25 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
26 | #include "gk20a_allocator.h" | 26 | #include "gk20a_allocator.h" |
27 | 27 | ||
28 | /* For now keep the size relatively small-ish compared to the full | 28 | /* |
29 | * 40b va. 32GB for now. It consists of two 16GB spaces. */ | 29 | * The amount of the GVA space we actually use is smaller than the available space. |
30 | #define NV_GMMU_VA_RANGE 35ULL | 30 | * The bottom 16GB of the space is used for small pages; the remaining high |
31 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) | 31 | * memory is for large pages. |
32 | */ | ||
33 | #define NV_GMMU_VA_RANGE 37ULL | ||
34 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)SZ_1G * 16)) | ||
32 | 35 | ||
33 | #ifdef CONFIG_ARM64 | 36 | #ifdef CONFIG_ARM64 |
34 | #define outer_flush_range(a, b) | 37 | #define outer_flush_range(a, b) |