author    Kevin Huang <kevinh@nvidia.com>        2014-07-29 18:56:17 -0400
committer Dan Willemsen <dwillemsen@nvidia.com>  2015-03-18 15:10:52 -0400
commit    62e80a189cfa0b6dbb9e27712a1c782e953c32f4 (patch)
tree      3c8f95d9af8ab26d792e765b64547a7df4273e60 /drivers/gpu/nvgpu
parent    4439a8e311e09b1f3af1a70d4111c75e002a607d (diff)
gpu: nvgpu: clear sparse in space free

Gk20a unmaps the addresses bound to the dummy page to clear sparse. On
gm20b, we instead need to free the page table entries allocated for the
sparse memory.

Bug 1538384

Change-Id: Ie2409ab016c29f42c5f7d97dd7287b093b47f9df
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Reviewed-on: http://git-master/r/448645
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  4
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 52
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  6
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 68
 4 files changed, 106 insertions, 24 deletions
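
The gist of the change, before the patch body: gk20a can clear sparse simply by
unmapping the dummy-page bindings, while gm20b tracks how many sparse
allocations reference each PDE's page table and frees that page table once the
last reference is dropped. The following is a minimal standalone sketch of that
refcounting scheme; the struct, function names, and the 4 KiB size are
illustrative stand-ins, not the driver's actual types.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for the driver's per-PDE page table bookkeeping. */
struct pte_block {
	int ref_cnt;    /* sparse allocations referencing this PDE */
	void *backing;  /* page table memory, released on the last put */
};

/* set_sparse path: make sure the PDE has a page table and take a reference. */
static void sparse_alloc(struct pte_block *pte)
{
	if (!pte->backing)
		pte->backing = malloc(4096);
	pte->ref_cnt++;
}

/* clear_sparse path: drop the reference; free the page table when unused.
 * The real gm20b code also rewrites the PDE afterwards
 * (update_gmmu_pde_locked) so it no longer points at freed memory. */
static void sparse_clear(struct pte_block *pte)
{
	if (--pte->ref_cnt == 0) {
		free(pte->backing);
		pte->backing = NULL;
	}
}

int main(void)
{
	struct pte_block pte = { 0, NULL };

	sparse_alloc(&pte);  /* space alloc with the SPARSE flag */
	sparse_clear(&pte);  /* space free on the same range */
	printf("ref_cnt=%d backing=%p\n", pte.ref_cnt, pte.backing);
	return 0;
}
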
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index edf1d548..b5ef3f0d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -234,7 +234,11 @@ struct gpu_ops {
 	} gr_ctx;
 	struct {
 		int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
+			       u32 num_pages, u32 pgsz_idx, bool refplus);
+		int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
 			       u32 num_pages, u32 pgsz_idx);
+		void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
+			       u64 size, u32 pgsz_idx);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 57c61d51..93a29b13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -100,7 +100,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -444,7 +443,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -534,7 +533,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -1865,7 +1864,7 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
    made. So, superfluous updates will cause unnecessary
    pde invalidations.
 */
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 {
 	bool small_valid, big_valid;
 	u64 pte_addr[2] = {0, 0};
@@ -1882,6 +1881,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	if (small_valid)
 		pte_addr[gmmu_page_size_small] =
 			gk20a_mm_iova_addr(small_pte->sgt->sgl);
+
 	if (big_valid)
 		pte_addr[gmmu_page_size_big] =
 			gk20a_mm_iova_addr(big_pte->sgt->sgl);
@@ -1920,7 +1920,6 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	vm->tlb_dirty = true;
 }
 
-
 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 			       u32 num_pages, u32 pgsz_idx)
 {
@@ -1986,6 +1985,18 @@ err_unmap:
 	return -EINVAL;
 }
 
+static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
+	       u32 num_pages, u32 pgsz_idx, bool refplus)
+{
+	return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+}
+
+void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+	       u64 size, u32 pgsz_idx) {
+	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
+			    false, gk20a_mem_flag_none);
+}
+
 /* NOTE! mapped_buffers lock must be held */
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
@@ -2000,8 +2011,18 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 			gmmu_page_shifts[pgsz_idx];
 
 		/* there is little we can do if this fails... */
-		g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
-
+		if (g->ops.mm.put_empty) {
+			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
+		} else {
+			__locked_gmmu_unmap(vm,
+				mapped_buffer->addr,
+				mapped_buffer->size,
+				mapped_buffer->pgsz_idx,
+				mapped_buffer->va_allocated,
+				gk20a_mem_flag_none);
+			g->ops.mm.set_sparse(vm, vaddr,
+					num_pages, pgsz_idx, false);
+		}
 	} else
 		__locked_gmmu_unmap(vm,
 			mapped_buffer->addr,
@@ -2328,7 +2349,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	/* mark that we need to use sparse mappings here */
 	if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
 		err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
-					   pgsz_idx);
+					   pgsz_idx, true);
 		if (err) {
 			mutex_unlock(&vm->update_gmmu_lock);
 			vma->free(vma, start_page_nr, args->pages);
@@ -2357,6 +2378,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
 		     args->pages, args->offset);
@@ -2400,12 +2422,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 
 		/* if this was a sparse mapping, free the va */
 		if (va_node->sparse)
-			__locked_gmmu_unmap(vm,
+			g->ops.mm.clear_sparse(vm,
 				va_node->vaddr_start,
 				va_node->size,
-				va_node->pgsz_idx,
-				false,
-				gk20a_mem_flag_none);
+				va_node->pgsz_idx);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -3088,6 +3108,8 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-	gops->mm.set_sparse = gk20a_vm_put_empty;
+	gops->mm.set_sparse = gk20a_vm_put_sparse;
+	gops->mm.put_empty = gk20a_vm_put_empty;
+	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index da19f83e..b8726c62 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,6 +524,12 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
 int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
 				 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
 				 struct page_table_gk20a *pte);
+
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+		     struct sg_table *sgt, u32 order,
+		     size_t size);
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
+
 struct gpu_ops;
 void gk20a_init_mm(struct gpu_ops *gops);
 #endif /*_MM_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 2c211a57..a16f4adf 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -27,7 +27,8 @@ static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
 
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 				enum gmmu_pgsz_gk20a pgsz_idx,
-				u64 first_vaddr, u64 last_vaddr)
+				u64 first_vaddr, u64 last_vaddr,
+				bool clear, bool refplus)
 {
 	int err;
 	u32 pte_lo, pte_hi;
@@ -50,6 +51,8 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	BUG_ON(pde_lo != pde_hi);
 
 	pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
+	if (refplus)
+		pte->ref_cnt++;
 
 	pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
 	pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
@@ -62,7 +65,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 		pte_w[0] = gmmu_pte_valid_false_f();
-		pte_w[1] = gmmu_pte_vol_true_f();
+		pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
 
 		gk20a_dbg(gpu_dbg_pte,
 			   "pte_cur=%d addr=%llx refs=%d"
@@ -147,7 +150,7 @@ static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
 }
 
 static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
-			       u32 num_pages, u32 pgsz_idx)
+			       u32 num_pages, u32 pgsz_idx, bool refplus)
 {
 	struct mm_gk20a *mm = vm->mm;
 	u32 pgsz = gmmu_page_sizes[pgsz_idx];
@@ -168,8 +171,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
 			"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
-			vaddr, vaddr_hi, pde_lo, pde_hi,
-			vm->mm->pde_stride_shift, pgsz);
+			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+			vm->mm->pde_stride_shift);
 
 	for (i = pde_lo; i <= pde_hi; i++) {
 		/* Mark all ptes as sparse. */
@@ -188,20 +191,22 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 			allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 				vaddr_pde_start,
 				PDE_ADDR_END(vaddr_pde_start,
-					pde_shift));
+					pde_shift), false, refplus);
 		} else {
 			/* Check leading and trailing spaces which doesn't fit
 			 * into entire pde. */
 			if (pde_lo == pde_hi)
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
-					vaddr_hi);
+					vaddr_hi, false, refplus);
 			else if (i == pde_lo)
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
-					PDE_ADDR_END(vaddr, pde_shift));
+					PDE_ADDR_END(vaddr, pde_shift), false,
+					refplus);
 			else
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 					PDE_ADDR_START(vaddr_hi, pde_shift),
-					vaddr_hi);
+					vaddr_hi, false,
+					refplus);
 		}
 	}
 
@@ -265,7 +270,52 @@ fail:
 	return ret;
 }
 
+void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+			   u64 size, u32 pgsz) {
+	int pgsz_idx;
+	u64 vaddr_hi;
+	u32 pde_lo, pde_hi, pde_i;
+
+	gk20a_dbg_fn("");
+	/* determine pagesz idx */
+	for (pgsz_idx = gmmu_page_size_small;
+	     pgsz_idx < gmmu_nr_page_sizes;
+	     pgsz_idx++) {
+		if (gmmu_page_sizes[pgsz_idx] == pgsz)
+			break;
+	}
+	vaddr_hi = vaddr + size - 1;
+	pde_range_from_vaddr_range(vm,
+				   vaddr,
+				   vaddr_hi,
+				   &pde_lo, &pde_hi);
+
+	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
+			"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
+			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+			vm->mm->pde_stride_shift);
+
+	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
+		u32 pte_lo, pte_hi;
+		u32 pte_cur;
+		void *pte_kv_cur;
+
+		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
+		pte->ref_cnt--;
+
+		if (pte->ref_cnt == 0) {
+			free_gmmu_pages(vm, pte->ref, pte->sgt,
+				vm->mm->page_table_sizing[pgsz_idx].order,
+				pte->size);
+			update_gmmu_pde_locked(vm, pde_i);
+		}
+	}
+
+	return;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
+	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
 }