-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |  4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 52
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  6
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 68
4 files changed, 106 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index edf1d548..b5ef3f0d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -234,7 +234,11 @@ struct gpu_ops {
 	} gr_ctx;
 	struct {
 		int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
+			       u32 num_pages, u32 pgsz_idx, bool refplus);
+		int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
 			       u32 num_pages, u32 pgsz_idx);
+		void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
+			       u64 size, u32 pgsz_idx);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 57c61d51..93a29b13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -100,7 +100,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -444,7 +443,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -534,7 +533,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -1865,7 +1864,7 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
    made. So, superfluous updates will cause unnecessary
    pde invalidations.
 */
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 {
 	bool small_valid, big_valid;
 	u64 pte_addr[2] = {0, 0};
@@ -1882,6 +1881,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	if (small_valid)
 		pte_addr[gmmu_page_size_small] =
 			gk20a_mm_iova_addr(small_pte->sgt->sgl);
+
 	if (big_valid)
 		pte_addr[gmmu_page_size_big] =
 			gk20a_mm_iova_addr(big_pte->sgt->sgl);
@@ -1920,7 +1920,6 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	vm->tlb_dirty = true;
 }
 
-
 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 			      u32 num_pages, u32 pgsz_idx)
 {
@@ -1986,6 +1985,18 @@ err_unmap:
 	return -EINVAL;
 }
 
+static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
+	       u32 num_pages, u32 pgsz_idx, bool refplus)
+{
+	return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+}
+
+void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+			       u64 size, u32 pgsz_idx) {
+	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
+			    false, gk20a_mem_flag_none);
+}
+
 /* NOTE! mapped_buffers lock must be held */
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
@@ -2000,8 +2011,18 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 			gmmu_page_shifts[pgsz_idx];
 
 		/* there is little we can do if this fails... */
-		g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
-
+		if (g->ops.mm.put_empty) {
+			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
+		} else {
+			__locked_gmmu_unmap(vm,
+				mapped_buffer->addr,
+				mapped_buffer->size,
+				mapped_buffer->pgsz_idx,
+				mapped_buffer->va_allocated,
+				gk20a_mem_flag_none);
+			g->ops.mm.set_sparse(vm, vaddr,
+					num_pages, pgsz_idx, false);
+		}
 	} else
 		__locked_gmmu_unmap(vm,
 			mapped_buffer->addr,
@@ -2328,7 +2349,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	/* mark that we need to use sparse mappings here */
 	if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
 		err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
-					   pgsz_idx);
+					   pgsz_idx, true);
 		if (err) {
 			mutex_unlock(&vm->update_gmmu_lock);
 			vma->free(vma, start_page_nr, args->pages);
@@ -2357,6 +2378,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
 		     args->pages, args->offset);
@@ -2400,12 +2422,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 
 		/* if this was a sparse mapping, free the va */
 		if (va_node->sparse)
-			__locked_gmmu_unmap(vm,
+			g->ops.mm.clear_sparse(vm,
 				va_node->vaddr_start,
 				va_node->size,
-				va_node->pgsz_idx,
-				false,
-				gk20a_mem_flag_none);
+				va_node->pgsz_idx);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -3088,6 +3108,8 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-	gops->mm.set_sparse = gk20a_vm_put_empty;
+	gops->mm.set_sparse = gk20a_vm_put_sparse;
+	gops->mm.put_empty = gk20a_vm_put_empty;
+	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index da19f83e..b8726c62 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,6 +524,12 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
 int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
 				 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
 				 struct page_table_gk20a *pte);
+
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+		     struct sg_table *sgt, u32 order,
+		     size_t size);
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
+
 struct gpu_ops;
 void gk20a_init_mm(struct gpu_ops *gops);
 #endif /*_MM_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 2c211a57..a16f4adf 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -27,7 +27,8 @@ static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
 
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 				enum gmmu_pgsz_gk20a pgsz_idx,
-				u64 first_vaddr, u64 last_vaddr)
+				u64 first_vaddr, u64 last_vaddr,
+				bool clear, bool refplus)
 {
 	int err;
 	u32 pte_lo, pte_hi;
@@ -50,6 +51,8 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	BUG_ON(pde_lo != pde_hi);
 
 	pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
+	if (refplus)
+		pte->ref_cnt++;
 
 	pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
 	pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
@@ -62,7 +65,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 		pte_w[0] = gmmu_pte_valid_false_f();
-		pte_w[1] = gmmu_pte_vol_true_f();
+		pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
 
 		gk20a_dbg(gpu_dbg_pte,
 			"pte_cur=%d addr=%llx refs=%d"
@@ -147,7 +150,7 @@ static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
 }
 
 static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
-			       u32 num_pages, u32 pgsz_idx)
+			       u32 num_pages, u32 pgsz_idx, bool refplus)
 {
 	struct mm_gk20a *mm = vm->mm;
 	u32 pgsz = gmmu_page_sizes[pgsz_idx];
@@ -168,8 +171,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
 		"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
-		vaddr, vaddr_hi, pde_lo, pde_hi,
-		vm->mm->pde_stride_shift, pgsz);
+		vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+		vm->mm->pde_stride_shift);
 
 	for (i = pde_lo; i <= pde_hi; i++) {
 		/* Mark all ptes as sparse. */
@@ -188,20 +191,22 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 			allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 					vaddr_pde_start,
 					PDE_ADDR_END(vaddr_pde_start,
-						pde_shift));
+						pde_shift), false, refplus);
 		} else {
 			/* Check leading and trailing spaces which doesn't fit
 			 * into entire pde. */
 			if (pde_lo == pde_hi)
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
-						vaddr_hi);
+						vaddr_hi, false, refplus);
 			else if (i == pde_lo)
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
-					PDE_ADDR_END(vaddr, pde_shift));
+					PDE_ADDR_END(vaddr, pde_shift), false,
+					refplus);
 			else
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 					PDE_ADDR_START(vaddr_hi, pde_shift),
-					vaddr_hi);
+					vaddr_hi, false,
+					refplus);
 		}
 	}
 
@@ -265,7 +270,52 @@ fail:
 	return ret;
 }
 
+void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+			       u64 size, u32 pgsz) {
+	int pgsz_idx;
+	u64 vaddr_hi;
+	u32 pde_lo, pde_hi, pde_i;
+
+	gk20a_dbg_fn("");
+	/* determine pagesz idx */
+	for (pgsz_idx = gmmu_page_size_small;
+	     pgsz_idx < gmmu_nr_page_sizes;
+	     pgsz_idx++) {
+		if (gmmu_page_sizes[pgsz_idx] == pgsz)
+			break;
+	}
+	vaddr_hi = vaddr + size - 1;
+	pde_range_from_vaddr_range(vm,
+				   vaddr,
+				   vaddr_hi,
+				   &pde_lo, &pde_hi);
+
+	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
+			"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
+			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+			vm->mm->pde_stride_shift);
+
+	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
+		u32 pte_lo, pte_hi;
+		u32 pte_cur;
+		void *pte_kv_cur;
+
+		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
+		pte->ref_cnt--;
+
+		if (pte->ref_cnt == 0) {
+			free_gmmu_pages(vm, pte->ref, pte->sgt,
+				vm->mm->page_table_sizing[pgsz_idx].order,
+				pte->size);
+			update_gmmu_pde_locked(vm, pde_i);
+		}
+	}
+
+	return;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
+	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
 }
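
The patch itself carries no commentary, so as a reading aid here is a minimal, self-contained user-space sketch of the control flow it sets up: an ops table with set_sparse (now taking a refplus flag), the new put_empty, and the new clear_sparse, plus the unmap-path preference for put_empty with a fallback to unmap-and-re-mark-sparse. Everything below is illustrative stand-in code, not driver code; the stub types, names, and printouts are assumptions, and only the three op signatures and the dispatch order mirror the diff above.

/*
 * Sketch only: stand-in types for vm_gk20a and the gpu_ops.mm table.
 * A gk20a-style init fills all three ops; a gm20b-style init leaves
 * put_empty NULL, as gm20b_init_mm() does in the patch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vm_stub { const char *name; };   /* stand-in for struct vm_gk20a */

struct mm_ops_stub {
	int  (*set_sparse)(struct vm_stub *vm, uint64_t vaddr,
			   uint32_t num_pages, uint32_t pgsz_idx, bool refplus);
	int  (*put_empty)(struct vm_stub *vm, uint64_t vaddr,
			  uint32_t num_pages, uint32_t pgsz_idx);
	void (*clear_sparse)(struct vm_stub *vm, uint64_t vaddr,
			     uint64_t size, uint32_t pgsz_idx);
};

static int stub_set_sparse(struct vm_stub *vm, uint64_t vaddr,
			   uint32_t num_pages, uint32_t pgsz_idx, bool refplus)
{
	printf("set_sparse   %s va=0x%llx pages=%u pgsz_idx=%u refplus=%d\n",
	       vm->name, (unsigned long long)vaddr, (unsigned)num_pages,
	       (unsigned)pgsz_idx, refplus);
	return 0;
}

static int stub_put_empty(struct vm_stub *vm, uint64_t vaddr,
			  uint32_t num_pages, uint32_t pgsz_idx)
{
	printf("put_empty    %s va=0x%llx pages=%u pgsz_idx=%u\n",
	       vm->name, (unsigned long long)vaddr, (unsigned)num_pages,
	       (unsigned)pgsz_idx);
	return 0;
}

static void stub_clear_sparse(struct vm_stub *vm, uint64_t vaddr,
			      uint64_t size, uint32_t pgsz_idx)
{
	printf("clear_sparse %s va=0x%llx size=0x%llx pgsz_idx=%u\n",
	       vm->name, (unsigned long long)vaddr, (unsigned long long)size,
	       (unsigned)pgsz_idx);
}

int main(void)
{
	struct vm_stub vm = { "as0" };
	struct mm_ops_stub ops = {
		.set_sparse = stub_set_sparse,
		.put_empty = stub_put_empty,    /* set to NULL for gm20b-style */
		.clear_sparse = stub_clear_sparse,
	};

	/* Alloc-space path: reserve a sparse range, bumping the PTE refcount. */
	ops.set_sparse(&vm, 0x100000, 16, 0, true);

	/* Unmap path: prefer put_empty when the chip installs it; otherwise
	 * unmap the buffer and re-mark the range sparse without another bump. */
	if (ops.put_empty)
		ops.put_empty(&vm, 0x100000, 16, 0);
	else
		ops.set_sparse(&vm, 0x100000, 16, 0, false);

	/* Free-space path: drop the sparse backing for the reserved VA. */
	ops.clear_sparse(&vm, 0x100000, 16 * 4096, 0);
	return 0;
}

The design point visible in the diff is that the common code never hard-codes either chip's sparse policy: it only probes which ops the per-chip init installed (gk20a_init_mm installs put_empty, gm20b_init_mm does not) and dispatches accordingly.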