Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |  4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 52
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  6
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 68
4 files changed, 106 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index edf1d548..b5ef3f0d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -234,7 +234,11 @@ struct gpu_ops {
 	} gr_ctx;
 	struct {
 		int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
+			       u32 num_pages, u32 pgsz_idx, bool refplus);
+		int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
 			       u32 num_pages, u32 pgsz_idx);
+		void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
+			       u64 size, u32 pgsz_idx);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 57c61d51..93a29b13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -100,7 +100,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -444,7 +443,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -534,7 +533,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -1865,7 +1864,7 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
    made. So, superfluous updates will cause unnecessary
    pde invalidations.
 */
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 {
 	bool small_valid, big_valid;
 	u64 pte_addr[2] = {0, 0};
@@ -1882,6 +1881,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	if (small_valid)
 		pte_addr[gmmu_page_size_small] =
 			gk20a_mm_iova_addr(small_pte->sgt->sgl);
+
 	if (big_valid)
 		pte_addr[gmmu_page_size_big] =
 			gk20a_mm_iova_addr(big_pte->sgt->sgl);
@@ -1920,7 +1920,6 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	vm->tlb_dirty = true;
 }
 
-
 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 			       u32 num_pages, u32 pgsz_idx)
 {
@@ -1986,6 +1985,18 @@ err_unmap:
 	return -EINVAL;
 }
 
+static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
+	       u32 num_pages, u32 pgsz_idx, bool refplus)
+{
+	return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+}
+
+void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+	       u64 size, u32 pgsz_idx) {
+	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
+			false, gk20a_mem_flag_none);
+}
+
 /* NOTE! mapped_buffers lock must be held */
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
@@ -2000,8 +2011,18 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 				gmmu_page_shifts[pgsz_idx];
 
 		/* there is little we can do if this fails... */
-		g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
-
+		if (g->ops.mm.put_empty) {
+			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
+		} else {
+			__locked_gmmu_unmap(vm,
+				mapped_buffer->addr,
+				mapped_buffer->size,
+				mapped_buffer->pgsz_idx,
+				mapped_buffer->va_allocated,
+				gk20a_mem_flag_none);
+			g->ops.mm.set_sparse(vm, vaddr,
+				num_pages, pgsz_idx, false);
+		}
 	} else
 		__locked_gmmu_unmap(vm,
 			mapped_buffer->addr,
@@ -2328,7 +2349,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	/* mark that we need to use sparse mappings here */
 	if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
 		err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
-					   pgsz_idx);
+					   pgsz_idx, true);
 		if (err) {
 			mutex_unlock(&vm->update_gmmu_lock);
 			vma->free(vma, start_page_nr, args->pages);
@@ -2357,6 +2378,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
 		     args->pages, args->offset);
@@ -2400,12 +2422,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 
 		/* if this was a sparse mapping, free the va */
 		if (va_node->sparse)
-			__locked_gmmu_unmap(vm,
+			g->ops.mm.clear_sparse(vm,
 				va_node->vaddr_start,
 				va_node->size,
-				va_node->pgsz_idx,
-				false,
-				gk20a_mem_flag_none);
+				va_node->pgsz_idx);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -3088,6 +3108,8 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-	gops->mm.set_sparse = gk20a_vm_put_empty;
+	gops->mm.set_sparse = gk20a_vm_put_sparse;
+	gops->mm.put_empty = gk20a_vm_put_empty;
+	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index da19f83e..b8726c62 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,6 +524,12 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
 int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
 				 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
 				 struct page_table_gk20a *pte);
+
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+		     struct sg_table *sgt, u32 order,
+		     size_t size);
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
+
 struct gpu_ops;
 void gk20a_init_mm(struct gpu_ops *gops);
 #endif /*_MM_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 2c211a57..a16f4adf 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -27,7 +27,8 @@ static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
 
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 				enum gmmu_pgsz_gk20a pgsz_idx,
-				u64 first_vaddr, u64 last_vaddr)
+				u64 first_vaddr, u64 last_vaddr,
+				bool clear, bool refplus)
 {
 	int err;
 	u32 pte_lo, pte_hi;
@@ -50,6 +51,8 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	BUG_ON(pde_lo != pde_hi);
 
 	pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
+	if (refplus)
+		pte->ref_cnt++;
 
 	pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
 	pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
@@ -62,7 +65,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 		pte_w[0] = gmmu_pte_valid_false_f();
-		pte_w[1] = gmmu_pte_vol_true_f();
+		pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
 
 		gk20a_dbg(gpu_dbg_pte,
 			  "pte_cur=%d addr=%llx refs=%d"
@@ -147,7 +150,7 @@ static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
 }
 
 static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
-			       u32 num_pages, u32 pgsz_idx)
+			       u32 num_pages, u32 pgsz_idx, bool refplus)
 {
 	struct mm_gk20a *mm = vm->mm;
 	u32 pgsz = gmmu_page_sizes[pgsz_idx];
@@ -168,8 +171,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
 			"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
-			vaddr, vaddr_hi, pde_lo, pde_hi,
-			vm->mm->pde_stride_shift, pgsz);
+			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+			vm->mm->pde_stride_shift);
 
 	for (i = pde_lo; i <= pde_hi; i++) {
 		/* Mark all ptes as sparse. */
@@ -188,20 +191,22 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 			allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 					vaddr_pde_start,
 					PDE_ADDR_END(vaddr_pde_start,
-						pde_shift));
+						pde_shift), false, refplus);
 		} else {
 			/* Check leading and trailing spaces which doesn't fit
 			 * into entire pde. */
 			if (pde_lo == pde_hi)
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
-						vaddr_hi);
+						vaddr_hi, false, refplus);
 			else if (i == pde_lo)
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
-						PDE_ADDR_END(vaddr, pde_shift));
+						PDE_ADDR_END(vaddr, pde_shift), false,
+						refplus);
 			else
 				allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 						PDE_ADDR_START(vaddr_hi, pde_shift),
-						vaddr_hi);
+						vaddr_hi, false,
+						refplus);
 		}
 	}
 
@@ -265,7 +270,52 @@ fail:
 	return ret;
 }
 
+void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+			       u64 size, u32 pgsz) {
+	int pgsz_idx;
+	u64 vaddr_hi;
+	u32 pde_lo, pde_hi, pde_i;
+
+	gk20a_dbg_fn("");
+	/* determine pagesz idx */
+	for (pgsz_idx = gmmu_page_size_small;
+	     pgsz_idx < gmmu_nr_page_sizes;
+	     pgsz_idx++) {
+		if (gmmu_page_sizes[pgsz_idx] == pgsz)
+			break;
+	}
+	vaddr_hi = vaddr + size - 1;
+	pde_range_from_vaddr_range(vm,
+				   vaddr,
+				   vaddr_hi,
+				   &pde_lo, &pde_hi);
+
+	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
+			"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
+			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+			vm->mm->pde_stride_shift);
+
+	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
+		u32 pte_lo, pte_hi;
+		u32 pte_cur;
+		void *pte_kv_cur;
+
+		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
+		pte->ref_cnt--;
+
+		if (pte->ref_cnt == 0) {
+			free_gmmu_pages(vm, pte->ref, pte->sgt,
+				vm->mm->page_table_sizing[pgsz_idx].order,
+				pte->size);
+			update_gmmu_pde_locked(vm, pde_i);
+		}
+	}
+
+	return;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
+	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
 }
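
Note (not part of the patch): the change above turns sparse handling into per-chip ops-table callbacks — gk20a registers set_sparse/put_empty/clear_sparse, while gm20b registers its own set_sparse/clear_sparse and leaves put_empty unset, so gk20a_vm_unmap_locked() dispatches through put_empty when present and otherwise unmaps and re-marks the range sparse with refplus == false. The standalone C sketch below models only that dispatch-with-fallback pattern; everything except the op names (struct mm_ops, model_*, unmap_sparse_range) is an invented stand-in, not driver code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vm;	/* opaque stand-in for struct vm_gk20a */

/* Simplified mirror of the gpu_ops.mm callbacks added in gk20a.h */
struct mm_ops {
	int  (*set_sparse)(struct vm *vm, uint64_t vaddr,
			   uint32_t num_pages, uint32_t pgsz_idx, bool refplus);
	int  (*put_empty)(struct vm *vm, uint64_t vaddr,
			  uint32_t num_pages, uint32_t pgsz_idx);
	void (*clear_sparse)(struct vm *vm, uint64_t vaddr,
			     uint64_t size, uint32_t pgsz_idx);
};

static int model_set_sparse(struct vm *vm, uint64_t vaddr,
			    uint32_t num_pages, uint32_t pgsz_idx, bool refplus)
{
	printf("set_sparse: %u pages at 0x%llx refplus=%d\n",
	       num_pages, (unsigned long long)vaddr, refplus);
	return 0;
}

static int model_put_empty(struct vm *vm, uint64_t vaddr,
			   uint32_t num_pages, uint32_t pgsz_idx)
{
	printf("put_empty: %u pages at 0x%llx\n",
	       num_pages, (unsigned long long)vaddr);
	return 0;
}

/* Models the fallback in gk20a_vm_unmap_locked(): prefer put_empty when a
 * chip provides it, otherwise re-mark the range sparse with refplus == false
 * so the PTE refcount is not bumped a second time. */
static void unmap_sparse_range(const struct mm_ops *ops, struct vm *vm,
			       uint64_t vaddr, uint32_t num_pages,
			       uint32_t pgsz_idx)
{
	if (ops->put_empty)
		ops->put_empty(vm, vaddr, num_pages, pgsz_idx);
	else
		ops->set_sparse(vm, vaddr, num_pages, pgsz_idx, false);
}

int main(void)
{
	/* gk20a-style table: both callbacks present */
	struct mm_ops gk20a_like = {
		.set_sparse = model_set_sparse,
		.put_empty  = model_put_empty,
	};
	/* gm20b-style table: put_empty left NULL, so the fallback path runs */
	struct mm_ops gm20b_like = {
		.set_sparse = model_set_sparse,
	};

	unmap_sparse_range(&gk20a_like, NULL, 0x100000ULL, 16, 0);
	unmap_sparse_range(&gm20b_like, NULL, 0x100000ULL, 16, 0);
	return 0;
}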