diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/mm_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 68 |
1 files changed, 59 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 2c211a57..a16f4adf 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -27,7 +27,8 @@ static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL }; | |||
27 | 27 | ||
28 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | 28 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, |
29 | enum gmmu_pgsz_gk20a pgsz_idx, | 29 | enum gmmu_pgsz_gk20a pgsz_idx, |
30 | u64 first_vaddr, u64 last_vaddr) | 30 | u64 first_vaddr, u64 last_vaddr, |
31 | bool clear, bool refplus) | ||
31 | { | 32 | { |
32 | int err; | 33 | int err; |
33 | u32 pte_lo, pte_hi; | 34 | u32 pte_lo, pte_hi; |
@@ -50,6 +51,8 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | |||
50 | BUG_ON(pde_lo != pde_hi); | 51 | BUG_ON(pde_lo != pde_hi); |
51 | 52 | ||
52 | pte = vm->pdes.ptes[pgsz_idx] + pde_lo; | 53 | pte = vm->pdes.ptes[pgsz_idx] + pde_lo; |
54 | if (refplus) | ||
55 | pte->ref_cnt++; | ||
53 | 56 | ||
54 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); | 57 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); |
55 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); | 58 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); |
@@ -62,7 +65,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | |||
62 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | 65 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); |
63 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | 66 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { |
64 | pte_w[0] = gmmu_pte_valid_false_f(); | 67 | pte_w[0] = gmmu_pte_valid_false_f(); |
65 | pte_w[1] = gmmu_pte_vol_true_f(); | 68 | pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f(); |
66 | 69 | ||
67 | gk20a_dbg(gpu_dbg_pte, | 70 | gk20a_dbg(gpu_dbg_pte, |
68 | "pte_cur=%d addr=%llx refs=%d" | 71 | "pte_cur=%d addr=%llx refs=%d" |
@@ -147,7 +150,7 @@ static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, | |||
147 | } | 150 | } |
148 | 151 | ||
149 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | 152 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, |
150 | u32 num_pages, u32 pgsz_idx) | 153 | u32 num_pages, u32 pgsz_idx, bool refplus) |
151 | { | 154 | { |
152 | struct mm_gk20a *mm = vm->mm; | 155 | struct mm_gk20a *mm = vm->mm; |
153 | u32 pgsz = gmmu_page_sizes[pgsz_idx]; | 156 | u32 pgsz = gmmu_page_sizes[pgsz_idx]; |
@@ -168,8 +171,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | |||
168 | 171 | ||
169 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | 172 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " |
170 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | 173 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", |
171 | vaddr, vaddr_hi, pde_lo, pde_hi, | 174 | vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, |
172 | vm->mm->pde_stride_shift, pgsz); | 175 | vm->mm->pde_stride_shift); |
173 | 176 | ||
174 | for (i = pde_lo; i <= pde_hi; i++) { | 177 | for (i = pde_lo; i <= pde_hi; i++) { |
175 | /* Mark all ptes as sparse. */ | 178 | /* Mark all ptes as sparse. */ |
@@ -188,20 +191,22 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | |||
188 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | 191 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, |
189 | vaddr_pde_start, | 192 | vaddr_pde_start, |
190 | PDE_ADDR_END(vaddr_pde_start, | 193 | PDE_ADDR_END(vaddr_pde_start, |
191 | pde_shift)); | 194 | pde_shift), false, refplus); |
192 | } else { | 195 | } else { |
193 | /* Check leading and trailing spaces which doesn't fit | 196 | /* Check leading and trailing spaces which doesn't fit |
194 | * into entire pde. */ | 197 | * into entire pde. */ |
195 | if (pde_lo == pde_hi) | 198 | if (pde_lo == pde_hi) |
196 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | 199 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, |
197 | vaddr_hi); | 200 | vaddr_hi, false, refplus); |
198 | else if (i == pde_lo) | 201 | else if (i == pde_lo) |
199 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | 202 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, |
200 | PDE_ADDR_END(vaddr, pde_shift)); | 203 | PDE_ADDR_END(vaddr, pde_shift), false, |
204 | refplus); | ||
201 | else | 205 | else |
202 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | 206 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, |
203 | PDE_ADDR_START(vaddr_hi, pde_shift), | 207 | PDE_ADDR_START(vaddr_hi, pde_shift), |
204 | vaddr_hi); | 208 | vaddr_hi, false, |
209 | refplus); | ||
205 | } | 210 | } |
206 | } | 211 | } |
207 | 212 | ||
@@ -265,7 +270,52 @@ fail: | |||
265 | return ret; | 270 | return ret; |
266 | } | 271 | } |
267 | 272 | ||
273 | void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr, | ||
274 | u64 size, u32 pgsz) { | ||
275 | int pgsz_idx; | ||
276 | u64 vaddr_hi; | ||
277 | u32 pde_lo, pde_hi, pde_i; | ||
278 | |||
279 | gk20a_dbg_fn(""); | ||
280 | /* determine pagesz idx */ | ||
281 | for (pgsz_idx = gmmu_page_size_small; | ||
282 | pgsz_idx < gmmu_nr_page_sizes; | ||
283 | pgsz_idx++) { | ||
284 | if (gmmu_page_sizes[pgsz_idx] == pgsz) | ||
285 | break; | ||
286 | } | ||
287 | vaddr_hi = vaddr + size - 1; | ||
288 | pde_range_from_vaddr_range(vm, | ||
289 | vaddr, | ||
290 | vaddr_hi, | ||
291 | &pde_lo, &pde_hi); | ||
292 | |||
293 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | ||
294 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | ||
295 | vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, | ||
296 | vm->mm->pde_stride_shift); | ||
297 | |||
298 | for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) { | ||
299 | u32 pte_lo, pte_hi; | ||
300 | u32 pte_cur; | ||
301 | void *pte_kv_cur; | ||
302 | |||
303 | struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i; | ||
304 | pte->ref_cnt--; | ||
305 | |||
306 | if (pte->ref_cnt == 0) { | ||
307 | free_gmmu_pages(vm, pte->ref, pte->sgt, | ||
308 | vm->mm->page_table_sizing[pgsz_idx].order, | ||
309 | pte->size); | ||
310 | update_gmmu_pde_locked(vm, pde_i); | ||
311 | } | ||
312 | } | ||
313 | |||
314 | return; | ||
315 | } | ||
316 | |||
268 | void gm20b_init_mm(struct gpu_ops *gops) | 317 | void gm20b_init_mm(struct gpu_ops *gops) |
269 | { | 318 | { |
270 | gops->mm.set_sparse = gm20b_vm_put_sparse; | 319 | gops->mm.set_sparse = gm20b_vm_put_sparse; |
320 | gops->mm.clear_sparse = gm20b_vm_clear_sparse; | ||
271 | } | 321 | } |