diff options
author | Kevin Huang <kevinh@nvidia.com> | 2014-07-29 18:56:17 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:10:52 -0400 |
commit | 62e80a189cfa0b6dbb9e27712a1c782e953c32f4 (patch) | |
tree | 3c8f95d9af8ab26d792e765b64547a7df4273e60 /drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |
parent | 4439a8e311e09b1f3af1a70d4111c75e002a607d (diff) |
gpu: nvgpu: clear sparse in space free
Gk20a unmaps the addresses bound to the dummy page to clear sparse
mappings. On Gm20b, we instead need to free the allocated page table
entries for sparse memory.
Bug 1538384
Change-Id: Ie2409ab016c29f42c5f7d97dd7287b093b47f9df
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Reviewed-on: http://git-master/r/448645
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/mm_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 68 |
1 files changed, 59 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 2c211a57..a16f4adf 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -27,7 +27,8 @@ static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL }; | |||
27 | 27 | ||
28 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | 28 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, |
29 | enum gmmu_pgsz_gk20a pgsz_idx, | 29 | enum gmmu_pgsz_gk20a pgsz_idx, |
30 | u64 first_vaddr, u64 last_vaddr) | 30 | u64 first_vaddr, u64 last_vaddr, |
31 | bool clear, bool refplus) | ||
31 | { | 32 | { |
32 | int err; | 33 | int err; |
33 | u32 pte_lo, pte_hi; | 34 | u32 pte_lo, pte_hi; |
@@ -50,6 +51,8 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | |||
50 | BUG_ON(pde_lo != pde_hi); | 51 | BUG_ON(pde_lo != pde_hi); |
51 | 52 | ||
52 | pte = vm->pdes.ptes[pgsz_idx] + pde_lo; | 53 | pte = vm->pdes.ptes[pgsz_idx] + pde_lo; |
54 | if (refplus) | ||
55 | pte->ref_cnt++; | ||
53 | 56 | ||
54 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); | 57 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); |
55 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); | 58 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); |
@@ -62,7 +65,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | |||
62 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | 65 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); |
63 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | 66 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { |
64 | pte_w[0] = gmmu_pte_valid_false_f(); | 67 | pte_w[0] = gmmu_pte_valid_false_f(); |
65 | pte_w[1] = gmmu_pte_vol_true_f(); | 68 | pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f(); |
66 | 69 | ||
67 | gk20a_dbg(gpu_dbg_pte, | 70 | gk20a_dbg(gpu_dbg_pte, |
68 | "pte_cur=%d addr=%llx refs=%d" | 71 | "pte_cur=%d addr=%llx refs=%d" |
@@ -147,7 +150,7 @@ static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, | |||
147 | } | 150 | } |
148 | 151 | ||
149 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | 152 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, |
150 | u32 num_pages, u32 pgsz_idx) | 153 | u32 num_pages, u32 pgsz_idx, bool refplus) |
151 | { | 154 | { |
152 | struct mm_gk20a *mm = vm->mm; | 155 | struct mm_gk20a *mm = vm->mm; |
153 | u32 pgsz = gmmu_page_sizes[pgsz_idx]; | 156 | u32 pgsz = gmmu_page_sizes[pgsz_idx]; |
@@ -168,8 +171,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | |||
168 | 171 | ||
169 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | 172 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " |
170 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | 173 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", |
171 | vaddr, vaddr_hi, pde_lo, pde_hi, | 174 | vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, |
172 | vm->mm->pde_stride_shift, pgsz); | 175 | vm->mm->pde_stride_shift); |
173 | 176 | ||
174 | for (i = pde_lo; i <= pde_hi; i++) { | 177 | for (i = pde_lo; i <= pde_hi; i++) { |
175 | /* Mark all ptes as sparse. */ | 178 | /* Mark all ptes as sparse. */ |
@@ -188,20 +191,22 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | |||
188 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | 191 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, |
189 | vaddr_pde_start, | 192 | vaddr_pde_start, |
190 | PDE_ADDR_END(vaddr_pde_start, | 193 | PDE_ADDR_END(vaddr_pde_start, |
191 | pde_shift)); | 194 | pde_shift), false, refplus); |
192 | } else { | 195 | } else { |
193 | /* Check leading and trailing spaces which doesn't fit | 196 | /* Check leading and trailing spaces which doesn't fit |
194 | * into entire pde. */ | 197 | * into entire pde. */ |
195 | if (pde_lo == pde_hi) | 198 | if (pde_lo == pde_hi) |
196 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | 199 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, |
197 | vaddr_hi); | 200 | vaddr_hi, false, refplus); |
198 | else if (i == pde_lo) | 201 | else if (i == pde_lo) |
199 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | 202 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, |
200 | PDE_ADDR_END(vaddr, pde_shift)); | 203 | PDE_ADDR_END(vaddr, pde_shift), false, |
204 | refplus); | ||
201 | else | 205 | else |
202 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | 206 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, |
203 | PDE_ADDR_START(vaddr_hi, pde_shift), | 207 | PDE_ADDR_START(vaddr_hi, pde_shift), |
204 | vaddr_hi); | 208 | vaddr_hi, false, |
209 | refplus); | ||
205 | } | 210 | } |
206 | } | 211 | } |
207 | 212 | ||
@@ -265,7 +270,52 @@ fail: | |||
265 | return ret; | 270 | return ret; |
266 | } | 271 | } |
267 | 272 | ||
273 | void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr, | ||
274 | u64 size, u32 pgsz) { | ||
275 | int pgsz_idx; | ||
276 | u64 vaddr_hi; | ||
277 | u32 pde_lo, pde_hi, pde_i; | ||
278 | |||
279 | gk20a_dbg_fn(""); | ||
280 | /* determine pagesz idx */ | ||
281 | for (pgsz_idx = gmmu_page_size_small; | ||
282 | pgsz_idx < gmmu_nr_page_sizes; | ||
283 | pgsz_idx++) { | ||
284 | if (gmmu_page_sizes[pgsz_idx] == pgsz) | ||
285 | break; | ||
286 | } | ||
287 | vaddr_hi = vaddr + size - 1; | ||
288 | pde_range_from_vaddr_range(vm, | ||
289 | vaddr, | ||
290 | vaddr_hi, | ||
291 | &pde_lo, &pde_hi); | ||
292 | |||
293 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | ||
294 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | ||
295 | vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, | ||
296 | vm->mm->pde_stride_shift); | ||
297 | |||
298 | for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) { | ||
299 | u32 pte_lo, pte_hi; | ||
300 | u32 pte_cur; | ||
301 | void *pte_kv_cur; | ||
302 | |||
303 | struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i; | ||
304 | pte->ref_cnt--; | ||
305 | |||
306 | if (pte->ref_cnt == 0) { | ||
307 | free_gmmu_pages(vm, pte->ref, pte->sgt, | ||
308 | vm->mm->page_table_sizing[pgsz_idx].order, | ||
309 | pte->size); | ||
310 | update_gmmu_pde_locked(vm, pde_i); | ||
311 | } | ||
312 | } | ||
313 | |||
314 | return; | ||
315 | } | ||
316 | |||
268 | void gm20b_init_mm(struct gpu_ops *gops) | 317 | void gm20b_init_mm(struct gpu_ops *gops) |
269 | { | 318 | { |
270 | gops->mm.set_sparse = gm20b_vm_put_sparse; | 319 | gops->mm.set_sparse = gm20b_vm_put_sparse; |
320 | gops->mm.clear_sparse = gm20b_vm_clear_sparse; | ||
271 | } | 321 | } |