author    Terje Bergstrom <tbergstrom@nvidia.com>  2015-11-09 12:04:59 -0500
committer Terje Bergstrom <tbergstrom@nvidia.com>  2015-11-09 13:11:23 -0500
commit    cccd038f8d753c045d3592fc2730f750766df78b (patch)
tree      4fa027aa9c74d7040bbdba2fb1ba8aa36fb835f8 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    19b3bd28b3e277d8892f663e7c61a813dbc54feb (diff)
Revert "gpu: nvgpu: Implement sparse PDEs"
This reverts commit c44947b1314bb2afa1f116b4928f4e8a4c34d7b1. It introduces
a regression in T124.

Bug 1702063

Change-Id: I64e333f66d98bd4dbcfe40a60f1aa825d90376a5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/830786
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 92
1 file changed, 31 insertions(+), 61 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 8481044e..859e46fc 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2164,10 +2164,8 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg_fn("");
 
-	small_valid = !sparse && entry->size
-		&& entry->pgsz == gmmu_page_size_small;
-	big_valid = !sparse && entry->size
-		&& entry->pgsz == gmmu_page_size_big;
+	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
 		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
@@ -2187,9 +2185,6 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 		   (big_valid ? (gmmu_pde_vol_big_true_f()) :
 		    gmmu_pde_vol_big_false_f());
 
-	if (sparse)
-		pde_v[1] |= gmmu_pde_vol_big_true_f();
-
 	pde = pde_from_index(vm, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
@@ -2264,8 +2259,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	} else if (sparse) {
 		pte_w[0] = gmmu_pte_valid_false_f();
 		pte_w[1] |= gmmu_pte_vol_true_f();
-		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x%08x,0x%08x]",
-			  i, pte_w[1], pte_w[0]);
 	} else {
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
@@ -2324,39 +2317,41 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 
 	while (gpu_va < gpu_end) {
 		struct gk20a_mm_entry *next_pte = NULL;
-		u64 next = (gpu_va + pde_size) & ~(pde_size-1);
-		u64 curr = gpu_va & ~(pde_size-1);
-		bool sparse_entry = sparse &&
-			((gpu_va == curr && gpu_end >= next) ||
-			 !next_l->update_entry);
-
-		gk20a_dbg(gpu_dbg_pte, "pde_i %d [%llx-%llx] gpu_va %llx sparse %d (%d)\n",
-			  pde_i, curr, next, gpu_va, sparse_entry, pte->sparse);
+		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
 
 		/* Allocate next level */
-		if (!pte->entries) {
-			int num_entries =
-				1 <<
-				 (l->hi_bit[pgsz_idx]
-				  - l->lo_bit[pgsz_idx] + 1);
-			pte->entries =
-				vzalloc(sizeof(struct gk20a_mm_entry) *
-					num_entries);
-			if (!pte->entries)
-				return -ENOMEM;
-			pte->pgsz = pgsz_idx;
-			pte->num_entries = num_entries;
-		}
-		next_pte = pte->entries + pde_i;
+		if (next_l->update_entry) {
+			if (!pte->entries) {
+				int num_entries =
+					1 <<
+					 (l->hi_bit[pgsz_idx]
+					  - l->lo_bit[pgsz_idx] + 1);
+				pte->entries =
+					vzalloc(sizeof(struct gk20a_mm_entry) *
+						num_entries);
+				if (!pte->entries)
+					return -ENOMEM;
+				pte->pgsz = pgsz_idx;
+				pte->num_entries = num_entries;
+			}
+			next_pte = pte->entries + pde_i;
 
-		if (next_l->update_entry && !sparse_entry) {
 			if (!next_pte->size) {
 				err = gk20a_zalloc_gmmu_page_table(vm,
 					pgsz_idx, next_l, next_pte);
 				if (err)
 					return err;
 			}
+		}
+
+		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
+				sgl, offset, iova,
+				kind_v, ctag, cacheable, unmapped_pte,
+				rw_flag, sparse, priv);
+		if (err)
+			return err;
 
+		if (next_l->update_entry) {
 			/* get cpu access to the ptes */
 			err = map_gmmu_pages(next_pte);
 			if (err) {
@@ -2365,29 +2360,13 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 					  vm_aspace_id(vm));
 				return err;
 			}
-			if (next_pte->sparse) {
-				u64 null = 0;
-
-				gk20a_dbg(gpu_dbg_pte, "convert sparse PDE to sparse PTE array [%llx,%llx]",
-					  curr, next);
-				err = update_gmmu_level_locked(vm, next_pte,
-					pgsz_idx,
-					sgl,
-					offset,
-					&null,
-					curr,
-					next,
-					kind_v, NULL, cacheable, unmapped_pte,
-					rw_flag, true, lvl+1, priv);
-				next_pte->sparse = false;
-			}
 			err = update_gmmu_level_locked(vm, next_pte,
 				pgsz_idx,
 				sgl,
 				offset,
 				iova,
 				gpu_va,
-				min(next, gpu_end),
+				next,
 				kind_v, ctag, cacheable, unmapped_pte,
 				rw_flag, sparse, lvl+1, priv);
 			unmap_gmmu_pages(next_pte);
@@ -2396,15 +2375,6 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				return err;
 		}
 
-		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				sgl, offset, iova,
-				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse_entry, priv);
-		if (err)
-			return err;
-
-		next_pte->sparse = sparse_entry;
-
 		pde_i++;
 		gpu_va = next;
 	}
@@ -2471,8 +2441,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		}
 	}
 
-	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx, sparse=%d",
-		  pgsz_idx, gpu_va, gpu_end-1, iova, sparse);
+	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
+		  pgsz_idx, gpu_va, gpu_end-1, iova);
 	err = map_gmmu_pages(&vm->pdb);
 	if (err) {
 		gk20a_err(dev_from_vm(vm),
@@ -3026,7 +2996,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 			va_node->pgsz_idx,
 			true,
 			gk20a_mem_flag_none,
-			false,
+			true,
 			NULL);
 		kfree(va_node);
 	}