author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-06-15 21:09:35 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-10-30 19:36:06 -0400
commit	4b5c08f4c0cf12076a208c640a46447a536308e8 (patch)
tree	333a7896521911282f370b7d9d9c618fc3f2d678
parent	004a1880ed80f3b384cf3d0d37e0a58eff29fcaf (diff)
gpu: nvgpu: Implement sparse PDEs
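
For context beyond the subject line: with this change a sparse allocation no
longer forces allocation of next-level page tables. When the range being made
sparse fully covers a PDE, the PDE itself is marked sparse by setting its
big-page volatile bit (see the update_gmmu_pde_locked() hunks) and the PTE
array below it is never allocated; the new gk20a_mm_entry.sparse flag records
this. When a later mapping only partially covers such a PDE,
update_gmmu_level_locked() first expands the sparse PDE into a full array of
sparse PTEs over [curr, next) and then writes the real mapping on top.
gk20a_vm_free_space() now unmaps with sparse=false, presumably because the
allocation is being torn down entirely.

A minimal sketch of the coverage test that decides whether a PDE may stay
sparse; the helper name is illustrative only, and the alignment math assumes
pde_size is a power of two:

	/*
	 * Mirrors the sparse_entry computation added to
	 * update_gmmu_level_locked(): a PDE can be left sparse (no
	 * next-level table) only when the requested range covers it
	 * completely, i.e. gpu_va sits on the PDE boundary and gpu_end
	 * reaches at least the next boundary.
	 */
	static bool pde_fully_covered(u64 gpu_va, u64 gpu_end, u64 pde_size)
	{
		u64 curr = gpu_va & ~(pde_size - 1);              /* start of this PDE */
		u64 next = (gpu_va + pde_size) & ~(pde_size - 1); /* start of next PDE */

		return gpu_va == curr && gpu_end >= next;
	}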
Change-Id: Idfeb3bf95751902d52a895d77045a529f69abc0b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/758651
GVS: Gerrit_Virtual_Submit
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	92
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	1
2 files changed, 62 insertions(+), 31 deletions(-)
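
A note on the encoding, as a reading aid for the update_gmmu_pte_locked()
hunk below: a sparse entry is written invalid but volatile, so a GPU access
to it is expected to be satisfied without an MMU fault (reads of unbacked
sparse pages returning zeroes). A hypothetical helper, using the same
generated gmmu_* accessors as the driver:

	/*
	 * Illustrative only: compose the two PTE words for a sparse
	 * entry. In the driver pte_w[] starts zeroed, so plain
	 * assignment here matches the |= used in the patch.
	 */
	static void write_sparse_pte(u32 pte_w[2])
	{
		pte_w[0] = gmmu_pte_valid_false_f();	/* no physical backing */
		pte_w[1] = gmmu_pte_vol_true_f();	/* volatile: don't fault */
	}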
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 859e46fc..8481044e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2164,8 +2164,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg_fn("");
 
-	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
-	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+	small_valid = !sparse && entry->size
+		&& entry->pgsz == gmmu_page_size_small;
+	big_valid = !sparse && entry->size
+		&& entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
 		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
@@ -2185,6 +2187,9 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 		  (big_valid ? (gmmu_pde_vol_big_true_f()) :
 		   gmmu_pde_vol_big_false_f());
 
+	if (sparse)
+		pde_v[1] |= gmmu_pde_vol_big_true_f();
+
 	pde = pde_from_index(vm, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
@@ -2259,6 +2264,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	} else if (sparse) {
 		pte_w[0] = gmmu_pte_valid_false_f();
 		pte_w[1] |= gmmu_pte_vol_true_f();
+		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x%08x,0x%08x]",
+			  i, pte_w[1], pte_w[0]);
 	} else {
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
@@ -2317,41 +2324,39 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 
 	while (gpu_va < gpu_end) {
 		struct gk20a_mm_entry *next_pte = NULL;
-		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
+		u64 next = (gpu_va + pde_size) & ~(pde_size-1);
+		u64 curr = gpu_va & ~(pde_size-1);
+		bool sparse_entry = sparse &&
+			((gpu_va == curr && gpu_end >= next) ||
+			 !next_l->update_entry);
+
+		gk20a_dbg(gpu_dbg_pte, "pde_i %d [%llx-%llx] gpu_va %llx sparse %d (%d)\n",
+			  pde_i, curr, next, gpu_va, sparse_entry, pte->sparse);
 
 		/* Allocate next level */
-		if (next_l->update_entry) {
-			if (!pte->entries) {
-				int num_entries =
-					1 <<
-					 (l->hi_bit[pgsz_idx]
-					  - l->lo_bit[pgsz_idx] + 1);
-				pte->entries =
-					vzalloc(sizeof(struct gk20a_mm_entry) *
-						num_entries);
-				if (!pte->entries)
-					return -ENOMEM;
-				pte->pgsz = pgsz_idx;
-				pte->num_entries = num_entries;
-			}
-			next_pte = pte->entries + pde_i;
+		if (!pte->entries) {
+			int num_entries =
+				1 <<
+				 (l->hi_bit[pgsz_idx]
+				  - l->lo_bit[pgsz_idx] + 1);
+			pte->entries =
+				vzalloc(sizeof(struct gk20a_mm_entry) *
+					num_entries);
+			if (!pte->entries)
+				return -ENOMEM;
+			pte->pgsz = pgsz_idx;
+			pte->num_entries = num_entries;
+		}
+		next_pte = pte->entries + pde_i;
 
+		if (next_l->update_entry && !sparse_entry) {
 			if (!next_pte->size) {
 				err = gk20a_zalloc_gmmu_page_table(vm,
 					pgsz_idx, next_l, next_pte);
 				if (err)
 					return err;
 			}
-		}
-
-		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				sgl, offset, iova,
-				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse, priv);
-		if (err)
-			return err;
 
-		if (next_l->update_entry) {
 			/* get cpu access to the ptes */
 			err = map_gmmu_pages(next_pte);
 			if (err) {
@@ -2360,13 +2365,29 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 					   vm_aspace_id(vm));
 				return err;
 			}
+			if (next_pte->sparse) {
+				u64 null = 0;
+
+				gk20a_dbg(gpu_dbg_pte, "convert sparse PDE to sparse PTE array [%llx,%llx]",
+					  curr, next);
+				err = update_gmmu_level_locked(vm, next_pte,
+						pgsz_idx,
+						sgl,
+						offset,
+						&null,
+						curr,
+						next,
+						kind_v, NULL, cacheable, unmapped_pte,
+						rw_flag, true, lvl+1, priv);
+				next_pte->sparse = false;
+			}
 			err = update_gmmu_level_locked(vm, next_pte,
 					pgsz_idx,
 					sgl,
 					offset,
 					iova,
 					gpu_va,
-					next,
+					min(next, gpu_end),
 					kind_v, ctag, cacheable, unmapped_pte,
 					rw_flag, sparse, lvl+1, priv);
 			unmap_gmmu_pages(next_pte);
@@ -2375,6 +2396,15 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			return err;
 		}
 
+		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
+				sgl, offset, iova,
+				kind_v, ctag, cacheable, unmapped_pte,
+				rw_flag, sparse_entry, priv);
+		if (err)
+			return err;
+
+		next_pte->sparse = sparse_entry;
+
 		pde_i++;
 		gpu_va = next;
 	}
@@ -2441,8 +2471,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		}
 	}
 
-	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-		  pgsz_idx, gpu_va, gpu_end-1, iova);
+	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx, sparse=%d",
+		  pgsz_idx, gpu_va, gpu_end-1, iova, sparse);
 	err = map_gmmu_pages(&vm->pdb);
 	if (err) {
 		gk20a_err(dev_from_vm(vm),
@@ -2996,7 +3026,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 				va_node->pgsz_idx,
 				true,
 				gk20a_mem_flag_none,
-				true,
+				false,
 				NULL);
 		kfree(va_node);
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index e44ee631..c13ae2a2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -153,6 +153,7 @@ struct gk20a_mm_entry {
 	int pgsz;
 	struct gk20a_mm_entry *entries;
 	int num_entries;
+	bool sparse;
 };
 
 struct priv_cmd_queue {