diff options
field | value | date |
---|---|---|
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-06-15 21:09:35 -0400 |
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-10-30 19:36:06 -0400 |
commit | 4b5c08f4c0cf12076a208c640a46447a536308e8 (patch) | |
tree | 333a7896521911282f370b7d9d9c618fc3f2d678 /drivers/gpu/nvgpu/gk20a | |
parent | 004a1880ed80f3b384cf3d0d37e0a58eff29fcaf (diff) | |
gpu: nvgpu: Implement sparse PDEs
Change-Id: Idfeb3bf95751902d52a895d77045a529f69abc0b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/758651
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 92 |
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 |
2 files changed, 62 insertions, 31 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 859e46fc..8481044e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2164,8 +2164,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2164 | 2164 | ||
2165 | gk20a_dbg_fn(""); | 2165 | gk20a_dbg_fn(""); |
2166 | 2166 | ||
2167 | small_valid = entry->size && entry->pgsz == gmmu_page_size_small; | 2167 | small_valid = !sparse && entry->size |
2168 | big_valid = entry->size && entry->pgsz == gmmu_page_size_big; | 2168 | && entry->pgsz == gmmu_page_size_small; |
2169 | big_valid = !sparse && entry->size | ||
2170 | && entry->pgsz == gmmu_page_size_big; | ||
2169 | 2171 | ||
2170 | if (small_valid) | 2172 | if (small_valid) |
2171 | pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0); | 2173 | pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0); |
@@ -2185,6 +2187,9 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2185 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | 2187 | (big_valid ? (gmmu_pde_vol_big_true_f()) : |
2186 | gmmu_pde_vol_big_false_f()); | 2188 | gmmu_pde_vol_big_false_f()); |
2187 | 2189 | ||
2190 | if (sparse) | ||
2191 | pde_v[1] |= gmmu_pde_vol_big_true_f(); | ||
2192 | |||
2188 | pde = pde_from_index(vm, i); | 2193 | pde = pde_from_index(vm, i); |
2189 | 2194 | ||
2190 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 2195 | gk20a_mem_wr32(pde, 0, pde_v[0]); |
@@ -2259,6 +2264,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2259 | } else if (sparse) { | 2264 | } else if (sparse) { |
2260 | pte_w[0] = gmmu_pte_valid_false_f(); | 2265 | pte_w[0] = gmmu_pte_valid_false_f(); |
2261 | pte_w[1] |= gmmu_pte_vol_true_f(); | 2266 | pte_w[1] |= gmmu_pte_vol_true_f(); |
2267 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x%08x,0x%08x]", | ||
2268 | i, pte_w[1], pte_w[0]); | ||
2262 | } else { | 2269 | } else { |
2263 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | 2270 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); |
2264 | } | 2271 | } |
@@ -2317,41 +2324,39 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2317 | 2324 | ||
2318 | while (gpu_va < gpu_end) { | 2325 | while (gpu_va < gpu_end) { |
2319 | struct gk20a_mm_entry *next_pte = NULL; | 2326 | struct gk20a_mm_entry *next_pte = NULL; |
2320 | u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); | 2327 | u64 next = (gpu_va + pde_size) & ~(pde_size-1); |
2328 | u64 curr = gpu_va & ~(pde_size-1); | ||
2329 | bool sparse_entry = sparse && | ||
2330 | ((gpu_va == curr && gpu_end >= next) || | ||
2331 | !next_l->update_entry); | ||
2332 | |||
2333 | gk20a_dbg(gpu_dbg_pte, "pde_i %d [%llx-%llx] gpu_va %llx sparse %d (%d)\n", | ||
2334 | pde_i, curr, next, gpu_va, sparse_entry, pte->sparse); | ||
2321 | 2335 | ||
2322 | /* Allocate next level */ | 2336 | /* Allocate next level */ |
2323 | if (next_l->update_entry) { | 2337 | if (!pte->entries) { |
2324 | if (!pte->entries) { | 2338 | int num_entries = |
2325 | int num_entries = | 2339 | 1 << |
2326 | 1 << | 2340 | (l->hi_bit[pgsz_idx] |
2327 | (l->hi_bit[pgsz_idx] | 2341 | - l->lo_bit[pgsz_idx] + 1); |
2328 | - l->lo_bit[pgsz_idx] + 1); | 2342 | pte->entries = |
2329 | pte->entries = | 2343 | vzalloc(sizeof(struct gk20a_mm_entry) * |
2330 | vzalloc(sizeof(struct gk20a_mm_entry) * | 2344 | num_entries); |
2331 | num_entries); | 2345 | if (!pte->entries) |
2332 | if (!pte->entries) | 2346 | return -ENOMEM; |
2333 | return -ENOMEM; | 2347 | pte->pgsz = pgsz_idx; |
2334 | pte->pgsz = pgsz_idx; | 2348 | pte->num_entries = num_entries; |
2335 | pte->num_entries = num_entries; | 2349 | } |
2336 | } | 2350 | next_pte = pte->entries + pde_i; |
2337 | next_pte = pte->entries + pde_i; | ||
2338 | 2351 | ||
2352 | if (next_l->update_entry && !sparse_entry) { | ||
2339 | if (!next_pte->size) { | 2353 | if (!next_pte->size) { |
2340 | err = gk20a_zalloc_gmmu_page_table(vm, | 2354 | err = gk20a_zalloc_gmmu_page_table(vm, |
2341 | pgsz_idx, next_l, next_pte); | 2355 | pgsz_idx, next_l, next_pte); |
2342 | if (err) | 2356 | if (err) |
2343 | return err; | 2357 | return err; |
2344 | } | 2358 | } |
2345 | } | ||
2346 | |||
2347 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
2348 | sgl, offset, iova, | ||
2349 | kind_v, ctag, cacheable, unmapped_pte, | ||
2350 | rw_flag, sparse, priv); | ||
2351 | if (err) | ||
2352 | return err; | ||
2353 | 2359 | ||
2354 | if (next_l->update_entry) { | ||
2355 | /* get cpu access to the ptes */ | 2360 | /* get cpu access to the ptes */ |
2356 | err = map_gmmu_pages(next_pte); | 2361 | err = map_gmmu_pages(next_pte); |
2357 | if (err) { | 2362 | if (err) { |
@@ -2360,13 +2365,29 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2360 | vm_aspace_id(vm)); | 2365 | vm_aspace_id(vm)); |
2361 | return err; | 2366 | return err; |
2362 | } | 2367 | } |
2368 | if (next_pte->sparse) { | ||
2369 | u64 null = 0; | ||
2370 | |||
2371 | gk20a_dbg(gpu_dbg_pte, "convert sparse PDE to sparse PTE array [%llx,%llx]", | ||
2372 | curr, next); | ||
2373 | err = update_gmmu_level_locked(vm, next_pte, | ||
2374 | pgsz_idx, | ||
2375 | sgl, | ||
2376 | offset, | ||
2377 | &null, | ||
2378 | curr, | ||
2379 | next, | ||
2380 | kind_v, NULL, cacheable, unmapped_pte, | ||
2381 | rw_flag, true, lvl+1, priv); | ||
2382 | next_pte->sparse = false; | ||
2383 | } | ||
2363 | err = update_gmmu_level_locked(vm, next_pte, | 2384 | err = update_gmmu_level_locked(vm, next_pte, |
2364 | pgsz_idx, | 2385 | pgsz_idx, |
2365 | sgl, | 2386 | sgl, |
2366 | offset, | 2387 | offset, |
2367 | iova, | 2388 | iova, |
2368 | gpu_va, | 2389 | gpu_va, |
2369 | next, | 2390 | min(next, gpu_end), |
2370 | kind_v, ctag, cacheable, unmapped_pte, | 2391 | kind_v, ctag, cacheable, unmapped_pte, |
2371 | rw_flag, sparse, lvl+1, priv); | 2392 | rw_flag, sparse, lvl+1, priv); |
2372 | unmap_gmmu_pages(next_pte); | 2393 | unmap_gmmu_pages(next_pte); |
@@ -2375,6 +2396,15 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2375 | return err; | 2396 | return err; |
2376 | } | 2397 | } |
2377 | 2398 | ||
2399 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
2400 | sgl, offset, iova, | ||
2401 | kind_v, ctag, cacheable, unmapped_pte, | ||
2402 | rw_flag, sparse_entry, priv); | ||
2403 | if (err) | ||
2404 | return err; | ||
2405 | |||
2406 | next_pte->sparse = sparse_entry; | ||
2407 | |||
2378 | pde_i++; | 2408 | pde_i++; |
2379 | gpu_va = next; | 2409 | gpu_va = next; |
2380 | } | 2410 | } |
@@ -2441,8 +2471,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
2441 | } | 2471 | } |
2442 | } | 2472 | } |
2443 | 2473 | ||
2444 | gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", | 2474 | gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx, sparse=%d", |
2445 | pgsz_idx, gpu_va, gpu_end-1, iova); | 2475 | pgsz_idx, gpu_va, gpu_end-1, iova, sparse); |
2446 | err = map_gmmu_pages(&vm->pdb); | 2476 | err = map_gmmu_pages(&vm->pdb); |
2447 | if (err) { | 2477 | if (err) { |
2448 | gk20a_err(dev_from_vm(vm), | 2478 | gk20a_err(dev_from_vm(vm), |
@@ -2996,7 +3026,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2996 | va_node->pgsz_idx, | 3026 | va_node->pgsz_idx, |
2997 | true, | 3027 | true, |
2998 | gk20a_mem_flag_none, | 3028 | gk20a_mem_flag_none, |
2999 | true, | 3029 | false, |
3000 | NULL); | 3030 | NULL); |
3001 | kfree(va_node); | 3031 | kfree(va_node); |
3002 | } | 3032 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index e44ee631..c13ae2a2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -153,6 +153,7 @@ struct gk20a_mm_entry { | |||
153 | int pgsz; | 153 | int pgsz; |
154 | struct gk20a_mm_entry *entries; | 154 | struct gk20a_mm_entry *entries; |
155 | int num_entries; | 155 | int num_entries; |
156 | bool sparse; | ||
156 | }; | 157 | }; |
157 | 158 | ||
158 | struct priv_cmd_queue { | 159 | struct priv_cmd_queue { |