diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 92 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 |
2 files changed, 31 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 8481044e..859e46fc 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -2164,10 +2164,8 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2164 | 2164 | ||
2165 | gk20a_dbg_fn(""); | 2165 | gk20a_dbg_fn(""); |
2166 | 2166 | ||
2167 | small_valid = !sparse && entry->size | 2167 | small_valid = entry->size && entry->pgsz == gmmu_page_size_small; |
2168 | && entry->pgsz == gmmu_page_size_small; | 2168 | big_valid = entry->size && entry->pgsz == gmmu_page_size_big; |
2169 | big_valid = !sparse && entry->size | ||
2170 | && entry->pgsz == gmmu_page_size_big; | ||
2171 | 2169 | ||
2172 | if (small_valid) | 2170 | if (small_valid) |
2173 | pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0); | 2171 | pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0); |
@@ -2187,9 +2185,6 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2187 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | 2185 | (big_valid ? (gmmu_pde_vol_big_true_f()) : |
2188 | gmmu_pde_vol_big_false_f()); | 2186 | gmmu_pde_vol_big_false_f()); |
2189 | 2187 | ||
2190 | if (sparse) | ||
2191 | pde_v[1] |= gmmu_pde_vol_big_true_f(); | ||
2192 | |||
2193 | pde = pde_from_index(vm, i); | 2188 | pde = pde_from_index(vm, i); |
2194 | 2189 | ||
2195 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 2190 | gk20a_mem_wr32(pde, 0, pde_v[0]); |
@@ -2264,8 +2259,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2264 | } else if (sparse) { | 2259 | } else if (sparse) { |
2265 | pte_w[0] = gmmu_pte_valid_false_f(); | 2260 | pte_w[0] = gmmu_pte_valid_false_f(); |
2266 | pte_w[1] |= gmmu_pte_vol_true_f(); | 2261 | pte_w[1] |= gmmu_pte_vol_true_f(); |
2267 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x%08x,0x%08x]", | ||
2268 | i, pte_w[1], pte_w[0]); | ||
2269 | } else { | 2262 | } else { |
2270 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | 2263 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); |
2271 | } | 2264 | } |
@@ -2324,39 +2317,41 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2324 | 2317 | ||
2325 | while (gpu_va < gpu_end) { | 2318 | while (gpu_va < gpu_end) { |
2326 | struct gk20a_mm_entry *next_pte = NULL; | 2319 | struct gk20a_mm_entry *next_pte = NULL; |
2327 | u64 next = (gpu_va + pde_size) & ~(pde_size-1); | 2320 | u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); |
2328 | u64 curr = gpu_va & ~(pde_size-1); | ||
2329 | bool sparse_entry = sparse && | ||
2330 | ((gpu_va == curr && gpu_end >= next) || | ||
2331 | !next_l->update_entry); | ||
2332 | |||
2333 | gk20a_dbg(gpu_dbg_pte, "pde_i %d [%llx-%llx] gpu_va %llx sparse %d (%d)\n", | ||
2334 | pde_i, curr, next, gpu_va, sparse_entry, pte->sparse); | ||
2335 | 2321 | ||
2336 | /* Allocate next level */ | 2322 | /* Allocate next level */ |
2337 | if (!pte->entries) { | 2323 | if (next_l->update_entry) { |
2338 | int num_entries = | 2324 | if (!pte->entries) { |
2339 | 1 << | 2325 | int num_entries = |
2340 | (l->hi_bit[pgsz_idx] | 2326 | 1 << |
2341 | - l->lo_bit[pgsz_idx] + 1); | 2327 | (l->hi_bit[pgsz_idx] |
2342 | pte->entries = | 2328 | - l->lo_bit[pgsz_idx] + 1); |
2343 | vzalloc(sizeof(struct gk20a_mm_entry) * | 2329 | pte->entries = |
2344 | num_entries); | 2330 | vzalloc(sizeof(struct gk20a_mm_entry) * |
2345 | if (!pte->entries) | 2331 | num_entries); |
2346 | return -ENOMEM; | 2332 | if (!pte->entries) |
2347 | pte->pgsz = pgsz_idx; | 2333 | return -ENOMEM; |
2348 | pte->num_entries = num_entries; | 2334 | pte->pgsz = pgsz_idx; |
2349 | } | 2335 | pte->num_entries = num_entries; |
2350 | next_pte = pte->entries + pde_i; | 2336 | } |
2337 | next_pte = pte->entries + pde_i; | ||
2351 | 2338 | ||
2352 | if (next_l->update_entry && !sparse_entry) { | ||
2353 | if (!next_pte->size) { | 2339 | if (!next_pte->size) { |
2354 | err = gk20a_zalloc_gmmu_page_table(vm, | 2340 | err = gk20a_zalloc_gmmu_page_table(vm, |
2355 | pgsz_idx, next_l, next_pte); | 2341 | pgsz_idx, next_l, next_pte); |
2356 | if (err) | 2342 | if (err) |
2357 | return err; | 2343 | return err; |
2358 | } | 2344 | } |
2345 | } | ||
2346 | |||
2347 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
2348 | sgl, offset, iova, | ||
2349 | kind_v, ctag, cacheable, unmapped_pte, | ||
2350 | rw_flag, sparse, priv); | ||
2351 | if (err) | ||
2352 | return err; | ||
2359 | 2353 | ||
2354 | if (next_l->update_entry) { | ||
2360 | /* get cpu access to the ptes */ | 2355 | /* get cpu access to the ptes */ |
2361 | err = map_gmmu_pages(next_pte); | 2356 | err = map_gmmu_pages(next_pte); |
2362 | if (err) { | 2357 | if (err) { |
@@ -2365,29 +2360,13 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2365 | vm_aspace_id(vm)); | 2360 | vm_aspace_id(vm)); |
2366 | return err; | 2361 | return err; |
2367 | } | 2362 | } |
2368 | if (next_pte->sparse) { | ||
2369 | u64 null = 0; | ||
2370 | |||
2371 | gk20a_dbg(gpu_dbg_pte, "convert sparse PDE to sparse PTE array [%llx,%llx]", | ||
2372 | curr, next); | ||
2373 | err = update_gmmu_level_locked(vm, next_pte, | ||
2374 | pgsz_idx, | ||
2375 | sgl, | ||
2376 | offset, | ||
2377 | &null, | ||
2378 | curr, | ||
2379 | next, | ||
2380 | kind_v, NULL, cacheable, unmapped_pte, | ||
2381 | rw_flag, true, lvl+1, priv); | ||
2382 | next_pte->sparse = false; | ||
2383 | } | ||
2384 | err = update_gmmu_level_locked(vm, next_pte, | 2363 | err = update_gmmu_level_locked(vm, next_pte, |
2385 | pgsz_idx, | 2364 | pgsz_idx, |
2386 | sgl, | 2365 | sgl, |
2387 | offset, | 2366 | offset, |
2388 | iova, | 2367 | iova, |
2389 | gpu_va, | 2368 | gpu_va, |
2390 | min(next, gpu_end), | 2369 | next, |
2391 | kind_v, ctag, cacheable, unmapped_pte, | 2370 | kind_v, ctag, cacheable, unmapped_pte, |
2392 | rw_flag, sparse, lvl+1, priv); | 2371 | rw_flag, sparse, lvl+1, priv); |
2393 | unmap_gmmu_pages(next_pte); | 2372 | unmap_gmmu_pages(next_pte); |
@@ -2396,15 +2375,6 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2396 | return err; | 2375 | return err; |
2397 | } | 2376 | } |
2398 | 2377 | ||
2399 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
2400 | sgl, offset, iova, | ||
2401 | kind_v, ctag, cacheable, unmapped_pte, | ||
2402 | rw_flag, sparse_entry, priv); | ||
2403 | if (err) | ||
2404 | return err; | ||
2405 | |||
2406 | next_pte->sparse = sparse_entry; | ||
2407 | |||
2408 | pde_i++; | 2378 | pde_i++; |
2409 | gpu_va = next; | 2379 | gpu_va = next; |
2410 | } | 2380 | } |
@@ -2471,8 +2441,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
2471 | } | 2441 | } |
2472 | } | 2442 | } |
2473 | 2443 | ||
2474 | gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx, sparse=%d", | 2444 | gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", |
2475 | pgsz_idx, gpu_va, gpu_end-1, iova, sparse); | 2445 | pgsz_idx, gpu_va, gpu_end-1, iova); |
2476 | err = map_gmmu_pages(&vm->pdb); | 2446 | err = map_gmmu_pages(&vm->pdb); |
2477 | if (err) { | 2447 | if (err) { |
2478 | gk20a_err(dev_from_vm(vm), | 2448 | gk20a_err(dev_from_vm(vm), |
@@ -3026,7 +2996,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
3026 | va_node->pgsz_idx, | 2996 | va_node->pgsz_idx, |
3027 | true, | 2997 | true, |
3028 | gk20a_mem_flag_none, | 2998 | gk20a_mem_flag_none, |
3029 | false, | 2999 | true, |
3030 | NULL); | 3000 | NULL); |
3031 | kfree(va_node); | 3001 | kfree(va_node); |
3032 | } | 3002 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c13ae2a2..e44ee631 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -153,7 +153,6 @@ struct gk20a_mm_entry { | |||
153 | int pgsz; | 153 | int pgsz; |
154 | struct gk20a_mm_entry *entries; | 154 | struct gk20a_mm_entry *entries; |
155 | int num_entries; | 155 | int num_entries; |
156 | bool sparse; | ||
157 | }; | 156 | }; |
158 | 157 | ||
159 | struct priv_cmd_queue { | 158 | struct priv_cmd_queue { |