author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-06-15 21:09:35 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-10-30 19:36:06 -0400
commit	4b5c08f4c0cf12076a208c640a46447a536308e8 (patch)
tree	333a7896521911282f370b7d9d9c618fc3f2d678
parent	004a1880ed80f3b384cf3d0d37e0a58eff29fcaf (diff)
gpu: nvgpu: Implement sparse PDEs
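
For context beyond the subject line: with this change a sparse allocation no
longer forces allocation of next-level page tables. When the range being made
sparse fully covers a PDE, the PDE itself is marked sparse by setting its
big-page volatile bit (see the update_gmmu_pde_locked() hunks) and the PTE
array below it is never allocated; the new gk20a_mm_entry.sparse flag records
this. When a later mapping only partially covers such a PDE,
update_gmmu_level_locked() first expands the sparse PDE into a full array of
sparse PTEs over [curr, next) and then writes the real mapping on top.
gk20a_vm_free_space() now unmaps with sparse=false, presumably because the
allocation is being torn down entirely.

A minimal sketch of the coverage test that decides whether a PDE may stay
sparse; the helper name is illustrative only, and the alignment math assumes
pde_size is a power of two:

	/*
	 * Mirrors the sparse_entry computation added to
	 * update_gmmu_level_locked(): a PDE can be left sparse (no
	 * next-level table) only when the requested range covers it
	 * completely, i.e. gpu_va sits on the PDE boundary and gpu_end
	 * reaches at least the next boundary.
	 */
	static bool pde_fully_covered(u64 gpu_va, u64 gpu_end, u64 pde_size)
	{
		u64 curr = gpu_va & ~(pde_size - 1);              /* start of this PDE */
		u64 next = (gpu_va + pde_size) & ~(pde_size - 1); /* start of next PDE */

		return gpu_va == curr && gpu_end >= next;
	}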
Change-Id: Idfeb3bf95751902d52a895d77045a529f69abc0b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/758651
GVS: Gerrit_Virtual_Submit
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	92
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	1
2 files changed, 62 insertions(+), 31 deletions(-)
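
A note on the encoding, as a reading aid for the update_gmmu_pte_locked()
hunk below: a sparse entry is written invalid but volatile, so a GPU access
to it is expected to be satisfied without an MMU fault (reads of unbacked
sparse pages returning zeroes). A hypothetical helper, using the same
generated gmmu_* accessors as the driver:

	/*
	 * Illustrative only: compose the two PTE words for a sparse
	 * entry. In the driver pte_w[] starts zeroed, so plain
	 * assignment here matches the |= used in the patch.
	 */
	static void write_sparse_pte(u32 pte_w[2])
	{
		pte_w[0] = gmmu_pte_valid_false_f();	/* no physical backing */
		pte_w[1] = gmmu_pte_vol_true_f();	/* volatile: don't fault */
	}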
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 859e46fc..8481044e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2164,8 +2164,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg_fn("");
 
-	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
-	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+	small_valid = !sparse && entry->size
+		&& entry->pgsz == gmmu_page_size_small;
+	big_valid = !sparse && entry->size
+		&& entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
 		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
@@ -2185,6 +2187,9 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 		  (big_valid ? (gmmu_pde_vol_big_true_f()) :
 		   gmmu_pde_vol_big_false_f());
 
+	if (sparse)
+		pde_v[1] |= gmmu_pde_vol_big_true_f();
+
 	pde = pde_from_index(vm, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
@@ -2259,6 +2264,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	} else if (sparse) {
 		pte_w[0] = gmmu_pte_valid_false_f();
 		pte_w[1] |= gmmu_pte_vol_true_f();
+		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x%08x,0x%08x]",
+			  i, pte_w[1], pte_w[0]);
 	} else {
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
@@ -2317,41 +2324,39 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 
 	while (gpu_va < gpu_end) {
 		struct gk20a_mm_entry *next_pte = NULL;
-		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
+		u64 next = (gpu_va + pde_size) & ~(pde_size-1);
+		u64 curr = gpu_va & ~(pde_size-1);
+		bool sparse_entry = sparse &&
+			((gpu_va == curr && gpu_end >= next) ||
+			 !next_l->update_entry);
+
+		gk20a_dbg(gpu_dbg_pte, "pde_i %d [%llx-%llx] gpu_va %llx sparse %d (%d)\n",
+			  pde_i, curr, next, gpu_va, sparse_entry, pte->sparse);
 
 		/* Allocate next level */
-		if (next_l->update_entry) {
-			if (!pte->entries) {
-				int num_entries =
-					1 <<
-					 (l->hi_bit[pgsz_idx]
-					  - l->lo_bit[pgsz_idx] + 1);
-				pte->entries =
-					vzalloc(sizeof(struct gk20a_mm_entry) *
-						num_entries);
-				if (!pte->entries)
-					return -ENOMEM;
-				pte->pgsz = pgsz_idx;
-				pte->num_entries = num_entries;
-			}
-			next_pte = pte->entries + pde_i;
+		if (!pte->entries) {
+			int num_entries =
+				1 <<
+				 (l->hi_bit[pgsz_idx]
+				  - l->lo_bit[pgsz_idx] + 1);
+			pte->entries =
+				vzalloc(sizeof(struct gk20a_mm_entry) *
+					num_entries);
+			if (!pte->entries)
+				return -ENOMEM;
+			pte->pgsz = pgsz_idx;
+			pte->num_entries = num_entries;
+		}
+		next_pte = pte->entries + pde_i;
 
+		if (next_l->update_entry && !sparse_entry) {
 			if (!next_pte->size) {
 				err = gk20a_zalloc_gmmu_page_table(vm,
 					pgsz_idx, next_l, next_pte);
 				if (err)
 					return err;
 			}
-		}
-
-		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				sgl, offset, iova,
-				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse, priv);
-		if (err)
-			return err;
 
-		if (next_l->update_entry) {
 			/* get cpu access to the ptes */
 			err = map_gmmu_pages(next_pte);
 			if (err) {
@@ -2360,13 +2365,29 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 					   vm_aspace_id(vm));
 				return err;
 			}
+			if (next_pte->sparse) {
+				u64 null = 0;
+
+				gk20a_dbg(gpu_dbg_pte, "convert sparse PDE to sparse PTE array [%llx,%llx]",
+					  curr, next);
+				err = update_gmmu_level_locked(vm, next_pte,
+						pgsz_idx,
+						sgl,
+						offset,
+						&null,
+						curr,
+						next,
+						kind_v, NULL, cacheable, unmapped_pte,
+						rw_flag, true, lvl+1, priv);
+				next_pte->sparse = false;
+			}
 			err = update_gmmu_level_locked(vm, next_pte,
 					pgsz_idx,
 					sgl,
 					offset,
 					iova,
 					gpu_va,
-					next,
+					min(next, gpu_end),
 					kind_v, ctag, cacheable, unmapped_pte,
 					rw_flag, sparse, lvl+1, priv);
 			unmap_gmmu_pages(next_pte);
@@ -2375,6 +2396,15 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			return err;
 		}
 
+		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
+				sgl, offset, iova,
+				kind_v, ctag, cacheable, unmapped_pte,
+				rw_flag, sparse_entry, priv);
+		if (err)
+			return err;
+
+		next_pte->sparse = sparse_entry;
+
 		pde_i++;
 		gpu_va = next;
 	}
@@ -2441,8 +2471,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		}
 	}
 
-	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-		  pgsz_idx, gpu_va, gpu_end-1, iova);
+	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx, sparse=%d",
+		  pgsz_idx, gpu_va, gpu_end-1, iova, sparse);
 	err = map_gmmu_pages(&vm->pdb);
 	if (err) {
 		gk20a_err(dev_from_vm(vm),
@@ -2996,7 +3026,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 				va_node->pgsz_idx,
 				true,
 				gk20a_mem_flag_none,
-				true,
+				false,
 				NULL);
 		kfree(va_node);
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index e44ee631..c13ae2a2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -153,6 +153,7 @@ struct gk20a_mm_entry {
 	int pgsz;
 	struct gk20a_mm_entry *entries;
 	int num_entries;
+	bool sparse;
 };
 
 struct priv_cmd_queue {