path: root/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c  309
1 file changed, 145 insertions(+), 164 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index d7391c6d..c3867e9d 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -14,6 +14,7 @@
  */
 
 #include <nvgpu/dma.h>
+#include <nvgpu/gmmu.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -149,206 +150,186 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
         return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 pde3_from_index(u32 i)
-{
-        return i * gmmu_new_pde__size_v() / sizeof(u32);
-}
-
-static u32 pte3_from_index(u32 i)
-{
-        return i * gmmu_new_pte__size_v() / sizeof(u32);
-}
-
-static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
-                        struct gk20a_mm_entry *parent,
-                        u32 i, u32 gmmu_pgsz_idx,
-                        struct scatterlist **sgl,
-                        u64 *offset,
-                        u64 *iova,
-                        u32 kind_v, u64 *ctag,
-                        bool cacheable, bool unmapped_pte,
-                        int rw_flag, bool sparse, bool priv,
-                        enum nvgpu_aperture aperture)
+static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
+                                    const struct gk20a_mmu_level *l,
+                                    struct nvgpu_gmmu_pd *pd,
+                                    u32 pd_idx,
+                                    u64 virt_addr,
+                                    u64 phys_addr,
+                                    struct nvgpu_gmmu_attrs *attrs)
 {
         struct gk20a *g = gk20a_from_vm(vm);
-        u64 pte_addr = 0;
-        struct gk20a_mm_entry *pte = parent->entries + i;
+        u32 pd_offset = pd_offset_from_index(l, pd_idx);
         u32 pde_v[2] = {0, 0};
-        u32 pde;
-
-        gk20a_dbg_fn("");
 
-        pte_addr = gk20a_pde_addr(g, pte) >> gmmu_new_pde_address_shift_v();
+        phys_addr >>= gmmu_new_pde_address_shift_v();
 
-        pde_v[0] |= nvgpu_aperture_mask(g, &pte->mem,
+        pde_v[0] |= nvgpu_aperture_mask(g, &pd->mem,
                         gmmu_new_pde_aperture_sys_mem_ncoh_f(),
                         gmmu_new_pde_aperture_video_memory_f());
-        pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
+        pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
         pde_v[0] |= gmmu_new_pde_vol_true_f();
-        pde_v[1] |= pte_addr >> 24;
-        pde = pde3_from_index(i);
-
-        gk20a_pde_wr32(g, parent, pde + 0, pde_v[0]);
-        gk20a_pde_wr32(g, parent, pde + 1, pde_v[1]);
-
-        gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
-                  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
-        gk20a_dbg_fn("done");
-        return 0;
+        pde_v[1] |= phys_addr >> 24;
+
+        pd_write(g, pd, pd_offset + 0, pde_v[0]);
+        pd_write(g, pd, pd_offset + 1, pde_v[1]);
+
+        pte_dbg(g, attrs,
+                "PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | "
+                "GPU %#-12llx phys %#-12llx "
+                "[0x%08x, 0x%08x]",
+                pd_idx, l->entry_size, pd_offset,
+                virt_addr, phys_addr,
+                pde_v[1], pde_v[0]);
 }
 
-static u32 pde0_from_index(u32 i)
-{
-        return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
-}
-
-static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
-                        struct gk20a_mm_entry *pte,
-                        u32 i, u32 gmmu_pgsz_idx,
-                        struct scatterlist **sgl,
-                        u64 *offset,
-                        u64 *iova,
-                        u32 kind_v, u64 *ctag,
-                        bool cacheable, bool unmapped_pte,
-                        int rw_flag, bool sparse, bool priv,
-                        enum nvgpu_aperture aperture)
+static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
+                                    const struct gk20a_mmu_level *l,
+                                    struct nvgpu_gmmu_pd *pd,
+                                    u32 pd_idx,
+                                    u64 virt_addr,
+                                    u64 phys_addr,
+                                    struct nvgpu_gmmu_attrs *attrs)
 {
         struct gk20a *g = gk20a_from_vm(vm);
         bool small_valid, big_valid;
-        u32 pte_addr_small = 0, pte_addr_big = 0;
-        struct gk20a_mm_entry *entry = pte->entries + i;
+        u32 small_addr = 0, big_addr = 0;
+        u32 pd_offset = pd_offset_from_index(l, pd_idx);
         u32 pde_v[4] = {0, 0, 0, 0};
-        u32 pde;
-
-        gk20a_dbg_fn("");
 
-        small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small;
-        big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
+        small_valid = attrs->pgsz == gmmu_page_size_small;
+        big_valid = attrs->pgsz == gmmu_page_size_big;
 
-        if (small_valid) {
-                pte_addr_small = gk20a_pde_addr(g, entry)
-                        >> gmmu_new_dual_pde_address_shift_v();
-        }
+        if (small_valid)
+                small_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v();
 
         if (big_valid)
-                pte_addr_big = gk20a_pde_addr(g, entry)
-                        >> gmmu_new_dual_pde_address_big_shift_v();
+                big_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v();
 
         if (small_valid) {
-                pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
-                pde_v[2] |= nvgpu_aperture_mask(g, &entry->mem,
+                pde_v[2] |=
+                        gmmu_new_dual_pde_address_small_sys_f(small_addr);
+                pde_v[2] |= nvgpu_aperture_mask(g, &pd->mem,
                         gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
                         gmmu_new_dual_pde_aperture_small_video_memory_f());
                 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
-                pde_v[3] |= pte_addr_small >> 24;
+                pde_v[3] |= small_addr >> 24;
         }
 
         if (big_valid) {
-                pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
+                pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr);
                 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
-                pde_v[0] |= nvgpu_aperture_mask(g, &entry->mem,
+                pde_v[0] |= nvgpu_aperture_mask(g, &pd->mem,
                         gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
                         gmmu_new_dual_pde_aperture_big_video_memory_f());
-                pde_v[1] |= pte_addr_big >> 28;
+                pde_v[1] |= big_addr >> 28;
         }
 
-        pde = pde0_from_index(i);
-
-        gk20a_pde_wr32(g, pte, pde + 0, pde_v[0]);
-        gk20a_pde_wr32(g, pte, pde + 1, pde_v[1]);
-        gk20a_pde_wr32(g, pte, pde + 2, pde_v[2]);
-        gk20a_pde_wr32(g, pte, pde + 3, pde_v[3]);
-
-        gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
-                  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
-        gk20a_dbg_fn("done");
-        return 0;
+        pd_write(g, pd, pd_offset + 0, pde_v[0]);
+        pd_write(g, pd, pd_offset + 1, pde_v[1]);
+        pd_write(g, pd, pd_offset + 2, pde_v[2]);
+        pd_write(g, pd, pd_offset + 3, pde_v[3]);
+
+        pte_dbg(g, attrs,
+                "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
+                "GPU %#-12llx phys %#-12llx "
+                "[0x%08x, 0x%08x, 0x%08x, 0x%08x]",
+                pd_idx, l->entry_size, pd_offset,
+                small_valid ? 'S' : '-',
+                big_valid ? 'B' : '-',
+                virt_addr, phys_addr,
+                pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
 }
 
-static int update_gmmu_pte_locked(struct vm_gk20a *vm,
-                        struct gk20a_mm_entry *pte,
-                        u32 i, u32 gmmu_pgsz_idx,
-                        struct scatterlist **sgl,
-                        u64 *offset,
-                        u64 *iova,
-                        u32 kind_v, u64 *ctag,
-                        bool cacheable, bool unmapped_pte,
-                        int rw_flag, bool sparse, bool priv,
-                        enum nvgpu_aperture aperture)
+static void __update_pte(struct vm_gk20a *vm,
+                         u32 *pte_w,
+                         u64 phys_addr,
+                         struct nvgpu_gmmu_attrs *attrs)
 {
-        struct gk20a *g = vm->mm->g;
-        u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+        struct gk20a *g = gk20a_from_vm(vm);
         u64 ctag_granularity = g->ops.fb.compression_page_size(g);
-        u32 pte_w[2] = {0, 0}; /* invalid pte */
-        u32 pte_i;
-
-        if (*iova) {
-                u32 pte_valid = unmapped_pte ?
-                        gmmu_new_pte_valid_false_f() :
-                        gmmu_new_pte_valid_true_f();
-                u32 iova_v = *iova >> gmmu_new_pte_address_shift_v();
-                u32 pte_addr = aperture == APERTURE_SYSMEM ?
-                        gmmu_new_pte_address_sys_f(iova_v) :
-                        gmmu_new_pte_address_vid_f(iova_v);
-                u32 pte_tgt = __nvgpu_aperture_mask(g, aperture,
-                        gmmu_new_pte_aperture_sys_mem_ncoh_f(),
-                        gmmu_new_pte_aperture_video_memory_f());
-
-                pte_w[0] = pte_valid | pte_addr | pte_tgt;
-
-                if (priv)
-                        pte_w[0] |= gmmu_new_pte_privilege_true_f();
-
-                pte_w[1] = *iova >> (24 + gmmu_new_pte_address_shift_v()) |
-                        gmmu_new_pte_kind_f(kind_v) |
-                        gmmu_new_pte_comptagline_f((u32)(*ctag / ctag_granularity));
-
-                if (rw_flag == gk20a_mem_flag_read_only)
-                        pte_w[0] |= gmmu_new_pte_read_only_true_f();
-                if (unmapped_pte && !cacheable)
-                        pte_w[0] |= gmmu_new_pte_read_only_true_f();
-                else if (!cacheable)
-                        pte_w[0] |= gmmu_new_pte_vol_true_f();
-
-                gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
-                        " ctag=%d vol=%d"
-                        " [0x%08x, 0x%08x]",
-                        i, *iova,
-                        kind_v, (u32)(*ctag / ctag_granularity), !cacheable,
-                        pte_w[1], pte_w[0]);
-
-                if (*ctag)
-                        *ctag += page_size;
-        } else if (sparse) {
-                pte_w[0] = gmmu_new_pte_valid_false_f();
+        u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+        u32 pte_valid = attrs->valid ?
+                gmmu_new_pte_valid_true_f() :
+                gmmu_new_pte_valid_false_f();
+        u32 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v();
+        u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ?
+                gmmu_new_pte_address_sys_f(phys_shifted) :
+                gmmu_new_pte_address_vid_f(phys_shifted);
+        u32 pte_tgt = __nvgpu_aperture_mask(g, attrs->aperture,
+                gmmu_new_pte_aperture_sys_mem_ncoh_f(),
+                gmmu_new_pte_aperture_video_memory_f());
+
+        pte_w[0] = pte_valid | pte_addr | pte_tgt;
+
+        if (attrs->priv)
+                pte_w[0] |= gmmu_new_pte_privilege_true_f();
+
+        pte_w[1] = phys_addr >> (24 + gmmu_new_pte_address_shift_v()) |
+                gmmu_new_pte_kind_f(attrs->kind_v) |
+                gmmu_new_pte_comptagline_f((u32)(attrs->ctag /
+                                                 ctag_granularity));
+
+        if (attrs->rw_flag == gk20a_mem_flag_read_only)
+                pte_w[0] |= gmmu_new_pte_read_only_true_f();
+
+        if (!attrs->valid && !attrs->cacheable)
+                pte_w[0] |= gmmu_new_pte_read_only_true_f();
+        else if (!attrs->cacheable)
                 pte_w[0] |= gmmu_new_pte_vol_true_f();
-        } else {
-                gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
-        }
-
-        pte_i = pte3_from_index(i);
-
-        gk20a_pde_wr32(g, pte, pte_i + 0, pte_w[0]);
-        gk20a_pde_wr32(g, pte, pte_i + 1, pte_w[1]);
+
+        if (attrs->ctag)
+                attrs->ctag += page_size;
+
+}
 
-        if (*iova) {
-                *iova += page_size;
-                *offset += page_size;
-                if (*sgl && *offset + page_size > (*sgl)->length) {
-                        u64 new_iova;
-                        *sgl = sg_next(*sgl);
-                        if (*sgl) {
-                                new_iova = sg_phys(*sgl);
-                                gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-                                          new_iova, (*sgl)->length);
-                                if (new_iova) {
-                                        *offset = 0;
-                                        *iova = new_iova;
-                                }
-                        }
-                }
-        }
-        return 0;
+static void __update_pte_sparse(u32 *pte_w)
+{
+        pte_w[0] = gmmu_new_pte_valid_false_f();
+        pte_w[0] |= gmmu_new_pte_vol_true_f();
+}
+
+static void update_gmmu_pte_locked(struct vm_gk20a *vm,
+                                   const struct gk20a_mmu_level *l,
+                                   struct nvgpu_gmmu_pd *pd,
+                                   u32 pd_idx,
+                                   u64 virt_addr,
+                                   u64 phys_addr,
+                                   struct nvgpu_gmmu_attrs *attrs)
+{
+        struct gk20a *g = vm->mm->g;
+        u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+        u32 pd_offset = pd_offset_from_index(l, pd_idx);
+        u32 pte_w[2] = {0, 0};
+
+        if (phys_addr)
+                __update_pte(vm, pte_w, phys_addr, attrs);
+        else if (attrs->sparse)
+                __update_pte_sparse(pte_w);
+
+        pte_dbg(g, attrs,
+                "vm=%s "
+                "PTE: i=%-4u size=%-2u offs=%-4u | "
+                "GPU %#-12llx phys %#-12llx "
+                "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
+                "ctag=0x%08x "
+                "[0x%08x, 0x%08x]",
+                vm->name,
+                pd_idx, l->entry_size, pd_offset,
+                virt_addr, phys_addr,
+                page_size >> 10,
+                nvgpu_gmmu_perm_str(attrs->rw_flag),
+                attrs->kind_v,
+                nvgpu_aperture_str(attrs->aperture),
+                attrs->valid ? 'V' : '-',
+                attrs->cacheable ? 'C' : '-',
+                attrs->sparse ? 'S' : '-',
+                attrs->priv ? 'P' : '-',
+                (u32)attrs->ctag / g->ops.fb.compression_page_size(g),
+                pte_w[1], pte_w[0]);
+
+        pd_write(g, pd, pd_offset + 0, pte_w[0]);
+        pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
@@ -384,7 +365,7 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 static void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
                 struct vm_gk20a *vm)
 {
-        u64 pdb_addr = gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0);
+        u64 pdb_addr = nvgpu_mem_get_base_addr(g, &vm->pdb.mem, 0);
         u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
         u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
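
Notes

The rewrite above ports gp10b's page-table programming onto nvgpu's common
GMMU walker: every level-update callback now receives the same argument list
(the level descriptor, the destination page directory, the entry index, the
virtual and physical address, and an nvgpu_gmmu_attrs bundle that replaces
the old scatterlist/kind/ctag/flag parameters), returns void, and writes its
words through pd_write() at an offset computed by pd_offset_from_index().
A minimal sketch of how the common walker plausibly dispatches into these
callbacks; the update_entry hook and entry_size field do exist on struct
gk20a_mmu_level, but the wrapper function itself is illustrative, not code
from this tree:

    /* Illustrative dispatch only -- the real walk loop lives in the
     * common GMMU code pulled in by <nvgpu/gmmu.h>, not in mm_gp10b.c. */
    static void program_entry(struct vm_gk20a *vm,
                              const struct gk20a_mmu_level *l,
                              struct nvgpu_gmmu_pd *pd,
                              u32 pd_idx, u64 virt_addr, u64 phys_addr,
                              struct nvgpu_gmmu_attrs *attrs)
    {
            /* Each callback derives its own 32-bit word offset from
             * pd_idx and writes 2 (PDE3, PTE) or 4 (dual PDE0) words. */
            l->update_entry(vm, l, pd, pd_idx, virt_addr, phys_addr, attrs);
    }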
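
The nvgpu_gmmu_attrs fields this file depends on can be read directly off
the accesses in the diff. A reconstruction for reference; the canonical
definition lives in the common headers (brought in by the new #include of
<nvgpu/gmmu.h>) and may name or order its members differently:

    /* Reconstructed from usage above -- an assumption, not the real layout. */
    struct nvgpu_gmmu_attrs {
            u32                 pgsz;      /* index into vm->gmmu_page_sizes[] */
            u32                 kind_v;    /* value for the PTE kind field */
            u64                 ctag;      /* comptag offset; advanced by one
                                            * page_size per PTE written */
            bool                cacheable; /* !cacheable => volatile mapping */
            int                 rw_flag;   /* gk20a_mem_flag_* access flags */
            bool                sparse;    /* sparse: valid=false + vol=true */
            bool                priv;      /* privileged PTE */
            bool                valid;     /* PTE valid bit */
            enum nvgpu_aperture aperture;  /* APERTURE_SYSMEM or video memory */
    };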
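
As a worked example of the two-word PDE packing in update_gmmu_pde3_locked:
a next-level page table at physical address 0x1_2345_6000 is first shifted
right by the PDE address shift (12 bits on this family, via
gmmu_new_pde_address_shift_v()), giving 0x123456. The low bits of the
shifted value are placed into word 0 by gmmu_new_pde_address_sys_f(), and
everything from bit 24 upward spills into word 1 (phys_addr >> 24), which is
how a 32-bit entry word addresses a physical range wider than 32 bits. A
standalone, compilable sketch of the same arithmetic; the 24-bit field at
bit 8 of word 0 is an assumption standing in for the generated hw header:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t phys_addr = 0x123456000ULL;    /* 4k-aligned PD base */
            uint32_t pde_v[2] = {0, 0};

            phys_addr >>= 12;     /* gmmu_new_pde_address_shift_v() */
            /* gmmu_new_pde_address_sys_f(): assumed 24-bit field at bit 8 */
            pde_v[0] |= ((uint32_t)phys_addr & 0xffffffu) << 8;
            pde_v[1] |= (uint32_t)(phys_addr >> 24);  /* high address bits */

            /* Prints: PDE = [0x00000000, 0x12345600] */
            printf("PDE = [0x%08x, 0x%08x]\n", pde_v[1], pde_v[0]);
            return 0;
    }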