From 048c6b062ae381a329dccbc7ca0599113dbd7417 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Thu, 11 May 2017 18:25:47 +0100
Subject: gpu: nvgpu: Separate GMMU mapping impl from mm_gk20a.c

Separate the non-chip specific GMMU mapping implementation code
out of mm_gk20a.c. This puts all of the chip-agnostic code into
common/mm/gmmu.c in preparation for rewriting it.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I6f7fdac3422703f5e80bb22ad304dc27bba4814d
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1480228
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 521 -------------------------------------
 1 file changed, 521 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a1873a30..e7bcf6f0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -124,15 +124,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
  *
  */
 
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-				   enum gmmu_pgsz_gk20a pgsz_idx,
-				   struct sg_table *sgt, u64 buffer_offset,
-				   u64 first_vaddr, u64 last_vaddr,
-				   u8 kind_v, u32 ctag_offset, bool cacheable,
-				   bool umapped_pte, int rw_flag,
-				   bool sparse,
-				   bool priv,
-				   enum nvgpu_aperture aperture);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -781,104 +772,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g)
 #endif
 }
 
-static void free_gmmu_phys_pages(struct vm_gk20a *vm,
-			    struct gk20a_mm_entry *entry)
-{
-	gk20a_dbg_fn("");
-
-	/* note: mem_desc slightly abused (wrt. free_gmmu_pages) */
-
-	free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size));
-	entry->mem.cpu_va = NULL;
-
-	sg_free_table(entry->mem.priv.sgt);
-	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
-	entry->mem.priv.sgt = NULL;
-	entry->mem.size = 0;
-	entry->mem.aperture = APERTURE_INVALID;
-}
-
-static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-			 sg_phys(entry->mem.priv.sgt->sgl),
-			 entry->mem.priv.sgt->sgl->length);
-	return 0;
-}
-
-static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-			 sg_phys(entry->mem.priv.sgt->sgl),
-			 entry->mem.priv.sgt->sgl->length);
-}
-
-void free_gmmu_pages(struct vm_gk20a *vm,
-		     struct gk20a_mm_entry *entry)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	gk20a_dbg_fn("");
-
-	if (!entry->mem.size)
-		return;
-
-	if (entry->woffset) /* fake shadow mem */
-		return;
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-		free_gmmu_phys_pages(vm, entry);
-		return;
-	}
-
-	nvgpu_dma_free(g, &entry->mem);
-}
-
-int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	gk20a_dbg_fn("");
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
-		return map_gmmu_phys_pages(entry);
-
-	if (IS_ENABLED(CONFIG_ARM64)) {
-		if (entry->mem.aperture == APERTURE_VIDMEM)
-			return 0;
-
-		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-				 sg_phys(entry->mem.priv.sgt->sgl),
-				 entry->mem.size);
-	} else {
-		int err = nvgpu_mem_begin(g, &entry->mem);
-
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	gk20a_dbg_fn("");
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-		unmap_gmmu_phys_pages(entry);
-		return;
-	}
-
-	if (IS_ENABLED(CONFIG_ARM64)) {
-		if (entry->mem.aperture == APERTURE_VIDMEM)
-			return;
-
-		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-				 sg_phys(entry->mem.priv.sgt->sgl),
-				 entry->mem.size);
-	} else {
-		nvgpu_mem_end(g, &entry->mem);
-	}
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -909,21 +802,6 @@ static u32 pte_from_index(u32 i)
 	return i * gmmu_pte__size_v() / sizeof(u32);
 }
 
-u32 pte_index_from_vaddr(struct vm_gk20a *vm,
-			 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
-{
-	u32 ret;
-	/* mask off pde part */
-	addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL);
-
-	/* shift over to get pte index. note assumption that pte index
-	 * doesn't leak over into the high 32b */
-	ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
-	gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
-	return ret;
-}
-
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers)
@@ -1096,141 +974,6 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 	return 0;
 }
 
-u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
-			  u64 map_offset,
-			  struct sg_table *sgt,
-			  u64 buffer_offset,
-			  u64 size,
-			  int pgsz_idx,
-			  u8 kind_v,
-			  u32 ctag_offset,
-			  u32 flags,
-			  int rw_flag,
-			  bool clear_ctags,
-			  bool sparse,
-			  bool priv,
-			  struct vm_gk20a_mapping_batch *batch,
-			  enum nvgpu_aperture aperture)
-{
-	int err = 0;
-	bool allocated = false;
-	struct gk20a *g = gk20a_from_vm(vm);
-	int ctag_granularity = g->ops.fb.compression_page_size(g);
-	u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
-
-	/* Allocate (or validate when map_offset != 0) the virtual address. */
-	if (!map_offset) {
-		map_offset = __nvgpu_vm_alloc_va(vm, size,
-						 pgsz_idx);
-		if (!map_offset) {
-			nvgpu_err(g, "failed to allocate va space");
-			err = -ENOMEM;
-			goto fail_alloc;
-		}
-		allocated = true;
-	}
-
-	gk20a_dbg(gpu_dbg_map,
-		  "gv: 0x%04x_%08x + 0x%-7llx "
-		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
-		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
-		  "kind=0x%x flags=0x%x apt=%s",
-		  u64_hi32(map_offset), u64_lo32(map_offset), size,
-		  sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
-		  sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
-		  sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
-		  sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
-		  vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
-		  ctag_lines, ctag_offset,
-		  kind_v, flags, nvgpu_aperture_str(aperture));
-
-	err = update_gmmu_ptes_locked(vm, pgsz_idx,
-				      sgt,
-				      buffer_offset,
-				      map_offset, map_offset + size,
-				      kind_v,
-				      ctag_offset,
-				      flags &
-				      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				      flags &
-				      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
-				      rw_flag,
-				      sparse,
-				      priv,
-				      aperture);
-	if (err) {
-		nvgpu_err(g, "failed to update ptes on map");
-		goto fail_validate;
-	}
-
-	if (!batch)
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-	else
-		batch->need_tlb_invalidate = true;
-
-	return map_offset;
-fail_validate:
-	if (allocated)
-		__nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
-fail_alloc:
-	nvgpu_err(g, "%s: failed with err=%d", __func__, err);
-	return 0;
-}
-
-void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
-			     u64 vaddr,
-			     u64 size,
-			     int pgsz_idx,
-			     bool va_allocated,
-			     int rw_flag,
-			     bool sparse,
-			     struct vm_gk20a_mapping_batch *batch)
-{
-	int err = 0;
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (va_allocated) {
-		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
-		if (err) {
-			nvgpu_err(g, "failed to free va");
-			return;
-		}
-	}
-
-	/* unmap here needs to know the page size we assigned at mapping */
-	err = update_gmmu_ptes_locked(vm,
-				      pgsz_idx,
-				      NULL, /* n/a for unmap */
-				      0,
-				      vaddr,
-				      vaddr + size,
-				      0, 0, false /* n/a for unmap */,
-				      false, rw_flag,
-				      sparse, 0,
-				      APERTURE_INVALID); /* don't care for unmap */
-	if (err)
-		nvgpu_err(g, "failed to update gmmu ptes on unmap");
-
-	/* flush l2 so any dirty lines are written out *now*.
-	 * also as we could potentially be switching this buffer
-	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
-	 * some point in the future we need to invalidate l2. e.g. switching
-	 * from a render buffer unmap (here) to later using the same memory
-	 * for gmmu ptes. note the positioning of this relative to any smmu
-	 * unmapping (below). */
-
-	if (!batch) {
-		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-	} else {
-		if (!batch->gpu_l2_flushed) {
-			gk20a_mm_l2_flush(g, true);
-			batch->gpu_l2_flushed = true;
-		}
-		batch->need_tlb_invalidate = true;
-	}
-}
-
 enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 					  struct dma_buf *dmabuf)
 {
@@ -2036,254 +1779,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-static int update_gmmu_level_locked(struct vm_gk20a *vm,
-				    struct gk20a_mm_entry *pte,
-				    enum gmmu_pgsz_gk20a pgsz_idx,
-				    struct scatterlist **sgl,
-				    u64 *offset,
-				    u64 *iova,
-				    u64 gpu_va, u64 gpu_end,
-				    u8 kind_v, u64 *ctag,
-				    bool cacheable, bool unmapped_pte,
-				    int rw_flag,
-				    bool sparse,
-				    int lvl,
-				    bool priv,
-				    enum nvgpu_aperture aperture)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
-	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
-	int err = 0;
-	u32 pde_i;
-	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
-	struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
-
-	gk20a_dbg_fn("");
-
-	pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
-		>> (u64)l->lo_bit[pgsz_idx];
-
-	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
-		  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
-
-	while (gpu_va < gpu_end) {
-		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
-
-		/* Allocate next level */
-		if (next_l->update_entry) {
-			if (!pte->entries) {
-				int num_entries =
-					1 <<
-					(l->hi_bit[pgsz_idx] -
-					 l->lo_bit[pgsz_idx] + 1);
-				pte->entries =
-					nvgpu_vzalloc(g,
-						sizeof(struct gk20a_mm_entry) *
-						num_entries);
-				if (!pte->entries)
-					return -ENOMEM;
-				pte->pgsz = pgsz_idx;
-				pte->num_entries = num_entries;
-			}
-			prev_pte = next_pte;
-			next_pte = pte->entries + pde_i;
-
-			if (!next_pte->mem.size) {
-				err = nvgpu_zalloc_gmmu_page_table(vm,
-					pgsz_idx, next_l, next_pte, prev_pte);
-				if (err)
-					return err;
-			}
-		}
-
-		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				sgl, offset, iova,
-				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse, priv, aperture);
-		if (err)
-			return err;
-
-		if (next_l->update_entry) {
-			/* get cpu access to the ptes */
-			err = map_gmmu_pages(g, next_pte);
-			if (err) {
-				nvgpu_err(g,
-					  "couldn't map ptes for update as=%d",
-					  vm_aspace_id(vm));
-				return err;
-			}
-			err = update_gmmu_level_locked(vm, next_pte,
-				pgsz_idx,
-				sgl,
-				offset,
-				iova,
-				gpu_va,
-				next,
-				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse, lvl+1, priv, aperture);
-			unmap_gmmu_pages(g, next_pte);
-
-			if (err)
-				return err;
-		}
-
-		pde_i++;
-		gpu_va = next;
-	}
-
-	gk20a_dbg_fn("done");
-
-	return 0;
-}
-
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-				   enum gmmu_pgsz_gk20a pgsz_idx,
-				   struct sg_table *sgt,
-				   u64 buffer_offset,
-				   u64 gpu_va, u64 gpu_end,
-				   u8 kind_v, u32 ctag_offset,
-				   bool cacheable, bool unmapped_pte,
-				   int rw_flag,
-				   bool sparse,
-				   bool priv,
-				   enum nvgpu_aperture aperture)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	int ctag_granularity = g->ops.fb.compression_page_size(g);
-	u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
-	u64 iova = 0;
-	u64 space_to_skip = buffer_offset;
-	u64 map_size = gpu_end - gpu_va;
-	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-	int err;
-	struct scatterlist *sgl = NULL;
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
-	u64 length;
-
-	/* note: here we need to map kernel to small, since the
-	 * low-level mmu code assumes 0 is small and 1 is big pages */
-	if (pgsz_idx == gmmu_page_size_kernel)
-		pgsz_idx = gmmu_page_size_small;
-
-	if (space_to_skip & (page_size - 1))
-		return -EINVAL;
-
-	err = map_gmmu_pages(g, &vm->pdb);
-	if (err) {
-		nvgpu_err(g,
-			  "couldn't map ptes for update as=%d",
-			  vm_aspace_id(vm));
-		return err;
-	}
-
-	if (aperture == APERTURE_VIDMEM) {
-		gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx",
-				pgsz_idx, gpu_va, gpu_end-1, iova);
-
-		if (sgt) {
-			alloc = get_vidmem_page_alloc(sgt->sgl);
-
-			nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-						  page_alloc_chunk, list_entry) {
-				if (space_to_skip &&
-				    space_to_skip > chunk->length) {
-					space_to_skip -= chunk->length;
-				} else {
-					iova = chunk->base + space_to_skip;
-					length = chunk->length - space_to_skip;
-					length = min(length, map_size);
-					space_to_skip = 0;
-
-					err = update_gmmu_level_locked(vm,
-						&vm->pdb, pgsz_idx,
-						&sgl,
-						&space_to_skip,
-						&iova,
-						gpu_va, gpu_va + length,
-						kind_v, &ctag,
-						cacheable, unmapped_pte,
-						rw_flag, sparse, 0, priv,
-						aperture);
-					if (err)
-						break;
-
-					/* need to set explicit zero here */
-					space_to_skip = 0;
-					gpu_va += length;
-					map_size -= length;
-
-					if (!map_size)
-						break;
-				}
-			}
-		} else {
-			err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-					&sgl,
-					&space_to_skip,
-					&iova,
-					gpu_va, gpu_end,
-					kind_v, &ctag,
-					cacheable, unmapped_pte, rw_flag,
-					sparse, 0, priv,
-					aperture);
-		}
-	} else {
-		gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
-				pgsz_idx,
-				sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
-				    : 0ULL,
-				buffer_offset,
-				sgt ? sgt->nents : 0);
-
-		gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-				pgsz_idx, gpu_va, gpu_end-1, iova);
-
-		if (sgt) {
-			iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
-			if (!vm->mm->bypass_smmu && iova) {
-				iova += space_to_skip;
-			} else {
-				sgl = sgt->sgl;
-
-				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-						(u64)sg_phys(sgl),
-						sgl->length);
-
-				while (space_to_skip && sgl &&
-				       space_to_skip + page_size > sgl->length) {
-					space_to_skip -= sgl->length;
-					sgl = sg_next(sgl);
-					gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-							(u64)sg_phys(sgl),
-							sgl->length);
-				}
-
-				iova = sg_phys(sgl) + space_to_skip;
-			}
-		}
-
-		err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-				&sgl,
-				&space_to_skip,
-				&iova,
-				gpu_va, gpu_end,
-				kind_v, &ctag,
-				cacheable, unmapped_pte, rw_flag,
-				sparse, 0, priv,
-				aperture);
-	}
-
-	unmap_gmmu_pages(g, &vm->pdb);
-
-	smp_mb();
-
-	gk20a_dbg_fn("done");
-
-	return err;
-}
-
 /* NOTE! mapped_buffers lock must be held */
 void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 			   struct vm_gk20a_mapping_batch *batch)
@@ -2341,22 +1836,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 	return;
 }
 
-void gk20a_vm_free_entries(struct vm_gk20a *vm,
-			   struct gk20a_mm_entry *parent,
-			   int level)
-{
-	int i;
-
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			gk20a_vm_free_entries(vm, &parent->entries[i], level+1);
-
-	if (parent->mem.size)
-		free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
--
cgit v1.2.2