From 434385ca54053b13ac06a5f11cb7564d6740f02d Mon Sep 17 00:00:00 2001
From: Sami Kiminki
Date: Wed, 8 Nov 2017 20:55:47 +0200
Subject: gpu: nvgpu: Clean up comptag data structs and alloc

Clean up the comptag-related data structures and allocation logic. The
most important change is that we only ever try comptag allocation once,
to prevent incorrect map aliasing. If we were to retry the allocation
on further map calls, the following situation would become possible:

(1) A compressible-kind mapping is requested for a buffer. The comptag
    alloc fails and we proceed with the incompressible-kind fallback.

(2) Another compressible-kind mapping is requested for the same buffer.
    This time the comptag alloc retry succeeds and the compressible
    kind is used.

(3) After writes through the compressible-kind mapping, the buffer is
    no longer legible via the fallback incompressible-kind mapping.

The other changes remove the unused comptag-related fields in
gk20a_comptags and nvgpu_mapped_buf, and retrieve comptags info only
for compressible buffers. We also make nvgpu_ctag_buffer_info and
nvgpu_vm_compute_compression private mm/vm.c definitions, since they
are not used elsewhere.

Bug 1902982

Change-Id: I0c9fe48ccc585a80dd2c05ec606a079c1c1d41f1
Signed-off-by: Sami Kiminki
Reviewed-on: https://git-master.nvidia.com/r/1595153
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: svc-mobile-coverity
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/comptags.c  |  25 +++---
 drivers/gpu/nvgpu/common/linux/dmabuf.c    |   4 +-
 drivers/gpu/nvgpu/common/linux/vm.c        |   3 +-
 drivers/gpu/nvgpu/common/mm/vm.c           | 136 +++++++++++++++++++----
 drivers/gpu/nvgpu/include/nvgpu/comptags.h |  10 ++-
 drivers/gpu/nvgpu/include/nvgpu/vm.h       |  17 ----
 6 files changed, 118 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c
index f55989f7..92e8aa3e 100644
--- a/drivers/gpu/nvgpu/common/linux/comptags.c
+++ b/drivers/gpu/nvgpu/common/linux/comptags.c
@@ -46,7 +46,6 @@ int gk20a_alloc_comptags(struct gk20a *g,
 {
        struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
                                                             buf->dev);
-       u32 ctaglines_allocsize;
        u32 offset;
        int err;
 
@@ -56,18 +55,24 @@ int gk20a_alloc_comptags(struct gk20a *g,
        if (!lines)
                return -EINVAL;
 
-       ctaglines_allocsize = lines;
-
        /* store the allocator so we can use it when we free the ctags */
        priv->comptag_allocator = allocator;
-       err = gk20a_comptaglines_alloc(allocator, &offset,
-                                      ctaglines_allocsize);
-       if (err)
-               return err;
+       err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+       if (!err) {
+               priv->comptags.offset = offset;
+               priv->comptags.lines = lines;
+       } else {
+               priv->comptags.offset = 0;
+               priv->comptags.lines = 0;
+       }
 
-       priv->comptags.offset = offset;
-       priv->comptags.lines = lines;
-       priv->comptags.allocated_lines = ctaglines_allocsize;
+       /*
+        * We don't report an error here if comptag alloc failed. The
+        * caller will simply fall back to incompressible kinds. It
+        * would not be safe to re-allocate comptags anyway on
+        * successive calls, as that would break map aliasing.
+        */
+       priv->comptags.allocated = true;
 
        return 0;
 }
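
To make the allocate-once policy concrete before the rest of the diff, here
is a minimal stand-alone C sketch of the state machine that the new
gk20a_alloc_comptags() implements. The comptags_state struct and the
alloc_lines() stub are illustrative stand-ins, not nvgpu code:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for struct gk20a_comptags. */
struct comptags_state {
        uint32_t offset;
        uint32_t lines;
        bool allocated;         /* an alloc attempt was made, pass or fail */
};

/* Hypothetical allocator stub; returns 0 on success. */
extern int alloc_lines(uint32_t lines, uint32_t *offset);

static void try_alloc_comptags_once(struct comptags_state *ct, uint32_t lines)
{
        uint32_t offset;

        /* Never retry: a successful retry would break map aliasing. */
        if (ct->allocated)
                return;

        if (alloc_lines(lines, &offset) == 0) {
                ct->offset = offset;
                ct->lines = lines;
        } else {
                /* lines == 0 permanently records the failed attempt. */
                ct->offset = 0;
                ct->lines = 0;
        }
        ct->allocated = true;
}
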
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c
index 6b44ff55..08cf5f2b 100644
--- a/drivers/gpu/nvgpu/common/linux/dmabuf.c
+++ b/drivers/gpu/nvgpu/common/linux/dmabuf.c
@@ -41,11 +41,11 @@ static void gk20a_mm_delete_priv(void *_priv)
 
        g = priv->g;
 
-       if (priv->comptags.lines) {
+       if (priv->comptags.allocated && priv->comptags.lines) {
                BUG_ON(!priv->comptag_allocator);
                gk20a_comptaglines_free(priv->comptag_allocator,
                                        priv->comptags.offset,
-                                       priv->comptags.allocated_lines);
+                                       priv->comptags.lines);
        }
 
        /* Free buffer states */
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index d6d86c94..8e464627 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -132,7 +132,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
        nvgpu_log(g, gpu_dbg_map,
                  "gv: 0x%04x_%08x + 0x%-7zu "
                  "[dma: 0x%010llx, pa: 0x%010llx] "
-                 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+                 "pgsz=%-3dKb as=%-2d "
                  "flags=0x%x apt=%s (reused)",
                  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
                  os_buf->dmabuf->size,
@@ -140,7 +140,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
                  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
                  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
                  vm_aspace_id(vm),
-                 mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
                  mapped_buffer->flags,
                  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
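
The gk20a_mm_delete_priv() change above relies on a subtle invariant: after
a failed attempt, 'allocated' is true but 'lines' is 0, so there is nothing
to hand back to the allocator. A minimal teardown sketch using the same
illustrative types as the earlier snippet; free_lines() is a hypothetical
stand-in for gk20a_comptaglines_free():

#include <stdbool.h>
#include <stdint.h>

struct comptags_state {
        uint32_t offset;
        uint32_t lines;
        bool allocated;
};

extern void free_lines(uint32_t offset, uint32_t lines);

static void release_comptags(struct comptags_state *ct)
{
        /* Free only when an attempt was made and it succeeded. */
        if (ct->allocated && ct->lines)
                free_lines(ct->offset, ct->lines);

        ct->offset = 0;
        ct->lines = 0;
        ct->allocated = false;
}
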
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index cfac4f8e..be7e4207 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -39,6 +39,20 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+struct nvgpu_ctag_buffer_info {
+       u64 size;
+       enum gmmu_pgsz_gk20a pgsz_idx;
+       u32 flags;
+
+       s16 compr_kind;
+       s16 incompr_kind;
+
+       u32 ctag_lines;
+};
+
+static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
+                                       struct nvgpu_ctag_buffer_info *binfo);
+
 static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
                             struct vm_gk20a_mapping_batch *batch);
 
@@ -731,11 +745,10 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
        struct gk20a *g = gk20a_from_vm(vm);
        struct nvgpu_mapped_buf *mapped_buffer = NULL;
        struct nvgpu_ctag_buffer_info binfo = { 0 };
-       struct gk20a_comptags comptags;
        struct nvgpu_vm_area *vm_area = NULL;
        int err = 0;
        u64 align;
-       u32 ctag_offset;
+       u32 ctag_offset = 0;
        bool clear_ctags = false;
        bool va_allocated = true;
 
@@ -746,6 +759,11 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
         */
        s16 map_key_kind;
 
+       /*
+        * The actual GMMU PTE kind
+        */
+       u8 pte_kind;
+
        if (vm->userspace_managed &&
            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
                nvgpu_err(g,
@@ -835,57 +853,91 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
        if (!vm->enable_ctag)
                binfo.ctag_lines = 0;
 
-       gk20a_get_comptags(os_buf, &comptags);
+       if (binfo.ctag_lines) {
+               struct gk20a_comptags comptags = { 0 };
 
-       if (binfo.ctag_lines && !comptags.lines) {
                /*
-                * Allocate compression resources if needed.
+                * Get the comptags state
                 */
-               if (gk20a_alloc_comptags(g,
-                                        os_buf,
-                                        &g->gr.comp_tags,
-                                        binfo.ctag_lines)) {
+               gk20a_get_comptags(os_buf, &comptags);
 
-                       /*
-                        * Prevent compression...
-                        */
-                       binfo.compr_kind = NV_KIND_INVALID;
-
-                       /*
-                        * ... And make sure we have a fallback.
-                        */
-                       if (binfo.incompr_kind == NV_KIND_INVALID) {
-                               nvgpu_err(g, "comptag alloc failed and no "
-                                         "fallback kind specified");
-                               err = -ENOMEM;
+               /*
+                * Allocate if not yet allocated
+                */
+               if (!comptags.allocated) {
+                       err = gk20a_alloc_comptags(g, os_buf,
+                                                  &g->gr.comp_tags,
+                                                  binfo.ctag_lines);
+                       if (err) {
                                /*
-                                * Any alloced comptags are cleaned up when the
-                                * dmabuf is freed.
+                                * This is an irrecoverable failure and we need
+                                * to abort. In particular, it is not safe to
+                                * proceed with the incompressible fallback,
+                                * since we could not mark our alloc failure
+                                * anywhere. Later we would retry allocation
+                                * and break compressible map aliasing.
                                 */
+                               nvgpu_err(g,
+                                         "Error %d setting up comptags", err);
                                goto clean_up;
                        }
-               } else {
+
+                       /*
+                        * Refresh comptags state after alloc. Field
+                        * comptags.lines will be 0 if alloc failed.
+                        */
                        gk20a_get_comptags(os_buf, &comptags);
 
-                       if (g->ops.ltc.cbc_ctrl)
-                               g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-                                                   comptags.offset,
-                                                   comptags.offset +
-                                                   comptags.allocated_lines - 1);
-                       else
-                               clear_ctags = true;
+                       /*
+                        * Newly allocated comptags need to be cleared
+                        */
+                       if (comptags.lines) {
+                               if (g->ops.ltc.cbc_ctrl)
+                                       g->ops.ltc.cbc_ctrl(
+                                               g, gk20a_cbc_op_clear,
+                                               comptags.offset,
+                                               (comptags.offset +
+                                                comptags.lines - 1));
+                               else
+                                       /*
+                                        * The comptags will be cleared as part
+                                        * of mapping (vgpu)
+                                        */
+                                       clear_ctags = true;
+                       }
                }
+
+               /*
+                * Store the ctag offset for later use if we got the comptags
+                */
+               if (comptags.lines)
+                       ctag_offset = comptags.offset;
        }
 
        /*
-        * Calculate comptag index for this mapping. Differs in case of partial
-        * mapping.
+        * Figure out the kind and ctag offset for the GMMU page tables
         */
-       ctag_offset = comptags.offset;
-       if (ctag_offset)
+       if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
+               /*
+                * Adjust the ctag_offset as per the buffer map offset
+                */
                ctag_offset += phys_offset >>
-                       ilog2(g->ops.fb.compression_page_size(g));
+                              ilog2(g->ops.fb.compression_page_size(g));
+               pte_kind = binfo.compr_kind;
+       } else if (binfo.incompr_kind != NV_KIND_INVALID) {
+               /*
+                * Incompressible kind, ctag offset will not be programmed
+                */
+               ctag_offset = 0;
+               pte_kind = binfo.incompr_kind;
+       } else {
+               /*
+                * Caller required compression, but we cannot provide it
+                */
+               nvgpu_err(g, "No comptags and no incompressible fallback kind");
+               err = -ENOMEM;
+               goto clean_up;
+       }
 
        map_addr = g->ops.mm.gmmu_map(vm,
                                      map_addr,
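
The kind selection at the end of the hunk above can be read in isolation.
Here is a hedged stand-alone sketch of the same decision; all names are
illustrative, compression_page_size stands in for
g->ops.fb.compression_page_size(g), and division replaces the driver's
ilog2()-based shift, which is equivalent for a power-of-two page size:

#include <errno.h>
#include <stdint.h>

#define KIND_INVALID (-1)       /* illustrative stand-in for NV_KIND_INVALID */

static int choose_pte_kind(int16_t compr_kind, int16_t incompr_kind,
                           uint32_t comptag_offset,     /* 0 means none */
                           uint64_t phys_offset,
                           uint32_t compression_page_size,
                           uint8_t *pte_kind, uint32_t *ctag_offset)
{
        if (compr_kind != KIND_INVALID && comptag_offset != 0) {
                /* Partial mappings start deeper into the comptag space. */
                *ctag_offset = comptag_offset +
                        (uint32_t)(phys_offset / compression_page_size);
                *pte_kind = (uint8_t)compr_kind;
        } else if (incompr_kind != KIND_INVALID) {
                /* Incompressible fallback: no ctag offset is programmed. */
                *ctag_offset = 0;
                *pte_kind = (uint8_t)incompr_kind;
        } else {
                /* Compression required but unavailable, and no fallback. */
                return -ENOMEM;
        }
        return 0;
}
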
@@ -893,8 +945,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
                                      phys_offset,
                                      map_size,
                                      binfo.pgsz_idx,
-                                     binfo.compr_kind != NV_KIND_INVALID ?
-                                     binfo.compr_kind : binfo.incompr_kind,
+                                     pte_kind,
                                      ctag_offset,
                                      flags, rw,
@@ -913,9 +964,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
        mapped_buffer->addr = map_addr;
        mapped_buffer->size = map_size;
        mapped_buffer->pgsz_idx = binfo.pgsz_idx;
-       mapped_buffer->ctag_offset = ctag_offset;
-       mapped_buffer->ctag_lines = binfo.ctag_lines;
-       mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
        mapped_buffer->vm = vm;
        mapped_buffer->flags = flags;
        mapped_buffer->kind = map_key_kind;
@@ -1074,8 +1122,8 @@ done:
        return;
 }
 
-int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
-                                struct nvgpu_ctag_buffer_info *binfo)
+static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
+                                       struct nvgpu_ctag_buffer_info *binfo)
 {
        bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
        struct gk20a *g = gk20a_from_vm(vm);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
index 5482d0ce..2d9f034a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
@@ -25,8 +25,14 @@ struct nvgpu_os_buffer;
 struct gk20a_comptags {
        u32 offset;
        u32 lines;
-       u32 allocated_lines;
-       bool user_mappable;
+
+       /*
+        * This signals whether allocation has been attempted. Observe
+        * 'lines' to see whether comptags were actually allocated. We
+        * try alloc only once per buffer in order not to break
+        * multiple compressible-kind mappings.
+        */
+       bool allocated;
 };
 
 struct gk20a_comptag_allocator {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index abb297ab..c0a4124c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -110,9 +110,6 @@ struct nvgpu_mapped_buf {
        u64 size;
        u32 pgsz_idx;
 
-       u32 ctag_offset;
-       u32 ctag_lines;
-       u32 ctag_allocated_lines;
        u32 flags;
        u32 kind;
 
@@ -143,17 +140,6 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
                ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node));
 }
 
-struct nvgpu_ctag_buffer_info {
-       u64 size;
-       enum gmmu_pgsz_gk20a pgsz_idx;
-       u32 flags;
-
-       s16 compr_kind;
-       s16 incompr_kind;
-
-       u32 ctag_lines;
-};
-
 struct vm_gk20a {
        struct mm_gk20a *mm;
        struct gk20a_as_share *as_share; /* as_share this represents */
@@ -221,9 +207,6 @@ void nvgpu_vm_put(struct vm_gk20a *vm);
 int vm_aspace_id(struct vm_gk20a *vm);
 int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
 
-int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
-                                struct nvgpu_ctag_buffer_info *binfo);
-
 /* batching eliminates redundant cache flushes and invalidates */
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
 void nvgpu_vm_mapping_batch_finish(
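
Taken together, the new 'allocated' flag in gk20a_comptags yields three
observable states for a buffer. A small illustrative helper, not part of
the patch, summarizing how a mapper is expected to interpret them:

#include <stdbool.h>
#include <stdint.h>

/* Mirrors the patched struct gk20a_comptags, for illustration only. */
struct comptags_view {
        uint32_t offset;
        uint32_t lines;
        bool allocated;
};

enum comptag_decision {
        COMPTAGS_TRY_ALLOC,     /* no attempt yet: alloc exactly once */
        COMPTAGS_USE,           /* attempt succeeded: compressible kind */
        COMPTAGS_FALLBACK,      /* attempt failed: incompressible, no retry */
};

static enum comptag_decision classify(const struct comptags_view *ct)
{
        if (!ct->allocated)
                return COMPTAGS_TRY_ALLOC;

        return ct->lines ? COMPTAGS_USE : COMPTAGS_FALLBACK;
}
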