6 files changed, 118 insertions, 77 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c
index f55989f7..92e8aa3e 100644
--- a/drivers/gpu/nvgpu/common/linux/comptags.c
+++ b/drivers/gpu/nvgpu/common/linux/comptags.c
@@ -46,7 +46,6 @@ int gk20a_alloc_comptags(struct gk20a *g,
 {
        struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
                                                             buf->dev);
-        u32 ctaglines_allocsize;
        u32 offset;
        int err;
@@ -56,18 +55,24 @@ int gk20a_alloc_comptags(struct gk20a *g,
        if (!lines)
                return -EINVAL;
-        ctaglines_allocsize = lines;
        /* store the allocator so we can use it when we free the ctags */
        priv->comptag_allocator = allocator;
-        err = gk20a_comptaglines_alloc(allocator, &offset,
+        err = gk20a_comptaglines_alloc(allocator, &offset, lines);
-                               ctaglines_allocsize);
+        if (!err) {
-        if (err)
+                priv->comptags.offset = offset;
-                return err;
+                priv->comptags.lines = lines;
+        } else {
+                priv->comptags.offset = 0;
+                priv->comptags.lines = 0;
+        }
-        priv->comptags.offset = offset;
+        /*
-        priv->comptags.lines = lines;
+         * We don't report an error here if comptag alloc failed. The
-        priv->comptags.allocated_lines = ctaglines_allocsize;
+         * caller will simply fall back to incompressible kinds. It
+         * would not be safe to re-allocate comptags anyway on
+         * successive calls, as that would break map aliasing.
+         */
+        priv->comptags.allocated = true;
        return 0;
 }
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c
index 6b44ff55..08cf5f2b 100644
--- a/drivers/gpu/nvgpu/common/linux/dmabuf.c
+++ b/drivers/gpu/nvgpu/common/linux/dmabuf.c
@@ -41,11 +41,11 @@ static void gk20a_mm_delete_priv(void *_priv)
        g = priv->g;
-        if (priv->comptags.lines) {
+        if (priv->comptags.allocated && priv->comptags.lines) {
                BUG_ON(!priv->comptag_allocator);
                gk20a_comptaglines_free(priv->comptag_allocator,
                                priv->comptags.offset,
-                                priv->comptags.allocated_lines);
+                                priv->comptags.lines);
        }
        /* Free buffer states */
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index d6d86c94..8e464627 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -132,7 +132,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
        nvgpu_log(g, gpu_dbg_map,
                  "gv: 0x%04x_%08x + 0x%-7zu "
                  "[dma: 0x%010llx, pa: 0x%010llx] "
-                  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+                  "pgsz=%-3dKb as=%-2d "
                  "flags=0x%x apt=%s (reused)",
                  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
                  os_buf->dmabuf->size,
@@ -140,7 +140,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
                  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
                  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
                  vm_aspace_id(vm),
-                  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
                  mapped_buffer->flags,
                  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index cfac4f8e..be7e4207 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -39,6 +39,20 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
+struct nvgpu_ctag_buffer_info {
+        u64                     size;
+        enum gmmu_pgsz_gk20a    pgsz_idx;
+        u32                     flags;
+        s16                     compr_kind;
+        s16                     incompr_kind;
+        u32                     ctag_lines;
+};
+static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
+                                        struct nvgpu_ctag_buffer_info *binfo);
 static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
                             struct vm_gk20a_mapping_batch *batch);
@@ -731,11 +745,10 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
        struct gk20a *g = gk20a_from_vm(vm);
        struct nvgpu_mapped_buf *mapped_buffer = NULL;
        struct nvgpu_ctag_buffer_info binfo = { 0 };
-        struct gk20a_comptags comptags;
        struct nvgpu_vm_area *vm_area = NULL;
        int err = 0;
        u64 align;
-        u32 ctag_offset;
+        u32 ctag_offset = 0;
        bool clear_ctags = false;
        bool va_allocated = true;
@@ -746,6 +759,11 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
         */
        s16 map_key_kind;
+        /*
+         * The actual GMMU PTE kind
+         */
+        u8 pte_kind;
        if (vm->userspace_managed &&
            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
                nvgpu_err(g,
@@ -835,57 +853,91 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
        if (!vm->enable_ctag)
                binfo.ctag_lines = 0;
-        gk20a_get_comptags(os_buf, &comptags);
+        if (binfo.ctag_lines) {
+                struct gk20a_comptags comptags = { 0 };
-        if (binfo.ctag_lines && !comptags.lines) {
                /*
-                 * Allocate compression resources if needed.
+                 * Get the comptags state
                 */
-                if (gk20a_alloc_comptags(g,
+                gk20a_get_comptags(os_buf, &comptags);
-                                         os_buf,
-                                         &g->gr.comp_tags,
-                                         binfo.ctag_lines)) {
-                        /*
-                         * Prevent compression...
-                         */
-                        binfo.compr_kind = NV_KIND_INVALID;
-                        /*
-                         * ... And make sure we have a fallback.
-                         */
-                        if (binfo.incompr_kind == NV_KIND_INVALID) {
-                                nvgpu_err(g, "comptag alloc failed and no "
-                                             "fallback kind specified");
-                                err = -ENOMEM;
+                /*
+                 * Allocate if not yet allocated
+                 */
+                if (!comptags.allocated) {
+                        err = gk20a_alloc_comptags(g, os_buf,
+                                                   &g->gr.comp_tags,
+                                                   binfo.ctag_lines);
+                        if (err) {
                                /*
-                                 * Any alloced comptags are cleaned up when the
+                                 * This is an irrecoverable failure and we need
-                                 * dmabuf is freed.
+                                 * to abort. In particular, it is not safe to
+                                 * proceed with incompressible fallback, since
+                                 * we could not mark our alloc failure
+                                 * anywhere. Later we would retry allocation and
+                                 * break compressible map aliasing.
                                 */
+                                nvgpu_err(g,
+                                          "Error %d setting up comptags", err);
                                goto clean_up;
                        }
-                } else {
+                        /*
+                         * Refresh comptags state after alloc. Field
+                         * comptags.lines will be 0 if alloc failed.
+                         */
                        gk20a_get_comptags(os_buf, &comptags);
-                        if (g->ops.ltc.cbc_ctrl)
+                        /*
-                                g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+                         * Newly allocated comptags need to be cleared
-                                              comptags.offset,
+                         */
-                                              comptags.offset +
+                        if (comptags.lines) {
-                                                  comptags.allocated_lines - 1);
+                                if (g->ops.ltc.cbc_ctrl)
-                        else
+                                        g->ops.ltc.cbc_ctrl(
-                                clear_ctags = true;
+                                                g, gk20a_cbc_op_clear,
+                                                comptags.offset,
+                                                (comptags.offset +
+                                                 comptags.lines - 1));
+                                else
+                                        /*
+                                         * The comptags will be cleared as part
+                                         * of mapping (vgpu)
+                                         */
+                                        clear_ctags = true;
+                        }
                }
+                /*
+                 * Store the ctag offset for later use if we got the comptags
+                 */
+                if (comptags.lines)
+                        ctag_offset = comptags.offset;
        }
        /*
-         * Calculate comptag index for this mapping. Differs in case of partial
+         * Figure out the kind and ctag offset for the GMMU page tables
-         * mapping.
         */
-        ctag_offset = comptags.offset;
+        if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
-        if (ctag_offset)
+                /*
+                 * Adjust the ctag_offset as per the buffer map offset
+                 */
                ctag_offset += phys_offset >>
-                               ilog2(g->ops.fb.compression_page_size(g));
+                        ilog2(g->ops.fb.compression_page_size(g));
+                pte_kind = binfo.compr_kind;
+        } else if (binfo.incompr_kind != NV_KIND_INVALID) {
+                /*
+                 * Incompressible kind, ctag offset will not be programmed
+                 */
+                ctag_offset = 0;
+                pte_kind = binfo.incompr_kind;
+        } else {
+                /*
+                 * Caller required compression, but we cannot provide it
+                 */
+                nvgpu_err(g, "No comptags and no incompressible fallback kind");
+                err = -ENOMEM;
+                goto clean_up;
+        }
        map_addr = g->ops.mm.gmmu_map(vm,
                                      map_addr,
@@ -893,8 +945,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
                                      phys_offset,
                                      map_size,
                                      binfo.pgsz_idx,
-                                      binfo.compr_kind != NV_KIND_INVALID ?
+                                      pte_kind,
-                                          binfo.compr_kind : binfo.incompr_kind,
                                      ctag_offset,
                                      flags,
                                      rw,
@@ -913,9 +964,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
        mapped_buffer->addr         = map_addr;
        mapped_buffer->size         = map_size;
        mapped_buffer->pgsz_idx     = binfo.pgsz_idx;
-        mapped_buffer->ctag_offset  = ctag_offset;
-        mapped_buffer->ctag_lines   = binfo.ctag_lines;
-        mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
        mapped_buffer->vm           = vm;
        mapped_buffer->flags        = flags;
        mapped_buffer->kind         = map_key_kind;
@@ -1074,8 +1122,8 @@ done:
        return;
 }
-int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
+static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
-                                 struct nvgpu_ctag_buffer_info *binfo)
+                                        struct nvgpu_ctag_buffer_info *binfo)
 {
        bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
        struct gk20a *g = gk20a_from_vm(vm);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
index 5482d0ce..2d9f034a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
@@ -25,8 +25,14 @@ struct nvgpu_os_buffer;
 struct gk20a_comptags {
        u32 offset;
        u32 lines;
-        u32 allocated_lines;
-        bool user_mappable;
+        /*
+         * This signals whether allocation has been attempted. Observe
+         * 'lines' to see whether comptags were actually allocated. We
+         * try alloc only once per buffer in order not to break
+         * multiple compressible-kind mappings.
+         */
+        bool allocated;
 };
 struct gk20a_comptag_allocator {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index abb297ab..c0a4124c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -110,9 +110,6 @@ struct nvgpu_mapped_buf {
        u64 size;
        u32 pgsz_idx;
-        u32 ctag_offset;
-        u32 ctag_lines;
-        u32 ctag_allocated_lines;
        u32 flags;
        u32 kind;
@@ -143,17 +140,6 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
                  ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node));
 }
-struct nvgpu_ctag_buffer_info {
-        u64                     size;
-        enum gmmu_pgsz_gk20a    pgsz_idx;
-        u32                     flags;
-        s16                     compr_kind;
-        s16                     incompr_kind;
-        u32                     ctag_lines;
-};
 struct vm_gk20a {
        struct mm_gk20a *mm;
        struct gk20a_as_share *as_share; /* as_share this represents */
@@ -221,9 +207,6 @@ void nvgpu_vm_put(struct vm_gk20a *vm);
 int vm_aspace_id(struct vm_gk20a *vm);
 int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
-int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
-                                 struct nvgpu_ctag_buffer_info *binfo);
 /* batching eliminates redundant cache flushes and invalidates */
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
 void nvgpu_vm_mapping_batch_finish(

diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c index f55989f7..92e8aa3e 100644 --- a/drivers/gpu/nvgpu/common/linux/comptags.c +++ b/drivers/gpu/nvgpu/common/linux/comptags.c
@@ -46,7 +46,6 @@ int gk20a_alloc_comptags(struct gk20a *g,
46	{	46	{
47	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,	47	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
48	buf->dev);	48	buf->dev);
49	u32 ctaglines_allocsize;
50	u32 offset;	49	u32 offset;
51	int err;	50	int err;
52		51
@@ -56,18 +55,24 @@ int gk20a_alloc_comptags(struct gk20a *g,
56	if (!lines)	55	if (!lines)
57	return -EINVAL;	56	return -EINVAL;
58		57
59	ctaglines_allocsize = lines;
60
61	/* store the allocator so we can use it when we free the ctags */	58	/* store the allocator so we can use it when we free the ctags */
62	priv->comptag_allocator = allocator;	59	priv->comptag_allocator = allocator;
63	err = gk20a_comptaglines_alloc(allocator, &offset,	60	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
64	ctaglines_allocsize);	61	if (!err) {
65	if (err)	62	priv->comptags.offset = offset;
66	return err;	63	priv->comptags.lines = lines;
		64	} else {
		65	priv->comptags.offset = 0;
		66	priv->comptags.lines = 0;
		67	}
67		68
68	priv->comptags.offset = offset;	69	/*
69	priv->comptags.lines = lines;	70	* We don't report an error here if comptag alloc failed. The
70	priv->comptags.allocated_lines = ctaglines_allocsize;	71	* caller will simply fall back to incompressible kinds. It
		72	* would not be safe to re-allocate comptags anyway on
		73	* successive calls, as that would break map aliasing.
		74	*/
		75	priv->comptags.allocated = true;
71		76
72	return 0;	77	return 0;
73	}	78	}


diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c index 6b44ff55..08cf5f2b 100644 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.c +++ b/drivers/gpu/nvgpu/common/linux/dmabuf.c
@@ -41,11 +41,11 @@ static void gk20a_mm_delete_priv(void *_priv)
41		41
42	g = priv->g;	42	g = priv->g;
43		43
44	if (priv->comptags.lines) {	44	if (priv->comptags.allocated && priv->comptags.lines) {
45	BUG_ON(!priv->comptag_allocator);	45	BUG_ON(!priv->comptag_allocator);
46	gk20a_comptaglines_free(priv->comptag_allocator,	46	gk20a_comptaglines_free(priv->comptag_allocator,
47	priv->comptags.offset,	47	priv->comptags.offset,
48	priv->comptags.allocated_lines);	48	priv->comptags.lines);
49	}	49	}
50		50
51	/* Free buffer states */	51	/* Free buffer states */


diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index d6d86c94..8e464627 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -132,7 +132,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
132	nvgpu_log(g, gpu_dbg_map,	132	nvgpu_log(g, gpu_dbg_map,
133	"gv: 0x%04x_%08x + 0x%-7zu "	133	"gv: 0x%04x_%08x + 0x%-7zu "
134	"[dma: 0x%010llx, pa: 0x%010llx] "	134	"[dma: 0x%010llx, pa: 0x%010llx] "
135	"pgsz=%-3dKb as=%-2d ctags=%d start=%d "	135	"pgsz=%-3dKb as=%-2d "
136	"flags=0x%x apt=%s (reused)",	136	"flags=0x%x apt=%s (reused)",
137	u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),	137	u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
138	os_buf->dmabuf->size,	138	os_buf->dmabuf->size,
@@ -140,7 +140,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
140	(u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),	140	(u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
141	vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,	141	vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
142	vm_aspace_id(vm),	142	vm_aspace_id(vm),
143	mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
144	mapped_buffer->flags,	143	mapped_buffer->flags,
145	nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));	144	nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
146		145


diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index cfac4f8e..be7e4207 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -39,6 +39,20 @@
39	#include "gk20a/gk20a.h"	39	#include "gk20a/gk20a.h"
40	#include "gk20a/mm_gk20a.h"	40	#include "gk20a/mm_gk20a.h"
41		41
		42	struct nvgpu_ctag_buffer_info {
		43	u64 size;
		44	enum gmmu_pgsz_gk20a pgsz_idx;
		45	u32 flags;
		46
		47	s16 compr_kind;
		48	s16 incompr_kind;
		49
		50	u32 ctag_lines;
		51	};
		52
		53	static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
		54	struct nvgpu_ctag_buffer_info *binfo);
		55
42	static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,	56	static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
43	struct vm_gk20a_mapping_batch *batch);	57	struct vm_gk20a_mapping_batch *batch);
44		58
@@ -731,11 +745,10 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
731	struct gk20a *g = gk20a_from_vm(vm);	745	struct gk20a *g = gk20a_from_vm(vm);
732	struct nvgpu_mapped_buf *mapped_buffer = NULL;	746	struct nvgpu_mapped_buf *mapped_buffer = NULL;
733	struct nvgpu_ctag_buffer_info binfo = { 0 };	747	struct nvgpu_ctag_buffer_info binfo = { 0 };
734	struct gk20a_comptags comptags;
735	struct nvgpu_vm_area *vm_area = NULL;	748	struct nvgpu_vm_area *vm_area = NULL;
736	int err = 0;	749	int err = 0;
737	u64 align;	750	u64 align;
738	u32 ctag_offset;	751	u32 ctag_offset = 0;
739	bool clear_ctags = false;	752	bool clear_ctags = false;
740	bool va_allocated = true;	753	bool va_allocated = true;
741		754
@@ -746,6 +759,11 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
746	*/	759	*/
747	s16 map_key_kind;	760	s16 map_key_kind;
748		761
		762	/*
		763	* The actual GMMU PTE kind
		764	*/
		765	u8 pte_kind;
		766
749	if (vm->userspace_managed &&	767	if (vm->userspace_managed &&
750	!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {	768	!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
751	nvgpu_err(g,	769	nvgpu_err(g,
@@ -835,57 +853,91 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
835	if (!vm->enable_ctag)	853	if (!vm->enable_ctag)
836	binfo.ctag_lines = 0;	854	binfo.ctag_lines = 0;
837		855
838	gk20a_get_comptags(os_buf, &comptags);	856	if (binfo.ctag_lines) {
		857	struct gk20a_comptags comptags = { 0 };
839		858
840	if (binfo.ctag_lines && !comptags.lines) {
841	/*	859	/*
842	* Allocate compression resources if needed.	860	* Get the comptags state
843	*/	861	*/
844	if (gk20a_alloc_comptags(g,	862	gk20a_get_comptags(os_buf, &comptags);
845	os_buf,
846	&g->gr.comp_tags,
847	binfo.ctag_lines)) {
848
849	/*
850	* Prevent compression...
851	*/
852	binfo.compr_kind = NV_KIND_INVALID;
853
854	/*
855	* ... And make sure we have a fallback.
856	*/
857	if (binfo.incompr_kind == NV_KIND_INVALID) {
858	nvgpu_err(g, "comptag alloc failed and no "
859	"fallback kind specified");
860	err = -ENOMEM;
861		863
		864	/*
		865	* Allocate if not yet allocated
		866	*/
		867	if (!comptags.allocated) {
		868	err = gk20a_alloc_comptags(g, os_buf,
		869	&g->gr.comp_tags,
		870	binfo.ctag_lines);
		871	if (err) {
862	/*	872	/*
863	* Any alloced comptags are cleaned up when the	873	* This is an irrecoverable failure and we need
864	* dmabuf is freed.	874	* to abort. In particular, it is not safe to
		875	* proceed with incompressible fallback, since
		876	* we could not mark our alloc failure
		877	* anywhere. Later we would retry allocation and
		878	* break compressible map aliasing.
865	*/	879	*/
		880	nvgpu_err(g,
		881	"Error %d setting up comptags", err);
866	goto clean_up;	882	goto clean_up;
867	}	883	}
868	} else {	884
		885	/*
		886	* Refresh comptags state after alloc. Field
		887	* comptags.lines will be 0 if alloc failed.
		888	*/
869	gk20a_get_comptags(os_buf, &comptags);	889	gk20a_get_comptags(os_buf, &comptags);
870		890
871	if (g->ops.ltc.cbc_ctrl)	891	/*
872	g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,	892	* Newly allocated comptags need to be cleared
873	comptags.offset,	893	*/
874	comptags.offset +	894	if (comptags.lines) {
875	comptags.allocated_lines - 1);	895	if (g->ops.ltc.cbc_ctrl)
876	else	896	g->ops.ltc.cbc_ctrl(
877	clear_ctags = true;	897	g, gk20a_cbc_op_clear,
		898	comptags.offset,
		899	(comptags.offset +
		900	comptags.lines - 1));
		901	else
		902	/*
		903	* The comptags will be cleared as part
		904	* of mapping (vgpu)
		905	*/
		906	clear_ctags = true;
		907	}
878	}	908	}
		909
		910	/*
		911	* Store the ctag offset for later use if we got the comptags
		912	*/
		913	if (comptags.lines)
		914	ctag_offset = comptags.offset;
879	}	915	}
880		916
881	/*	917	/*
882	* Calculate comptag index for this mapping. Differs in case of partial	918	* Figure out the kind and ctag offset for the GMMU page tables
883	* mapping.
884	*/	919	*/
885	ctag_offset = comptags.offset;	920	if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
886	if (ctag_offset)	921	/*
		922	* Adjust the ctag_offset as per the buffer map offset
		923	*/
887	ctag_offset += phys_offset >>	924	ctag_offset += phys_offset >>
888	ilog2(g->ops.fb.compression_page_size(g));	925	ilog2(g->ops.fb.compression_page_size(g));
		926	pte_kind = binfo.compr_kind;
		927	} else if (binfo.incompr_kind != NV_KIND_INVALID) {
		928	/*
		929	* Incompressible kind, ctag offset will not be programmed
		930	*/
		931	ctag_offset = 0;
		932	pte_kind = binfo.incompr_kind;
		933	} else {
		934	/*
		935	* Caller required compression, but we cannot provide it
		936	*/
		937	nvgpu_err(g, "No comptags and no incompressible fallback kind");
		938	err = -ENOMEM;
		939	goto clean_up;
		940	}
889		941
890	map_addr = g->ops.mm.gmmu_map(vm,	942	map_addr = g->ops.mm.gmmu_map(vm,
891	map_addr,	943	map_addr,
@@ -893,8 +945,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
893	phys_offset,	945	phys_offset,
894	map_size,	946	map_size,
895	binfo.pgsz_idx,	947	binfo.pgsz_idx,
896	binfo.compr_kind != NV_KIND_INVALID ?	948	pte_kind,
897	binfo.compr_kind : binfo.incompr_kind,
898	ctag_offset,	949	ctag_offset,
899	flags,	950	flags,
900	rw,	951	rw,
@@ -913,9 +964,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
913	mapped_buffer->addr = map_addr;	964	mapped_buffer->addr = map_addr;
914	mapped_buffer->size = map_size;	965	mapped_buffer->size = map_size;
915	mapped_buffer->pgsz_idx = binfo.pgsz_idx;	966	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
916	mapped_buffer->ctag_offset = ctag_offset;
917	mapped_buffer->ctag_lines = binfo.ctag_lines;
918	mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
919	mapped_buffer->vm = vm;	967	mapped_buffer->vm = vm;
920	mapped_buffer->flags = flags;	968	mapped_buffer->flags = flags;
921	mapped_buffer->kind = map_key_kind;	969	mapped_buffer->kind = map_key_kind;
@@ -1074,8 +1122,8 @@ done:
1074	return;	1122	return;
1075	}	1123	}
1076		1124
1077	int nvgpu_vm_compute_compression(struct vm_gk20a *vm,	1125	static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
1078	struct nvgpu_ctag_buffer_info *binfo)	1126	struct nvgpu_ctag_buffer_info *binfo)
1079	{	1127	{
1080	bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);	1128	bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
1081	struct gk20a *g = gk20a_from_vm(vm);	1129	struct gk20a *g = gk20a_from_vm(vm);


diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h index 5482d0ce..2d9f034a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h +++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
@@ -25,8 +25,14 @@ struct nvgpu_os_buffer;
25	struct gk20a_comptags {	25	struct gk20a_comptags {
26	u32 offset;	26	u32 offset;
27	u32 lines;	27	u32 lines;
28	u32 allocated_lines;	28
29	bool user_mappable;	29	/*
		30	* This signals whether allocation has been attempted. Observe
		31	* 'lines' to see whether comptags were actually allocated. We
		32	* try alloc only once per buffer in order not to break
		33	* multiple compressible-kind mappings.
		34	*/
		35	bool allocated;
30	};	36	};
31		37
32	struct gk20a_comptag_allocator {	38	struct gk20a_comptag_allocator {


diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index abb297ab..c0a4124c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -110,9 +110,6 @@ struct nvgpu_mapped_buf {
110	u64 size;	110	u64 size;
111		111
112	u32 pgsz_idx;	112	u32 pgsz_idx;
113	u32 ctag_offset;
114	u32 ctag_lines;
115	u32 ctag_allocated_lines;
116		113
117	u32 flags;	114	u32 flags;
118	u32 kind;	115	u32 kind;
@@ -143,17 +140,6 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
143	((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node));	140	((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node));
144	}	141	}
145		142
146	struct nvgpu_ctag_buffer_info {
147	u64 size;
148	enum gmmu_pgsz_gk20a pgsz_idx;
149	u32 flags;
150
151	s16 compr_kind;
152	s16 incompr_kind;
153
154	u32 ctag_lines;
155	};
156
157	struct vm_gk20a {	143	struct vm_gk20a {
158	struct mm_gk20a *mm;	144	struct mm_gk20a *mm;
159	struct gk20a_as_share *as_share; /* as_share this represents */	145	struct gk20a_as_share *as_share; /* as_share this represents */
@@ -221,9 +207,6 @@ void nvgpu_vm_put(struct vm_gk20a *vm);
221	int vm_aspace_id(struct vm_gk20a *vm);	207	int vm_aspace_id(struct vm_gk20a *vm);
222	int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);	208	int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
223		209
224	int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
225	struct nvgpu_ctag_buffer_info *binfo);
226
227	/* batching eliminates redundant cache flushes and invalidates */	210	/* batching eliminates redundant cache flushes and invalidates */
228	void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);	211	void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
229	void nvgpu_vm_mapping_batch_finish(	212	void nvgpu_vm_mapping_batch_finish(