7 files changed, 169 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
index 5b0fb910..0c52271a 100644
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -975,7 +975,7 @@ __releases(&l->cde_app->mutex)
        u64 big_page_mask = 0;
        u32 flags;
        int err, i;
-        const s32 compbits_kind = 0;
+        const s16 compbits_kind = 0;
        gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
                  compbits_byte_offset, scatterbuffer_byte_offset);
@@ -1038,8 +1038,11 @@ __releases(&l->cde_app->mutex)
        /* map the destination buffer */
        get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
        map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
-                                 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+                                 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE |
-                                 compbits_kind, true,
+                                 NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL,
+                                 NV_KIND_INVALID,
+                                 compbits_kind, /* incompressible kind */
+                                 true,
                                 gk20a_mem_flag_none,
                                 map_offset, map_size,
                                 NULL);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
index d4242955..cfc4e7ef 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
@@ -79,11 +79,22 @@ static int gk20a_as_ioctl_map_buffer_ex(
                struct gk20a_as_share *as_share,
                struct nvgpu_as_map_buffer_ex_args *args)
 {
+        s16 compressible_kind;
+        s16 incompressible_kind;
        gk20a_dbg_fn("");
+        if (args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
+                compressible_kind = args->compr_kind;
+                incompressible_kind = args->incompr_kind;
+        } else {
+                compressible_kind = args->kind;
+                incompressible_kind = NV_KIND_INVALID;
+        }
        return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
                                   &args->offset, args->flags,
-                                   args->kind,
+                                   compressible_kind, incompressible_kind,
                                   args->buffer_offset,
                                   args->mapping_size,
                                   NULL);
@@ -97,6 +108,7 @@ static int gk20a_as_ioctl_map_buffer(
        return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
                                   &args->o_a.offset,
                                   args->flags, NV_KIND_DEFAULT,
+                                   NV_KIND_DEFAULT,
                                   0, 0, NULL);
        /* args->o_a.offset will be set if !err */
 }
@@ -158,6 +170,9 @@ static int gk20a_as_ioctl_map_buffer_batch(
        }
        for (i = 0; i < args->num_maps; ++i) {
+                s16 compressible_kind;
+                s16 incompressible_kind;
                struct nvgpu_as_map_buffer_ex_args map_args;
                memset(&map_args, 0, sizeof(map_args));
@@ -167,10 +182,19 @@ static int gk20a_as_ioctl_map_buffer_batch(
                        break;
                }
+                if (map_args.flags &
+                    NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
+                        compressible_kind = map_args.compr_kind;
+                        incompressible_kind = map_args.incompr_kind;
+                } else {
+                        compressible_kind = map_args.kind;
+                        incompressible_kind = NV_KIND_INVALID;
+                }
                err = nvgpu_vm_map_buffer(
                        as_share->vm, map_args.dmabuf_fd,
                        &map_args.offset, map_args.flags,
-                        map_args.kind,
+                        compressible_kind, incompressible_kind,
                        map_args.buffer_offset,
                        map_args.mapping_size,
                        &batch);
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 58e2da13..86d8bec9 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -177,11 +177,46 @@ static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
        return mapped_buffer->addr;
 }
+/*
+ * Translate the caller-supplied kinds into the kind fields of @bfr.
+ *
+ * With NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL set in @flags,
+ * @compr_kind and @incompr_kind are each optional (NV_KIND_INVALID means
+ * "not supplied"), but at least one of them must be a usable kind. Every
+ * supplied kind is stored in kind_v / uc_kind_v and its use_* flag is set.
+ *
+ * Without the flag, @compr_kind is the one and only kind and is mandatory;
+ * @incompr_kind is not looked at.
+ *
+ * This helper only ever sets the use_* flags, it never clears them.
+ * NOTE(review): presumably the caller hands in a zero-initialized @bfr;
+ * confirm at the call site.
+ *
+ * Returns 0 on success, or -EINVAL if a kind that would be used lies
+ * outside [0, NV_KIND_ATTR_SIZE).
+ */
+static int setup_bfr_kind_fields(struct buffer_attrs *bfr, s16 compr_kind,
+                                 s16 incompr_kind, u32 flags)
+{
+        const bool compr_in_range = compr_kind >= 0 &&
+                                    compr_kind < NV_KIND_ATTR_SIZE;
+        const bool incompr_in_range = incompr_kind >= 0 &&
+                                      incompr_kind < NV_KIND_ATTR_SIZE;
+        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
+                /* were we supplied with a kind in either parameter? */
+                if (!compr_in_range && !incompr_in_range)
+                        return -EINVAL;
+                /*
+                 * A kind that is neither NV_KIND_INVALID nor in range must
+                 * be rejected here: the (u8) casts below would otherwise
+                 * silently truncate it into an unrelated kind value.
+                 */
+                if ((compr_kind != NV_KIND_INVALID && !compr_in_range) ||
+                    (incompr_kind != NV_KIND_INVALID && !incompr_in_range))
+                        return -EINVAL;
+                if (compr_kind != NV_KIND_INVALID) {
+                        bfr->use_kind_v = true;
+                        bfr->kind_v = (u8)compr_kind;
+                }
+                if (incompr_kind != NV_KIND_INVALID) {
+                        bfr->use_uc_kind_v = true;
+                        bfr->uc_kind_v = (u8)incompr_kind;
+                }
+        } else {
+                if (!compr_in_range)
+                        return -EINVAL;
+                bfr->use_kind_v = true;
+                bfr->kind_v = (u8)compr_kind;
+                /*
+                 * Note: setup_buffer_kind_and_compression() will
+                 * figure out uc_kind_v or return an error
+                 */
+        }
+        return 0;
+}
 u64 nvgpu_vm_map(struct vm_gk20a *vm,
                 struct dma_buf *dmabuf,
                 u64 offset_align,
                 u32 flags,
-                 int kind,
+                 s16 compr_kind,
+                 s16 incompr_kind,
                 bool user_mapped,
                 int rw_flag,
                 u64 buffer_offset,
@@ -203,6 +238,22 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
        u32 ctag_offset;
        enum nvgpu_aperture aperture;
+        /*
+         * The kind used as part of the key for map caching. HW may
+         * actually be programmed with the fallback kind in case the
+         * key kind is compressible but we're out of comptags.
+         */
+        s16 map_key_kind;
+        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
+                if (compr_kind != NV_KIND_INVALID)
+                        map_key_kind = compr_kind;
+                else
+                        map_key_kind = incompr_kind;
+        } else {
+                map_key_kind = compr_kind;
+        }
        if (user_mapped && vm->userspace_managed &&
            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
                nvgpu_err(g, "non-fixed-offset mapping not available on "
@@ -216,7 +267,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
        if (!vm->userspace_managed) {
                map_offset = __nvgpu_vm_find_mapping(
                        vm, dmabuf, offset_align,
-                        flags, kind,
+                        flags, map_key_kind,
                        user_mapped, rw_flag);
                if (map_offset) {
                        nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -239,12 +290,10 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
                goto clean_up;
        }
-        if (kind >= NV_KIND_ATTR_SIZE) {
+        err = setup_bfr_kind_fields(&bfr, compr_kind, incompr_kind, flags);
-                err = -EINVAL;
+        if (err)
                goto clean_up;
-        } else {
-                bfr.kind_v = (u8)kind;
-        }
        bfr.size = dmabuf->size;
        sgl = bfr.sgt->sgl;
@@ -306,10 +355,15 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
                err = gk20a_alloc_comptags(g, dev, dmabuf,
                                           ctag_allocator,
                                           bfr.ctag_lines);
-                if (err) {
+                if (unlikely(err)) {
-                        /* ok to fall back here if we ran out */
                        /* TBD: we can partially alloc ctags as well... */
-                        bfr.kind_v = bfr.uc_kind_v;
+                        if (bfr.use_uc_kind_v) {
+                                /* no comptags, but fallback kind available */
+                                bfr.kind_v = bfr.uc_kind_v;
+                        } else {
+                                nvgpu_err(g, "comptag alloc failed and no fallback kind specified");
+                                goto clean_up;
+                        }
                } else {
                        gk20a_get_comptags(dev,
                                           dmabuf, &comptags);
@@ -371,7 +425,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
        mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
        mapped_buffer->vm          = vm;
        mapped_buffer->flags       = flags;
-        mapped_buffer->kind        = kind;
+        mapped_buffer->kind        = map_key_kind;
        mapped_buffer->va_allocated = va_allocated;
        mapped_buffer->user_mapped = user_mapped ? 1 : 0;
        mapped_buffer->own_mem_ref = user_mapped;
diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h
index 4f6b10bb..1eadf1d0 100644
--- a/drivers/gpu/nvgpu/common/linux/vm_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h
@@ -34,7 +34,9 @@ struct buffer_attrs {
        u32 ctag_allocated_lines;
        int pgsz_idx;
        u8 kind_v;
+        bool use_kind_v;
        u8 uc_kind_v;
+        bool use_uc_kind_v;
        bool ctag_user_mappable;
 };
@@ -42,19 +44,43 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
                 struct dma_buf *dmabuf,
                 u64 offset_align,
                 u32 flags,
-                 int kind,
+                 /*
+                  * compressible kind if
+                  * NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is
+                  * specified, otherwise just the kind
+                  */
+                 s16 compr_kind,
+                 /*
+                  * incompressible kind if
+                  * NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is
+                  * specified, otherwise ignored
+                  */
+                 s16 incompr_kind,
                 bool user_mapped,
                 int rw_flag,
                 u64 buffer_offset,
                 u64 mapping_size,
                 struct vm_gk20a_mapping_batch *mapping_batch);
-/* Note: batch may be NULL if map op is not part of a batch */
+/*
+ * Notes:
+ * - Batch may be NULL if map op is not part of a batch.
+ * - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is set,
+ *   compr_kind and incompr_kind work as explained in nvgpu.h.
+ * - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is NOT set,
+ *   compr_kind holds the kind and kernel will figure out whether
+ *   it is a compressible or incompressible kind. If compressible, kernel will
+ *   also figure out the incompressible counterpart or return an error.
+ */
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                        int dmabuf_fd,
                        u64 *offset_align,
                        u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
-                        int kind,
+                        s16 compr_kind,
+                        s16 incompr_kind,
                        u64 buffer_offset,
                        u64 mapping_size,
                        struct vm_gk20a_mapping_batch *batch);
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 19433df9..2b37a62a 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1929,6 +1929,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
                        0,
                        0,
                        0,
+                        0,
                        args->mapping_size,
                        NULL);
        if (err)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 455fa238..e9948c16 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -411,7 +411,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
                        gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
        }
-        gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS;
+        gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS |
+                NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL;
        if (IS_ENABLED(CONFIG_SYNC))
                gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a6507d2d..97b7aa80 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -822,14 +822,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
        nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
-int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
+static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
-                                      u32 flags,
+                             bool *pkind_compressible)
-                                      struct buffer_attrs *bfr,
-                                      enum gmmu_pgsz_gk20a pgsz_idx)
 {
-        bool kind_compressible;
        struct gk20a *g = gk20a_from_vm(vm);
-        int ctag_granularity = g->ops.fb.compression_page_size(g);
+        bool kind_compressible;
        if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
                bfr->kind_v = gmmu_pte_kind_pitch_v();
@@ -840,7 +837,7 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
        }
        bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
-        /* find a suitable uncompressed kind if it becomes necessary later */
+        /* find a suitable incompressible kind if it becomes necessary later */
        kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
        if (kind_compressible) {
                bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
@@ -852,6 +849,36 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
                        return -EINVAL;
                }
        }
+        *pkind_compressible = kind_compressible;
+        return 0;
+}
+int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
+                                      u32 flags,
+                                      struct buffer_attrs *bfr,
+                                      enum gmmu_pgsz_gk20a pgsz_idx)
+{
+        bool kind_compressible;
+        struct gk20a *g = gk20a_from_vm(vm);
+        int ctag_granularity = g->ops.fb.compression_page_size(g);
+        if (!bfr->use_kind_v)
+                bfr->kind_v = gmmu_pte_kind_invalid_v();
+        if (!bfr->use_uc_kind_v)
+                bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
+        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
+                kind_compressible = (bfr->kind_v != gmmu_pte_kind_invalid_v());
+                if (!kind_compressible)
+                        bfr->kind_v = bfr->uc_kind_v;
+        } else {
+                int err = setup_kind_legacy(vm, bfr, &kind_compressible);
+                if (err)
+                        return err;
+        }
        /* comptags only supported for suitable kinds, 128KB pagesize */
        if (kind_compressible &&
            vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) {
@@ -865,6 +892,9 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
        else
                bfr->ctag_lines = 0;
+        bfr->use_kind_v = (bfr->kind_v != gmmu_pte_kind_invalid_v());
+        bfr->use_uc_kind_v = (bfr->uc_kind_v != gmmu_pte_kind_invalid_v());
        return 0;
 }
@@ -1649,7 +1679,8 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                        int dmabuf_fd,
                        u64 *offset_align,
                        u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
-                        int kind,
+                        s16 compr_kind,
+                        s16 incompr_kind,
                        u64 buffer_offset,
                        u64 mapping_size,
                        struct vm_gk20a_mapping_batch *batch)
@@ -1690,7 +1721,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
        }
        ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
-                        flags, kind, true,
+                        flags, compr_kind, incompr_kind, true,
                        gk20a_mem_flag_none,
                        buffer_offset,
                        mapping_size,

diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c index 5b0fb910..0c52271a 100644 --- a/drivers/gpu/nvgpu/common/linux/cde.c +++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -975,7 +975,7 @@ __releases(&l->cde_app->mutex)
975	u64 big_page_mask = 0;	975	u64 big_page_mask = 0;
976	u32 flags;	976	u32 flags;
977	int err, i;	977	int err, i;
978	const s32 compbits_kind = 0;	978	const s16 compbits_kind = 0;
979		979
980	gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",	980	gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
981	compbits_byte_offset, scatterbuffer_byte_offset);	981	compbits_byte_offset, scatterbuffer_byte_offset);
@@ -1038,8 +1038,11 @@ __releases(&l->cde_app->mutex)
1038	/* map the destination buffer */	1038	/* map the destination buffer */
1039	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */	1039	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
1040	map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,	1040	map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
1041	NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,	1041	NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE |
1042	compbits_kind, true,	1042	NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL,
		1043	NV_KIND_INVALID,
		1044	compbits_kind, /* incompressible kind */
		1045	true,
1043	gk20a_mem_flag_none,	1046	gk20a_mem_flag_none,
1044	map_offset, map_size,	1047	map_offset, map_size,
1045	NULL);	1048	NULL);


diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c index d4242955..cfc4e7ef 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
@@ -79,11 +79,22 @@ static int gk20a_as_ioctl_map_buffer_ex(
79	struct gk20a_as_share *as_share,	79	struct gk20a_as_share *as_share,
80	struct nvgpu_as_map_buffer_ex_args *args)	80	struct nvgpu_as_map_buffer_ex_args *args)
81	{	81	{
		82	s16 compressible_kind;
		83	s16 incompressible_kind;
		84
82	gk20a_dbg_fn("");	85	gk20a_dbg_fn("");
83		86
		87	if (args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		88	compressible_kind = args->compr_kind;
		89	incompressible_kind = args->incompr_kind;
		90	} else {
		91	compressible_kind = args->kind;
		92	incompressible_kind = NV_KIND_INVALID;
		93	}
		94
84	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,	95	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
85	&args->offset, args->flags,	96	&args->offset, args->flags,
86	args->kind,	97	compressible_kind, incompressible_kind,
87	args->buffer_offset,	98	args->buffer_offset,
88	args->mapping_size,	99	args->mapping_size,
89	NULL);	100	NULL);
@@ -97,6 +108,7 @@ static int gk20a_as_ioctl_map_buffer(
97	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,	108	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
98	&args->o_a.offset,	109	&args->o_a.offset,
99	args->flags, NV_KIND_DEFAULT,	110	args->flags, NV_KIND_DEFAULT,
		111	NV_KIND_DEFAULT,
100	0, 0, NULL);	112	0, 0, NULL);
101	/* args->o_a.offset will be set if !err */	113	/* args->o_a.offset will be set if !err */
102	}	114	}
@@ -158,6 +170,9 @@ static int gk20a_as_ioctl_map_buffer_batch(
158	}	170	}
159		171
160	for (i = 0; i < args->num_maps; ++i) {	172	for (i = 0; i < args->num_maps; ++i) {
		173	s16 compressible_kind;
		174	s16 incompressible_kind;
		175
161	struct nvgpu_as_map_buffer_ex_args map_args;	176	struct nvgpu_as_map_buffer_ex_args map_args;
162	memset(&map_args, 0, sizeof(map_args));	177	memset(&map_args, 0, sizeof(map_args));
163		178
@@ -167,10 +182,19 @@ static int gk20a_as_ioctl_map_buffer_batch(
167	break;	182	break;
168	}	183	}
169		184
		185	if (map_args.flags &
		186	NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		187	compressible_kind = map_args.compr_kind;
		188	incompressible_kind = map_args.incompr_kind;
		189	} else {
		190	compressible_kind = map_args.kind;
		191	incompressible_kind = NV_KIND_INVALID;
		192	}
		193
170	err = nvgpu_vm_map_buffer(	194	err = nvgpu_vm_map_buffer(
171	as_share->vm, map_args.dmabuf_fd,	195	as_share->vm, map_args.dmabuf_fd,
172	&map_args.offset, map_args.flags,	196	&map_args.offset, map_args.flags,
173	map_args.kind,	197	compressible_kind, incompressible_kind,
174	map_args.buffer_offset,	198	map_args.buffer_offset,
175	map_args.mapping_size,	199	map_args.mapping_size,
176	&batch);	200	&batch);


diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 58e2da13..86d8bec9 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -177,11 +177,46 @@ static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
177	return mapped_buffer->addr;	177	return mapped_buffer->addr;
178	}	178	}
179		179
		180	static int setup_bfr_kind_fields(struct buffer_attrs *bfr, s16 compr_kind,
		181	s16 incompr_kind, u32 flags)
		182	{
		183	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		184	/* were we supplied with a kind in either parameter? */
		185	if ((compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE) &&
		186	(incompr_kind < 0 || incompr_kind >= NV_KIND_ATTR_SIZE))
		187	return -EINVAL;
		188
		189	if (compr_kind != NV_KIND_INVALID) {
		190	bfr->use_kind_v = true;
		191	bfr->kind_v = (u8)compr_kind;
		192	}
		193
		194	if (incompr_kind != NV_KIND_INVALID) {
		195	bfr->use_uc_kind_v = true;
		196	bfr->uc_kind_v = (u8)incompr_kind;
		197	}
		198	} else {
		199	if (compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE)
		200	return -EINVAL;
		201
		202	bfr->use_kind_v = true;
		203	bfr->kind_v = (u8)compr_kind;
		204
		205	/*
		206	* Note: setup_buffer_kind_and_compression() will
		207	* figure out uc_kind_v or return an error
		208	*/
		209	}
		210
		211	return 0;
		212	}
		213
180	u64 nvgpu_vm_map(struct vm_gk20a *vm,	214	u64 nvgpu_vm_map(struct vm_gk20a *vm,
181	struct dma_buf *dmabuf,	215	struct dma_buf *dmabuf,
182	u64 offset_align,	216	u64 offset_align,
183	u32 flags,	217	u32 flags,
184	int kind,	218	s16 compr_kind,
		219	s16 incompr_kind,
185	bool user_mapped,	220	bool user_mapped,
186	int rw_flag,	221	int rw_flag,
187	u64 buffer_offset,	222	u64 buffer_offset,
@@ -203,6 +238,22 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
203	u32 ctag_offset;	238	u32 ctag_offset;
204	enum nvgpu_aperture aperture;	239	enum nvgpu_aperture aperture;
205		240
		241	/*
		242	* The kind used as part of the key for map caching. HW may
		243	* actually be programmed with the fallback kind in case the
		244	* key kind is compressible but we're out of comptags.
		245	*/
		246	s16 map_key_kind;
		247
		248	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		249	if (compr_kind != NV_KIND_INVALID)
		250	map_key_kind = compr_kind;
		251	else
		252	map_key_kind = incompr_kind;
		253	} else {
		254	map_key_kind = compr_kind;
		255	}
		256
206	if (user_mapped && vm->userspace_managed &&	257	if (user_mapped && vm->userspace_managed &&
207	!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {	258	!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
208	nvgpu_err(g, "non-fixed-offset mapping not available on "	259	nvgpu_err(g, "non-fixed-offset mapping not available on "
@@ -216,7 +267,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
216	if (!vm->userspace_managed) {	267	if (!vm->userspace_managed) {
217	map_offset = __nvgpu_vm_find_mapping(	268	map_offset = __nvgpu_vm_find_mapping(
218	vm, dmabuf, offset_align,	269	vm, dmabuf, offset_align,
219	flags, kind,	270	flags, map_key_kind,
220	user_mapped, rw_flag);	271	user_mapped, rw_flag);
221	if (map_offset) {	272	if (map_offset) {
222	nvgpu_mutex_release(&vm->update_gmmu_lock);	273	nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -239,12 +290,10 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
239	goto clean_up;	290	goto clean_up;
240	}	291	}
241		292
242	if (kind >= NV_KIND_ATTR_SIZE) {	293	err = setup_bfr_kind_fields(&bfr, compr_kind, incompr_kind, flags);
243	err = -EINVAL;	294	if (err)
244	goto clean_up;	295	goto clean_up;
245	} else {	296
246	bfr.kind_v = (u8)kind;
247	}
248	bfr.size = dmabuf->size;	297	bfr.size = dmabuf->size;
249	sgl = bfr.sgt->sgl;	298	sgl = bfr.sgt->sgl;
250		299
@@ -306,10 +355,15 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
306	err = gk20a_alloc_comptags(g, dev, dmabuf,	355	err = gk20a_alloc_comptags(g, dev, dmabuf,
307	ctag_allocator,	356	ctag_allocator,
308	bfr.ctag_lines);	357	bfr.ctag_lines);
309	if (err) {	358	if (unlikely(err)) {
310	/* ok to fall back here if we ran out */
311	/* TBD: we can partially alloc ctags as well... */	359	/* TBD: we can partially alloc ctags as well... */
312	bfr.kind_v = bfr.uc_kind_v;	360	if (bfr.use_uc_kind_v) {
		361	/* no comptags, but fallback kind available */
		362	bfr.kind_v = bfr.uc_kind_v;
		363	} else {
		364	nvgpu_err(g, "comptag alloc failed and no fallback kind specified");
		365	goto clean_up;
		366	}
313	} else {	367	} else {
314	gk20a_get_comptags(dev,	368	gk20a_get_comptags(dev,
315	dmabuf, &comptags);	369	dmabuf, &comptags);
@@ -371,7 +425,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
371	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;	425	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
372	mapped_buffer->vm = vm;	426	mapped_buffer->vm = vm;
373	mapped_buffer->flags = flags;	427	mapped_buffer->flags = flags;
374	mapped_buffer->kind = kind;	428	mapped_buffer->kind = map_key_kind;
375	mapped_buffer->va_allocated = va_allocated;	429	mapped_buffer->va_allocated = va_allocated;
376	mapped_buffer->user_mapped = user_mapped ? 1 : 0;	430	mapped_buffer->user_mapped = user_mapped ? 1 : 0;
377	mapped_buffer->own_mem_ref = user_mapped;	431	mapped_buffer->own_mem_ref = user_mapped;


diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h index 4f6b10bb..1eadf1d0 100644 --- a/drivers/gpu/nvgpu/common/linux/vm_priv.h +++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h
@@ -34,7 +34,9 @@ struct buffer_attrs {
34	u32 ctag_allocated_lines;	34	u32 ctag_allocated_lines;
35	int pgsz_idx;	35	int pgsz_idx;
36	u8 kind_v;	36	u8 kind_v;
		37	bool use_kind_v;
37	u8 uc_kind_v;	38	u8 uc_kind_v;
		39	bool use_uc_kind_v;
38	bool ctag_user_mappable;	40	bool ctag_user_mappable;
39	};	41	};
40		42
@@ -42,19 +44,43 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
42	struct dma_buf *dmabuf,	44	struct dma_buf *dmabuf,
43	u64 offset_align,	45	u64 offset_align,
44	u32 flags,	46	u32 flags,
45	int kind,	47
		48	/*
		49	* compressible kind if
		50	* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is
		51	* specified, otherwise just the kind
		52	*/
		53	s16 compr_kind,
		54
		55	/*
		56	* incompressible kind if
		57	* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is
		58	* specified, otherwise ignored
		59	*/
		60	s16 incompr_kind,
		61
46	bool user_mapped,	62	bool user_mapped,
47	int rw_flag,	63	int rw_flag,
48	u64 buffer_offset,	64	u64 buffer_offset,
49	u64 mapping_size,	65	u64 mapping_size,
50	struct vm_gk20a_mapping_batch *mapping_batch);	66	struct vm_gk20a_mapping_batch *mapping_batch);
51		67
52	/* Note: batch may be NULL if map op is not part of a batch */	68	/*
		69	* Notes:
		70	* - Batch may be NULL if map op is not part of a batch.
		71	* - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is set,
		72	* compr_kind and incompr_kind work as explained in nvgpu.h.
		73	* - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is NOT set,
		74	* compr_kind holds the kind and kernel will figure out whether
		75	* it is a compressible or incompressible kind. If compressible, kernel will
		76	* also figure out the incompressible counterpart or return an error.
		77	*/
53	int nvgpu_vm_map_buffer(struct vm_gk20a *vm,	78	int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
54	int dmabuf_fd,	79	int dmabuf_fd,
55	u64 *offset_align,	80	u64 *offset_align,
56	u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */	81	u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
57	int kind,	82	s16 compr_kind,
		83	s16 incompr_kind,
58	u64 buffer_offset,	84	u64 buffer_offset,
59	u64 mapping_size,	85	u64 mapping_size,
60	struct vm_gk20a_mapping_batch *batch);	86	struct vm_gk20a_mapping_batch *batch);


diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 19433df9..2b37a62a 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1929,6 +1929,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
1929	0,	1929	0,
1930	0,	1930	0,
1931	0,	1931	0,
		1932	0,
1932	args->mapping_size,	1933	args->mapping_size,
1933	NULL);	1934	NULL);
1934	if (err)	1935	if (err)


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 455fa238..e9948c16 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -411,7 +411,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
411	gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();	411	gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
412	}	412	}
413		413
414	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS;	414	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS |
		415	NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL;
415		416
416	if (IS_ENABLED(CONFIG_SYNC))	417	if (IS_ENABLED(CONFIG_SYNC))
417	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;	418	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a6507d2d..97b7aa80 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -822,14 +822,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
822	nvgpu_mutex_release(&vm->update_gmmu_lock);	822	nvgpu_mutex_release(&vm->update_gmmu_lock);
823	}	823	}
824		824
825	int setup_buffer_kind_and_compression(struct vm_gk20a *vm,	825	static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
826	u32 flags,	826	bool *pkind_compressible)
827	struct buffer_attrs *bfr,
828	enum gmmu_pgsz_gk20a pgsz_idx)
829	{	827	{
830	bool kind_compressible;
831	struct gk20a *g = gk20a_from_vm(vm);	828	struct gk20a *g = gk20a_from_vm(vm);
832	int ctag_granularity = g->ops.fb.compression_page_size(g);	829	bool kind_compressible;
833		830
834	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))	831	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
835	bfr->kind_v = gmmu_pte_kind_pitch_v();	832	bfr->kind_v = gmmu_pte_kind_pitch_v();
@@ -840,7 +837,7 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
840	}	837	}
841		838
842	bfr->uc_kind_v = gmmu_pte_kind_invalid_v();	839	bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
843	/* find a suitable uncompressed kind if it becomes necessary later */	840	/* find a suitable incompressible kind if it becomes necessary later */
844	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);	841	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
845	if (kind_compressible) {	842	if (kind_compressible) {
846	bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);	843	bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
@@ -852,6 +849,36 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
852	return -EINVAL;	849	return -EINVAL;
853	}	850	}
854	}	851	}
		852
		853	*pkind_compressible = kind_compressible;
		854	return 0;
		855	}
		856
		857	int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
		858	u32 flags,
		859	struct buffer_attrs *bfr,
		860	enum gmmu_pgsz_gk20a pgsz_idx)
		861	{
		862	bool kind_compressible;
		863	struct gk20a *g = gk20a_from_vm(vm);
		864	int ctag_granularity = g->ops.fb.compression_page_size(g);
		865
		866	if (!bfr->use_kind_v)
		867	bfr->kind_v = gmmu_pte_kind_invalid_v();
		868	if (!bfr->use_uc_kind_v)
		869	bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
		870
		871	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		872	kind_compressible = (bfr->kind_v != gmmu_pte_kind_invalid_v());
		873	if (!kind_compressible)
		874	bfr->kind_v = bfr->uc_kind_v;
		875	} else {
		876	int err = setup_kind_legacy(vm, bfr, &kind_compressible);
		877
		878	if (err)
		879	return err;
		880	}
		881
855	/* comptags only supported for suitable kinds, 128KB pagesize */	882	/* comptags only supported for suitable kinds, 128KB pagesize */
856	if (kind_compressible &&	883	if (kind_compressible &&
857	vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) {	884	vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) {
@@ -865,6 +892,9 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
865	else	892	else
866	bfr->ctag_lines = 0;	893	bfr->ctag_lines = 0;
867		894
		895	bfr->use_kind_v = (bfr->kind_v != gmmu_pte_kind_invalid_v());
		896	bfr->use_uc_kind_v = (bfr->uc_kind_v != gmmu_pte_kind_invalid_v());
		897
868	return 0;	898	return 0;
869	}	899	}
870		900
@@ -1649,7 +1679,8 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
1649	int dmabuf_fd,	1679	int dmabuf_fd,
1650	u64 *offset_align,	1680	u64 *offset_align,
1651	u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/	1681	u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
1652	int kind,	1682	s16 compr_kind,
		1683	s16 incompr_kind,
1653	u64 buffer_offset,	1684	u64 buffer_offset,
1654	u64 mapping_size,	1685	u64 mapping_size,
1655	struct vm_gk20a_mapping_batch *batch)	1686	struct vm_gk20a_mapping_batch *batch)
@@ -1690,7 +1721,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
1690	}	1721	}
1691		1722
1692	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,	1723	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
1693	flags, kind, true,	1724	flags, compr_kind, incompr_kind, true,
1694	gk20a_mem_flag_none,	1725	gk20a_mem_flag_none,
1695	buffer_offset,	1726	buffer_offset,
1696	mapping_size,	1727	mapping_size,