author		Sami Kiminki <skiminki@nvidia.com>	2015-03-19 15:28:34 -0400
committer	Ishan Mittal <imittal@nvidia.com>	2015-05-18 02:03:19 -0400
commit		520ff00e870eadc98a50f58ecd514ced53a8612f (patch)
tree		6822d8ad2a51f98c2df421ba9cc7727e06757fcb /drivers/gpu
parent		069accc8571716dc616c9f96776d54bf657afaee (diff)
gpu: nvgpu: Implement compbits mapping
Implement NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO for requesting info
on compbits-mappable buffers, and NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS,
which enables mapping compbits to the GPU address space of said
buffers. This, subsequently, enables moving comptag swizzling from GPU
to CDEH/CDEV formats to userspace.

Compbits mapping is conservative and may map more than what is
strictly needed. This is for two reasons: 1) mapping must be done on
small-page alignment (4kB), and 2) GPU comptags are swizzled all
around the aggregate cache line, which means that the whole cache line
must be visible even if only some comptag lines are required from it.
The cache line size is not necessarily a multiple of the small page
size.

Bug 200077571

Change-Id: I5ae88fe6b616e5ea37d3bff0dff46c07e9c9267e
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/719710
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
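To make the sizing rule above concrete, here is a minimal sketch of the
conservative window computation; all identifiers are illustrative
stand-ins, not the driver's own:

#include <stdint.h>

#define SMALL_PAGE_SIZE 4096u	/* mapping granularity noted above */

/* Hypothetical helper: bytes of compbits backing store that must be
 * mapped to cover 'ctag_lines' comptag lines. */
static uint64_t compbits_win_bytes(uint32_t ctag_lines,
				   uint32_t comptags_per_cacheline,
				   uint64_t aggregate_cacheline_sz)
{
	/* reason 2: every touched aggregate cache line is mapped whole */
	uint64_t cachelines = (ctag_lines + comptags_per_cacheline - 1) /
			      comptags_per_cacheline;
	uint64_t win = cachelines * aggregate_cacheline_sz;

	/* reason 1: the window is rounded up to small-page alignment */
	return (win + SMALL_PAGE_SIZE - 1) &
	       ~(uint64_t)(SMALL_PAGE_SIZE - 1);
}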
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c   35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c       9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  168
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h   19
4 files changed, 226 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 038fa4c8..63569008 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -225,6 +225,31 @@ static int gk20a_as_ioctl_get_va_regions(
 	return 0;
 }
 
+static int gk20a_as_ioctl_get_buffer_compbits_info(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_get_buffer_compbits_info_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_get_compbits_info(as_share->vm,
+					  args->mapping_gva,
+					  &args->compbits_win_size,
+					  &args->compbits_win_ctagline,
+					  &args->mapping_ctagline,
+					  &args->flags);
+}
+
+static int gk20a_as_ioctl_map_buffer_compbits(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_map_buffer_compbits_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_map_compbits(as_share->vm,
+				     args->mapping_gva,
+				     &args->compbits_win_gva,
+				     &args->mapping_iova,
+				     args->flags);
+}
+
 int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a_as_share *as_share;
@@ -334,6 +359,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = gk20a_as_ioctl_get_va_regions(as_share,
 			(struct nvgpu_as_get_va_regions_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO:
+		err = gk20a_as_ioctl_get_buffer_compbits_info(as_share,
+			(struct nvgpu_as_get_buffer_compbits_info_args *)buf);
+		break;
+	case NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS:
+		err = gk20a_as_ioctl_map_buffer_compbits(as_share,
+			(struct nvgpu_as_map_buffer_compbits_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
 		err = -ENOTTY;
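For context, the two new ioctls are intended to be used in sequence:
query first, then map. Below is a minimal userspace sketch, assuming
uapi argument structs whose fields mirror the names visible in the
handlers above; it is illustrative only, not part of this change.

#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi */

static int map_compbits(int as_fd, __u64 mapping_gva, __u64 *win_gva)
{
	struct nvgpu_as_get_buffer_compbits_info_args info = {
		.mapping_gva = mapping_gva,
	};
	struct nvgpu_as_map_buffer_compbits_args map = {
		.mapping_gva = mapping_gva,
	};

	if (ioctl(as_fd, NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO, &info))
		return -1;

	/* only buffers whose comptags are user-mappable qualify */
	if (!(info.flags & NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE))
		return -1;

	if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS, &map))
		return -1;

	*win_gva = map.compbits_win_gva;	/* read-only compbits window */
	return 0;
}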
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 470729b7..d3114ecd 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2016,8 +2016,13 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
 	gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
 	g->ops.gr.get_rop_l2_en_mask(g);
-
-	gpu->reserved = 0;
+	gpu->gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
+	gpu->gr_gobs_per_comptagline_per_slice =
+		g->gr.gobs_per_comptagline_per_slice;
+	gpu->num_ltc = g->ltc_count;
+	gpu->lts_per_ltc = g->gr.slices_per_ltc;
+	gpu->cbc_cache_line_size = g->gr.cacheline_size;
+	gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
 
 	return 0;
 }
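The characteristics fields added above give userspace the cache
geometry it needs for compbits swizzling. In particular, the aggregate
cache line size used by the mapping code in mm_gk20a.c can be
recomputed from three of them. A hedged sketch, assuming a struct
layout that matches the field names assigned above:

#include <linux/types.h>

/* Illustrative only: mirrors the kernel-side product
 * cacheline_size * slices_per_ltc * ltc_count. */
static __u64
aggregate_cacheline_size(const struct nvgpu_gpu_characteristics *c)
{
	return (__u64)c->cbc_cache_line_size * c->lts_per_ltc * c->num_ltc;
}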
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..d896d783 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			       struct device *dev,
 			       struct dma_buf *dmabuf,
 			       struct gk20a_allocator *allocator,
-			       u32 lines, bool user_mappable)
+			       u32 lines, bool user_mappable,
+			       u64 *ctag_map_win_size,
+			       u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 offset = 0;
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				first_unneeded_cacheline *
 				g->gr.comptags_per_cacheline;
 
+			u64 win_size;
+
 			if (needed_ctaglines < ctaglines_to_allocate) {
 				/* free alignment lines */
 				int tmp=
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 			ctaglines_to_allocate = needed_ctaglines;
 		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
+			DIV_ROUND_UP(lines,
+				     g->gr.comptags_per_cacheline) *
+			aggregate_cacheline_sz;
+
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
 	priv->comptags.offset = offset;
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bool clear_ctags = false;
 	struct scatterlist *sgl;
 	u64 buf_addr;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* allocate compression resources if needed */
 	err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-				   bfr.ctag_lines, user_mappable);
+				   bfr.ctag_lines, user_mappable,
+				   &ctag_map_win_size,
+				   &ctag_map_win_ctagline);
 	if (err) {
 		/* ok to fall back here if we ran out */
 		/* TBD: we can partially alloc ctags as well... */
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->ctag_lines = bfr.ctag_lines;
 	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
 	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = kind;
@@ -1640,6 +1658,140 @@ clean_up:
 	return 0;
 }
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct device *d = dev_from_vm(vm);
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped)
+	{
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	*compbits_win_size = 0;
+	*compbits_win_ctagline = 0;
+	*mapping_ctagline = 0;
+	*flags = 0;
+
+	if (mapped_buffer->ctag_offset)
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
+
+	if (mapped_buffer->ctags_mappable)
+	{
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
+		*compbits_win_size = mapped_buffer->ctag_map_win_size;
+		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
+		*mapping_ctagline = mapped_buffer->ctag_offset;
+	}
+
+	mutex_unlock(&vm->update_gmmu_lock);
+	return 0;
+}
+
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_vm(vm);
+
+	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+		/* This will be implemented later */
+		gk20a_err(d,
+			  "%s: fixed-offset compbits mapping not yet supported",
+			  __func__);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctags_mappable) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctag_map_win_addr) {
+		const u32 small_pgsz_index = 0; /* small pages, 4K */
+		const u32 aggregate_cacheline_sz =
+			g->gr.cacheline_size * g->gr.slices_per_ltc *
+			g->ltc_count;
+
+		/* first aggregate cacheline to map */
+		u32 cacheline_start; /* inclusive */
+
+		/* offset of the start cacheline (will be page aligned) */
+		u64 cacheline_offset_start;
+
+		if (!mapped_buffer->ctag_map_win_size) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: mapping 0x%llx does not have "
+				  "mappable comptags",
+				  __func__, mapping_gva);
+			return -EFAULT;
+		}
+
+		cacheline_start = mapped_buffer->ctag_offset /
+			g->gr.comptags_per_cacheline;
+		cacheline_offset_start =
+			cacheline_start * aggregate_cacheline_sz;
+
+		mapped_buffer->ctag_map_win_addr =
+			g->ops.mm.gmmu_map(
+				vm,
+				0,
+				g->gr.compbit_store.mem.sgt,
+				cacheline_offset_start, /* sg offset */
+				mapped_buffer->ctag_map_win_size, /* size */
+				small_pgsz_index,
+				0, /* kind */
+				0, /* ctag_offset */
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_read_only,
+				false,
+				false);
+
+		if (!mapped_buffer->ctag_map_win_addr) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: failed to map comptags for mapping 0x%llx",
+				  __func__, mapping_gva);
+			return -ENOMEM;
+		}
+	}
+
+	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
+
+	mutex_unlock(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
+	if (mapped_buffer->ctag_map_win_addr) {
+		/* unmap compbits */
+
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->ctag_map_win_addr,
+				     mapped_buffer->ctag_map_win_size,
+				     0,       /* page size 4k */
+				     true,    /* va allocated */
+				     gk20a_mem_flag_none,
+				     false);  /* not sparse */
+	}
+
 	g->ops.mm.gmmu_unmap(vm,
 			     mapped_buffer->addr,
 			     mapped_buffer->size,
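For illustration: once the window is mapped, userspace doing CDEH/CDEV
swizzling needs to find a comptag line's aggregate cache line inside
it. A sketch follows, under the assumption (matching how
gk20a_vm_map_compbits() above computes cacheline_offset_start) that
the window begins at the cache line containing the window-start
ctagline; all names here are hypothetical:

#include <linux/types.h>

/* Hypothetical helper: byte offset of the aggregate cache line holding
 * 'ctagline', relative to the start of the mapped compbits window. */
static __u64 ctagline_offset_in_win(__u32 ctagline,
				    __u32 win_start_ctagline,
				    __u32 comptags_per_cacheline,
				    __u64 aggregate_cacheline_sz)
{
	__u32 first_cl = win_start_ctagline / comptags_per_cacheline;
	__u32 cl = ctagline / comptags_per_cacheline;

	return (__u64)(cl - first_cl) * aggregate_cacheline_sz;
}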
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 0ff11d09..e07b95fe 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -186,7 +186,13 @@ struct mapped_buffer_node {
 	u32 ctag_offset;
 	u32 ctag_lines;
 	u32 ctag_allocated_lines;
+
+	/* For comptag mapping, these are the mapping window parameters */
 	bool ctags_mappable;
+	u64 ctag_map_win_addr; /* non-zero if mapped */
+	u64 ctag_map_win_size; /* non-zero if ctags_mappable */
+	u32 ctag_map_win_ctagline; /* ctagline at win start, set if
+				    * ctags_mappable */
 
 	u32 flags;
 	u32 kind;
@@ -504,6 +510,19 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			u64 buffer_offset,
 			u64 mapping_size);
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags);
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags);
+
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
509 528