author     Aingara Paramakuru <aparamakuru@nvidia.com>   2014-05-05 21:14:22 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>         2015-03-18 15:11:01 -0400
commit     1fd722f592c2e0523c5e399a2406a4e387057188 (patch)
tree       3425fb1a08ec2ccc6397e39c73a5579117e00a05 /drivers/gpu/nvgpu/vgpu/mm_vgpu.c
parent     69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff)
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now supports running gk20a in a virtualized environment,
using the Tegra graphics virtualization interfaces.
Bug 1509608
Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
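
Every entry point in the new file follows the same shape: fill in a
tegra_vgpu_cmd_msg, send it to the virtualization server with
vgpu_comm_sendrecv(), and treat a non-zero err or msg.ret as failure. Below is
a minimal, self-contained sketch of that round trip; the struct layout, field
names, and the stub transport are illustrative assumptions for this note, not
the driver's definitions (the real ones live in the vgpu headers added by this
series).

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-ins for the tegra_vgpu message types. */
struct as_map_params { unsigned long long addr, gpu_va, size; };
struct cmd_msg {
        unsigned int cmd;               /* command code, e.g. an AS map */
        unsigned long long handle;      /* server-side object handle */
        int ret;                        /* status filled in by the server */
        union { struct as_map_params as_map; } params;
};

/* Stub for vgpu_comm_sendrecv(): the real call performs a synchronous
 * request/response exchange with the virtualization server. */
static int sendrecv_stub(struct cmd_msg *msg, size_t size_in, size_t size_out)
{
        (void)size_in; (void)size_out;
        msg->ret = 0;                   /* pretend the server accepted it */
        return 0;
}

int main(void)
{
        struct cmd_msg msg = { .cmd = 1, .handle = 42 };
        msg.params.as_map.addr = 0x1000;
        msg.params.as_map.size = 0x2000;

        int err = sendrecv_stub(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)             /* the failure check used throughout */
                fprintf(stderr, "command failed\n");
        return 0;
}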
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c  425
1 file changed, 425 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
new file mode 100644
index 00000000..6ed1dece
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -0,0 +1,425 @@
/*
 * Virtualized GPU Memory Management
 *
 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/dma-mapping.h>
#include "vgpu/vgpu.h"

/* note: keep the page sizes sorted lowest to highest here */
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };

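/*
 * One-time SW-side MM setup: record the page/PDE geometry and the
 * channel VA size. Repeat calls are skipped via mm->sw_ready.
 */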
static int vgpu_init_mm_setup_sw(struct gk20a *g)
{
        struct mm_gk20a *mm = &g->mm;

        gk20a_dbg_fn("");

        if (mm->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        mm->g = g;
        mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->pde_stride = mm->big_page_size << 10;
        mm->pde_stride_shift = ilog2(mm->pde_stride);
        BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */

        /* TBD: make channel vm size configurable */
        mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;

        gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));

        mm->sw_ready = true;

        return 0;
}

int vgpu_init_mm_support(struct gk20a *g)
{
        gk20a_dbg_fn("");

        return vgpu_init_mm_setup_sw(g);
}

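/*
 * Map a buffer into the channel's GPU VA space. Unlike the native gk20a
 * path, no page tables are written here: the request is forwarded to the
 * virtualization server via TEGRA_VGPU_CMD_AS_MAP, which owns the real
 * GMMU state. Returns the GPU VA on success, 0 on failure.
 */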
static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
                                u64 map_offset,
                                struct sg_table *sgt,
                                u64 buffer_offset,
                                u64 size,
                                int pgsz_idx,
                                u8 kind_v,
                                u32 ctag_offset,
                                u32 flags,
                                int rw_flag,
                                bool clear_ctags)
{
        int err = 0;
        struct device *d = dev_from_vm(vm);
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        u64 addr = gk20a_mm_iova_addr(sgt->sgl);
        u8 prot;

        gk20a_dbg_fn("");

        /* Allocate (or validate when map_offset != 0) the virtual address. */
        if (!map_offset) {
                map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
                if (!map_offset) {
                        gk20a_err(d, "failed to allocate va space");
                        err = -ENOMEM;
                        goto fail;
                }
        }

        if (rw_flag == gk20a_mem_flag_read_only)
                prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
        else if (rw_flag == gk20a_mem_flag_write_only)
                prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
        else
                prot = TEGRA_VGPU_MAP_PROT_NONE;

        msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->addr = addr;
        p->gpu_va = map_offset;
        p->size = size;
        p->pgsz_idx = pgsz_idx;
        p->iova = mapping ? 1 : 0;
        p->kind = kind_v;
        p->cacheable =
                (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
        p->prot = prot;
        p->ctag_offset = ctag_offset;
        p->clear_ctags = clear_ctags;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                goto fail;

        vm->tlb_dirty = true;
        return map_offset;
fail:
        gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
        return 0;
}

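/*
 * Undo vgpu_locked_gmmu_map(): release the local VA allocation (if this
 * mapping owned one), then ask the server to clear the GMMU PTEs via
 * TEGRA_VGPU_CMD_AS_UNMAP.
 */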
static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
                                u64 vaddr,
                                u64 size,
                                int pgsz_idx,
                                bool va_allocated,
                                int rw_flag)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        gk20a_dbg_fn("");

        if (va_allocated) {
                err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
                if (err) {
                        dev_err(dev_from_vm(vm), "failed to free va");
                        return;
                }
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->gpu_va = vaddr;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                dev_err(dev_from_vm(vm),
                        "failed to update gmmu ptes on unmap");

        vm->tlb_dirty = true;
}

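/*
 * Full VM teardown: unmap all remaining buffers, free the reserved VA
 * areas, release the server-side address space share, and destroy the
 * local VA allocators before freeing the vm itself.
 */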
static void vgpu_vm_remove_support(struct vm_gk20a *vm)
{
        struct gk20a *g = vm->mm->g;
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct mapped_buffer_node *mapped_buffer;
        struct vm_reserved_va_node *va_node, *va_node_tmp;
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct rb_node *node;
        int err;

        gk20a_dbg_fn("");
        mutex_lock(&vm->update_gmmu_lock);

        /* TBD: add a flag here for the unmap code to recognize teardown
         * and short-circuit any otherwise expensive operations. */

        node = rb_first(&vm->mapped_buffers);
        while (node) {
                mapped_buffer =
                        container_of(node, struct mapped_buffer_node, node);
                gk20a_vm_unmap_locked(mapped_buffer);
                node = rb_first(&vm->mapped_buffers);
        }

        /* destroy remaining reserved memory areas */
        list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
                reserved_va_list) {
                list_del(&va_node->reserved_va_list);
                kfree(va_node);
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);

        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);

        mutex_unlock(&vm->update_gmmu_lock);

        /* release zero page if used */
        if (vm->zero_page_cpuva)
                dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
                                  vm->zero_page_cpuva, vm->zero_page_iova);

        /* vm is not used anymore. release it. */
        kfree(vm);
}

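/*
 * Map a buffer through BAR1 by way of the server; the server picks the
 * GPU VA, which is returned here, or 0 on failure.
 */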
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping =
                to_dma_iommu_mapping(dev_from_gk20a(g));
        u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
        msg.handle = platform->virt_handle;
        p->addr = addr;
        p->size = size;
        p->iova = mapping ? 1 : 0;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                addr = 0;
        else
                addr = p->gpu_va;

        return addr;
}

/* address space interfaces for the gk20a module */
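/*
 * Create a new address space share: allocate the server-side AS with
 * TEGRA_VGPU_CMD_AS_ALLOC_SHARE, then set up the local VA allocators:
 * small pages in the lower half of the VA range (minus a one-PDE low
 * hole), big pages in the upper half.
 */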
static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
{
        struct gk20a_as *as = as_share->as;
        struct gk20a *g = gk20a_from_as(as);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct mm_gk20a *mm = &g->mm;
        struct vm_gk20a *vm;
        u64 vma_size;
        u32 num_pages, low_hole_pages;
        char name[32];
        int err;

        gk20a_dbg_fn("");

        vm = kzalloc(sizeof(*vm), GFP_KERNEL);
        if (!vm)
                return -ENOMEM;

        as_share->vm = vm;

        vm->mm = mm;
        vm->as_share = as_share;

        vm->big_pages = true;

        vm->va_start = mm->pde_stride; /* create a one pde hole */
        vm->va_limit = mm->channel.size; /* note this means channel.size is
                                            really just the max */

        msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
        msg.handle = platform->virt_handle;
        p->size = vm->va_limit;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                return -ENOMEM;

        vm->handle = p->handle;

        /* low-half: alloc small pages */
        /* high-half: alloc big pages */
        vma_size = mm->channel.size >> 1;

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_small] >> 10);
        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);

        /* num_pages above is without regard to the low-side hole. */
        low_hole_pages = (vm->va_start >>
                          gmmu_page_shifts[gmmu_page_size_small]);

        gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
                             low_hole_pages,             /* start */
                             num_pages - low_hole_pages, /* length */
                             1);                         /* align */

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_big] >> 10);

        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
        gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
                             num_pages, /* start */
                             num_pages, /* length */
                             1);        /* align */

        vm->mapped_buffers = RB_ROOT;

        mutex_init(&vm->update_gmmu_lock);
        kref_init(&vm->ref);
        INIT_LIST_HEAD(&vm->reserved_va_list);

        vm->enable_ctag = true;

        return 0;
}

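/* Bind a channel to this address space on the server side. */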
static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
                                struct channel_gk20a *ch)
{
        struct vm_gk20a *vm = as_share->vm;
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
        int err;

        gk20a_dbg_fn("");

        ch->vm = vm;
        msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
        msg.handle = platform->virt_handle;
        p->as_handle = vm->handle;
        p->chan_handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        if (err || msg.ret) {
                ch->vm = NULL;
                err = -ENOMEM;
        }

        return err;
}

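/*
 * Common helper for the cache-maintenance hooks below; FB flush and L2
 * flush/invalidate are each a single-opcode server command.
 */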
static void vgpu_cache_maint(u64 handle, u8 op)
{
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
        msg.handle = handle;
        p->op = op;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
}

static int vgpu_mm_fb_flush(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
        return 0;
}

static void vgpu_mm_l2_invalidate(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
}

static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        u8 op;

        gk20a_dbg_fn("");

        if (invalidate)
                op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
        else
                op = TEGRA_VGPU_L2_MAINT_FLUSH;

        vgpu_cache_maint(platform->virt_handle, op);
}

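/*
 * TLB invalidation is likewise delegated to the server. The map/unmap
 * paths above set tlb_dirty, so clean address spaces skip the round
 * trip entirely.
 */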
static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
        int err;

        gk20a_dbg_fn("");

        /* No need to invalidate if tlb is clean */
        mutex_lock(&vm->update_gmmu_lock);
        if (!vm->tlb_dirty) {
                mutex_unlock(&vm->update_gmmu_lock);
                return;
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
        vm->tlb_dirty = false;
        mutex_unlock(&vm->update_gmmu_lock);
}

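/* Plug the vgpu implementations into the common gpu_ops MM hooks. */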
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
        gops->mm.gmmu_map = vgpu_locked_gmmu_map;
        gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
        gops->mm.vm_remove = vgpu_vm_remove_support;
        gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
        gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
        gops->mm.fb_flush = vgpu_mm_fb_flush;
        gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
        gops->mm.l2_flush = vgpu_mm_l2_flush;
        gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
}