author    Alex Waterman <alexw@nvidia.com>                      2017-04-17 19:26:28 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-05-19 18:34:01 -0400
commit    d37e8f7dcf190f31f9c0c12583db2bb0c0d313c0 (patch)
tree      4807c89bf40954c54804c3a8dd88c16849181f29 /drivers/gpu
parent    dcb744acfbbc11e66cac2d0a674a42e62d908b9d (diff)
gpu: nvgpu: Split VM interface out
This patch begins the major rework of the GPU's virtual memory manager (VMM). The VMM is the piece of code that handles the userspace interface to buffers and their mappings into the GMMU. The core data structure is the VM - for now still known as 'struct vm_gk20a'. Each one of these structs represents one address space to which channels or TSGs may bind themselves.

The VMM splits the interface up into two broad categories. First there are the common, OS independent interfaces; and second there are the OS specific interfaces.

OS independent
--------------

This is the code that manages the lifetime of VMs and of the buffers inside VMs (search, batch mapping): creation, destruction, etc.

OS Specific
-----------

This handles the mapping of buffers as they are represented by the OS (dma_bufs, for example, on Linux).

This patch is by no means complete. There are still Linux specific functions scattered in ostensibly OS independent code. This is the first step. A patch that rewrites everything in one go would simply be too big to review effectively. Instead, the goal of this change is simply to separate out the basic OS specific and OS agnostic interfaces into their own header files. The next series of patches will start to pull the relevant implementations into OS specific C files and common C files.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I242c7206047b6c769296226d855b7e44d5c4bfa8
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1464939
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
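To make the intended layering concrete, here is a minimal sketch of the split, built only from declarations this patch itself introduces or renames (illustrative; not code from the patch):

    /* common/mm/vm.c - OS independent: VM lifetime, no Linux types. */
    void nvgpu_vm_get(struct vm_gk20a *vm);   /* kref_get() on the VM */
    void nvgpu_vm_put(struct vm_gk20a *vm);   /* last put tears the VM down */

    /* common/linux/vm_priv.h - OS specific: dmabuf FDs only exist here. */
    int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                            int dmabuf_fd,    /* Linux dma_buf handle */
                            u64 *offset_align,
                            u32 flags,        /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
                            int kind,
                            u64 buffer_offset,
                            u64 mapping_size,
                            struct vm_gk20a_mapping_batch *batch);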
Diffstat (limited to 'drivers/gpu')
-rw-r--r--   drivers/gpu/nvgpu/Makefile.nvgpu            |   1
-rw-r--r--   drivers/gpu/nvgpu/common/linux/ioctl_as.c   |  21
-rw-r--r--   drivers/gpu/nvgpu/common/linux/vm_priv.h    |  62
-rw-r--r--   drivers/gpu/nvgpu/common/mm/vm.c            | 129
-rw-r--r--   drivers/gpu/nvgpu/common/mm/vm_area.c       | 223
-rw-r--r--   drivers/gpu/nvgpu/gk20a/cde_gk20a.c         |  16
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c     |  17
-rw-r--r--   drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c     |  17
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gr_gk20a.c          |   4
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c          | 183
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.h          | 174
-rw-r--r--   drivers/gpu/nvgpu/gk20a/tsg_gk20a.c         |   2
-rw-r--r--   drivers/gpu/nvgpu/gm20b/mm_gm20b.c          |   2
-rw-r--r--   drivers/gpu/nvgpu/gp10b/mm_gp10b.c          |   4
-rw-r--r--   drivers/gpu/nvgpu/include/nvgpu/gmmu.h      |  35
-rw-r--r--   drivers/gpu/nvgpu/include/nvgpu/vm.h        | 144
-rw-r--r--   drivers/gpu/nvgpu/vgpu/gr_vgpu.c            |   4
-rw-r--r--   drivers/gpu/nvgpu/vgpu/mm_vgpu.c            |   6
18 files changed, 694 insertions, 350 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 04107dbc..0a60eece 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -45,6 +45,7 @@ nvgpu-y := \
 	common/mm/page_allocator.o \
 	common/mm/lockless_allocator.o \
 	common/mm/gmmu.o \
+	common/mm/vm.o \
 	common/pramin.o \
 	common/semaphore.o \
 	common/as.o \
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
index 4bbcedda..7a24a14f 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c
@@ -28,6 +28,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
 #include "ioctl_as.h"
+#include "vm_priv.h"
 
 static int gk20a_as_ioctl_bind_channel(
 		struct gk20a_as_share *as_share,
@@ -72,7 +73,7 @@ static int gk20a_as_ioctl_map_buffer_ex(
 {
 	gk20a_dbg_fn("");
 
-	return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd,
+	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
				   &args->offset, args->flags,
				   args->kind,
				   args->buffer_offset,
@@ -85,7 +86,7 @@ static int gk20a_as_ioctl_map_buffer(
 		struct nvgpu_as_map_buffer_args *args)
 {
 	gk20a_dbg_fn("");
-	return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd,
+	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
				   &args->o_a.offset,
				   args->flags, NV_KIND_DEFAULT,
				   0, 0, NULL);
@@ -97,7 +98,7 @@ static int gk20a_as_ioctl_unmap_buffer(
 		struct nvgpu_as_unmap_buffer_args *args)
 {
 	gk20a_dbg_fn("");
-	return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL);
+	return nvgpu_vm_unmap_buffer(as_share->vm, args->offset, NULL);
 }
 
 static int gk20a_as_ioctl_map_buffer_batch(
@@ -123,7 +124,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 	    args->num_maps > g->gpu_characteristics.map_buffer_batch_limit)
 		return -EINVAL;
 
-	gk20a_vm_mapping_batch_start(&batch);
+	nvgpu_vm_mapping_batch_start(&batch);
 
 	for (i = 0; i < args->num_unmaps; ++i) {
 		struct nvgpu_as_unmap_buffer_args unmap_args;
@@ -134,14 +135,14 @@ static int gk20a_as_ioctl_map_buffer_batch(
 			break;
 		}
 
-		err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset,
+		err = nvgpu_vm_unmap_buffer(as_share->vm, unmap_args.offset,
					    &batch);
 		if (err)
 			break;
 	}
 
 	if (err) {
-		gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+		nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
 
 		args->num_unmaps = i;
 		args->num_maps = 0;
@@ -158,7 +159,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 			break;
 		}
 
-		err = gk20a_vm_map_buffer(
+		err = nvgpu_vm_map_buffer(
 			as_share->vm, map_args.dmabuf_fd,
 			&map_args.offset, map_args.flags,
 			map_args.kind,
@@ -169,7 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 			break;
 		}
 
-	gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+	nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
 
 	if (err)
 		args->num_maps = i;
@@ -228,7 +229,7 @@ static int gk20a_as_ioctl_get_buffer_compbits_info(
 		struct nvgpu_as_get_buffer_compbits_info_args *args)
 {
 	gk20a_dbg_fn("");
-	return gk20a_vm_get_compbits_info(as_share->vm,
+	return nvgpu_vm_get_compbits_info(as_share->vm,
					  args->mapping_gva,
					  &args->compbits_win_size,
					  &args->compbits_win_ctagline,
@@ -241,7 +242,7 @@ static int gk20a_as_ioctl_map_buffer_compbits(
 		struct nvgpu_as_map_buffer_compbits_args *args)
 {
 	gk20a_dbg_fn("");
-	return gk20a_vm_map_compbits(as_share->vm,
+	return nvgpu_vm_map_compbits(as_share->vm,
				     args->mapping_gva,
				     &args->compbits_win_gva,
				     &args->mapping_iova,
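For context, the renamed batching calls above follow the pattern below - a condensed sketch of gk20a_as_ioctl_map_buffer_batch with the loops and error handling trimmed (the bare locals stand in for the ioctl args):

    struct vm_gk20a_mapping_batch batch;
    int err;

    nvgpu_vm_mapping_batch_start(&batch);

    /* Any number of unmaps/maps against the same VM share one batch... */
    err = nvgpu_vm_unmap_buffer(as_share->vm, offset, &batch);
    if (!err)
            err = nvgpu_vm_map_buffer(as_share->vm, dmabuf_fd, &map_offset,
                                      flags, kind, 0, 0, &batch);

    /* ...so the L2 flush / TLB invalidate cost is paid once, at the end. */
    nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);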
diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h
new file mode 100644
index 00000000..c0fb0ffe
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __COMMON_LINUX_VM_PRIV_H__
+#define __COMMON_LINUX_VM_PRIV_H__
+
+#include <nvgpu/types.h>
+
+struct sg_table;
+struct dma_buf;
+
+struct vm_gk20a;
+struct vm_gk20a_mapping_batch;
+
+u64 nvgpu_vm_map(struct vm_gk20a *vm,
+		 struct dma_buf *dmabuf,
+		 u64 offset_align,
+		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
+		 int kind,
+		 struct sg_table **sgt,
+		 bool user_mapped,
+		 int rw_flag,
+		 u64 buffer_offset,
+		 u64 mapping_size,
+		 struct vm_gk20a_mapping_batch *mapping_batch);
+
+int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags);
+
+/* Note: batch may be NULL if map op is not part of a batch */
+int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
+			int dmabuf_fd,
+			u64 *offset_align,
+			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
+			int kind,
+			u64 buffer_offset,
+			u64 mapping_size,
+			struct vm_gk20a_mapping_batch *batch);
+
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);
+
+/* find buffer corresponding to va */
+int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
+			 struct dma_buf **dmabuf,
+			 u64 *offset);
+#endif
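As a rough usage sketch of the lookup half of this header (modeled on the tracing path in channel_gk20a.c further down; the surrounding locals are assumed):

    struct dma_buf *dmabuf;
    u64 offset;

    /* Translate a GPU VA back to the dma_buf that backs it (Linux only). */
    if (!nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset)) {
            void *mem = dma_buf_vmap(dmabuf);  /* CPU view of the buffer */
            /* ... inspect the buffer contents ... */
            dma_buf_vunmap(dmabuf, mem);
    }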
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
new file mode 100644
index 00000000..eaf30fd0
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/vm.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/list.h>
+#include <nvgpu/rbtree.h>
+#include <nvgpu/semaphore.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	memset(mapping_batch, 0, sizeof(*mapping_batch));
+	mapping_batch->gpu_l2_flushed = false;
+	mapping_batch->need_tlb_invalidate = false;
+}
+
+void nvgpu_vm_mapping_batch_finish_locked(
+	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	/* hanging kref_put batch pointer? */
+	WARN_ON(vm->kref_put_batch == mapping_batch);
+
+	if (mapping_batch->need_tlb_invalidate) {
+		struct gk20a *g = gk20a_from_vm(vm);
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+	}
+}
+
+void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
+				   struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
+
+void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct vm_reserved_va_node *va_node, *va_node_tmp;
+	struct nvgpu_rbtree_node *node = NULL;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
+	 * pool involves unmapping a GMMU mapping which means acquiring the
+	 * update_gmmu_lock.
+	 */
+	if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) {
+		if (vm->sema_pool) {
+			nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
+			nvgpu_semaphore_pool_put(vm->sema_pool);
+		}
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
+	while (node) {
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
+		nvgpu_vm_unmap_locked(mapped_buffer, NULL);
+		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
+	}
+
+	/* destroy remaining reserved memory areas */
+	nvgpu_list_for_each_entry_safe(va_node, va_node_tmp,
+			&vm->reserved_va_list,
+			vm_reserved_va_node, reserved_va_list) {
+		nvgpu_list_del(&va_node->reserved_va_list);
+		nvgpu_kfree(vm->mm->g, va_node);
+	}
+
+	nvgpu_deinit_vm(vm);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
+
+void nvgpu_vm_remove_support(struct vm_gk20a *vm)
+{
+	nvgpu_vm_remove_support_nofree(vm);
+	/* vm is not used anymore. release it. */
+	nvgpu_kfree(vm->mm->g, vm);
+}
+
+static void nvgpu_vm_remove_support_kref(struct kref *ref)
+{
+	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	g->ops.mm.vm_remove(vm);
+}
+
+void nvgpu_vm_get(struct vm_gk20a *vm)
+{
+	kref_get(&vm->ref);
+}
+
+void nvgpu_vm_put(struct vm_gk20a *vm)
+{
+	kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
+}
+
+void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
+{
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	gk20a_free_inst_block(g, inst_block);
+	nvgpu_vm_remove_support_nofree(vm);
+}
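The lifetime rules this file establishes, sketched end to end (a minimal sketch; ch is an assumed channel, and the hook wiring is as set up by this patch in gk20a_init_mm()):

    /* A channel binding takes a reference on the address space... */
    nvgpu_vm_get(ch->vm);                /* kref_get(&vm->ref) */

    /* ...and unbinding drops it. */
    nvgpu_vm_put(ch->vm);                /* kref_put(&vm->ref, ...) */

    /*
     * The last put runs nvgpu_vm_remove_support_kref(), which calls
     * g->ops.mm.vm_remove(vm); on gk20a/gm20b that hook is now
     * nvgpu_vm_remove_support(): unmap all buffers, free the reserved
     * VA areas, deinit the VM, then free the struct itself.
     */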
diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c
new file mode 100644
index 00000000..7b831947
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/vm.h>
+#include <nvgpu/vm_area.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr)
+{
+	struct nvgpu_vm_area *vm_area;
+
+	nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list,
+				  nvgpu_vm_area, vm_area_list) {
+		if (addr >= vm_area->addr &&
+		    addr < (u64)vm_area->addr + (u64)vm_area->size)
+			return vm_area;
+	}
+
+	return NULL;
+}
+
+int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm,
+				  u64 map_addr, u64 map_size, int pgsz_idx,
+				  struct nvgpu_vm_area **pvm_area)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_vm_area *vm_area;
+	struct nvgpu_mapped_buf *buffer;
+	u64 map_end = map_addr + map_size;
+
+	/* can wrap around with insane map_size; zero is disallowed too */
+	if (map_end <= map_addr) {
+		nvgpu_warn(g, "fixed offset mapping with invalid map_size");
+		return -EINVAL;
+	}
+
+	if (map_addr & (vm->gmmu_page_sizes[pgsz_idx] - 1)) {
+		nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx",
+			  map_addr);
+		return -EINVAL;
+	}
+
+	/* Find the space reservation, but it's ok to have none for
+	 * userspace-managed address spaces */
+	vm_area = nvgpu_vm_area_find(vm, map_addr);
+	if (!vm_area && !vm->userspace_managed) {
+		nvgpu_warn(g, "fixed offset mapping without space allocation");
+		return -EINVAL;
+	}
+
+	/* Mapped area should fit inside va, if there's one */
+	if (vm_area && map_end > vm_area->addr + vm_area->size) {
+		nvgpu_warn(g, "fixed offset mapping size overflows va node");
+		return -EINVAL;
+	}
+
+	/* check that this mapping does not collide with existing
+	 * mappings by checking the buffer with the highest GPU VA
+	 * that is less than our buffer end */
+	buffer = __nvgpu_vm_find_mapped_buf_less_than(
+		vm, map_addr + map_size);
+	if (buffer && buffer->addr + buffer->size > map_addr) {
+		nvgpu_warn(g, "overlapping buffer map requested");
+		return -EINVAL;
+	}
+
+	*pvm_area = vm_area;
+
+	return 0;
+}
+
+int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
+			u64 *addr, u32 flags)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_allocator *vma;
+	struct nvgpu_vm_area *vm_area;
+	u64 vaddr_start = 0;
+	int pgsz_idx = gmmu_page_size_small;
+
+	nvgpu_log(g, gpu_dbg_map,
+		  "ADD vm_area: pgsz=%#-8x pages=%-9u addr=%#-14llx flags=0x%x",
+		  page_size, pages, *addr, flags);
+
+	for (; pgsz_idx < gmmu_nr_page_sizes; pgsz_idx++) {
+		if (vm->gmmu_page_sizes[pgsz_idx] == page_size)
+			break;
+	}
+
+	if (pgsz_idx > gmmu_page_size_big)
+		return -EINVAL;
+
+	if (!vm->big_pages && pgsz_idx == gmmu_page_size_big)
+		return -EINVAL;
+
+	vm_area = nvgpu_kzalloc(g, sizeof(*vm_area));
+	if (!vm_area)
+		goto clean_up_err;
+
+	vma = vm->vma[pgsz_idx];
+	if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
+		vaddr_start = nvgpu_alloc_fixed(vma, *addr,
+						(u64)pages *
+						(u64)page_size,
+						page_size);
+	else
+		vaddr_start = nvgpu_alloc(vma,
+					  (u64)pages *
+					  (u64)page_size);
+
+	if (!vaddr_start)
+		goto clean_up_err;
+
+	vm_area->flags = flags;
+	vm_area->addr = vaddr_start;
+	vm_area->size = (u64)page_size * (u64)pages;
+	vm_area->pgsz_idx = pgsz_idx;
+	nvgpu_init_list_node(&vm_area->buffer_list_head);
+	nvgpu_init_list_node(&vm_area->vm_area_list);
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) {
+		u64 map_addr = g->ops.mm.gmmu_map(vm, vaddr_start,
+					NULL,
+					0,
+					vm_area->size,
+					pgsz_idx,
+					0,
+					0,
+					flags,
+					gk20a_mem_flag_none,
+					false,
+					true,
+					false,
+					NULL,
+					APERTURE_INVALID);
+		if (!map_addr) {
+			nvgpu_mutex_release(&vm->update_gmmu_lock);
+			goto clean_up_err;
+		}
+
+		vm_area->sparse = true;
+	}
+	nvgpu_list_add_tail(&vm_area->vm_area_list, &vm->vm_area_list);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	*addr = vaddr_start;
+	return 0;
+
+clean_up_err:
+	if (vaddr_start)
+		nvgpu_free(vma, vaddr_start);
+	if (vm_area)
+		nvgpu_kfree(g, vm_area);
+	return -ENOMEM;
+}
+
+int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *buffer, *n;
+	struct nvgpu_vm_area *vm_area;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	vm_area = nvgpu_vm_area_find(vm, addr);
+	if (!vm_area) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		return 0;
+	}
+	nvgpu_list_del(&vm_area->vm_area_list);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	nvgpu_log(g, gpu_dbg_map,
+		  "DEL vm_area: pgsz=%#-8x pages=%-9llu "
+		  "addr=%#-14llx flags=0x%x",
+		  vm->gmmu_page_sizes[vm_area->pgsz_idx],
+		  vm_area->size / vm->gmmu_page_sizes[vm_area->pgsz_idx],
+		  vm_area->addr,
+		  vm_area->flags);
+
+	/* Decrement the ref count on all buffers in this vm_area. This
+	 * allows userspace to let the kernel free mappings that are
+	 * only used by this vm_area. */
+	nvgpu_list_for_each_entry_safe(buffer, n,
+			&vm_area->buffer_list_head,
+			nvgpu_mapped_buf, buffer_list) {
+		nvgpu_list_del(&buffer->buffer_list);
+		kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref);
+	}
+
+	/* if this was a sparse mapping, free the va */
+	if (vm_area->sparse)
+		g->ops.mm.gmmu_unmap(vm,
+				     vm_area->addr,
+				     vm_area->size,
+				     vm_area->pgsz_idx,
+				     true,
+				     gk20a_mem_flag_none,
+				     true,
+				     NULL);
+
+	nvgpu_free(vm->vma[vm_area->pgsz_idx], vm_area->addr);
+	nvgpu_kfree(g, vm_area);
+
+	return 0;
+}
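Putting the three entry points above together, a condensed sketch of the reservation flow (sizes and flags illustrative; vm is an assumed, already-initialized address space):

    u64 va = 0;
    int err;

    /* Reserve 256 small (4K) pages of GPU VA; the allocator picks va. */
    err = nvgpu_vm_area_alloc(vm, 256, SZ_4K, &va, 0);

    /*
     * Fixed-offset buffer maps that land inside [va, va + 256 * SZ_4K)
     * are vetted by nvgpu_vm_area_validate_buffer() for alignment,
     * wrap-around and overlap with existing mappings.
     */

    /* Freeing the area unmaps its remaining buffers and returns the VA. */
    err = nvgpu_vm_area_free(vm, va);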
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index d5d75be5..cf95019b 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -46,6 +46,12 @@
 #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 
+/*
+ * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the
+ * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem.
+ */
+#include "common/linux/vm_priv.h"
+
 static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
 static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
 
@@ -1016,8 +1022,8 @@ __releases(&cde_app->mutex)
 
 
 	/* map the destination buffer */
-	get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */
-	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
+	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
+	map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
				 compbits_kind, NULL, true,
				 gk20a_mem_flag_none,
@@ -1136,7 +1142,7 @@ __releases(&cde_app->mutex)
 	cde_ctx->init_cmd_executed = true;
 
 	/* unmap the buffers - channel holds references to them now */
-	gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
+	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
 
 	return err;
 
@@ -1144,7 +1150,7 @@ exit_unmap_surface:
 	if (surface)
 		dma_buf_vunmap(compbits_scatter_buf, surface);
 exit_unmap_vaddr:
-	gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
+	nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
 exit_idle:
 	gk20a_idle(g);
 	return err;
@@ -1277,7 +1283,7 @@ err_init_cde_img:
 	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
 err_map_backingstore:
 err_alloc_gpfifo:
-	gk20a_vm_put(ch->vm);
+	nvgpu_vm_put(ch->vm);
 err_commit_va:
 err_get_gk20a_channel:
 	nvgpu_release_firmware(g, img);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 31358468..b7fb363e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -39,6 +39,13 @@
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 
 /*
+ * This is required for nvgpu_vm_find_buffer() which is used in the tracing
+ * code. Once we can get and access userspace buffers without requiring
+ * direct dma_buf usage this can be removed.
+ */
+#include "common/linux/vm_priv.h"
+
+/*
  * Although channels do have pointers back to the gk20a struct that they were
  * created under in cases where the driver is killed that pointer can be bad.
  * The channel memory can be freed before the release() function for a given
@@ -550,7 +557,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	/*
 	 * When releasing the channel we unbind the VM - so release the ref.
 	 */
-	gk20a_vm_put(ch_vm);
+	nvgpu_vm_put(ch_vm);
 
 	nvgpu_spinlock_acquire(&ch->update_fn_lock);
 	ch->update_fn = NULL;
@@ -1399,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 		int err;
 
 		words = pbdma_gp_entry1_length_v(g->entry1);
-		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
+		err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
 		if (!err)
 			mem = dma_buf_vmap(dmabuf);
 	}
@@ -1901,7 +1908,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
 
 	if (!skip_buffer_refcounting) {
-		err = gk20a_vm_get_buffers(vm, &mapped_buffers,
+		err = nvgpu_vm_get_buffers(vm, &mapped_buffers,
					   &num_mapped_buffers);
 		if (err)
 			return err;
@@ -1940,7 +1947,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	return 0;
 
 err_put_buffers:
-	gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
+	nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
 
 	return err;
 }
@@ -2039,7 +2046,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	}
 
 	if (job->num_mapped_buffers)
-		gk20a_vm_put_buffers(vm, job->mapped_buffers,
+		nvgpu_vm_put_buffers(vm, job->mapped_buffers,
				     job->num_mapped_buffers);
 
 	/* Remove job from channel's job list before we close the
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index a08eb047..5351750a 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -26,6 +26,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
+#include <nvgpu/vm.h>
 
 #include "gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -38,6 +39,14 @@
 #include <nvgpu/hw/gk20a/hw_perf_gk20a.h>
 
 /*
+ * Currently this code uses nvgpu_vm_map_buffer() since it takes dmabuf FDs from
+ * the dbg ioctls. That has to change; this needs to hide the usage of dmabufs
+ * in Linux specific code. All core driver usage of mapping must be done through
+ * nvgpu_gmmu_map().
+ */
+#include "common/linux/vm_priv.h"
+
+/*
  * API to get first channel from the list of all channels
 * bound to the debug session
 */
@@ -1844,7 +1853,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -EBUSY;
 	}
 
-	err = gk20a_init_vm(mm, vm, big_page_size,
+	err = nvgpu_init_vm(mm, vm, big_page_size,
			big_page_size << 10,
			NV_MM_DEFAULT_KERNEL_SIZE,
			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -1860,7 +1869,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
 
-	err = gk20a_vm_map_buffer(vm,
+	err = nvgpu_vm_map_buffer(vm,
			args->dmabuf_fd,
			&args->offset,
			0,
@@ -1913,7 +1922,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	return 0;
 
 err_unmap:
-	gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
 err_remove_vm:
 	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1952,7 +1961,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 
 	err = gk20a_perfbuf_disable_locked(g);
 
-	gk20a_vm_unmap_buffer(vm, offset, NULL);
+	nvgpu_vm_unmap_buffer(vm, offset, NULL);
 	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
 
 	g->perfbuf.owner = NULL;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4d41f9ff..9bd07894 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3182,14 +3182,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 	} else {
 		if (!tsg->tsg_gr_ctx) {
 			tsg->vm = c->vm;
-			gk20a_vm_get(tsg->vm);
+			nvgpu_vm_get(tsg->vm);
 			err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
							args->class_num,
							args->flags);
 			if (err) {
 				nvgpu_err(g,
					"fail to allocate TSG gr ctx buffer");
-				gk20a_vm_put(tsg->vm);
+				nvgpu_vm_put(tsg->vm);
 				tsg->vm = NULL;
 				goto out;
 			}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 201c2090..72a3ee13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -55,6 +55,12 @@
 #include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
 
+/*
+ * Necessary while transitioning to less coupled code. Will be removed once
+ * all the common APIs no longer have Linux stuff in them.
+ */
+#include "common/linux/vm_priv.h"
+
 #if defined(CONFIG_GK20A_VIDMEM)
 static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
 #endif
@@ -177,8 +183,6 @@ struct gk20a_vidmem_buf {
 	void (*dmabuf_priv_delete)(void *);
 };
 
-static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
-
 static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
		u32 *offset, u32 len)
 {
@@ -460,16 +464,6 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 	return 0;
 }
 
-void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
-{
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_dbg_fn("");
-
-	gk20a_free_inst_block(g, inst_block);
-	gk20a_vm_remove_support_nofree(vm);
-}
-
 static void gk20a_vidmem_destroy(struct gk20a *g)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
@@ -487,7 +481,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
-	gk20a_vm_remove_support_nofree(&mm->ce.vm);
+	nvgpu_vm_remove_support_nofree(&mm->ce.vm);
 
 }
 
@@ -503,7 +497,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 
 	gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
-	gk20a_vm_remove_support_nofree(&mm->cde.vm);
+	nvgpu_vm_remove_support_nofree(&mm->cde.vm);
 
 	gk20a_semaphore_sea_destroy(g);
 	gk20a_vidmem_destroy(g);
@@ -1102,7 +1096,7 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
 	return NULL;
 }
 
-int gk20a_vm_get_buffers(struct vm_gk20a *vm,
+int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
			 struct mapped_buffer_node ***mapped_buffers,
			 int *num_buffers)
 {
@@ -1151,37 +1145,10 @@ static void gk20a_vm_unmap_locked_kref(struct kref *ref)
 {
 	struct mapped_buffer_node *mapped_buffer =
		container_of(ref, struct mapped_buffer_node, ref);
-	gk20a_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
-}
-
-void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
-{
-	memset(mapping_batch, 0, sizeof(*mapping_batch));
-	mapping_batch->gpu_l2_flushed = false;
-	mapping_batch->need_tlb_invalidate = false;
+	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
 }
 
-void gk20a_vm_mapping_batch_finish_locked(
-	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
-{
-	/* hanging kref_put batch pointer? */
-	WARN_ON(vm->kref_put_batch == mapping_batch);
-
-	if (mapping_batch->need_tlb_invalidate) {
-		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-	}
-}
-
-void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm,
-			struct vm_gk20a_mapping_batch *mapping_batch)
-{
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
-
-void gk20a_vm_put_buffers(struct vm_gk20a *vm,
+void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
			  struct mapped_buffer_node **mapped_buffers,
			  int num_buffers)
 {
@@ -1192,7 +1159,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
 		return;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	gk20a_vm_mapping_batch_start(&batch);
+	nvgpu_vm_mapping_batch_start(&batch);
 	vm->kref_put_batch = &batch;
 
 	for (i = 0; i < num_buffers; ++i)
@@ -1200,13 +1167,13 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
			 gk20a_vm_unmap_locked_kref);
 
 	vm->kref_put_batch = NULL;
-	gk20a_vm_mapping_batch_finish_locked(vm, &batch);
+	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 
 	nvgpu_big_free(vm->mm->g, mapped_buffers);
 }
 
-static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
+static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
			struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = vm->mm->g;
@@ -1650,7 +1617,7 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 	}
 }
 
-static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
+static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm,
					 struct dma_buf *dmabuf,
					 u64 offset_align,
					 u32 flags,
@@ -1997,7 +1964,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl,
 	return align;
 }
 
-u64 gk20a_vm_map(struct vm_gk20a *vm,
+u64 nvgpu_vm_map(struct vm_gk20a *vm,
		 struct dma_buf *dmabuf,
		 u64 offset_align,
		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
@@ -2038,7 +2005,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* check if this buffer is already mapped */
 	if (!vm->userspace_managed) {
-		map_offset = gk20a_vm_map_duplicate_locked(
+		map_offset = nvgpu_vm_map_duplicate_locked(
			vm, dmabuf, offset_align,
			flags, kind, sgt,
			user_mapped, rw_flag);
@@ -2256,7 +2223,7 @@ clean_up:
 	return 0;
 }
 
-int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
			       u64 mapping_gva,
			       u64 *compbits_win_size,
			       u32 *compbits_win_ctagline,
@@ -2298,7 +2265,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 }
 
 
-int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
			  u64 mapping_gva,
			  u64 *compbits_win_gva,
			  u64 *mapping_iova,
@@ -3059,7 +3026,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 }
 
 /* NOTE! mapped_buffers lock must be held */
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
			   struct vm_gk20a_mapping_batch *batch)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;
@@ -3115,7 +3082,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 	return;
 }
 
-void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
 {
 	struct gk20a *g = vm->mm->g;
 	struct mapped_buffer_node *mapped_buffer;
@@ -3148,76 +3115,6 @@ static void gk20a_vm_free_entries(struct vm_gk20a *vm,
 	parent->entries = NULL;
 }
 
-static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
-{
-	struct mapped_buffer_node *mapped_buffer;
-	struct vm_reserved_va_node *va_node, *va_node_tmp;
-	struct nvgpu_rbtree_node *node = NULL;
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_dbg_fn("");
-
-	/*
-	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
-	 * pool involves unmapping a GMMU mapping which means aquiring the
-	 * update_gmmu_lock.
-	 */
-	if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) {
-		if (vm->sema_pool) {
-			nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
-			nvgpu_semaphore_pool_put(vm->sema_pool);
-		}
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	/* TBD: add a flag here for the unmap code to recognize teardown
-	 * and short-circuit any otherwise expensive operations. */
-
-	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
-	while (node) {
-		mapped_buffer = mapped_buffer_from_rbtree_node(node);
-		gk20a_vm_unmap_locked(mapped_buffer, NULL);
-		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
-	}
-
-	/* destroy remaining reserved memory areas */
-	nvgpu_list_for_each_entry_safe(va_node, va_node_tmp,
-		&vm->reserved_va_list,
-		vm_reserved_va_node, reserved_va_list) {
-		nvgpu_list_del(&va_node->reserved_va_list);
-		nvgpu_kfree(vm->mm->g, va_node);
-	}
-
-	gk20a_deinit_vm(vm);
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
-
-void gk20a_vm_remove_support(struct vm_gk20a *vm)
-{
-	gk20a_vm_remove_support_nofree(vm);
-	/* vm is not used anymore. release it. */
-	nvgpu_kfree(vm->mm->g, vm);
-}
-
-static void gk20a_vm_remove_support_kref(struct kref *ref)
-{
-	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
-	struct gk20a *g = gk20a_from_vm(vm);
-	g->ops.mm.vm_remove(vm);
-}
-
-void gk20a_vm_get(struct vm_gk20a *vm)
-{
-	kref_get(&vm->ref);
-}
-
-void gk20a_vm_put(struct vm_gk20a *vm)
-{
-	kref_put(&vm->ref, gk20a_vm_remove_support_kref);
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
	 .lo_bit = {26, 26},
@@ -3284,7 +3181,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
					   SZ_4K);
 	if (!sema_sea->gpu_va) {
 		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
-		gk20a_vm_put(vm);
+		nvgpu_vm_put(vm);
 		return -ENOMEM;
 	}
 
@@ -3408,7 +3305,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm)
 }
 
 /**
- * gk20a_init_vm() - Initialize an address space.
+ * nvgpu_init_vm() - Initialize an address space.
  *
  * @mm - Parent MM.
 * @vm - The VM to init.
@@ -3443,7 +3340,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm)
 * such cases the @kernel_reserved and @low_hole should sum to exactly
 * @aperture_size.
 */
-int gk20a_init_vm(struct mm_gk20a *mm,
+int nvgpu_init_vm(struct mm_gk20a *mm,
		  struct vm_gk20a *vm,
		  u32 big_page_size,
		  u64 low_hole,
@@ -3683,7 +3580,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 
 	snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-	err = gk20a_init_vm(mm, vm, big_page_size,
+	err = nvgpu_init_vm(mm, vm, big_page_size,
			    big_page_size << 10,
			    mm->channel.kernel_size,
			    mm->channel.user_size + mm->channel.kernel_size,
@@ -3701,7 +3598,7 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 	vm->as_share = NULL;
 	as_share->vm = NULL;
 
-	gk20a_vm_put(vm);
+	nvgpu_vm_put(vm);
 
 	return 0;
 }
@@ -3864,7 +3761,7 @@ int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 
 	gk20a_dbg_fn("");
 
-	gk20a_vm_get(vm);
+	nvgpu_vm_get(vm);
 	ch->vm = vm;
 	err = channel_gk20a_commit_va(ch);
 	if (err)
@@ -3960,7 +3857,7 @@ out:
 
 }
 
-int gk20a_vm_map_buffer(struct vm_gk20a *vm,
+int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
			int dmabuf_fd,
			u64 *offset_align,
			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
@@ -3989,7 +3886,7 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 		return err;
 	}
 
-	ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
+	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
			flags, kind, NULL, true,
			gk20a_mem_flag_none,
			buffer_offset,
@@ -4005,16 +3902,16 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 	return err;
 }
 
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
			  struct vm_gk20a_mapping_batch *batch)
 {
 	gk20a_dbg_fn("");
 
-	gk20a_vm_unmap_user(vm, offset, batch);
+	nvgpu_vm_unmap_user(vm, offset, batch);
 	return 0;
 }
 
-void gk20a_deinit_vm(struct vm_gk20a *vm)
+void nvgpu_deinit_vm(struct vm_gk20a *vm)
 {
 	if (nvgpu_alloc_initialized(&vm->kernel))
 		nvgpu_alloc_destroy(&vm->kernel);
@@ -4069,7 +3966,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	gk20a_init_vm(mm, vm,
+	nvgpu_init_vm(mm, vm,
		      big_page_size,
		      SZ_4K,				/* Low hole */
		      mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
@@ -4085,7 +3982,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	return 0;
 
 clean_up_va:
-	gk20a_deinit_vm(vm);
+	nvgpu_deinit_vm(vm);
 	return err;
 }
 
@@ -4108,7 +4005,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	gk20a_init_vm(mm, vm, big_page_size,
+	nvgpu_init_vm(mm, vm, big_page_size,
		      low_hole,
		      aperture_size - low_hole,
		      aperture_size,
@@ -4124,7 +4021,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	return 0;
 
 clean_up_va:
-	gk20a_deinit_vm(vm);
+	nvgpu_deinit_vm(vm);
 	return err;
 }
 
@@ -4149,7 +4046,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return gk20a_init_vm(mm, vm, big_page_size,
+	return nvgpu_init_vm(mm, vm, big_page_size,
			     big_page_size << 10,
			     NV_MM_DEFAULT_KERNEL_SIZE,
			     NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -4162,7 +4059,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return gk20a_init_vm(mm, vm, big_page_size,
+	return nvgpu_init_vm(mm, vm, big_page_size,
			     big_page_size << 10,
			     NV_MM_DEFAULT_KERNEL_SIZE,
			     NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -4399,7 +4296,7 @@ hw_was_off:
 	gk20a_idle_nosuspend(g->dev);
 }
 
-int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
+int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
			 struct dma_buf **dmabuf,
			 u64 *offset)
 {
@@ -4503,7 +4400,7 @@ void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
 	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
-	gops->mm.vm_remove = gk20a_vm_remove_support;
+	gops->mm.vm_remove = nvgpu_vm_remove_support;
 	gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
 	gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
 	gops->mm.fb_flush = gk20a_mm_fb_flush;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 0a102cb2..331843cc 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -26,6 +26,7 @@
26 26
27#include <nvgpu/nvgpu_mem.h> 27#include <nvgpu/nvgpu_mem.h>
28#include <nvgpu/allocator.h> 28#include <nvgpu/allocator.h>
29#include <nvgpu/vm.h>
29#include <nvgpu/list.h> 30#include <nvgpu/list.h>
30#include <nvgpu/rbtree.h> 31#include <nvgpu/rbtree.h>
31#include <nvgpu/kref.h> 32#include <nvgpu/kref.h>
@@ -116,13 +117,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
116 ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list)); 117 ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list));
117}; 118};
118 119
119enum gmmu_pgsz_gk20a {
120 gmmu_page_size_small = 0,
121 gmmu_page_size_big = 1,
122 gmmu_page_size_kernel = 2,
123 gmmu_nr_page_sizes = 3,
124};
125
126struct gk20a_comptags { 120struct gk20a_comptags {
127 u32 offset; 121 u32 offset;
128 u32 lines; 122 u32 lines;
@@ -130,15 +124,6 @@ struct gk20a_comptags {
130 bool user_mappable; 124 bool user_mappable;
131}; 125};
132 126
133struct gk20a_mm_entry {
134 /* backing for */
135 struct nvgpu_mem mem;
136 u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
137 int pgsz;
138 struct gk20a_mm_entry *entries;
139 int num_entries;
140};
141
142struct priv_cmd_queue { 127struct priv_cmd_queue {
143 struct nvgpu_mem mem; 128 struct nvgpu_mem mem;
144 u32 size; /* num of entries in words */ 129 u32 size; /* num of entries in words */
@@ -214,84 +199,6 @@ vm_reserved_va_node_from_reserved_va_list(struct nvgpu_list_node *node)
214 ((uintptr_t)node - offsetof(struct vm_reserved_va_node, reserved_va_list)); 199 ((uintptr_t)node - offsetof(struct vm_reserved_va_node, reserved_va_list));
215}; 200};
216 201
217struct gk20a_mmu_level {
218 int hi_bit[2];
219 int lo_bit[2];
220 int (*update_entry)(struct vm_gk20a *vm,
221 struct gk20a_mm_entry *pte,
222 u32 i, u32 gmmu_pgsz_idx,
223 struct scatterlist **sgl,
224 u64 *offset,
225 u64 *iova,
226 u32 kind_v, u64 *ctag,
227 bool cacheable, bool unmapped_pte,
228 int rw_flag, bool sparse, bool priv,
229 enum nvgpu_aperture aperture);
230 size_t entry_size;
231};
232
233/* map/unmap batch state */
234struct vm_gk20a_mapping_batch
235{
236 bool gpu_l2_flushed;
237 bool need_tlb_invalidate;
238};
239
240struct vm_gk20a {
241 struct mm_gk20a *mm;
242 struct gk20a_as_share *as_share; /* as_share this represents */
243
244 u64 va_start;
245 u64 va_limit;
246
247 int num_user_mapped_buffers;
248
249 bool big_pages; /* enable large page support */
250 bool enable_ctag;
251 bool mapped;
252
253 u32 big_page_size;
254
255 bool userspace_managed;
256
257 const struct gk20a_mmu_level *mmu_levels;
258
259 struct kref ref;
260
261 struct nvgpu_mutex update_gmmu_lock;
262
263 struct gk20a_mm_entry pdb;
264
265 /*
266 * These structs define the address spaces. In some cases it's possible
267 * to merge address spaces (user and user_lp) and in other cases it's
268 * not. vma[] allows the code to be agnostic to this by always using
269 * address spaces through this pointer array.
270 */
271 struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
272 struct nvgpu_allocator kernel;
273 struct nvgpu_allocator user;
274 struct nvgpu_allocator user_lp;
275
276 struct nvgpu_rbtree_node *mapped_buffers;
277
278 struct nvgpu_list_node reserved_va_list;
279
280#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
281 u64 handle;
282#endif
283 u32 gmmu_page_sizes[gmmu_nr_page_sizes];
284
285 /* if non-NULL, kref_put will use this batch when
286 unmapping. Must hold vm->update_gmmu_lock. */
287 struct vm_gk20a_mapping_batch *kref_put_batch;
288
289 /*
290 * Each address space needs to have a semaphore pool.
291 */
292 struct nvgpu_semaphore_pool *sema_pool;
293};
294
295struct gk20a; 202struct gk20a;
296struct channel_gk20a; 203struct channel_gk20a;
297 204
@@ -562,57 +469,13 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
562void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, 469void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
563 struct sg_table *sgt); 470 struct sg_table *sgt);
564 471
565u64 gk20a_vm_map(struct vm_gk20a *vm, 472int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
566 struct dma_buf *dmabuf,
567 u64 offset_align,
568 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
569 int kind,
570 struct sg_table **sgt,
571 bool user_mapped,
572 int rw_flag,
573 u64 buffer_offset,
574 u64 mapping_size,
575 struct vm_gk20a_mapping_batch *mapping_batch);
576
577int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
578 u64 mapping_gva, 473 u64 mapping_gva,
579 u64 *compbits_win_size, 474 u64 *compbits_win_size,
580 u32 *compbits_win_ctagline, 475 u32 *compbits_win_ctagline,
581 u32 *mapping_ctagline, 476 u32 *mapping_ctagline,
582 u32 *flags); 477 u32 *flags);
583 478
584int gk20a_vm_map_compbits(struct vm_gk20a *vm,
585 u64 mapping_gva,
586 u64 *compbits_win_gva,
587 u64 *mapping_iova,
588 u32 flags);
589
590/* unmap handle from kernel */
591void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
592
593void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
594 struct vm_gk20a_mapping_batch *batch);
595
596/* get reference to all currently mapped buffers */
597int gk20a_vm_get_buffers(struct vm_gk20a *vm,
598 struct mapped_buffer_node ***mapped_buffers,
599 int *num_buffers);
600
601/* put references on the given buffers */
602void gk20a_vm_put_buffers(struct vm_gk20a *vm,
603 struct mapped_buffer_node **mapped_buffers,
604 int num_buffers);
605
606/* find buffer corresponding to va */
607int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
608 struct dma_buf **dmabuf,
609 u64 *offset);
610
611void gk20a_vm_get(struct vm_gk20a *vm);
612void gk20a_vm_put(struct vm_gk20a *vm);
613
614void gk20a_vm_remove_support(struct vm_gk20a *vm);
615
616u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, 479u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
617 u64 size, 480 u64 size,
618 enum gmmu_pgsz_gk20a gmmu_pgsz_idx); 481 enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
@@ -635,44 +498,11 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
635 struct channel_gk20a *ch); 498 struct channel_gk20a *ch);
636int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch); 499int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);
637 500
638/* batching eliminates redundant cache flushes and invalidates */
639void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
640void gk20a_vm_mapping_batch_finish(
641 struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
642/* called when holding vm->update_gmmu_lock */
643void gk20a_vm_mapping_batch_finish_locked(
644 struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
645
646
647int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes); 501int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
648int gk20a_vidmem_get_space(struct gk20a *g, u64 *space); 502int gk20a_vidmem_get_space(struct gk20a *g, u64 *space);
649int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, 503int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
650 void *buffer, u64 offset, u64 size, u32 cmd); 504 void *buffer, u64 offset, u64 size, u32 cmd);
651 505
652/* Note: batch may be NULL if map op is not part of a batch */
653int gk20a_vm_map_buffer(struct vm_gk20a *vm,
654 int dmabuf_fd,
655 u64 *offset_align,
656 u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
657 int kind,
658 u64 buffer_offset,
659 u64 mapping_size,
660 struct vm_gk20a_mapping_batch *batch);
661
662int gk20a_init_vm(struct mm_gk20a *mm,
663 struct vm_gk20a *vm,
664 u32 big_page_size,
665 u64 low_hole,
666 u64 kernel_reserved,
667 u64 aperture_size,
668 bool big_pages,
669 bool userspace_managed,
670 char *name);
671void gk20a_deinit_vm(struct vm_gk20a *vm);
672
673/* Note: batch may be NULL if unmap op is not part of a batch */
674int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
675 struct vm_gk20a_mapping_batch *batch);
676void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, 506void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
677 struct gk20a_comptags *comptags); 507 struct gk20a_comptags *comptags);
678dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); 508dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
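
The compbits query keeps its out-parameter shape under the new nvgpu_ prefix. A minimal call sketch, assuming illustrative local names (query_compbits itself is not part of the patch):

static int query_compbits(struct vm_gk20a *vm, u64 mapping_gva)
{
	u64 win_size;
	u32 win_ctagline, mapping_ctagline, flags;

	/* signature as declared above; all results come back via pointers */
	return nvgpu_vm_get_compbits_info(vm, mapping_gva,
					  &win_size, &win_ctagline,
					  &mapping_ctagline, &flags);
}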
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index d1e667b6..f9884cfb 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -289,7 +289,7 @@ void gk20a_tsg_release(struct kref *ref)
289 tsg->tsg_gr_ctx = NULL; 289 tsg->tsg_gr_ctx = NULL;
290 } 290 }
291 if (tsg->vm) { 291 if (tsg->vm) {
292 gk20a_vm_put(tsg->vm); 292 nvgpu_vm_put(tsg->vm);
293 tsg->vm = NULL; 293 tsg->vm = NULL;
294 } 294 }
295 295
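
This hunk is representative of the mechanical renames in this patch: the take/release discipline itself is unchanged. A sketch of the pairing, with hypothetical helper names:

/* Take a reference while the TSG holds the VM... */
static void tsg_adopt_vm(struct tsg_gk20a *tsg, struct vm_gk20a *vm)
{
	tsg->vm = vm;
	nvgpu_vm_get(vm);
}

/* ...and drop it exactly once on release, as gk20a_tsg_release() does. */
static void tsg_drop_vm(struct tsg_gk20a *tsg)
{
	if (tsg->vm) {
		nvgpu_vm_put(tsg->vm);
		tsg->vm = NULL;
	}
}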
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index bf3dd240..1405ef30 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -60,7 +60,7 @@ void gm20b_init_mm(struct gpu_ops *gops)
60 gops->mm.support_sparse = gm20b_mm_support_sparse; 60 gops->mm.support_sparse = gm20b_mm_support_sparse;
61 gops->mm.gmmu_map = gk20a_locked_gmmu_map; 61 gops->mm.gmmu_map = gk20a_locked_gmmu_map;
62 gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; 62 gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
63 gops->mm.vm_remove = gk20a_vm_remove_support; 63 gops->mm.vm_remove = nvgpu_vm_remove_support;
64 gops->mm.vm_alloc_share = gk20a_vm_alloc_share; 64 gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
65 gops->mm.vm_bind_channel = gk20a_vm_bind_channel; 65 gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
66 gops->mm.fb_flush = gk20a_mm_fb_flush; 66 gops->mm.fb_flush = gk20a_mm_fb_flush;
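
These assignments populate the per-chip HAL table; callers dispatch through the ops pointer instead of naming a chip-specific function, which is why only this table changes per chip. A sketch of the call side (the wrapper is hypothetical; the ops slot is the one assigned above):

static void remove_vm_via_hal(struct gk20a *g, struct vm_gk20a *vm)
{
	/* on gm20b this now resolves to nvgpu_vm_remove_support() */
	g->ops.mm.vm_remove(vm);
}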
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 6b7f63b3..eab51175 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -74,7 +74,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
74 /* BAR2 aperture size is 32MB */ 74 /* BAR2 aperture size is 32MB */
75 mm->bar2.aperture_size = 32 << 20; 75 mm->bar2.aperture_size = 32 << 20;
76 gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size); 76 gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);
77 gk20a_init_vm(mm, vm, big_page_size, SZ_4K, 77 nvgpu_init_vm(mm, vm, big_page_size, SZ_4K,
78 mm->bar2.aperture_size - SZ_4K, 78 mm->bar2.aperture_size - SZ_4K,
79 mm->bar2.aperture_size, false, false, "bar2"); 79 mm->bar2.aperture_size, false, false, "bar2");
80 80
@@ -88,7 +88,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
88 return 0; 88 return 0;
89 89
90clean_up_va: 90clean_up_va:
91 gk20a_deinit_vm(vm); 91 nvgpu_deinit_vm(vm);
92 return err; 92 return err;
93} 93}
94 94
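
nvgpu_init_vm()'s arguments are positional and easy to misread. The sketch below restates the BAR2 call from this hunk with each parameter labeled against the prototype in include/nvgpu/vm.h; the wrapper function is illustrative only:

static int init_bar2_like_vm(struct mm_gk20a *mm, struct vm_gk20a *vm,
			     u32 big_page_size)
{
	return nvgpu_init_vm(mm, vm,
			big_page_size,
			SZ_4K,				/* low_hole */
			mm->bar2.aperture_size - SZ_4K,	/* kernel_reserved */
			mm->bar2.aperture_size,		/* aperture_size */
			false,				/* big_pages */
			false,				/* userspace_managed */
			"bar2");			/* name */
}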
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 7fb0147e..6d8aa025 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -18,6 +18,9 @@
18#define __NVGPU_GMMU_H__ 18#define __NVGPU_GMMU_H__
19 19
20#include <nvgpu/types.h> 20#include <nvgpu/types.h>
21#include <nvgpu/nvgpu_mem.h>
22
23struct scatterlist;
21 24
22/* 25/*
23 * This is the GMMU API visible to blocks outside of the GMMU. Basically this 26 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
@@ -28,7 +31,37 @@
28struct vm_gk20a; 31struct vm_gk20a;
29struct nvgpu_mem; 32struct nvgpu_mem;
30 33
31enum nvgpu_aperture; 34enum gmmu_pgsz_gk20a {
35 gmmu_page_size_small = 0,
36 gmmu_page_size_big = 1,
37 gmmu_page_size_kernel = 2,
38 gmmu_nr_page_sizes = 3,
39};
40
41struct gk20a_mm_entry {
42	/* backing memory for this entry */
43 struct nvgpu_mem mem;
44 u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
45 int pgsz;
46 struct gk20a_mm_entry *entries;
47 int num_entries;
48};
49
50struct gk20a_mmu_level {
51 int hi_bit[2];
52 int lo_bit[2];
53 int (*update_entry)(struct vm_gk20a *vm,
54 struct gk20a_mm_entry *pte,
55 u32 i, u32 gmmu_pgsz_idx,
56 struct scatterlist **sgl,
57 u64 *offset,
58 u64 *iova,
59 u32 kind_v, u64 *ctag,
60 bool cacheable, bool unmapped_pte,
61 int rw_flag, bool sparse, bool priv,
62 enum nvgpu_aperture aperture);
63 size_t entry_size;
64};
32 65
33/** 66/**
34 * nvgpu_gmmu_map - Map memory into the GMMU. 67 * nvgpu_gmmu_map - Map memory into the GMMU.
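
In gk20a_mmu_level, hi_bit/lo_bit record, per page size, which VA bits index a given page-table level. A sketch of how a table walker might derive that index; this helper is an assumption for illustration, not code from the patch:

static u32 pd_index(const struct gk20a_mmu_level *l, u64 gpu_va, u32 pgsz_idx)
{
	u32 bits = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;

	/* select the VA bit range [lo_bit, hi_bit] for this level */
	return (u32)((gpu_va >> l->lo_bit[pgsz_idx]) & ((1ULL << bits) - 1));
}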
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
new file mode 100644
index 00000000..1fb772d5
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -0,0 +1,144 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_VM_H__
18#define __NVGPU_VM_H__
19
20#include <nvgpu/kref.h>
21#include <nvgpu/list.h>
22#include <nvgpu/rbtree.h>
23#include <nvgpu/types.h>
24#include <nvgpu/gmmu.h>
25#include <nvgpu/nvgpu_mem.h>
26#include <nvgpu/allocator.h>
27
28struct vm_gk20a;
29struct mapped_buffer_node;
30
31/**
32 * This header contains the OS agnostic APIs for dealing with VMs. Most of the
33 * VM implementation is system specific - it must translate from a platform's
34 * representation of DMA'able memory to our nvgpu_mem notion.
35 *
36 * However, some code is platform agnostic. VM ref-counting and the VM struct
37 * itself are platform agnostic, and the initialization and destruction of
38 * VMs are the same across all platforms (for now).
39 */
40
41/* map/unmap batch state */
42struct vm_gk20a_mapping_batch {
43 bool gpu_l2_flushed;
44 bool need_tlb_invalidate;
45};
46
47struct vm_gk20a {
48 struct mm_gk20a *mm;
49	struct gk20a_as_share *as_share; /* the as_share this VM represents, if any */
50
51 u64 va_start;
52 u64 va_limit;
53
54 int num_user_mapped_buffers;
55
56 bool big_pages; /* enable large page support */
57 bool enable_ctag;
58 bool mapped;
59
60 u32 big_page_size;
61
62 bool userspace_managed;
63
64 const struct gk20a_mmu_level *mmu_levels;
65
66 struct kref ref;
67
68 struct nvgpu_mutex update_gmmu_lock;
69
70 struct gk20a_mm_entry pdb;
71
72 /*
73 * These structs define the address spaces. In some cases it's possible
74 * to merge address spaces (user and user_lp) and in other cases it's
75 * not. vma[] allows the code to be agnostic to this by always using
76 * address spaces through this pointer array.
77 */
78 struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
79 struct nvgpu_allocator kernel;
80 struct nvgpu_allocator user;
81 struct nvgpu_allocator user_lp;
82
83 struct nvgpu_rbtree_node *mapped_buffers;
84
85 struct nvgpu_list_node reserved_va_list;
86
87#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
88 u64 handle;
89#endif
90 u32 gmmu_page_sizes[gmmu_nr_page_sizes];
91
92	/* If non-NULL, kref_put() will use this batch when
93	 * unmapping. Must hold vm->update_gmmu_lock. */
94 struct vm_gk20a_mapping_batch *kref_put_batch;
95
96 /*
97 * Each address space needs to have a semaphore pool.
98 */
99 struct nvgpu_semaphore_pool *sema_pool;
100};
101
102void nvgpu_vm_get(struct vm_gk20a *vm);
103void nvgpu_vm_put(struct vm_gk20a *vm);
104
105/* batching eliminates redundant cache flushes and invalidates */
106void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
107void nvgpu_vm_mapping_batch_finish(
108 struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
109/* called when holding vm->update_gmmu_lock */
110void nvgpu_vm_mapping_batch_finish_locked(
111 struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
112
113/* get reference to all currently mapped buffers */
114int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
115 struct mapped_buffer_node ***mapped_buffers,
116 int *num_buffers);
117
118/* put references on the given buffers */
119void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
120 struct mapped_buffer_node **mapped_buffers,
121 int num_buffers);
122
123/* Note: batch may be NULL if unmap op is not part of a batch */
124int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
125 struct vm_gk20a_mapping_batch *batch);
126
127void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
128 struct vm_gk20a_mapping_batch *batch);
129
130void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm);
131void nvgpu_vm_remove_support(struct vm_gk20a *vm);
132
133int nvgpu_init_vm(struct mm_gk20a *mm,
134 struct vm_gk20a *vm,
135 u32 big_page_size,
136 u64 low_hole,
137 u64 kernel_reserved,
138 u64 aperture_size,
139 bool big_pages,
140 bool userspace_managed,
141 char *name);
142void nvgpu_deinit_vm(struct vm_gk20a *vm);
143
144#endif
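
Per its comment, the batch API above exists to coalesce the L2 flush and TLB invalidate across a series of map/unmap operations rather than paying them per call. A usage sketch under that assumption (unmap_many is an illustrative name):

static int unmap_many(struct vm_gk20a *vm, u64 *offsets, int n)
{
	struct vm_gk20a_mapping_batch batch;
	int i, err = 0;

	nvgpu_vm_mapping_batch_start(&batch);
	for (i = 0; i < n; i++)
		err = nvgpu_vm_unmap_buffer(vm, offsets[i], &batch);
	/* one flush/invalidate for the whole series, if any were needed */
	nvgpu_vm_mapping_batch_finish(vm, &batch);

	return err;
}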
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index c7960c1f..dd515f41 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -526,7 +526,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
526 } else { 526 } else {
527 if (!tsg->tsg_gr_ctx) { 527 if (!tsg->tsg_gr_ctx) {
528 tsg->vm = c->vm; 528 tsg->vm = c->vm;
529 gk20a_vm_get(tsg->vm); 529 nvgpu_vm_get(tsg->vm);
530 err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, 530 err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx,
531 c->vm, 531 c->vm,
532 args->class_num, 532 args->class_num,
@@ -536,7 +536,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
536 if (err) { 536 if (err) {
537 nvgpu_err(g, 537 nvgpu_err(g,
538 "fail to allocate TSG gr ctx buffer, err=%d", err); 538 "fail to allocate TSG gr ctx buffer, err=%d", err);
539 gk20a_vm_put(tsg->vm); 539 nvgpu_vm_put(tsg->vm);
540 tsg->vm = NULL; 540 tsg->vm = NULL;
541 goto out; 541 goto out;
542 } 542 }
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 7e42c198..cfa9e428 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -22,6 +22,8 @@
22#include "vgpu/vgpu.h" 22#include "vgpu/vgpu.h"
23#include "gk20a/mm_gk20a.h" 23#include "gk20a/mm_gk20a.h"
24 24
25#include "common/linux/vm_priv.h"
26
25static int vgpu_init_mm_setup_sw(struct gk20a *g) 27static int vgpu_init_mm_setup_sw(struct gk20a *g)
26{ 28{
27 struct mm_gk20a *mm = &g->mm; 29 struct mm_gk20a *mm = &g->mm;
@@ -216,7 +218,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
216 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); 218 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
217 while (node) { 219 while (node) {
218 mapped_buffer = mapped_buffer_from_rbtree_node(node); 220 mapped_buffer = mapped_buffer_from_rbtree_node(node);
219 gk20a_vm_unmap_locked(mapped_buffer, NULL); 221 nvgpu_vm_unmap_locked(mapped_buffer, NULL);
220 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); 222 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
221 } 223 }
222 224
@@ -454,7 +456,7 @@ static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
454 } 456 }
455 457
456 if (ch->vm) 458 if (ch->vm)
457 gk20a_vm_get(ch->vm); 459 nvgpu_vm_get(ch->vm);
458 460
459 return err; 461 return err;
460} 462}
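
The teardown loop in vgpu_vm_remove_support() uses an enumerate-restart idiom: each unmap mutates the rbtree, so enumeration restarts from key 0 after every removal instead of advancing in place. The same idiom in isolation (unmap_all_locked is an illustrative name):

static void unmap_all_locked(struct vm_gk20a *vm)
{
	struct nvgpu_rbtree_node *node;
	struct mapped_buffer_node *buf;

	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	while (node) {
		buf = mapped_buffer_from_rbtree_node(node);
		nvgpu_vm_unmap_locked(buf, NULL);	/* NULL: not batched */
		/* restart: the tree just changed under us */
		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	}
}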