-rw-r--r--  drivers/gpu/nvgpu/Makefile.nvgpu                |   1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_dbg.c      |   2
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c   |   4
-rw-r--r--  drivers/gpu/nvgpu/common/mm/mm.c                | 426
-rw-r--r--  drivers/gpu/nvgpu/gk20a/bus_gk20a.c             |   5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c          |   3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c         |   6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c      |   7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c            |  13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h            |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c                 |   7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                 |   3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c              |  11
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c              | 441
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h              | 208
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c             |   3
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.c             |   4
-rw-r--r--  drivers/gpu/nvgpu/gm20b/bus_gm20b.c             |   5
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c            |   3
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.c             |   1
-rw-r--r--  drivers/gpu/nvgpu/gp106/hal_gp106.c             |   1
-rw-r--r--  drivers/gpu/nvgpu/gp106/sec2_gp106.c            |   5
-rw-r--r--  drivers/gpu/nvgpu/gp10b/hal_gp10b.c             |   1
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c              |   7
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/mm.h            | 220
25 files changed, 724 insertions(+), 664 deletions(-)
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index ce4f67b0..e689aa7f 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -65,6 +65,7 @@ nvgpu-y := \
65 common/mm/vm_area.o \ 65 common/mm/vm_area.o \
66 common/mm/nvgpu_mem.o \ 66 common/mm/nvgpu_mem.o \
67 common/mm/comptags.o \ 67 common/mm/comptags.o \
68 common/mm/mm.o \
68 common/bus.o \ 69 common/bus.o \
69 common/enabled.o \ 70 common/enabled.o \
70 common/pramin.o \ 71 common/pramin.o \
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
index 56edc11b..c8831a97 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
@@ -1372,7 +1372,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
1372 err = g->ops.dbg_session_ops.perfbuffer_disable(g); 1372 err = g->ops.dbg_session_ops.perfbuffer_disable(g);
1373 1373
1374 nvgpu_vm_unmap_buffer(vm, offset, NULL); 1374 nvgpu_vm_unmap_buffer(vm, offset, NULL);
1375 gk20a_free_inst_block(g, &mm->perfbuf.inst_block); 1375 nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
1376 nvgpu_vm_put(vm); 1376 nvgpu_vm_put(vm);
1377 1377
1378 g->perfbuf.owner = NULL; 1378 g->perfbuf.owner = NULL;
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index c6f10a69..a2546e9d 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -25,8 +25,8 @@
25#include <nvgpu/bug.h> 25#include <nvgpu/bug.h>
26#include <nvgpu/log2.h> 26#include <nvgpu/log2.h>
27#include <nvgpu/barrier.h> 27#include <nvgpu/barrier.h>
28 28#include <nvgpu/mm.h>
29#include "gk20a/mm_gk20a.h" 29#include <nvgpu/vm.h>
30 30
31#include "buddy_allocator_priv.h" 31#include "buddy_allocator_priv.h"
32 32
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
new file mode 100644
index 00000000..1027ed28
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -0,0 +1,426 @@
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
6 * and/or sell copies of the Software, and to permit persons to whom the
7 * Software is furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in
10 * all copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
18 * DEALINGS IN THE SOFTWARE.
19 */
20
21#include <nvgpu/mm.h>
22#include <nvgpu/vm.h>
23#include <nvgpu/dma.h>
24#include <nvgpu/vm_area.h>
25#include <nvgpu/gmmu.h>
26#include <nvgpu/vidmem.h>
27#include <nvgpu/semaphore.h>
28#include <nvgpu/pramin.h>
29#include <nvgpu/enabled.h>
30
31#include "gk20a/gk20a.h"
32
33/*
34 * Attempt to find a reserved memory area to determine PTE size for the passed
35 * mapping. If no reserved area can be found use small pages.
36 */
37enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
38 u64 base, u64 size)
39{
40 struct nvgpu_vm_area *vm_area;
41
42 vm_area = nvgpu_vm_area_find(vm, base);
43 if (!vm_area)
44 return gmmu_page_size_small;
45
46 return vm_area->pgsz_idx;
47}
48
49/*
50 * This is for when the address space does not support unified address spaces.
51 */
52static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
53 u64 base, u64 size)
54{
55 if (!base) {
56 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
57 return gmmu_page_size_big;
58 return gmmu_page_size_small;
59 } else {
60 if (base < __nv_gmmu_va_small_page_limit())
61 return gmmu_page_size_small;
62 else
63 return gmmu_page_size_big;
64 }
65}
66
67/*
68 * This determines the PTE size for a given alloc. Used by both the GVA space
69 * allocator and the mm core code so that agreement can be reached on how to
70 * map allocations.
71 *
72 * The page size of a buffer is this:
73 *
74 * o If the VM doesn't support large pages then obviously small pages
75 * must be used.
76 * o If the base address is non-zero (fixed address map):
77 * - Attempt to find a reserved memory area and use the page size
78 * based on that.
79 * - If no reserved page size is available, default to small pages.
80 * o If the base is zero:
81 * - If the size is larger than or equal to the big page size, use big
82 * pages.
83 * - Otherwise use small pages.
84 */
85enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
86{
87 struct gk20a *g = gk20a_from_vm(vm);
88
89 if (!vm->big_pages)
90 return gmmu_page_size_small;
91
92 if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
93 return __get_pte_size_split_addr(vm, base, size);
94
95 if (base)
96 return __get_pte_size_fixed_map(vm, base, size);
97
98 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
99 return gmmu_page_size_big;
100 return gmmu_page_size_small;
101}
102
103int nvgpu_mm_suspend(struct gk20a *g)
104{
105 nvgpu_info(g, "MM suspend running...");
106
107 nvgpu_vidmem_thread_pause_sync(&g->mm);
108
109 g->ops.mm.cbc_clean(g);
110 g->ops.mm.l2_flush(g, false);
111
112 nvgpu_info(g, "MM suspend done!");
113
114 return 0;
115}
116
117u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
118{
119 if (g->mm.has_physical_mode)
120 return nvgpu_mem_get_phys_addr(g, inst_block);
121 else
122 return nvgpu_mem_get_addr(g, inst_block);
123}
124
125void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
126{
127 if (nvgpu_mem_is_valid(inst_block))
128 nvgpu_dma_free(g, inst_block);
129}
130
131static int nvgpu_alloc_sysmem_flush(struct gk20a *g)
132{
133 return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
134}
135
136static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm)
137{
138 struct gk20a *g = gk20a_from_mm(mm);
139
140 if (mm->vidmem.ce_ctx_id != (u32)~0)
141 gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
142
143 mm->vidmem.ce_ctx_id = (u32)~0;
144
145 nvgpu_vm_put(mm->ce.vm);
146}
147
148static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
149{
150 struct gk20a *g = gk20a_from_mm(mm);
151
152 if (g->ops.mm.fault_info_mem_destroy)
153 g->ops.mm.fault_info_mem_destroy(g);
154
155 if (g->ops.mm.remove_bar2_vm)
156 g->ops.mm.remove_bar2_vm(g);
157
158 if (g->ops.mm.is_bar1_supported(g)) {
159 nvgpu_free_inst_block(g, &mm->bar1.inst_block);
160 nvgpu_vm_put(mm->bar1.vm);
161 }
162
163 nvgpu_free_inst_block(g, &mm->pmu.inst_block);
164 nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
165 nvgpu_vm_put(mm->pmu.vm);
166 nvgpu_vm_put(mm->cde.vm);
167
168 nvgpu_semaphore_sea_destroy(g);
169 nvgpu_vidmem_destroy(g);
170 nvgpu_pd_cache_fini(g);
171}
172
173/* pmu vm, share channel_vm interfaces */
174static int nvgpu_init_system_vm(struct mm_gk20a *mm)
175{
176 int err;
177 struct gk20a *g = gk20a_from_mm(mm);
178 struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
179 u32 big_page_size = g->ops.mm.get_default_big_page_size();
180 u32 low_hole, aperture_size;
181
182 /*
183 * No user region - so we will pass that as zero sized.
184 */
185 low_hole = SZ_4K * 16;
186 aperture_size = GK20A_PMU_VA_SIZE * 2;
187
188 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
189 nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size);
190
191 mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
192 low_hole,
193 aperture_size - low_hole,
194 aperture_size,
195 true,
196 false,
197 "system");
198 if (!mm->pmu.vm)
199 return -ENOMEM;
200
201 err = g->ops.mm.alloc_inst_block(g, inst_block);
202 if (err)
203 goto clean_up_vm;
204 g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
205
206 return 0;
207
208clean_up_vm:
209 nvgpu_vm_put(mm->pmu.vm);
210 return err;
211}
212
213static int nvgpu_init_hwpm(struct mm_gk20a *mm)
214{
215 int err;
216 struct gk20a *g = gk20a_from_mm(mm);
217 struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
218
219 err = g->ops.mm.alloc_inst_block(g, inst_block);
220 if (err)
221 return err;
222 g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
223
224 return 0;
225}
226
227static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
228{
229 struct gk20a *g = gk20a_from_mm(mm);
230 u32 big_page_size = g->ops.mm.get_default_big_page_size();
231
232 mm->cde.vm = nvgpu_vm_init(g, big_page_size,
233 big_page_size << 10,
234 NV_MM_DEFAULT_KERNEL_SIZE,
235 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
236 false, false, "cde");
237 if (!mm->cde.vm)
238 return -ENOMEM;
239 return 0;
240}
241
242static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
243{
244 struct gk20a *g = gk20a_from_mm(mm);
245 u32 big_page_size = g->ops.mm.get_default_big_page_size();
246
247 mm->ce.vm = nvgpu_vm_init(g, big_page_size,
248 big_page_size << 10,
249 NV_MM_DEFAULT_KERNEL_SIZE,
250 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
251 false, false, "ce");
252 if (!mm->ce.vm)
253 return -ENOMEM;
254 return 0;
255}
256
257void nvgpu_init_mm_ce_context(struct gk20a *g)
258{
259#if defined(CONFIG_GK20A_VIDMEM)
260 if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
261 g->mm.vidmem.ce_ctx_id =
262 gk20a_ce_create_context_with_cb(g,
263 gk20a_fifo_get_fast_ce_runlist_id(g),
264 -1,
265 -1,
266 -1,
267 NULL);
268
269 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
270 nvgpu_err(g,
271 "Failed to allocate CE context for vidmem page clearing support");
272 }
273#endif
274}
275
276static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g)
277{
278 if (g->ops.fb.reset)
279 g->ops.fb.reset(g);
280
281 if (g->ops.clock_gating.slcg_fb_load_gating_prod)
282 g->ops.clock_gating.slcg_fb_load_gating_prod(g,
283 g->slcg_enabled);
284 if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
285 g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
286 g->slcg_enabled);
287 if (g->ops.clock_gating.blcg_fb_load_gating_prod)
288 g->ops.clock_gating.blcg_fb_load_gating_prod(g,
289 g->blcg_enabled);
290 if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
291 g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
292 g->blcg_enabled);
293
294 if (g->ops.fb.init_fs_state)
295 g->ops.fb.init_fs_state(g);
296
297 return 0;
298}
299
300static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
301{
302 int err;
303 struct gk20a *g = gk20a_from_mm(mm);
304 struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
305 u32 big_page_size = g->ops.mm.get_default_big_page_size();
306
307 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
308 nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
309 mm->bar1.vm = nvgpu_vm_init(g,
310 big_page_size,
311 SZ_4K,
312 mm->bar1.aperture_size - SZ_4K,
313 mm->bar1.aperture_size,
314 true, false,
315 "bar1");
316 if (!mm->bar1.vm)
317 return -ENOMEM;
318
319 err = g->ops.mm.alloc_inst_block(g, inst_block);
320 if (err)
321 goto clean_up_vm;
322 g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
323
324 return 0;
325
326clean_up_vm:
327 nvgpu_vm_put(mm->bar1.vm);
328 return err;
329}
330
331static int nvgpu_init_mm_setup_sw(struct gk20a *g)
332{
333 struct mm_gk20a *mm = &g->mm;
334 int err;
335
336 if (mm->sw_ready) {
337 nvgpu_info(g, "skip init");
338 return 0;
339 }
340
341 mm->g = g;
342 nvgpu_mutex_init(&mm->l2_op_lock);
343
344 /*TBD: make channel vm size configurable */
345 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
346 NV_MM_DEFAULT_KERNEL_SIZE;
347 mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
348
349 nvgpu_info(g, "channel vm size: user %dMB kernel %dMB",
350 (int)(mm->channel.user_size >> 20),
351 (int)(mm->channel.kernel_size >> 20));
352
353 nvgpu_init_pramin(mm);
354
355 mm->vidmem.ce_ctx_id = (u32)~0;
356
357 err = nvgpu_vidmem_init(mm);
358 if (err)
359 return err;
360
361 /*
362 * this requires fixed allocations in vidmem which must be
363 * allocated before all other buffers
364 */
365 if (g->ops.pmu.alloc_blob_space
366 && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
367 err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
368 if (err)
369 return err;
370 }
371
372 err = nvgpu_alloc_sysmem_flush(g);
373 if (err)
374 return err;
375
376 if (g->ops.mm.is_bar1_supported(g)) {
377 err = nvgpu_init_bar1_vm(mm);
378 if (err)
379 return err;
380 }
381 if (g->ops.mm.init_bar2_vm) {
382 err = g->ops.mm.init_bar2_vm(g);
383 if (err)
384 return err;
385 }
386 err = nvgpu_init_system_vm(mm);
387 if (err)
388 return err;
389
390 err = nvgpu_init_hwpm(mm);
391 if (err)
392 return err;
393
394 err = nvgpu_init_cde_vm(mm);
395 if (err)
396 return err;
397
398 err = nvgpu_init_ce_vm(mm);
399 if (err)
400 return err;
401
402 mm->remove_support = nvgpu_remove_mm_support;
403 mm->remove_ce_support = nvgpu_remove_mm_ce_support;
404
405 mm->sw_ready = true;
406
407 return 0;
408}
409
410int nvgpu_init_mm_support(struct gk20a *g)
411{
412 u32 err;
413
414 err = nvgpu_init_mm_reset_enable_hw(g);
415 if (err)
416 return err;
417
418 err = nvgpu_init_mm_setup_sw(g);
419 if (err)
420 return err;
421
422 if (g->ops.mm.init_mm_setup_hw)
423 err = g->ops.mm.init_mm_setup_hw(g);
424
425 return err;
426}
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 938c4b00..9b031bbf 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -24,6 +24,7 @@
24#include <nvgpu/log.h> 24#include <nvgpu/log.h>
25#include <nvgpu/soc.h> 25#include <nvgpu/soc.h>
26#include <nvgpu/bus.h> 26#include <nvgpu/bus.h>
27#include <nvgpu/mm.h>
27 28
28#include "gk20a.h" 29#include "gk20a.h"
29#include "bus_gk20a.h" 30#include "bus_gk20a.h"
@@ -137,8 +138,8 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
137 138
138int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) 139int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
139{ 140{
140 u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); 141 u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
141 u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); 142 u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
142 143
143 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); 144 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
144 145
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 725ae278..e3896981 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -29,6 +29,7 @@
29#include <nvgpu/kmem.h> 29#include <nvgpu/kmem.h>
30#include <nvgpu/lock.h> 30#include <nvgpu/lock.h>
31#include <nvgpu/dma.h> 31#include <nvgpu/dma.h>
32#include <nvgpu/mm.h>
32 33
33#include "gk20a.h" 34#include "gk20a.h"
34#include "css_gr_gk20a.h" 35#include "css_gr_gk20a.h"
@@ -183,7 +184,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch,
183 gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); 184 gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
184 185
185 /* this field is aligned to 4K */ 186 /* this field is aligned to 4K */
186 inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; 187 inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
187 188
188 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK 189 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
189 * should be written last */ 190 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 8c39ecb7..802ccd76 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -26,6 +26,7 @@
26#include <nvgpu/log.h> 26#include <nvgpu/log.h>
27#include <nvgpu/vm.h> 27#include <nvgpu/vm.h>
28#include <nvgpu/atomic.h> 28#include <nvgpu/atomic.h>
29#include <nvgpu/mm.h>
29 30
30#include "gk20a.h" 31#include "gk20a.h"
31#include "gk20a/platform_gk20a.h" 32#include "gk20a/platform_gk20a.h"
@@ -305,7 +306,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
305 return err; 306 return err;
306 } 307 }
307 308
308 err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); 309 err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
309 if (err) 310 if (err)
310 return err; 311 return err;
311 312
@@ -322,8 +323,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
322 gk20a_writel(g, perf_pmasys_outsize_r(), size); 323 gk20a_writel(g, perf_pmasys_outsize_r(), size);
323 324
324 /* this field is aligned to 4K */ 325 /* this field is aligned to 4K */
325 inst_pa_page = gk20a_mm_inst_block_addr(g, 326 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
326 &mm->perfbuf.inst_block) >> 12;
327 327
328 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK 328 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
329 * should be written last */ 329 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 7fd1793c..12d7dcb9 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -32,6 +32,7 @@
32#include <nvgpu/circ_buf.h> 32#include <nvgpu/circ_buf.h>
33#include <nvgpu/thread.h> 33#include <nvgpu/thread.h>
34#include <nvgpu/barrier.h> 34#include <nvgpu/barrier.h>
35#include <nvgpu/mm.h>
35 36
36#include "ctxsw_trace_gk20a.h" 37#include "ctxsw_trace_gk20a.h"
37#include "fecs_trace_gk20a.h" 38#include "fecs_trace_gk20a.h"
@@ -93,7 +94,7 @@ static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
93 94
94static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) 95static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
95{ 96{
96 return (u32) (gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 12LL); 97 return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
97} 98}
98 99
99static inline int gk20a_fecs_trace_num_ts(void) 100static inline int gk20a_fecs_trace_num_ts(void)
@@ -633,12 +634,12 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
633 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, 634 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
634 "chid=%d context_ptr=%x inst_block=%llx", 635 "chid=%d context_ptr=%x inst_block=%llx",
635 ch->chid, context_ptr, 636 ch->chid, context_ptr,
636 gk20a_mm_inst_block_addr(g, &ch->inst_block)); 637 nvgpu_inst_block_addr(g, &ch->inst_block));
637 638
638 if (!trace) 639 if (!trace)
639 return -ENOMEM; 640 return -ENOMEM;
640 641
641 pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); 642 pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
642 if (!pa) 643 if (!pa)
643 return -ENOMEM; 644 return -ENOMEM;
644 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 645 aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 03ca6984..fc71c358 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -24,6 +24,7 @@
24 24
25#include <trace/events/gk20a.h> 25#include <trace/events/gk20a.h>
26 26
27#include <nvgpu/mm.h>
27#include <nvgpu/dma.h> 28#include <nvgpu/dma.h>
28#include <nvgpu/timers.h> 29#include <nvgpu/timers.h>
29#include <nvgpu/semaphore.h> 30#include <nvgpu/semaphore.h>
@@ -1058,7 +1059,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
1058 if (!ch) 1059 if (!ch)
1059 continue; 1060 continue;
1060 1061
1061 ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block); 1062 ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
1062 if (inst_ptr == ch_inst_ptr) 1063 if (inst_ptr == ch_inst_ptr)
1063 return ch; 1064 return ch;
1064 1065
@@ -1659,10 +1660,10 @@ static bool gk20a_fifo_handle_mmu_fault(
1659 ch->chid); 1660 ch->chid);
1660 } 1661 }
1661 } else if (mmfault_info.inst_ptr == 1662 } else if (mmfault_info.inst_ptr ==
1662 gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) { 1663 nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
1663 nvgpu_err(g, "mmu fault from bar1"); 1664 nvgpu_err(g, "mmu fault from bar1");
1664 } else if (mmfault_info.inst_ptr == 1665 } else if (mmfault_info.inst_ptr ==
1665 gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) { 1666 nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
1666 nvgpu_err(g, "mmu fault from pmu"); 1667 nvgpu_err(g, "mmu fault from pmu");
1667 } else 1668 } else
1668 nvgpu_err(g, "couldn't locate channel for mmu fault"); 1669 nvgpu_err(g, "couldn't locate channel for mmu fault");
@@ -3973,12 +3974,12 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
3973 3974
3974 gk20a_dbg_fn(""); 3975 gk20a_dbg_fn("");
3975 3976
3976 err = gk20a_alloc_inst_block(g, &ch->inst_block); 3977 err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
3977 if (err) 3978 if (err)
3978 return err; 3979 return err;
3979 3980
3980 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", 3981 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
3981 ch->chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); 3982 ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
3982 3983
3983 gk20a_dbg_fn("done"); 3984 gk20a_dbg_fn("done");
3984 return 0; 3985 return 0;
@@ -3986,7 +3987,7 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
3986 3987
3987void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch) 3988void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
3988{ 3989{
3989 gk20a_free_inst_block(g, &ch->inst_block); 3990 nvgpu_free_inst_block(g, &ch->inst_block);
3990} 3991}
3991 3992
3992u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) 3993u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 2bc7d9a8..ea5d55a4 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -36,6 +36,7 @@
36#include <nvgpu/kref.h> 36#include <nvgpu/kref.h>
37 37
38struct gk20a_debug_output; 38struct gk20a_debug_output;
39struct mmu_fault_info;
39 40
40#define MAX_RUNLIST_BUFFERS 2 41#define MAX_RUNLIST_BUFFERS 2
41 42
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 2d09c0bb..e3c2397c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -34,6 +34,7 @@
34#include <nvgpu/gmmu.h> 34#include <nvgpu/gmmu.h>
35#include <nvgpu/ltc.h> 35#include <nvgpu/ltc.h>
36#include <nvgpu/vidmem.h> 36#include <nvgpu/vidmem.h>
37#include <nvgpu/mm.h>
37 38
38#include <trace/events/gk20a.h> 39#include <trace/events/gk20a.h>
39 40
@@ -107,7 +108,7 @@ int gk20a_prepare_poweroff(struct gk20a *g)
107 ret |= nvgpu_pmu_destroy(g); 108 ret |= nvgpu_pmu_destroy(g);
108 109
109 ret |= gk20a_gr_suspend(g); 110 ret |= gk20a_gr_suspend(g);
110 ret |= gk20a_mm_suspend(g); 111 ret |= nvgpu_mm_suspend(g);
111 ret |= gk20a_fifo_suspend(g); 112 ret |= gk20a_fifo_suspend(g);
112 113
113 gk20a_ce_suspend(g); 114 gk20a_ce_suspend(g);
@@ -213,7 +214,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
213 goto done; 214 goto done;
214 } 215 }
215 216
216 err = gk20a_init_mm_support(g); 217 err = nvgpu_init_mm_support(g);
217 if (err) { 218 if (err) {
218 nvgpu_err(g, "failed to init gk20a mm"); 219 nvgpu_err(g, "failed to init gk20a mm");
219 goto done; 220 goto done;
@@ -314,7 +315,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
314 315
315 gk20a_init_ce_support(g); 316 gk20a_init_ce_support(g);
316 317
317 gk20a_init_mm_ce_context(g); 318 nvgpu_init_mm_ce_context(g);
318 319
319 if (g->ops.xve.available_speeds) { 320 if (g->ops.xve.available_speeds) {
320 u32 speed; 321 u32 speed;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 92bcb618..9c09e85f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -47,6 +47,7 @@ struct nvgpu_warpstate;
47#include <nvgpu/thread.h> 47#include <nvgpu/thread.h>
48#include <nvgpu/io.h> 48#include <nvgpu/io.h>
49 49
50#include <nvgpu/mm.h>
50#include <nvgpu/as.h> 51#include <nvgpu/as.h>
51#include <nvgpu/log.h> 52#include <nvgpu/log.h>
52#include <nvgpu/pramin.h> 53#include <nvgpu/pramin.h>
@@ -756,6 +757,8 @@ struct gpu_ops {
756 u64 (*gpu_phys_addr)(struct gk20a *g, 757 u64 (*gpu_phys_addr)(struct gk20a *g,
757 struct nvgpu_gmmu_attrs *attrs, u64 phys); 758 struct nvgpu_gmmu_attrs *attrs, u64 phys);
758 size_t (*get_vidmem_size)(struct gk20a *g); 759 size_t (*get_vidmem_size)(struct gk20a *g);
760 int (*alloc_inst_block)(struct gk20a *g,
761 struct nvgpu_mem *inst_block);
759 void (*init_inst_block)(struct nvgpu_mem *inst_block, 762 void (*init_inst_block)(struct nvgpu_mem *inst_block,
760 struct vm_gk20a *vm, u32 big_page_size); 763 struct vm_gk20a *vm, u32 big_page_size);
761 bool (*mmu_fault_pending)(struct gk20a *g); 764 bool (*mmu_fault_pending)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d6732453..6d370250 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -37,6 +37,7 @@
37#include <nvgpu/enabled.h> 37#include <nvgpu/enabled.h>
38#include <nvgpu/debug.h> 38#include <nvgpu/debug.h>
39#include <nvgpu/barrier.h> 39#include <nvgpu/barrier.h>
40#include <nvgpu/mm.h>
40 41
41#include "gk20a.h" 42#include "gk20a.h"
42#include "kind_gk20a.h" 43#include "kind_gk20a.h"
@@ -731,7 +732,7 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
731 732
732static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) 733static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
733{ 734{
734 u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block) 735 u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block)
735 >> ram_in_base_shift_v()); 736 >> ram_in_base_shift_v());
736 u32 aperture = nvgpu_aperture_mask(g, inst_block, 737 u32 aperture = nvgpu_aperture_mask(g, inst_block,
737 gr_fecs_current_ctx_target_sys_mem_ncoh_f(), 738 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
@@ -744,7 +745,7 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
744static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, 745static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
745 struct channel_gk20a *c) 746 struct channel_gk20a *c)
746{ 747{
747 u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) 748 u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
748 >> ram_in_base_shift_v()); 749 >> ram_in_base_shift_v());
749 u32 data = fecs_current_ctx_data(g, &c->inst_block); 750 u32 data = fecs_current_ctx_data(g, &c->inst_block);
750 u32 ret; 751 u32 ret;
@@ -1980,7 +1981,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1980 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; 1981 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
1981 int err; 1982 int err;
1982 1983
1983 err = gk20a_alloc_inst_block(g, &ucode_info->inst_blk_desc); 1984 err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
1984 if (err) 1985 if (err)
1985 return err; 1986 return err;
1986 1987
@@ -2154,7 +2155,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2154 2155
2155 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); 2156 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
2156 2157
2157 inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc); 2158 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2158 gk20a_writel(g, gr_fecs_new_ctx_r(), 2159 gk20a_writel(g, gr_fecs_new_ctx_r(),
2159 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | 2160 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2160 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2161 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
@@ -5455,7 +5456,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5455 if (!gk20a_channel_get(ch)) 5456 if (!gk20a_channel_get(ch))
5456 continue; 5457 continue;
5457 5458
5458 if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 5459 if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >>
5459 ram_in_base_shift_v()) == 5460 ram_in_base_shift_v()) ==
5460 gr_fecs_current_ctx_ptr_v(curr_ctx)) { 5461 gr_fecs_current_ctx_ptr_v(curr_ctx)) {
5461 tsgid = ch->tsgid; 5462 tsgid = ch->tsgid;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1..a17d6bb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * GK20A memory management
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 * 3 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
24 22
25#include <trace/events/gk20a.h> 23#include <trace/events/gk20a.h>
26 24
25#include <nvgpu/mm.h>
27#include <nvgpu/vm.h> 26#include <nvgpu/vm.h>
28#include <nvgpu/vm_area.h> 27#include <nvgpu/vm_area.h>
29#include <nvgpu/dma.h> 28#include <nvgpu/dma.h>
@@ -88,161 +87,6 @@
88 * 87 *
89 */ 88 */
90 89
91static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
92static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
93static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
94static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
95static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
96
97static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
98{
99 gk20a_dbg_fn("");
100 if (g->ops.fb.reset)
101 g->ops.fb.reset(g);
102
103 if (g->ops.clock_gating.slcg_fb_load_gating_prod)
104 g->ops.clock_gating.slcg_fb_load_gating_prod(g,
105 g->slcg_enabled);
106 if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
107 g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
108 g->slcg_enabled);
109 if (g->ops.clock_gating.blcg_fb_load_gating_prod)
110 g->ops.clock_gating.blcg_fb_load_gating_prod(g,
111 g->blcg_enabled);
112 if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
113 g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
114 g->blcg_enabled);
115
116 if (g->ops.fb.init_fs_state)
117 g->ops.fb.init_fs_state(g);
118
119 return 0;
120}
121
122static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
123{
124 struct gk20a *g = gk20a_from_mm(mm);
125
126 if (mm->vidmem.ce_ctx_id != (u32)~0)
127 gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
128
129 mm->vidmem.ce_ctx_id = (u32)~0;
130
131 nvgpu_vm_put(mm->ce.vm);
132}
133
134static void gk20a_remove_mm_support(struct mm_gk20a *mm)
135{
136 struct gk20a *g = gk20a_from_mm(mm);
137
138 if (g->ops.mm.fault_info_mem_destroy)
139 g->ops.mm.fault_info_mem_destroy(g);
140
141 if (g->ops.mm.remove_bar2_vm)
142 g->ops.mm.remove_bar2_vm(g);
143
144 if (g->ops.mm.is_bar1_supported(g)) {
145 gk20a_free_inst_block(g, &mm->bar1.inst_block);
146 nvgpu_vm_put(mm->bar1.vm);
147 }
148
149 gk20a_free_inst_block(g, &mm->pmu.inst_block);
150 gk20a_free_inst_block(g, &mm->hwpm.inst_block);
151 nvgpu_vm_put(mm->pmu.vm);
152 nvgpu_vm_put(mm->cde.vm);
153
154 nvgpu_semaphore_sea_destroy(g);
155 nvgpu_vidmem_destroy(g);
156 nvgpu_pd_cache_fini(g);
157}
158
159static int gk20a_alloc_sysmem_flush(struct gk20a *g)
160{
161 return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
162}
163
164int gk20a_init_mm_setup_sw(struct gk20a *g)
165{
166 struct mm_gk20a *mm = &g->mm;
167 int err;
168
169 gk20a_dbg_fn("");
170
171 if (mm->sw_ready) {
172 gk20a_dbg_fn("skip init");
173 return 0;
174 }
175
176 mm->g = g;
177 nvgpu_mutex_init(&mm->l2_op_lock);
178
179 /*TBD: make channel vm size configurable */
180 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
181 NV_MM_DEFAULT_KERNEL_SIZE;
182 mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
183
184 gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
185 (int)(mm->channel.user_size >> 20),
186 (int)(mm->channel.kernel_size >> 20));
187
188 nvgpu_init_pramin(mm);
189
190 mm->vidmem.ce_ctx_id = (u32)~0;
191
192 err = nvgpu_vidmem_init(mm);
193 if (err)
194 return err;
195
196 /*
197 * this requires fixed allocations in vidmem which must be
198 * allocated before all other buffers
199 */
200 if (g->ops.pmu.alloc_blob_space
201 && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
202 err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
203 if (err)
204 return err;
205 }
206
207 err = gk20a_alloc_sysmem_flush(g);
208 if (err)
209 return err;
210
211 if (g->ops.mm.is_bar1_supported(g)) {
212 err = gk20a_init_bar1_vm(mm);
213 if (err)
214 return err;
215 }
216 if (g->ops.mm.init_bar2_vm) {
217 err = g->ops.mm.init_bar2_vm(g);
218 if (err)
219 return err;
220 }
221 err = gk20a_init_system_vm(mm);
222 if (err)
223 return err;
224
225 err = gk20a_init_hwpm(mm);
226 if (err)
227 return err;
228
229 err = gk20a_init_cde_vm(mm);
230 if (err)
231 return err;
232
233 err = gk20a_init_ce_vm(mm);
234 if (err)
235 return err;
236
237 mm->remove_support = gk20a_remove_mm_support;
238 mm->remove_ce_support = gk20a_remove_mm_ce_support;
239
240 mm->sw_ready = true;
241
242 gk20a_dbg_fn("done");
243 return 0;
244}
245
246/* make sure gk20a_init_mm_support is called before */ 90/* make sure gk20a_init_mm_support is called before */
247int gk20a_init_mm_setup_hw(struct gk20a *g) 91int gk20a_init_mm_setup_hw(struct gk20a *g)
248{ 92{
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
274 return 0; 118 return 0;
275} 119}
276 120
277int gk20a_init_mm_support(struct gk20a *g)
278{
279 u32 err;
280
281 err = gk20a_init_mm_reset_enable_hw(g);
282 if (err)
283 return err;
284
285 err = gk20a_init_mm_setup_sw(g);
286 if (err)
287 return err;
288
289 if (g->ops.mm.init_mm_setup_hw)
290 err = g->ops.mm.init_mm_setup_hw(g);
291
292 return err;
293}
294
295void gk20a_init_mm_ce_context(struct gk20a *g)
296{
297#if defined(CONFIG_GK20A_VIDMEM)
298 if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
299 g->mm.vidmem.ce_ctx_id =
300 gk20a_ce_create_context_with_cb(g,
301 gk20a_fifo_get_fast_ce_runlist_id(g),
302 -1,
303 -1,
304 -1,
305 NULL);
306
307 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
308 nvgpu_err(g,
309 "Failed to allocate CE context for vidmem page clearing support");
310 }
311#endif
312}
313
314int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) 121int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
315{ 122{
316 return vm->mmu_levels[0].lo_bit[0]; 123 return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
505 {.update_entry = NULL} 312 {.update_entry = NULL}
506}; 313};
507 314
508/*
509 * Attempt to find a reserved memory area to determine PTE size for the passed
510 * mapping. If no reserved area can be found use small pages.
511 */
512enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
513 u64 base, u64 size)
514{
515 struct nvgpu_vm_area *vm_area;
516
517 vm_area = nvgpu_vm_area_find(vm, base);
518 if (!vm_area)
519 return gmmu_page_size_small;
520
521 return vm_area->pgsz_idx;
522}
523
524/*
525 * This is for when the address space does not support unified address spaces.
526 */
527static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
528 u64 base, u64 size)
529{
530 if (!base) {
531 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
532 return gmmu_page_size_big;
533 return gmmu_page_size_small;
534 } else {
535 if (base < __nv_gmmu_va_small_page_limit())
536 return gmmu_page_size_small;
537 else
538 return gmmu_page_size_big;
539 }
540}
541
542/*
543 * This determines the PTE size for a given alloc. Used by both the GVA space
544 * allocator and the mm core code so that agreement can be reached on how to
545 * map allocations.
546 *
547 * The page size of a buffer is this:
548 *
549 * o If the VM doesn't support large pages then obviously small pages
550 * must be used.
551 * o If the base address is non-zero (fixed address map):
552 * - Attempt to find a reserved memory area and use the page size
553 * based on that.
554 * - If no reserved page size is available, default to small pages.
555 * o If the base is zero:
556 * - If the size is larger than or equal to the big page size, use big
557 * pages.
558 * - Otherwise use small pages.
559 */
560enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
561{
562 struct gk20a *g = gk20a_from_vm(vm);
563
564 if (!vm->big_pages)
565 return gmmu_page_size_small;
566
567 if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
568 return __get_pte_size_split_addr(vm, base, size);
569
570 if (base)
571 return __get_pte_size_fixed_map(vm, base, size);
572
573 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
574 return gmmu_page_size_big;
575 return gmmu_page_size_small;
576}
577
578int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) 315int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
579{ 316{
580 int err = 0; 317 int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
599 return __gk20a_vm_bind_channel(as_share->vm, ch); 336 return __gk20a_vm_bind_channel(as_share->vm, ch);
600} 337}
601 338
602int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
603{
604 int err;
605
606 gk20a_dbg_fn("");
607
608 err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
609 if (err) {
610 nvgpu_err(g, "%s: memory allocation failed", __func__);
611 return err;
612 }
613
614 gk20a_dbg_fn("done");
615 return 0;
616}
617
618void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
619{
620 if (inst_block->size)
621 nvgpu_dma_free(g, inst_block);
622}
623
624u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
625{
626 if (g->mm.has_physical_mode)
627 return nvgpu_mem_get_phys_addr(g, inst_block);
628 else
629 return nvgpu_mem_get_addr(g, inst_block);
630}
631
632static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
633{
634 int err;
635 struct gk20a *g = gk20a_from_mm(mm);
636 struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
637 u32 big_page_size = g->ops.mm.get_default_big_page_size();
638
639 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
640 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
641 mm->bar1.vm = nvgpu_vm_init(g,
642 big_page_size,
643 SZ_4K,
644 mm->bar1.aperture_size - SZ_4K,
645 mm->bar1.aperture_size,
646 true, false,
647 "bar1");
648 if (!mm->bar1.vm)
649 return -ENOMEM;
650
651 err = gk20a_alloc_inst_block(g, inst_block);
652 if (err)
653 goto clean_up_vm;
654 g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
655
656 return 0;
657
658clean_up_vm:
659 nvgpu_vm_put(mm->bar1.vm);
660 return err;
661}
662
663/* pmu vm, share channel_vm interfaces */
664static int gk20a_init_system_vm(struct mm_gk20a *mm)
665{
666 int err;
667 struct gk20a *g = gk20a_from_mm(mm);
668 struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
669 u32 big_page_size = g->ops.mm.get_default_big_page_size();
670 u32 low_hole, aperture_size;
671
672 /*
673 * No user region - so we will pass that as zero sized.
674 */
675 low_hole = SZ_4K * 16;
676 aperture_size = GK20A_PMU_VA_SIZE * 2;
677
678 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
679 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
680
681 mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
682 low_hole,
683 aperture_size - low_hole,
684 aperture_size,
685 true,
686 false,
687 "system");
688 if (!mm->pmu.vm)
689 return -ENOMEM;
690
691 err = gk20a_alloc_inst_block(g, inst_block);
692 if (err)
693 goto clean_up_vm;
694 g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
695
696 return 0;
697
698clean_up_vm:
699 nvgpu_vm_put(mm->pmu.vm);
700 return err;
701}
702
703static int gk20a_init_hwpm(struct mm_gk20a *mm)
704{
705 int err;
706 struct gk20a *g = gk20a_from_mm(mm);
707 struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
708
709 err = gk20a_alloc_inst_block(g, inst_block);
710 if (err)
711 return err;
712 g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
713
714 return 0;
715}
716
717static int gk20a_init_cde_vm(struct mm_gk20a *mm)
718{
719 struct gk20a *g = gk20a_from_mm(mm);
720 u32 big_page_size = g->ops.mm.get_default_big_page_size();
721
722 mm->cde.vm = nvgpu_vm_init(g, big_page_size,
723 big_page_size << 10,
724 NV_MM_DEFAULT_KERNEL_SIZE,
725 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
726 false, false, "cde");
727 if (!mm->cde.vm)
728 return -ENOMEM;
729 return 0;
730}
731
732static int gk20a_init_ce_vm(struct mm_gk20a *mm)
733{
734 struct gk20a *g = gk20a_from_mm(mm);
735 u32 big_page_size = g->ops.mm.get_default_big_page_size();
736
737 mm->ce.vm = nvgpu_vm_init(g, big_page_size,
738 big_page_size << 10,
739 NV_MM_DEFAULT_KERNEL_SIZE,
740 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
741 false, false, "ce");
742 if (!mm->ce.vm)
743 return -ENOMEM;
744 return 0;
745}
746
747void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, 339void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
748 struct vm_gk20a *vm) 340 struct vm_gk20a *vm)
749{ 341{
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
770 struct gk20a *g = gk20a_from_vm(vm); 362 struct gk20a *g = gk20a_from_vm(vm);
771 363
772 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", 364 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
773 gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); 365 nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
774 366
775 g->ops.mm.init_pdb(g, inst_block, vm); 367 g->ops.mm.init_pdb(g, inst_block, vm);
776 368
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
784 g->ops.mm.set_big_page_size(g, inst_block, big_page_size); 376 g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
785} 377}
786 378
379int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
380{
381 int err;
382
383 gk20a_dbg_fn("");
384
385 err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
386 if (err) {
387 nvgpu_err(g, "%s: memory allocation failed", __func__);
388 return err;
389 }
390
391 gk20a_dbg_fn("done");
392 return 0;
393}
394
787int gk20a_mm_fb_flush(struct gk20a *g) 395int gk20a_mm_fb_flush(struct gk20a *g)
788{ 396{
789 struct mm_gk20a *mm = &g->mm; 397 struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
992 gk20a_idle_nosuspend(g); 600 gk20a_idle_nosuspend(g);
993} 601}
994 602
995int gk20a_mm_suspend(struct gk20a *g)
996{
997 gk20a_dbg_fn("");
998
999 nvgpu_vidmem_thread_pause_sync(&g->mm);
1000
1001 g->ops.mm.cbc_clean(g);
1002 g->ops.mm.l2_flush(g, false);
1003
1004 gk20a_dbg_fn("done");
1005 return 0;
1006}
1007
1008u32 gk20a_mm_get_iommu_bit(struct gk20a *g) 603u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
1009{ 604{
1010 return 34; 605 return 34;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 15876b10..434fc422 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -35,11 +35,6 @@
35#include <nvgpu/list.h> 35#include <nvgpu/list.h>
36#include <nvgpu/rbtree.h> 36#include <nvgpu/rbtree.h>
37#include <nvgpu/kref.h> 37#include <nvgpu/kref.h>
38#include <nvgpu/atomic.h>
39#include <nvgpu/cond.h>
40#include <nvgpu/thread.h>
41
42struct nvgpu_pd_cache;
43 38
44#ifdef CONFIG_ARM64 39#ifdef CONFIG_ARM64
45#define outer_flush_range(a, b) 40#define outer_flush_range(a, b)
@@ -138,218 +133,23 @@ struct priv_cmd_entry {
138struct gk20a; 133struct gk20a;
139struct channel_gk20a; 134struct channel_gk20a;
140 135
141int gk20a_init_mm_support(struct gk20a *g);
142int gk20a_init_mm_setup_sw(struct gk20a *g);
143int gk20a_init_mm_setup_hw(struct gk20a *g);
144void gk20a_init_mm_ce_context(struct gk20a *g);
145
146int gk20a_mm_fb_flush(struct gk20a *g); 136int gk20a_mm_fb_flush(struct gk20a *g);
147void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); 137void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
148void gk20a_mm_cbc_clean(struct gk20a *g); 138void gk20a_mm_cbc_clean(struct gk20a *g);
149void gk20a_mm_l2_invalidate(struct gk20a *g); 139void gk20a_mm_l2_invalidate(struct gk20a *g);
150 140
151#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */
152
153struct mmu_fault_info {
154 u64 inst_ptr;
155 u32 inst_aperture;
156 u64 fault_addr;
157 u32 fault_addr_aperture;
158 u32 timestamp_lo;
159 u32 timestamp_hi;
160 u32 mmu_engine_id;
161 u32 gpc_id;
162 u32 client_type;
163 u32 client_id;
164 u32 fault_type;
165 u32 access_type;
166 u32 protected_mode;
167 u32 replayable_fault;
168 u32 replay_fault_en;
169 u32 valid;
170 u32 faulted_pbdma;
171 u32 faulted_engine;
172 u32 faulted_subid;
173 u32 chid;
174 struct channel_gk20a *refch;
175 const char *client_type_desc;
176 const char *fault_type_desc;
177 const char *client_id_desc;
178};
179
180struct mm_gk20a {
181 struct gk20a *g;
182
183 /* GPU VA default sizes address spaces for channels */
184 struct {
185 u64 user_size; /* userspace-visible GPU VA region */
186 u64 kernel_size; /* kernel-only GPU VA region */
187 } channel;
188
189 struct {
190 u32 aperture_size;
191 struct vm_gk20a *vm;
192 struct nvgpu_mem inst_block;
193 } bar1;
194
195 struct {
196 u32 aperture_size;
197 struct vm_gk20a *vm;
198 struct nvgpu_mem inst_block;
199 } bar2;
200
201 struct {
202 u32 aperture_size;
203 struct vm_gk20a *vm;
204 struct nvgpu_mem inst_block;
205 } pmu;
206
207 struct {
208 /* using pmu vm currently */
209 struct nvgpu_mem inst_block;
210 } hwpm;
211
212 struct {
213 struct vm_gk20a *vm;
214 struct nvgpu_mem inst_block;
215 } perfbuf;
216
217 struct {
218 struct vm_gk20a *vm;
219 } cde;
220
221 struct {
222 struct vm_gk20a *vm;
223 } ce;
224
225 struct nvgpu_pd_cache *pd_cache;
226
227 struct nvgpu_mutex l2_op_lock;
228 struct nvgpu_mutex tlb_lock;
229 struct nvgpu_mutex priv_lock;
230
231 struct nvgpu_mem bar2_desc;
232
233#ifdef CONFIG_TEGRA_19x_GPU
234 struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM];
235 unsigned int hw_fault_buf_status[FAULT_TYPE_NUM];
236 struct mmu_fault_info *fault_info[FAULT_TYPE_NUM];
237 struct nvgpu_mutex hub_isr_mutex;
238 u32 hub_intr_types;
239#endif
240 /*
241 * Separate function to cleanup the CE since it requires a channel to
242 * be closed which must happen before fifo cleanup.
243 */
244 void (*remove_ce_support)(struct mm_gk20a *mm);
245 void (*remove_support)(struct mm_gk20a *mm);
246 bool sw_ready;
247 int physical_bits;
248 bool use_full_comp_tag_line;
249 bool ltc_enabled_current;
250 bool ltc_enabled_target;
251 bool bypass_smmu;
252 bool disable_bigpage;
253 bool has_physical_mode;
254
255 struct nvgpu_mem sysmem_flush;
256
257 u32 pramin_window;
258 struct nvgpu_spinlock pramin_window_lock;
259 bool force_pramin; /* via debugfs */
260
261 struct {
262 size_t size;
263 u64 base;
264 size_t bootstrap_size;
265 u64 bootstrap_base;
266
267 struct nvgpu_allocator allocator;
268 struct nvgpu_allocator bootstrap_allocator;
269
270 u32 ce_ctx_id;
271 volatile bool cleared;
272 struct nvgpu_mutex first_clear_mutex;
273
274 struct nvgpu_list_node clear_list_head;
275 struct nvgpu_mutex clear_list_mutex;
276
277 struct nvgpu_cond clearing_thread_cond;
278 struct nvgpu_thread clearing_thread;
279 struct nvgpu_mutex clearing_thread_lock;
280 nvgpu_atomic_t pause_count;
281
282 nvgpu_atomic64_t bytes_pending;
283 } vidmem;
284};
285
286int gk20a_mm_init(struct mm_gk20a *mm);
287
288#define gk20a_from_mm(mm) ((mm)->g)
289#define gk20a_from_vm(vm) ((vm)->mm->g)
290
291#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) 141#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
292 142
293#define DEFAULT_ALLOC_ALIGNMENT (4*1024) 143void gk20a_mm_ltc_isr(struct gk20a *g);
294
295static inline int bar1_aperture_size_mb_gk20a(void)
296{
297 return 16; /* 16MB is more than enough atm. */
298}
299
300/* The maximum GPU VA range supported */
301#define NV_GMMU_VA_RANGE 38
302
303/* The default userspace-visible GPU VA size */
304#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37)
305
306/* The default kernel-reserved GPU VA size */
307#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
308
309/*
310 * When not using unified address spaces, the bottom 56GB of the space are used
311 * for small pages, and the remaining high memory is used for large pages.
312 */
313static inline u64 __nv_gmmu_va_small_page_limit(void)
314{
315 return ((u64)SZ_1G * 56);
316}
317
318enum nvgpu_flush_op {
319 NVGPU_FLUSH_DEFAULT,
320 NVGPU_FLUSH_FB,
321 NVGPU_FLUSH_L2_INV,
322 NVGPU_FLUSH_L2_FLUSH,
323 NVGPU_FLUSH_CBC_CLEAN,
324};
325 144
326enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, 145bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
327 u64 base, u64 size);
328enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
329 146
330#if 0 /*related to addr bits above, concern below TBD on which is accurate */ 147int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
331#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
332 bus_bar1_block_ptr_s())
333#else
334#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
335#endif
336 148
337int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); 149int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
338void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
339void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, 150void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
340 u32 big_page_size); 151 u32 big_page_size);
341u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); 152int gk20a_init_mm_setup_hw(struct gk20a *g);
342
343void gk20a_mm_dump_vm(struct vm_gk20a *vm,
344 u64 va_begin, u64 va_end, char *label);
345
346int gk20a_mm_suspend(struct gk20a *g);
347
348void gk20a_mm_ltc_isr(struct gk20a *g);
349
350bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
351
352int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
353 153
354u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 154u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
355 u64 map_offset, 155 u64 map_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index e4dd6a59..2b954e1a 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -30,6 +30,7 @@
30#include <nvgpu/bug.h> 30#include <nvgpu/bug.h>
31#include <nvgpu/firmware.h> 31#include <nvgpu/firmware.h>
32#include <nvgpu/falcon.h> 32#include <nvgpu/falcon.h>
33#include <nvgpu/mm.h>
33 34
34#include "gk20a.h" 35#include "gk20a.h"
35#include "gr_gk20a.h" 36#include "gr_gk20a.h"
@@ -181,7 +182,7 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu)
181 pwr_falcon_itfen_ctxen_enable_f()); 182 pwr_falcon_itfen_ctxen_enable_f());
182 gk20a_writel(g, pwr_pmu_new_instblk_r(), 183 gk20a_writel(g, pwr_pmu_new_instblk_r(),
183 pwr_pmu_new_instblk_ptr_f( 184 pwr_pmu_new_instblk_ptr_f(
184 gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 185 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
185 pwr_pmu_new_instblk_valid_f(1) | 186 pwr_pmu_new_instblk_valid_f(1) |
186 pwr_pmu_new_instblk_target_sys_coh_f()); 187 pwr_pmu_new_instblk_target_sys_coh_f());
187 188
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 7029b477..557948e1 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -21,7 +21,6 @@
21 */ 21 */
22 22
23#include <nvgpu/types.h> 23#include <nvgpu/types.h>
24
25#include <nvgpu/dma.h> 24#include <nvgpu/dma.h>
26#include <nvgpu/gmmu.h> 25#include <nvgpu/gmmu.h>
27#include <nvgpu/timers.h> 26#include <nvgpu/timers.h>
@@ -33,6 +32,7 @@
33#include <nvgpu/pmu.h> 32#include <nvgpu/pmu.h>
34#include <nvgpu/falcon.h> 33#include <nvgpu/falcon.h>
35#include <nvgpu/enabled.h> 34#include <nvgpu/enabled.h>
35#include <nvgpu/mm.h>
36 36
37#include "gk20a/gk20a.h" 37#include "gk20a/gk20a.h"
38#include "gk20a/pmu_gk20a.h" 38#include "gk20a/pmu_gk20a.h"
@@ -1170,7 +1170,7 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
1170 pwr_falcon_itfen_ctxen_enable_f()); 1170 pwr_falcon_itfen_ctxen_enable_f());
1171 gk20a_writel(g, pwr_pmu_new_instblk_r(), 1171 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1172 pwr_pmu_new_instblk_ptr_f( 1172 pwr_pmu_new_instblk_ptr_f(
1173 gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 1173 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
1174 pwr_pmu_new_instblk_valid_f(1) | 1174 pwr_pmu_new_instblk_valid_f(1) |
1175 pwr_pmu_new_instblk_target_sys_coh_f()); 1175 pwr_pmu_new_instblk_target_sys_coh_f());
1176 1176
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
index b8d42f7a..34c8d4b7 100644
--- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -24,6 +24,7 @@
24 24
25#include <nvgpu/timers.h> 25#include <nvgpu/timers.h>
26#include <nvgpu/bus.h> 26#include <nvgpu/bus.h>
27#include <nvgpu/mm.h>
27 28
28#include "bus_gm20b.h" 29#include "bus_gm20b.h"
29#include "gk20a/gk20a.h" 30#include "gk20a/gk20a.h"
@@ -35,8 +36,8 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
35{ 36{
36 struct nvgpu_timeout timeout; 37 struct nvgpu_timeout timeout;
37 int err = 0; 38 int err = 0;
38 u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); 39 u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
39 u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); 40 u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
40 41
41 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); 42 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
42 43
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index f4ddd92f..0762e8bd 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -31,6 +31,7 @@
31#include <nvgpu/log.h> 31#include <nvgpu/log.h>
32#include <nvgpu/atomic.h> 32#include <nvgpu/atomic.h>
33#include <nvgpu/barrier.h> 33#include <nvgpu/barrier.h>
34#include <nvgpu/mm.h>
34 35
35#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> 36#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
36#include <nvgpu/hw/gm20b/hw_ram_gm20b.h> 37#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -42,7 +43,7 @@ void channel_gm20b_bind(struct channel_gk20a *c)
42{ 43{
43 struct gk20a *g = c->g; 44 struct gk20a *g = c->g;
44 45
45 u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) 46 u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block)
46 >> ram_in_base_shift_v(); 47 >> ram_in_base_shift_v();
47 48
48 gk20a_dbg_info("bind channel %d inst ptr 0x%08x", 49 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 269fd7f1..d081fb24 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -442,6 +442,7 @@ static const struct gpu_ops gm20b_ops = {
442 .init_pdb = gk20a_mm_init_pdb, 442 .init_pdb = gk20a_mm_init_pdb,
443 .init_mm_setup_hw = gk20a_init_mm_setup_hw, 443 .init_mm_setup_hw = gk20a_init_mm_setup_hw,
444 .is_bar1_supported = gm20b_mm_is_bar1_supported, 444 .is_bar1_supported = gm20b_mm_is_bar1_supported,
445 .alloc_inst_block = gk20a_alloc_inst_block,
445 .init_inst_block = gk20a_init_inst_block, 446 .init_inst_block = gk20a_init_inst_block,
446 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, 447 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
447 .get_kind_invalid = gm20b_get_kind_invalid, 448 .get_kind_invalid = gm20b_get_kind_invalid,
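Note: with .alloc_inst_block published in the per-chip ops tables (here and in the gp106/gp10b tables below), common code can allocate an instance block without calling gk20a_alloc_inst_block() directly. A sketch of the intended call pattern; the wrapper name is hypothetical, and the gp10b BAR2 hunk below uses exactly this HAL call:

/*
 * Hypothetical wrapper showing the chip-independent allocation path
 * enabled by the new HAL hook.
 */
#include <nvgpu/mm.h>
#include "gk20a/gk20a.h"

static int chip_agnostic_alloc_inst_block(struct gk20a *g,
					  struct nvgpu_mem *inst_block)
{
	int err = g->ops.mm.alloc_inst_block(g, inst_block);

	if (err)
		return err;

	/* Pair with nvgpu_free_inst_block() when the block is torn down. */
	return 0;
}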
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 1246ee7f..59f72e13 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -524,6 +524,7 @@ static const struct gpu_ops gp106_ops = {
524 .init_pdb = gp10b_mm_init_pdb, 524 .init_pdb = gp10b_mm_init_pdb,
525 .init_mm_setup_hw = gp10b_init_mm_setup_hw, 525 .init_mm_setup_hw = gp10b_init_mm_setup_hw,
526 .is_bar1_supported = gm20b_mm_is_bar1_supported, 526 .is_bar1_supported = gm20b_mm_is_bar1_supported,
527 .alloc_inst_block = gk20a_alloc_inst_block,
527 .init_inst_block = gk20a_init_inst_block, 528 .init_inst_block = gk20a_init_inst_block,
528 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, 529 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
529 .init_bar2_vm = gb10b_init_bar2_vm, 530 .init_bar2_vm = gb10b_init_bar2_vm,
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 9f0fe375..26ded39e 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -22,6 +22,7 @@
22 22
23#include <nvgpu/pmu.h> 23#include <nvgpu/pmu.h>
24#include <nvgpu/falcon.h> 24#include <nvgpu/falcon.h>
25#include <nvgpu/mm.h>
25 26
26#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
27#include "sec2_gp106.h" 28#include "sec2_gp106.h"
@@ -88,7 +89,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
88 89
89 gk20a_writel(g, psec_falcon_nxtctx_r(), 90 gk20a_writel(g, psec_falcon_nxtctx_r(),
90 pwr_pmu_new_instblk_ptr_f( 91 pwr_pmu_new_instblk_ptr_f(
91 gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 92 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
92 pwr_pmu_new_instblk_valid_f(1) | 93 pwr_pmu_new_instblk_valid_f(1) |
93 nvgpu_aperture_mask(g, &mm->pmu.inst_block, 94 nvgpu_aperture_mask(g, &mm->pmu.inst_block,
94 pwr_pmu_new_instblk_target_sys_coh_f(), 95 pwr_pmu_new_instblk_target_sys_coh_f(),
@@ -154,7 +155,7 @@ void init_pmu_setup_hw1(struct gk20a *g)
154 pwr_falcon_itfen_ctxen_enable_f()); 155 pwr_falcon_itfen_ctxen_enable_f());
155 gk20a_writel(g, pwr_pmu_new_instblk_r(), 156 gk20a_writel(g, pwr_pmu_new_instblk_r(),
156 pwr_pmu_new_instblk_ptr_f( 157 pwr_pmu_new_instblk_ptr_f(
157 gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 158 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
158 pwr_pmu_new_instblk_valid_f(1) | 159 pwr_pmu_new_instblk_valid_f(1) |
159 nvgpu_aperture_mask(g, &mm->pmu.inst_block, 160 nvgpu_aperture_mask(g, &mm->pmu.inst_block,
160 pwr_pmu_new_instblk_target_sys_coh_f(), 161 pwr_pmu_new_instblk_target_sys_coh_f(),
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index b80722b8..a10df740 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -484,6 +484,7 @@ static const struct gpu_ops gp10b_ops = {
484 .init_pdb = gp10b_mm_init_pdb, 484 .init_pdb = gp10b_mm_init_pdb,
485 .init_mm_setup_hw = gp10b_init_mm_setup_hw, 485 .init_mm_setup_hw = gp10b_init_mm_setup_hw,
486 .is_bar1_supported = gm20b_mm_is_bar1_supported, 486 .is_bar1_supported = gm20b_mm_is_bar1_supported,
487 .alloc_inst_block = gk20a_alloc_inst_block,
487 .init_inst_block = gk20a_init_inst_block, 488 .init_inst_block = gk20a_init_inst_block,
488 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, 489 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
489 .init_bar2_vm = gb10b_init_bar2_vm, 490 .init_bar2_vm = gb10b_init_bar2_vm,
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 06a9b929..dc746153 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -22,6 +22,7 @@
22 * DEALINGS IN THE SOFTWARE. 22 * DEALINGS IN THE SOFTWARE.
23 */ 23 */
24 24
25#include <nvgpu/mm.h>
25#include <nvgpu/dma.h> 26#include <nvgpu/dma.h>
26#include <nvgpu/gmmu.h> 27#include <nvgpu/gmmu.h>
27 28
@@ -95,7 +96,7 @@ int gb10b_init_bar2_vm(struct gk20a *g)
95 return -ENOMEM; 96 return -ENOMEM;
96 97
97 /* allocate instance mem for bar2 */ 98 /* allocate instance mem for bar2 */
98 err = gk20a_alloc_inst_block(g, inst_block); 99 err = g->ops.mm.alloc_inst_block(g, inst_block);
99 if (err) 100 if (err)
100 goto clean_up_va; 101 goto clean_up_va;
101 102
@@ -112,7 +113,7 @@ int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
112{ 113{
113 struct mm_gk20a *mm = &g->mm; 114 struct mm_gk20a *mm = &g->mm;
114 struct nvgpu_mem *inst_block = &mm->bar2.inst_block; 115 struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
115 u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); 116 u64 inst_pa = nvgpu_inst_block_addr(g, inst_block);
116 117
117 gk20a_dbg_fn(""); 118 gk20a_dbg_fn("");
118 119
@@ -374,6 +375,6 @@ void gp10b_remove_bar2_vm(struct gk20a *g)
374 struct mm_gk20a *mm = &g->mm; 375 struct mm_gk20a *mm = &g->mm;
375 376
376 gp10b_replayable_pagefault_buffer_deinit(g); 377 gp10b_replayable_pagefault_buffer_deinit(g);
377 gk20a_free_inst_block(g, &mm->bar2.inst_block); 378 nvgpu_free_inst_block(g, &mm->bar2.inst_block);
378 nvgpu_vm_put(mm->bar2.vm); 379 nvgpu_vm_put(mm->bar2.vm);
379} 380}
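Note: teardown mirrors the allocation: the instance block is released through the common nvgpu_free_inst_block() helper before the VM reference is dropped. A minimal sketch of that ordering, assuming the caller owns both objects; the function name is hypothetical:

/*
 * Sketch of the teardown ordering used by gp10b_remove_bar2_vm():
 * free the instance block first, then drop the VM reference.
 */
#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include "gk20a/gk20a.h"

static void teardown_inst_block_vm(struct gk20a *g, struct vm_gk20a *vm,
				   struct nvgpu_mem *inst_block)
{
	nvgpu_free_inst_block(g, inst_block);
	nvgpu_vm_put(vm);
}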
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
new file mode 100644
index 00000000..13b33d9f
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -0,0 +1,220 @@
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
6 * and/or sell copies of the Software, and to permit persons to whom the
7 * Software is furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in
10 * all copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
18 * DEALINGS IN THE SOFTWARE.
19 */
20
21#ifndef __NVGPU_MM_H__
22#define __NVGPU_MM_H__
23
24#include <nvgpu/types.h>
25#include <nvgpu/cond.h>
26#include <nvgpu/thread.h>
27#include <nvgpu/lock.h>
28#include <nvgpu/atomic.h>
29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/allocator.h>
31#include <nvgpu/list.h>
32
33struct gk20a;
34struct vm_gk20a;
35struct nvgpu_mem;
36struct nvgpu_pd_cache;
37
38#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */
39
40struct mmu_fault_info {
41 u64 inst_ptr;
42 u32 inst_aperture;
43 u64 fault_addr;
44 u32 fault_addr_aperture;
45 u32 timestamp_lo;
46 u32 timestamp_hi;
47 u32 mmu_engine_id;
48 u32 gpc_id;
49 u32 client_type;
50 u32 client_id;
51 u32 fault_type;
52 u32 access_type;
53 u32 protected_mode;
54 u32 replayable_fault;
55 u32 replay_fault_en;
56 u32 valid;
57 u32 faulted_pbdma;
58 u32 faulted_engine;
59 u32 faulted_subid;
60 u32 chid;
61 struct channel_gk20a *refch;
62 const char *client_type_desc;
63 const char *fault_type_desc;
64 const char *client_id_desc;
65};
66
67enum nvgpu_flush_op {
68 NVGPU_FLUSH_DEFAULT,
69 NVGPU_FLUSH_FB,
70 NVGPU_FLUSH_L2_INV,
71 NVGPU_FLUSH_L2_FLUSH,
72 NVGPU_FLUSH_CBC_CLEAN,
73};
74
75struct mm_gk20a {
76 struct gk20a *g;
77
78	/* Default sizes of the GPU VA address spaces for channels */
79 struct {
80 u64 user_size; /* userspace-visible GPU VA region */
81 u64 kernel_size; /* kernel-only GPU VA region */
82 } channel;
83
84 struct {
85 u32 aperture_size;
86 struct vm_gk20a *vm;
87 struct nvgpu_mem inst_block;
88 } bar1;
89
90 struct {
91 u32 aperture_size;
92 struct vm_gk20a *vm;
93 struct nvgpu_mem inst_block;
94 } bar2;
95
96 struct {
97 u32 aperture_size;
98 struct vm_gk20a *vm;
99 struct nvgpu_mem inst_block;
100 } pmu;
101
102 struct {
103 /* using pmu vm currently */
104 struct nvgpu_mem inst_block;
105 } hwpm;
106
107 struct {
108 struct vm_gk20a *vm;
109 struct nvgpu_mem inst_block;
110 } perfbuf;
111
112 struct {
113 struct vm_gk20a *vm;
114 } cde;
115
116 struct {
117 struct vm_gk20a *vm;
118 } ce;
119
120 struct nvgpu_pd_cache *pd_cache;
121
122 struct nvgpu_mutex l2_op_lock;
123 struct nvgpu_mutex tlb_lock;
124 struct nvgpu_mutex priv_lock;
125
126 struct nvgpu_mem bar2_desc;
127
128#ifdef CONFIG_TEGRA_19x_GPU
129 struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM];
130 unsigned int hw_fault_buf_status[FAULT_TYPE_NUM];
131 struct mmu_fault_info *fault_info[FAULT_TYPE_NUM];
132 struct nvgpu_mutex hub_isr_mutex;
133 u32 hub_intr_types;
134#endif
135 /*
136	 * Separate function to clean up the CE since it requires a channel to
137	 * be closed, which must happen before fifo cleanup.
138 */
139 void (*remove_ce_support)(struct mm_gk20a *mm);
140 void (*remove_support)(struct mm_gk20a *mm);
141 bool sw_ready;
142 int physical_bits;
143 bool use_full_comp_tag_line;
144 bool ltc_enabled_current;
145 bool ltc_enabled_target;
146 bool bypass_smmu;
147 bool disable_bigpage;
148 bool has_physical_mode;
149
150 struct nvgpu_mem sysmem_flush;
151
152 u32 pramin_window;
153 struct nvgpu_spinlock pramin_window_lock;
154 bool force_pramin; /* via debugfs */
155
156 struct {
157 size_t size;
158 u64 base;
159 size_t bootstrap_size;
160 u64 bootstrap_base;
161
162 struct nvgpu_allocator allocator;
163 struct nvgpu_allocator bootstrap_allocator;
164
165 u32 ce_ctx_id;
166 volatile bool cleared;
167 struct nvgpu_mutex first_clear_mutex;
168
169 struct nvgpu_list_node clear_list_head;
170 struct nvgpu_mutex clear_list_mutex;
171
172 struct nvgpu_cond clearing_thread_cond;
173 struct nvgpu_thread clearing_thread;
174 struct nvgpu_mutex clearing_thread_lock;
175 nvgpu_atomic_t pause_count;
176
177 nvgpu_atomic64_t bytes_pending;
178 } vidmem;
179};
180
181#define gk20a_from_mm(mm) ((mm)->g)
182#define gk20a_from_vm(vm) ((vm)->mm->g)
183
184static inline int bar1_aperture_size_mb_gk20a(void)
185{
186	return 16; /* 16MB is more than enough for now. */
187}
188
189/* The maximum GPU VA range supported */
190#define NV_GMMU_VA_RANGE 38
191
192/* The default userspace-visible GPU VA size */
193#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37)
194
195/* The default kernel-reserved GPU VA size */
196#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
197
198/*
199 * When not using unified address spaces, the bottom 56GB of the space is used
200 * for small pages, and the remaining high memory is used for large pages.
201 */
202static inline u64 __nv_gmmu_va_small_page_limit(void)
203{
204 return ((u64)SZ_1G * 56);
205}
206
207enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
208 u64 base, u64 size);
209enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
210
211void nvgpu_init_mm_ce_context(struct gk20a *g);
212int nvgpu_init_mm_support(struct gk20a *g);
213int nvgpu_init_mm_setup_hw(struct gk20a *g);
214
215u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem);
216void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
217
218int nvgpu_mm_suspend(struct gk20a *g);
219
220#endif
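Note: taken together, the new <nvgpu/mm.h> header gives common code a chip-independent instance block lifecycle: allocate through the HAL hook, query the base address with nvgpu_inst_block_addr(), and release with nvgpu_free_inst_block(). A hedged end-to-end sketch; the helper names are hypothetical:

/*
 * End-to-end sketch of the instance-block helpers exported from
 * <nvgpu/mm.h>. The HAL hook name matches the ops tables above.
 */
#include <nvgpu/mm.h>
#include <nvgpu/nvgpu_mem.h>
#include "gk20a/gk20a.h"

static int setup_engine_inst(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	int err;
	u64 addr;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		return err;

	/* Aperture-aware base address of the block. */
	addr = nvgpu_inst_block_addr(g, inst_block);
	(void)addr; /* e.g. programmed into an instblk register >> 12 */

	return 0;
}

static void teardown_engine_inst(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	nvgpu_free_inst_block(g, inst_block);
}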