Diffstat (limited to 'include/gk20a/mm_gk20a.c')
-rw-r--r--  include/gk20a/mm_gk20a.c | 654
1 file changed, 654 insertions(+), 0 deletions(-)
diff --git a/include/gk20a/mm_gk20a.c b/include/gk20a/mm_gk20a.c
new file mode 100644
index 0000000..10ca84d
--- /dev/null
+++ b/include/gk20a/mm_gk20a.c
@@ -0,0 +1,654 @@
/*
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <trace/events/gk20a.h>

#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>
#include <nvgpu/kmem.h>
#include <nvgpu/timers.h>
#include <nvgpu/pramin.h>
#include <nvgpu/list.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/log2.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/sizes.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>

#include "gk20a.h"
#include "mm_gk20a.h"
#include "fence_gk20a.h"

#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>

/*
 * GPU mapping life cycle
 * ======================
 *
 * Kernel mappings
 * ---------------
 *
 * Kernel mappings are created through vm.map(..., false):
 *
 * - Mappings to the same allocations are reused and refcounted.
 * - This path does not support deferred unmapping (i.e. kernel must wait for
 *   all hw operations on the buffer to complete before unmapping).
 * - References to dmabuf are owned and managed by the (kernel) clients of
 *   the gk20a_vm layer.
 *
 *
 * User space mappings
 * -------------------
 *
 * User space mappings are created through as.map_buffer -> vm.map(..., true):
 *
 * - Mappings to the same allocations are reused and refcounted.
 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
 *   until all hw operations have completed).
 * - References to dmabuf are owned and managed by the vm_gk20a
 *   layer itself. vm.map acquires these refs, and sets
 *   mapped_buffer->own_mem_ref to record that we must release the refs when we
 *   actually unmap.
 *
 */
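
/*
 * Illustrative sketch only (hypothetical helper names, not the real API):
 * both paths above boil down to a refcounted lookup-or-create keyed on the
 * underlying buffer, roughly:
 *
 *	mapped_buffer = lookup_mapping(vm, buf);
 *	if (mapped_buffer)
 *		nvgpu_ref_get(&mapped_buffer->ref);
 *	else
 *		mapped_buffer = create_mapping(vm, buf);
 *
 * For user space mappings the final unref only schedules the unmap; the
 * actual GMMU teardown is deferred until outstanding hw work on the buffer
 * has completed.
 */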

/* Make sure gk20a_init_mm_support has been called before this. */
int gk20a_init_mm_setup_hw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	int err;

	nvgpu_log_fn(g, " ");

	if (g->ops.fb.set_mmu_page_size) {
		g->ops.fb.set_mmu_page_size(g);
	}

	if (g->ops.fb.set_use_full_comp_tag_line) {
		mm->use_full_comp_tag_line =
			g->ops.fb.set_use_full_comp_tag_line(g);
	}

	g->ops.fb.init_hw(g);

	if (g->ops.bus.bar1_bind) {
		g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);
	}

	if (g->ops.bus.bar2_bind) {
		err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block);
		if (err) {
			return err;
		}
	}

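	/* The FB flush is performed twice; if either pass fails, bail out. */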
	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g)) {
		return -EBUSY;
	}

	nvgpu_log_fn(g, "done");
	return 0;
}

/* for gk20a the "video memory" apertures here are misnomers. */
static inline u32 big_valid_pde0_bits(struct gk20a *g,
				      struct nvgpu_gmmu_pd *pd, u64 addr)
{
	u32 pde0_bits =
		nvgpu_aperture_mask(g, pd->mem,
				    gmmu_pde_aperture_big_sys_mem_ncoh_f(),
				    gmmu_pde_aperture_big_sys_mem_coh_f(),
				    gmmu_pde_aperture_big_video_memory_f()) |
		gmmu_pde_address_big_sys_f(
			(u32)(addr >> gmmu_pde_address_shift_v()));

	return pde0_bits;
}

static inline u32 small_valid_pde1_bits(struct gk20a *g,
					struct nvgpu_gmmu_pd *pd, u64 addr)
{
	u32 pde1_bits =
		nvgpu_aperture_mask(g, pd->mem,
				    gmmu_pde_aperture_small_sys_mem_ncoh_f(),
				    gmmu_pde_aperture_small_sys_mem_coh_f(),
				    gmmu_pde_aperture_small_video_memory_f()) |
		gmmu_pde_vol_small_true_f() | /* tbd: why? */
		gmmu_pde_address_small_sys_f(
			(u32)(addr >> gmmu_pde_address_shift_v()));

	return pde1_bits;
}

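/*
 * A gk20a PDE is two 32-bit words: word 0 holds the PDE size and the
 * big-page pointer with its aperture; word 1 holds the small-page pointer
 * with its aperture plus the volatile bits for both page sizes. Either half
 * can be marked invalid on its own, which is how a PDE ends up backing only
 * one of the two page sizes.
 */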
static void update_gmmu_pde_locked(struct vm_gk20a *vm,
				   const struct gk20a_mmu_level *l,
				   struct nvgpu_gmmu_pd *pd,
				   u32 pd_idx,
				   u64 virt_addr,
				   u64 phys_addr,
				   struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool small_valid, big_valid;
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pde_v[2] = {0, 0};

	small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
	big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG;

	pde_v[0] = gmmu_pde_size_full_f();
	pde_v[0] |= big_valid ?
		big_valid_pde0_bits(g, pd, phys_addr) :
		gmmu_pde_aperture_big_invalid_f();

	pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
		     (gmmu_pde_aperture_small_invalid_f() |
		      gmmu_pde_vol_small_false_f()))
		    |
		    (big_valid ? (gmmu_pde_vol_big_true_f()) :
		     gmmu_pde_vol_big_false_f());

	pte_dbg(g, attrs,
		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
		"GPU %#-12llx phys %#-12llx "
		"[0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		small_valid ? 'S' : '-',
		big_valid ? 'B' : '-',
		virt_addr, phys_addr,
		pde_v[1], pde_v[0]);

	pd_write(g, &vm->pdb, pd_offset + 0, pde_v[0]);
	pd_write(g, &vm->pdb, pd_offset + 1, pde_v[1]);
}

static void __update_pte_sparse(u32 *pte_w)
{
	pte_w[0] = gmmu_pte_valid_false_f();
	pte_w[1] |= gmmu_pte_vol_true_f();
}

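/*
 * A gk20a PTE is also two 32-bit words: word 0 carries the valid bit, the
 * shifted physical address and the privilege/read-only bits; word 1 carries
 * the aperture, kind, comptag line, volatile and read/write disable bits.
 * When full comp tag lines are in use, bit 16 of the physical address
 * additionally sets the top comptagline bit (presumably selecting the other
 * half of the shared comptag line).
 */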
static void __update_pte(struct vm_gk20a *vm,
			 u32 *pte_w,
			 u64 phys_addr,
			 struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pte_valid = attrs->valid ?
		gmmu_pte_valid_true_f() :
		gmmu_pte_valid_false_f();
	u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
	u32 addr = attrs->aperture == APERTURE_SYSMEM ?
		gmmu_pte_address_sys_f(phys_shifted) :
		gmmu_pte_address_vid_f(phys_shifted);
	int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));

	pte_w[0] = pte_valid | addr;

	if (attrs->priv) {
		pte_w[0] |= gmmu_pte_privilege_true_f();
	}

	pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
					   gmmu_pte_aperture_sys_mem_ncoh_f(),
					   gmmu_pte_aperture_sys_mem_coh_f(),
					   gmmu_pte_aperture_video_memory_f()) |
		gmmu_pte_kind_f(attrs->kind_v) |
		gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));

	if (attrs->ctag && vm->mm->use_full_comp_tag_line &&
	    phys_addr & 0x10000) {
		pte_w[1] |= gmmu_pte_comptagline_f(
				1 << (gmmu_pte_comptagline_s() - 1));
	}

	if (attrs->rw_flag == gk20a_mem_flag_read_only) {
		pte_w[0] |= gmmu_pte_read_only_true_f();
		pte_w[1] |= gmmu_pte_write_disable_true_f();
	} else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
		pte_w[1] |= gmmu_pte_read_disable_true_f();
	}

	if (!attrs->cacheable) {
		pte_w[1] |= gmmu_pte_vol_true_f();
	}

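	/*
	 * Advance the comptag offset by one page so that every PTE that
	 * falls inside the same compression page maps to the same comptag
	 * line (the line index written above is ctag >> ctag_shift). For
	 * example, if compression_page_size() is 128 KiB and the small page
	 * size is 4 KiB, 32 consecutive PTEs share one line.
	 */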
	if (attrs->ctag) {
		attrs->ctag += page_size;
	}
}

static void update_gmmu_pte_locked(struct vm_gk20a *vm,
				   const struct gk20a_mmu_level *l,
				   struct nvgpu_gmmu_pd *pd,
				   u32 pd_idx,
				   u64 virt_addr,
				   u64 phys_addr,
				   struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pte_w[2] = {0, 0};
	int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));

	if (phys_addr) {
		__update_pte(vm, pte_w, phys_addr, attrs);
	} else if (attrs->sparse) {
		__update_pte_sparse(pte_w);
	}

	pte_dbg(g, attrs,
		"PTE: i=%-4u size=%-2u offs=%-4u | "
		"GPU %#-12llx phys %#-12llx "
		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
		"ctag=0x%08x "
		"[0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		virt_addr, phys_addr,
		page_size >> 10,
		nvgpu_gmmu_perm_str(attrs->rw_flag),
		attrs->kind_v,
		nvgpu_aperture_str(g, attrs->aperture),
		attrs->cacheable ? 'C' : '-',
		attrs->sparse ? 'S' : '-',
		attrs->priv ? 'P' : '-',
		attrs->valid ? 'V' : '-',
		(u32)attrs->ctag >> ctag_shift,
		pte_w[1], pte_w[0]);

	pd_write(g, pd, pd_offset + 0, pte_w[0]);
	pd_write(g, pd, pd_offset + 1, pte_w[1]);
}

u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
		       struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
	/*
	 * big and small page sizes are the same
	 */
	return GMMU_PAGE_SIZE_SMALL;
}

u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
		       struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
	/*
	 * return invalid
	 */
	return GMMU_NR_PAGE_SIZES;
}

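/*
 * Each level below is indexed by GPU VA bits [hi_bit:lo_bit], with separate
 * bit ranges for the small-page and big-page cases. For the 64 KiB layout,
 * for example, the PDE index comes from VA bits [NV_GMMU_VA_RANGE-1:26] and
 * the PTE index from bits [25:12] for 4 KiB pages or [25:16] for 64 KiB
 * pages; GPU VA 0x100042000 thus yields PDE index 0x40 and a 4 KiB PTE
 * index of 0x42.
 */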
const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
	 .lo_bit = {26, 26},
	 .update_entry = update_gmmu_pde_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {25, 25},
	 .lo_bit = {12, 16},
	 .update_entry = update_gmmu_pte_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pte_pgsz},
	{.update_entry = NULL}
};

const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
	 .lo_bit = {27, 27},
	 .update_entry = update_gmmu_pde_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {26, 26},
	 .lo_bit = {12, 17},
	 .update_entry = update_gmmu_pte_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pte_pgsz},
	{.update_entry = NULL}
};

int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
{
	int err = 0;

	nvgpu_log_fn(ch->g, " ");

	nvgpu_vm_get(vm);
	ch->vm = vm;
	err = channel_gk20a_commit_va(ch);
	if (err) {
		ch->vm = NULL;
	}

	nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s",
		  ch->chid, vm->name);

	return err;
}

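/*
 * Program the page directory base into an instance block: the low word
 * carries the PDB address (shifted down by ram_in_base_shift_v()) together
 * with its aperture and the volatile bit, the high word carries the upper
 * 32 bits of the unshifted address.
 */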
void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
		       struct vm_gk20a *vm)
{
	u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
	u32 pdb_addr_hi = u64_hi32(pdb_addr);

	nvgpu_log_info(g, "pde pa=0x%llx", pdb_addr);

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
		       nvgpu_aperture_mask(g, vm->pdb.mem,
				ram_in_page_dir_base_target_sys_mem_ncoh_f(),
				ram_in_page_dir_base_target_sys_mem_coh_f(),
				ram_in_page_dir_base_target_vid_mem_f()) |
		       ram_in_page_dir_base_vol_true_f() |
		       ram_in_page_dir_base_lo_f(pdb_addr_lo));

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
		       ram_in_page_dir_base_hi_f(pdb_addr_hi));
}

void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
			   u32 big_page_size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	nvgpu_log_info(g, "inst block phys = 0x%llx, kv = 0x%p",
		       nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);

	g->ops.mm.init_pdb(g, inst_block, vm);

	nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
		       u64_lo32(vm->va_limit - 1) & ~0xfff);

	nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
		       ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));

	if (big_page_size && g->ops.mm.set_big_page_size) {
		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
	}
}

int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	int err;

	nvgpu_log_fn(g, " ");

	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
	if (err) {
		nvgpu_err(g, "%s: memory allocation failed", __func__);
		return err;
	}

	nvgpu_log_fn(g, "done");
	return 0;
}

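/*
 * All of the flush/invalidate helpers below share the same shape: kick the
 * operation by writing the *_pending_busy_f() value into its flush register,
 * then poll that register until neither "outstanding" nor "pending" is still
 * set, delaying 5 us between reads and bounding the loop with a retry-based
 * timeout.
 */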
int gk20a_mm_fb_flush(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	int ret = 0;
	struct nvgpu_timeout timeout;
	u32 retries;

	nvgpu_log_fn(g, " ");

	gk20a_busy_noresume(g);
	if (!g->power_on) {
		gk20a_idle_nosuspend(g);
		return 0;
	}

	retries = 100;

	if (g->ops.mm.get_flush_retries) {
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB);
	}

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	nvgpu_mutex_acquire(&mm->l2_op_lock);

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */

	trace_gk20a_mm_fb_flush(g->name);

	gk20a_writel(g, flush_fb_flush_r(),
		flush_fb_flush_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_fb_flush_r());

		if (flush_fb_flush_outstanding_v(data) ==
			flush_fb_flush_outstanding_true_v() ||
		    flush_fb_flush_pending_v(data) ==
			flush_fb_flush_pending_busy_v()) {
			nvgpu_log_info(g, "fb_flush 0x%x", data);
			nvgpu_udelay(5);
		} else {
			break;
		}
	} while (!nvgpu_timeout_expired(&timeout));

	if (nvgpu_timeout_peek_expired(&timeout)) {
		if (g->ops.fb.dump_vpr_info) {
			g->ops.fb.dump_vpr_info(g);
		}
		if (g->ops.fb.dump_wpr_info) {
			g->ops.fb.dump_wpr_info(g);
		}
		ret = -EBUSY;
	}

	trace_gk20a_mm_fb_flush_done(g->name);

	nvgpu_mutex_release(&mm->l2_op_lock);

	gk20a_idle_nosuspend(g);

	return ret;
}

static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
{
	u32 data;
	struct nvgpu_timeout timeout;
	u32 retries = 200;

	trace_gk20a_mm_l2_invalidate(g->name);

	if (g->ops.mm.get_flush_retries) {
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV);
	}

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	/* Invalidate any clean lines from the L2 so subsequent reads go to
	   DRAM. Dirty lines are not affected by this operation. */
	gk20a_writel(g, flush_l2_system_invalidate_r(),
		flush_l2_system_invalidate_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_system_invalidate_r());

		if (flush_l2_system_invalidate_outstanding_v(data) ==
			flush_l2_system_invalidate_outstanding_true_v() ||
		    flush_l2_system_invalidate_pending_v(data) ==
			flush_l2_system_invalidate_pending_busy_v()) {
			nvgpu_log_info(g, "l2_system_invalidate 0x%x",
					data);
			nvgpu_udelay(5);
		} else {
			break;
		}
	} while (!nvgpu_timeout_expired(&timeout));

	if (nvgpu_timeout_peek_expired(&timeout)) {
		nvgpu_warn(g, "l2_system_invalidate too many retries");
	}

	trace_gk20a_mm_l2_invalidate_done(g->name);
}

void gk20a_mm_l2_invalidate(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;

	gk20a_busy_noresume(g);
	if (g->power_on) {
		nvgpu_mutex_acquire(&mm->l2_op_lock);
		gk20a_mm_l2_invalidate_locked(g);
		nvgpu_mutex_release(&mm->l2_op_lock);
	}
	gk20a_idle_nosuspend(g);
}

void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	struct nvgpu_timeout timeout;
	u32 retries = 2000;

	nvgpu_log_fn(g, " ");

	gk20a_busy_noresume(g);
	if (!g->power_on) {
		goto hw_was_off;
	}

	if (g->ops.mm.get_flush_retries) {
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH);
	}

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	nvgpu_mutex_acquire(&mm->l2_op_lock);

	trace_gk20a_mm_l2_flush(g->name);

	/* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
	   as clean, so subsequent reads might hit in the L2. */
	gk20a_writel(g, flush_l2_flush_dirty_r(),
		flush_l2_flush_dirty_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_flush_dirty_r());

		if (flush_l2_flush_dirty_outstanding_v(data) ==
			flush_l2_flush_dirty_outstanding_true_v() ||
		    flush_l2_flush_dirty_pending_v(data) ==
			flush_l2_flush_dirty_pending_busy_v()) {
			nvgpu_log_info(g, "l2_flush_dirty 0x%x", data);
			nvgpu_udelay(5);
		} else {
			break;
		}
	} while (!nvgpu_timeout_expired_msg(&timeout,
					    "l2_flush_dirty too many retries"));

	trace_gk20a_mm_l2_flush_done(g->name);

	if (invalidate) {
		gk20a_mm_l2_invalidate_locked(g);
	}

	nvgpu_mutex_release(&mm->l2_op_lock);

hw_was_off:
	gk20a_idle_nosuspend(g);
}

void gk20a_mm_cbc_clean(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	struct nvgpu_timeout timeout;
	u32 retries = 200;

	nvgpu_log_fn(g, " ");

	gk20a_busy_noresume(g);
	if (!g->power_on) {
		goto hw_was_off;
	}

	if (g->ops.mm.get_flush_retries) {
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN);
	}

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	nvgpu_mutex_acquire(&mm->l2_op_lock);

	/* Flush all dirty lines from the CBC to L2 */
	gk20a_writel(g, flush_l2_clean_comptags_r(),
		flush_l2_clean_comptags_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_clean_comptags_r());

		if (flush_l2_clean_comptags_outstanding_v(data) ==
			flush_l2_clean_comptags_outstanding_true_v() ||
		    flush_l2_clean_comptags_pending_v(data) ==
			flush_l2_clean_comptags_pending_busy_v()) {
			nvgpu_log_info(g, "l2_clean_comptags 0x%x", data);
			nvgpu_udelay(5);
		} else {
			break;
		}
	} while (!nvgpu_timeout_expired_msg(&timeout,
					    "l2_clean_comptags too many retries"));

	nvgpu_mutex_release(&mm->l2_op_lock);

hw_was_off:
	gk20a_idle_nosuspend(g);
}

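/*
 * Bit 34 in a GPU physical address marks it as going through the SoC IOMMU
 * (SMMU) on this chip; addresses without it bypass the IOMMU.
 */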
u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
{
	return 34;
}

const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
						      u32 big_page_size)
{
	return (big_page_size == SZ_64K) ?
		gk20a_mm_levels_64k : gk20a_mm_levels_128k;
}