Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--    drivers/gpu/nvgpu/gk20a/mm_gk20a.c    632
1 file changed, 632 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
new file mode 100644
index 00000000..759ef816
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -0,0 +1,632 @@
/*
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <trace/events/gk20a.h>

#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>
#include <nvgpu/kmem.h>
#include <nvgpu/timers.h>
#include <nvgpu/pramin.h>
#include <nvgpu/list.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/log2.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>

#include "gk20a.h"
#include "mm_gk20a.h"
#include "fence_gk20a.h"
#include "bus_gk20a.h"

#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
#include <nvgpu/hw/gk20a/hw_bus_gk20a.h>
#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>

/*
 * GPU mapping life cycle
 * ======================
 *
 * Kernel mappings
 * ---------------
 *
 * Kernel mappings are created through vm.map(..., false):
 *
 * - Mappings to the same allocations are reused and refcounted.
 * - This path does not support deferred unmapping (i.e. the kernel must wait
 *   for all hw operations on the buffer to complete before unmapping).
 * - References to dmabuf are owned and managed by the (kernel) clients of
 *   the gk20a_vm layer.
 *
 *
 * User space mappings
 * -------------------
 *
 * User space mappings are created through as.map_buffer -> vm.map(..., true):
 *
 * - Mappings to the same allocations are reused and refcounted.
 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
 *   until all hw operations have completed).
 * - References to dmabuf are owned and managed by the vm_gk20a layer itself.
 *   vm.map acquires these refs, and sets mapped_buffer->own_mem_ref to record
 *   that we must release the refs when we actually unmap.
 */

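/*
 * Illustrative sketch only (not part of the driver): the refcounting
 * contract described above, spelled out with hypothetical helper names.
 * "vm.map"/"vm.unmap" below stand in for the vm layer's map/unmap ops;
 * the real entry points and signatures live in the nvgpu vm code, not here.
 *
 *	// Kernel path: mapping the same allocation twice returns the same
 *	// GPU VA and bumps a refcount. The caller must idle the hw before
 *	// the last unmap, since kernel unmaps are not deferred.
 *	gpu_va  = vm.map(vm, mem, ..., false);
 *	gpu_va2 = vm.map(vm, mem, ..., false);	// reused; refcount is now 2
 *	vm.unmap(vm, gpu_va2);			// refcount drops to 1
 *	vm.unmap(vm, gpu_va);			// last ref: PTEs actually cleared
 *
 *	// User space path: as.map_buffer -> vm.map(..., true) takes the
 *	// dmabuf reference itself (own_mem_ref) and defers the real unmap
 *	// until outstanding hw work on the buffer has completed.
 */
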
/* make sure gk20a_init_mm_support() has been called before this */
int gk20a_init_mm_setup_hw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	int err;

	gk20a_dbg_fn("");

	g->ops.fb.set_mmu_page_size(g);
	if (g->ops.fb.set_use_full_comp_tag_line)
		mm->use_full_comp_tag_line =
			g->ops.fb.set_use_full_comp_tag_line(g);

	g->ops.fb.init_hw(g);

	if (g->ops.bus.bar1_bind)
		g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);

	if (g->ops.mm.init_bar2_mm_hw_setup) {
		err = g->ops.mm.init_bar2_mm_hw_setup(g);
		if (err)
			return err;
	}

	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
		return -EBUSY;

	gk20a_dbg_fn("done");
	return 0;
}

int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
{
	return vm->mmu_levels[0].lo_bit[0];
}

/* for gk20a the "video memory" apertures here are misnomers. */
static inline u32 big_valid_pde0_bits(struct gk20a *g,
				      struct nvgpu_gmmu_pd *pd, u64 addr)
{
	u32 pde0_bits =
		nvgpu_aperture_mask(g, pd->mem,
				    gmmu_pde_aperture_big_sys_mem_ncoh_f(),
				    gmmu_pde_aperture_big_video_memory_f()) |
		gmmu_pde_address_big_sys_f(
			(u32)(addr >> gmmu_pde_address_shift_v()));

	return pde0_bits;
}

static inline u32 small_valid_pde1_bits(struct gk20a *g,
					struct nvgpu_gmmu_pd *pd, u64 addr)
{
	u32 pde1_bits =
		nvgpu_aperture_mask(g, pd->mem,
				    gmmu_pde_aperture_small_sys_mem_ncoh_f(),
				    gmmu_pde_aperture_small_video_memory_f()) |
		gmmu_pde_vol_small_true_f() | /* tbd: why? */
		gmmu_pde_address_small_sys_f(
			(u32)(addr >> gmmu_pde_address_shift_v()));

	return pde1_bits;
}

static void update_gmmu_pde_locked(struct vm_gk20a *vm,
				   const struct gk20a_mmu_level *l,
				   struct nvgpu_gmmu_pd *pd,
				   u32 pd_idx,
				   u64 virt_addr,
				   u64 phys_addr,
				   struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool small_valid, big_valid;
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pde_v[2] = {0, 0};

	small_valid = attrs->pgsz == gmmu_page_size_small;
	big_valid = attrs->pgsz == gmmu_page_size_big;

	pde_v[0] = gmmu_pde_size_full_f();
	pde_v[0] |= big_valid ?
		big_valid_pde0_bits(g, pd, phys_addr) :
		gmmu_pde_aperture_big_invalid_f();

	pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
		     (gmmu_pde_aperture_small_invalid_f() |
		      gmmu_pde_vol_small_false_f()))
		    |
		    (big_valid ? (gmmu_pde_vol_big_true_f()) :
		     gmmu_pde_vol_big_false_f());

	pte_dbg(g, attrs,
		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
		"GPU %#-12llx phys %#-12llx "
		"[0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		small_valid ? 'S' : '-',
		big_valid ? 'B' : '-',
		virt_addr, phys_addr,
		pde_v[1], pde_v[0]);

	pd_write(g, &vm->pdb, pd_offset + 0, pde_v[0]);
	pd_write(g, &vm->pdb, pd_offset + 1, pde_v[1]);
}

static void __update_pte_sparse(u32 *pte_w)
{
	pte_w[0] = gmmu_pte_valid_false_f();
	pte_w[1] |= gmmu_pte_vol_true_f();
}

static void __update_pte(struct vm_gk20a *vm,
			 u32 *pte_w,
			 u64 phys_addr,
			 struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pte_valid = attrs->valid ?
		gmmu_pte_valid_true_f() :
		gmmu_pte_valid_false_f();
	u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
	u32 addr = attrs->aperture == APERTURE_SYSMEM ?
		gmmu_pte_address_sys_f(phys_shifted) :
		gmmu_pte_address_vid_f(phys_shifted);
	int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));

	pte_w[0] = pte_valid | addr;

	if (attrs->priv)
		pte_w[0] |= gmmu_pte_privilege_true_f();

	pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
					 gmmu_pte_aperture_sys_mem_ncoh_f(),
					 gmmu_pte_aperture_video_memory_f()) |
		gmmu_pte_kind_f(attrs->kind_v) |
		gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));

	if (attrs->ctag && vm->mm->use_full_comp_tag_line &&
	    phys_addr & 0x10000)
		pte_w[1] |= gmmu_pte_comptagline_f(
			1 << (gmmu_pte_comptagline_s() - 1));

	if (attrs->rw_flag == gk20a_mem_flag_read_only) {
		pte_w[0] |= gmmu_pte_read_only_true_f();
		pte_w[1] |= gmmu_pte_write_disable_true_f();
	} else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
		pte_w[1] |= gmmu_pte_read_disable_true_f();
	}

	if (!attrs->cacheable)
		pte_w[1] |= gmmu_pte_vol_true_f();

	if (attrs->ctag)
		attrs->ctag += page_size;
}
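
/*
 * Summary of what __update_pte() above assembles (derived from that code):
 * word 0 carries the valid bit, the page address shifted by
 * gmmu_pte_address_shift_v(), and optionally the privilege bit; word 1
 * carries the aperture, kind, comptag line, read-only/write-disable or
 * read-disable bits, and the volatile bit for non-cacheable mappings.
 * attrs->ctag is advanced by one page per PTE so the comptag line tracks
 * the offset within the buffer being mapped.
 */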

static void update_gmmu_pte_locked(struct vm_gk20a *vm,
				   const struct gk20a_mmu_level *l,
				   struct nvgpu_gmmu_pd *pd,
				   u32 pd_idx,
				   u64 virt_addr,
				   u64 phys_addr,
				   struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pte_w[2] = {0, 0};
	int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));

	if (phys_addr)
		__update_pte(vm, pte_w, phys_addr, attrs);
	else if (attrs->sparse)
		__update_pte_sparse(pte_w);

	pte_dbg(g, attrs,
		"PTE: i=%-4u size=%-2u offs=%-4u | "
		"GPU %#-12llx phys %#-12llx "
		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c "
		"ctag=0x%08x "
		"[0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		virt_addr, phys_addr,
		page_size >> 10,
		nvgpu_gmmu_perm_str(attrs->rw_flag),
		attrs->kind_v,
		nvgpu_aperture_str(attrs->aperture),
		attrs->cacheable ? 'C' : 'v',
		attrs->sparse ? 'S' : '-',
		attrs->priv ? 'P' : '-',
		attrs->coherent ? 'c' : '-',
		attrs->valid ? 'V' : '-',
		(u32)attrs->ctag >> ctag_shift,
		pte_w[1], pte_w[0]);

	pd_write(g, pd, pd_offset + 0, pte_w[0]);
	pd_write(g, pd, pd_offset + 1, pte_w[1]);
}

enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
	/*
	 * big and small page sizes are the same
	 */
	return gmmu_page_size_small;
}

enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
	/*
	 * return invalid
	 */
	return gmmu_nr_page_sizes;
}

const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
	 .lo_bit = {26, 26},
	 .update_entry = update_gmmu_pde_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {25, 25},
	 .lo_bit = {12, 16},
	 .update_entry = update_gmmu_pte_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pte_pgsz},
	{.update_entry = NULL}
};

const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
	 .lo_bit = {27, 27},
	 .update_entry = update_gmmu_pde_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {26, 26},
	 .lo_bit = {12, 17},
	 .update_entry = update_gmmu_pte_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pte_pgsz},
	{.update_entry = NULL}
};
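
/*
 * Worked example, derived from the tables above: with 64KB big pages the
 * top level translates VA bits [NV_GMMU_VA_RANGE-1:26], so each PDE covers
 * 1 << 26 = 64MB of GPU VA (this 26 is what gk20a_mm_pde_coverage_bit_count()
 * returns). The second level holds 8-byte PTEs indexed by bits [25:12] for
 * 4KB pages (16384 entries) or bits [25:16] for 64KB pages (1024 entries).
 * The 128KB layout shifts the PDE and big-page boundaries up by one bit, so
 * a PDE there covers 128MB.
 */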

int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
{
	int err = 0;

	gk20a_dbg_fn("");

	nvgpu_vm_get(vm);
	ch->vm = vm;
	err = channel_gk20a_commit_va(ch);
	if (err)
		ch->vm = NULL;

	nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s",
		  ch->chid, vm->name);

	return err;
}

int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
			  struct channel_gk20a *ch)
{
	return __gk20a_vm_bind_channel(as_share->vm, ch);
}

void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
		       struct vm_gk20a *vm)
{
	u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
	u32 pdb_addr_hi = u64_hi32(pdb_addr);

	gk20a_dbg_info("pde pa=0x%llx", pdb_addr);

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
		       nvgpu_aperture_mask(g, vm->pdb.mem,
				ram_in_page_dir_base_target_sys_mem_ncoh_f(),
				ram_in_page_dir_base_target_vid_mem_f()) |
		       ram_in_page_dir_base_vol_true_f() |
		       ram_in_page_dir_base_lo_f(pdb_addr_lo));

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
		       ram_in_page_dir_base_hi_f(pdb_addr_hi));
}

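/*
 * Program an instance block with the VM's page directory pointer (via
 * g->ops.mm.init_pdb above), the VA limit, and, when requested and the chip
 * supports it, the big page size. The inst_block is typically one allocated
 * with gk20a_alloc_inst_block() below.
 */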
void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
			   u32 big_page_size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
		       nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);

	g->ops.mm.init_pdb(g, inst_block, vm);

	nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
		       u64_lo32(vm->va_limit - 1) & ~0xfff);

	nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
		       ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));

	if (big_page_size && g->ops.mm.set_big_page_size)
		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
}

int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	int err;

	gk20a_dbg_fn("");

	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
	if (err) {
		nvgpu_err(g, "%s: memory allocation failed", __func__);
		return err;
	}

	gk20a_dbg_fn("done");
	return 0;
}

int gk20a_mm_fb_flush(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	int ret = 0;
	struct nvgpu_timeout timeout;
	u32 retries;

	gk20a_dbg_fn("");

	gk20a_busy_noresume(g);
	if (!g->power_on) {
		gk20a_idle_nosuspend(g);
		return 0;
	}

	retries = 100;

	if (g->ops.mm.get_flush_retries)
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB);

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	nvgpu_mutex_acquire(&mm->l2_op_lock);

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */

	trace_gk20a_mm_fb_flush(g->name);

	gk20a_writel(g, flush_fb_flush_r(),
		     flush_fb_flush_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_fb_flush_r());

		if (flush_fb_flush_outstanding_v(data) ==
			flush_fb_flush_outstanding_true_v() ||
		    flush_fb_flush_pending_v(data) ==
			flush_fb_flush_pending_busy_v()) {
			gk20a_dbg_info("fb_flush 0x%x", data);
			nvgpu_udelay(5);
		} else
			break;
	} while (!nvgpu_timeout_expired(&timeout));

	if (nvgpu_timeout_peek_expired(&timeout)) {
		if (g->ops.fb.dump_vpr_wpr_info)
			g->ops.fb.dump_vpr_wpr_info(g);
		ret = -EBUSY;
	}

	trace_gk20a_mm_fb_flush_done(g->name);

	nvgpu_mutex_release(&mm->l2_op_lock);

	gk20a_idle_nosuspend(g);

	return ret;
}

static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
{
	u32 data;
	struct nvgpu_timeout timeout;
	u32 retries = 200;

	trace_gk20a_mm_l2_invalidate(g->name);

	if (g->ops.mm.get_flush_retries)
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV);

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	/* Invalidate any clean lines from the L2 so subsequent reads go to
	   DRAM. Dirty lines are not affected by this operation. */
	gk20a_writel(g, flush_l2_system_invalidate_r(),
		     flush_l2_system_invalidate_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_system_invalidate_r());

		if (flush_l2_system_invalidate_outstanding_v(data) ==
			flush_l2_system_invalidate_outstanding_true_v() ||
		    flush_l2_system_invalidate_pending_v(data) ==
			flush_l2_system_invalidate_pending_busy_v()) {
			gk20a_dbg_info("l2_system_invalidate 0x%x",
				       data);
			nvgpu_udelay(5);
		} else
			break;
	} while (!nvgpu_timeout_expired(&timeout));

	if (nvgpu_timeout_peek_expired(&timeout))
		nvgpu_warn(g, "l2_system_invalidate too many retries");

	trace_gk20a_mm_l2_invalidate_done(g->name);
}

void gk20a_mm_l2_invalidate(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	gk20a_busy_noresume(g);
	if (g->power_on) {
		nvgpu_mutex_acquire(&mm->l2_op_lock);
		gk20a_mm_l2_invalidate_locked(g);
		nvgpu_mutex_release(&mm->l2_op_lock);
	}
	gk20a_idle_nosuspend(g);
}

void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	struct nvgpu_timeout timeout;
	u32 retries = 2000;

	gk20a_dbg_fn("");

	gk20a_busy_noresume(g);
	if (!g->power_on)
		goto hw_was_off;

	if (g->ops.mm.get_flush_retries)
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH);

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	nvgpu_mutex_acquire(&mm->l2_op_lock);

	trace_gk20a_mm_l2_flush(g->name);

	/* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
	   as clean, so subsequent reads might hit in the L2. */
	gk20a_writel(g, flush_l2_flush_dirty_r(),
		     flush_l2_flush_dirty_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_flush_dirty_r());

		if (flush_l2_flush_dirty_outstanding_v(data) ==
			flush_l2_flush_dirty_outstanding_true_v() ||
		    flush_l2_flush_dirty_pending_v(data) ==
			flush_l2_flush_dirty_pending_busy_v()) {
			gk20a_dbg_info("l2_flush_dirty 0x%x", data);
			nvgpu_udelay(5);
		} else
			break;
	} while (!nvgpu_timeout_expired_msg(&timeout,
					    "l2_flush_dirty too many retries"));

	trace_gk20a_mm_l2_flush_done(g->name);

	if (invalidate)
		gk20a_mm_l2_invalidate_locked(g);

	nvgpu_mutex_release(&mm->l2_op_lock);

hw_was_off:
	gk20a_idle_nosuspend(g);
}

void gk20a_mm_cbc_clean(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	struct nvgpu_timeout timeout;
	u32 retries = 200;

	gk20a_dbg_fn("");

	gk20a_busy_noresume(g);
	if (!g->power_on)
		goto hw_was_off;

	if (g->ops.mm.get_flush_retries)
		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN);

	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);

	nvgpu_mutex_acquire(&mm->l2_op_lock);

	/* Flush all dirty lines from the CBC to L2 */
	gk20a_writel(g, flush_l2_clean_comptags_r(),
		     flush_l2_clean_comptags_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_clean_comptags_r());

		if (flush_l2_clean_comptags_outstanding_v(data) ==
			flush_l2_clean_comptags_outstanding_true_v() ||
		    flush_l2_clean_comptags_pending_v(data) ==
			flush_l2_clean_comptags_pending_busy_v()) {
			gk20a_dbg_info("l2_clean_comptags 0x%x", data);
			nvgpu_udelay(5);
		} else
			break;
	} while (!nvgpu_timeout_expired_msg(&timeout,
					    "l2_clean_comptags too many retries"));

	nvgpu_mutex_release(&mm->l2_op_lock);

hw_was_off:
	gk20a_idle_nosuspend(g);
}

u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
{
	return 34;
}

const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
						      u32 big_page_size)
{
	return (big_page_size == SZ_64K) ?
		gk20a_mm_levels_64k : gk20a_mm_levels_128k;
}