Diffstat (limited to 'include/gk20a/mm_gk20a.c')
-rw-r--r-- | include/gk20a/mm_gk20a.c | 654 |
1 file changed, 0 insertions, 654 deletions

diff --git a/include/gk20a/mm_gk20a.c b/include/gk20a/mm_gk20a.c
deleted file mode 100644
index 10ca84d..0000000
--- a/include/gk20a/mm_gk20a.c
+++ /dev/null
@@ -1,654 +0,0 @@
/*
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <trace/events/gk20a.h>

#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>
#include <nvgpu/kmem.h>
#include <nvgpu/timers.h>
#include <nvgpu/pramin.h>
#include <nvgpu/list.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/log2.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/sizes.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>

#include "gk20a.h"
#include "mm_gk20a.h"
#include "fence_gk20a.h"

#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>

/*
 * GPU mapping life cycle
 * ======================
 *
 * Kernel mappings
 * ---------------
 *
 * Kernel mappings are created through vm.map(..., false):
 *
 * - Mappings to the same allocations are reused and refcounted.
 * - This path does not support deferred unmapping (i.e. kernel must wait for
 *   all hw operations on the buffer to complete before unmapping).
 * - References to dmabuf are owned and managed by the (kernel) clients of
 *   the gk20a_vm layer.
 *
 *
 * User space mappings
 * -------------------
 *
 * User space mappings are created through as.map_buffer -> vm.map(..., true):
 *
 * - Mappings to the same allocations are reused and refcounted.
 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
 *   until all hw operations have completed).
 * - References to dmabuf are owned and managed by the vm_gk20a
 *   layer itself. vm.map acquires these refs, and sets
 *   mapped_buffer->own_mem_ref to record that we must release the refs when we
 *   actually unmap.
 *
 */
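
/*
 * A minimal, self-contained sketch of the "reuse and refcount" rule above.
 * The struct and helpers are hypothetical (they are not this driver's API);
 * the real bookkeeping lives in the vm_gk20a mapped-buffer tracking. The
 * point is only that a second map request for the same allocation returns
 * the existing GPU VA and bumps a count, and only the final unmap tears the
 * mapping down.
 */
struct example_mapping {
	void *buf;	/* backing allocation being mapped */
	u64 gpu_va;	/* GPU virtual address of the mapping */
	int refcount;	/* number of outstanding map calls */
};

static u64 example_map(struct example_mapping *m, void *buf, u64 fresh_va)
{
	if (m->refcount != 0 && m->buf == buf) {
		/* already mapped: reuse the existing mapping */
		m->refcount++;
		return m->gpu_va;
	}

	/* first map of this buffer: create the mapping */
	m->buf = buf;
	m->gpu_va = fresh_va;
	m->refcount = 1;
	return m->gpu_va;
}

static bool example_unmap(struct example_mapping *m)
{
	/* true only when the last reference goes away */
	return --m->refcount == 0;
}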
86 | |||
87 | /* make sure gk20a_init_mm_support is called before */ | ||
88 | int gk20a_init_mm_setup_hw(struct gk20a *g) | ||
89 | { | ||
90 | struct mm_gk20a *mm = &g->mm; | ||
91 | int err; | ||
92 | |||
93 | nvgpu_log_fn(g, " "); | ||
94 | |||
95 | if (g->ops.fb.set_mmu_page_size) { | ||
96 | g->ops.fb.set_mmu_page_size(g); | ||
97 | } | ||
98 | |||
99 | if (g->ops.fb.set_use_full_comp_tag_line) { | ||
100 | mm->use_full_comp_tag_line = | ||
101 | g->ops.fb.set_use_full_comp_tag_line(g); | ||
102 | } | ||
103 | |||
104 | g->ops.fb.init_hw(g); | ||
105 | |||
106 | if (g->ops.bus.bar1_bind) { | ||
107 | g->ops.bus.bar1_bind(g, &mm->bar1.inst_block); | ||
108 | } | ||
109 | |||
110 | if (g->ops.bus.bar2_bind) { | ||
111 | err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block); | ||
112 | if (err) { | ||
113 | return err; | ||
114 | } | ||
115 | } | ||
116 | |||
117 | if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g)) { | ||
118 | return -EBUSY; | ||
119 | } | ||
120 | |||
121 | nvgpu_log_fn(g, "done"); | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | /* for gk20a the "video memory" apertures here are misnomers. */ | ||
126 | static inline u32 big_valid_pde0_bits(struct gk20a *g, | ||
127 | struct nvgpu_gmmu_pd *pd, u64 addr) | ||
128 | { | ||
129 | u32 pde0_bits = | ||
130 | nvgpu_aperture_mask(g, pd->mem, | ||
131 | gmmu_pde_aperture_big_sys_mem_ncoh_f(), | ||
132 | gmmu_pde_aperture_big_sys_mem_coh_f(), | ||
133 | gmmu_pde_aperture_big_video_memory_f()) | | ||
134 | gmmu_pde_address_big_sys_f( | ||
135 | (u32)(addr >> gmmu_pde_address_shift_v())); | ||
136 | |||
137 | return pde0_bits; | ||
138 | } | ||
139 | |||
140 | static inline u32 small_valid_pde1_bits(struct gk20a *g, | ||
141 | struct nvgpu_gmmu_pd *pd, u64 addr) | ||
142 | { | ||
143 | u32 pde1_bits = | ||
144 | nvgpu_aperture_mask(g, pd->mem, | ||
145 | gmmu_pde_aperture_small_sys_mem_ncoh_f(), | ||
146 | gmmu_pde_aperture_small_sys_mem_coh_f(), | ||
147 | gmmu_pde_aperture_small_video_memory_f()) | | ||
148 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ | ||
149 | gmmu_pde_address_small_sys_f( | ||
150 | (u32)(addr >> gmmu_pde_address_shift_v())); | ||
151 | |||
152 | return pde1_bits; | ||
153 | } | ||
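
/*
 * The two helpers above differ only in which aperture/valid fields they OR
 * in; the address handling is the same. Because the pointed-to page table
 * is page aligned, only addr >> gmmu_pde_address_shift_v() is stored in the
 * PDE. A worked sketch, assuming the usual 12-bit (4 KiB) shift -- the
 * shift value here is an assumption, the real one comes from
 * hw_gmmu_gk20a.h:
 *
 *   addr  = 0x1_2345_7000
 *   field = addr >> 12 = 0x123457  (the value fed to gmmu_pde_address_*_f())
 */
static inline u32 example_pde_address_field(u64 addr)
{
	return (u32)(addr >> 12);	/* assumed shift, see comment above */
}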
154 | |||
155 | static void update_gmmu_pde_locked(struct vm_gk20a *vm, | ||
156 | const struct gk20a_mmu_level *l, | ||
157 | struct nvgpu_gmmu_pd *pd, | ||
158 | u32 pd_idx, | ||
159 | u64 virt_addr, | ||
160 | u64 phys_addr, | ||
161 | struct nvgpu_gmmu_attrs *attrs) | ||
162 | { | ||
163 | struct gk20a *g = gk20a_from_vm(vm); | ||
164 | bool small_valid, big_valid; | ||
165 | u32 pd_offset = pd_offset_from_index(l, pd_idx); | ||
166 | u32 pde_v[2] = {0, 0}; | ||
167 | |||
168 | small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; | ||
169 | big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG; | ||
170 | |||
171 | pde_v[0] = gmmu_pde_size_full_f(); | ||
172 | pde_v[0] |= big_valid ? | ||
173 | big_valid_pde0_bits(g, pd, phys_addr) : | ||
174 | gmmu_pde_aperture_big_invalid_f(); | ||
175 | |||
176 | pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) : | ||
177 | (gmmu_pde_aperture_small_invalid_f() | | ||
178 | gmmu_pde_vol_small_false_f())) | ||
179 | | | ||
180 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | ||
181 | gmmu_pde_vol_big_false_f()); | ||
182 | |||
183 | pte_dbg(g, attrs, | ||
184 | "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " | ||
185 | "GPU %#-12llx phys %#-12llx " | ||
186 | "[0x%08x, 0x%08x]", | ||
187 | pd_idx, l->entry_size, pd_offset, | ||
188 | small_valid ? 'S' : '-', | ||
189 | big_valid ? 'B' : '-', | ||
190 | virt_addr, phys_addr, | ||
191 | pde_v[1], pde_v[0]); | ||
192 | |||
193 | pd_write(g, &vm->pdb, pd_offset + 0, pde_v[0]); | ||
194 | pd_write(g, &vm->pdb, pd_offset + 1, pde_v[1]); | ||
195 | } | ||
196 | |||
197 | static void __update_pte_sparse(u32 *pte_w) | ||
198 | { | ||
199 | pte_w[0] = gmmu_pte_valid_false_f(); | ||
200 | pte_w[1] |= gmmu_pte_vol_true_f(); | ||
201 | } | ||
202 | |||
203 | static void __update_pte(struct vm_gk20a *vm, | ||
204 | u32 *pte_w, | ||
205 | u64 phys_addr, | ||
206 | struct nvgpu_gmmu_attrs *attrs) | ||
207 | { | ||
208 | struct gk20a *g = gk20a_from_vm(vm); | ||
209 | u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; | ||
210 | u32 pte_valid = attrs->valid ? | ||
211 | gmmu_pte_valid_true_f() : | ||
212 | gmmu_pte_valid_false_f(); | ||
213 | u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v(); | ||
214 | u32 addr = attrs->aperture == APERTURE_SYSMEM ? | ||
215 | gmmu_pte_address_sys_f(phys_shifted) : | ||
216 | gmmu_pte_address_vid_f(phys_shifted); | ||
217 | int ctag_shift = ilog2(g->ops.fb.compression_page_size(g)); | ||
218 | |||
219 | pte_w[0] = pte_valid | addr; | ||
220 | |||
221 | if (attrs->priv) { | ||
222 | pte_w[0] |= gmmu_pte_privilege_true_f(); | ||
223 | } | ||
224 | |||
225 | pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture, | ||
226 | gmmu_pte_aperture_sys_mem_ncoh_f(), | ||
227 | gmmu_pte_aperture_sys_mem_coh_f(), | ||
228 | gmmu_pte_aperture_video_memory_f()) | | ||
229 | gmmu_pte_kind_f(attrs->kind_v) | | ||
230 | gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); | ||
231 | |||
232 | if (attrs->ctag && vm->mm->use_full_comp_tag_line && | ||
233 | phys_addr & 0x10000) { | ||
234 | pte_w[1] |= gmmu_pte_comptagline_f( | ||
235 | 1 << (gmmu_pte_comptagline_s() - 1)); | ||
236 | } | ||
237 | |||
238 | if (attrs->rw_flag == gk20a_mem_flag_read_only) { | ||
239 | pte_w[0] |= gmmu_pte_read_only_true_f(); | ||
240 | pte_w[1] |= gmmu_pte_write_disable_true_f(); | ||
241 | } else if (attrs->rw_flag == gk20a_mem_flag_write_only) { | ||
242 | pte_w[1] |= gmmu_pte_read_disable_true_f(); | ||
243 | } | ||
244 | |||
245 | if (!attrs->cacheable) { | ||
246 | pte_w[1] |= gmmu_pte_vol_true_f(); | ||
247 | } | ||
248 | |||
249 | if (attrs->ctag) { | ||
250 | attrs->ctag += page_size; | ||
251 | } | ||
252 | } | ||
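
/*
 * A worked example of the comptag arithmetic in __update_pte(), using
 * 128 KiB purely as an illustrative compression page size (the real value
 * comes from g->ops.fb.compression_page_size()):
 *
 *   ctag_shift  = ilog2(128 KiB) = 17
 *   attrs->ctag = 0x60000  ->  comptagline = 0x60000 >> 17 = 3
 *
 * With that example size, the "phys_addr & 0x10000" test above picks the
 * upper 64 KiB half of a compression page and sets the top comptagline bit
 * when full comp tag lines are in use.
 */
static inline u32 example_comptagline(u64 ctag, u32 comp_page_size)
{
	return (u32)(ctag >> ilog2(comp_page_size));
}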
253 | |||
254 | static void update_gmmu_pte_locked(struct vm_gk20a *vm, | ||
255 | const struct gk20a_mmu_level *l, | ||
256 | struct nvgpu_gmmu_pd *pd, | ||
257 | u32 pd_idx, | ||
258 | u64 virt_addr, | ||
259 | u64 phys_addr, | ||
260 | struct nvgpu_gmmu_attrs *attrs) | ||
261 | { | ||
262 | struct gk20a *g = gk20a_from_vm(vm); | ||
263 | u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; | ||
264 | u32 pd_offset = pd_offset_from_index(l, pd_idx); | ||
265 | u32 pte_w[2] = {0, 0}; | ||
266 | int ctag_shift = ilog2(g->ops.fb.compression_page_size(g)); | ||
267 | |||
268 | if (phys_addr) { | ||
269 | __update_pte(vm, pte_w, phys_addr, attrs); | ||
270 | } else if (attrs->sparse) { | ||
271 | __update_pte_sparse(pte_w); | ||
272 | } | ||
273 | |||
274 | pte_dbg(g, attrs, | ||
275 | "PTE: i=%-4u size=%-2u offs=%-4u | " | ||
276 | "GPU %#-12llx phys %#-12llx " | ||
277 | "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c " | ||
278 | "ctag=0x%08x " | ||
279 | "[0x%08x, 0x%08x]", | ||
280 | pd_idx, l->entry_size, pd_offset, | ||
281 | virt_addr, phys_addr, | ||
282 | page_size >> 10, | ||
283 | nvgpu_gmmu_perm_str(attrs->rw_flag), | ||
284 | attrs->kind_v, | ||
285 | nvgpu_aperture_str(g, attrs->aperture), | ||
286 | attrs->cacheable ? 'C' : '-', | ||
287 | attrs->sparse ? 'S' : '-', | ||
288 | attrs->priv ? 'P' : '-', | ||
289 | attrs->valid ? 'V' : '-', | ||
290 | (u32)attrs->ctag >> ctag_shift, | ||
291 | pte_w[1], pte_w[0]); | ||
292 | |||
293 | pd_write(g, pd, pd_offset + 0, pte_w[0]); | ||
294 | pd_write(g, pd, pd_offset + 1, pte_w[1]); | ||
295 | } | ||
296 | |||
297 | u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, | ||
298 | struct nvgpu_gmmu_pd *pd, u32 pd_idx) | ||
299 | { | ||
300 | /* | ||
301 | * big and small page sizes are the same | ||
302 | */ | ||
303 | return GMMU_PAGE_SIZE_SMALL; | ||
304 | } | ||
305 | |||
306 | u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, | ||
307 | struct nvgpu_gmmu_pd *pd, u32 pd_idx) | ||
308 | { | ||
309 | /* | ||
310 | * return invalid | ||
311 | */ | ||
312 | return GMMU_NR_PAGE_SIZES; | ||
313 | } | ||
314 | |||
315 | const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { | ||
316 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | ||
317 | .lo_bit = {26, 26}, | ||
318 | .update_entry = update_gmmu_pde_locked, | ||
319 | .entry_size = 8, | ||
320 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
321 | {.hi_bit = {25, 25}, | ||
322 | .lo_bit = {12, 16}, | ||
323 | .update_entry = update_gmmu_pte_locked, | ||
324 | .entry_size = 8, | ||
325 | .get_pgsz = gk20a_get_pte_pgsz}, | ||
326 | {.update_entry = NULL} | ||
327 | }; | ||
328 | |||
329 | const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { | ||
330 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | ||
331 | .lo_bit = {27, 27}, | ||
332 | .update_entry = update_gmmu_pde_locked, | ||
333 | .entry_size = 8, | ||
334 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
335 | {.hi_bit = {26, 26}, | ||
336 | .lo_bit = {12, 17}, | ||
337 | .update_entry = update_gmmu_pte_locked, | ||
338 | .entry_size = 8, | ||
339 | .get_pgsz = gk20a_get_pte_pgsz}, | ||
340 | {.update_entry = NULL} | ||
341 | }; | ||
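
/*
 * A minimal sketch of how the hi_bit/lo_bit pairs above are consumed by the
 * common GMMU walk (not shown in this file): each level's table index is
 * simply the virtual-address bit field [hi_bit:lo_bit] for the active page
 * size (array index 0 = small, 1 = big). For the 64 KiB big-page layout,
 * GPU VA 0x0c012000 gives:
 *
 *   PDE index (bits [NV_GMMU_VA_RANGE-1:26]) = 0x0c012000 >> 26            = 3
 *   PTE index, 4 KiB pages (bits [25:12])    = (0x0c012000 >> 12) & 0x3fff = 0x12
 *   PTE index, 64 KiB pages (bits [25:16])   = (0x0c012000 >> 16) & 0x3ff  = 0x1
 */
static inline u32 example_mmu_level_index(u64 virt_addr, u32 hi_bit, u32 lo_bit)
{
	return (u32)((virt_addr >> lo_bit) &
		     ((1ULL << (hi_bit - lo_bit + 1U)) - 1U));
}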
342 | |||
343 | int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) | ||
344 | { | ||
345 | int err = 0; | ||
346 | |||
347 | nvgpu_log_fn(ch->g, " "); | ||
348 | |||
349 | nvgpu_vm_get(vm); | ||
350 | ch->vm = vm; | ||
351 | err = channel_gk20a_commit_va(ch); | ||
352 | if (err) { | ||
353 | ch->vm = NULL; | ||
354 | } | ||
355 | |||
356 | nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s", | ||
357 | ch->chid, vm->name); | ||
358 | |||
359 | return err; | ||
360 | } | ||
361 | |||
362 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | ||
363 | struct vm_gk20a *vm) | ||
364 | { | ||
365 | u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); | ||
366 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
367 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | ||
368 | |||
369 | nvgpu_log_info(g, "pde pa=0x%llx", pdb_addr); | ||
370 | |||
371 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), | ||
372 | nvgpu_aperture_mask(g, vm->pdb.mem, | ||
373 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | ||
374 | ram_in_page_dir_base_target_sys_mem_coh_f(), | ||
375 | ram_in_page_dir_base_target_vid_mem_f()) | | ||
376 | ram_in_page_dir_base_vol_true_f() | | ||
377 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | ||
378 | |||
379 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), | ||
380 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | ||
381 | } | ||
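
/*
 * Worked example of the PDB address split in gk20a_mm_init_pdb(), assuming
 * the usual 12-bit ram_in_base shift (an assumption here; the real value
 * comes from hw_ram_gk20a.h):
 *
 *   pdb_addr    = 0x1_2345_6000
 *   pdb_addr_lo = u64_lo32(pdb_addr >> 12) = 0x00123456
 *   pdb_addr_hi = u64_hi32(pdb_addr)       = 0x1
 *
 * i.e. the low register word carries the 4 KiB-aligned base and the high
 * word carries the address bits above 32.
 */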
382 | |||
383 | void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, | ||
384 | u32 big_page_size) | ||
385 | { | ||
386 | struct gk20a *g = gk20a_from_vm(vm); | ||
387 | |||
388 | nvgpu_log_info(g, "inst block phys = 0x%llx, kv = 0x%p", | ||
389 | nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va); | ||
390 | |||
391 | g->ops.mm.init_pdb(g, inst_block, vm); | ||
392 | |||
393 | nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), | ||
394 | u64_lo32(vm->va_limit - 1) & ~0xfff); | ||
395 | |||
396 | nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(), | ||
397 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); | ||
398 | |||
399 | if (big_page_size && g->ops.mm.set_big_page_size) { | ||
400 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); | ||
401 | } | ||
402 | } | ||
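
/*
 * Worked example of the address-limit programming above, for a VM whose
 * va_limit is 2^37 bytes (an example value, not a claim about any specific
 * VM):
 *
 *   va_limit - 1 = 0x1f_ffff_ffff
 *   adr_limit_lo = u64_lo32(0x1f_ffff_ffff) & ~0xfff = 0xfffff000
 *   adr_limit_hi = u64_hi32(0x1f_ffff_ffff)          = 0x1f
 *
 * The low word is trimmed to 4 KiB granularity; the high word carries the
 * bits above 32.
 */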
403 | |||
404 | int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) | ||
405 | { | ||
406 | int err; | ||
407 | |||
408 | nvgpu_log_fn(g, " "); | ||
409 | |||
410 | err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); | ||
411 | if (err) { | ||
412 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
413 | return err; | ||
414 | } | ||
415 | |||
416 | nvgpu_log_fn(g, "done"); | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | int gk20a_mm_fb_flush(struct gk20a *g) | ||
421 | { | ||
422 | struct mm_gk20a *mm = &g->mm; | ||
423 | u32 data; | ||
424 | int ret = 0; | ||
425 | struct nvgpu_timeout timeout; | ||
426 | u32 retries; | ||
427 | |||
428 | nvgpu_log_fn(g, " "); | ||
429 | |||
430 | gk20a_busy_noresume(g); | ||
431 | if (!g->power_on) { | ||
432 | gk20a_idle_nosuspend(g); | ||
433 | return 0; | ||
434 | } | ||
435 | |||
436 | retries = 100; | ||
437 | |||
438 | if (g->ops.mm.get_flush_retries) { | ||
439 | retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB); | ||
440 | } | ||
441 | |||
442 | nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); | ||
443 | |||
444 | nvgpu_mutex_acquire(&mm->l2_op_lock); | ||
445 | |||
446 | /* Make sure all previous writes are committed to the L2. There's no | ||
447 | guarantee that writes are to DRAM. This will be a sysmembar internal | ||
448 | to the L2. */ | ||
449 | |||
450 | trace_gk20a_mm_fb_flush(g->name); | ||
451 | |||
452 | gk20a_writel(g, flush_fb_flush_r(), | ||
453 | flush_fb_flush_pending_busy_f()); | ||
454 | |||
455 | do { | ||
456 | data = gk20a_readl(g, flush_fb_flush_r()); | ||
457 | |||
458 | if (flush_fb_flush_outstanding_v(data) == | ||
459 | flush_fb_flush_outstanding_true_v() || | ||
460 | flush_fb_flush_pending_v(data) == | ||
461 | flush_fb_flush_pending_busy_v()) { | ||
462 | nvgpu_log_info(g, "fb_flush 0x%x", data); | ||
463 | nvgpu_udelay(5); | ||
464 | } else { | ||
465 | break; | ||
466 | } | ||
467 | } while (!nvgpu_timeout_expired(&timeout)); | ||
468 | |||
469 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
470 | if (g->ops.fb.dump_vpr_info) { | ||
471 | g->ops.fb.dump_vpr_info(g); | ||
472 | } | ||
473 | if (g->ops.fb.dump_wpr_info) { | ||
474 | g->ops.fb.dump_wpr_info(g); | ||
475 | } | ||
476 | ret = -EBUSY; | ||
477 | } | ||
478 | |||
479 | trace_gk20a_mm_fb_flush_done(g->name); | ||
480 | |||
481 | nvgpu_mutex_release(&mm->l2_op_lock); | ||
482 | |||
483 | gk20a_idle_nosuspend(g); | ||
484 | |||
485 | return ret; | ||
486 | } | ||
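
/*
 * gk20a_mm_fb_flush() above and the L2/CBC maintenance routines below all
 * follow the same shape: kick the operation, then poll the status register
 * until neither "outstanding" nor "pending busy" is set, backing off 5 us
 * per iteration until the retry budget expires. A condensed sketch of that
 * pattern; the read_status/is_busy callbacks are hypothetical stand-ins for
 * the gk20a_readl() call plus the per-register field tests:
 */
static int example_poll_flush_done(struct gk20a *g,
				   struct nvgpu_timeout *timeout,
				   u32 (*read_status)(struct gk20a *g),
				   bool (*is_busy)(u32 data))
{
	do {
		u32 data = read_status(g);

		if (!is_busy(data)) {
			return 0;	/* flush completed */
		}

		nvgpu_udelay(5);	/* back off, then poll again */
	} while (!nvgpu_timeout_expired(timeout));

	return -EBUSY;			/* retry budget exhausted */
}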
487 | |||
488 | static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) | ||
489 | { | ||
490 | u32 data; | ||
491 | struct nvgpu_timeout timeout; | ||
492 | u32 retries = 200; | ||
493 | |||
494 | trace_gk20a_mm_l2_invalidate(g->name); | ||
495 | |||
496 | if (g->ops.mm.get_flush_retries) { | ||
497 | retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV); | ||
498 | } | ||
499 | |||
500 | nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); | ||
501 | |||
502 | /* Invalidate any clean lines from the L2 so subsequent reads go to | ||
503 | DRAM. Dirty lines are not affected by this operation. */ | ||
504 | gk20a_writel(g, flush_l2_system_invalidate_r(), | ||
505 | flush_l2_system_invalidate_pending_busy_f()); | ||
506 | |||
507 | do { | ||
508 | data = gk20a_readl(g, flush_l2_system_invalidate_r()); | ||
509 | |||
510 | if (flush_l2_system_invalidate_outstanding_v(data) == | ||
511 | flush_l2_system_invalidate_outstanding_true_v() || | ||
512 | flush_l2_system_invalidate_pending_v(data) == | ||
513 | flush_l2_system_invalidate_pending_busy_v()) { | ||
514 | nvgpu_log_info(g, "l2_system_invalidate 0x%x", | ||
515 | data); | ||
516 | nvgpu_udelay(5); | ||
517 | } else { | ||
518 | break; | ||
519 | } | ||
520 | } while (!nvgpu_timeout_expired(&timeout)); | ||
521 | |||
522 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
523 | nvgpu_warn(g, "l2_system_invalidate too many retries"); | ||
524 | } | ||
525 | |||
526 | trace_gk20a_mm_l2_invalidate_done(g->name); | ||
527 | } | ||
528 | |||
529 | void gk20a_mm_l2_invalidate(struct gk20a *g) | ||
530 | { | ||
531 | struct mm_gk20a *mm = &g->mm; | ||
532 | gk20a_busy_noresume(g); | ||
533 | if (g->power_on) { | ||
534 | nvgpu_mutex_acquire(&mm->l2_op_lock); | ||
535 | gk20a_mm_l2_invalidate_locked(g); | ||
536 | nvgpu_mutex_release(&mm->l2_op_lock); | ||
537 | } | ||
538 | gk20a_idle_nosuspend(g); | ||
539 | } | ||
540 | |||
541 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) | ||
542 | { | ||
543 | struct mm_gk20a *mm = &g->mm; | ||
544 | u32 data; | ||
545 | struct nvgpu_timeout timeout; | ||
546 | u32 retries = 2000; | ||
547 | |||
548 | nvgpu_log_fn(g, " "); | ||
549 | |||
550 | gk20a_busy_noresume(g); | ||
551 | if (!g->power_on) { | ||
552 | goto hw_was_off; | ||
553 | } | ||
554 | |||
555 | if (g->ops.mm.get_flush_retries) { | ||
556 | retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH); | ||
557 | } | ||
558 | |||
559 | nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); | ||
560 | |||
561 | nvgpu_mutex_acquire(&mm->l2_op_lock); | ||
562 | |||
563 | trace_gk20a_mm_l2_flush(g->name); | ||
564 | |||
565 | /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 | ||
566 | as clean, so subsequent reads might hit in the L2. */ | ||
567 | gk20a_writel(g, flush_l2_flush_dirty_r(), | ||
568 | flush_l2_flush_dirty_pending_busy_f()); | ||
569 | |||
570 | do { | ||
571 | data = gk20a_readl(g, flush_l2_flush_dirty_r()); | ||
572 | |||
573 | if (flush_l2_flush_dirty_outstanding_v(data) == | ||
574 | flush_l2_flush_dirty_outstanding_true_v() || | ||
575 | flush_l2_flush_dirty_pending_v(data) == | ||
576 | flush_l2_flush_dirty_pending_busy_v()) { | ||
577 | nvgpu_log_info(g, "l2_flush_dirty 0x%x", data); | ||
578 | nvgpu_udelay(5); | ||
579 | } else { | ||
580 | break; | ||
581 | } | ||
582 | } while (!nvgpu_timeout_expired_msg(&timeout, | ||
583 | "l2_flush_dirty too many retries")); | ||
584 | |||
585 | trace_gk20a_mm_l2_flush_done(g->name); | ||
586 | |||
587 | if (invalidate) { | ||
588 | gk20a_mm_l2_invalidate_locked(g); | ||
589 | } | ||
590 | |||
591 | nvgpu_mutex_release(&mm->l2_op_lock); | ||
592 | |||
593 | hw_was_off: | ||
594 | gk20a_idle_nosuspend(g); | ||
595 | } | ||
596 | |||
597 | void gk20a_mm_cbc_clean(struct gk20a *g) | ||
598 | { | ||
599 | struct mm_gk20a *mm = &g->mm; | ||
600 | u32 data; | ||
601 | struct nvgpu_timeout timeout; | ||
602 | u32 retries = 200; | ||
603 | |||
604 | nvgpu_log_fn(g, " "); | ||
605 | |||
606 | gk20a_busy_noresume(g); | ||
607 | if (!g->power_on) { | ||
608 | goto hw_was_off; | ||
609 | } | ||
610 | |||
611 | if (g->ops.mm.get_flush_retries) { | ||
612 | retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN); | ||
613 | } | ||
614 | |||
615 | nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); | ||
616 | |||
617 | nvgpu_mutex_acquire(&mm->l2_op_lock); | ||
618 | |||
619 | /* Flush all dirty lines from the CBC to L2 */ | ||
620 | gk20a_writel(g, flush_l2_clean_comptags_r(), | ||
621 | flush_l2_clean_comptags_pending_busy_f()); | ||
622 | |||
623 | do { | ||
624 | data = gk20a_readl(g, flush_l2_clean_comptags_r()); | ||
625 | |||
626 | if (flush_l2_clean_comptags_outstanding_v(data) == | ||
627 | flush_l2_clean_comptags_outstanding_true_v() || | ||
628 | flush_l2_clean_comptags_pending_v(data) == | ||
629 | flush_l2_clean_comptags_pending_busy_v()) { | ||
630 | nvgpu_log_info(g, "l2_clean_comptags 0x%x", data); | ||
631 | nvgpu_udelay(5); | ||
632 | } else { | ||
633 | break; | ||
634 | } | ||
635 | } while (!nvgpu_timeout_expired_msg(&timeout, | ||
636 | "l2_clean_comptags too many retries")); | ||
637 | |||
638 | nvgpu_mutex_release(&mm->l2_op_lock); | ||
639 | |||
640 | hw_was_off: | ||
641 | gk20a_idle_nosuspend(g); | ||
642 | } | ||
643 | |||
644 | u32 gk20a_mm_get_iommu_bit(struct gk20a *g) | ||
645 | { | ||
646 | return 34; | ||
647 | } | ||
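
/*
 * Bit 34 is reported as the IOMMU bit: when system-memory pages sit behind
 * the SoC SMMU, the driver ORs this bit into the physical address it
 * programs so the access is routed through the IOMMU translation. A small
 * illustration (the helper name below is hypothetical):
 *
 *   iommu_addr = phys_addr | (1ULL << gk20a_mm_get_iommu_bit(g));
 */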
648 | |||
649 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | ||
650 | u32 big_page_size) | ||
651 | { | ||
652 | return (big_page_size == SZ_64K) ? | ||
653 | gk20a_mm_levels_64k : gk20a_mm_levels_128k; | ||
654 | } | ||