From 18a017865946617fd63256858a0d2300160643f4 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Thu, 12 May 2016 09:31:30 +0300
Subject: gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem

To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of using the cpu pointer directly
(as was done until now). The selection and aperture support will be in
another patch; this patch only refactors these accessors, but keeps the
underlying functionality as-is.

JIRA DNVGPU-23

Change-Id: I21d4a54827b0e2741012dfde7952c0555a583435
Signed-off-by: Konsta Holtta
Reviewed-on: http://git-master/r/1121914
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams
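The shape of the refactored accessors can be read off the call sites in the
diff below. As a rough, sysmem-only sketch (only the signatures are inferred
from the callers in this patch; the mem_desc layout and the helper bodies
shown here are illustrative assumptions, and the actual per-aperture
selection lands in the follow-up patch):

/*
 * Illustrative sketch only, not the nvgpu sources: the accessors take the
 * device (g) and a buffer descriptor (mem) instead of a raw CPU pointer,
 * so a later patch can choose sysmem vs vidmem access per buffer aperture.
 */
#include <linux/types.h>
#include <linux/string.h>
#include <linux/errno.h>

struct gk20a;				/* opaque for this sketch */

struct mem_desc {
	void *cpu_va;			/* CPU mapping, valid for sysmem buffers */
	size_t size;
	/* pages, aperture, sgt, ... omitted */
};

/* rd32/wr32 index by 32-bit word (cf. pde3_from_index() below);
 * rd/wr take byte offsets (cf. the ctxsw_prog_*_o() users). */
static inline u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
{
	return ((u32 *)mem->cpu_va)[w];
}

static inline void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem,
		u32 w, u32 data)
{
	((u32 *)mem->cpu_va)[w] = data;
}

static inline u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem,
		u32 offset)
{
	return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
}

static inline void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem,
		u32 offset, u32 data)
{
	gk20a_mem_wr32(g, mem, offset / sizeof(u32), data);
}

static inline void gk20a_memset(struct gk20a *g, struct mem_desc *mem,
		u32 offset, u32 c, u32 size)
{
	memset((u8 *)mem->cpu_va + offset, c, size);
}

/* Map/unmap bracket used by the gr_gp10b.c hunks in place of
 * vmap()/vunmap(); returns 0 on success. */
static inline int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
{
	return mem->cpu_va ? 0 : -ENOMEM;
}

static inline void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
{
}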
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 61 +++++++++++++---------
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c   | 78 ++++++++++++++++++------------------
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c   | 44 +++++++++++---------
 3 files changed, 91 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 9cb26d3f..4766e0e4 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,24 +25,24 @@
 #include "hw_ram_gp10b.h"
 
 static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
-				void *inst_ptr)
+				struct mem_desc *mem)
 {
 	u32 val;
 
 	gk20a_dbg_fn("");
 
-	val = gk20a_mem_rd32(inst_ptr,
+	val = gk20a_mem_rd32(g, mem,
 			ram_in_page_dir_base_fault_replay_tex_w());
 	val &= ~ram_in_page_dir_base_fault_replay_tex_m();
 	val |= ram_in_page_dir_base_fault_replay_tex_true_f();
-	gk20a_mem_wr32(inst_ptr,
+	gk20a_mem_wr32(g, mem,
 			ram_in_page_dir_base_fault_replay_tex_w(), val);
 
-	val = gk20a_mem_rd32(inst_ptr,
+	val = gk20a_mem_rd32(g, mem,
 			ram_in_page_dir_base_fault_replay_gcc_w());
 	val &= ~ram_in_page_dir_base_fault_replay_gcc_m();
 	val |= ram_in_page_dir_base_fault_replay_gcc_true_f();
-	gk20a_mem_wr32(inst_ptr,
+	gk20a_mem_wr32(g, mem,
 			ram_in_page_dir_base_fault_replay_gcc_w(), val);
 
 	gk20a_dbg_fn("done");
@@ -52,28 +52,25 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 {
 	u32 addr_lo;
 	u32 addr_hi;
-	void *inst_ptr;
 	struct gk20a *g = c->g;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
-
 	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
 	addr_hi = u64_hi32(c->userd_iova);
 
 	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
 		c->hw_chid, (u64)c->userd_iova);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+	gk20a_mem_wr32(g, &c->inst_block,
+		ram_in_ramfc_w() + ram_fc_userd_w(),
 		(g->mm.vidmem_is_vidmem ?
		 pbdma_userd_target_sys_mem_ncoh_f() :
		 pbdma_userd_target_vid_mem_f()) |
		pbdma_userd_addr_f(addr_lo));
 
-	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+	gk20a_mem_wr32(g, &c->inst_block,
+		ram_in_ramfc_w() + ram_fc_userd_hi_w(),
 		pbdma_userd_hi_addr_f(addr_hi));
 
 	return 0;
@@ -82,33 +79,30 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 
 static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
 {
-	void *inst_ptr;
+	struct gk20a *g = c->g;
+	struct mem_desc *mem = &c->inst_block;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
+	gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
 
-	memset(inst_ptr, 0, ram_fc_size_val_v());
-
-	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
 		pbdma_gp_base_offset_f(
 		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
 		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
 		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
 		c->g->ops.fifo.get_pbdma_signature(c->g));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
 		pbdma_formats_gp_fermi0_f() |
 		pbdma_formats_pb_fermi1_f() |
 		pbdma_formats_mp_fermi0_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
 		pbdma_pb_header_priv_user_f() |
 		pbdma_pb_header_method_zero_f() |
 		pbdma_pb_header_subchannel_zero_f() |
@@ -116,26 +110,26 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		pbdma_pb_header_first_true_f() |
 		pbdma_pb_header_type_inc_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
 		pbdma_subdevice_id_f(1) |
 		pbdma_subdevice_status_active_f() |
 		pbdma_subdevice_channel_dma_enable_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+	gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
 		channel_gk20a_pbdma_acquire_val(c));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
 		pbdma_runlist_timeslice_timeout_128_f() |
 		pbdma_runlist_timeslice_timescale_3_f() |
 		pbdma_runlist_timeslice_enable_true_f());
 
 	if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
-		gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
+		gp10b_set_pdb_fault_replay_flags(c->g, mem);
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+	gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
 	return channel_gp10b_commit_userd(c);
 }
@@ -149,14 +143,12 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 {
 	u32 new_syncpt = 0, old_syncpt;
-	void *inst_ptr;
 	u32 v;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-
-	v = gk20a_mem_rd32(inst_ptr, ram_fc_allowed_syncpoints_w());
+	v = gk20a_mem_rd32(c->g, &c->inst_block,
+			ram_fc_allowed_syncpoints_w());
 	old_syncpt = pbdma_allowed_syncpoints_0_index_v(v);
 	if (c->sync)
 		new_syncpt = c->sync->syncpt_id(c->sync);
@@ -175,7 +167,8 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 
 		v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt);
 
-		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
+		gk20a_mem_wr32(c->g, &c->inst_block,
+				ram_fc_allowed_syncpoints_w(), v);
 	}
 
 	/* enable channel */
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b36eff8f..07f1014f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -958,52 +958,51 @@ fail_free_gk20a_ctx:
 }
 
 static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
-		struct gr_ctx_desc *gr_ctx) {
-	void *ctx_ptr = vmap(gr_ctx->mem.pages,
-			PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr) {
+		struct gr_ctx_desc *gr_ctx)
+{
+	struct mem_desc *mem = &gr_ctx->mem;
+
+	if (gk20a_mem_begin(g, mem)) {
 		WARN_ON("Cannot map context");
 		return;
 	}
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_magic_value_o(), 0),
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_magic_value_o()),
 		ctxsw_prog_main_image_magic_value_v_value_v());
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
 	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_wfi_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_cta_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_gfxp_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_cilp_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g),
 		"image gfx preemption option (GFXP is 1) %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_graphics_preemption_options_o(),
-			0));
-	vunmap(ctx_ptr);
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o()));
+	gk20a_mem_end(g, mem);
 }
 
 static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
@@ -1028,7 +1027,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 
 static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
-		void *ctx_ptr)
+		struct mem_desc *mem)
 {
 	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	u32 gfxp_preempt_option =
@@ -1043,19 +1042,22 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
 				gfxp_preempt_option);
 	}
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
 		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
 				cilp_preempt_option);
 	}
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
 		gk20a_dbg_info("CTA: %x", cta_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
 				cta_preempt_option);
 	}
 
@@ -1064,7 +1066,8 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		u32 size;
 		u32 cbes_reserve;
 
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_full_preemption_ptr_o(),
 			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
 
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
@@ -1931,7 +1934,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	struct gk20a *g = ch->g;
 	struct tsg_gk20a *tsg;
 	struct vm_gk20a *vm;
-	void *ctx_ptr;
+	struct mem_desc *mem = &gr_ctx->mem;
 	u32 class;
 	int err = 0;
 
@@ -1955,10 +1958,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	if (err)
 		return err;
 
-	ctx_ptr = vmap(gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
 	g->ops.fifo.disable_channel(ch);
@@ -1967,14 +1967,14 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		goto unmap_ctx;
 
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
-		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, ctx_ptr);
+		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
 	}
 
 	g->ops.fifo.enable_channel(ch);
 
 unmap_ctx:
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, mem);
 
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0e1f3c4b..0b693f7c 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde3_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v());
+	return i * gmmu_new_pde__size_v() / sizeof(u32);
+}
+
+static u32 pte3_from_index(u32 i)
+{
+	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
-	pde = pde3_from_index(parent, i);
+	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde0_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v());
+	return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
 }
 
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	u32 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
-	pde = pde0_from_index(pte, i);
+	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
-	gk20a_mem_wr32(pde, 2, pde_v[2]);
-	gk20a_mem_wr32(pde, 3, pde_v[3]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 	return gp10b_mm_levels;
 }
 
-static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
+		u64 pdb_addr)
 {
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
 		(g->mm.vidmem_is_vidmem ?
 		 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
 		 ram_in_page_dir_base_target_vid_mem_f()) |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
 		1 << 10);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
-- 
cgit v1.2.2