From 55a5c57bc1fd532cc6d041fdfb70d90286894b35 Mon Sep 17 00:00:00 2001 From: Seshendra Gadagottu Date: Thu, 7 Apr 2016 16:57:18 -0700 Subject: gpu: nvgpu: gv11b: added initial source code Bug 1735757 Change-Id: Iea7488551a437afa0dfc005c87ad1b9ab9673b6c Signed-off-by: Seshendra Gadagottu Reviewed-on: http://git-master/r/1122123 GVS: Gerrit_Virtual_Submit Reviewed-by: Ken Adams --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 drivers/gpu/nvgpu/gv11b/gr_gv11b.c (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c new file mode 100644 index 00000000..d775aae8 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -0,0 +1,31 @@ +/* + * GV11B GPU GR + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ +#include +#include + +#include "gk20a/gr_gk20a.h" +#include "gk20a/semaphore_gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" + +#include "gm20b/gr_gm20b.h" /* for MAXWELL classes */ +#include "gp10b/gr_gp10b.h" +#include "gv11b/gr_gv11b.h" + +void gv11b_init_gr(struct gpu_ops *gops) +{ + gp10b_init_gr(gops); +} -- cgit v1.2.2 From c84ddceda648d6e47828115654ca7745010ec09f Mon Sep 17 00:00:00 2001 From: Seshendra Gadagottu Date: Thu, 14 Apr 2016 13:01:58 -0700 Subject: gpu: nvgpu: gv11b: sm priv reg related changes Included all basic ops for gv11b and updated sm related functions to include new priv register addresses. Bug 1735757 Change-Id: Ie48651f918ee97fba00487111e4b28d6c95747f5 Signed-off-by: Seshendra Gadagottu Reviewed-on: http://git-master/r/1126961 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1743 +++++++++++++++++++++++++++++++++++- 1 file changed, 1741 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d775aae8..f0736e19 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1,5 +1,5 @@ /* - * GV11B GPU GR + * GV11b GPU GR * * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
* @@ -21,11 +21,1750 @@ #include "gk20a/semaphore_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" -#include "gm20b/gr_gm20b.h" /* for MAXWELL classes */ +#include "gm20b/gr_gm20b.h" #include "gp10b/gr_gp10b.h" #include "gv11b/gr_gv11b.h" +#include "hw_gr_gv11b.h" +#include "hw_fifo_gv11b.h" +#include "hw_proj_gv11b.h" +#include "hw_ctxsw_prog_gv11b.h" +#include "hw_mc_gv11b.h" +#include + +static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) +{ + bool valid = false; + + switch (class_num) { + case VOLTA_COMPUTE_A: + case VOLTA_A: + case VOLTA_DMA_COPY_A: + valid = true; + break; + + case MAXWELL_COMPUTE_B: + case MAXWELL_B: + case FERMI_TWOD_A: + case KEPLER_DMA_COPY_A: + case MAXWELL_DMA_COPY_A: + case PASCAL_COMPUTE_A: + case PASCAL_A: + case PASCAL_DMA_COPY_A: + valid = true; + break; + + default: + break; + } + gk20a_dbg_info("class=0x%x valid=%d", class_num, valid); + return valid; +} + +static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch) +{ + int ret = 0; + u32 offset = proj_gpc_stride_v() * gpc + + proj_tpc_in_gpc_stride_v() * tpc; + u32 lrf_ecc_status, shm_ecc_status; + + gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch); + + /* Check for LRF ECC errors. 
*/ + lrf_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); + if ( (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) || + (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) || + (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) || + (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) { + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Single bit error detected in SM LRF!"); + + g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, + 0); + } + if ( (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) || + (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) || + (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) || + (lrf_ecc_status & + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) { + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Double bit error detected in SM LRF!"); + + g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, + 0); + } + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, + lrf_ecc_status); + + /* Check for SHM ECC errors. 
*/ + shm_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset); + if ((shm_ecc_status & + gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) || + (shm_ecc_status & + gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) || + (shm_ecc_status & + gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) || + (shm_ecc_status & + gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) { + u32 ecc_stats_reg_val; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Single bit error detected in SM SHM!"); + + ecc_stats_reg_val = + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); + g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] += + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); + g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] += + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, + ecc_stats_reg_val); + } + if ( (shm_ecc_status & + gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) || + (shm_ecc_status & + gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) { + u32 ecc_stats_reg_val; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Double bit error detected in SM SHM!"); + + ecc_stats_reg_val = + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); + g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] += + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, + ecc_stats_reg_val); + } + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + 
offset, + shm_ecc_status); + + + return ret; +} + +static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event) +{ + int ret = 0; + u32 offset = proj_gpc_stride_v() * gpc + + proj_tpc_in_gpc_stride_v() * tpc; + u32 esr; + u32 ecc_stats_reg_val; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + esr = gk20a_readl(g, + gr_gpc0_tpc0_tex_m_hww_esr_r() + offset); + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr); + + if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Single bit error detected in TEX!"); + + /* Pipe 0 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + /* Pipe 1 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); + gk20a_writel(g, + 
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); + } + if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Double bit error detected in TEX!"); + + /* Pipe 0 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + /* Pipe 1 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] += + 
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); + } + + gk20a_writel(g, + gr_gpc0_tpc0_tex_m_hww_esr_r() + offset, + esr); + + return ret; +} + +static int gr_gv11b_commit_global_cb_manager(struct gk20a *g, + struct channel_gk20a *c, bool patch) +{ + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + u32 attrib_offset_in_chunk = 0; + u32 alpha_offset_in_chunk = 0; + u32 pd_ab_max_output; + u32 gpc_index, ppc_index; + u32 temp, temp2; + u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate; + u32 attrib_size_in_chunk, cb_attrib_cache_size_init; + + gk20a_dbg_fn(""); + + if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) { + attrib_size_in_chunk = gr->attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + cb_attrib_cache_size_init = gr->attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + } else { + attrib_size_in_chunk = gr->attrib_cb_size; + cb_attrib_cache_size_init = gr->attrib_cb_default_size; + } + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), + gr->attrib_cb_default_size, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, 
gr_ds_tga_constraintlogic_alpha_r(), + gr->alpha_cb_default_size, patch); + + pd_ab_max_output = (gr->alpha_cb_default_size * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / + gr_pd_ab_dist_cfg1_max_output_granularity_v(); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), + gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | + gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); + + attrib_offset_in_chunk = alpha_offset_in_chunk + + gr->tpc_count * gr->alpha_cb_size; + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + temp = proj_gpc_stride_v() * gpc_index; + temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index; + for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + ppc_index++) { + cbm_cfg_size_beta = cb_attrib_cache_size_init * + gr->pes_tpc_count[ppc_index][gpc_index]; + cbm_cfg_size_alpha = gr->alpha_cb_default_size * + gr->pes_tpc_count[ppc_index][gpc_index]; + cbm_cfg_size_steadystate = gr->attrib_cb_default_size * + gr->pes_tpc_count[ppc_index][gpc_index]; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + cbm_cfg_size_beta, patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + attrib_offset_in_chunk, patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + cbm_cfg_size_steadystate, + patch); + + attrib_offset_in_chunk += attrib_size_in_chunk * + gr->pes_tpc_count[ppc_index][gpc_index]; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + cbm_cfg_size_alpha, patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + alpha_offset_in_chunk, patch); + + alpha_offset_in_chunk += gr->alpha_cb_size * + 
gr->pes_tpc_count[ppc_index][gpc_index]; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), + gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), + patch); + } + } + + return 0; +} + +static void gr_gv11b_commit_global_pagepool(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch) +{ + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), + gr_scc_pagepool_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), + gr_scc_pagepool_total_pages_f(size) | + gr_scc_pagepool_valid_true_f(), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), + gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), + gr_gpcs_gcc_pagepool_total_pages_f(size), patch); +} + +static int gr_gv11b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *color_val, u32 index) +{ + u32 i; + u32 zbc_c; + + /* update l2 table */ + g->ops.ltc.set_zbc_color_entry(g, color_val, index); + + /* update ds table */ + gk20a_writel(g, gr_ds_zbc_color_r_r(), + gr_ds_zbc_color_r_val_f(color_val->color_ds[0])); + gk20a_writel(g, gr_ds_zbc_color_g_r(), + gr_ds_zbc_color_g_val_f(color_val->color_ds[1])); + gk20a_writel(g, gr_ds_zbc_color_b_r(), + gr_ds_zbc_color_b_val_f(color_val->color_ds[2])); + gk20a_writel(g, gr_ds_zbc_color_a_r(), + gr_ds_zbc_color_a_val_f(color_val->color_ds[3])); + + gk20a_writel(g, gr_ds_zbc_color_fmt_r(), + gr_ds_zbc_color_fmt_val_f(color_val->format)); + + gk20a_writel(g, gr_ds_zbc_tbl_index_r(), + gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); + + /* trigger the write */ + gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), + gr_ds_zbc_tbl_ld_select_c_f() | + gr_ds_zbc_tbl_ld_action_write_f() | + gr_ds_zbc_tbl_ld_trigger_active_f()); + + /* update local copy */ + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + gr->zbc_col_tbl[index].color_l2[i] = 
color_val->color_l2[i]; + gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i]; + } + gr->zbc_col_tbl[index].format = color_val->format; + gr->zbc_col_tbl[index].ref_cnt++; + + gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_r_r(index), + color_val->color_ds[0]); + gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_g_r(index), + color_val->color_ds[1]); + gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_b_r(index), + color_val->color_ds[2]); + gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_a_r(index), + color_val->color_ds[3]); + zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3)); + zbc_c &= ~(0x7f << ((index % 4) * 7)); + zbc_c |= color_val->format << ((index % 4) * 7); + gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c); + + return 0; +} + +static int gr_gv11b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *depth_val, u32 index) +{ + u32 zbc_z; + + /* update l2 table */ + g->ops.ltc.set_zbc_depth_entry(g, depth_val, index); + + /* update ds table */ + gk20a_writel(g, gr_ds_zbc_z_r(), + gr_ds_zbc_z_val_f(depth_val->depth)); + + gk20a_writel(g, gr_ds_zbc_z_fmt_r(), + gr_ds_zbc_z_fmt_val_f(depth_val->format)); + + gk20a_writel(g, gr_ds_zbc_tbl_index_r(), + gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); + + /* trigger the write */ + gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), + gr_ds_zbc_tbl_ld_select_z_f() | + gr_ds_zbc_tbl_ld_action_write_f() | + gr_ds_zbc_tbl_ld_trigger_active_f()); + + /* update local copy */ + gr->zbc_dep_tbl[index].depth = depth_val->depth; + gr->zbc_dep_tbl[index].format = depth_val->format; + gr->zbc_dep_tbl[index].ref_cnt++; + + gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth); + zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3)); + zbc_z &= ~(0x7f << (index % 4) * 7); + zbc_z |= depth_val->format << (index % 4) * 7; + gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index 
& ~3), zbc_z); + + return 0; +} + +static u32 gr_gv11b_pagepool_default_size(struct gk20a *g) +{ + return gr_scc_pagepool_total_pages_hwmax_value_v(); +} + +static int gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + int size; + + gr->attrib_cb_size = gr->attrib_cb_default_size; + gr->alpha_cb_size = gr->alpha_cb_default_size; + + gr->attrib_cb_size = min(gr->attrib_cb_size, + gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count); + gr->alpha_cb_size = min(gr->alpha_cb_size, + gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count); + + size = gr->attrib_cb_size * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * + gr->max_tpc_count; + + size += gr->alpha_cb_size * + gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * + gr->max_tpc_count; + + size = ALIGN(size, 128); + + return size; +} + +static void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data) +{ + gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); +} + +static void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data) +{ + u32 val; + + gk20a_dbg_fn(""); + + val = gk20a_readl(g, gr_gpcs_tc_debug0_r()); + val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(), + gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data)); + gk20a_writel(g, gr_gpcs_tc_debug0_r(), val); + + gk20a_dbg_fn("done"); +} + +static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data) +{ + gk20a_dbg_fn(""); + + if (class_num == PASCAL_COMPUTE_A) { + switch (offset << 2) { + case NVC0C0_SET_SHADER_EXCEPTIONS: + gk20a_gr_set_shader_exceptions(g, data); + break; + default: + goto fail; + } + } + + if (class_num == PASCAL_A) { + switch (offset << 2) { + case NVC097_SET_SHADER_EXCEPTIONS: + gk20a_gr_set_shader_exceptions(g, data); + break; + case NVC097_SET_CIRCULAR_BUFFER_SIZE: + g->ops.gr.set_circular_buffer_size(g, data); + break; + case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE: + g->ops.gr.set_alpha_circular_buffer_size(g, data); 
+ break; + case NVC097_SET_GO_IDLE_TIMEOUT: + gr_gv11b_set_go_idle_timeout(g, data); + break; + case NVC097_SET_COALESCE_BUFFER_SIZE: + gr_gv11b_set_coalesce_buffer_size(g, data); + break; + default: + goto fail; + } + } + return 0; + +fail: + return -EINVAL; +} + +static void gr_gv11b_cb_size_default(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + + if (!gr->attrib_cb_default_size) + gr->attrib_cb_default_size = 0x800; + gr->alpha_cb_default_size = + gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); +} + +static void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +{ + struct gr_gk20a *gr = &g->gr; + u32 gpc_index, ppc_index, stride, val; + u32 pd_ab_max_output; + u32 alpha_cb_size = data * 4; + + gk20a_dbg_fn(""); + + if (alpha_cb_size > gr->alpha_cb_size) + alpha_cb_size = gr->alpha_cb_size; + + gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(), + (gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) & + ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | + gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); + + pd_ab_max_output = alpha_cb_size * + gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() / + gr_pd_ab_dist_cfg1_max_output_granularity_v(); + + gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), + gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | + gr_pd_ab_dist_cfg1_max_batches_init_f()); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + stride = proj_gpc_stride_v() * gpc_index; + + for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + ppc_index++) { + + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index); + + val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), + gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * + gr->pes_tpc_count[ppc_index][gpc_index])); + + gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index, val); + } + } +} + +static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, 
u32 data) +{ + struct gr_gk20a *gr = &g->gr; + u32 gpc_index, ppc_index, stride, val; + u32 cb_size_steady = data * 4, cb_size; + + gk20a_dbg_fn(""); + + if (cb_size_steady > gr->attrib_cb_size) + cb_size_steady = gr->attrib_cb_size; + if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) != + gk20a_readl(g, + gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) { + cb_size = cb_size_steady + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + } else { + cb_size = cb_size_steady; + } + + gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(), + (gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) & + ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | + gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady)); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + stride = proj_gpc_stride_v() * gpc_index; + + for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + ppc_index++) { + + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index); + + val = set_field(val, + gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), + gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * + gr->pes_tpc_count[ppc_index][gpc_index])); + + gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index, val); + + gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index + + gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + + stride, + gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f( + cb_size_steady)); + + val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( + ppc_index + gpc_index)); + + val = set_field(val, + gr_gpcs_swdx_tc_beta_cb_size_v_m(), + gr_gpcs_swdx_tc_beta_cb_size_v_f( + cb_size_steady * + gr->gpc_ppc_count[gpc_index])); + + gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( + ppc_index + gpc_index), val); + } + } +} + +static int gr_gv11b_init_ctx_state(struct gk20a *g) +{ + struct fecs_method_op_gk20a op = { + .mailbox = { .id = 0, .data = 0, + .clr = ~0, .ok 
= 0, .fail = 0}, + .method.data = 0, + .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, + .cond.fail = GR_IS_UCODE_OP_SKIP, + }; + int err; + + gk20a_dbg_fn(""); + + err = gr_gk20a_init_ctx_state(g); + if (err) + return err; + + if (!g->gr.t18x.ctx_vars.preempt_image_size) { + op.method.addr = + gr_fecs_method_push_adr_discover_preemption_image_size_v(); + op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size; + err = gr_gk20a_submit_fecs_method_op(g, op, false); + if (err) { + gk20a_err(dev_from_gk20a(g), + "query preempt image size failed"); + return err; + } + } + + gk20a_dbg_info("preempt image size: %u", + g->gr.t18x.ctx_vars.preempt_image_size); + + gk20a_dbg_fn("done"); + + return 0; +} + +int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, + struct mem_desc *mem) +{ + int err; + + gk20a_dbg_fn(""); + + err = gk20a_gmmu_alloc_attr(vm->mm->g, 0, size, mem); + if (err) + return err; + + mem->gpu_va = gk20a_gmmu_map(vm, + &mem->sgt, + size, + NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + gk20a_mem_flag_none, + false); + + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + gk20a_gmmu_free(vm->mm->g, mem); + return err; +} + +static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + u32 class, + u32 flags) +{ + int err; + + gk20a_dbg_fn(""); + + err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); + if (err) + return err; + + (*gr_ctx)->t18x.ctx_id_valid = false; + + if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) + flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP; + + if (class == PASCAL_COMPUTE_A && + g->gr.t18x.ctx_vars.force_preemption_cilp) + flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP; + + if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) { + u32 spill_size = + gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * + gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); + u32 pagepool_size = g->ops.gr.pagepool_default_size(g) * + gr_scc_pagepool_total_pages_byte_granularity_v(); + 
u32 betacb_size = g->gr.attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * + g->gr.max_tpc_count; + attrib_cb_size = ALIGN(attrib_cb_size, 128); + + gk20a_dbg_info("gfxp context spill_size=%d", spill_size); + gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size); + gk20a_dbg_info("gfxp context attrib_cb_size=%d", + attrib_cb_size); + err = gr_gv11b_alloc_buffer(vm, + g->gr.t18x.ctx_vars.preempt_image_size, + &(*gr_ctx)->t18x.preempt_ctxsw_buffer); + if (err) { + gk20a_err(dev_from_gk20a(vm->mm->g), + "cannot allocate preempt buffer"); + goto fail_free_gk20a_ctx; + } + + err = gr_gv11b_alloc_buffer(vm, + spill_size, + &(*gr_ctx)->t18x.spill_ctxsw_buffer); + if (err) { + gk20a_err(dev_from_gk20a(vm->mm->g), + "cannot allocate spill buffer"); + goto fail_free_preempt; + } + + err = gr_gv11b_alloc_buffer(vm, + attrib_cb_size, + &(*gr_ctx)->t18x.betacb_ctxsw_buffer); + if (err) { + gk20a_err(dev_from_gk20a(vm->mm->g), + "cannot allocate beta buffer"); + goto fail_free_spill; + } + + err = gr_gv11b_alloc_buffer(vm, + pagepool_size, + &(*gr_ctx)->t18x.pagepool_ctxsw_buffer); + if (err) { + gk20a_err(dev_from_gk20a(vm->mm->g), + "cannot allocate page pool"); + goto fail_free_betacb; + } + + (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP; + } + + if (class == PASCAL_COMPUTE_A) { + if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) + (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP; + else + (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA; + } + + gk20a_dbg_fn("done"); + + return err; + +fail_free_betacb: + gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer); +fail_free_spill: + gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer); +fail_free_preempt: + gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer); +fail_free_gk20a_ctx: + 
gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
+	*gr_ctx = NULL;
+
+	return err;
+}
+
+/*
+ * Debug helper: map the context image of gr_ctx with vmap() and print the
+ * magic value, the timestamp buffer fields, the per-type save operation
+ * counters and the graphics preemption option through gk20a_err().
+ * The mapping is released with vunmap() before returning.
+ */
+static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
+		struct gr_ctx_desc *gr_ctx) {
+	void *ctx_ptr = vmap(gr_ctx->mem.pages,
+			PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
+			0, pgprot_writecombine(PAGE_KERNEL));
+	if (!ctx_ptr) {
+		/*
+		 * WARN_ON() takes a condition, not a message: a string
+		 * literal there is always true and its text is never
+		 * printed. WARN() emits the message with the backtrace.
+		 */
+		WARN(1, "Cannot map context\n");
+		return;
+	}
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
+		gk20a_mem_rd32(ctx_ptr +
+				ctxsw_prog_main_image_magic_value_o(), 0),
+		ctxsw_prog_main_image_magic_value_v_value_v());
+
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));
+
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));
+
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));
+
+	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g),
+		"image gfx preemption option (GFXP is 1) %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
+			0));
+	vunmap(ctx_ptr);
+}
+
+/*
+ * Release a graphics context: optionally dump context switch statistics
+ * (when dump_ctxsw_stats_on_channel_close is set), unmap and free the four
+ * t18x preemption buffers, then hand the context to gr_gk20a_free_gr_ctx().
+ * A NULL gr_ctx is accepted and ignored.
+ */
+static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+			  struct gr_ctx_desc *gr_ctx)
+{
+	gk20a_dbg_fn("");
+
+	if (!gr_ctx)
+		return;
+
+	if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
+		dump_ctx_switch_stats(g, vm, gr_ctx);
+
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+	gk20a_dbg_fn("done");
+}
+
+
+/*
+ * Program the preemption settings of a channel context.
+ *
+ * Writes the GFXP or CILP option word into the context image at ctx_ptr
+ * according to gr_ctx->preempt_mode. When a preempt ctxsw buffer is mapped
+ * (non-zero gpu_va), also writes the full preemption pointer and emits
+ * patch writes for the attribute cb, page pool, spill buffer and the
+ * cbes_reserve fields, bracketed by patch_write_begin/patch_write_end.
+ *
+ * If gr_gk20a_ctx_patch_write_begin() fails, the error is logged and the
+ * patch writes (and the matching patch_write_end) are skipped.
+ */
+static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
+		struct channel_ctx_gk20a *ch_ctx,
+		void *ctx_ptr)
+{
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	u32 gfxp_preempt_option =
+		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+	u32 cilp_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+				gfxp_preempt_option);
+	}
+
+	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) {
+		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+				cilp_preempt_option);
+	}
+
+	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
+		u32 addr;
+		u32 size;
+		u32 cbes_reserve;
+
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
+				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
+
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		if (err) {
+			/* Nothing below may run without a begun patch write. */
+			gk20a_err(dev_from_gk20a(g),
+				"can't map patch context");
+			goto out;
+		}
+
+		addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
+			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+
+		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
+		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+
+		addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
+			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+		size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
+
+		if (size == g->ops.gr.pagepool_default_size(g))
+			size = gr_scc_pagepool_total_pages_hwmax_v();
+
+		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+
+		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
+			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+		size = gr_ctx->t18x.spill_ctxsw_buffer.size /
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
+				gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_size_r(),
+				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
+				true);
+
+		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_swdx_beta_cb_ctrl_r(),
+				gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+	}
+
+out:
+	gk20a_dbg_fn("done");
+}
+
+static int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
+			   struct gk20a_debug_output *o)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
+		gk20a_readl(g, gr_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
+		gk20a_readl(g, gr_status_1_r()));
+
gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n", + gk20a_readl(g, gr_status_2_r())); + gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n", + gk20a_readl(g, gr_engine_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n", + gk20a_readl(g, gr_gpfifo_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n", + gk20a_readl(g, gr_gpfifo_ctl_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", + gk20a_readl(g, gr_fecs_host_int_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n", + gk20a_readl(g, gr_exception_r())); + gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", + gk20a_readl(g, gr_fecs_intr_r())); + gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", + gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A))); + gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_activity_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", + gk20a_readl(g, gr_activity_1_r())); + gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n", + gk20a_readl(g, gr_activity_2_r())); + gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n", + gk20a_readl(g, gr_activity_4_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n", + gk20a_readl(g, gr_pri_sked_activity_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); + if (gr->gpc_tpc_count[0] == 2) + 
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); + if (gr->gpc_tpc_count[0] == 2) + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_be0_becs_be_activity0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_be1_becs_be_activity0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n", + gk20a_readl(g, gr_pri_bes_becs_be_activity0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n", + gk20a_readl(g, gr_pri_ds_mpipe_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n", + gk20a_readl(g, gr_fe_go_idle_timeout_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n", + gk20a_readl(g, gr_pri_fe_go_idle_info_r())); + gk20a_debug_output(o, 
"NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n", + gk20a_readl(g, gr_cwd_fs_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n", + gk20a_readl(g, gr_fe_tpc_fs_r(0))); + gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n", + gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0))); + gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n", + gk20a_readl(g, gr_cwd_sm_id_r(0))); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n", + gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n", + gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n", + gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n", + gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n", + gk20a_readl(g, gr_fecs_ctxsw_idlestate_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n", + gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n", + gk20a_readl(g, gr_fecs_current_ctx_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n", + gk20a_readl(g, gr_fecs_new_ctx_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n", + gk20a_readl(g, gr_pri_be0_crop_status1_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n", + gk20a_readl(g, gr_pri_bes_crop_status1_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n", + gk20a_readl(g, gr_pri_be0_zrop_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n", + gk20a_readl(g, gr_pri_be0_zrop_status2_r())); + gk20a_debug_output(o, 
"NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n", + gk20a_readl(g, gr_pri_bes_zrop_status_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n", + gk20a_readl(g, gr_pri_bes_zrop_status2_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n", + gk20a_readl(g, gr_pri_be0_becs_be_exception_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n", + gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", + gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); + return 0; +} + +static bool gr_activity_empty_or_preempted(u32 val) +{ + while(val) { + u32 v = val & 7; + if (v != gr_activity_4_gpc0_empty_v() && + v != gr_activity_4_gpc0_preempted_v()) + return false; + val >>= 3; + } + + return true; +} + +static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long end_jiffies, + u32 expect_delay) +{ + u32 delay = expect_delay; + bool gr_enabled; + bool ctxsw_active; + bool gr_busy; + u32 gr_status; + u32 activity0, activity1, activity2, activity4; + + gk20a_dbg_fn(""); + + do { + /* fmodel: host gets fifo_engine_status(gr) from gr + only when gr_status is read */ + gr_status = gk20a_readl(g, gr_status_r()); + + gr_enabled = gk20a_readl(g, mc_enable_r()) & + mc_enable_pgraph_enabled_f(); + + ctxsw_active = gr_status & 1<<7; + + activity0 = gk20a_readl(g, gr_activity_0_r()); + activity1 = gk20a_readl(g, gr_activity_1_r()); + activity2 = gk20a_readl(g, gr_activity_2_r()); + activity4 = gk20a_readl(g, gr_activity_4_r()); + + gr_busy = 
!(gr_activity_empty_or_preempted(activity0) && + gr_activity_empty_or_preempted(activity1) && + activity2 == 0 && + gr_activity_empty_or_preempted(activity4)); + + if (!gr_enabled || (!gr_busy && !ctxsw_active)) { + gk20a_dbg_fn("done"); + return 0; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies) + || !tegra_platform_is_silicon()); + + gk20a_err(dev_from_gk20a(g), + "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", + ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); + + return -EAGAIN; +} + +static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, bool patch) +{ + struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + int attrBufferSize; + + if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) + attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size; + else + attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g); + + attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); + + gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | + gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | + gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); +} + +static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u64 size, bool patch) +{ + u32 data; + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), + gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), + 
gr_scc_bundle_cb_size_div_256b_f(size) | + gr_scc_bundle_cb_size_valid_true_f(), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), + gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), + gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | + gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); + + /* data for state_limit */ + data = (g->gr.bundle_cb_default_size * + gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / + gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); + + data = min_t(u32, data, g->gr.min_gpm_fifo_depth); + + gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", + g->gr.bundle_cb_token_limit, data); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), + gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | + gr_pd_ab_dist_cfg2_state_limit_f(data), patch); +} + +static int gr_gv11b_init_fs_state(struct gk20a *g) +{ + u32 data; + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r()); + data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(), + gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data); + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r()); + data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(), + gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data); + + return gr_gm20b_ctx_state_floorsweep(g); +} + +static void gr_gv11b_init_cyclestats(struct gk20a *g) +{ +#if defined(CONFIG_GK20A_CYCLE_STATS) + g->gpu_characteristics.flags |= + NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; + g->gpu_characteristics.flags |= + NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT; +#else + (void)g; +#endif +} + +static void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +{ + tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0); + tegra_fuse_writel(0x0, 
FUSE_WRITE_ACCESS_SW_0); + + if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) + tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0); + else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2) + tegra_fuse_writel(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0); + else + tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0); +} + +static void gr_gv11b_get_access_map(struct gk20a *g, + u32 **whitelist, int *num_entries) +{ + static u32 wl_addr_gv11b[] = { + /* this list must be sorted (low to high) */ + 0x404468, /* gr_pri_mme_max_instructions */ + 0x418300, /* gr_pri_gpcs_rasterarb_line_class */ + 0x418800, /* gr_pri_gpcs_setup_debug */ + 0x418e00, /* gr_pri_gpcs_swdx_config */ + 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ + 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ + 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ + 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ + 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ + 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */ + 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */ + 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ + 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ + 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ + 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ + }; + + *whitelist = wl_addr_gv11b; + 
*num_entries = ARRAY_SIZE(wl_addr_gv11b); +} + +static int gr_gv11b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch) +{ + int ret = 0; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); + + ret = gk20a_disable_channel_tsg(g, fault_ch); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "CILP: failed to disable channel/TSG!\n"); + return ret; + } + + ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "CILP: failed to restart runlist 0!"); + return ret; + } + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist"); + + if (gk20a_is_channel_marked_as_tsg(fault_ch)) + gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true); + else + gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg"); + + return ret; +} + +static int gr_gv11b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch) +{ + int ret; + struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); + + if (!gr_ctx) + return -EINVAL; + + if (gr_ctx->t18x.cilp_preempt_pending) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP is already pending for chid %d", + fault_ch->hw_chid); + return 0; + } + + /* get ctx_id from the ucode image */ + if (!gr_ctx->t18x.ctx_id_valid) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: looking up ctx id"); + ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id); + if (ret) { + gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n"); + return ret; + } + gr_ctx->t18x.ctx_id_valid = true; + } + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id); + + /* send ucode method to set ctxsw interrupt */ + ret = gr_gk20a_submit_fecs_sideband_method_op(g, + (struct fecs_method_op_gk20a) { + .method.data = 
gr_ctx->t18x.ctx_id, + .method.addr = + gr_fecs_method_push_adr_configure_interrupt_completion_option_v(), + .mailbox = { + .id = 1 /* sideband */, .data = 0, + .clr = ~0, .ret = NULL, + .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), + .fail = 0}, + .cond.ok = GR_IS_UCODE_OP_EQUAL, + .cond.fail = GR_IS_UCODE_OP_SKIP}); + + if (ret) { + gk20a_err(dev_from_gk20a(g), + "CILP: failed to enable ctxsw interrupt!"); + return ret; + } + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: enabled ctxsw completion interrupt"); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: disabling channel %d", + fault_ch->hw_chid); + + ret = gr_gv11b_disable_channel_or_tsg(g, fault_ch); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "CILP: failed to disable channel!!"); + return ret; + } + + /* set cilp_preempt_pending = true and record the channel */ + gr_ctx->t18x.cilp_preempt_pending = true; + g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid; + + if (gk20a_is_channel_marked_as_tsg(fault_ch)) { + struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid]; + + gk20a_tsg_event_id_post_event(tsg, + NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED); + } else { + gk20a_channel_event_id_post_event(fault_ch, + NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED); + } + + return 0; +} + +static int gr_gv11b_clear_cilp_preempt_pending(struct gk20a *g, + struct channel_gk20a *fault_ch) +{ + struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); + + if (!gr_ctx) + return -EINVAL; + + /* The ucode is self-clearing, so all we need to do here is + to clear cilp_preempt_pending. 
*/ + if (!gr_ctx->t18x.cilp_preempt_pending) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP is already cleared for chid %d\n", + fault_ch->hw_chid); + return 0; + } + + gr_ctx->t18x.cilp_preempt_pending = false; + g->gr.t18x.cilp_preempt_pending_chid = -1; + + return 0; +} + +/* @brief pre-process work on the SM exceptions to determine if we clear them or not. + * + * On Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing + */ +static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, + u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr, + bool sm_debugger_attached, struct channel_gk20a *fault_ch, + bool *early_exit, bool *ignore_debugger) +{ + int ret; + bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode == + NVGPU_GR_PREEMPTION_MODE_CILP) ; + u32 global_mask = 0, dbgr_control0, global_esr_copy; + u32 offset = proj_gpc_stride_v() * gpc + + proj_tpc_in_gpc_stride_v() * tpc; + + *early_exit = false; + *ignore_debugger = false; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", + gpc, tpc, global_esr); + + if (cilp_enabled && sm_debugger_attached) { + if (global_esr & gr_gpc0_tpc0_sm1_hww_global_esr_bpt_int_pending_f()) + gk20a_writel(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset, + gr_gpc0_tpc0_sm1_hww_global_esr_bpt_int_pending_f()); + + if (global_esr & gr_gpc0_tpc0_sm1_hww_global_esr_single_step_complete_pending_f()) + gk20a_writel(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset, + gr_gpc0_tpc0_sm1_hww_global_esr_single_step_complete_pending_f()); + + global_mask = gr_gpcs_tpcs_sm1_hww_global_esr_multiple_warp_errors_pending_f() | + gr_gpcs_tpcs_sm1_hww_global_esr_bpt_pause_pending_f(); + + if (warp_esr != 0 || (global_esr & global_mask) != 0) { + *ignore_debugger = true; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n", + gpc, tpc); + + if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch))
{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n", + gpc, tpc); + gk20a_suspend_all_sms(g, global_mask, false); + + gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch); + } else { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: STOP_TRIGGER from gpc %d tpc %d\n", + gpc, tpc); + gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true); + } + + /* reset the HWW errors after locking down */ + global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset); + gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy); + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: HWWs cleared for gpc %d tpc %d\n", + gpc, tpc); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n"); + ret = gr_gv11b_set_cilp_preempt_pending(g, fault_ch); + if (ret) { + gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n"); + return ret; + } + + dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm1_dbgr_control0_r() + offset); + if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n", + gpc, tpc); + dbgr_control0 = set_field(dbgr_control0, + gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(), + gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f()); + gk20a_writel(g, gr_gpc0_tpc0_sm1_dbgr_control0_r() + offset, dbgr_control0); + } + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: resume for gpc %d tpc %d\n", + gpc, tpc); + gk20a_resume_single_sm(g, gpc, tpc); + + *ignore_debugger = true; + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc); + } + + *early_exit = true; + } + return 0; +} + +static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) +{ + struct gr_ctx_desc *gr_ctx; + struct channel_gk20a *ch; + int chid; + int ret = -EINVAL; + + chid = g->gr.t18x.cilp_preempt_pending_chid; + 
+ ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid)); + if (!ch) + return ret; + + gr_ctx = ch->ch_ctx.gr_ctx; + + if (gr_ctx->t18x.cilp_preempt_pending) { + *__chid = chid; + ret = 0; + } + + gk20a_channel_put(ch); + + return ret; +} + +static int gr_gv11b_handle_fecs_error(struct gk20a *g, + struct channel_gk20a *__ch, + struct gr_gk20a_isr_data *isr_data) +{ + u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r()); + struct channel_gk20a *ch; + int chid = -1; + int ret = 0; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); + + /* + * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR) + * indicates that a CILP ctxsw save has finished + */ + if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: ctxsw save completed!\n"); + + /* now clear the interrupt */ + gk20a_writel(g, gr_fecs_host_int_clear_r(), + gr_fecs_host_int_clear_ctxsw_intr1_clear_f()); + + ret = gr_gv11b_get_cilp_preempt_pending_chid(g, &chid); + if (ret) + goto clean_up; + + ch = gk20a_channel_get( + gk20a_fifo_channel_from_hw_chid(g, chid)); + if (!ch) + goto clean_up; + + + /* set preempt_pending to false */ + ret = gr_gv11b_clear_cilp_preempt_pending(g, ch); + if (ret) { + gk20a_err(dev_from_gk20a(g), "CILP: error while unsetting CILP preempt pending!\n"); + gk20a_channel_put(ch); + goto clean_up; + } + + if (gk20a_gr_sm_debugger_attached(g)) { + if (gk20a_is_channel_marked_as_tsg(ch)) { + struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; + struct channel_gk20a *__ch; + + mutex_lock(&tsg->ch_list_lock); + list_for_each_entry(__ch, &tsg->ch_list, ch_entry) { + gk20a_dbg_gpu_post_events(__ch); + } + mutex_unlock(&tsg->ch_list_lock); + + gk20a_tsg_event_id_post_event(tsg, + NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE); + } else { + gk20a_dbg_gpu_post_events(ch); + + gk20a_channel_event_id_post_event(ch, + NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE); + } + } + + 
gk20a_channel_put(ch); + } + +clean_up: + /* handle any remaining interrupts */ + return gk20a_gr_handle_fecs_error(g, __ch, isr_data); +} + +static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) +{ + if (!(hww_warp_esr & gr_gpc0_tpc0_sm1_hww_warp_esr_addr_valid_m())) + hww_warp_esr = set_field(hww_warp_esr, + gr_gpc0_tpc0_sm1_hww_warp_esr_addr_error_type_m(), + gr_gpc0_tpc0_sm1_hww_warp_esr_addr_error_type_none_f()); + + return hww_warp_esr; +} void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); + gops->gr.init_fs_state = gr_gv11b_init_fs_state; + gops->gr.is_valid_class = gr_gv11b_is_valid_class; + gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager; + gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool; + gops->gr.add_zbc_color = gr_gv11b_add_zbc_color; + gops->gr.add_zbc_depth = gr_gv11b_add_zbc_depth; + gops->gr.pagepool_default_size = gr_gv11b_pagepool_default_size; + gops->gr.calc_global_ctx_buffer_size = + gr_gv11b_calc_global_ctx_buffer_size; + gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; + gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb; + gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; + gops->gr.cb_size_default = gr_gv11b_cb_size_default; + gops->gr.set_alpha_circular_buffer_size = + gr_gv11b_set_alpha_circular_buffer_size; + gops->gr.set_circular_buffer_size = + gr_gv11b_set_circular_buffer_size; + gops->gr.init_ctx_state = gr_gv11b_init_ctx_state; + gops->gr.alloc_gr_ctx = gr_gv11b_alloc_gr_ctx; + gops->gr.free_gr_ctx = gr_gv11b_free_gr_ctx; + gops->gr.update_ctxsw_preemption_mode = + gr_gv11b_update_ctxsw_preemption_mode; + gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs; + gops->gr.wait_empty = gr_gv11b_wait_empty; + gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; + gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; + gops->gr.get_access_map = gr_gv11b_get_access_map; + gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; + 
gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; + gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; + gops->gr.pre_process_sm_exception = + gr_gv11b_pre_process_sm_exception; + gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; } -- cgit v1.2.2 From c8b6a331d1e30595c5798fc3121575c1ab21e2ae Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Wed, 27 Apr 2016 18:25:57 +0530 Subject: gpu: nvgpu: use preemption modes defined in nvgpu-t18x.h Below definitions of preemption modes are deleted: NVGPU_GR_PREEMPTION_MODE_GFXP NVGPU_GR_PREEMPTION_MODE_CILP Use new definitions defined in nvgpu-t18x.h NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP NVGPU_COMPUTE_PREEMPTION_MODE_CILP Bug 1646259 Change-Id: Ieff51e41ef34eb61357f95778c400c8a3fa330c8 Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/1133597 Reviewed-by: Seshendra Gadagottu Reviewed-by: Ken Adams GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f0736e19..5dee0921 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -320,7 +320,7 @@ static int gr_gv11b_commit_global_cb_manager(struct gk20a *g, gk20a_dbg_fn(""); - if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) { + if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { attrib_size_in_chunk = gr->attrib_cb_default_size + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); @@ -869,14 +869,14 @@ static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, goto fail_free_betacb; } - (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP; + (*gr_ctx)->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; } if (class == PASCAL_COMPUTE_A) { if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) - 
(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP; + (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; else - (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA; + (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA; } gk20a_dbg_fn("done"); @@ -978,13 +978,13 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, gk20a_dbg_fn(""); - if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) { + if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0, gfxp_preempt_option); } - if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) { + if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { gk20a_dbg_info("CILP: %x", cilp_preempt_option); gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0, cilp_preempt_option); @@ -1542,8 +1542,8 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, bool *early_exit, bool *ignore_debugger) { int ret; - bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode == - NVGPU_GR_PREEMPTION_MODE_CILP) ; + bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == + NVGPU_COMPUTE_PREEMPTION_MODE_CILP) ; u32 global_mask = 0, dbgr_control0, global_esr_copy; u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; -- cgit v1.2.2 From d089e402355f3533b18a50a4e9fe7423593762af Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Fri, 6 May 2016 14:34:07 +0300 Subject: gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem To support vidmem, pass g and mem_desc to the buffer memory accessor functions. This allows the functions to select the memory access method based on the buffer aperture instead of using the cpu pointer directly (like until now). 
The selection and aperture support will be in another patch; this patch only refactors these accessors, but keeps the underlying functionality as-is. JIRA DNVGPU-23 Change-Id: Ie2cc17c4a0315d03a66e92fb635c217840d5399e Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1128863 GVS: Gerrit_Virtual_Submit Reviewed-by: Ken Adams --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 61 +++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 5dee0921..64bfa773 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -897,52 +897,51 @@ fail_free_gk20a_ctx: } static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx) { - void *ctx_ptr = vmap(gr_ctx->mem.pages, - PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) { + struct gr_ctx_desc *gr_ctx) +{ + struct mem_desc *mem = &gr_ctx->mem; + + if (gk20a_mem_begin(g, mem)) { WARN_ON("Cannot map context"); return; } gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_magic_value_o(), 0), + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_magic_value_o()), ctxsw_prog_main_image_magic_value_v_value_v()); gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o())); gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o())); gk20a_err(dev_from_gk20a(g), 
"ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_control_o())); gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_num_save_ops_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_num_save_ops_o())); gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_num_wfi_save_ops_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_num_wfi_save_ops_o())); gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_num_cta_save_ops_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_num_cta_save_ops_o())); gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_num_gfxp_save_ops_o())); gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_num_cilp_save_ops_o(), 0)); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_num_cilp_save_ops_o())); gk20a_err(dev_from_gk20a(g), "image gfx preemption option (GFXP is 1) %x\n", - gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_graphics_preemption_options_o(), - 0)); - vunmap(ctx_ptr); + gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_graphics_preemption_options_o())); + gk20a_mem_end(g, mem); } static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, @@ -967,7 +966,7 @@ static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, - void *ctx_ptr) + struct mem_desc *mem) { struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; u32 gfxp_preempt_option = @@ -980,13 +979,13 @@ static void 
gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(), gfxp_preempt_option); } if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { gk20a_dbg_info("CILP: %x", cilp_preempt_option); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(), cilp_preempt_option); } @@ -995,7 +994,7 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, u32 size; u32 cbes_reserve; - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(), gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); -- cgit v1.2.2 From c3117bf337371d6e161a13849cadc8a4fc9c63b0 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 10 May 2016 13:47:17 -0700 Subject: gpu: nvgpu: gv11b: Use gp10b GR floorsweeping Use gp10b version of GR floorsweeping function. 
Change-Id: I5715672b5f94b779165f44c78aec14a2836928e7 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/1144905 --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 64bfa773..cb9c1902 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1310,19 +1310,7 @@ static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g, static int gr_gv11b_init_fs_state(struct gk20a *g) { - u32 data; - - data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r()); - data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(), - gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f()); - gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data); - - data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r()); - data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(), - gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f()); - gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data); - - return gr_gm20b_ctx_state_floorsweep(g); + return gr_gp10b_init_fs_state(g); } static void gr_gv11b_init_cyclestats(struct gk20a *g) -- cgit v1.2.2 From 822b0dc53823e9c5bc3cbcdc78a62cc2c7f0647c Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Wed, 25 May 2016 13:24:37 +0300 Subject: gpu: nvgpu: fix patch write error check in update_ctxsw_preemption_mode Don't attempt to access memory if the patch context can't be mapped, but print an error message instead. 
Change-Id: I2d0ec22378ace0ef826f5a84a9ce4d35466f7832 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1157281 GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index cb9c1902..ad8e4eff 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -998,6 +998,11 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) { + gk20a_err(dev_from_gk20a(g), + "can't map patch context"); + goto out; + } addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | @@ -1049,6 +1054,7 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, gr_gk20a_ctx_patch_write_end(g, ch_ctx); } +out: gk20a_dbg_fn("done"); } -- cgit v1.2.2 From 6f24a7666045b25072282d4c29ff190b7af33e59 Mon Sep 17 00:00:00 2001 From: Lakshmanan M Date: Thu, 2 Jun 2016 09:44:13 +0530 Subject: gpu: nvgpu: Add multiple engine and runlist support This CL covers the following modification, 1) Added multiple engine_info support 2) Added multiple runlist_info support 3) Initial changes for ASYNC CE support 4) Added ASYNC CE interrupt support for Volta GPU series 5) Removed hard coded engine_id logic and made generic way 6) Code cleanup for readability JIRA DNVGPU-26 Change-Id: Ief3b586ff3d9f492f0277243b2a94952bab48786 Signed-off-by: Lakshmanan M Reviewed-on: http://git-master/r/1156023 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c 
b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index ad8e4eff..a4301fa9 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1062,6 +1062,9 @@ static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { struct gr_gk20a *gr = &g->gr; + u32 gr_engine_id; + + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", gk20a_readl(g, gr_status_r())); @@ -1082,7 +1085,7 @@ static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", gk20a_readl(g, gr_fecs_intr_r())); gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", - gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A))); + gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_activity_0_r())); gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", -- cgit v1.2.2 From 973c2811ca584270da659fb01fa1facb5dd527fa Mon Sep 17 00:00:00 2001 From: Lakshmanan M Date: Thu, 9 Jun 2016 18:55:12 +0530 Subject: gpu: nvgpu: Remove hard coded runlist_id mapping From this patch onwards, runlist_id is a member of struct channel_gk20a. So removed hard coded runlist_id mapping logic. 
JIRA DNVGPU-25 Change-Id: Ia02feffdc057b0dceab9721423feeed1cc7a1c12 Signed-off-by: Lakshmanan M Reviewed-on: http://git-master/r/1161779 GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a4301fa9..59d5569c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1401,7 +1401,7 @@ static int gr_gv11b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a return ret; } - ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false); + ret = g->ops.fifo.update_runlist(g, fault_ch->runlist_id, ~0, true, false); if (ret) { gk20a_err(dev_from_gk20a(g), "CILP: failed to restart runlist 0!"); -- cgit v1.2.2 From ca9cb9715407d5e86228cf1b26e83b8dd6115385 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Fri, 17 Jun 2016 15:45:47 +0300 Subject: gpu: nvgpu: gv11x: support in-kernel vidmem mappings Propagate the buffer aperture flag in gk20a_locked_gmmu_map up so that buffers represented as a mem_desc and present in vidmem can be mapped to gpu. 
JIRA DNVGPU-18 JIRA DNVGPU-76 Change-Id: I67d476b2c1b84218217ef203e429fb5e8a33adc7 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1169297 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 59d5569c..a23c5e8c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -779,7 +779,8 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, - false); + false, + mem->aperture); if (!mem->gpu_va) { err = -ENOMEM; -- cgit v1.2.2 From f4035d17a39ac356f3cbf8aecc2ba4c679dd6fb3 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Tue, 5 Jul 2016 13:55:46 -0700 Subject: gpu: nvgpu: gv11b: update code to HW CL 36758735 Update headers and corresponding code to work with HW CL # 36758735 Bug 1735760 Change-Id: Ie26bfaa6377ab797c5ad978e4796a55334761b5d Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1175882 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 43 ++++++++++++++------------------------ 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a23c5e8c..c5d2aa56 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -911,17 +911,6 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, ctxsw_prog_main_image_magic_value_o()), ctxsw_prog_main_image_magic_value_v_value_v()); - gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n", - gk20a_mem_rd(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o())); - - 
gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n", - gk20a_mem_rd(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_o())); - - gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n", - gk20a_mem_rd(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_control_o())); gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", gk20a_mem_rd(g, mem, @@ -1144,8 +1133,8 @@ static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n", gk20a_readl(g, gr_cwd_fs_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n", - gk20a_readl(g, gr_fe_tpc_fs_r(0))); - gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n", + gk20a_readl(g, gr_fe_tpc_fs_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID: 0x%x\n", gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0))); gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n", gk20a_readl(g, gr_cwd_sm_id_r(0))); @@ -1552,16 +1541,16 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, gpc, tpc, global_esr); if (cilp_enabled && sm_debugger_attached) { - if (global_esr & gr_gpc0_tpc0_sm1_hww_global_esr_bpt_int_pending_f()) - gk20a_writel(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset, - gr_gpc0_tpc0_sm1_hww_global_esr_bpt_int_pending_f()); + if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) + gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, + gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()); - if (global_esr & gr_gpc0_tpc0_sm1_hww_global_esr_single_step_complete_pending_f()) - gk20a_writel(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset, - gr_gpc0_tpc0_sm1_hww_global_esr_single_step_complete_pending_f()); + if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f()) + gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, + gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f()); - global_mask = 
gr_gpcs_tpcs_sm1_hww_global_esr_multiple_warp_errors_pending_f() | - gr_gpcs_tpcs_sm1_hww_global_esr_bpt_pause_pending_f(); + global_mask = gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() | + gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(); if (warp_esr != 0 || (global_esr & global_mask) != 0) { *ignore_debugger = true; @@ -1585,7 +1574,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, } /* reset the HWW errors after locking down */ - global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset); + global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: HWWs cleared for gpc %d tpc %d\n", @@ -1598,7 +1587,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, return ret; } - dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm1_dbgr_control0_r() + offset); + dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset); if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n", @@ -1606,7 +1595,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, dbgr_control0 = set_field(dbgr_control0, gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(), gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f()); - gk20a_writel(g, gr_gpc0_tpc0_sm1_dbgr_control0_r() + offset, dbgr_control0); + gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); } gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, @@ -1720,10 +1709,10 @@ clean_up: static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) { - if (!(hww_warp_esr & gr_gpc0_tpc0_sm1_hww_warp_esr_addr_valid_m())) + if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m())) hww_warp_esr = set_field(hww_warp_esr, - gr_gpc0_tpc0_sm1_hww_warp_esr_addr_error_type_m(), - 
gr_gpc0_tpc0_sm1_hww_warp_esr_addr_error_type_none_f()); + gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_m(), + gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_none_f()); return hww_warp_esr; } -- cgit v1.2.2 From 27b8b2a633a3a85c6ada7310ac81cebe519b950d Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Thu, 7 Jul 2016 11:09:32 +0300 Subject: gpu: nvgpu: use vidmem by default in gmmu_alloc variants For devices that have vidmem available, use the vidmem allocator in gk20a_gmmu_alloc{,attr,_map,_map_attr}. For others, use sysmem. Because all of the buffers haven't been tested to work in vidmem yet, rename calls to gk20a_gmmu_alloc{,attr,_map,_map_attr} to have _sys at the end to declare explicitly that sysmem is used. Enabling vidmem for each now is a matter of removing "_sys" from the function call. Jira DNVGPU-18 Change-Id: Ieb13c21c774380ac0be9987e177b4adc0a647abb Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1176810 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index c5d2aa56..831b92f5 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -770,7 +770,7 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, gk20a_dbg_fn(""); - err = gk20a_gmmu_alloc_attr(vm->mm->g, 0, size, mem); + err = gk20a_gmmu_alloc_sys(vm->mm->g, size, mem); if (err) return err; -- cgit v1.2.2 From 7297e1401962af075b14db2fec2f95b6a80d97f6 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 1 Aug 2016 14:27:54 +0530 Subject: gpu: nvgpu: post bpt events after processing Receive hww_global_esr in gr_gv11b_handle_sm_exception() and pass it to gr_gk20a_handle_sm_exception() Bug 200209410 Change-Id: I57a701a1f1fa560367f78db212c06d4ce361c7f0 Signed-off-by: Deepak Nibade Reviewed-on: 
http://git-master/r/1194987 GVS: Gerrit_Virtual_Submit Reviewed-by: Cory Perry Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 831b92f5..12b07d11 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -61,14 +61,15 @@ static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) } static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, - bool *post_event, struct channel_gk20a *fault_ch) + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) { int ret = 0; u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; u32 lrf_ecc_status, shm_ecc_status; - gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch); + gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); /* Check for LRF ECC errors. */ lrf_ecc_status = gk20a_readl(g, -- cgit v1.2.2 From 2c6652f182d84dc7ec4218576b65ad582f05d4a6 Mon Sep 17 00:00:00 2001 From: Cory Perry Date: Thu, 28 Jul 2016 22:37:09 -0700 Subject: gpu: nvgpu: send only one event to the debugger Event notifications on TSGs should only be sent to the channel that caused the event to happen in the first place, not every channel in the tsg. Any more and the debugger will not be able to tell what channel actually got the event. Worse yet, if all the channels in a tsg are bound to the same debug session (as is the case with cuda-gdb), then multiple nvgpu events for the same gpu event will be triggered, causing events to be buffered and the client to get out of sync. One gpu exception, one nvgpu event per tsg. 
Bug 1793988 Change-Id: Ifb33b65f09f67b0e323917c7e7ea016fc3676f18 Signed-off-by: Cory Perry Reviewed-on: http://git-master/r/1194207 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 12b07d11..9d0b4ade 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1680,21 +1680,14 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, } if (gk20a_gr_sm_debugger_attached(g)) { + gk20a_dbg_gpu_post_events(ch); + if (gk20a_is_channel_marked_as_tsg(ch)) { struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; - struct channel_gk20a *__ch; - - mutex_lock(&tsg->ch_list_lock); - list_for_each_entry(__ch, &tsg->ch_list, ch_entry) { - gk20a_dbg_gpu_post_events(__ch); - } - mutex_unlock(&tsg->ch_list_lock); gk20a_tsg_event_id_post_event(tsg, NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE); } else { - gk20a_dbg_gpu_post_events(ch); - gk20a_channel_event_id_post_event(ch, NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE); } -- cgit v1.2.2 From 51b5ec852096c0eeb1eaca48ae132d7bf9ac7a9d Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 22 Aug 2016 13:20:05 -0700 Subject: gpu: nvgpu: gv11b: hw header update Updated hw headers to CL#37001916. Some of important changes include new door bell user mode mechanism and new runlist structure. 
Bug 1735765 Change-Id: Icf01156dd3e7d94466f553ffc53267e4043e1188 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1205888 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 63 +++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9d0b4ade..088ec040 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -72,16 +72,16 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); /* Check for LRF ECC errors. */ - lrf_ecc_status = gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); - if ( (lrf_ecc_status & + lrf_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); + if ((lrf_ecc_status & gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) || (lrf_ecc_status & gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) || (lrf_ecc_status & gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) || (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) { + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f())) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Single bit error detected in SM LRF!"); @@ -93,14 +93,14 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, 0); } - if ( (lrf_ecc_status & + if ((lrf_ecc_status & gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) || (lrf_ecc_status & gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) || (lrf_ecc_status & 
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) || (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) { + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f())) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Double bit error detected in SM LRF!"); @@ -109,14 +109,13 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gk20a_readl(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset); gk20a_writel(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, - 0); + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, 0); } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, lrf_ecc_status); /* Check for SHM ECC errors. */ - shm_ecc_status = gk20a_readl(g, + shm_ecc_status = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset); if ((shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) || @@ -125,7 +124,7 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, (shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) || (shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) { + gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f())) { u32 ecc_stats_reg_val; gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, @@ -144,10 +143,10 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, ecc_stats_reg_val); } - if ( (shm_ecc_status & + if ((shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) || (shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) { + gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f())) { u32 ecc_stats_reg_val; gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, @@ -1133,8 +1132,8 @@ static int 
gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n", gk20a_readl(g, gr_cwd_fs_r())); - gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n", - gk20a_readl(g, gr_fe_tpc_fs_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS(0): 0x%x\n", + gk20a_readl(g, gr_fe_tpc_fs_r(0))); gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID: 0x%x\n", gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0))); gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n", @@ -1184,7 +1183,7 @@ static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, static bool gr_activity_empty_or_preempted(u32 val) { - while(val) { + while (val) { u32 v = val & 7; if (v != gr_activity_4_gpc0_empty_v() && v != gr_activity_4_gpc0_preempted_v()) @@ -1542,16 +1541,16 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, gpc, tpc, global_esr); if (cilp_enabled && sm_debugger_attached) { - if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) - gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, - gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()); + if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f()) + gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, + gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f()); - if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f()) - gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, - gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f()); + if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f()) + gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, + gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f()); - global_mask = gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() | - gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(); + global_mask = 
gr_gpcs_tpcs_sm0_hww_global_esr_multiple_warp_errors_pending_f() | + gr_gpcs_tpcs_sm0_hww_global_esr_bpt_pause_pending_f(); if (warp_esr != 0 || (global_esr & global_mask) != 0) { *ignore_debugger = true; @@ -1575,7 +1574,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, } /* reset the HWW errors after locking down */ - global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); + global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: HWWs cleared for gpc %d tpc %d\n", @@ -1588,15 +1587,15 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, return ret; } - dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset); - if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) { + dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + if (dbgr_control0 & gr_gpcs_tpcs_sm0_dbgr_control0_single_step_mode_enable_f()) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n", gpc, tpc); dbgr_control0 = set_field(dbgr_control0, - gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(), - gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f()); - gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); + gr_gpcs_tpcs_sm0_dbgr_control0_single_step_mode_m(), + gr_gpcs_tpcs_sm0_dbgr_control0_single_step_mode_disable_f()); + gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); } gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, @@ -1703,10 +1702,10 @@ clean_up: static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) { - if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m())) + if (!(hww_warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_wrap_id_m())) hww_warp_esr = set_field(hww_warp_esr, - gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_m(), - 
gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_none_f()); + gr_gpc0_tpc0_sm0_hww_warp_esr_addr_error_type_m(), + gr_gpc0_tpc0_sm0_hww_warp_esr_addr_error_type_none_f()); return hww_warp_esr; } -- cgit v1.2.2 From 09168aac40b22ba264de6a7fc1e5264528b1fe9f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 22 Sep 2016 14:41:31 -0700 Subject: gpu: nvgpu: gv11b: header updates for CL#37119043 Bug 1735760 Change-Id: I5216863a25338f14498ae0be58b86993104d4e99 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1222031 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 207 +------------------------------------ 1 file changed, 2 insertions(+), 205 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 088ec040..02044df6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -67,105 +67,16 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, int ret = 0; u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; - u32 lrf_ecc_status, shm_ecc_status; + u32 lrf_ecc_status; gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); /* Check for LRF ECC errors. 
*/ lrf_ecc_status = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); - if ((lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) || - (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) || - (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) || - (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f())) { - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, - "Single bit error detected in SM LRF!"); - - g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += - gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset); - gk20a_writel(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, - 0); - } - if ((lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) || - (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) || - (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) || - (lrf_ecc_status & - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f())) { - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, - "Double bit error detected in SM LRF!"); - - g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += - gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset); - gk20a_writel(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, 0); - } + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, lrf_ecc_status); - - /* Check for SHM ECC errors. 
*/ - shm_ecc_status = gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset); - if ((shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) || - (shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) || - (shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) || - (shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f())) { - u32 ecc_stats_reg_val; - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, - "Single bit error detected in SM SHM!"); - - ecc_stats_reg_val = - gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); - g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] += - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); - g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] += - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); - gk20a_writel(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, - ecc_stats_reg_val); - } - if ((shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) || - (shm_ecc_status & - gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f())) { - u32 ecc_stats_reg_val; - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, - "Double bit error detected in SM SHM!"); - - ecc_stats_reg_val = - gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); - g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] += - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); - gk20a_writel(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, - ecc_stats_reg_val); - } - gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset, 
- shm_ecc_status); - - return ret; } @@ -176,7 +87,6 @@ static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; u32 esr; - u32 ecc_stats_reg_val; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); @@ -184,119 +94,6 @@ static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, gr_gpc0_tpc0_tex_m_hww_esr_r() + offset); gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr); - if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) { - gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, - "Single bit error detected in TEX!"); - - /* Pipe 0 counters */ - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, - gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f()); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, - ecc_stats_reg_val); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, - ecc_stats_reg_val); - - - /* Pipe 1 counters */ - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, - gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f()); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); - gk20a_writel(g, - 
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, - ecc_stats_reg_val); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, - ecc_stats_reg_val); - - - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, - gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); - } - if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) { - gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, - "Double bit error detected in TEX!"); - - /* Pipe 0 counters */ - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, - gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f()); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, - ecc_stats_reg_val); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, - ecc_stats_reg_val); - - - /* Pipe 1 counters */ - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, - gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f()); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] += - 
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, - ecc_stats_reg_val); - - ecc_stats_reg_val = gk20a_readl(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] += - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); - ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, - ecc_stats_reg_val); - - - gk20a_writel(g, - gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, - gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); - } - gk20a_writel(g, gr_gpc0_tpc0_tex_m_hww_esr_r() + offset, esr); -- cgit v1.2.2 From 6f29d0d8cd81f3b964fff975b917569b865b26d3 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 7 Sep 2016 10:22:28 -0700 Subject: gpu: nvgpu: gv11b: setup rop mappings JIRA GV11B-21 Change-Id: I7695936bdac4502ceb0bdad4fc029e249eb2f05d Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1224783 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 61 +++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 02044df6..3488a03a 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -22,13 +22,13 @@ #include "gk20a/dbg_gpu_gk20a.h" #include "gm20b/gr_gm20b.h" -#include "gp10b/gr_gp10b.h" #include "gv11b/gr_gv11b.h" #include "hw_gr_gv11b.h" #include "hw_fifo_gv11b.h" #include "hw_proj_gv11b.h" #include "hw_ctxsw_prog_gv11b.h" #include "hw_mc_gv11b.h" +#include "hw_gr_gv11b.h" #include static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 
class_num) @@ -1507,6 +1507,64 @@ static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) return hww_warp_esr; } +int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 map; + u32 i, j, mapregs; + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_TPC_PER_GPC); + + gk20a_dbg_fn(""); + + if (!gr->map_tiles) + return -1; + + gk20a_writel(g, gr_crstr_map_table_cfg_r(), + gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | + gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); + + /* 6 tpc can be stored in one map register */ + mapregs = (num_gpcs * num_tpc_per_gpc + 5) / 6; + + for (i = 0, j = 0; i < mapregs; i++, j = j + 6) { + map = gr_crstr_gpc_map_tile0_f(gr->map_tiles[j]) | + gr_crstr_gpc_map_tile1_f(gr->map_tiles[j + 1]) | + gr_crstr_gpc_map_tile2_f(gr->map_tiles[j + 2]) | + gr_crstr_gpc_map_tile3_f(gr->map_tiles[j + 3]) | + gr_crstr_gpc_map_tile4_f(gr->map_tiles[j + 4]) | + gr_crstr_gpc_map_tile5_f(gr->map_tiles[j + 5]); + + gk20a_writel(g, gr_crstr_gpc_map_r(i), map); + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map_r(i), map); + gk20a_writel(g, gr_rstr2d_gpc_map_r(i), map); + } + + gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(), + gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) | + gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count)); + + for (i = 0, j = 1; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v(); + i++, j = j + 4) { + gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i), + gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f( + ((1 << j) % gr->tpc_count)) | + gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f( + ((1 << (j + 1)) % gr->tpc_count)) | + gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f( + ((1 << (j + 2)) % gr->tpc_count)) | + gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f( + ((1 << (j + 3)) % gr->tpc_count))); + } + + gk20a_writel(g, gr_rstr2d_map_table_cfg_r(), + gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) | + 
gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count)); + + return 0; +} + + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -1543,4 +1601,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception; gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; + gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; } -- cgit v1.2.2 From 20d4f2052a25053c716201e708b4caddda1c9a16 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Tue, 4 Oct 2016 15:24:46 -0700 Subject: gpu: nvgpu: gv11b: program sw veid bundles Program hw state with relevant sw veid bundles. JIRA GV11B-11 Change-Id: I2c5e02016ed41db9c9b7f85cc0b401abaa003d37 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1231598 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 58 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3488a03a..d8b251c6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1565,6 +1565,63 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) } +static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) +{ + struct av_list_gk20a *sw_veid_bundle_init = + &g->gr.ctx_vars.sw_veid_bundle_init; + u32 j; + u32 num_subctx = nvgpu_get_litter_value(g, GPU_LIT_NUM_SUBCTX); + u32 err = 0; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + + for (j = 0; j < num_subctx; j++) { + + gk20a_writel(g, gr_pipe_bundle_address_r(), + sw_veid_bundle_init->l[index].addr | + gr_pipe_bundle_address_veid_f(j)); + + err = gr_gk20a_wait_fe_idle(g, end_jiffies, + GR_IDLE_CHECK_DEFAULT); + } +} + +static int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) +{ + struct 
av_list_gk20a *sw_veid_bundle_init = + &g->gr.ctx_vars.sw_veid_bundle_init; + u32 i; + u32 last_bundle_data = 0; + u32 err = 0; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + + gk20a_dbg_fn(""); + for (i = 0; i < sw_veid_bundle_init->count; i++) { + + if (i == 0 || last_bundle_data != + sw_veid_bundle_init->l[i].value) { + gk20a_writel(g, gr_pipe_bundle_data_r(), + sw_veid_bundle_init->l[i].value); + last_bundle_data = sw_veid_bundle_init->l[i].value; + } + + if (gr_pipe_bundle_address_value_v( + sw_veid_bundle_init->l[i].addr) == GR_GO_IDLE_BUNDLE) { + gk20a_writel(g, gr_pipe_bundle_address_r(), + sw_veid_bundle_init->l[i].addr); + err |= gr_gk20a_wait_idle(g, end_jiffies, + GR_IDLE_CHECK_DEFAULT); + } else + gv11b_write_bundle_veid_state(g, i); + + if (err) + break; + } + gk20a_dbg_fn("done"); + return err; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -1602,4 +1659,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gr_gv11b_pre_process_sm_exception; gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; + gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; } -- cgit v1.2.2 From 37f317a3c4033b54ab4bf47286fb9ebd48edb021 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 15 Sep 2016 14:43:55 -0700 Subject: gpu: nvgpu: gv11b: zcull programming Bug 1735760 Change-Id: Id801efb613b5740389bde5dc2cfff47232d0a0f3 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1221582 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Seshendra Gadagottu Tested-by: Seshendra Gadagottu Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d8b251c6..78ffd3d4 100644 --- 
a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1622,6 +1622,37 @@ static int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) return err; } +void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, + u32 *zcull_map_tiles) +{ + u32 val, i, j; + + gk20a_dbg_fn(""); + + for (i = 0, j = 0; i < (zcull_num_entries / 8); i++, j += 8) { + val = + gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f( + zcull_map_tiles[j+0]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f( + zcull_map_tiles[j+1]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f( + zcull_map_tiles[j+2]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f( + zcull_map_tiles[j+3]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f( + zcull_map_tiles[j+4]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f( + zcull_map_tiles[j+5]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f( + zcull_map_tiles[j+6]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f( + zcull_map_tiles[j+7]); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val); + } +} + + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -1660,4 +1691,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; + gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; } -- cgit v1.2.2 From ac5383e76e2a2629d4bb1573ed080b89f1e2040f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 13 Oct 2016 15:48:44 -0700 Subject: gpu: nvgpu: gv11b: remove tex exception update for CL#37320141 JIRA GV11B-27 Change-Id: I095af59ac419b44b3a1e3abc489857d6f533874a Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1236274 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff 
--git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 78ffd3d4..89be2563 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -83,22 +83,7 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event) { - int ret = 0; - u32 offset = proj_gpc_stride_v() * gpc + - proj_tpc_in_gpc_stride_v() * tpc; - u32 esr; - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); - - esr = gk20a_readl(g, - gr_gpc0_tpc0_tex_m_hww_esr_r() + offset); - gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr); - - gk20a_writel(g, - gr_gpc0_tpc0_tex_m_hww_esr_r() + offset, - esr); - - return ret; + return 0; } static int gr_gv11b_commit_global_cb_manager(struct gk20a *g, -- cgit v1.2.2 From 742156225248816acba9955b14fc43fc9fe737bc Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 14 Oct 2016 13:37:41 -0700 Subject: gpu: nvgpu: gv11b: enable gpc exceptions Add function ptr and function for enabling gpc exceptions. Disable Tex exceptions. 
JIRA GV11B-28 JIRA GV11B-27 Change-Id: Ife8fe22c24da00ae14f68fd977d84d208831eb45 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1236899 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 89be2563..d8b79bac 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -80,6 +80,20 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, return ret; } +static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + u32 tpc_mask; + + gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), + gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); + + tpc_mask = + gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); + + gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); +} + static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event) { @@ -1670,6 +1684,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.get_access_map = gr_gv11b_get_access_map; gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; + gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; gops->gr.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception; -- cgit v1.2.2 From e5b96a827301a4ebdff45425722281de9d500d01 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 17 Oct 2016 16:38:21 -0700 Subject: gpu: nvgpu: gv11b: update sm arch info Use updated register offset for gr_gpc0_tpc0_sm_arch_r() to read and update correct sm arch info. 
JIRA GV11B-21 Change-Id: I34af2d4a7665d7848bd74bc56a92ff2c861ceac9 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1237916 GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d8b79bac..4274a800 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1651,11 +1651,24 @@ void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, } } +static void gr_gv11b_detect_sm_arch(struct gk20a *g) +{ + u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); + + g->gpu_characteristics.sm_arch_spa_version = + gr_gpc0_tpc0_sm_arch_spa_version_v(v); + g->gpu_characteristics.sm_arch_sm_version = + gr_gpc0_tpc0_sm_arch_sm_version_v(v); + g->gpu_characteristics.sm_arch_warp_count = + gr_gpc0_tpc0_sm_arch_warp_count_v(v); + +} void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); gops->gr.init_fs_state = gr_gv11b_init_fs_state; + gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; gops->gr.is_valid_class = gr_gv11b_is_valid_class; gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager; gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool; -- cgit v1.2.2 From 35d2db64e28df6d65fed381c793f0954eed5eb7b Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 17 Oct 2016 10:39:18 -0700 Subject: gpu: nvgpu: gv11b: update gr cb callbacks Update gr cb callbacks with gv11b default sizes. Also updated sw method ids for volta. 
JIRA GV11B-11 Change-Id: I77cccedb7a017f378e2194cef98ea4b0bf7acd6b Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1237786 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 4274a800..94fd8058 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -364,7 +364,7 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, { gk20a_dbg_fn(""); - if (class_num == PASCAL_COMPUTE_A) { + if (class_num == VOLTA_COMPUTE_A) { switch (offset << 2) { case NVC0C0_SET_SHADER_EXCEPTIONS: gk20a_gr_set_shader_exceptions(g, data); @@ -374,18 +374,18 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, } } - if (class_num == PASCAL_A) { + if (class_num == VOLTA_A) { switch (offset << 2) { - case NVC097_SET_SHADER_EXCEPTIONS: + case NVC397_SET_SHADER_EXCEPTIONS: gk20a_gr_set_shader_exceptions(g, data); break; - case NVC097_SET_CIRCULAR_BUFFER_SIZE: + case NVC397_SET_CIRCULAR_BUFFER_SIZE: g->ops.gr.set_circular_buffer_size(g, data); break; - case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE: + case NVC397_SET_ALPHA_CIRCULAR_BUFFER_SIZE: g->ops.gr.set_alpha_circular_buffer_size(g, data); break; - case NVC097_SET_GO_IDLE_TIMEOUT: + case NVC397_SET_GO_IDLE_TIMEOUT: gr_gv11b_set_go_idle_timeout(g, data); break; case NVC097_SET_COALESCE_BUFFER_SIZE: @@ -401,12 +401,25 @@ fail: return -EINVAL; } +static void gr_gv11b_bundle_cb_defaults(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + + gr->bundle_cb_default_size = + gr_scc_bundle_cb_size_div_256b__prod_v(); + gr->min_gpm_fifo_depth = + gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); + gr->bundle_cb_token_limit = + gr_pd_ab_dist_cfg2_token_limit_init_v(); +} + static void 
gr_gv11b_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; if (!gr->attrib_cb_default_size) - gr->attrib_cb_default_size = 0x800; + gr->attrib_cb_default_size = + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); } @@ -1680,6 +1693,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb; gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; + gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults; gops->gr.cb_size_default = gr_gv11b_cb_size_default; gops->gr.set_alpha_circular_buffer_size = gr_gv11b_set_alpha_circular_buffer_size; -- cgit v1.2.2 From e38542cc1e7cf4eb22cfa4089600a787ece0e189 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 27 Oct 2016 12:29:46 -0700 Subject: gpu: nvgpu: gv11b: commit global timeslice Implement chip specific commit_global_timeslice function. 
JIRA GV11B-21 Change-Id: I4f852913cb181f62063084c4e118d97148f99056 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1243947 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Seema Khowala Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 94fd8058..cc4bbb21 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1677,6 +1677,59 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g) } +static int gr_gv11b_commit_global_timeslice(struct gk20a *g, + struct channel_gk20a *c, bool patch) +{ + struct channel_ctx_gk20a *ch_ctx = NULL; + u32 pd_ab_dist_cfg0; + u32 ds_debug; + u32 mpc_vtg_debug; + u32 pe_vaf; + u32 pe_vsc_vpc; + + gk20a_dbg_fn(""); + + pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r()); + ds_debug = gk20a_readl(g, gr_ds_debug_r()); + mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r()); + + if (patch) { + int err; + + ch_ctx = &c->ch_ctx; + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) + return err; + } + + pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r()); + pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r()); + + pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf; + pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | + pe_vsc_vpc; + pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | + pd_ab_dist_cfg0; + ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; + mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | + mpc_vtg_debug; + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, + patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), + pe_vsc_vpc, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, 
gr_pd_ab_dist_cfg0_r(), + pd_ab_dist_cfg0, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), + mpc_vtg_debug, patch); + + if (patch) + gr_gk20a_ctx_patch_write_end(g, ch_ctx); + + return 0; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -1719,4 +1772,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; + gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; } -- cgit v1.2.2 From 2c23fd19ad62a58fc35ac4c08760915abc74dc63 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 26 Sep 2016 08:49:39 -0700 Subject: gpu: nvgpu: gv11b: smid programming gv11b specific smid table init, smid numbering and smid programming. JIRA GV11B-21 Change-Id: I3a0f8355f2cd90ab1518cd8a5642a0e84202bdf8 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1227096 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 92 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index cc4bbb21..e6050359 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1674,7 +1674,95 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_sm_version_v(v); g->gpu_characteristics.sm_arch_warp_count = gr_gpc0_tpc0_sm_arch_warp_count_v(v); +} + +static void gr_gv11b_init_sm_id_table(struct gk20a *g) +{ + u32 gpc, tpc; + u32 sm_id = 0; + + /* TODO populate smids based on power efficiency */ + for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { + for (gpc = 0; gpc < g->gr.gpc_count; 
gpc++) { + + if (tpc < g->gr.gpc_tpc_count[gpc]) { + g->gr.sm_to_cluster[sm_id].tpc_index = tpc; + g->gr.sm_to_cluster[sm_id].gpc_index = gpc; + g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2; + g->gr.sm_to_cluster[sm_id].global_tpc_index = + sm_id; + sm_id++; + } + } + } + g->gr.no_of_sm = sm_id; +} + +static void gr_gv11b_program_sm_id_numbering(struct gk20a *g, + u32 gpc, u32 tpc, u32 smid) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + u32 gpc_offset = gpc_stride * gpc; + u32 tpc_offset = tpc_in_gpc_stride * tpc; + u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index; + + gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index)); + gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, + gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index)); + gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index)); +} + +static int gr_gv11b_load_smid_config(struct gk20a *g) +{ + u32 *tpc_sm_id; + u32 i, j; + u32 tpc_index, gpc_index, tpc_id; + u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + + tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); + if (!tpc_sm_id) + return -ENOMEM; + /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ + for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { + u32 reg = 0; + u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + + gr_cwd_gpc_tpc_id_tpc0_s(); + + for (j = 0; j < 4; j++) { + u32 sm_id; + u32 bits; + + tpc_id = (i << 2) + j; + sm_id = tpc_id * sms_per_tpc; + + if (sm_id >= g->gr.no_of_sm) + break; + + gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; + tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; + + bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | + gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); + reg |= 
bits << (j * bit_stride); + + tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4) + >> 2))] |= tpc_id << tpc_index * bit_stride; + } + gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); + } + + for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) + gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); + + kfree(tpc_sm_id); + + return 0; } static int gr_gv11b_commit_global_timeslice(struct gk20a *g, @@ -1773,4 +1861,8 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; + gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table; + gops->gr.load_smid_config = gr_gv11b_load_smid_config; + gops->gr.program_sm_id_numbering = + gr_gv11b_program_sm_id_numbering; } -- cgit v1.2.2 From 68ff0c8f309e79266682f96994cf83c921061f77 Mon Sep 17 00:00:00 2001 From: Shardar Shariff Md Date: Tue, 1 Nov 2016 19:13:34 +0530 Subject: gpu: nvgpu: gv11b: use fuse control read/write APIs for K4.4 Use fuse control read/write APIs when accessing fuse control registers for Kernel version 4.4 Bug 200243956 Change-Id: I3d78ec2733b5f56615fa0b588664570c85557e63 Signed-off-by: Shardar Shariff Md Reviewed-on: http://git-master/r/1245826 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index e6050359..ca5e8dc0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -16,6 +16,7 @@ #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ #include #include +#include #include "gk20a/gr_gk20a.h" #include "gk20a/semaphore_gk20a.h" @@ -1135,8 +1136,13 @@ static void gr_gv11b_init_cyclestats(struct gk20a *g) static void 
gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0); tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0); +#else + tegra_fuse_control_write(0x1, FUSE_FUSEBYPASS_0); + tegra_fuse_control_write(0x0, FUSE_WRITE_ACCESS_SW_0); +#endif if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0); -- cgit v1.2.2 From 7b67abb2edb606b9ad0d0ecf0d94a333770974b1 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 16 Nov 2016 09:33:43 -0800 Subject: gpu: nvgpu: gv11b: support for new litter values Add support for new litter values: GPU_LIT_NUM_SUBCTX GPU_LIT_NUM_SM_PER_TPC Also updated get_litter_value api to use int instead of enum type. JIRA GV11B-21 Change-Id: Ide06245d03743e2d757d27d045701beb25b6707b Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1254857 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index ca5e8dc0..f60337e2 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -31,6 +31,7 @@ #include "hw_mc_gv11b.h" #include "hw_gr_gv11b.h" #include +#include static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { -- cgit v1.2.2 From 903df9b59705f7ebbb66fb2912c1419a0c992368 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Tue, 29 Nov 2016 16:14:19 -0800 Subject: gpu: nvgpu: gv11b: fix sparse warning Fix following sparse warning by making the function static: $TOP/kernel/nvgpu-t19x/drivers/gpu/nvgpu/gv11b/gr_gv11b.c:1529:5: warning: symbol 'gr_gv11b_setup_rop_mapping' was not declared. Should it be static? 
Bug 200088648 Change-Id: Idd388170f35e7e6cd7559d8aab8968f7e8e545c6 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1261891 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f60337e2..7f5b8d3f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1526,7 +1526,7 @@ static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) return hww_warp_esr; } -int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) +static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) { u32 map; u32 i, j, mapregs; -- cgit v1.2.2 From 67b54c3c76cfa488e1d102e5c74a32b3aaba9287 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 16 Nov 2016 12:25:08 -0800 Subject: gpu: nvgpu: legacy support with subcontext gv11b needs at least one subcontext to submit work. To support legacy in gv11b, currently main context is always copied into subcontext0 (veid0) during channel commit instance. As part of channel commit instance, veid0 for that channel is created and relevant pdb and context info copied to veid0. JIRA GV11B-21 Change-Id: I5147a1708b5e94202fa55e73fa0e53199ab7fced Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1231169 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 42 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7f5b8d3f..bdb96329 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -13,6 +13,7 @@ * more details. 
*/ +#include #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ #include #include @@ -24,12 +25,16 @@ #include "gm20b/gr_gm20b.h" #include "gv11b/gr_gv11b.h" +#include "gv11b/mm_gv11b.h" +#include "gv11b/subctx_gv11b.h" #include "hw_gr_gv11b.h" #include "hw_fifo_gv11b.h" #include "hw_proj_gv11b.h" #include "hw_ctxsw_prog_gv11b.h" #include "hw_mc_gv11b.h" #include "hw_gr_gv11b.h" +#include "hw_ram_gv11b.h" +#include "hw_pbdma_gv11b.h" #include #include @@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) return 0; } - static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) { struct av_list_gk20a *sw_veid_bundle_init = @@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g) for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); - kfree(tpc_sm_id); return 0; } +static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) +{ + u32 addr_lo; + u32 addr_hi; + struct ctx_header_desc *ctx; + + gk20a_dbg_fn(""); + + gv11b_alloc_subctx_header(c); + + gv11b_update_subctx_header(c, gpu_va); + + ctx = &c->ch_ctx.ctx_header; + addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); + addr_hi = u64_hi32(ctx->mem.gpu_va); + + /* point this address to engine_wfi_ptr */ + gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), + ram_in_engine_cs_wfi_v() | + ram_in_engine_wfi_target_f( + ram_in_engine_wfi_target_sys_mem_ncoh_v()) | + ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) | + ram_in_engine_wfi_ptr_lo_f(addr_lo)); + + gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(), + ram_in_engine_wfi_ptr_hi_f(addr_hi)); + + return 0; +} + + + static int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch) { @@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g, void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); + 
gops->gr.init_preemption_state = NULL; gops->gr.init_fs_state = gr_gv11b_init_fs_state; gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; gops->gr.is_valid_class = gr_gv11b_is_valid_class; @@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.load_smid_config = gr_gv11b_load_smid_config; gops->gr.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering; + gops->gr.commit_inst = gr_gv11b_commit_inst; + } -- cgit v1.2.2 From a674eeee419a68e27bec63e46200036f5f33c8ff Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 13 Jan 2017 14:24:36 -0800 Subject: gpu: nvgpu: gv11b: Support Stencil ZBC Pre-GP10X All chips prior to GP10X do not support ZBC (Zero Bandwidth Clear) to stencil part of the packed kinds (packed kinds refer to Z24S8 and Z32_X24S8 kinds). Clears for these kinds typically happen in two phases, depth phase and stencil phase. The depth clears can be compressed or ZBC-ed, whereas the stencil part is always uncompressed. Stencil ZBC in GP10X For GP10X both the depth and the stencil data for these packed kinds can be ZBC cleared. A given tile will be a cross product of the following states for depth and stencil. Depth: Uncompressed, 1-2 plane compressed, 3-4 plane compressed, ZBC index 0, ZBC index 1 Stencil: Uncompressed, ZBC index 0, ZBC index 1, ZBC index 2 JIRA GV11B-9 Change-Id: I3381fd6305a4fada64211176b8ef98f27b04089f Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1235520 Reviewed-by: Terje Bergstrom Reviewed-by: Seshendra Gadagottu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 190 ++++++++++++++++++++++--------------- 1 file changed, 116 insertions(+), 74 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index bdb96329..4c23455d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1,7 +1,7 @@ /* * GV11b GPU GR * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -223,96 +223,135 @@ static void gr_gv11b_commit_global_pagepool(struct gk20a *g, gr_gpcs_gcc_pagepool_total_pages_f(size), patch); } -static int gr_gv11b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, - struct zbc_entry *color_val, u32 index) +static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_query_params *query_params) { + u32 index = query_params->index_size; + + if (index >= GK20A_ZBC_TABLE_SIZE) { + gk20a_err(dev_from_gk20a(g), + "invalid zbc stencil table index\n"); + return -EINVAL; + } + query_params->depth = gr->zbc_s_tbl[index].stencil; + query_params->format = gr->zbc_s_tbl[index].format; + query_params->ref_cnt = gr->zbc_s_tbl[index].ref_cnt; + + return 0; +} + +static bool gr_gv11b_add_zbc_type_s(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *zbc_val, int *ret_val) +{ + struct zbc_s_table *s_tbl; u32 i; - u32 zbc_c; + bool added = false; + + *ret_val = -ENOMEM; + + /* search existing tables */ + for (i = 0; i < gr->max_used_s_index; i++) { + + s_tbl = &gr->zbc_s_tbl[i]; + + if (s_tbl->ref_cnt && + s_tbl->stencil == zbc_val->depth && + s_tbl->format == zbc_val->format) { + added = true; + s_tbl->ref_cnt++; + *ret_val = 0; + break; + } + } + /* add new table */ + if (!added && + gr->max_used_s_index < GK20A_ZBC_TABLE_SIZE) { + + s_tbl = &gr->zbc_s_tbl[gr->max_used_s_index]; + WARN_ON(s_tbl->ref_cnt != 0); + + *ret_val = g->ops.gr.add_zbc_s(g, gr, + zbc_val, gr->max_used_s_index); + + if (!(*ret_val)) + gr->max_used_s_index++; + } + return added; +} + +static int gr_gv11b_add_zbc_stencil(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *stencil_val, u32 index) +{ + u32 zbc_s; /* update l2 table */ - g->ops.ltc.set_zbc_color_entry(g, color_val, index); - - /* update ds table */ - 
gk20a_writel(g, gr_ds_zbc_color_r_r(), - gr_ds_zbc_color_r_val_f(color_val->color_ds[0])); - gk20a_writel(g, gr_ds_zbc_color_g_r(), - gr_ds_zbc_color_g_val_f(color_val->color_ds[1])); - gk20a_writel(g, gr_ds_zbc_color_b_r(), - gr_ds_zbc_color_b_val_f(color_val->color_ds[2])); - gk20a_writel(g, gr_ds_zbc_color_a_r(), - gr_ds_zbc_color_a_val_f(color_val->color_ds[3])); - - gk20a_writel(g, gr_ds_zbc_color_fmt_r(), - gr_ds_zbc_color_fmt_val_f(color_val->format)); - - gk20a_writel(g, gr_ds_zbc_tbl_index_r(), - gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); - - /* trigger the write */ - gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), - gr_ds_zbc_tbl_ld_select_c_f() | - gr_ds_zbc_tbl_ld_action_write_f() | - gr_ds_zbc_tbl_ld_trigger_active_f()); + g->ops.ltc.set_zbc_s_entry(g, stencil_val, index); /* update local copy */ - for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { - gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i]; - gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i]; - } - gr->zbc_col_tbl[index].format = color_val->format; - gr->zbc_col_tbl[index].ref_cnt++; - - gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_r_r(index), - color_val->color_ds[0]); - gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_g_r(index), - color_val->color_ds[1]); - gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_b_r(index), - color_val->color_ds[2]); - gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_a_r(index), - color_val->color_ds[3]); - zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3)); - zbc_c &= ~(0x7f << ((index % 4) * 7)); - zbc_c |= color_val->format << ((index % 4) * 7); - gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c); + gr->zbc_s_tbl[index].stencil = stencil_val->depth; + gr->zbc_s_tbl[index].format = stencil_val->format; + gr->zbc_s_tbl[index].ref_cnt++; + + gk20a_writel(g, gr_gpcs_swdx_dss_zbc_s_r(index), stencil_val->depth); + zbc_s = gk20a_readl(g, 
gr_gpcs_swdx_dss_zbc_s_01_to_04_format_r() + + (index & ~3)); + zbc_s &= ~(0x7f << (index % 4) * 7); + zbc_s |= stencil_val->format << (index % 4) * 7; + gk20a_writel(g, gr_gpcs_swdx_dss_zbc_s_01_to_04_format_r() + + (index & ~3), zbc_s); return 0; } -static int gr_gv11b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, - struct zbc_entry *depth_val, u32 index) +static int gr_gv11b_load_stencil_default_tbl(struct gk20a *g, + struct gr_gk20a *gr) { - u32 zbc_z; + struct zbc_entry zbc_val; + u32 err; - /* update l2 table */ - g->ops.ltc.set_zbc_depth_entry(g, depth_val, index); + /* load default stencil table */ + zbc_val.type = GV11B_ZBC_TYPE_STENCIL; - /* update ds table */ - gk20a_writel(g, gr_ds_zbc_z_r(), - gr_ds_zbc_z_val_f(depth_val->depth)); + zbc_val.depth = 0x0; + zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8; + err = gr_gk20a_add_zbc(g, gr, &zbc_val); - gk20a_writel(g, gr_ds_zbc_z_fmt_r(), - gr_ds_zbc_z_fmt_val_f(depth_val->format)); + zbc_val.depth = 0x1; + zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8; + err |= gr_gk20a_add_zbc(g, gr, &zbc_val); - gk20a_writel(g, gr_ds_zbc_tbl_index_r(), - gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); + zbc_val.depth = 0xff; + zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8; + err |= gr_gk20a_add_zbc(g, gr, &zbc_val); - /* trigger the write */ - gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), - gr_ds_zbc_tbl_ld_select_z_f() | - gr_ds_zbc_tbl_ld_action_write_f() | - gr_ds_zbc_tbl_ld_trigger_active_f()); + if (!err) { + gr->max_default_s_index = 3; + } else { + gk20a_err(dev_from_gk20a(g), + "fail to load default zbc stencil table\n"); + return err; + } - /* update local copy */ - gr->zbc_dep_tbl[index].depth = depth_val->depth; - gr->zbc_dep_tbl[index].format = depth_val->format; - gr->zbc_dep_tbl[index].ref_cnt++; + return 0; +} + +static int gr_gv11b_load_stencil_tbl(struct gk20a *g, struct gr_gk20a *gr) +{ + int ret; + u32 i; + + for (i = 0; i < gr->max_used_s_index; i++) { + struct zbc_s_table *s_tbl = &gr->zbc_s_tbl[i]; + 
struct zbc_entry zbc_val; - gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth); - zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3)); - zbc_z &= ~(0x7f << (index % 4) * 7); - zbc_z |= depth_val->format << (index % 4) * 7; - gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3), zbc_z); + zbc_val.type = GV11B_ZBC_TYPE_STENCIL; + zbc_val.depth = s_tbl->stencil; + zbc_val.format = s_tbl->format; + ret = g->ops.gr.add_zbc_s(g, gr, &zbc_val, i); + if (ret) + return ret; + } return 0; } @@ -1869,8 +1908,11 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.is_valid_class = gr_gv11b_is_valid_class; gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager; gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool; - gops->gr.add_zbc_color = gr_gv11b_add_zbc_color; - gops->gr.add_zbc_depth = gr_gv11b_add_zbc_depth; + gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; + gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; + gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; + gops->gr.zbc_s_query_table = gr_gv11b_zbc_s_query_table; + gops->gr.add_zbc_type_s = gr_gv11b_add_zbc_type_s; gops->gr.pagepool_default_size = gr_gv11b_pagepool_default_size; gops->gr.calc_global_ctx_buffer_size = gr_gv11b_calc_global_ctx_buffer_size; -- cgit v1.2.2 From 3f0e08da43a448d8a1a85529b03d4c073f8c11c5 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 4 Jan 2017 17:18:11 -0800 Subject: gpu: nvgpu: Use timer API in gv11b's wait_idle() The interface for wait_idle() was changed for gk20a, etc, so this change is necessary to update the wait_idle function for gv11b. Similarly for wait_fe() - this needs to no longer use an end_jiffie argument. 
Bug 1799159 Change-Id: I192159feffda5476269194e7d6ef15b5fe3055bd Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1280459 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 4c23455d..4984af46 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -19,6 +19,8 @@ #include #include +#include + #include "gk20a/gr_gk20a.h" #include "gk20a/semaphore_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" @@ -1049,7 +1051,7 @@ static bool gr_activity_empty_or_preempted(u32 val) return true; } -static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long end_jiffies, +static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, u32 expect_delay) { u32 delay = expect_delay; @@ -1058,9 +1060,12 @@ static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long end_jiffies, bool gr_busy; u32 gr_status; u32 activity0, activity1, activity2, activity4; + struct nvgpu_timeout timeout; gk20a_dbg_fn(""); + nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER); + do { /* fmodel: host gets fifo_engine_status(gr) from gr only when gr_status is read */ @@ -1089,8 +1094,7 @@ static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long end_jiffies, usleep_range(delay, delay * 2); delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); - } while (time_before(jiffies, end_jiffies) - || !tegra_platform_is_silicon()); + } while (!nvgpu_timeout_expired(&timeout)); gk20a_err(dev_from_gk20a(g), "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", @@ -1634,8 +1638,6 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) u32 j; u32 num_subctx = nvgpu_get_litter_value(g, GPU_LIT_NUM_SUBCTX); u32 err = 0; - unsigned long end_jiffies = jiffies + - 
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); for (j = 0; j < num_subctx; j++) { @@ -1643,8 +1645,8 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) sw_veid_bundle_init->l[index].addr | gr_pipe_bundle_address_veid_f(j)); - err = gr_gk20a_wait_fe_idle(g, end_jiffies, - GR_IDLE_CHECK_DEFAULT); + err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g), + GR_IDLE_CHECK_DEFAULT); } } @@ -1655,8 +1657,6 @@ static int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) u32 i; u32 last_bundle_data = 0; u32 err = 0; - unsigned long end_jiffies = jiffies + - msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); gk20a_dbg_fn(""); for (i = 0; i < sw_veid_bundle_init->count; i++) { @@ -1672,8 +1672,9 @@ static int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) sw_veid_bundle_init->l[i].addr) == GR_GO_IDLE_BUNDLE) { gk20a_writel(g, gr_pipe_bundle_address_r(), sw_veid_bundle_init->l[i].addr); - err |= gr_gk20a_wait_idle(g, end_jiffies, - GR_IDLE_CHECK_DEFAULT); + err |= gr_gk20a_wait_idle(g, + gk20a_get_gr_idle_timeout(g), + GR_IDLE_CHECK_DEFAULT); } else gv11b_write_bundle_veid_state(g, i); -- cgit v1.2.2 From 64ab12979590564db19c820ff8cdd71579a9b317 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 11 Jan 2017 14:28:32 -0800 Subject: gpu: nvgpu: gv11b: sw methods for shader exception Added proper sw methods handling of shader exceptions for gv11b.
Bug 1834201 Change-Id: I3f3a45beed777cc4af59368dccd9dc7bb8181c37 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1283729 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 4984af46..79eee0e5 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -407,6 +407,25 @@ static void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data) gk20a_dbg_fn("done"); } + +static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data) +{ + u32 val; + + gk20a_dbg_fn(""); + + if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) + val = 0; + else + val = 0xffffffff; + + /* setup sm warp esr report masks */ + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(), val); + + /* setup sm global esr report mask */ + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), val); +} + static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { @@ -415,7 +434,7 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, if (class_num == VOLTA_COMPUTE_A) { switch (offset << 2) { case NVC0C0_SET_SHADER_EXCEPTIONS: - gk20a_gr_set_shader_exceptions(g, data); + gv11b_gr_set_shader_exceptions(g, data); break; default: goto fail; @@ -425,7 +444,7 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, if (class_num == VOLTA_A) { switch (offset << 2) { case NVC397_SET_SHADER_EXCEPTIONS: - gk20a_gr_set_shader_exceptions(g, data); + gv11b_gr_set_shader_exceptions(g, data); break; case NVC397_SET_CIRCULAR_BUFFER_SIZE: g->ops.gr.set_circular_buffer_size(g, data); -- cgit v1.2.2 From 3a4a7d196a6a7fd256052cded45d6763c20dd3f6 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 18 Jan 2017 
18:32:09 +0530 Subject: drivers: gpu: nvgpu: Use soc/tegra/fuse.h for fuse header The fuse headers are unified and moved all the content of linux/tegra-fuse.h to the soc/tegra/fuse.h to have the single fuse header for Tegra. Use unified fuse header soc/tegra/fuse.h. bug 200260692 Change-Id: Ied87164ea1de793d97a4cc6a754150164af04698 Signed-off-by: Laxman Dewangan Reviewed-on: http://git-master/r/1287500 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 79eee0e5..10b1aebb 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -16,8 +16,8 @@ #include #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ #include -#include #include +#include #include -- cgit v1.2.2 From 4f3871309d5216b50179feed8f8024193b2224cf Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Fri, 13 Jan 2017 13:34:24 -0800 Subject: gpu: nvgpu: gv11b: restore golden context Restore golden context correctly with subcontext header. Increase subctx header size to hold complete golden context. Also fill function pointer for freeing context header. 
Bug 1834201 Change-Id: Id8a3437bc437fef02ee15333c1163290217d34d1 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1282440 Reviewed-by: Alex Waterman Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 10b1aebb..dfb46701 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1853,8 +1853,6 @@ static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) /* point this address to engine_wfi_ptr */ gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), ram_in_engine_cs_wfi_v() | - ram_in_engine_wfi_target_f( - ram_in_engine_wfi_target_sys_mem_ncoh_v()) | ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) | ram_in_engine_wfi_ptr_lo_f(addr_lo)); @@ -1919,6 +1917,29 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g, return 0; } +static void gv11b_restore_context_header(struct gk20a *g, + struct mem_desc *ctxheader) +{ + u32 va_lo, va_hi; + struct gr_gk20a *gr = &g->gr; + + va_hi = gk20a_mem_rd(g, ctxheader, + ctxsw_prog_main_image_context_buffer_ptr_hi_o()); + va_lo = gk20a_mem_rd(g, ctxheader, + ctxsw_prog_main_image_context_buffer_ptr_o()); + gk20a_mem_wr_n(g, ctxheader, 0, + gr->ctx_vars.local_golden_image, + gr->ctx_vars.golden_image_size); + gk20a_mem_wr(g, ctxheader, + ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi); + gk20a_mem_wr(g, ctxheader, + ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo); + gk20a_mem_wr(g, ctxheader, + ctxsw_prog_main_image_num_restore_ops_o(), 0); + gk20a_mem_wr(g, ctxheader, + ctxsw_prog_main_image_num_save_ops_o(), 0); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -1971,5 +1992,6 @@ void gv11b_init_gr(struct 
gpu_ops *gops) gops->gr.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering; gops->gr.commit_inst = gr_gv11b_commit_inst; + gops->gr.restore_context_header = gv11b_restore_context_header; } -- cgit v1.2.2 From 4b09997772f406d16945016ff4581c7c992faeab Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Thu, 12 Jan 2017 13:01:36 -0800 Subject: nvgpu: gpu: HW header update for Volta Similar HW header update as has been done for all the other chips. HW header files are located under: drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/ And can be included like so: #include Bug 1799159 Change-Id: If39bd71480a34f85bf25f4c36aec0f8f6de4dc9f Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1284433 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index dfb46701..31c1a420 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -13,32 +13,34 @@ * more details. 
*/ -#include -#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ #include #include +#include +#include + #include #include +#include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" #include "gk20a/semaphore_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" #include "gm20b/gr_gm20b.h" + #include "gv11b/gr_gv11b.h" #include "gv11b/mm_gv11b.h" #include "gv11b/subctx_gv11b.h" -#include "hw_gr_gv11b.h" -#include "hw_fifo_gv11b.h" -#include "hw_proj_gv11b.h" -#include "hw_ctxsw_prog_gv11b.h" -#include "hw_mc_gv11b.h" -#include "hw_gr_gv11b.h" -#include "hw_ram_gv11b.h" -#include "hw_pbdma_gv11b.h" -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { -- cgit v1.2.2 From d00b2000b5bb2f39e3610b8321e0872e2b06bd0a Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Fri, 20 Jan 2017 16:18:47 -0800 Subject: gpu: nvgpu: gv11b: update zcull and pm pointers Update zcull and perfmon buffer pointers in context header. For gv11b, a gpu va of up to 49 bits is possible. But the zcull and perfmon buffer pointers use at most a 41-bit va address (256-byte aligned). To accommodate this, the high pointer registers need to be updated in the context header.
JIRA GV11B-48 Change-Id: Ibe62b6bfedd32c4f3721e4d19d96cce58ef0f366 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1291852 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: svccoveritychecker --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 31c1a420..daadef68 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1941,6 +1941,34 @@ static void gv11b_restore_context_header(struct gk20a *g, gk20a_mem_wr(g, ctxheader, ctxsw_prog_main_image_num_save_ops_o(), 0); } +static void gr_gv11b_write_zcull_ptr(struct gk20a *g, + struct mem_desc *mem, u64 gpu_va) +{ + u32 va_lo, va_hi; + + gpu_va = gpu_va >> 8; + va_lo = u64_lo32(gpu_va); + va_hi = u64_hi32(gpu_va); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_zcull_ptr_o(), va_lo); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi); +} + + +static void gr_gv11b_write_pm_ptr(struct gk20a *g, + struct mem_desc *mem, u64 gpu_va) +{ + u32 va_lo, va_hi; + + gpu_va = gpu_va >> 8; + va_lo = u64_lo32(gpu_va); + va_hi = u64_hi32(gpu_va); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_pm_ptr_o(), va_lo); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); +} void gv11b_init_gr(struct gpu_ops *gops) { @@ -1995,5 +2023,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gr_gv11b_program_sm_id_numbering; gops->gr.commit_inst = gr_gv11b_commit_inst; gops->gr.restore_context_header = gv11b_restore_context_header; + gops->gr.write_zcull_ptr = gr_gv11b_write_zcull_ptr; + gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; } -- cgit v1.2.2 From f04a84b7ce976e911bf81497796016e149d17082 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 6 Feb 2017 15:57:12 -0800 Subject: gpu: nvgpu: gv11b: 
chip specific init_elcg_mode Added thermal registers for gv11b. Implemented chip specific init_elcg_mode. In thermal control register, engine power auto control config is removed and added new field for engine holdoff enable signal. JIRA GV11B-58 Change-Id: I412d9a232800d25efbdb0a40f14949d3f085fb0e Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1300119 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index daadef68..3c332bbf 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -41,6 +41,7 @@ #include #include #include +#include static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { @@ -1970,6 +1971,39 @@ static void gr_gv11b_write_pm_ptr(struct gk20a *g, ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); } +void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) +{ + u32 gate_ctrl; + + gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); + + switch (mode) { + case ELCG_RUN: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_clk_m(), + therm_gate_ctrl_eng_clk_run_f()); + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_idle_holdoff_m(), + therm_gate_ctrl_idle_holdoff_on_f()); + break; + case ELCG_STOP: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_clk_m(), + therm_gate_ctrl_eng_clk_stop_f()); + break; + case ELCG_AUTO: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_clk_m(), + therm_gate_ctrl_eng_clk_auto_f()); + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid elcg mode %d", mode); + } + + gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ 
-2025,5 +2059,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.restore_context_header = gv11b_restore_context_header; gops->gr.write_zcull_ptr = gr_gv11b_write_zcull_ptr; gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; + gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; } -- cgit v1.2.2 From edad02b1b0071aa9fa0eac53d275e08a0051dca1 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 10 Feb 2017 16:15:42 -0800 Subject: gpu: nvgpu: Organize semaphore_gk20a.[ch] t19x version. Bug 1799159 Change-Id: I5e4c2a5341909d2e366ebc15adb4cdce70d695c7 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1303264 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3c332bbf..c5390fe4 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -24,7 +24,6 @@ #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" #include "gm20b/gr_gm20b.h" -- cgit v1.2.2 From 207e2ac7d12e62df476f4828136a4c15e156f8a6 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 22 Feb 2017 10:04:31 -0800 Subject: gpu: nvgpu: gv11b: reading max veid number To get maximum number of subctx, sw should read NV_PGRAPH_PRI_FE_CHIP_DEF_INFO_MAX_VEID_COUNT instead of LITTER_NUM_SUBCTX. 
JIRA GV11B-72 Change-Id: I4d675ba49d8a600da77e7b60da449d9e5ba48971 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1309591 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Seema Khowala GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index c5390fe4..a67b1476 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1657,8 +1657,10 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) struct av_list_gk20a *sw_veid_bundle_init = &g->gr.ctx_vars.sw_veid_bundle_init; u32 j; - u32 num_subctx = nvgpu_get_litter_value(g, GPU_LIT_NUM_SUBCTX); - u32 err = 0; + u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); + u32 num_subctx, err = 0; + + num_subctx = gr_pri_fe_chip_def_info_max_veid_count_v(data); for (j = 0; j < num_subctx; j++) { -- cgit v1.2.2 From 58c72012f4a8b554083cdf7ea8061b98e8de02dc Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 23 Feb 2017 10:17:52 -0800 Subject: gpu: nvgpu: gv11b: add gr ops for load tpc mask gr_fe_tpc_fs_r addr is different for t19x Change-Id: Ibae4b7224ffbd4d8366890cd05649b1b66e22f02 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1310327 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Seshendra Gadagottu Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a67b1476..c512322b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2005,6 +2005,11 @@ void gr_gv11b_init_elcg_mode(struct gk20a *g, 
u32 mode, u32 engine) gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); } +static void gr_gv11b_load_tpc_mask(struct gk20a *g) +{ + /* TODO */ +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2061,5 +2066,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.write_zcull_ptr = gr_gv11b_write_zcull_ptr; gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; + gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; } -- cgit v1.2.2 From a29521e8d84241ea27b83f14f80547a2da601a97 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Fri, 3 Mar 2017 14:31:39 -0800 Subject: gpu: nvgpu: gv11b: add tpc floor sweeping code Added TPC floor sweeping code for gv11b. JIRA GV11B-73 Change-Id: Ib0d05e2d606fe13dff119948fbc9022e8af0fa7f Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1315257 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: Navneet Kumar --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index c512322b..fabc6819 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2007,7 +2007,32 @@ void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) static void gr_gv11b_load_tpc_mask(struct gk20a *g) { - /* TODO */ + u32 pes_tpc_mask = 0, fuse_tpc_mask; + u32 gpc, pes, val; + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_TPC_PER_GPC); + + /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ + for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { + pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << + num_tpc_per_gpc * gpc; + } + } + + gk20a_dbg_info("pes_tpc_mask %u\n", pes_tpc_mask); + fuse_tpc_mask = 
g->ops.gr.get_gpc_tpc_mask(g, gpc); + if (g->tpc_fs_mask_user && + g->tpc_fs_mask_user != fuse_tpc_mask && + fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) { + val = g->tpc_fs_mask_user; + val &= (0x1U << g->gr.max_tpc_count) - 1U; + val = (0x1U << hweight32(val)) - 1U; + gk20a_writel(g, gr_fe_tpc_fs_r(0), val); + } else { + gk20a_writel(g, gr_fe_tpc_fs_r(0), pes_tpc_mask); + } + } void gv11b_init_gr(struct gpu_ops *gops) -- cgit v1.2.2 From e1cad55e529aa44d1d3646de83e563e037d80b67 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 17 Mar 2017 11:32:41 -0700 Subject: gpu: nvgpu: gv11b: null check for fault_ch gk20a_gr_get_channel_from_ctx() could return NULL as a result fault_ch could be null JIRA GPUT19X-7 Change-Id: If89507d3d3fa5a95ba75c4a90eb212d0c8b2214a Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1323255 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index fabc6819..d109dbf8 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1413,8 +1413,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, bool *early_exit, bool *ignore_debugger) { int ret; - bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == - NVGPU_COMPUTE_PREEMPTION_MODE_CILP) ; + bool cilp_enabled = false; u32 global_mask = 0, dbgr_control0, global_esr_copy; u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; @@ -1422,6 +1421,10 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, *early_exit = false; *ignore_debugger = false; + if (fault_ch) + cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == + NVGPU_COMPUTE_PREEMPTION_MODE_CILP); + 
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", gpc, tpc, global_esr); -- cgit v1.2.2 From 633d331ae2db50fbcce829fe324c19fc44b82c24 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 22 Mar 2017 10:00:24 -0700 Subject: gpu: nvgpu: Rename gk20a_mem_* functions Rename the functions used for mem_desc access to nvgpu_mem_*. JIRA NVGPU-12 Change-Id: I5a1180c9a08d33c3dfc361ce8579c3c767fa5656 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1326193 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d109dbf8..ddaaa350 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -780,36 +780,36 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, { struct mem_desc *mem = &gr_ctx->mem; - if (gk20a_mem_begin(g, mem)) { + if (nvgpu_mem_begin(g, mem)) { WARN_ON("Cannot map context"); return; } gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_magic_value_o()), ctxsw_prog_main_image_magic_value_v_value_v()); gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_save_ops_o())); gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_wfi_save_ops_o())); gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_cta_save_ops_o())); gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, 
ctxsw_prog_main_image_num_gfxp_save_ops_o())); gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_cilp_save_ops_o())); gk20a_err(dev_from_gk20a(g), "image gfx preemption option (GFXP is 1) %x\n", - gk20a_mem_rd(g, mem, + nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o())); - gk20a_mem_end(g, mem); + nvgpu_mem_end(g, mem); } static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, @@ -847,13 +847,13 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); - gk20a_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(), + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(), gfxp_preempt_option); } if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { gk20a_dbg_info("CILP: %x", cilp_preempt_option); - gk20a_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(), + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(), cilp_preempt_option); } @@ -862,7 +862,7 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, u32 size; u32 cbes_reserve; - gk20a_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(), + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(), gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); @@ -1858,12 +1858,12 @@ static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) addr_hi = u64_hi32(ctx->mem.gpu_va); /* point this address to engine_wfi_ptr */ - gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), + nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), ram_in_engine_cs_wfi_v() | ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) | ram_in_engine_wfi_ptr_lo_f(addr_lo)); - 
gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(), + nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(), ram_in_engine_wfi_ptr_hi_f(addr_hi)); return 0; @@ -1930,20 +1930,20 @@ static void gv11b_restore_context_header(struct gk20a *g, u32 va_lo, va_hi; struct gr_gk20a *gr = &g->gr; - va_hi = gk20a_mem_rd(g, ctxheader, + va_hi = nvgpu_mem_rd(g, ctxheader, ctxsw_prog_main_image_context_buffer_ptr_hi_o()); - va_lo = gk20a_mem_rd(g, ctxheader, + va_lo = nvgpu_mem_rd(g, ctxheader, ctxsw_prog_main_image_context_buffer_ptr_o()); - gk20a_mem_wr_n(g, ctxheader, 0, + nvgpu_mem_wr_n(g, ctxheader, 0, gr->ctx_vars.local_golden_image, gr->ctx_vars.golden_image_size); - gk20a_mem_wr(g, ctxheader, + nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi); - gk20a_mem_wr(g, ctxheader, + nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo); - gk20a_mem_wr(g, ctxheader, + nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_num_restore_ops_o(), 0); - gk20a_mem_wr(g, ctxheader, + nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_num_save_ops_o(), 0); } static void gr_gv11b_write_zcull_ptr(struct gk20a *g, @@ -1954,9 +1954,9 @@ static void gr_gv11b_write_zcull_ptr(struct gk20a *g, gpu_va = gpu_va >> 8; va_lo = u64_lo32(gpu_va); va_hi = u64_hi32(gpu_va); - gk20a_mem_wr(g, mem, + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_zcull_ptr_o(), va_lo); - gk20a_mem_wr(g, mem, + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi); } @@ -1969,9 +1969,9 @@ static void gr_gv11b_write_pm_ptr(struct gk20a *g, gpu_va = gpu_va >> 8; va_lo = u64_lo32(gpu_va); va_hi = u64_hi32(gpu_va); - gk20a_mem_wr(g, mem, + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), va_lo); - gk20a_mem_wr(g, mem, + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); } -- cgit v1.2.2 From c876bec8bab5a1e4d6dea529700ef19c5eac5e64 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 22 Mar 2017 10:01:14 -0700 
Subject: gpu: nvgpu: rename mem_desc to nvgpu_mem $ find -type f | \ xargs sed -i 's/struct mem_desc/struct nvgpu_mem/g' JIRA NVGPU-12 Change-Id: I2b5d015e45185269bfae7c6d4199fe843ff26834 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1326194 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index ddaaa350..6e6b5630 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -642,7 +642,7 @@ static int gr_gv11b_init_ctx_state(struct gk20a *g) } int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, - struct mem_desc *mem) + struct nvgpu_mem *mem) { int err; @@ -778,7 +778,7 @@ fail_free_gk20a_ctx: static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) { - struct mem_desc *mem = &gr_ctx->mem; + struct nvgpu_mem *mem = &gr_ctx->mem; if (nvgpu_mem_begin(g, mem)) { WARN_ON("Cannot map context"); @@ -834,7 +834,7 @@ static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, - struct mem_desc *mem) + struct nvgpu_mem *mem) { struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; u32 gfxp_preempt_option = @@ -1925,7 +1925,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g, } static void gv11b_restore_context_header(struct gk20a *g, - struct mem_desc *ctxheader) + struct nvgpu_mem *ctxheader) { u32 va_lo, va_hi; struct gr_gk20a *gr = &g->gr; @@ -1947,7 +1947,7 @@ static void gv11b_restore_context_header(struct gk20a *g, ctxsw_prog_main_image_num_save_ops_o(), 0); } static void gr_gv11b_write_zcull_ptr(struct gk20a *g, - struct mem_desc *mem, u64 gpu_va) + struct nvgpu_mem *mem, u64 gpu_va) { u32 va_lo, va_hi; @@ 
-1962,7 +1962,7 @@ static void gr_gv11b_write_zcull_ptr(struct gk20a *g, static void gr_gv11b_write_pm_ptr(struct gk20a *g, - struct mem_desc *mem, u64 gpu_va) + struct nvgpu_mem *mem, u64 gpu_va) { u32 va_lo, va_hi; -- cgit v1.2.2 From 26159aed046393e5d01060323a76f838d2f5bba3 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Tue, 4 Apr 2017 23:59:09 +0100 Subject: gpu: nvgpu: Move DMA API to dma.h Handle gv11b specific of gpu: nvgpu: Move DMA API to dma.h In the main nvgpu repository. JIRA NVGPU-12 Change-Id: I9645c4eedc5f61585d15caeee54db92bc4cca079 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1455212 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker Reviewed-by: Konsta Holtta Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 6e6b5630..a9f0d1e0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -21,6 +21,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" -- cgit v1.2.2 From 1a426c981c4fa2816d969b27163ab2dbc2fa4e89 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 5 Apr 2017 00:08:52 +0100 Subject: gpu: nvgpu: Rename nvgpu DMA APIs gv11b changes to go along with gpu: nvgpu: Rename nvgpu DMA APIs In the main nvgpu repo. 
JIRA NVGPU-12 Change-Id: I5e28b13448d171e1511ace0842e53700385f8489 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1455213 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker Reviewed-by: Konsta Holtta Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a9f0d1e0..f4e31ec0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -649,7 +649,7 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, gk20a_dbg_fn(""); - err = gk20a_gmmu_alloc_sys(vm->mm->g, size, mem); + err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem); if (err) return err; @@ -669,7 +669,7 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, return 0; fail_free: - gk20a_gmmu_free(vm->mm->g, mem); + nvgpu_dma_free(vm->mm->g, mem); return err; } @@ -764,11 +764,11 @@ static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, return err; fail_free_betacb: - gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer); fail_free_spill: - gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer); fail_free_preempt: - gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer); fail_free_gk20a_ctx: gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); *gr_ctx = NULL; @@ -824,10 +824,10 @@ static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close) dump_ctx_switch_stats(g, vm, gr_ctx); - gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); - gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); - 
gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); - gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); gr_gk20a_free_gr_ctx(g, vm, gr_ctx); gk20a_dbg_fn("done"); } -- cgit v1.2.2 From 7fe4b6572ba80dda58d513969b69e22437901077 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Mon, 10 Apr 2017 11:09:13 -0700 Subject: gpu: nvgpu: gv11b: Use new error macros gk20a_err() and gk20a_warn() require a struct device pointer, which is not portable across operating systems. The new nvgpu_err() and nvgpu_warn() macros take struct gk20a pointer. Convert code to use the more portable macros. JIRA NVGPU-16 Change-Id: I8c0d8944f625e3c5b16a9f5a2a59d95a680f4e55 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/1459822 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker Reviewed-by: Alex Waterman GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 63 +++++++++++++++----------------------- 1 file changed, 25 insertions(+), 38 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f4e31ec0..5b0526b0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -22,6 +22,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" @@ -234,8 +235,7 @@ static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, u32 index = query_params->index_size; if (index >= GK20A_ZBC_TABLE_SIZE) { - gk20a_err(dev_from_gk20a(g), - "invalid zbc stencil table index\n"); + nvgpu_err(g, "invalid zbc stencil table index"); return -EINVAL; } query_params->depth = gr->zbc_s_tbl[index].stencil; @@ -332,8 +332,7 @@ 
static int gr_gv11b_load_stencil_default_tbl(struct gk20a *g, if (!err) { gr->max_default_s_index = 3; } else { - gk20a_err(dev_from_gk20a(g), - "fail to load default zbc stencil table\n"); + nvgpu_err(g, "fail to load default zbc stencil table"); return err; } @@ -628,8 +627,7 @@ static int gr_gv11b_init_ctx_state(struct gk20a *g) op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size; err = gr_gk20a_submit_fecs_method_op(g, op, false); if (err) { - gk20a_err(dev_from_gk20a(g), - "query preempt image size failed"); + nvgpu_err(g, "query preempt image size failed"); return err; } } @@ -717,8 +715,7 @@ static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, g->gr.t18x.ctx_vars.preempt_image_size, &(*gr_ctx)->t18x.preempt_ctxsw_buffer); if (err) { - gk20a_err(dev_from_gk20a(vm->mm->g), - "cannot allocate preempt buffer"); + nvgpu_err(vm->mm->g, "cannot allocate preempt buffer"); goto fail_free_gk20a_ctx; } @@ -726,8 +723,7 @@ static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, spill_size, &(*gr_ctx)->t18x.spill_ctxsw_buffer); if (err) { - gk20a_err(dev_from_gk20a(vm->mm->g), - "cannot allocate spill buffer"); + nvgpu_err(vm->mm->g, "cannot allocate spill buffer"); goto fail_free_preempt; } @@ -735,8 +731,7 @@ static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, attrib_cb_size, &(*gr_ctx)->t18x.betacb_ctxsw_buffer); if (err) { - gk20a_err(dev_from_gk20a(vm->mm->g), - "cannot allocate beta buffer"); + nvgpu_err(vm->mm->g, "cannot allocate beta buffer"); goto fail_free_spill; } @@ -744,8 +739,7 @@ static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, pagepool_size, &(*gr_ctx)->t18x.pagepool_ctxsw_buffer); if (err) { - gk20a_err(dev_from_gk20a(vm->mm->g), - "cannot allocate page pool"); + nvgpu_err(vm->mm->g, "cannot allocate page pool"); goto fail_free_betacb; } @@ -785,29 +779,28 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, WARN_ON("Cannot map context"); return; } - gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n", 
+ nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_magic_value_o()), ctxsw_prog_main_image_magic_value_v_value_v()); - gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", + nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_save_ops_o())); - gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n", + nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_wfi_save_ops_o())); - gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n", + nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_cta_save_ops_o())); - gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n", + nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_gfxp_save_ops_o())); - gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n", + nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_num_cilp_save_ops_o())); - gk20a_err(dev_from_gk20a(g), - "image gfx preemption option (GFXP is 1) %x\n", + nvgpu_err(g, "image gfx preemption option (GFXP is 1) %x", nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o())); nvgpu_mem_end(g, mem); @@ -868,8 +861,7 @@ static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); if (err) { - gk20a_err(dev_from_gk20a(g), - "can't map patch context"); + nvgpu_err(g, "can't map patch context"); goto out; } @@ -1118,7 +1110,7 @@ static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, } while (!nvgpu_timeout_expired(&timeout)); - gk20a_err(dev_from_gk20a(g), + nvgpu_err(g, "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); @@ -1272,15 +1264,13 @@ static int gr_gv11b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a ret = 
gk20a_disable_channel_tsg(g, fault_ch); if (ret) { - gk20a_err(dev_from_gk20a(g), - "CILP: failed to disable channel/TSG!\n"); + nvgpu_err(g, "CILP: failed to disable channel/TSG!"); return ret; } ret = g->ops.fifo.update_runlist(g, fault_ch->runlist_id, ~0, true, false); if (ret) { - gk20a_err(dev_from_gk20a(g), - "CILP: failed to restart runlist 0!"); + nvgpu_err(g, "CILP: failed to restart runlist 0!"); return ret; } @@ -1319,7 +1309,7 @@ static int gr_gv11b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk2 "CILP: looking up ctx id"); ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id); if (ret) { - gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n"); + nvgpu_err(g, "CILP: error looking up ctx id!"); return ret; } gr_ctx->t18x.ctx_id_valid = true; @@ -1343,8 +1333,7 @@ static int gr_gv11b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk2 .cond.fail = GR_IS_UCODE_OP_SKIP}); if (ret) { - gk20a_err(dev_from_gk20a(g), - "CILP: failed to enable ctxsw interrupt!"); + nvgpu_err(g, "CILP: failed to enable ctxsw interrupt!"); return ret; } @@ -1357,8 +1346,7 @@ static int gr_gv11b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk2 ret = gr_gv11b_disable_channel_or_tsg(g, fault_ch); if (ret) { - gk20a_err(dev_from_gk20a(g), - "CILP: failed to disable channel!!"); + nvgpu_err(g, "CILP: failed to disable channel!!"); return ret; } @@ -1472,7 +1460,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n"); ret = gr_gv11b_set_cilp_preempt_pending(g, fault_ch); if (ret) { - gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n"); + nvgpu_err(g, "CILP: error while setting CILP preempt pending!"); return ret; } @@ -1562,7 +1550,7 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, /* set preempt_pending to false */ ret = gr_gv11b_clear_cilp_preempt_pending(g, ch); if (ret) { - gk20a_err(dev_from_gk20a(g), 
"CILP: error while unsetting CILP preempt pending!\n"); + nvgpu_err(g, "CILP: error while unsetting CILP preempt pending!"); gk20a_channel_put(ch); goto clean_up; } @@ -2002,8 +1990,7 @@ void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) therm_gate_ctrl_eng_clk_auto_f()); break; default: - gk20a_err(dev_from_gk20a(g), - "invalid elcg mode %d", mode); + nvgpu_err(g, "invalid elcg mode %d", mode); } gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); -- cgit v1.2.2 From 7b5b4fc84ff0067fb0e7b5a3b86a4b16784e8ffa Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 9 Mar 2017 11:30:14 -0800 Subject: gpu: nvgpu: gv11b: function to get max veid Defined function to get max number of subcontexs supported and used it where max subcontext count required. JIRA GV11B-23 Change-Id: I4f6307162486bab1e91cbf66abfee7763c70fe7b Signed-off-by: seshendra Gadagottu Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1318146 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 5b0526b0..33a5067e 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1649,10 +1649,9 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) struct av_list_gk20a *sw_veid_bundle_init = &g->gr.ctx_vars.sw_veid_bundle_init; u32 j; - u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); u32 num_subctx, err = 0; - num_subctx = gr_pri_fe_chip_def_info_max_veid_count_v(data); + num_subctx = gv11b_get_max_subctx_count(g); for (j = 0; j < num_subctx; j++) { -- cgit v1.2.2 From 7872900486bd31cf186930848adec46d0a13b68a Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 17 Apr 2017 21:20:19 +0100 Subject: gpu: nvgpu: Move Linux nvgpu_mem fields t19x part 
for this change in nvgpu. JIRA NVGPU-12 JIRA NVGPU-30 Change-Id: I31116b4241076b39a6638273281630a1527bcd35 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1464109 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker Reviewed-by: Konsta Holtta GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 33a5067e..190e3df5 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -652,7 +652,7 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, return err; mem->gpu_va = gk20a_gmmu_map(vm, - &mem->sgt, + &mem->priv.sgt, size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, -- cgit v1.2.2 From 92895a57a733cffe9f4318be9a5064c9e32350e1 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 27 Apr 2017 11:02:41 -0700 Subject: gpu: nvgpu: gv11b: fix error for static code analysis Functions that are not declared in header files are made static Bug 200299572 Change-Id: Ibf9e9cc9f48ad9ceaa202d1bb7ed57724057cda0 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1471538 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 190e3df5..7638baaf 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1963,7 +1963,7 @@ static void gr_gv11b_write_pm_ptr(struct gk20a *g, ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); } -void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) +static void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 
engine) { u32 gate_ctrl; -- cgit v1.2.2 From 4b990224df5ab7bc2fc92a66ba8e5d9bba023ca8 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Tue, 24 Jan 2017 15:39:29 -0800 Subject: gpu: nvgpu: gv11b: changes related to preemption Added function pointers to check chip specific valid gfx class and compute class. Also added function pointer to update ctx header with preemption buffer pointers. Also fall back to gp10b functions, where nothing is changed from gp10b to gv11b. Bug 200292090 Change-Id: I69900e32bbcce4576c4c0f7a7119c7dd8e984928 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1293503 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 502 +++++-------------------------------- 1 file changed, 60 insertions(+), 442 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7638baaf..b8993052 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -73,6 +73,41 @@ static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) return valid; } +static bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) +{ + bool valid = false; + + switch (class_num) { + case VOLTA_A: + case PASCAL_A: + case MAXWELL_B: + valid = true; + break; + + default: + break; + } + return valid; +} + +static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) +{ + bool valid = false; + + switch (class_num) { + case VOLTA_COMPUTE_A: + case PASCAL_COMPUTE_A: + case MAXWELL_COMPUTE_B: + valid = true; + break; + + default: + break; + } + return valid; +} + + static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) @@ -113,122 +148,6 @@ static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc,
u32 tpc, return 0; } -static int gr_gv11b_commit_global_cb_manager(struct gk20a *g, - struct channel_gk20a *c, bool patch) -{ - struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; - u32 attrib_offset_in_chunk = 0; - u32 alpha_offset_in_chunk = 0; - u32 pd_ab_max_output; - u32 gpc_index, ppc_index; - u32 temp, temp2; - u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate; - u32 attrib_size_in_chunk, cb_attrib_cache_size_init; - - gk20a_dbg_fn(""); - - if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { - attrib_size_in_chunk = gr->attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); - cb_attrib_cache_size_init = gr->attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); - } else { - attrib_size_in_chunk = gr->attrib_cb_size; - cb_attrib_cache_size_init = gr->attrib_cb_default_size; - } - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), - gr->attrib_cb_default_size, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(), - gr->alpha_cb_default_size, patch); - - pd_ab_max_output = (gr->alpha_cb_default_size * - gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / - gr_pd_ab_dist_cfg1_max_output_granularity_v(); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), - gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | - gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); - - attrib_offset_in_chunk = alpha_offset_in_chunk + - gr->tpc_count * gr->alpha_cb_size; - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - temp = proj_gpc_stride_v() * gpc_index; - temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { - cbm_cfg_size_beta = cb_attrib_cache_size_init * - 
gr->pes_tpc_count[ppc_index][gpc_index]; - cbm_cfg_size_alpha = gr->alpha_cb_default_size * - gr->pes_tpc_count[ppc_index][gpc_index]; - cbm_cfg_size_steadystate = gr->attrib_cb_default_size * - gr->pes_tpc_count[ppc_index][gpc_index]; - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + - proj_ppc_in_gpc_stride_v() * ppc_index, - cbm_cfg_size_beta, patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + - proj_ppc_in_gpc_stride_v() * ppc_index, - attrib_offset_in_chunk, patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + - proj_ppc_in_gpc_stride_v() * ppc_index, - cbm_cfg_size_steadystate, - patch); - - attrib_offset_in_chunk += attrib_size_in_chunk * - gr->pes_tpc_count[ppc_index][gpc_index]; - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + - proj_ppc_in_gpc_stride_v() * ppc_index, - cbm_cfg_size_alpha, patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + - proj_ppc_in_gpc_stride_v() * ppc_index, - alpha_offset_in_chunk, patch); - - alpha_offset_in_chunk += gr->alpha_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index]; - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), - gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), - patch); - } - } - - return 0; -} - -static void gr_gv11b_commit_global_pagepool(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, - u64 addr, u32 size, bool patch) -{ - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), - gr_scc_pagepool_base_addr_39_8_f(addr), patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), - gr_scc_pagepool_total_pages_f(size) | - gr_scc_pagepool_valid_true_f(), patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), - gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, 
gr_gpcs_gcc_pagepool_r(), - gr_gpcs_gcc_pagepool_total_pages_f(size), patch); -} - static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, struct zbc_query_params *query_params) { @@ -604,42 +523,6 @@ static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) } } -static int gr_gv11b_init_ctx_state(struct gk20a *g) -{ - struct fecs_method_op_gk20a op = { - .mailbox = { .id = 0, .data = 0, - .clr = ~0, .ok = 0, .fail = 0}, - .method.data = 0, - .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, - .cond.fail = GR_IS_UCODE_OP_SKIP, - }; - int err; - - gk20a_dbg_fn(""); - - err = gr_gk20a_init_ctx_state(g); - if (err) - return err; - - if (!g->gr.t18x.ctx_vars.preempt_image_size) { - op.method.addr = - gr_fecs_method_push_adr_discover_preemption_image_size_v(); - op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size; - err = gr_gk20a_submit_fecs_method_op(g, op, false); - if (err) { - nvgpu_err(g, "query preempt image size failed"); - return err; - } - } - - gk20a_dbg_info("preempt image size: %u", - g->gr.t18x.ctx_vars.preempt_image_size); - - gk20a_dbg_fn("done"); - - return 0; -} - int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, struct nvgpu_mem *mem) { @@ -671,253 +554,6 @@ fail_free: return err; } -static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, - u32 class, - u32 flags) -{ - int err; - - gk20a_dbg_fn(""); - - err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); - if (err) - return err; - - (*gr_ctx)->t18x.ctx_id_valid = false; - - if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) - flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP; - - if (class == PASCAL_COMPUTE_A && - g->gr.t18x.ctx_vars.force_preemption_cilp) - flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP; - - if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) { - u32 spill_size = - gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * - gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - u32 pagepool_size = 
g->ops.gr.pagepool_default_size(g) * - gr_scc_pagepool_total_pages_byte_granularity_v(); - u32 betacb_size = g->gr.attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); - u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * - gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - g->gr.max_tpc_count; - attrib_cb_size = ALIGN(attrib_cb_size, 128); - - gk20a_dbg_info("gfxp context spill_size=%d", spill_size); - gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size); - gk20a_dbg_info("gfxp context attrib_cb_size=%d", - attrib_cb_size); - err = gr_gv11b_alloc_buffer(vm, - g->gr.t18x.ctx_vars.preempt_image_size, - &(*gr_ctx)->t18x.preempt_ctxsw_buffer); - if (err) { - nvgpu_err(vm->mm->g, "cannot allocate preempt buffer"); - goto fail_free_gk20a_ctx; - } - - err = gr_gv11b_alloc_buffer(vm, - spill_size, - &(*gr_ctx)->t18x.spill_ctxsw_buffer); - if (err) { - nvgpu_err(vm->mm->g, "cannot allocate spill buffer"); - goto fail_free_preempt; - } - - err = gr_gv11b_alloc_buffer(vm, - attrib_cb_size, - &(*gr_ctx)->t18x.betacb_ctxsw_buffer); - if (err) { - nvgpu_err(vm->mm->g, "cannot allocate beta buffer"); - goto fail_free_spill; - } - - err = gr_gv11b_alloc_buffer(vm, - pagepool_size, - &(*gr_ctx)->t18x.pagepool_ctxsw_buffer); - if (err) { - nvgpu_err(vm->mm->g, "cannot allocate page pool"); - goto fail_free_betacb; - } - - (*gr_ctx)->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; - } - - if (class == PASCAL_COMPUTE_A) { - if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) - (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; - else - (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA; - } - - gk20a_dbg_fn("done"); - - return err; - -fail_free_betacb: - nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer); -fail_free_spill: - nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer); -fail_free_preempt: - nvgpu_dma_unmap_free(vm, 
&(*gr_ctx)->t18x.preempt_ctxsw_buffer); -fail_free_gk20a_ctx: - gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); - *gr_ctx = NULL; - - return err; -} - -static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx) -{ - struct nvgpu_mem *mem = &gr_ctx->mem; - - if (nvgpu_mem_begin(g, mem)) { - WARN_ON("Cannot map context"); - return; - } - nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_magic_value_o()), - ctxsw_prog_main_image_magic_value_v_value_v()); - - - nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_save_ops_o())); - nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_wfi_save_ops_o())); - nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_cta_save_ops_o())); - nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_gfxp_save_ops_o())); - nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_cilp_save_ops_o())); - nvgpu_err(g, "image gfx preemption option (GFXP is 1) %x", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_graphics_preemption_options_o())); - nvgpu_mem_end(g, mem); -} - -static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx) -{ - gk20a_dbg_fn(""); - - if (!gr_ctx) - return; - - if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close) - dump_ctx_switch_stats(g, vm, gr_ctx); - - nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); - gr_gk20a_free_gr_ctx(g, vm, gr_ctx); - gk20a_dbg_fn("done"); -} - - -static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, - struct 
nvgpu_mem *mem) -{ - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; - u32 gfxp_preempt_option = - ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); - u32 cilp_preempt_option = - ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(); - int err; - - gk20a_dbg_fn(""); - - if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { - gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(), - gfxp_preempt_option); - } - - if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { - gk20a_dbg_info("CILP: %x", cilp_preempt_option); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(), - cilp_preempt_option); - } - - if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) { - u32 addr; - u32 size; - u32 cbes_reserve; - - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(), - gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); - - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); - if (err) { - nvgpu_err(g, "can't map patch context"); - goto out; - } - - addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >> - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | - (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) << - (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); - - gk20a_dbg_info("attrib cb addr : 0x%016x", addr); - g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); - - addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >> - gr_scc_pagepool_base_addr_39_8_align_bits_v()) | - (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) << - (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); - size = gr_ctx->t18x.pagepool_ctxsw_buffer.size; - - if (size == g->ops.gr.pagepool_default_size(g)) - size = gr_scc_pagepool_total_pages_hwmax_v(); - - g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); - - addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >> - 
gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | - (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) << - (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v())); - size = gr_ctx->t18x.spill_ctxsw_buffer.size / - gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_swdx_rm_spill_buffer_addr_r(), - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), - true); - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpc0_swdx_rm_spill_buffer_size_r(), - gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), - true); - - cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpcs_swdx_beta_cb_ctrl_r(), - gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( - cbes_reserve), - true); - gr_gk20a_ctx_patch_write(g, ch_ctx, - gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), - gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( - cbes_reserve), - true); - - gr_gk20a_ctx_patch_write_end(g, ch_ctx); - } - -out: - gk20a_dbg_fn("done"); -} static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) @@ -1145,41 +781,6 @@ static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } -static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, - u64 addr, u64 size, bool patch) -{ - u32 data; - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), - gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), - gr_scc_bundle_cb_size_div_256b_f(size) | - gr_scc_bundle_cb_size_valid_true_f(), patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), - gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), - gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | - gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); - - /* data for state_limit */ - data = 
(g->gr.bundle_cb_default_size * - gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / - gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); - - data = min_t(u32, data, g->gr.min_gpm_fifo_depth); - - gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", - g->gr.bundle_cb_token_limit, data); - - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), - gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | - gr_pd_ab_dist_cfg2_state_limit_f(data), patch); -} - static int gr_gv11b_init_fs_state(struct gk20a *g) { return gr_gp10b_init_fs_state(g); @@ -2025,6 +1626,28 @@ static void gr_gv11b_load_tpc_mask(struct gk20a *g) } +static void gr_gv11b_write_preemption_ptr(struct gk20a *g, + struct nvgpu_mem *mem, u64 gpu_va) +{ + u32 addr_lo, addr_hi; + + addr_lo = u64_lo32(gpu_va); + addr_hi = u64_hi32(gpu_va); + + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi); + + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(), + addr_hi); + +} + + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2032,8 +1655,9 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.init_fs_state = gr_gv11b_init_fs_state; gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; gops->gr.is_valid_class = gr_gv11b_is_valid_class; - gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager; - gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool; + gops->gr.is_valid_gfx_class = gr_gv11b_is_valid_gfx_class; + gops->gr.is_valid_compute_class = gr_gv11b_is_valid_compute_class; + gops->gr.write_preemption_ptr = gr_gv11b_write_preemption_ptr; gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; @@ 
-2043,7 +1667,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.calc_global_ctx_buffer_size = gr_gv11b_calc_global_ctx_buffer_size; gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; - gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb; gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults; gops->gr.cb_size_default = gr_gv11b_cb_size_default; @@ -2051,11 +1674,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gr_gv11b_set_alpha_circular_buffer_size; gops->gr.set_circular_buffer_size = gr_gv11b_set_circular_buffer_size; - gops->gr.init_ctx_state = gr_gv11b_init_ctx_state; - gops->gr.alloc_gr_ctx = gr_gv11b_alloc_gr_ctx; - gops->gr.free_gr_ctx = gr_gv11b_free_gr_ctx; - gops->gr.update_ctxsw_preemption_mode = - gr_gv11b_update_ctxsw_preemption_mode; gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs; gops->gr.wait_empty = gr_gv11b_wait_empty; gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; -- cgit v1.2.2 From 44dcc5a53fabc68a32f16a1a3a46a2582b5b192b Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 17 Apr 2017 22:16:00 +0100 Subject: gpu: nvgpu: Separate GMMU out of mm_gk20a.c t19x version of same named patch in nvgpu. 
JIRA NVGPU-12 JIRA NVGPU-30 Change-Id: I0b176577c0edcdcc587f22a6908045a960f830e2 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1464111 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index b8993052..46626bb7 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -534,8 +535,8 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, if (err) return err; - mem->gpu_va = gk20a_gmmu_map(vm, - &mem->priv.sgt, + mem->gpu_va = nvgpu_gmmu_map(vm, + mem, size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, -- cgit v1.2.2 From 808af68d962b85594c2accd1069c6a2de35c50e4 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 16 May 2017 15:37:02 +0300 Subject: gpu: nvgpu: gv11b: check subctx header err codes React to possible errors in gr_gv11b_commit_inst() from allocating and updating subcontext header. 
Bug 1927306 Change-Id: I668e13ce13af296e9a7badb3b167fa7a7cd26212 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1483043 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Seshendra Gadagottu Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 46626bb7..179c7d33 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1436,12 +1436,17 @@ static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) u32 addr_lo; u32 addr_hi; struct ctx_header_desc *ctx; + int err; gk20a_dbg_fn(""); - gv11b_alloc_subctx_header(c); + err = gv11b_alloc_subctx_header(c); + if (err) + return err; - gv11b_update_subctx_header(c, gpu_va); + err = gv11b_update_subctx_header(c, gpu_va); + if (err) + return err; ctx = &c->ch_ctx.ctx_header; addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); -- cgit v1.2.2 From ffc37e50fa8e869e9a160b35f3cf414040e8a360 Mon Sep 17 00:00:00 2001 From: Lakshmanan M Date: Wed, 10 May 2017 12:38:08 +0530 Subject: gpu: nvgpu: gv11b: Add L1 tags parity support This CL covers the following parity support (corrected + uncorrected), 1) SM's L1 tags 2) SM's S2R's pixel PRF buffer 3) SM's L1 D-cache miss latency FIFOs Volta Resiliency Id - Volta-720, Volta-721, Volta-637 JIRA GPUT19X-85 JIRA GPUT19X-104 JIRA GPUT19X-100 JIRA GPUT19X-103 Bug 1825948 Bug 1825962 Bug 1775457 Change-Id: I53d7231a36b2c7c252395eca27b349eca80dec63 Signed-off-by: Lakshmanan M Reviewed-on: http://git-master/r/1478881 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 87 +++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) (limited to 
'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 179c7d33..ad34233c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -108,6 +108,89 @@ static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) return valid; } +static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + u32 l1_tag_ecc_status, l1_tag_ecc_corrected_err_status = 0; + u32 l1_tag_ecc_uncorrected_err_status = 0; + u32 l1_tag_corrected_err_count_delta = 0; + u32 l1_tag_uncorrected_err_count_delta = 0; + bool is_l1_tag_ecc_corrected_total_err_overflow = 0; + bool is_l1_tag_ecc_uncorrected_total_err_overflow = 0; + + /* Check for L1 tag ECC errors. 
*/ + l1_tag_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset); + l1_tag_ecc_corrected_err_status = l1_tag_ecc_status & + (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()); + l1_tag_ecc_uncorrected_err_status = l1_tag_ecc_status & + (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()); + + if ((l1_tag_ecc_corrected_err_status == 0) && (l1_tag_ecc_uncorrected_err_status == 0)) + return 0; + + l1_tag_corrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + + offset)); + l1_tag_uncorrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + + offset)); + is_l1_tag_ecc_corrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_total_counter_overflow_v(l1_tag_ecc_status); + is_l1_tag_ecc_uncorrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_total_counter_overflow_v(l1_tag_ecc_status); + + if ((l1_tag_corrected_err_count_delta > 0) || is_l1_tag_ecc_corrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "corrected error (SBE) detected in SM L1 tag! 
err_mask [%08x] is_overf [%d]", + l1_tag_ecc_corrected_err_status, is_l1_tag_ecc_corrected_total_err_overflow); + + /* HW uses 16-bits counter */ + l1_tag_corrected_err_count_delta += + (is_l1_tag_ecc_corrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count.counters[tpc] += + l1_tag_corrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, + 0); + } + if ((l1_tag_uncorrected_err_count_delta > 0) || is_l1_tag_ecc_uncorrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Uncorrected error (DBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]", + l1_tag_ecc_uncorrected_err_status, is_l1_tag_ecc_uncorrected_total_err_overflow); + + /* HW uses 16-bits counter */ + l1_tag_uncorrected_err_count_delta += + (is_l1_tag_ecc_uncorrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_l1_tag_uncorrected_err_count.counters[tpc] += + l1_tag_uncorrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, + 0); + } + + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset, + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_reset_task_f()); + + return 0; + +} static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, @@ -118,7 +201,8 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, proj_tpc_in_gpc_stride_v() * tpc; u32 lrf_ecc_status; - gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); + /* Check for L1 tag ECC errors. */ + gr_gv11b_handle_l1_tag_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); /* Check for LRF ECC errors. 
*/ lrf_ecc_status = gk20a_readl(g, @@ -1692,6 +1776,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception; gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; + gops->gr.create_gr_sysfs = gr_gv11b_create_sysfs; gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; -- cgit v1.2.2 From d503a234440b0b5912f64314de68689b3211bbcd Mon Sep 17 00:00:00 2001 From: Lakshmanan M Date: Mon, 15 May 2017 15:32:21 +0530 Subject: gpu: nvgpu: gv11b: Add LRF + CBU parity support This CL covers the following parity support (uncorrected error), 1) SM's LRF 2) SM's CBU Volta Resiliency Id - Volta-637 JIRA GPUT19X-85 JIRA GPUT19X-110 Bug 1775457 Change-Id: I3befb1fe22719d06aa819ef27654aaf97f911a9b Signed-off-by: Lakshmanan M Reviewed-on: http://git-master/r/1481791 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 187 +++++++++++++++++++++++++++++++++++-- 1 file changed, 180 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index ad34233c..d36aa6ec 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -192,24 +192,197 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, } +static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + u32 lrf_ecc_status, lrf_ecc_corrected_err_status = 0; + u32 lrf_ecc_uncorrected_err_status = 0; + u32 lrf_corrected_err_count_delta = 0; + u32 
lrf_uncorrected_err_count_delta = 0; + bool is_lrf_ecc_corrected_total_err_overflow = 0; + bool is_lrf_ecc_uncorrected_total_err_overflow = 0; + + /* Check for LRF ECC errors. */ + lrf_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); + lrf_ecc_corrected_err_status = lrf_ecc_status & + (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp0_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp1_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp2_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp3_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp4_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp5_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp6_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp7_m()); + lrf_ecc_uncorrected_err_status = lrf_ecc_status & + (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp0_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp1_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp2_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp3_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp4_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp5_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp6_m() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp7_m()); + + if ((lrf_ecc_corrected_err_status == 0) && (lrf_ecc_uncorrected_err_status == 0)) + return 0; + + lrf_corrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + + offset)); + lrf_uncorrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + + offset)); + is_lrf_ecc_corrected_total_err_overflow = + 
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_total_counter_overflow_v(lrf_ecc_status); + is_lrf_ecc_uncorrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_total_counter_overflow_v(lrf_ecc_status); + + if ((lrf_corrected_err_count_delta > 0) || is_lrf_ecc_corrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "corrected error (SBE) detected in SM LRF! err_mask [%08x] is_overf [%d]", + lrf_ecc_corrected_err_status, is_lrf_ecc_corrected_total_err_overflow); + + /* HW uses 16-bits counter */ + lrf_corrected_err_count_delta += + (is_lrf_ecc_corrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); + g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += + lrf_corrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, + 0); + } + if ((lrf_uncorrected_err_count_delta > 0) || is_lrf_ecc_uncorrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Uncorrected error (DBE) detected in SM LRF! 
err_mask [%08x] is_overf [%d]", + lrf_ecc_uncorrected_err_status, is_lrf_ecc_uncorrected_total_err_overflow); + + /* HW uses 16-bits counter */ + lrf_uncorrected_err_count_delta += + (is_lrf_ecc_uncorrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); + g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += + lrf_uncorrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, + 0); + } + + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_reset_task_f()); + + return 0; + +} + +static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + u32 cbu_ecc_status, cbu_ecc_corrected_err_status = 0; + u32 cbu_ecc_uncorrected_err_status = 0; + u32 cbu_corrected_err_count_delta = 0; + u32 cbu_uncorrected_err_count_delta = 0; + bool is_cbu_ecc_corrected_total_err_overflow = 0; + bool is_cbu_ecc_uncorrected_total_err_overflow = 0; + + /* Check for CBU ECC errors. 
*/ + cbu_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_r() + offset); + cbu_ecc_corrected_err_status = cbu_ecc_status & + (gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_warp_sm0_m() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_warp_sm1_m() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_barrier_sm0_m() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_barrier_sm1_m()); + cbu_ecc_uncorrected_err_status = cbu_ecc_status & + (gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_warp_sm0_m() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_warp_sm1_m() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_barrier_sm0_m() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_barrier_sm1_m()); + + if ((cbu_ecc_corrected_err_status == 0) && (cbu_ecc_uncorrected_err_status == 0)) + return 0; + + cbu_corrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + + offset)); + cbu_uncorrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + + offset)); + is_cbu_ecc_corrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_total_counter_overflow_v(cbu_ecc_status); + is_cbu_ecc_uncorrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_total_counter_overflow_v(cbu_ecc_status); + + if ((cbu_corrected_err_count_delta > 0) || is_cbu_ecc_corrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "corrected error (SBE) detected in SM CBU! 
err_mask [%08x] is_overf [%d]", + cbu_ecc_corrected_err_status, is_cbu_ecc_corrected_total_err_overflow); + + /* HW uses 16-bits counter */ + cbu_corrected_err_count_delta += + (is_cbu_ecc_corrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_cbu_corrected_err_count.counters[tpc] += + cbu_corrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, + 0); + } + if ((cbu_uncorrected_err_count_delta > 0) || is_cbu_ecc_uncorrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Uncorrected error (DBE) detected in SM CBU! err_mask [%08x] is_overf [%d]", + cbu_ecc_uncorrected_err_status, is_cbu_ecc_uncorrected_total_err_overflow); + + /* HW uses 16-bits counter */ + cbu_uncorrected_err_count_delta += + (is_cbu_ecc_uncorrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_cbu_uncorrected_err_count.counters[tpc] += + cbu_uncorrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, + 0); + } + + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_status_r() + offset, + gr_pri_gpc0_tpc0_sm_cbu_ecc_status_reset_task_f()); + + return 0; + +} + static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) { int ret = 0; - u32 offset = proj_gpc_stride_v() * gpc + - proj_tpc_in_gpc_stride_v() * tpc; - u32 lrf_ecc_status; /* Check for L1 tag ECC errors. */ gr_gv11b_handle_l1_tag_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); /* Check for LRF ECC errors. */ - lrf_ecc_status = gk20a_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); + gr_gv11b_handle_lrf_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); + + /* Check for CBU ECC errors. 
*/ + gr_gv11b_handle_cbu_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); - gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, - lrf_ecc_status); return ret; } -- cgit v1.2.2 From 5a08eafbe076fba98de62883636ee6b0751cf7e9 Mon Sep 17 00:00:00 2001 From: Lakshmanan M Date: Wed, 17 May 2017 11:42:24 +0530 Subject: gpu: nvgpu: gv11b: Add L1 DATA + iCACHE parity This CL covers the following parity support (uncorrected error), 1) SM's L1 DATA 2) SM's L0 && L1 icache Volta Resiliency Id - Volta-634 JIRA GPUT19X-113 JIRA GPUT19X-99 Bug 1807553 Change-Id: Iacbf492028983529dadc5753007e43510b8cb786 Signed-off-by: Lakshmanan M Reviewed-on: http://git-master/r/1483681 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 170 +++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d36aa6ec..0c0b4261 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -368,6 +368,170 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, } +static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + u32 l1_data_ecc_status, l1_data_ecc_corrected_err_status = 0; + u32 l1_data_ecc_uncorrected_err_status = 0; + u32 l1_data_corrected_err_count_delta = 0; + u32 l1_data_uncorrected_err_count_delta = 0; + bool is_l1_data_ecc_corrected_total_err_overflow = 0; + bool is_l1_data_ecc_uncorrected_total_err_overflow = 0; + + /* Check for L1 data ECC errors. 
*/ + l1_data_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset); + l1_data_ecc_corrected_err_status = l1_data_ecc_status & + (gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_el1_0_m() | + gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_el1_1_m()); + l1_data_ecc_uncorrected_err_status = l1_data_ecc_status & + (gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_el1_0_m() | + gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_el1_1_m()); + + if ((l1_data_ecc_corrected_err_status == 0) && (l1_data_ecc_uncorrected_err_status == 0)) + return 0; + + l1_data_corrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + + offset)); + l1_data_uncorrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + + offset)); + is_l1_data_ecc_corrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_total_counter_overflow_v(l1_data_ecc_status); + is_l1_data_ecc_uncorrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_total_counter_overflow_v(l1_data_ecc_status); + + if ((l1_data_corrected_err_count_delta > 0) || is_l1_data_ecc_corrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "corrected error (SBE) detected in SM L1 data! 
err_mask [%08x] is_overf [%d]", + l1_data_ecc_corrected_err_status, is_l1_data_ecc_corrected_total_err_overflow); + + /* HW uses 16-bits counter */ + l1_data_corrected_err_count_delta += + (is_l1_data_ecc_corrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_l1_data_corrected_err_count.counters[tpc] += + l1_data_corrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, + 0); + } + if ((l1_data_uncorrected_err_count_delta > 0) || is_l1_data_ecc_uncorrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Uncorrected error (DBE) detected in SM L1 data! err_mask [%08x] is_overf [%d]", + l1_data_ecc_uncorrected_err_status, is_l1_data_ecc_uncorrected_total_err_overflow); + + /* HW uses 16-bits counter */ + l1_data_uncorrected_err_count_delta += + (is_l1_data_ecc_uncorrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_l1_data_uncorrected_err_count.counters[tpc] += + l1_data_uncorrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, + 0); + } + + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset, + gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_reset_task_f()); + + return 0; + +} + +static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + u32 icache_ecc_status, icache_ecc_corrected_err_status = 0; + u32 icache_ecc_uncorrected_err_status = 0; + u32 icache_corrected_err_count_delta = 0; + u32 icache_uncorrected_err_count_delta = 0; + bool is_icache_ecc_corrected_total_err_overflow = 0; + bool 
is_icache_ecc_uncorrected_total_err_overflow = 0; + + /* Check for L0 && L1 icache ECC errors. */ + icache_ecc_status = gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset); + icache_ecc_corrected_err_status = icache_ecc_status & + (gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m() | + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m() | + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m() | + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()); + icache_ecc_uncorrected_err_status = icache_ecc_status & + (gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m() | + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m() | + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m() | + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()); + + if ((icache_ecc_corrected_err_status == 0) && (icache_ecc_uncorrected_err_status == 0)) + return 0; + + icache_corrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + + offset)); + icache_uncorrected_err_count_delta = + gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + + offset)); + is_icache_ecc_corrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_total_counter_overflow_v(icache_ecc_status); + is_icache_ecc_uncorrected_total_err_overflow = + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_total_counter_overflow_v(icache_ecc_status); + + if ((icache_corrected_err_count_delta > 0) || is_icache_ecc_corrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "corrected error (SBE) detected in SM L0 && L1 icache! 
err_mask [%08x] is_overf [%d]", + icache_ecc_corrected_err_status, is_icache_ecc_corrected_total_err_overflow); + + /* HW uses 16-bits counter */ + icache_corrected_err_count_delta += + (is_icache_ecc_corrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_icache_corrected_err_count.counters[tpc] += + icache_corrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, + 0); + } + if ((icache_uncorrected_err_count_delta > 0) || is_icache_ecc_uncorrected_total_err_overflow) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, + "Uncorrected error (DBE) detected in SM L0 && L1 icache! err_mask [%08x] is_overf [%d]", + icache_ecc_uncorrected_err_status, is_icache_ecc_uncorrected_total_err_overflow); + + /* HW uses 16-bits counter */ + icache_uncorrected_err_count_delta += + (is_icache_ecc_uncorrected_total_err_overflow << + gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); + g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count.counters[tpc] += + icache_uncorrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, + 0); + } + + gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset, + gr_pri_gpc0_tpc0_sm_icache_ecc_status_reset_task_f()); + + return 0; + +} + static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) @@ -383,6 +547,12 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, /* Check for CBU ECC errors. */ gr_gv11b_handle_cbu_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); + /* Check for L1 data ECC errors. */ + gr_gv11b_handle_l1_data_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); + + /* Check for L0 && L1 icache ECC errors. 
*/ + gr_gv11b_handle_icache_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); + return ret; } -- cgit v1.2.2 From 45ca7cb8c5774cfc15015973b1883faa1d93b9e6 Mon Sep 17 00:00:00 2001 From: Lakshmanan M Date: Fri, 19 May 2017 15:40:41 +0530 Subject: gpu: nvgpu: gv11b: Add GCC L1.5 parity support Add handling of GCC L1.5 parity exception. JIRA GPUT19X-86 Change-Id: Ie83fc306d3dff79b0ddaf2616dcf0ff71fccd4ca Signed-off-by: Lakshmanan M Reviewed-on: http://git-master/r/1485834 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 82 +++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 0c0b4261..014ba537 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -556,6 +556,84 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, return ret; } +static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 offset = gpc_stride * gpc; + u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0; + u32 gcc_l15_ecc_uncorrected_err_status = 0; + u32 gcc_l15_corrected_err_count_delta = 0; + u32 gcc_l15_uncorrected_err_count_delta = 0; + bool is_gcc_l15_ecc_corrected_total_err_overflow = 0; + bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0; + + /* Check for gcc l15 ECC errors. 
*/ + gcc_l15_ecc_status = gk20a_readl(g, + gr_pri_gpc0_gcc_l15_ecc_status_r() + offset); + gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status & + (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() | + gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m()); + gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status & + (gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() | + gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m()); + + if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0)) + return 0; + + gcc_l15_corrected_err_count_delta = + gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + + offset)); + gcc_l15_uncorrected_err_count_delta = + gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v( + gk20a_readl(g, + gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + + offset)); + is_gcc_l15_ecc_corrected_total_err_overflow = + gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status); + is_gcc_l15_ecc_uncorrected_total_err_overflow = + gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status); + + if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, + "corrected error (SBE) detected in GCC L1.5! 
err_mask [%08x] is_overf [%d]", + gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow); + + /* HW uses 16-bits counter */ + gcc_l15_corrected_err_count_delta += + (is_gcc_l15_ecc_corrected_total_err_overflow << + gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); + g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] += + gcc_l15_corrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, + 0); + } + if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, + "Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", + gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow); + + /* HW uses 16-bits counter */ + gcc_l15_uncorrected_err_count_delta += + (is_gcc_l15_ecc_uncorrected_total_err_overflow << + gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); + g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] += + gcc_l15_uncorrected_err_count_delta; + gk20a_writel(g, + gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, + 0); + } + + gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset, + gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f()); + + return 0; +} + static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -567,7 +645,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) tpc_mask = gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); - gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); + gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), + (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1))); } static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, @@ -2113,6 +2192,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; gops->gr.get_access_map = gr_gv11b_get_access_map; 
gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; + gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception; gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; -- cgit v1.2.2 From 2173add7ae7210606afdaa56995a61d012b9a2f1 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Fri, 12 May 2017 11:07:00 -0700 Subject: gpu: nvgpu: per-chip GPCCS exception support Adding support for ISR handling of GPCCS exceptions and GCC ECC support JIRA: GPUT19X-83 Change-Id: Ica749dc678f152d536052cf47f2ea2b205a231d6 Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1480997 Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 122 ++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 014ba537..764374cc 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -634,6 +634,70 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, return 0; } +static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, + u32 exception) +{ + int ret = 0; + u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + int hww_esr; + u32 offset = proj_gpc_stride_v() * gpc; + + hww_esr = gk20a_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset); + + if (!(hww_esr & (gr_gpc0_gpccs_hww_esr_ecc_uncorrected_m() | + gr_gpc0_gpccs_hww_esr_ecc_corrected_m()))) + return ret; + + ecc_status = gk20a_readl(g, + gr_gpc0_gpccs_falcon_ecc_status_r() + offset); + ecc_addr = gk20a_readl(g, + gr_gpc0_gpccs_falcon_ecc_address_r() + offset); + corrected_cnt = gk20a_readl(g, + gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + offset); + uncorrected_cnt = gk20a_readl(g, + gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + 
offset); + + /* clear the interrupt */ + gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, + gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); + + nvgpu_log(g, gpu_dbg_intr, + "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); + + if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) + nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); + if (ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) + nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); + if (ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) + nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); + if (ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) + nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); + + nvgpu_log(g, gpu_dbg_intr, + "ecc error row address: 0x%x", + gr_gpc0_gpccs_falcon_ecc_address_row_address_v(ecc_addr)); + + nvgpu_log(g, gpu_dbg_intr, + "ecc error count corrected: %d, uncorrected %d", + gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v(corrected_cnt), + gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt)); + + return ret; +} + +static int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, + u32 gpc_exception) +{ + if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpccs_m()) + return gr_gv11b_handle_gpccs_ecc_exception(g, gpc, + gpc_exception); + + return 0; +} + static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -646,7 +710,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), - (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1))); + (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) + gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1)); } static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, @@ -1622,6 +1687,55 @@ static int 
gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) return ret; } +static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) +{ + u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + + if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() | + gr_fecs_host_int_status_ecc_corrected_m())) { + ecc_status = gk20a_readl(g, gr_fecs_falcon_ecc_status_r()); + ecc_addr = gk20a_readl(g, + gr_fecs_falcon_ecc_address_r()); + corrected_cnt = gk20a_readl(g, + gr_fecs_falcon_ecc_corrected_err_count_r()); + uncorrected_cnt = gk20a_readl(g, + gr_fecs_falcon_ecc_uncorrected_err_count_r()); + + /* clear the interrupt */ + gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), + gr_fecs_falcon_ecc_status_reset_task_f()); + + nvgpu_log(g, gpu_dbg_intr, + "fecs ecc interrupt intr: 0x%x", intr); + + if (ecc_status & + gr_fecs_falcon_ecc_status_corrected_err_imem_m()) + nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); + if (ecc_status & + gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) + nvgpu_log(g, gpu_dbg_intr, + "imem ecc error uncorrected"); + if (ecc_status & + gr_fecs_falcon_ecc_status_corrected_err_dmem_m()) + nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); + if (ecc_status & + gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) + nvgpu_log(g, gpu_dbg_intr, + "dmem ecc error uncorrected"); + + nvgpu_log(g, gpu_dbg_intr, + "ecc error row address: 0x%x", + gr_fecs_falcon_ecc_address_row_address_v(ecc_addr)); + + nvgpu_log(g, gpu_dbg_intr, + "ecc error count corrected: %d, uncorrected %d", + gr_fecs_falcon_ecc_corrected_err_count_total_v( + corrected_cnt), + gr_fecs_falcon_ecc_uncorrected_err_count_total_v( + uncorrected_cnt)); + } +} + static int gr_gv11b_handle_fecs_error(struct gk20a *g, struct channel_gk20a *__ch, struct gr_gk20a_isr_data *isr_data) @@ -1680,6 +1794,9 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, gk20a_channel_put(ch); } + /* Handle ECC errors */ + gr_gv11b_handle_fecs_ecc_error(g, gr_fecs_intr); + 
clean_up: /* handle any remaining interrupts */ return gk20a_gr_handle_fecs_error(g, __ch, isr_data); @@ -2214,5 +2331,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; - + gops->gr.handle_gpc_gpccs_exception = + gr_gv11b_handle_gpc_gpccs_exception; } -- cgit v1.2.2 From c771d0b979cd9f42a21da520d5010873d2a6aa47 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Thu, 18 May 2017 16:45:40 -0700 Subject: gpu: nvgpu: add GPC parity counters (1) Re-arrange the structure for ecc counters reporting so multiple units can be managed (2) Add counters and handling for additional GPC counters JIRA: GPUT19X-84 Change-Id: I74fd474d7daf7590fc7f7ddc9837bb692512d208 Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1485277 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 103 ++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 764374cc..8b4471ca 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -163,7 +163,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_tag_corrected_err_count_delta += (is_l1_tag_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_tag_corrected_err_count.counters[tpc] += l1_tag_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, @@ -178,7 +178,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_tag_uncorrected_err_count_delta += 
(is_l1_tag_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_tag_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_tag_uncorrected_err_count.counters[tpc] += l1_tag_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, @@ -255,7 +255,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, lrf_corrected_err_count_delta += (is_lrf_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); - g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += + g->ecc.gr.t18x.sm_lrf_single_err_count.counters[tpc] += lrf_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, @@ -270,7 +270,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, lrf_uncorrected_err_count_delta += (is_lrf_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); - g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += + g->ecc.gr.t18x.sm_lrf_double_err_count.counters[tpc] += lrf_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, @@ -339,7 +339,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, cbu_corrected_err_count_delta += (is_cbu_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_cbu_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_cbu_corrected_err_count.counters[tpc] += cbu_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, @@ -354,7 +354,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, cbu_uncorrected_err_count_delta += (is_cbu_ecc_uncorrected_total_err_overflow << 
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_cbu_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_cbu_uncorrected_err_count.counters[tpc] += cbu_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, @@ -419,7 +419,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_data_corrected_err_count_delta += (is_l1_data_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_data_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_data_corrected_err_count.counters[tpc] += l1_data_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, @@ -434,7 +434,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_data_uncorrected_err_count_delta += (is_l1_data_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_data_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_data_uncorrected_err_count.counters[tpc] += l1_data_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, @@ -503,7 +503,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, icache_corrected_err_count_delta += (is_icache_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_icache_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_icache_corrected_err_count.counters[tpc] += icache_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, @@ -518,7 +518,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, icache_uncorrected_err_count_delta += 
(is_icache_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_icache_uncorrected_err_count.counters[tpc] += icache_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, @@ -606,7 +606,7 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, gcc_l15_corrected_err_count_delta += (is_gcc_l15_ecc_corrected_total_err_overflow << gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] += + g->ecc.gr.t19x.gcc_l15_corrected_err_count.counters[gpc] += gcc_l15_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, @@ -621,7 +621,7 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, gcc_l15_uncorrected_err_count_delta += (is_gcc_l15_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] += + g->ecc.gr.t19x.gcc_l15_uncorrected_err_count.counters[gpc] += gcc_l15_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, @@ -639,6 +639,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, { int ret = 0; u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + u32 corrected_delta, uncorrected_delta; + u32 corrected_overflow, uncorrected_overflow; + int hww_esr; u32 offset = proj_gpc_stride_v() * gpc; @@ -657,10 +660,34 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, uncorrected_cnt = gk20a_readl(g, gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + offset); + corrected_delta = gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v( + corrected_cnt); + uncorrected_delta = 
gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v( + uncorrected_cnt); + corrected_overflow = ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_corrected_err_total_counter_overflow_m(); + + uncorrected_overflow = ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); + + /* clear the interrupt */ + if ((corrected_delta > 0) || corrected_overflow) + gk20a_writel(g, + gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + + offset, 0); + if ((uncorrected_delta > 0) || uncorrected_overflow) + gk20a_writel(g, + gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + + offset, 0); + gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); + g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc] += + corrected_delta; + g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc] += + uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); @@ -675,6 +702,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); + if (corrected_overflow || uncorrected_overflow) + nvgpu_info(g, "gpccs ecc counter overflow!"); nvgpu_log(g, gpu_dbg_intr, "ecc error row address: 0x%x", @@ -682,8 +711,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v(corrected_cnt), - gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt)); + g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc], + g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc]); return ret; } @@ -710,8 +739,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), 
- (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) - gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1)); + (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) | + gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1))); } static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, @@ -1690,6 +1719,8 @@ static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) { u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + u32 corrected_delta, uncorrected_delta; + u32 corrected_overflow, uncorrected_overflow; if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() | gr_fecs_host_int_status_ecc_corrected_m())) { @@ -1701,10 +1732,42 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) uncorrected_cnt = gk20a_readl(g, gr_fecs_falcon_ecc_uncorrected_err_count_r()); + corrected_delta = + gr_fecs_falcon_ecc_corrected_err_count_total_v( + corrected_cnt); + uncorrected_delta = + gr_fecs_falcon_ecc_uncorrected_err_count_total_v( + uncorrected_cnt); + + corrected_overflow = ecc_status & + gr_fecs_falcon_ecc_status_corrected_err_total_counter_overflow_m(); + uncorrected_overflow = ecc_status & + gr_fecs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); + + /* clear the interrupt */ + if ((corrected_delta > 0) || corrected_overflow) + gk20a_writel(g, + gr_fecs_falcon_ecc_corrected_err_count_r(), 0); + if ((uncorrected_delta > 0) || uncorrected_overflow) + gk20a_writel(g, + gr_fecs_falcon_ecc_uncorrected_err_count_r(), + 0); + + + /* clear the interrupt */ + gk20a_writel(g, gr_fecs_falcon_ecc_uncorrected_err_count_r(), + 0); + gk20a_writel(g, gr_fecs_falcon_ecc_corrected_err_count_r(), 0); + /* clear the interrupt */ gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), gr_fecs_falcon_ecc_status_reset_task_f()); + g->ecc.gr.t19x.fecs_corrected_err_count.counters[0] += + corrected_delta; + g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0] += + uncorrected_delta; 
+ nvgpu_log(g, gpu_dbg_intr, "fecs ecc interrupt intr: 0x%x", intr); @@ -1722,6 +1785,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); + if (corrected_overflow || uncorrected_overflow) + nvgpu_info(g, "gpccs ecc counter overflow!"); nvgpu_log(g, gpu_dbg_intr, "ecc error row address: 0x%x", @@ -1729,10 +1794,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - gr_fecs_falcon_ecc_corrected_err_count_total_v( - corrected_cnt), - gr_fecs_falcon_ecc_uncorrected_err_count_total_v( - uncorrected_cnt)); + g->ecc.gr.t19x.fecs_corrected_err_count.counters[0], + g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0]); } } -- cgit v1.2.2 From 77199c0225457c48acb2dca89d0bf93d05b33231 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 3 May 2017 13:55:27 -0700 Subject: gpu: nvgpu: gv11b: init enable_exceptions gr ops Enable FE, MEMFMT, DS and GPC exceptions only. Make sure corresponding HWW_ESR are enabled too. 
JIRA GPUT19X-75 Change-Id: Icf47b7e531dd72b59cbc6ac54b5902187f703d61 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1474859 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8b4471ca..a9f1183e 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -284,6 +284,29 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } +static void gr_gv11b_enable_exceptions(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + u32 reg_val; + + /* + * clear exceptions : + * other than SM : hww_esr are reset in *enable_hww_excetpions* + * SM : cleared in *set_hww_esr_report_mask* + */ + + /* enable exceptions */ + gk20a_writel(g, gr_exception2_en_r(), 0x0); /* BE not enabled */ + gk20a_writel(g, gr_exception1_en_r(), (1 << gr->gpc_count) - 1); + + reg_val = gr_exception_en_fe_enabled_f() | + gr_exception_en_memfmt_enabled_f() | + gr_exception_en_ds_enabled_f() | + gr_exception_en_gpc_enabled_f(); + gk20a_writel(g, gr_exception_en_r(), reg_val); + +} + static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) @@ -2375,6 +2398,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception; gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; + gops->gr.enable_exceptions = gr_gv11b_enable_exceptions; gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; gops->gr.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception; -- cgit v1.2.2 From 8da78a9fa7826985e8ca08a0a15eb1d6f38a222e Mon 
Sep 17 00:00:00 2001 From: Deepak Nibade Date: Wed, 31 May 2017 13:29:06 +0530 Subject: gpu: nvgpu: include <nvgpu/debug.h> Include <nvgpu/debug.h> explicitly wherever the debug operations are used Jira NVGPU-62 Change-Id: I1845e08774b7c211e7fd954937708905f905e069 Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/1492818 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Bharat Nihalani --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a9f1183e..415ad963 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" -- cgit v1.2.2 From 81172b5df4c7dc46bf46419074b30e0a73f5ddfb Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 1 Jun 2017 12:30:49 -0700 Subject: gpu: nvgpu: gv11b: disable czf_bypass Gv11b ucode is not having support for low latency context-switching. So disable czf_bypass mode for now.
JIRA GPUT19X-116 Change-Id: I814cd254fa3c342c20906805a4b13b52c89d5b1e Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1494217 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 415ad963..8176b807 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2421,4 +2421,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; gops->gr.handle_gpc_gpccs_exception = gr_gv11b_handle_gpc_gpccs_exception; + gops->gr.set_czf_bypass = NULL; } -- cgit v1.2.2 From 345eaef6a76771da9c3e8a5e375fc9d659fb1b2b Mon Sep 17 00:00:00 2001 From: David Nieto Date: Fri, 26 May 2017 08:31:46 -0700 Subject: gpu: nvgpu: GPC MMU ECC support Adding support for GPC MMU ECC error handling JIRA: GPUT19X-112 Change-Id: I62083bf2f144ff628ecd8c0aefc8d227a233ff36 Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1490772 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 105 +++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8176b807..701b840a 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -658,16 +658,101 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, return 0; } -static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, +static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, u32 exception) { int ret = 0; + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 offset = gpc_stride * gpc; u32 ecc_status, 
ecc_addr, corrected_cnt, uncorrected_cnt; u32 corrected_delta, uncorrected_delta; u32 corrected_overflow, uncorrected_overflow; + int hww_esr; + + hww_esr = gk20a_readl(g, gr_gpc0_mmu_gpcmmu_global_esr_r() + offset); + + if (!(hww_esr & (gr_gpc0_mmu_gpcmmu_global_esr_ecc_corrected_m() | + gr_gpc0_mmu_gpcmmu_global_esr_ecc_uncorrected_m()))) + return ret; + + ecc_status = gk20a_readl(g, + gr_gpc0_mmu_l1tlb_ecc_status_r() + offset); + ecc_addr = gk20a_readl(g, + gr_gpc0_mmu_l1tlb_ecc_address_r() + offset); + corrected_cnt = gk20a_readl(g, + gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_r() + offset); + uncorrected_cnt = gk20a_readl(g, + gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_r() + offset); + + corrected_delta = gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_v( + corrected_cnt); + uncorrected_delta = gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_v( + uncorrected_cnt); + corrected_overflow = ecc_status & + gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_total_counter_overflow_m(); + + uncorrected_overflow = ecc_status & + gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_total_counter_overflow_m(); + + + /* clear the interrupt */ + if ((corrected_delta > 0) || corrected_overflow) + gk20a_writel(g, + gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_r() + + offset, 0); + if ((uncorrected_delta > 0) || uncorrected_overflow) + gk20a_writel(g, + gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_r() + + offset, 0); + + gk20a_writel(g, gr_gpc0_mmu_l1tlb_ecc_status_r() + offset, + gr_gpc0_mmu_l1tlb_ecc_status_reset_task_f()); + + /* Handle overflow */ + if (corrected_overflow) + corrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_s()); + if (uncorrected_overflow) + uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); + + g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count.counters[gpc] += + corrected_delta; + g->ecc.gr.t19x.mmu_l1tlb_uncorrected_err_count.counters[gpc] += + uncorrected_delta; + nvgpu_log(g, gpu_dbg_intr, + "mmu l1tlb gpc:%d 
ecc interrupt intr: 0x%x", gpc, hww_esr); + + if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) + nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); + if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) + nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); + if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) + nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error"); + if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) + nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error"); + if (corrected_overflow || uncorrected_overflow) + nvgpu_info(g, "mmu l1tlb ecc counter overflow!"); + + nvgpu_log(g, gpu_dbg_intr, + "ecc error address: 0x%x", ecc_addr); + nvgpu_log(g, gpu_dbg_intr, + "ecc error count corrected: %d, uncorrected %d", + g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count.counters[gpc], + g->ecc.gr.t19x.mmu_l1tlb_uncorrected_err_count.counters[gpc]); + + return ret; +} + +static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, + u32 exception) +{ + int ret = 0; + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 offset = gpc_stride * gpc; + u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + u32 corrected_delta, uncorrected_delta; + u32 corrected_overflow, uncorrected_overflow; int hww_esr; - u32 offset = proj_gpc_stride_v() * gpc; hww_esr = gk20a_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset); @@ -741,6 +826,15 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, return ret; } +static int gr_gv11b_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, + u32 gpc_exception) +{ + if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpcmmu_m()) + return gr_gv11b_handle_gpcmmu_ecc_exception(g, gpc, + gpc_exception); + return 0; +} + static int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, u32 gpc_exception) { @@ -764,7 +858,8 @@ static void 
gr_gv11b_enable_gpc_exceptions(struct gk20a *g) gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) | - gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1))); + gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) | + gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1))); } static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, @@ -1810,7 +1905,7 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); if (corrected_overflow || uncorrected_overflow) - nvgpu_info(g, "gpccs ecc counter overflow!"); + nvgpu_info(g, "fecs ecc counter overflow!"); nvgpu_log(g, gpu_dbg_intr, "ecc error row address: 0x%x", @@ -2422,4 +2517,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.handle_gpc_gpccs_exception = gr_gv11b_handle_gpc_gpccs_exception; gops->gr.set_czf_bypass = NULL; + gops->gr.handle_gpc_gpcmmu_exception = + gr_gv11b_handle_gpc_gpcmmu_exception; } -- cgit v1.2.2 From 3e22195974ef233e12f69c5769d42b322f5adc1e Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 19 May 2017 14:01:26 -0700 Subject: gpu: nvgpu: gv11b: update init_fs_state gr ops GPUT19X-70 Change-Id: Ifc6c52ac15108d1389fcd732218abf46b6167485 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1486177 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 701b840a..503fb6f7 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1501,10 +1501,6 @@ static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } -static int gr_gv11b_init_fs_state(struct gk20a *g) -{ - return gr_gp10b_init_fs_state(g); -} static void 
gr_gv11b_init_cyclestats(struct gk20a *g) { @@ -2458,6 +2454,30 @@ static void gr_gv11b_write_preemption_ptr(struct gk20a *g, } +static int gr_gv11b_init_fs_state(struct gk20a *g) +{ + u32 data; + + gk20a_dbg_fn(""); + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r()); + data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(), + gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data); + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r()); + data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(), + gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data); + + if (g->gr.t18x.fecs_feature_override_ecc_val != 0) { + gk20a_writel(g, + gr_fecs_feature_override_ecc_r(), + g->gr.t18x.fecs_feature_override_ecc_val); + } + + return gr_gm20b_init_fs_state(g); +} void gv11b_init_gr(struct gpu_ops *gops) { -- cgit v1.2.2 From 4f0e19d44dee9039378bce7bd0cfb11f36fd8926 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 7 Jun 2017 13:17:22 -0700 Subject: gpu: nvgpu: gv11b: issue tsg preempt only Preempt type should be set to tsg and id should be set to tsgid in fifo_preempt_r(). Preempt type channel and id set to channel id does not initiate preemption. 
Bug 200289427 Bug 200292090 Bug 200289491 Change-Id: I2ae96c0b9ca8a88a8405f42775744f0879994887 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1497877 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 503fb6f7..a703e72c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1593,12 +1593,15 @@ static int gr_gv11b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist"); - if (gk20a_is_channel_marked_as_tsg(fault_ch)) + if (gk20a_is_channel_marked_as_tsg(fault_ch)) { gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true); - else - gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false); - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg"); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: preempted the channel/tsg"); + } else { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: tsgid is invalid, cannot preempt"); + WARN_ON(1); /* only TSG can be preempted */ + } return ret; } -- cgit v1.2.2 From 12a8f51aa7a056d070861b120fc945d946bc10f9 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 9 Jun 2017 16:19:17 -0700 Subject: gpu: nvgpu: gv11b: disable skedcheck18_l1_config_too_small SKED_HWW_ESR_EN_SKEDCHECK18_L1_CONFIG_TOO_SMALL disabled Bug 200315442 Change-Id: I6d5c5f2fe6255d480350e01959c3c340579646e2 Signed-off-by: Seema Khowala Reviewed-on: http://git-master/r/1499568 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 22 ++++++++++++++++++++++ 1 file 
changed, 22 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a703e72c..542ed1ff 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -285,6 +285,27 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } +static void gr_gv11b_enable_hww_exceptions(struct gk20a *g) +{ + u32 val; + + /* enable exceptions */ + gk20a_writel(g, gr_fe_hww_esr_r(), + gr_fe_hww_esr_en_enable_f() | + gr_fe_hww_esr_reset_active_f()); + gk20a_writel(g, gr_memfmt_hww_esr_r(), + gr_memfmt_hww_esr_en_enable_f() | + gr_memfmt_hww_esr_reset_active_f()); + /* WAR for 200315442 */ + val = gk20a_readl(g, gr_sked_hww_esr_en_r()); + val = set_field(val, + gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(), + gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_disabled_f() + ); + nvgpu_log_info(g, "sked_hww_esr_en = 0x%x", val); + gk20a_writel(g, gr_sked_hww_esr_en_r(), val); +} + static void gr_gv11b_enable_exceptions(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -2518,6 +2539,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; gops->gr.enable_exceptions = gr_gv11b_enable_exceptions; + gops->gr.enable_hww_exceptions = gr_gv11b_enable_hww_exceptions; gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; gops->gr.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception; -- cgit v1.2.2 From 5572bfa86a6afc7ae3c2f4a61e568f8e759c6ecc Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 22 Jun 2017 14:43:05 -0700 Subject: gpu: nvgpu: gv11b: sw method for NVC397_SET_TEX_IN_DBG Added sw method for NVC397_SET_TEX_IN_DBG with following data fields: data:0 PRI_TEX_IN_DBG_TSL1_RVCH_INVALIDATE data:1 PRI_SM_L1TAG_CTRL_CACHE_SURFACE_LD data:2 PRI_SM_L1TAG_CTRL_CACHE_SURFACE_ST Bug 1934197 Change-Id: 
I0956d3f5c859ac23e16fb6b7372acd098dfb6d16 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master/r/1507479 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Tested-by: Wei Sun Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 542ed1ff..7993e071 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1069,6 +1069,31 @@ static void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data) gk20a_dbg_fn("done"); } +static void gr_gv11b_set_tex_in_dbg(struct gk20a *g, u32 data) +{ + u32 val; + bool flag; + + gk20a_dbg_fn(""); + + val = gk20a_readl(g, gr_gpcs_tpcs_tex_in_dbg_r()); + flag = (data & NVC397_SET_TEX_IN_DBG_TSL1_RVCH_INVALIDATE) ? 1 : 0; + val = set_field(val, gr_gpcs_tpcs_tex_in_dbg_tsl1_rvch_invalidate_m(), + gr_gpcs_tpcs_tex_in_dbg_tsl1_rvch_invalidate_f(flag)); + gk20a_writel(g, gr_gpcs_tpcs_tex_in_dbg_r(), val); + + val = gk20a_readl(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r()); + flag = (data & + NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_LD) ? 1 : 0; + val = set_field(val, gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_ld_m(), + gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_ld_f(flag)); + flag = (data & + NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_ST) ? 
1 : 0; + val = set_field(val, gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_st_m(), + gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_st_f(flag)); + gk20a_writel(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r(), val); +} + static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data) { @@ -1120,6 +1145,9 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, case NVC097_SET_COALESCE_BUFFER_SIZE: gr_gv11b_set_coalesce_buffer_size(g, data); break; + case NVC397_SET_TEX_IN_DBG: + gr_gv11b_set_tex_in_dbg(g, data); + break; default: goto fail; } -- cgit v1.2.2 From 7681d6b007755e16f55951a1491a38faff8c72e9 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 20 Jun 2017 13:27:39 -0700 Subject: gpu: nvgpu: gv11b: reuse gp10b functions reuse gr_gp10b_handle_fecs_error and cilp functions Bug 200289491 Change-Id: I4040f96875ad91d174ce36aab957fb94d79c3a74 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1505952 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 223 +------------------------------------ 1 file changed, 6 insertions(+), 217 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7993e071..41892746 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -32,6 +32,8 @@ #include "gm20b/gr_gm20b.h" +#include "gp10b/gr_gp10b.h" + #include "gv11b/gr_gv11b.h" #include "gv11b/mm_gv11b.h" #include "gv11b/subctx_gv11b.h" @@ -1622,145 +1624,6 @@ static void gr_gv11b_get_access_map(struct gk20a *g, *num_entries = ARRAY_SIZE(wl_addr_gv11b); } -static int gr_gv11b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch) -{ - int ret = 0; - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); - - ret = gk20a_disable_channel_tsg(g, fault_ch); - if (ret) { - nvgpu_err(g, "CILP: failed to disable channel/TSG!"); - return ret; - } - - ret = 
g->ops.fifo.update_runlist(g, fault_ch->runlist_id, ~0, true, false); - if (ret) { - nvgpu_err(g, "CILP: failed to restart runlist 0!"); - return ret; - } - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist"); - - if (gk20a_is_channel_marked_as_tsg(fault_ch)) { - gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: preempted the channel/tsg"); - } else { - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: tsgid is invalid, cannot preempt"); - WARN_ON(1); /* only TSG can be preempted */ - } - - return ret; -} - -static int gr_gv11b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch) -{ - int ret; - struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); - - if (!gr_ctx) - return -EINVAL; - - if (gr_ctx->t18x.cilp_preempt_pending) { - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP is already pending for chid %d", - fault_ch->hw_chid); - return 0; - } - - /* get ctx_id from the ucode image */ - if (!gr_ctx->t18x.ctx_id_valid) { - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: looking up ctx id"); - ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id); - if (ret) { - nvgpu_err(g, "CILP: error looking up ctx id!"); - return ret; - } - gr_ctx->t18x.ctx_id_valid = true; - } - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id); - - /* send ucode method to set ctxsw interrupt */ - ret = gr_gk20a_submit_fecs_sideband_method_op(g, - (struct fecs_method_op_gk20a) { - .method.data = gr_ctx->t18x.ctx_id, - .method.addr = - gr_fecs_method_push_adr_configure_interrupt_completion_option_v(), - .mailbox = { - .id = 1 /* sideband */, .data = 0, - .clr = ~0, .ret = NULL, - .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), - .fail = 0}, - .cond.ok = GR_IS_UCODE_OP_EQUAL, - .cond.fail = 
GR_IS_UCODE_OP_SKIP}); - - if (ret) { - nvgpu_err(g, "CILP: failed to enable ctxsw interrupt!"); - return ret; - } - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: enabled ctxsw completion interrupt"); - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: disabling channel %d", - fault_ch->hw_chid); - - ret = gr_gv11b_disable_channel_or_tsg(g, fault_ch); - if (ret) { - nvgpu_err(g, "CILP: failed to disable channel!!"); - return ret; - } - - /* set cilp_preempt_pending = true and record the channel */ - gr_ctx->t18x.cilp_preempt_pending = true; - g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid; - - if (gk20a_is_channel_marked_as_tsg(fault_ch)) { - struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid]; - - gk20a_tsg_event_id_post_event(tsg, - NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED); - } else { - gk20a_channel_event_id_post_event(fault_ch, - NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED); - } - - return 0; -} - -static int gr_gv11b_clear_cilp_preempt_pending(struct gk20a *g, - struct channel_gk20a *fault_ch) -{ - struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); - - if (!gr_ctx) - return -EINVAL; - - /* The ucode is self-clearing, so all we need to do here is - to clear cilp_preempt_pending. */ - if (!gr_ctx->t18x.cilp_preempt_pending) { - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP is already cleared for chid %d\n", - fault_ch->hw_chid); - return 0; - } - - gr_ctx->t18x.cilp_preempt_pending = false; - g->gr.t18x.cilp_preempt_pending_chid = -1; - - return 0; -} - /* @brief pre-process work on the SM exceptions to determine if we clear them or not. 
* * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing @@ -1827,7 +1690,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, gpc, tpc); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n"); - ret = gr_gv11b_set_cilp_preempt_pending(g, fault_ch); + ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch); if (ret) { nvgpu_err(g, "CILP: error while setting CILP preempt pending!"); return ret; @@ -1858,31 +1721,6 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, return 0; } -static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) -{ - struct gr_ctx_desc *gr_ctx; - struct channel_gk20a *ch; - int chid; - int ret = -EINVAL; - - chid = g->gr.t18x.cilp_preempt_pending_chid; - - ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid)); - if (!ch) - return ret; - - gr_ctx = ch->ch_ctx.gr_ctx; - - if (gr_ctx->t18x.cilp_preempt_pending) { - *__chid = chid; - ret = 0; - } - - gk20a_channel_put(ch); - - return ret; -} - static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) { u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; @@ -1971,65 +1809,16 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, struct gr_gk20a_isr_data *isr_data) { u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r()); - struct channel_gk20a *ch; - int chid = -1; - int ret = 0; + int ret; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); - /* - * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR) - * indicates that a CILP ctxsw save has finished - */ - if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) { - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, - "CILP: ctxsw save completed!\n"); - - /* now clear the interrupt */ - gk20a_writel(g, gr_fecs_host_int_clear_r(), - gr_fecs_host_int_clear_ctxsw_intr1_clear_f()); - - ret = gr_gv11b_get_cilp_preempt_pending_chid(g, &chid); - if (ret) - goto clean_up; - - ch = 
gk20a_channel_get( - gk20a_fifo_channel_from_hw_chid(g, chid)); - if (!ch) - goto clean_up; - - - /* set preempt_pending to false */ - ret = gr_gv11b_clear_cilp_preempt_pending(g, ch); - if (ret) { - nvgpu_err(g, "CILP: error while unsetting CILP preempt pending!"); - gk20a_channel_put(ch); - goto clean_up; - } - - if (gk20a_gr_sm_debugger_attached(g)) { - gk20a_dbg_gpu_post_events(ch); - - if (gk20a_is_channel_marked_as_tsg(ch)) { - struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; - - gk20a_tsg_event_id_post_event(tsg, - NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE); - } else { - gk20a_channel_event_id_post_event(ch, - NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE); - } - } - - gk20a_channel_put(ch); - } + ret = gr_gp10b_handle_fecs_error(g, __ch, isr_data); /* Handle ECC errors */ gr_gv11b_handle_fecs_ecc_error(g, gr_fecs_intr); -clean_up: - /* handle any remaining interrupts */ - return gk20a_gr_handle_fecs_error(g, __ch, isr_data); + return ret; } static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) -- cgit v1.2.2 From 11009e0e69a497780ddb918fab89da62089510ce Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 20 Jun 2017 15:31:46 -0700 Subject: gpu: nvgpu: gv11b: sm register changes gv11b has multiple SMs and SM register addresses have changed as compared to legacy chips. 
JIRA GPUT19X-75 Change-Id: I2319f4c78f3efda3430bab1f5ecf1a068e57a1ca Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1506013 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 41892746..59865a0f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1658,8 +1658,8 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f()); - global_mask = gr_gpcs_tpcs_sm0_hww_global_esr_multiple_warp_errors_pending_f() | - gr_gpcs_tpcs_sm0_hww_global_esr_bpt_pause_pending_f(); + global_mask = gr_gpc0_tpc0_sm0_hww_global_esr_multiple_warp_errors_pending_f() | + gr_gpc0_tpc0_sm0_hww_global_esr_bpt_pause_pending_f(); if (warp_esr != 0 || (global_esr & global_mask) != 0) { *ignore_debugger = true; @@ -1697,13 +1697,13 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, } dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); - if (dbgr_control0 & gr_gpcs_tpcs_sm0_dbgr_control0_single_step_mode_enable_f()) { + if (dbgr_control0 & gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_enable_f()) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n", gpc, tpc); dbgr_control0 = set_field(dbgr_control0, - gr_gpcs_tpcs_sm0_dbgr_control0_single_step_mode_m(), - gr_gpcs_tpcs_sm0_dbgr_control0_single_step_mode_disable_f()); + gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_m(), + gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_disable_f()); gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); } -- cgit v1.2.2 From d1b0920b3f595b57a38f807b040634a331aa1ddd 
Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 20 Jun 2017 21:56:39 -0700 Subject: gpu: nvgpu: gv11b: multiple sm support -Calculate sm reg offset by using gpc, tpc and sm numbers -Init get_esr_sm_sel gr ops JIRA GPUT19X-75 Change-Id: I74cfcae07e385cdad51774b963380c0633adfecf Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1506152 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 75 ++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 59865a0f..e4826376 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -111,6 +111,15 @@ static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) return valid; } +static u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm) +{ + + u32 sm_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_SM_PRI_STRIDE); + u32 sm_offset = sm_pri_stride * sm; + + return sm_offset; +} + static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) @@ -580,7 +589,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, } static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, - bool *post_event, struct channel_gk20a *fault_ch, + u32 sm, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) { int ret = 0; @@ -1629,15 +1638,16 @@ static void gr_gv11b_get_access_map(struct gk20a *g, * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing */ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, - u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr, + u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, bool sm_debugger_attached, struct channel_gk20a 
*fault_ch, bool *early_exit, bool *ignore_debugger) { int ret; bool cilp_enabled = false; u32 global_mask = 0, dbgr_control0, global_esr_copy; - u32 offset = proj_gpc_stride_v() * gpc + - proj_tpc_in_gpc_stride_v() * tpc; + u32 offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); *early_exit = false; *ignore_debugger = false; @@ -1646,8 +1656,9 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP); - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", - gpc, tpc, global_esr); + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "SM Exception received on gpc %d tpc %d sm %d = 0x%08x", + gpc, tpc, sm, global_esr); if (cilp_enabled && sm_debugger_attached) { if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f()) @@ -1665,20 +1676,23 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, *ignore_debugger = true; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n", - gpc, tpc); + "CILP: starting wait for LOCKED_DOWN on " + "gpc %d tpc %d sm %d", + gpc, tpc, sm); if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n", - gpc, tpc); + "CILP: Broadcasting STOP_TRIGGER from " + "gpc %d tpc %d sm %d", + gpc, tpc, sm); gk20a_suspend_all_sms(g, global_mask, false); gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch); } else { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "CILP: STOP_TRIGGER from gpc %d tpc %d\n", - gpc, tpc); + "CILP: STOP_TRIGGER from " + "gpc %d tpc %d sm %d", + gpc, tpc, sm); gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true); } @@ -1686,8 +1700,9 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); 
gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "CILP: HWWs cleared for gpc %d tpc %d\n", - gpc, tpc); + "CILP: HWWs cleared for " + "gpc %d tpc %d sm %d", + gpc, tpc, sm); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n"); ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch); @@ -1699,8 +1714,9 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); if (dbgr_control0 & gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_enable_f()) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n", - gpc, tpc); + "CILP: clearing SINGLE_STEP_MODE " + "before resume for gpc %d tpc %d sm %d", + gpc, tpc, sm); dbgr_control0 = set_field(dbgr_control0, gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_m(), gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_disable_f()); @@ -1708,12 +1724,14 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, } gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "CILP: resume for gpc %d tpc %d\n", - gpc, tpc); + "CILP: resume for gpc %d tpc %d sm %d", + gpc, tpc, sm); gk20a_resume_single_sm(g, gpc, tpc); *ignore_debugger = true; - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc); + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "CILP: All done on gpc %d, tpc %d sm %d", + gpc, tpc, sm); } *early_exit = true; @@ -2320,6 +2338,24 @@ static int gr_gv11b_init_fs_state(struct gk20a *g) return gr_gm20b_init_fs_state(g); } +static void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, + u32 *esr_sm_sel) +{ + u32 reg_val; + u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); + + reg_val = gk20a_readl(g, gr_gpc0_tpc0_sm_tpc_esr_sm_sel_r() + offset); + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "sm tpc esr sm sel reg val: 0x%x", reg_val); + *esr_sm_sel = 0; + if 
(gr_gpc0_tpc0_sm_tpc_esr_sm_sel_sm0_error_v(reg_val)) + *esr_sm_sel = 1; + if (gr_gpc0_tpc0_sm_tpc_esr_sm_sel_sm1_error_v(reg_val)) + *esr_sm_sel |= 1 << 1; + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "esr_sm_sel bitmask: 0x%x", *esr_sm_sel); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2381,4 +2417,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.set_czf_bypass = NULL; gops->gr.handle_gpc_gpcmmu_exception = gr_gv11b_handle_gpc_gpcmmu_exception; + gops->gr.get_esr_sm_sel = gv11b_gr_get_esr_sm_sel; } -- cgit v1.2.2 From 3e195bb02d62694728d80774cd03b486bb34ff14 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 29 Jun 2017 10:40:34 -0700 Subject: gpu: nvgpu: gv11b: rename write_preemption_ptr Change function name write_preemption_ptr to set_preemption_buffer_va to match what is actually done in that function. Change-Id: I91372642f1dba37e5e7bcda29ac9c4271cec4b53 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master/r/1510973 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index e4826376..612c6628 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2292,7 +2292,7 @@ static void gr_gv11b_load_tpc_mask(struct gk20a *g) } -static void gr_gv11b_write_preemption_ptr(struct gk20a *g, +static void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { u32 addr_lo, addr_hi; @@ -2365,7 +2365,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.is_valid_class = gr_gv11b_is_valid_class; gops->gr.is_valid_gfx_class = gr_gv11b_is_valid_gfx_class; gops->gr.is_valid_compute_class = gr_gv11b_is_valid_compute_class; - gops->gr.write_preemption_ptr = gr_gv11b_write_preemption_ptr; + 
gops->gr.set_preemption_buffer_va = gr_gv11b_set_preemption_buffer_va; gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; -- cgit v1.2.2 From 20158e09b7b7a90177945f3e7ceab57f34982b50 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 9 Jun 2017 10:17:13 -0700 Subject: gpu: nvgpu: gv11b: Corrected sm whitelist address corrected whitelist address for gr_pri_gpcs_tpcs_sms_dbgr_control0 JIRA GPUT19X-49 Bug 200311674 Change-Id: I512197c4a6ef97a59bbb303e31ab91f7727bf8d5 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1499394 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Seshendra Gadagottu Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 612c6628..5258e4ce 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1625,7 +1625,7 @@ static void gr_gv11b_get_access_map(struct gk20a *g, 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */ 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ - 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ + 0x419e84, /* gr_pri_gpcs_tpcs_sms_dbgr_control0 */ 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ }; -- cgit v1.2.2 From c5e68d6afa664b49a2045a826f83c46ee4c4485e Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Mon, 12 Jun 2017 11:22:23 -0700 Subject: gpu: nvgpu: gv11b: dump sm regs /d/gpu.0/gr_status will dump sm registers too JIRA GPUT19X-75 Change-Id: If5d19c9ef5c05b6390e8e55c39571869d3d01ae7 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1500879 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Seshendra Gadagottu 
Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 88 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 5258e4ce..0158d706 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1334,6 +1334,87 @@ fail_free: return err; } +static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g, + struct gk20a_debug_output *o, + u32 gpc, u32 tpc, u32 sm, u32 offset) +{ + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR: 0x%x\n", + gpc, tpc, sm, gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset)); + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR_REPORT_MASK: 0x%x\n", + gpc, tpc, sm, gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset)); + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR: 0x%x\n", + gpc, tpc, sm, gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset)); + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n", + gpc, tpc, sm, gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset)); + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_CONTROL0: 0x%x\n", + gpc, tpc, sm, gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset)); + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_STATUS0: 0x%x\n", + gpc, tpc, sm, gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset)); +} + +static int gr_gv11b_dump_gr_sm_regs(struct gk20a *g, + struct gk20a_debug_output *o) +{ + u32 gpc, tpc, sm, sm_per_tpc; + u32 gpc_offset, tpc_offset, offset; + + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n", + gk20a_readl(g, + gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r())); + 
gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_WARP_ESR_REPORT_MASK: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r())); + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_hww_global_esr_r())); + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_CONTROL0: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_control0_r())); + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_STATUS0: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_status0_r())); + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_r())); + + sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + gpc_offset = gk20a_gr_gpc_offset(g, gpc); + + for (tpc = 0; tpc < g->gr.tpc_count; tpc++) { + tpc_offset = gk20a_gr_tpc_offset(g, tpc); + + for (sm = 0; sm < sm_per_tpc; sm++) { + offset = gpc_offset + tpc_offset + + gv11b_gr_sm_offset(g, sm); + + gr_gv11b_dump_gr_per_sm_regs(g, o, + gpc, tpc, sm, offset); + } + } + } + + return 0; +} static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) @@ -1441,6 +1522,10 @@ static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_fecs_current_ctx_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n", gk20a_readl(g, gr_fecs_new_ctx_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_ENABLE : 0x%x\n", + gk20a_readl(g, gr_fecs_host_int_enable_r())); + gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", + gk20a_readl(g, gr_fecs_host_int_status_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n", gk20a_readl(g, gr_pri_be0_crop_status1_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n", @@ -1465,6 +1550,9 @@ static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, 
gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); + + gr_gv11b_dump_gr_sm_regs(g, o); + return 0; } -- cgit v1.2.2 From fd80220dd30ec59f270b435dff8a0e0f512d0c98 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 10:34:20 -0700 Subject: gpu: nvgpu: gv11b: init trigger_suspend gr ops Add gv11b specific trigger_suspend function. SM register addresses have changed as compared to legacy gpu chips. JIRA GPUT19X-75 Change-Id: Ic3099e53bcba19128711a88ecc9e9883f5f7a31f Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1476532 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Seshendra Gadagottu GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 0158d706..58bb08a6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2444,6 +2444,28 @@ static void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, "esr_sm_sel bitmask: 0x%x", *esr_sm_sel); } +static int gv11b_gr_sm_trigger_suspend(struct gk20a *g) +{ + u32 dbgr_control0; + + /* assert stop trigger. uniformity assumption: all SMs will have + * the same state in dbg_control0. 
+ */ + dbgr_control0 = + gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_control0_r()); + dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); + + /* broadcast write */ + gk20a_writel(g, + gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "stop trigger enable: broadcast dbgr_control0: 0x%x ", + dbgr_control0); + + return 0; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2506,4 +2528,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.handle_gpc_gpcmmu_exception = gr_gv11b_handle_gpc_gpcmmu_exception; gops->gr.get_esr_sm_sel = gv11b_gr_get_esr_sm_sel; + gops->gr.trigger_suspend = gv11b_gr_sm_trigger_suspend; } -- cgit v1.2.2 From f2235085d136c50c63d4f66d4baa00f1b46bf22a Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 10:48:23 -0700 Subject: gpu: nvgpu: gv11b: init bpt_reg_info gr ops Take care of t19x reg address changes to support multiple SM JIRA GPUT19X-75 Change-Id: I92b97e60ac82c50a97fe44a85482437446479800 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1477694 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 69 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 58bb08a6..eab78119 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2466,6 +2466,74 @@ static int gv11b_gr_sm_trigger_suspend(struct gk20a *g) return 0; } +static void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) +{ + /* Check if we have at least one valid warp + * get paused state on maxwell + */ + struct gr_gk20a *gr = &g->gr; + u32 gpc, tpc, sm, sm_id; + u32 offset; + u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; + + for (sm_id = 0; sm_id < 
gr->no_of_sm; sm_id++) { + gpc = g->gr.sm_to_cluster[sm_id].gpc_index; + tpc = g->gr.sm_to_cluster[sm_id].tpc_index; + sm = g->gr.sm_to_cluster[sm_id].sm_index; + + offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + /* 64 bit read */ + warps_valid = (u64)gk20a_readl(g, + gr_gpc0_tpc0_sm0_warp_valid_mask_r() + + offset + 4) << 32; + warps_valid |= gk20a_readl(g, + gr_gpc0_tpc0_sm0_warp_valid_mask_r() + + offset); + + /* 64 bit read */ + warps_paused = (u64)gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_r() + + offset + 4) << 32; + warps_paused |= gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_r() + + offset); + + /* 64 bit read */ + warps_trapped = (u64)gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_r() + + offset + 4) << 32; + warps_trapped |= gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_r() + + offset); + + w_state[sm_id].valid_warps[0] = warps_valid; + w_state[sm_id].trapped_warps[0] = warps_trapped; + w_state[sm_id].paused_warps[0] = warps_paused; + } + + + /* Only for debug purpose */ + for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", + sm_id, w_state[sm_id].valid_warps[0]); + gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n", + sm_id, w_state[sm_id].valid_warps[1]); + + gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", + sm_id, w_state[sm_id].trapped_warps[0]); + gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n", + sm_id, w_state[sm_id].trapped_warps[1]); + + gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", + sm_id, w_state[sm_id].paused_warps[0]); + gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n", + sm_id, w_state[sm_id].paused_warps[1]); + } +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2529,4 +2597,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gr_gv11b_handle_gpc_gpcmmu_exception; gops->gr.get_esr_sm_sel = gv11b_gr_get_esr_sm_sel; gops->gr.trigger_suspend = gv11b_gr_sm_trigger_suspend; + 
gops->gr.bpt_reg_info = gv11b_gr_bpt_reg_info; } -- cgit v1.2.2 From f28efb987a13e87d10cc1c6d6e39fcbe9383934f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 11:03:14 -0700 Subject: gpu: nvgpu: gv11b: init update_sm_error_state gr ops Support multiple SM and take care of SM hardware reg address changes JIRA GPUT19X-75 Change-Id: I866011a85da06ca22bc10fda5ab59f84d0782902 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1477686 Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 80 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index eab78119..28284b45 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2534,6 +2534,85 @@ static void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) } } +static int gv11b_gr_update_sm_error_state(struct gk20a *g, + struct channel_gk20a *ch, u32 sm_id, + struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) +{ + u32 gpc, tpc, sm, offset; + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + int err = 0; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + gr->sm_error_states[sm_id].hww_global_esr = + sm_error_state->hww_global_esr; + gr->sm_error_states[sm_id].hww_warp_esr = + sm_error_state->hww_warp_esr; + gr->sm_error_states[sm_id].hww_warp_esr_pc = + sm_error_state->hww_warp_esr_pc; + gr->sm_error_states[sm_id].hww_global_esr_report_mask = + sm_error_state->hww_global_esr_report_mask; + gr->sm_error_states[sm_id].hww_warp_esr_report_mask = + sm_error_state->hww_warp_esr_report_mask; + + err = gr_gk20a_disable_ctxsw(g); + if (err) { + nvgpu_err(g, "unable to stop gr ctxsw"); + goto fail; + } + + gpc = g->gr.sm_to_cluster[sm_id].gpc_index; + tpc = 
g->gr.sm_to_cluster[sm_id].tpc_index; + sm = g->gr.sm_to_cluster[sm_id].sm_index; + + offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + if (gk20a_is_channel_ctx_resident(ch)) { + gk20a_writel(g, + gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, + gr->sm_error_states[sm_id].hww_global_esr); + gk20a_writel(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, + gr->sm_error_states[sm_id].hww_warp_esr); + gk20a_writel(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset, + gr->sm_error_states[sm_id].hww_warp_esr_pc); + gk20a_writel(g, + gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset, + gr->sm_error_states[sm_id].hww_global_esr_report_mask); + gk20a_writel(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset, + gr->sm_error_states[sm_id].hww_warp_esr_report_mask); + } else { + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) + goto enable_ctxsw; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r() + + offset, + gr->sm_error_states[sm_id].hww_global_esr_report_mask, + true); + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r() + + offset, + gr->sm_error_states[sm_id].hww_warp_esr_report_mask, + true); + + gr_gk20a_ctx_patch_write_end(g, ch_ctx); + } + +enable_ctxsw: + err = gr_gk20a_enable_ctxsw(g); + +fail: + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2598,4 +2677,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.get_esr_sm_sel = gv11b_gr_get_esr_sm_sel; gops->gr.trigger_suspend = gv11b_gr_sm_trigger_suspend; gops->gr.bpt_reg_info = gv11b_gr_bpt_reg_info; + gops->gr.update_sm_error_state = gv11b_gr_update_sm_error_state; } -- cgit v1.2.2 From 8f0f88d61e452e81d03670b4a1413fc0ced631c2 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 11:15:43 -0700 Subject: gpu: nvgpu: gv11b: init set_sm_debug_mode gr ops Support multiple 
SM and take care of sm reg addr changes JIRA GPUT19X-75 Change-Id: Id39e269034762c7a8347edaf1fff0b2efd7f153c Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1477705 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 61 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 28284b45..c1dc7920 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -29,6 +29,7 @@ #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/regops_gk20a.h" #include "gm20b/gr_gm20b.h" @@ -2613,6 +2614,65 @@ fail: return err; } +static int gv11b_gr_set_sm_debug_mode(struct gk20a *g, + struct channel_gk20a *ch, u64 sms, bool enable) +{ + struct nvgpu_dbg_gpu_reg_op *ops; + unsigned int i = 0, sm_id; + int err; + + ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); + if (!ops) + return -ENOMEM; + for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + u32 gpc, tpc, sm; + u32 reg_offset, reg_mask, reg_val; + + if (!(sms & (1 << sm_id))) + continue; + + gpc = g->gr.sm_to_cluster[sm_id].gpc_index; + tpc = g->gr.sm_to_cluster[sm_id].tpc_index; + sm = g->gr.sm_to_cluster[sm_id].sm_index; + + reg_offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + ops[i].op = REGOP(WRITE_32); + ops[i].type = REGOP(TYPE_GR_CTX); + ops[i].offset = gr_gpc0_tpc0_sm0_dbgr_control0_r() + reg_offset; + + reg_mask = 0; + reg_val = 0; + if (enable) { + nvgpu_log(g, gpu_dbg_gpu_dbg, + "SM:%d debuggger mode ON", sm); + reg_mask |= + gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_m(); + reg_val |= + gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_on_f(); + } else { + nvgpu_log(g, gpu_dbg_gpu_dbg, + "SM:%d 
debuggger mode Off", sm); + reg_mask |= + gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_m(); + reg_val |= + gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_off_f(); + } + + ops[i].and_n_mask_lo = reg_mask; + ops[i].value_lo = reg_val; + i++; + } + + err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0); + if (err) + nvgpu_err(g, "Failed to access register\n"); + nvgpu_kfree(g, ops); + return err; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2678,4 +2738,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.trigger_suspend = gv11b_gr_sm_trigger_suspend; gops->gr.bpt_reg_info = gv11b_gr_bpt_reg_info; gops->gr.update_sm_error_state = gv11b_gr_update_sm_error_state; + gops->gr.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode; } -- cgit v1.2.2 From ec71ac29576afa676e056b42c13f073a17ba57e9 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 11:31:55 -0700 Subject: gpu: nvgpu: gv11b: init record_sm_error_state gr ops Take care of t19x sm reg address changes and support multiple SM JIRA GPUT19X-75 Change-Id: I675b76b90d08fe75331f0023f1fe722497d06373 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1477673 Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index c1dc7920..8cc1cfde 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2673,6 +2673,45 @@ static int gv11b_gr_set_sm_debug_mode(struct gk20a *g, return err; } +static int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) +{ + int sm_id; + struct gr_gk20a *gr = &g->gr; + u32 offset, sm, sm_per_tpc; + u32 gpc_tpc_offset; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + 
gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc); + + sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g, + gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset)); + + sm = sm_id % sm_per_tpc; + + offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm); + + gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); + + gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset); + + gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset); + + gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset); + + gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset); + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return 0; +} void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2739,4 +2778,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.bpt_reg_info = gv11b_gr_bpt_reg_info; gops->gr.update_sm_error_state = gv11b_gr_update_sm_error_state; gops->gr.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode; + gops->gr.record_sm_error_state = gv11b_gr_record_sm_error_state; } -- cgit v1.2.2 From 6f6329c377ae12f0bf3790c15c582e454d90d3f9 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 12:03:11 -0700 Subject: gpu: nvgpu: gv11b: init set_hww_esr_report_mask gv11b has 2 SMs per TPC. 
Use *gpcs_tpcs_sms_hww_warp/global_esr* registers instead of *gpcs_tpcs_sm_hww_warp/global_esr* GPUT19X-75 Change-Id: I86c7ded32b2b69214e047e6de67a1745f2cef6f3 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1474860 Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 53 ++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8cc1cfde..1291759f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1106,23 +1106,18 @@ static void gr_gv11b_set_tex_in_dbg(struct gk20a *g, u32 data) gk20a_writel(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r(), val); } - static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data) { - u32 val; - gk20a_dbg_fn(""); - if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) - val = 0; - else - val = 0xffffffff; - - /* setup sm warp esr report masks */ - gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(), val); - - /* setup sm global esr report mask */ - gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), val); + if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) { + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(), + 0); + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), + 0); + } else { + g->ops.gr.set_hww_esr_report_mask(g); + } } static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, @@ -2712,6 +2707,37 @@ static int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) return 0; } + +static void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) +{ + + /* clear hww */ + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_r(), 0xffffffff); + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_r(), 0xffffffff); + + /* setup sm warp esr report masks */ + gk20a_writel(g, 
gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(), + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_error_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_api_stack_error_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_pc_wrap_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_pc_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_pc_overflow_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_reg_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_param_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_reg_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f()); + + /* setup sm global esr report mask. 
vat_alarm_report is not enabled */ + gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), + gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_multiple_warp_errors_report_f()); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2779,4 +2805,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.update_sm_error_state = gv11b_gr_update_sm_error_state; gops->gr.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode; gops->gr.record_sm_error_state = gv11b_gr_record_sm_error_state; + gops->gr.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask; } -- cgit v1.2.2 From a4439aee3a47ed9b966e5864a8e18a2bb13a9bb7 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 30 Jun 2017 11:19:24 -0700 Subject: gpu: nvgpu: gv11b: read from unicast register When updating the broadcast register, read the current value from the unicast register. JIRA GPUT19X-75 Change-Id: Ib4a3791304cabe77cf46543d4bec0312c6fcc0fb Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1511735 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 1291759f..5b1b41ce 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2448,7 +2448,7 @@ static int gv11b_gr_sm_trigger_suspend(struct gk20a *g) * the same state in dbg_control0. 
*/ dbgr_control0 = - gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_control0_r()); + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); /* broadcast write */ -- cgit v1.2.2 From 690d560e65af8096bc391064631c74a3dd14fa89 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 30 Jun 2017 11:45:16 -0700 Subject: gpu: nvgpu: gv11b: Use sm dbgr bpt and warp mask 0/1 Instead of assuming that mask_0 and mask_1 are consecutive registers, use the separate mask_1 and mask_0 registers for reading/writing the sm dbgr warp and bpt mask registers. JIRA GPUT19X-75 Change-Id: Ib6843d13828d899d4bd3f12bdf6701325ea760fd Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1511736 GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 5b1b41ce..6ac0c44f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1389,8 +1389,11 @@ static int gr_gv11b_dump_gr_sm_regs(struct gk20a *g, "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_STATUS0: 0x%x\n", gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_status0_r())); gk20a_debug_output(o, - "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK: 0x%x\n", - gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_r())); + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_0: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r())); + gk20a_debug_output(o, + "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_1: 0x%x\n", + gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r())); sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { @@ -2483,26 +2486,26 @@ static void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) /* 64 bit read */ warps_valid = 
(u64)gk20a_readl(g, - gr_gpc0_tpc0_sm0_warp_valid_mask_r() + - offset + 4) << 32; + gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() + + offset) << 32; warps_valid |= gk20a_readl(g, - gr_gpc0_tpc0_sm0_warp_valid_mask_r() + + gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() + offset); /* 64 bit read */ warps_paused = (u64)gk20a_readl(g, - gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_r() + - offset + 4) << 32; + gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() + + offset) << 32; warps_paused |= gk20a_readl(g, - gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_r() + + gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() + offset); /* 64 bit read */ warps_trapped = (u64)gk20a_readl(g, - gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_r() + - offset + 4) << 32; + gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() + + offset) << 32; warps_trapped |= gk20a_readl(g, - gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_r() + + gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() + offset); w_state[sm_id].valid_warps[0] = warps_valid; -- cgit v1.2.2 From edf87a42c3b18c2db8df07705ac26addc8c84df2 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sun, 2 Jul 2017 11:18:26 -0700 Subject: gpu: nvgpu: gv11b: init sm_debugger_attached gr ops Support gv11b sm register address changes. 
JIRA GPUT19X-75 Change-Id: I22562789ef7c064fa36c2d382224af6dc6a806c7 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512206 GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 6ac0c44f..8e1f3af3 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2741,6 +2741,26 @@ static void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_multiple_warp_errors_report_f()); } +static bool gv11b_gr_sm_debugger_attached(struct gk20a *g) +{ + u32 debugger_mode; + u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); + + /* check if sm debugger is attached. + * assumption: all SMs will have debug mode enabled/disabled + * uniformly. + */ + debugger_mode = + gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_v(dbgr_control0); + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "SM Debugger Mode: %d", debugger_mode); + if (debugger_mode == + gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_on_v()) + return true; + + return false; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2809,4 +2829,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode; gops->gr.record_sm_error_state = gv11b_gr_record_sm_error_state; gops->gr.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask; + gops->gr.sm_debugger_attached = gv11b_gr_sm_debugger_attached; } -- cgit v1.2.2 From 30dcb317077a6a1651b856be56f714fb6016e21f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sun, 2 Jul 2017 15:31:03 -0700 Subject: gpu: nvgpu: gv11b: init suspend_single_sm gr ops Take care of SM register address changes. 
JIRA GPUT19X-75 Change-Id: I7fa68dbef014fb07a3656b2816d7d8d538a7cf52 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512207 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 40 +++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8e1f3af3..4f17a33c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1780,7 +1780,8 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, "CILP: STOP_TRIGGER from " "gpc %d tpc %d sm %d", gpc, tpc, sm); - gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true); + g->ops.gr.suspend_single_sm(g, + gpc, tpc, sm, global_mask, true); } /* reset the HWW errors after locking down */ @@ -2761,6 +2762,42 @@ static bool gv11b_gr_sm_debugger_attached(struct gk20a *g) return false; } +static void gv11b_gr_suspend_single_sm(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, + u32 global_esr_mask, bool check_errors) +{ + int err; + u32 dbgr_control0; + u32 offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + /* if an SM debugger isn't attached, skip suspend */ + if (!g->ops.gr.sm_debugger_attached(g)) { + nvgpu_err(g, + "SM debugger not attached, skipping suspend!"); + return; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "suspending gpc:%d, tpc:%d, sm%d", gpc, tpc, sm); + + /* assert stop trigger. 
*/ + dbgr_control0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); + gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, + dbgr_control0); + + err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, + global_esr_mask, check_errors); + if (err) { + nvgpu_err(g, + "SuspendSm failed"); + return; + } +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2830,4 +2867,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.record_sm_error_state = gv11b_gr_record_sm_error_state; gops->gr.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask; gops->gr.sm_debugger_attached = gv11b_gr_sm_debugger_attached; + gops->gr.suspend_single_sm = gv11b_gr_suspend_single_sm; } -- cgit v1.2.2 From cc698f6b9896342a4ab18a677fa05efd5d677bb6 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sun, 2 Jul 2017 16:20:19 -0700 Subject: gpu: nvgpu: gv11b: init suspend_all_sms gr ops This is required to support multiple SM and t19x sm register address changes JIRA GPUT19X-75 Change-Id: I46b7d58ed02710339aa27cd9db999aa60fbd4dd9 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512208 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 49 +++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 4f17a33c..84ff1335 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1772,7 +1772,8 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, "CILP: Broadcasting STOP_TRIGGER from " "gpc %d tpc %d sm %d", gpc, tpc, sm); - gk20a_suspend_all_sms(g, global_mask, false); + g->ops.gr.suspend_all_sms(g, + global_mask, false); 
gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch); } else { @@ -2798,6 +2799,51 @@ static void gv11b_gr_suspend_single_sm(struct gk20a *g, } } +static void gv11b_gr_suspend_all_sms(struct gk20a *g, + u32 global_esr_mask, bool check_errors) +{ + struct gr_gk20a *gr = &g->gr; + u32 gpc, tpc, sm; + int err; + u32 dbgr_control0; + u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + + /* if an SM debugger isn't attached, skip suspend */ + if (!g->ops.gr.sm_debugger_attached(g)) { + nvgpu_err(g, + "SM debugger not attached, skipping suspend!"); + return; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "suspending all sms"); + + /* assert stop trigger. uniformity assumption: all SMs will have + * the same state in dbg_control0. + */ + dbgr_control0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); + dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); + + /* broadcast write */ + gk20a_writel(g, + gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); + + for (gpc = 0; gpc < gr->gpc_count; gpc++) { + for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { + for (sm = 0; sm < sm_per_tpc; sm++) { + err = gk20a_gr_wait_for_sm_lock_down(g, + gpc, tpc, + global_esr_mask, check_errors); + if (err) { + nvgpu_err(g, + "SuspendAllSms failed"); + return; + } + } + } + } +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2868,4 +2914,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask; gops->gr.sm_debugger_attached = gv11b_gr_sm_debugger_attached; gops->gr.suspend_single_sm = gv11b_gr_suspend_single_sm; + gops->gr.suspend_all_sms = gv11b_gr_suspend_all_sms; } -- cgit v1.2.2 From 7185dcdbf8292bbdfceae49702107f0661397d21 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sun, 2 Jul 2017 16:52:18 -0700 Subject: gpu: nvgpu: gv11b: init resume_single_sm gr ops This is required to support multiple SM and t19x sm register address changes JIRA GPUT19X-75 
Change-Id: I0ebbfdad73d6212997a21f9240f5d4bc2f28ab2f Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512209 GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 71 +++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 84ff1335..f7d5eb62 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1815,7 +1815,7 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: resume for gpc %d tpc %d sm %d", gpc, tpc, sm); - gk20a_resume_single_sm(g, gpc, tpc); + g->ops.gr.resume_single_sm(g, gpc, tpc, sm); *ignore_debugger = true; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, @@ -2844,6 +2844,74 @@ static void gv11b_gr_suspend_all_sms(struct gk20a *g, } } +static void gv11b_gr_resume_single_sm(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm) +{ + u32 dbgr_control0, dbgr_status0; + u32 offset; + /* + * The following requires some clarification. Despite the fact that both + * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their + * names, only one is actually a trigger, and that is the STOP_TRIGGER. + * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to + * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0 + * (_DISABLE) as well. + + * Advice from the arch group: Disable the stop trigger first, as a + * separate operation, in order to ensure that the trigger has taken + * effect, before enabling the run trigger. 
+ */ + + offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "resuming gpc:%d, tpc:%d, sm%d", gpc, tpc, sm); + dbgr_control0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + dbgr_status0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "before stop trigger disable: " + "dbgr_control0 = 0x%x dbgr_status0: 0x%x", + dbgr_control0, dbgr_status0); + + /*De-assert stop trigger */ + dbgr_control0 = set_field(dbgr_control0, + gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_m(), + gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_disable_f()); + gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + + offset, dbgr_control0); + + dbgr_control0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + dbgr_status0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "before run trigger: " + "dbgr_control0 = 0x%x dbgr_status0: 0x%x", + dbgr_control0, dbgr_status0); + /* Run trigger */ + dbgr_control0 |= + gr_gpc0_tpc0_sm0_dbgr_control0_run_trigger_task_f(); + gk20a_writel(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + + offset, dbgr_control0); + + dbgr_control0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + dbgr_status0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); + /* run trigger is not sticky bit. 
SM clears it immediately */ + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "after run trigger: " + "dbgr_control0 = 0x%x dbgr_status0: 0x%x", + dbgr_control0, dbgr_status0); + +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2915,4 +2983,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.sm_debugger_attached = gv11b_gr_sm_debugger_attached; gops->gr.suspend_single_sm = gv11b_gr_suspend_single_sm; gops->gr.suspend_all_sms = gv11b_gr_suspend_all_sms; + gops->gr.resume_single_sm = gv11b_gr_resume_single_sm; } -- cgit v1.2.2 From 8fb191aec01d0b4c83fbfe12310ed7827d87a873 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sun, 2 Jul 2017 17:11:46 -0700 Subject: gpu: nvgpu: gv11b: init resume_all_sms gr ops This is required to support multiple SM and t19x sm register address changes JIRA GPUT19X-75 Change-Id: Ia5c0a3d1dead9c6094ca28716c06929dd3461814 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512210 GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 66 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f7d5eb62..0f331293 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2912,6 +2912,71 @@ static void gv11b_gr_resume_single_sm(struct gk20a *g, } +static void gv11b_gr_resume_all_sms(struct gk20a *g) +{ + u32 dbgr_control0, dbgr_status0; + /* + * The following requires some clarification. Despite the fact that both + * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their + * names, only one is actually a trigger, and that is the STOP_TRIGGER. + * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to + * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0 + * (_DISABLE) as well. 
+ + * Advice from the arch group: Disable the stop trigger first, as a + * separate operation, in order to ensure that the trigger has taken + * effect, before enabling the run trigger. + */ + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "resuming all sms"); + + /* Read from unicast registers */ + dbgr_control0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); + dbgr_status0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r()); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "before stop trigger disable: " + "dbgr_control0 = 0x%x dbgr_status0: 0x%x", + dbgr_control0, dbgr_status0); + + dbgr_control0 = set_field(dbgr_control0, + gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_m(), + gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_disable_f()); + /* Write to broadcast registers */ + gk20a_writel(g, + gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); + + /* Read from unicast registers */ + dbgr_control0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); + dbgr_status0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r()); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "before run trigger: " + "dbgr_control0 = 0x%x dbgr_status0: 0x%x", + dbgr_control0, dbgr_status0); + /* Run trigger */ + dbgr_control0 |= + gr_gpc0_tpc0_sm0_dbgr_control0_run_trigger_task_f(); + /* Write to broadcast registers */ + gk20a_writel(g, + gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); + + /* Read from unicast registers */ + dbgr_control0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); + dbgr_status0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r()); + /* run trigger is not sticky bit. 
SM clears it immediately */ + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "after run trigger: " + "dbgr_control0 = 0x%x dbgr_status0: 0x%x", + dbgr_control0, dbgr_status0); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -2984,4 +3049,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.suspend_single_sm = gv11b_gr_suspend_single_sm; gops->gr.suspend_all_sms = gv11b_gr_suspend_all_sms; gops->gr.resume_single_sm = gv11b_gr_resume_single_sm; + gops->gr.resume_all_sms = gv11b_gr_resume_all_sms; } -- cgit v1.2.2 From f5b5099cf87a8d3219301c97189d581cafeaac4e Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 16:28:19 -0700 Subject: gpu: nvgpu: gv11b: init resume_from_pause gr ops JIRA GPUT19X-75 Change-Id: Ie741bf50c771f21de3bf762ca506a36276f38437 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512211 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 0f331293..8094ea43 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2977,6 +2977,22 @@ static void gv11b_gr_resume_all_sms(struct gk20a *g) dbgr_control0, dbgr_status0); } +static int gv11b_gr_resume_from_pause(struct gk20a *g) +{ + int err = 0; + + /* Clear the pause mask to tell the GPU we want to resume everyone */ + gk20a_writel(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r(), 0); + + /* explicitly re-enable forwarding of SM interrupts upon any resume */ + gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), + gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); + + g->ops.gr.resume_all_sms(g); + + return err; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3050,4 +3066,5 @@ void gv11b_init_gr(struct 
gpu_ops *gops) gops->gr.suspend_all_sms = gv11b_gr_suspend_all_sms; gops->gr.resume_single_sm = gv11b_gr_resume_single_sm; gops->gr.resume_all_sms = gv11b_gr_resume_all_sms; + gops->gr.resume_from_pause = gv11b_gr_resume_from_pause; } -- cgit v1.2.2 From 37fa5128ec260bc9ebb2e902ac2dfe9baead4656 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 16:34:10 -0700 Subject: gpu: nvgpu: gv11b: init get_sm_hww_warp_esr gr ops get sm hww_warp_esr reg val JIRA GPUT19X-75 Change-Id: I4ed04045e947c417291b7b1e2fc81bbe51f0b48c Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1512212 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8094ea43..2b083203 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1928,16 +1928,6 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, return ret; } -static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr) -{ - if (!(hww_warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_wrap_id_m())) - hww_warp_esr = set_field(hww_warp_esr, - gr_gpc0_tpc0_sm0_hww_warp_esr_addr_error_type_m(), - gr_gpc0_tpc0_sm0_hww_warp_esr_addr_error_type_none_f()); - - return hww_warp_esr; -} - static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) { u32 map; @@ -2993,6 +2983,18 @@ static int gv11b_gr_resume_from_pause(struct gk20a *g) return err; } +static u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm) +{ + u32 offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + u32 hww_warp_esr = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset); + return hww_warp_esr; +} + void gv11b_init_gr(struct gpu_ops 
*gops) { gp10b_init_gr(gops); @@ -3030,7 +3032,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; gops->gr.enable_exceptions = gr_gv11b_enable_exceptions; gops->gr.enable_hww_exceptions = gr_gv11b_enable_hww_exceptions; - gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; gops->gr.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception; gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; @@ -3067,4 +3068,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.resume_single_sm = gv11b_gr_resume_single_sm; gops->gr.resume_all_sms = gv11b_gr_resume_all_sms; gops->gr.resume_from_pause = gv11b_gr_resume_from_pause; + gops->gr.get_sm_hww_warp_esr = gv11b_gr_get_sm_hww_warp_esr; } -- cgit v1.2.2 From d250adf53e836ea982193ad8a3e08084411d81a2 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 21 Jun 2017 20:47:25 -0700 Subject: gpu: nvgpu: gv11b: init gr ops get_sm_hww_global_esr Required for multiple SM support and sm register address changes JIRA GPUT19X-75 Change-Id: I3fb62a935636f3df050ed125ebe57d8469069591 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1514035 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 2b083203..0fcbd0d7 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1786,7 +1786,8 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, } /* reset the HWW errors after locking down */ - global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); + global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g, + gpc, tpc, sm); gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: HWWs cleared for " @@ -2995,6 
+2996,19 @@ static u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g, return hww_warp_esr; } +static u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm) +{ + u32 offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + u32 hww_global_esr = gk20a_readl(g, + gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); + + return hww_global_esr; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3069,4 +3083,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.resume_all_sms = gv11b_gr_resume_all_sms; gops->gr.resume_from_pause = gv11b_gr_resume_from_pause; gops->gr.get_sm_hww_warp_esr = gv11b_gr_get_sm_hww_warp_esr; + gops->gr.get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr; } -- cgit v1.2.2 From bdf5207583a3c8f3bd4d844548d443f1454d26f5 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 22 Jun 2017 10:37:09 -0700 Subject: gpu: nvgpu: gv11b: init get_sm_no_lock_down_hww_global_esr_mask gr ops Support SM register changes JIRA GPUT19X-75 Change-Id: I5d5e702d681398a8a8181d912e8c691c15e265d9 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1514036 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 0fcbd0d7..a726d058 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3009,6 +3009,23 @@ static u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g, return hww_global_esr; } +static u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g) +{ + /* + * These three interrupts don't require locking down the SM. They can + * be handled by usermode clients as they aren't fatal. 
Additionally, + * usermode clients may wish to allow some warps to execute while others + * are at breakpoints, as opposed to fatal errors where all warps should + * halt. + */ + u32 global_esr_mask = + gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f() | + gr_gpc0_tpc0_sm0_hww_global_esr_bpt_pause_pending_f() | + gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f(); + + return global_esr_mask; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3084,4 +3101,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.resume_from_pause = gv11b_gr_resume_from_pause; gops->gr.get_sm_hww_warp_esr = gv11b_gr_get_sm_hww_warp_esr; gops->gr.get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr; + gops->gr.get_sm_no_lock_down_hww_global_esr_mask = + gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask; } -- cgit v1.2.2 From 013ead1587b47c296b0328ef076b0ee4628160aa Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 22 Jun 2017 11:43:36 -0700 Subject: gpu: nvgpu: gv11b: init sm lock_down gr ops init lock_down_sm and wait_for_sm_lock_down gr ops Required to support multiple SM and register address changes JIRA GPUT19X-75 Change-Id: I992d1c0c5a1f559dc57bcef50025fa42913d6761 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1514037 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 158 ++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a726d058..63107cfc 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2781,7 +2781,7 @@ static void gv11b_gr_suspend_single_sm(struct gk20a *g, gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); - err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, + err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, 
global_esr_mask, check_errors); if (err) { nvgpu_err(g, @@ -2822,8 +2822,8 @@ static void gv11b_gr_suspend_all_sms(struct gk20a *g, for (gpc = 0; gpc < gr->gpc_count; gpc++) { for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { for (sm = 0; sm < sm_per_tpc; sm++) { - err = gk20a_gr_wait_for_sm_lock_down(g, - gpc, tpc, + err = g->ops.gr.wait_for_sm_lock_down(g, + gpc, tpc, sm, global_esr_mask, check_errors); if (err) { nvgpu_err(g, @@ -3026,6 +3026,156 @@ static u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g) return global_esr_mask; } +static void gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(struct gk20a *g, + u32 offset, bool timeout) +{ + u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; + u32 dbgr_control0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + u32 dbgr_status0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); + /* 64 bit read */ + warps_valid = + (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() + + offset) << 32; + warps_valid |= gk20a_readl(g, + gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() + offset); + + /* 64 bit read */ + warps_paused = + (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() + + offset) << 32; + warps_paused |= gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() + offset); + + /* 64 bit read */ + warps_trapped = + (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() + + offset) << 32; + warps_trapped |= gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() + offset); + if (timeout) + nvgpu_err(g, + "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx " + "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", + dbgr_status0, dbgr_control0, warps_valid, + warps_paused, warps_trapped); + else + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx " + "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", + dbgr_status0, dbgr_control0, warps_valid, + warps_paused, warps_trapped); +} + +static int 
gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, + u32 global_esr_mask, bool check_errors) +{ + bool locked_down; + bool no_error_pending; + u32 delay = GR_IDLE_CHECK_DEFAULT; + bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); + u32 dbgr_status0 = 0; + u32 warp_esr, global_esr; + struct nvgpu_timeout timeout; + u32 offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: locking down SM%d", gpc, tpc, sm); + + nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), + NVGPU_TIMER_CPU_TIMER); + + /* wait for the sm to lock down */ + do { + global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm); + dbgr_status0 = gk20a_readl(g, + gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); + + warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm); + + locked_down = + (gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_v(dbgr_status0) == + gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_true_v()); + no_error_pending = + check_errors && + (gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr) == + gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v()) && + ((global_esr & ~global_esr_mask) == 0); + + if (locked_down) { + /* + * if SM reports locked down, it means that SM is idle and + * trapped and also that one of these conditions is true + * 1) sm is nonempty and all valid warps are paused + * 2) sm is empty and held in trapped state due to stop trigger + * 3) sm is nonempty and some warps are not paused, but are + * instead held at RTT due to an "active" stop trigger + * Check for Paused warp mask != Valid + * warp mask after SM reports it is locked down in order to + * distinguish case 1 from case 3. When case 3 is detected, + * it implies a misprogrammed trap handler code, as all warps + * in the handler must promise to BPT.PAUSE instead of RTT + * whenever SR64 read in trap mode indicates stop trigger + * is asserted.
+ */ + gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, + offset, false); + } + + if (locked_down || no_error_pending) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: locked down SM%d", gpc, tpc, sm); + return 0; + } + + /* if an mmu fault is pending and mmu debug mode is not + * enabled, the sm will never lock down. + */ + if (!mmu_debug_mode_enabled && + (g->ops.mm.mmu_fault_pending(g))) { + nvgpu_err(g, + "GPC%d TPC%d: mmu fault pending," + " SM%d will never lock down!", gpc, tpc, sm); + return -EFAULT; + } + + nvgpu_usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + } while (!nvgpu_timeout_expired(&timeout)); + + nvgpu_err(g, "GPC%d TPC%d: timed out while trying to " + "lock down SM%d", gpc, tpc, sm); + gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, offset, true); + + return -ETIMEDOUT; +} + +static int gv11b_gr_lock_down_sm(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, + bool check_errors) +{ + u32 dbgr_control0; + u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm); + + /* assert stop trigger */ + dbgr_control0 = + gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); + dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); + gk20a_writel(g, + gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); + + return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask, + check_errors); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3103,4 +3253,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr; gops->gr.get_sm_no_lock_down_hww_global_esr_mask = gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask; + gops->gr.lock_down_sm = gv11b_gr_lock_down_sm; + gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down; } -- cgit 
v1.2.2 From 99aeb5ae3b5606ffbeb168d25bec4adc541e1236 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 22 Jun 2017 13:39:45 -0700 Subject: gpu: nvgpu: gv11b: init clear_sm_hww gr ops Required for multiple SM support and SM register address changes JIRA GPUT19X-75 Change-Id: I552bae890a416dc4a430b907641b5b3d09b638c7 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1514038 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 63107cfc..9da270ac 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1788,7 +1788,8 @@ static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, /* reset the HWW errors after locking down */ global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm); - gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy); + g->ops.gr.clear_sm_hww(g, + gpc, tpc, sm, global_esr_copy); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: HWWs cleared for " "gpc %d tpc %d sm %d", @@ -3176,6 +3177,26 @@ static int gv11b_gr_lock_down_sm(struct gk20a *g, check_errors); } +static void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, + u32 global_esr) +{ + u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + + gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, + global_esr); + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "Cleared HWW global esr, current reg val: 0x%x", + gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + + offset)); + + gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, + "Cleared HWW warp esr, current reg val: 0x%x", + gk20a_readl(g, 
gr_gpc0_tpc0_sm0_hww_warp_esr_r() + + offset)); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3255,4 +3276,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask; gops->gr.lock_down_sm = gv11b_gr_lock_down_sm; gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down; + gops->gr.clear_sm_hww = gv11b_gr_clear_sm_hww; } -- cgit v1.2.2 From 1f09340f82af277722deaa0e04e98a88f1e41044 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 22 Jun 2017 14:06:18 -0700 Subject: gpu: nvgpu: gv11b: init handle_sm_exception gr ops gr_gk20a_handle_sm_exception is initialized to handle_sm_exception and new gr ops handle_tpc_sm_ecc_exception is initialized to gr_gv11b_handle_tpc_sm_ecc_exception to handle sm ecc errors per tpc. JIRA GPUT19X-75 JIRA GPUT19X-109 Change-Id: Iefa95b185b9eed23f9f54e231405fcd9fd83ccc0 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1514039 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9da270ac..d61506c2 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -589,9 +589,10 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, } -static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, - u32 sm, bool *post_event, struct channel_gk20a *fault_ch, - u32 *hww_global_esr) +static int gr_gv11b_handle_tpc_sm_ecc_exception(struct gk20a *g, + u32 gpc, u32 tpc, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr) { int ret = 0; @@ -3228,7 +3229,7 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; gops->gr.set_gpc_tpc_mask = 
gr_gv11b_set_gpc_tpc_mask; gops->gr.get_access_map = gr_gv11b_get_access_map; - gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; + gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception; gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception; gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; @@ -3277,4 +3278,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.lock_down_sm = gv11b_gr_lock_down_sm; gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down; gops->gr.clear_sm_hww = gv11b_gr_clear_sm_hww; + gops->gr.handle_tpc_sm_ecc_exception = + gr_gv11b_handle_tpc_sm_ecc_exception; } -- cgit v1.2.2 From cc940da42f34568d6327ee20653725d11b1a3258 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sun, 9 Jul 2017 14:00:24 -0700 Subject: gpu: nvgpu: gv11b: enable and handle mpc exception Implement gr ops to handle MPC exception triggered per TPC JIRA GPUT19X-69 Change-Id: Ia92b1d51ad896116b25d71e07ed26f1539475be8 Signed-off-by: Seema Khowala Reviewed-on: https://git-master/r/1515915 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 40 +++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d61506c2..2c3b0820 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -885,7 +885,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) u32 tpc_mask; gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), - gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); + gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f() | + gr_gpcs_tpcs_tpccs_tpc_exception_en_mpc_enabled_f()); tpc_mask = gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); @@ -2973,13 +2974,16 
@@ static void gv11b_gr_resume_all_sms(struct gk20a *g) static int gv11b_gr_resume_from_pause(struct gk20a *g) { int err = 0; + u32 reg_val; /* Clear the pause mask to tell the GPU we want to resume everyone */ gk20a_writel(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r(), 0); /* explicitly re-enable forwarding of SM interrupts upon any resume */ - gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), - gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); + reg_val = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r()); + reg_val |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(); + + gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), reg_val); g->ops.gr.resume_all_sms(g); @@ -3198,6 +3202,34 @@ static void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, offset)); } +static int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g, + u32 gpc, u32 tpc, bool *post_event) +{ + u32 esr; + u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); + u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r() + + offset); + + if (!(tpc_exception & gr_gpc0_tpc0_tpccs_tpc_exception_mpc_m())) + return 0; + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d MPC exception", gpc, tpc); + + esr = gk20a_readl(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset); + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr 0x%08x", esr); + + esr = gk20a_readl(g, gr_gpc0_tpc0_mpc_hww_esr_info_r() + offset); + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "mpc hww esr info: veid 0x%08x", + gr_gpc0_tpc0_mpc_hww_esr_info_veid_v(esr)); + + gk20a_writel(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset, + gr_gpc0_tpc0_mpc_hww_esr_reset_trigger_f()); + + return 0; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3280,4 +3312,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.clear_sm_hww = gv11b_gr_clear_sm_hww; gops->gr.handle_tpc_sm_ecc_exception = gr_gv11b_handle_tpc_sm_ecc_exception; + 
gops->gr.handle_tpc_mpc_exception = + gr_gv11b_handle_tpc_mpc_exception; } -- cgit v1.2.2 From df022d27ddf2f66bff04170bb454fa26db8d51b1 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Sat, 8 Jul 2017 17:20:26 -0700 Subject: gpu: nvgpu: gv11b: support SET_SKEDCHECK s/w methods Support sw method NVC397_SET_SKEDCHECK and NVC3C0_SET_SKEDCHECK data fields are data:0 SKEDCHECK_18_DISABLE data:1 SKEDCHECK_18_ENABLE Bug 200315442 Change-Id: I0652434ab0b4d6e49dab94be329072861e99c38c Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1515772 GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 40 ++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 2c3b0820..6f3b5f0f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -299,8 +299,6 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, static void gr_gv11b_enable_hww_exceptions(struct gk20a *g) { - u32 val; - /* enable exceptions */ gk20a_writel(g, gr_fe_hww_esr_r(), gr_fe_hww_esr_en_enable_f() | @@ -308,14 +306,6 @@ static void gr_gv11b_enable_hww_exceptions(struct gk20a *g) gk20a_writel(g, gr_memfmt_hww_esr_r(), gr_memfmt_hww_esr_en_enable_f() | gr_memfmt_hww_esr_reset_active_f()); - /* WAR for 200315442 */ - val = gk20a_readl(g, gr_sked_hww_esr_en_r()); - val = set_field(val, - gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(), - gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_disabled_f() - ); - nvgpu_log_info(g, "sked_hww_esr_en = 0x%x", val); - gk20a_writel(g, gr_sked_hww_esr_en_r(), val); } static void gr_gv11b_enable_exceptions(struct gk20a *g) @@ -1108,6 +1098,30 @@ static void gr_gv11b_set_tex_in_dbg(struct gk20a *g, u32 data) gk20a_writel(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r(), val); } +static void 
gr_gv11b_set_skedcheck(struct gk20a *g, u32 data) +{ + u32 reg_val; + + reg_val = gk20a_readl(g, gr_sked_hww_esr_en_r()); + + if ((data & NVC397_SET_SKEDCHECK_18_MASK) == + NVC397_SET_SKEDCHECK_18_DISABLE) { + reg_val = set_field(reg_val, + gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(), + gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_disabled_f() + ); + } else if ((data & NVC397_SET_SKEDCHECK_18_MASK) == + NVC397_SET_SKEDCHECK_18_ENABLE) { + reg_val = set_field(reg_val, + gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(), + gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_enabled_f() + ); + } + nvgpu_log_info(g, "sked_hww_esr_en = 0x%x", reg_val); + gk20a_writel(g, gr_sked_hww_esr_en_r(), reg_val); + +} + static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data) { gk20a_dbg_fn(""); @@ -1132,6 +1146,9 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, case NVC0C0_SET_SHADER_EXCEPTIONS: gv11b_gr_set_shader_exceptions(g, data); break; + case NVC3C0_SET_SKEDCHECK: + gr_gv11b_set_skedcheck(g, data); + break; default: goto fail; } @@ -1157,6 +1174,9 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, case NVC397_SET_TEX_IN_DBG: gr_gv11b_set_tex_in_dbg(g, data); break; + case NVC397_SET_SKEDCHECK: + gr_gv11b_set_skedcheck(g, data); + break; default: goto fail; } -- cgit v1.2.2 From 2272cedfbacf271a0faacfd054240fea3027423d Mon Sep 17 00:00:00 2001 From: Lauri Peltonen Date: Mon, 10 Jul 2017 15:06:31 +0300 Subject: gpu: nvgpu: Support SET_BES_CROP_DEBUG3 sw method The new SET_BES_CROP_DEBUG3 sw method is used to flip two fields in the NV_PGRAPH_PRI_BES_CROP_DEBUG3 register. The sw method is used by the user space driver to disable enough ROP optimizations to maintain ZBC state of target tiles.
Bug 1942454 Change-Id: I3109fb4120674b15db4998693d0aa65bf0c3c8b5 Signed-off-by: Lauri Peltonen Reviewed-on: https://git-master.nvidia.com/r/1516205 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Vijayakumar Subbu GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 6f3b5f0f..f44c60b0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1177,6 +1177,9 @@ static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, case NVC397_SET_SKEDCHECK: gr_gv11b_set_skedcheck(g, data); break; + case NVC397_SET_BES_CROP_DEBUG3: + g->ops.gr.set_bes_crop_debug3(g, data); + break; default: goto fail; } -- cgit v1.2.2 From 4df5427c15e28a3bd131a4bdaed413de2a9a5e99 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 6 Jun 2017 22:56:11 -0700 Subject: gpu: nvgpu: gv11b: init perf related gr ops Implement gv11b specific perf gr ops JIRA GPUT19X-49 Bug 200311674 Change-Id: Ia65fe84df6e38e25f87d2c1b21c04b518c334d42 Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1497402 GVS: Gerrit_Virtual_Submit Reviewed-by: Tushar Kashalikar Tested-by: Tushar Kashalikar Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 86 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f44c60b0..eefbdf3b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3253,6 +3253,87 @@ static int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g, return 0; } +static const u32 _num_ovr_perf_regs = 20; +static u32 _ovr_perf_regs[20] = { 0, }; + +static void gv11b_gr_init_ovr_sm_dsm_perf(void) +{ + if (_ovr_perf_regs[0] != 0) + return; + 
+ _ovr_perf_regs[0] = gr_egpc0_etpc0_sm_dsm_perf_counter_control_sel0_r(); + _ovr_perf_regs[1] = gr_egpc0_etpc0_sm_dsm_perf_counter_control_sel1_r(); + _ovr_perf_regs[2] = gr_egpc0_etpc0_sm_dsm_perf_counter_control0_r(); + _ovr_perf_regs[3] = gr_egpc0_etpc0_sm_dsm_perf_counter_control1_r(); + _ovr_perf_regs[4] = gr_egpc0_etpc0_sm_dsm_perf_counter_control2_r(); + _ovr_perf_regs[5] = gr_egpc0_etpc0_sm_dsm_perf_counter_control3_r(); + _ovr_perf_regs[6] = gr_egpc0_etpc0_sm_dsm_perf_counter_control4_r(); + _ovr_perf_regs[7] = gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r(); + _ovr_perf_regs[8] = gr_egpc0_etpc0_sm_dsm_perf_counter0_control_r(); + _ovr_perf_regs[9] = gr_egpc0_etpc0_sm_dsm_perf_counter1_control_r(); + _ovr_perf_regs[10] = gr_egpc0_etpc0_sm_dsm_perf_counter2_control_r(); + _ovr_perf_regs[11] = gr_egpc0_etpc0_sm_dsm_perf_counter3_control_r(); + _ovr_perf_regs[12] = gr_egpc0_etpc0_sm_dsm_perf_counter4_control_r(); + _ovr_perf_regs[13] = gr_egpc0_etpc0_sm_dsm_perf_counter5_control_r(); + _ovr_perf_regs[14] = gr_egpc0_etpc0_sm_dsm_perf_counter6_control_r(); + _ovr_perf_regs[15] = gr_egpc0_etpc0_sm_dsm_perf_counter7_control_r(); + + _ovr_perf_regs[16] = gr_egpc0_etpc0_sm0_dsm_perf_counter4_r(); + _ovr_perf_regs[17] = gr_egpc0_etpc0_sm0_dsm_perf_counter5_r(); + _ovr_perf_regs[18] = gr_egpc0_etpc0_sm0_dsm_perf_counter6_r(); + _ovr_perf_regs[19] = gr_egpc0_etpc0_sm0_dsm_perf_counter7_r(); +} + +/* Following are the blocks of registers that the ucode + * stores in the extended region. + */ +/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? 
*/ +static const u32 _num_sm_dsm_perf_regs; +/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ +static const u32 _num_sm_dsm_perf_ctrl_regs = 2; +static u32 *_sm_dsm_perf_regs; +static u32 _sm_dsm_perf_ctrl_regs[2]; + +static void gv11b_gr_init_sm_dsm_reg_info(void) +{ + if (_sm_dsm_perf_ctrl_regs[0] != 0) + return; + + _sm_dsm_perf_ctrl_regs[0] = + gr_egpc0_etpc0_sm_dsm_perf_counter_control0_r(); + _sm_dsm_perf_ctrl_regs[1] = + gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r(); +} + +static void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride) +{ + *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; + *sm_dsm_perf_regs = _sm_dsm_perf_regs; + *perf_register_stride = + ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); +} + +static void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_ctrl_regs, + u32 **sm_dsm_perf_ctrl_regs, + u32 *ctrl_register_stride) +{ + *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; + *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; + *ctrl_register_stride = + ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); +} + +static void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, + u32 **ovr_perf_regs) +{ + *num_ovr_perf_regs = _num_ovr_perf_regs; + *ovr_perf_regs = _ovr_perf_regs; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3337,4 +3418,9 @@ void gv11b_init_gr(struct gpu_ops *gops) gr_gv11b_handle_tpc_sm_ecc_exception; gops->gr.handle_tpc_mpc_exception = gr_gv11b_handle_tpc_mpc_exception; + gops->gr.init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf; + gops->gr.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info; + gops->gr.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs; + gops->gr.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs; + gops->gr.get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs; } -- cgit v1.2.2 From 
c7d48710b09dcd64c55f7ae4f264499c2bbc866c Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Mon, 17 Jul 2017 14:45:35 -0700 Subject: gpu: nvgpu: gv11b: init access_smpc_reg gr ops This is needed to support t19x smpc register addresses JIRA GPUT19X-49 Bug 200311674 Change-Id: I67146d997d96eeca4344ed0fb4cabbc216461c6c Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1508543 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index eefbdf3b..00bfde6b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -30,6 +30,7 @@ #include "gk20a/gr_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" #include "gk20a/regops_gk20a.h" +#include "gk20a/gr_pri_gk20a.h" #include "gm20b/gr_gm20b.h" @@ -3334,6 +3335,49 @@ static void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, *ovr_perf_regs = _ovr_perf_regs; } +static void gv11b_gr_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) +{ + u32 reg_val; + u32 quad_ctrl; + u32 half_ctrl; + u32 tpc, gpc; + u32 gpc_tpc_addr; + u32 gpc_tpc_stride; + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); + + gpc = pri_get_gpc_num(g, offset); + gpc_tpc_addr = pri_gpccs_addr_mask(offset); + tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr); + + quad_ctrl = quad & 0x1; /* first bit tells us quad */ + half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ + + gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride; + gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; + + /* read from unicast 
reg */ + reg_val = gk20a_readl(g, gpc_tpc_addr); + reg_val = set_field(reg_val, + gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(), + gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl)); + + /* write to broadcast reg */ + gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), reg_val); + + gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride; + reg_val = gk20a_readl(g, gpc_tpc_addr); + reg_val = set_field(reg_val, + gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(), + gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl)); + + /* write to broadcast reg */ + gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), reg_val); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3423,4 +3467,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs; gops->gr.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs; gops->gr.get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs; + gops->gr.access_smpc_reg = gv11b_gr_access_smpc_reg; } -- cgit v1.2.2 From 7ab28a41842df2045533b0836233db3563cd531f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 6 Jun 2017 12:04:46 -0700 Subject: gpu: nvgpu: gv11b: support egpc and etpc context regoptype - implement is_egpc_addr, is_etpc_addr and get_egpc_etpc_num gr ops - implement decode and create priv addr for egpc/etpc JIRA GPUT19X-49 Bug 200311674 Signed-off-by: Seema Khowala Change-Id: Ia0cef51b2064df28460711185cd90b60aac03e4f Reviewed-on: https://git-master.nvidia.com/r/1522450 GVS: Gerrit_Virtual_Submit Reviewed-by: Tushar Kashalikar Tested-by: Tushar Kashalikar Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 196 +++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 00bfde6b..fab2ae9a 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ 
b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3378,6 +3378,196 @@ static void gv11b_gr_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), reg_val); } +static bool pri_is_egpc_addr_shared(struct gk20a *g, u32 addr) +{ + u32 egpc_shared_base = EGPC_PRI_SHARED_BASE; + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + + return (addr >= egpc_shared_base) && + (addr < egpc_shared_base + gpc_stride); +} + +static bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) +{ + u32 egpc_base = g->ops.gr.get_egpc_base(g); + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + + return ((addr >= egpc_base) && + (addr < egpc_base + num_gpcs * gpc_stride)) || + pri_is_egpc_addr_shared(g, addr); +} + +static bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) +{ + u32 egpc_addr = 0; + + if (g->ops.gr.is_egpc_addr(g, addr)) { + egpc_addr = pri_gpccs_addr_mask(addr); + if (g->ops.gr.is_tpc_addr(g, egpc_addr)) + return true; + } + + return false; +} + +static u32 pri_get_egpc_num(struct gk20a *g, u32 addr) +{ + u32 i, start; + u32 egpc_base = g->ops.gr.get_egpc_base(g); + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + + for (i = 0; i < num_gpcs; i++) { + start = egpc_base + (i * gpc_stride); + if ((addr >= start) && (addr < (start + gpc_stride))) + return i; + } + return 0; +} + +static u32 pri_egpc_addr(struct gk20a *g, u32 addr, u32 gpc) +{ + u32 egpc_base = g->ops.gr.get_egpc_base(g); + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + + return egpc_base + (gpc * gpc_stride) + addr; +} + +static u32 pri_etpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc) +{ + u32 egpc_base = g->ops.gr.get_egpc_base(g); + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, + 
GPU_LIT_TPC_IN_GPC_BASE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + + return egpc_base + (gpc * gpc_stride) + + tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) + + addr; +} + +static void gv11b_gr_get_egpc_etpc_num(struct gk20a *g, u32 addr, + u32 *egpc_num, u32 *etpc_num) +{ + u32 egpc_addr = 0; + + *egpc_num = pri_get_egpc_num(g, addr); + egpc_addr = pri_gpccs_addr_mask(addr); + *etpc_num = g->ops.gr.get_tpc_num(g, egpc_addr); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "egpc_num = %d etpc_num = %d", *egpc_num, *etpc_num); +} + +static int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, + u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) +{ + u32 gpc_addr; + + if (g->ops.gr.is_egpc_addr(g, addr)) { + nvgpu_log_info(g, "addr=0x%x is egpc", addr); + + *addr_type = CTXSW_ADDR_TYPE_EGPC; + gpc_addr = pri_gpccs_addr_mask(addr); + if (pri_is_egpc_addr_shared(g, addr)) { + *broadcast_flags |= PRI_BROADCAST_FLAGS_EGPC; + *gpc_num = 0; + nvgpu_log_info(g, "shared egpc"); + } else { + *gpc_num = pri_get_egpc_num(g, addr); + nvgpu_log_info(g, "gpc=0x%x", *gpc_num); + } + if (g->ops.gr.is_tpc_addr(g, gpc_addr)) { + nvgpu_log_info(g, "addr=0x%x is etpc", addr); + *addr_type = CTXSW_ADDR_TYPE_ETPC; + if (pri_is_tpc_addr_shared(g, gpc_addr)) { + *broadcast_flags |= PRI_BROADCAST_FLAGS_ETPC; + *tpc_num = 0; + nvgpu_log_info(g, "shared etpc"); + } else { + *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); + nvgpu_log_info(g, "tpc=0x%x", *tpc_num); + } + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "addr_type = %d, broadcast_flags = %#08x", + *addr_type, *broadcast_flags); + return 0; + } + return -EINVAL; +} + +static void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, + u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) +{ + u32 gpc_num, tpc_num; + + nvgpu_log_info(g, "addr=0x%x", addr); + + /* The GPC/TPC unicast registers are included in the compressed PRI + * tables. 
Convert a GPC/TPC broadcast address to unicast addresses so + * that we can look up the offsets. + */ + if (broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) { + nvgpu_log_info(g, "broadcast flags egpc"); + for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { + + if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { + nvgpu_log_info(g, "broadcast flags etpc"); + for (tpc_num = 0; + tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num++) { + priv_addr_table[*t] = + pri_etpc_addr(g, + pri_tpccs_addr_mask(addr), + gpc_num, tpc_num); + nvgpu_log_info(g, + "priv_addr_table[%d]:%#08x", + *t, priv_addr_table[*t]); + (*t)++; + } + } else { + priv_addr_table[*t] = + pri_egpc_addr(g, + pri_gpccs_addr_mask(addr), + gpc_num); + nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", + *t, priv_addr_table[*t]); + (*t)++; + } + } + } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) { + if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { + nvgpu_log_info(g, "broadcast flags etpc but not egpc"); + for (tpc_num = 0; + tpc_num < g->gr.gpc_tpc_count[gpc]; + tpc_num++) { + priv_addr_table[*t] = + pri_etpc_addr(g, + pri_tpccs_addr_mask(addr), + gpc, tpc_num); + nvgpu_log_info(g, + "priv_addr_table[%d]:%#08x", + *t, priv_addr_table[*t]); + (*t)++; + } + } else { + priv_addr_table[*t] = addr; + nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", + *t, priv_addr_table[*t]); + (*t)++; + } + } +} + +static u32 gv11b_gr_get_egpc_base(struct gk20a *g) +{ + return EGPC_PRI_BASE; +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3468,4 +3658,10 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs; gops->gr.get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs; gops->gr.access_smpc_reg = gv11b_gr_access_smpc_reg; + gops->gr.decode_egpc_addr = gv11b_gr_decode_egpc_addr; + gops->gr.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table; + gops->gr.get_egpc_etpc_num = gv11b_gr_get_egpc_etpc_num; + gops->gr.get_egpc_base = 
gv11b_gr_get_egpc_base; + gops->gr.is_egpc_addr = gv11b_gr_pri_is_egpc_addr; + gops->gr.is_etpc_addr = gv11b_gr_pri_is_etpc_addr; } -- cgit v1.2.2 From 8b571de4563b83352097474c5f7157ea6623a97f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 11 Jul 2017 21:53:00 -0700 Subject: gpu: nvgpu: gv11b: implement init_gpc_mmu - Created HAL to configure gpc mmu unit for gv11b. - Earlier chips needs writes to NV_PGRAPH_PRI_GPCS_MMU_NUM_ACTIVE_LTCS register to know supported number of LTCS by reading NUM_ACTIVE_LTCS but gv11b support auto update from fuse upon reset, so skipped LTCS update for GPCS & skipping helps to fix compression failure issue. Bug 1950234 Change-Id: I628af7d1399e4fe3126895e3a703a19147f7a12f Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1517733 Reviewed-by: Mahantesh Kumbar Tested-by: Mahantesh Kumbar Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index fab2ae9a..3450bf05 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -45,10 +45,10 @@ #include #include #include -#include #include #include #include +#include static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { @@ -3568,6 +3568,39 @@ static u32 gv11b_gr_get_egpc_base(struct gk20a *g) return EGPC_PRI_BASE; } +static void gr_gv11b_init_gpc_mmu(struct gk20a *g) +{ + u32 temp; + + nvgpu_log_info(g, "initialize gpc mmu"); + + if (!g->ops.privsecurity) { + /* Bypass MMU check for non-secure boot. 
For + * secure-boot,this register write has no-effect */ + gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); + } + temp = gk20a_readl(g, fb_mmu_ctrl_r()); + temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | + gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | + gr_gpcs_pri_mmu_ctrl_vol_fault_m() | + gr_gpcs_pri_mmu_ctrl_comp_fault_m() | + gr_gpcs_pri_mmu_ctrl_miss_gran_m() | + gr_gpcs_pri_mmu_ctrl_cache_mode_m() | + gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | + gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | + gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); + gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); + gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); + gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); + + gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), + gk20a_readl(g, fb_mmu_debug_ctrl_r())); + gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), + gk20a_readl(g, fb_mmu_debug_wr_r())); + gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), + gk20a_readl(g, fb_mmu_debug_rd_r())); +} + void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); @@ -3664,4 +3697,5 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.get_egpc_base = gv11b_gr_get_egpc_base; gops->gr.is_egpc_addr = gv11b_gr_pri_is_egpc_addr; gops->gr.is_etpc_addr = gv11b_gr_pri_is_etpc_addr; + gops->gr.init_gpc_mmu = gr_gv11b_init_gpc_mmu; } -- cgit v1.2.2 From 2b98e1308d49b9c941d8fa6fc87f67108d6d9370 Mon Sep 17 00:00:00 2001 From: Sunny He Date: Mon, 24 Jul 2017 12:19:53 -0700 Subject: gpu: nvgpu: gv11b: Remove privsecurity from gpu_ops Replace privsecurity boolean flag in gpu_ops with entry in common flag system. 
The new common flag is NVGPU_SEC_PRIVSECURITY Jira NVGPU-74 Change-Id: I4c11e3a89a76abe137cf61b69ad0fbcd665554b7 Signed-off-by: Sunny He Reviewed-on: https://git-master.nvidia.com/r/1525714 Reviewed-by: Alex Waterman GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3450bf05..1ba0c523 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" @@ -3574,7 +3575,7 @@ static void gr_gv11b_init_gpc_mmu(struct gk20a *g) nvgpu_log_info(g, "initialize gpc mmu"); - if (!g->ops.privsecurity) { + if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { /* Bypass MMU check for non-secure boot. For * secure-boot,this register write has no-effect */ gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); @@ -3601,9 +3602,11 @@ static void gr_gv11b_init_gpc_mmu(struct gk20a *g) gk20a_readl(g, fb_mmu_debug_rd_r())); } -void gv11b_init_gr(struct gpu_ops *gops) +void gv11b_init_gr(struct gk20a *g) { - gp10b_init_gr(gops); + struct gpu_ops *gops = &g->ops; + + gp10b_init_gr(g); gops->gr.init_preemption_state = NULL; gops->gr.init_fs_state = gr_gv11b_init_fs_state; gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; -- cgit v1.2.2 From b859393ffee2b9e29fd3a18f6fc78594fed7eda1 Mon Sep 17 00:00:00 2001 From: Sandarbh Jain Date: Thu, 27 Jul 2017 00:43:37 -0700 Subject: gpu: nvgpu: gv11b: fix no_of_sm Number of sm is being reported incorrectly. This is because we are not taking into account that each TPC have 2 sm. 
Bug 1951026 Change-Id: I7c666aa2a0470a14aad29ab1a80ae9d23958a743 Signed-off-by: Sandarbh Jain Reviewed-on: https://git-master.nvidia.com/r/1527771 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Alexander Lewkowicz Tested-by: Alexander Lewkowicz Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 1ba0c523..850315f7 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2113,24 +2113,29 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g) static void gr_gv11b_init_sm_id_table(struct gk20a *g) { - u32 gpc, tpc; + u32 gpc, tpc, sm; u32 sm_id = 0; + u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); /* TODO populate smids based on power efficiency */ for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { - if (tpc < g->gr.gpc_tpc_count[gpc]) { + if (tpc >= g->gr.gpc_tpc_count[gpc]) + continue; + + for (sm = 0; sm < sm_per_tpc; sm++) { g->gr.sm_to_cluster[sm_id].tpc_index = tpc; g->gr.sm_to_cluster[sm_id].gpc_index = gpc; g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2; g->gr.sm_to_cluster[sm_id].global_tpc_index = - sm_id; + tpc; sm_id++; } } } g->gr.no_of_sm = sm_id; + nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm); } static void gr_gv11b_program_sm_id_numbering(struct gk20a *g, @@ -2156,7 +2161,7 @@ static int gr_gv11b_load_smid_config(struct gk20a *g) u32 *tpc_sm_id; u32 i, j; u32 tpc_index, gpc_index, tpc_id; - u32 sms_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); @@ 
-2174,7 +2179,7 @@ static int gr_gv11b_load_smid_config(struct gk20a *g) u32 bits; tpc_id = (i << 2) + j; - sm_id = tpc_id * sms_per_tpc; + sm_id = tpc_id * sm_per_tpc; if (sm_id >= g->gr.no_of_sm) break; -- cgit v1.2.2 From 3197a918d5052c71ad854f6b22fdb35bfe7cebe2 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Thu, 10 Aug 2017 16:34:16 -0700 Subject: gpu: nvgpu: gv11b: add max_subctx_count to g->fifo.t19x - For better performance. It used to read register every time referencing max_subctx_count. - Avoid reading registers for vgpu. Jira VFND-3797 Change-Id: Id6e6b15a0d9a035795e8a9a2c6bb63524c5eb544 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1537009 Reviewed-by: svccoveritychecker Reviewed-by: Seshendra Gadagottu GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 850315f7..b95152eb 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2021,7 +2021,7 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) u32 j; u32 num_subctx, err = 0; - num_subctx = gv11b_get_max_subctx_count(g); + num_subctx = g->fifo.t19x.max_subctx_count; for (j = 0; j < num_subctx; j++) { -- cgit v1.2.2 From bacbc7331223b0f80b36d7eff43009e5d7ac9aae Mon Sep 17 00:00:00 2001 From: Alexander Lewkowicz Date: Fri, 4 Aug 2017 15:51:42 -0700 Subject: gpu: nvgpu: gv11b: Fix computation of offset When reading NV_PGRAPH_PRI_GPC0_TPC1_SM1_DBGR_STATUS0, we are not reading the expected value. The offset of the sm is not added to the PRI. JIRA GPUT19X-75 bug: ? 
Change-Id: I2eeb24505e928044c3a3331fa5f493a3f118a3c8 Signed-off-by: Alexander Lewkowicz Reviewed-on: https://git-master.nvidia.com/r/1533953 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index b95152eb..2b0e8be7 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3116,7 +3116,7 @@ static int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, u32 warp_esr, global_esr; struct nvgpu_timeout timeout; u32 offset = gk20a_gr_gpc_offset(g, gpc) + - gk20a_gr_tpc_offset(g, tpc); + gk20a_gr_tpc_offset(g, tpc) + gv11b_gr_sm_offset(g, sm); gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, -- cgit v1.2.2 From 866165749a0b7b2e6b219bb26bffd69d790d97c5 Mon Sep 17 00:00:00 2001 From: Sunny He Date: Thu, 17 Aug 2017 16:10:42 -0700 Subject: gpu: nvgpu: Reorg gr HAL initialization Reorganize HAL initialization to remove inheritance and construct the gpu_ops struct at compile time. This patch only covers the gr sub-module of the gpu_ops struct. Perform HAL function assignments in hal_gxxxx.c through the population of a chip-specific copy of gpu_ops. 
Jira NVGPU-74 Change-Id: I8feaa95a9830969221f7ac70a5ef61cdf25094c3 Signed-off-by: Sunny He Reviewed-on: https://git-master.nvidia.com/r/1542988 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 259 +++++++++++-------------------------- 1 file changed, 79 insertions(+), 180 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 2b0e8be7..078272d1 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -51,7 +51,7 @@ #include #include -static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) +bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -80,7 +80,7 @@ static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -static bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) +bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -97,7 +97,7 @@ static bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) return valid; } -static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) +bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -299,7 +299,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } -static void gr_gv11b_enable_hww_exceptions(struct gk20a *g) +void gr_gv11b_enable_hww_exceptions(struct gk20a *g) { /* enable exceptions */ gk20a_writel(g, gr_fe_hww_esr_r(), @@ -310,7 +310,7 @@ static void gr_gv11b_enable_hww_exceptions(struct gk20a *g) gr_memfmt_hww_esr_reset_active_f()); } -static void gr_gv11b_enable_exceptions(struct gk20a *g) +void gr_gv11b_enable_exceptions(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; u32 reg_val; @@ -581,7 +581,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, } -static int 
gr_gv11b_handle_tpc_sm_ecc_exception(struct gk20a *g, +int gr_gv11b_handle_tpc_sm_ecc_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) @@ -606,7 +606,7 @@ static int gr_gv11b_handle_tpc_sm_ecc_exception(struct gk20a *g, return ret; } -static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, +int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) { @@ -852,7 +852,7 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, return ret; } -static int gr_gv11b_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, +int gr_gv11b_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, u32 gpc_exception) { if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpcmmu_m()) @@ -861,7 +861,7 @@ static int gr_gv11b_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, return 0; } -static int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, +int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, u32 gpc_exception) { if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpccs_m()) @@ -871,7 +871,7 @@ static int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, return 0; } -static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) +void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; u32 tpc_mask; @@ -889,13 +889,13 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1))); } -static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, +int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event) { return 0; } -static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, +int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, struct zbc_query_params *query_params) { u32 index = query_params->index_size; @@ -911,7 +911,7 @@ 
static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, return 0; } -static bool gr_gv11b_add_zbc_type_s(struct gk20a *g, struct gr_gk20a *gr, +bool gr_gv11b_add_zbc_type_s(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *zbc_val, int *ret_val) { struct zbc_s_table *s_tbl; @@ -950,7 +950,7 @@ static bool gr_gv11b_add_zbc_type_s(struct gk20a *g, struct gr_gk20a *gr, return added; } -static int gr_gv11b_add_zbc_stencil(struct gk20a *g, struct gr_gk20a *gr, +int gr_gv11b_add_zbc_stencil(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *stencil_val, u32 index) { u32 zbc_s; @@ -974,7 +974,7 @@ static int gr_gv11b_add_zbc_stencil(struct gk20a *g, struct gr_gk20a *gr, return 0; } -static int gr_gv11b_load_stencil_default_tbl(struct gk20a *g, +int gr_gv11b_load_stencil_default_tbl(struct gk20a *g, struct gr_gk20a *gr) { struct zbc_entry zbc_val; @@ -1005,7 +1005,7 @@ static int gr_gv11b_load_stencil_default_tbl(struct gk20a *g, return 0; } -static int gr_gv11b_load_stencil_tbl(struct gk20a *g, struct gr_gk20a *gr) +int gr_gv11b_load_stencil_tbl(struct gk20a *g, struct gr_gk20a *gr) { int ret; u32 i; @@ -1025,12 +1025,12 @@ static int gr_gv11b_load_stencil_tbl(struct gk20a *g, struct gr_gk20a *gr) return 0; } -static u32 gr_gv11b_pagepool_default_size(struct gk20a *g) +u32 gr_gv11b_pagepool_default_size(struct gk20a *g) { return gr_scc_pagepool_total_pages_hwmax_value_v(); } -static int gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g) +int gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int size; @@ -1138,7 +1138,7 @@ static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data) } } -static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, +int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { gk20a_dbg_fn(""); @@ -1192,7 +1192,7 @@ fail: return -EINVAL; } -static void gr_gv11b_bundle_cb_defaults(struct gk20a *g) +void 
gr_gv11b_bundle_cb_defaults(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -1204,7 +1204,7 @@ static void gr_gv11b_bundle_cb_defaults(struct gk20a *g) gr_pd_ab_dist_cfg2_token_limit_init_v(); } -static void gr_gv11b_cb_size_default(struct gk20a *g) +void gr_gv11b_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -1215,7 +1215,7 @@ static void gr_gv11b_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); } -static void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -1261,7 +1261,7 @@ static void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) } } -static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) +void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -1442,7 +1442,7 @@ static int gr_gv11b_dump_gr_sm_regs(struct gk20a *g, return 0; } -static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, +int gr_gv11b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { struct gr_gk20a *gr = &g->gr; @@ -1595,7 +1595,7 @@ static bool gr_activity_empty_or_preempted(u32 val) return true; } -static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, +int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, u32 expect_delay) { u32 delay = expect_delay; @@ -1647,7 +1647,7 @@ static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, return -EAGAIN; } -static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, +void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, bool patch) { @@ -1676,7 +1676,7 @@ static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, } -static void gr_gv11b_init_cyclestats(struct gk20a *g) +void 
gr_gv11b_init_cyclestats(struct gk20a *g) { #if defined(CONFIG_GK20A_CYCLE_STATS) g->gpu_characteristics.flags |= @@ -1688,7 +1688,7 @@ static void gr_gv11b_init_cyclestats(struct gk20a *g) #endif } -static void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0); @@ -1706,7 +1706,7 @@ static void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0); } -static void gr_gv11b_get_access_map(struct gk20a *g, +void gr_gv11b_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries) { static u32 wl_addr_gv11b[] = { @@ -1751,7 +1751,7 @@ static void gr_gv11b_get_access_map(struct gk20a *g, * * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing */ -static int gr_gv11b_pre_process_sm_exception(struct gk20a *g, +int gr_gv11b_pre_process_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, bool sm_debugger_attached, struct channel_gk20a *fault_ch, bool *early_exit, bool *ignore_debugger) @@ -1940,7 +1940,7 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) } } -static int gr_gv11b_handle_fecs_error(struct gk20a *g, +int gr_gv11b_handle_fecs_error(struct gk20a *g, struct channel_gk20a *__ch, struct gr_gk20a_isr_data *isr_data) { @@ -1957,7 +1957,7 @@ static int gr_gv11b_handle_fecs_error(struct gk20a *g, return ret; } -static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) +int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) { u32 map; u32 i, j, mapregs; @@ -2034,7 +2034,7 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) } } -static int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) +int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) { struct av_list_gk20a *sw_veid_bundle_init = 
&g->gr.ctx_vars.sw_veid_bundle_init; @@ -2099,7 +2099,7 @@ void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, } } -static void gr_gv11b_detect_sm_arch(struct gk20a *g) +void gr_gv11b_detect_sm_arch(struct gk20a *g) { u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); @@ -2111,7 +2111,7 @@ static void gr_gv11b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_warp_count_v(v); } -static void gr_gv11b_init_sm_id_table(struct gk20a *g) +void gr_gv11b_init_sm_id_table(struct gk20a *g) { u32 gpc, tpc, sm; u32 sm_id = 0; @@ -2138,7 +2138,7 @@ static void gr_gv11b_init_sm_id_table(struct gk20a *g) nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm); } -static void gr_gv11b_program_sm_id_numbering(struct gk20a *g, +void gr_gv11b_program_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid) { u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -2156,7 +2156,7 @@ static void gr_gv11b_program_sm_id_numbering(struct gk20a *g, gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index)); } -static int gr_gv11b_load_smid_config(struct gk20a *g) +int gr_gv11b_load_smid_config(struct gk20a *g) { u32 *tpc_sm_id; u32 i, j; @@ -2204,7 +2204,7 @@ static int gr_gv11b_load_smid_config(struct gk20a *g) return 0; } -static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) +int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) { u32 addr_lo; u32 addr_hi; @@ -2239,7 +2239,7 @@ static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) -static int gr_gv11b_commit_global_timeslice(struct gk20a *g, +int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct channel_ctx_gk20a *ch_ctx = NULL; @@ -2292,7 +2292,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g, return 0; } -static void gv11b_restore_context_header(struct gk20a *g, +void gv11b_restore_context_header(struct gk20a *g, struct nvgpu_mem *ctxheader) { u32 va_lo, va_hi; @@ -2314,7 +2314,7 @@ static void 
gv11b_restore_context_header(struct gk20a *g, nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_num_save_ops_o(), 0); } -static void gr_gv11b_write_zcull_ptr(struct gk20a *g, +void gr_gv11b_write_zcull_ptr(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { u32 va_lo, va_hi; @@ -2329,7 +2329,7 @@ static void gr_gv11b_write_zcull_ptr(struct gk20a *g, } -static void gr_gv11b_write_pm_ptr(struct gk20a *g, +void gr_gv11b_write_pm_ptr(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { u32 va_lo, va_hi; @@ -2343,7 +2343,7 @@ static void gr_gv11b_write_pm_ptr(struct gk20a *g, ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); } -static void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) +void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) { u32 gate_ctrl; @@ -2375,7 +2375,7 @@ static void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); } -static void gr_gv11b_load_tpc_mask(struct gk20a *g) +void gr_gv11b_load_tpc_mask(struct gk20a *g) { u32 pes_tpc_mask = 0, fuse_tpc_mask; u32 gpc, pes, val; @@ -2405,7 +2405,7 @@ static void gr_gv11b_load_tpc_mask(struct gk20a *g) } -static void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, +void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { u32 addr_lo, addr_hi; @@ -2426,7 +2426,7 @@ static void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, } -static int gr_gv11b_init_fs_state(struct gk20a *g) +int gr_gv11b_init_fs_state(struct gk20a *g) { u32 data; @@ -2451,7 +2451,7 @@ static int gr_gv11b_init_fs_state(struct gk20a *g) return gr_gm20b_init_fs_state(g); } -static void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, +void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, u32 *esr_sm_sel) { u32 reg_val; @@ -2469,7 +2469,7 @@ static void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, "esr_sm_sel bitmask: 0x%x", *esr_sm_sel); } -static int 
gv11b_gr_sm_trigger_suspend(struct gk20a *g) +int gv11b_gr_sm_trigger_suspend(struct gk20a *g) { u32 dbgr_control0; @@ -2491,7 +2491,7 @@ static int gv11b_gr_sm_trigger_suspend(struct gk20a *g) return 0; } -static void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) +void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) { /* Check if we have at least one valid warp * get paused state on maxwell @@ -2559,7 +2559,7 @@ static void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) } } -static int gv11b_gr_update_sm_error_state(struct gk20a *g, +int gv11b_gr_update_sm_error_state(struct gk20a *g, struct channel_gk20a *ch, u32 sm_id, struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) { @@ -2638,7 +2638,7 @@ fail: return err; } -static int gv11b_gr_set_sm_debug_mode(struct gk20a *g, +int gv11b_gr_set_sm_debug_mode(struct gk20a *g, struct channel_gk20a *ch, u64 sms, bool enable) { struct nvgpu_dbg_gpu_reg_op *ops; @@ -2697,7 +2697,7 @@ static int gv11b_gr_set_sm_debug_mode(struct gk20a *g, return err; } -static int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) +int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) { int sm_id; struct gr_gk20a *gr = &g->gr; @@ -2737,7 +2737,7 @@ static int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) return 0; } -static void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) +void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) { /* clear hww */ @@ -2767,7 +2767,7 @@ static void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_multiple_warp_errors_report_f()); } -static bool gv11b_gr_sm_debugger_attached(struct gk20a *g) +bool gv11b_gr_sm_debugger_attached(struct gk20a *g) { u32 debugger_mode; u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); @@ -2787,7 +2787,7 @@ static bool gv11b_gr_sm_debugger_attached(struct gk20a *g) return false; } -static void 
gv11b_gr_suspend_single_sm(struct gk20a *g, +void gv11b_gr_suspend_single_sm(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, bool check_errors) { @@ -2823,7 +2823,7 @@ static void gv11b_gr_suspend_single_sm(struct gk20a *g, } } -static void gv11b_gr_suspend_all_sms(struct gk20a *g, +void gv11b_gr_suspend_all_sms(struct gk20a *g, u32 global_esr_mask, bool check_errors) { struct gr_gk20a *gr = &g->gr; @@ -2868,7 +2868,7 @@ static void gv11b_gr_suspend_all_sms(struct gk20a *g, } } -static void gv11b_gr_resume_single_sm(struct gk20a *g, +void gv11b_gr_resume_single_sm(struct gk20a *g, u32 gpc, u32 tpc, u32 sm) { u32 dbgr_control0, dbgr_status0; @@ -2936,7 +2936,7 @@ static void gv11b_gr_resume_single_sm(struct gk20a *g, } -static void gv11b_gr_resume_all_sms(struct gk20a *g) +void gv11b_gr_resume_all_sms(struct gk20a *g) { u32 dbgr_control0, dbgr_status0; /* @@ -3001,7 +3001,7 @@ static void gv11b_gr_resume_all_sms(struct gk20a *g) dbgr_control0, dbgr_status0); } -static int gv11b_gr_resume_from_pause(struct gk20a *g) +int gv11b_gr_resume_from_pause(struct gk20a *g) { int err = 0; u32 reg_val; @@ -3020,7 +3020,7 @@ static int gv11b_gr_resume_from_pause(struct gk20a *g) return err; } -static u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g, +u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm) { u32 offset = gk20a_gr_gpc_offset(g, gpc) + @@ -3032,7 +3032,7 @@ static u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g, return hww_warp_esr; } -static u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g, +u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm) { u32 offset = gk20a_gr_gpc_offset(g, gpc) + @@ -3045,7 +3045,7 @@ static u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g, return hww_global_esr; } -static u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g) +u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g) { /* * These three interrupts don't require locking down the SM. 
They can @@ -3104,7 +3104,7 @@ static void gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(struct gk20a *g, warps_paused, warps_trapped); } -static int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, +int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, bool check_errors) { @@ -3190,7 +3190,7 @@ static int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, return -ETIMEDOUT; } -static int gv11b_gr_lock_down_sm(struct gk20a *g, +int gv11b_gr_lock_down_sm(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, bool check_errors) { @@ -3212,7 +3212,7 @@ static int gv11b_gr_lock_down_sm(struct gk20a *g, check_errors); } -static void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, +void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr) { u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + @@ -3232,7 +3232,7 @@ static void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, offset)); } -static int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g, +int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event) { u32 esr; @@ -3263,7 +3263,7 @@ static int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g, static const u32 _num_ovr_perf_regs = 20; static u32 _ovr_perf_regs[20] = { 0, }; -static void gv11b_gr_init_ovr_sm_dsm_perf(void) +void gv11b_gr_init_ovr_sm_dsm_perf(void) { if (_ovr_perf_regs[0] != 0) return; @@ -3301,7 +3301,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 2; static u32 *_sm_dsm_perf_regs; static u32 _sm_dsm_perf_ctrl_regs[2]; -static void gv11b_gr_init_sm_dsm_reg_info(void) +void gv11b_gr_init_sm_dsm_reg_info(void) { if (_sm_dsm_perf_ctrl_regs[0] != 0) return; @@ -3312,7 +3312,7 @@ static void gv11b_gr_init_sm_dsm_reg_info(void) gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r(); } -static void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g, +void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g, 
u32 *num_sm_dsm_perf_regs, u32 **sm_dsm_perf_regs, u32 *perf_register_stride) @@ -3323,7 +3323,7 @@ static void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g, ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); } -static void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, +void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs, u32 *ctrl_register_stride) @@ -3334,14 +3334,14 @@ static void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); } -static void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, +void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, u32 **ovr_perf_regs) { *num_ovr_perf_regs = _num_ovr_perf_regs; *ovr_perf_regs = _ovr_perf_regs; } -static void gv11b_gr_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) +void gv11b_gr_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) { u32 reg_val; u32 quad_ctrl; @@ -3393,7 +3393,7 @@ static bool pri_is_egpc_addr_shared(struct gk20a *g, u32 addr) (addr < egpc_shared_base + gpc_stride); } -static bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) +bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) { u32 egpc_base = g->ops.gr.get_egpc_base(g); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -3404,7 +3404,7 @@ static bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) pri_is_egpc_addr_shared(g, addr); } -static bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) +bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) { u32 egpc_addr = 0; @@ -3454,7 +3454,7 @@ static u32 pri_etpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc) addr; } -static void gv11b_gr_get_egpc_etpc_num(struct gk20a *g, u32 addr, +void gv11b_gr_get_egpc_etpc_num(struct gk20a *g, u32 addr, u32 *egpc_num, u32 *etpc_num) { u32 egpc_addr = 0; @@ -3467,7 +3467,7 @@ static void gv11b_gr_get_egpc_etpc_num(struct 
gk20a *g, u32 addr, "egpc_num = %d etpc_num = %d", *egpc_num, *etpc_num); } -static int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, +int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) { u32 gpc_addr; @@ -3506,7 +3506,7 @@ static int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, return -EINVAL; } -static void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, +void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) { u32 gpc_num, tpc_num; @@ -3569,12 +3569,12 @@ static void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, } } -static u32 gv11b_gr_get_egpc_base(struct gk20a *g) +u32 gv11b_gr_get_egpc_base(struct gk20a *g) { return EGPC_PRI_BASE; } -static void gr_gv11b_init_gpc_mmu(struct gk20a *g) +void gr_gv11b_init_gpc_mmu(struct gk20a *g) { u32 temp; @@ -3606,104 +3606,3 @@ static void gr_gv11b_init_gpc_mmu(struct gk20a *g) gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), gk20a_readl(g, fb_mmu_debug_rd_r())); } - -void gv11b_init_gr(struct gk20a *g) -{ - struct gpu_ops *gops = &g->ops; - - gp10b_init_gr(g); - gops->gr.init_preemption_state = NULL; - gops->gr.init_fs_state = gr_gv11b_init_fs_state; - gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; - gops->gr.is_valid_class = gr_gv11b_is_valid_class; - gops->gr.is_valid_gfx_class = gr_gv11b_is_valid_gfx_class; - gops->gr.is_valid_compute_class = gr_gv11b_is_valid_compute_class; - gops->gr.set_preemption_buffer_va = gr_gv11b_set_preemption_buffer_va; - gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; - gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; - gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; - gops->gr.zbc_s_query_table = gr_gv11b_zbc_s_query_table; - gops->gr.add_zbc_type_s = gr_gv11b_add_zbc_type_s; - gops->gr.pagepool_default_size = gr_gv11b_pagepool_default_size; - 
gops->gr.calc_global_ctx_buffer_size = - gr_gv11b_calc_global_ctx_buffer_size; - gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; - gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; - gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults; - gops->gr.cb_size_default = gr_gv11b_cb_size_default; - gops->gr.set_alpha_circular_buffer_size = - gr_gv11b_set_alpha_circular_buffer_size; - gops->gr.set_circular_buffer_size = - gr_gv11b_set_circular_buffer_size; - gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs; - gops->gr.wait_empty = gr_gv11b_wait_empty; - gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; - gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; - gops->gr.get_access_map = gr_gv11b_get_access_map; - gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception; - gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception; - gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; - gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; - gops->gr.enable_exceptions = gr_gv11b_enable_exceptions; - gops->gr.enable_hww_exceptions = gr_gv11b_enable_hww_exceptions; - gops->gr.pre_process_sm_exception = - gr_gv11b_pre_process_sm_exception; - gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; - gops->gr.create_gr_sysfs = gr_gv11b_create_sysfs; - gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; - gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; - gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; - gops->gr.commit_global_timeslice = gr_gv11b_commit_global_timeslice; - gops->gr.init_sm_id_table = gr_gv11b_init_sm_id_table; - gops->gr.load_smid_config = gr_gv11b_load_smid_config; - gops->gr.program_sm_id_numbering = - gr_gv11b_program_sm_id_numbering; - gops->gr.commit_inst = gr_gv11b_commit_inst; - gops->gr.restore_context_header = gv11b_restore_context_header; - gops->gr.write_zcull_ptr = gr_gv11b_write_zcull_ptr; - gops->gr.write_pm_ptr = gr_gv11b_write_pm_ptr; - 
gops->gr.init_elcg_mode = gr_gv11b_init_elcg_mode; - gops->gr.load_tpc_mask = gr_gv11b_load_tpc_mask; - gops->gr.handle_gpc_gpccs_exception = - gr_gv11b_handle_gpc_gpccs_exception; - gops->gr.set_czf_bypass = NULL; - gops->gr.handle_gpc_gpcmmu_exception = - gr_gv11b_handle_gpc_gpcmmu_exception; - gops->gr.get_esr_sm_sel = gv11b_gr_get_esr_sm_sel; - gops->gr.trigger_suspend = gv11b_gr_sm_trigger_suspend; - gops->gr.bpt_reg_info = gv11b_gr_bpt_reg_info; - gops->gr.update_sm_error_state = gv11b_gr_update_sm_error_state; - gops->gr.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode; - gops->gr.record_sm_error_state = gv11b_gr_record_sm_error_state; - gops->gr.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask; - gops->gr.sm_debugger_attached = gv11b_gr_sm_debugger_attached; - gops->gr.suspend_single_sm = gv11b_gr_suspend_single_sm; - gops->gr.suspend_all_sms = gv11b_gr_suspend_all_sms; - gops->gr.resume_single_sm = gv11b_gr_resume_single_sm; - gops->gr.resume_all_sms = gv11b_gr_resume_all_sms; - gops->gr.resume_from_pause = gv11b_gr_resume_from_pause; - gops->gr.get_sm_hww_warp_esr = gv11b_gr_get_sm_hww_warp_esr; - gops->gr.get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr; - gops->gr.get_sm_no_lock_down_hww_global_esr_mask = - gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask; - gops->gr.lock_down_sm = gv11b_gr_lock_down_sm; - gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down; - gops->gr.clear_sm_hww = gv11b_gr_clear_sm_hww; - gops->gr.handle_tpc_sm_ecc_exception = - gr_gv11b_handle_tpc_sm_ecc_exception; - gops->gr.handle_tpc_mpc_exception = - gr_gv11b_handle_tpc_mpc_exception; - gops->gr.init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf; - gops->gr.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info; - gops->gr.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs; - gops->gr.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs; - gops->gr.get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs; - gops->gr.access_smpc_reg = 
gv11b_gr_access_smpc_reg; - gops->gr.decode_egpc_addr = gv11b_gr_decode_egpc_addr; - gops->gr.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table; - gops->gr.get_egpc_etpc_num = gv11b_gr_get_egpc_etpc_num; - gops->gr.get_egpc_base = gv11b_gr_get_egpc_base; - gops->gr.is_egpc_addr = gv11b_gr_pri_is_egpc_addr; - gops->gr.is_etpc_addr = gv11b_gr_pri_is_etpc_addr; - gops->gr.init_gpc_mmu = gr_gv11b_init_gpc_mmu; -} -- cgit v1.2.2 From cedb24c7a09292ec8deee9ee17e1d7defeff0241 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 18 Sep 2017 10:46:06 -0700 Subject: gpu: nvgpu: gv11b: correct wl reg offset Corrected whitelist register address offset for gr_pri_gpcs_tpcs_sm_disp_ctrl. This offset value is changed for gv11b from gp10b. With wrong offset value, gl tests are generating "unhandled fecs error interrupt 0x00000002 for channel xxx". Bug 1958308 Change-Id: Iabfbb20ea1ee4ca8567d0cda940fa1e8cbff1bac Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/1562615 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svc-mobile-coverity Reviewed-by: svccoveritychecker Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 078272d1..ee49a8f2 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1740,7 +1740,7 @@ void gr_gv11b_get_access_map(struct gk20a *g, 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ 0x419e84, /* gr_pri_gpcs_tpcs_sms_dbgr_control0 */ - 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ + 0x419ba4, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ }; *whitelist = wl_addr_gv11b; -- cgit v1.2.2 From 0420dd383e9aca0c764ad88979f88292603198d9 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 18 Sep 2017 
11:17:15 -0700 Subject: gpu: nvgpu: gv11b: Initialize ctxsw hdr counters Initialize following context switch header counters for gv11b: ctxsw_prog_main_image_num_save_ops ctxsw_prog_main_image_num_restore_ops ctxsw_prog_main_image_num_wfi_save_ops ctxsw_prog_main_image_num_cta_save_ops ctxsw_prog_main_image_num_gfxp_save_ops ctxsw_prog_main_image_num_cilp_save_ops Reused gp10b gr hal function gr_gp10b_init_ctxsw_hdr_data() for this. Bug 1958308 Signed-off-by: seshendra Gadagottu Change-Id: I10d83e35ccd8cba517ebaba1f0e5bec5a0f68ba5 Reviewed-on: https://git-master.nvidia.com/r/1562655 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svc-mobile-coverity Reviewed-by: svccoveritychecker Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index ee49a8f2..50b819ac 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2309,10 +2309,6 @@ void gv11b_restore_context_header(struct gk20a *g, ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi); nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_num_restore_ops_o(), 0); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_num_save_ops_o(), 0); } void gr_gv11b_write_zcull_ptr(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) -- cgit v1.2.2 From 1586a9f0044987106371255b8133430040ed4f70 Mon Sep 17 00:00:00 2001 From: Alexander Lewkowicz Date: Tue, 19 Sep 2017 13:36:56 -0700 Subject: gpu: nvgpu: gv11b: Fix sm lock down Volta traphandler RM changes Sm lock-down is not being executed correctly. This results in a GPU being in an undefined state. A similar bug fix was already provided on the resman implementation. This fix is inspired by the CL change 21183102.
That change refers to bug http://nvbugs/1800484 and bug http://nvbugs/200162542 This patch solves the issues mention in bug http://nvbugs/1992522 Change-Id: I601fef7c94e5ba419d7bf854877fa7a9f9b82cfa Signed-off-by: Alexander Lewkowicz Reviewed-on: https://git-master.nvidia.com/r/1563815 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 50b819ac..74ea0d2c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3136,7 +3136,7 @@ int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, check_errors && (gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr) == gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v()) && - ((global_esr & ~global_esr_mask) == 0); + ((global_esr & global_esr_mask) == 0); if (locked_down) { /* -- cgit v1.2.2 From d61643c0200983dc340d37962bb0a3ca900a3e97 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Mon, 25 Sep 2017 08:59:28 -0700 Subject: gpu: nvgpu: gv11b: Change license for common files to MIT Change license of OS independent source code files to MIT. JIRA NVGPU-218 Change-Id: I93c0504f0544ee8ced4898c386b3f5fbaa6a99a9 Signed-off-by: Terje Bergstrom Reviewed-on: https://git-master.nvidia.com/r/1567804 Reviewed-by: svc-mobile-coverity Reviewed-by: David Martinez Nieto Reviewed-by: Seshendra Gadagottu Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 74ea0d2c..b96f2bc6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3,14 +3,23 @@ * * Copyright (c) 2016-2017, NVIDIA CORPORATION. 
All rights reserved. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
*/ #include -- cgit v1.2.2 From 9825a8ec69d54c725c38015006aed655d10ac567 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Tue, 15 Aug 2017 15:28:35 -0700 Subject: gpu: nvgpu: fix handling of EGPC_ETPC_SM addresses Implemented litter values for following defines: GPU_LIT_SMPC_PRI_BASE GPU_LIT_SMPC_PRI_SHARED_BASE GPU_LIT_SMPC_PRI_UNIQUE_BASE9 GPU_LIT_SMPC_PRI_STRIDE Added broadcast flags for smpc Handled all combinations of broadcast/unicast EGPC, ETPC, SM Bug 200337994 Change-Id: I7aa3c4d9ac4e819010061d44fb5a40056762f518 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/1539075 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: svccoveritychecker Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 106 +++++++++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index b96f2bc6..f209012c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3409,6 +3409,45 @@ bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) pri_is_egpc_addr_shared(g, addr); } +static inline u32 pri_smpc_in_etpc_addr_mask(struct gk20a *g, u32 addr) +{ + u32 smpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_SMPC_PRI_STRIDE); + + return (addr & (smpc_stride - 1)); +} + +static u32 pri_smpc_ext_addr(struct gk20a *g, u32 sm_offset, u32 gpc_num, + u32 tpc_num, u32 sm_num) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_BASE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + u32 egpc_base = g->ops.gr.get_egpc_base(g); + u32 smpc_unique_base = nvgpu_get_litter_value(g, + GPU_LIT_SMPC_PRI_UNIQUE_BASE); + u32 smpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_SMPC_PRI_STRIDE); + + return (egpc_base + 
(gpc_num * gpc_stride) + tpc_in_gpc_base + + (tpc_num * tpc_in_gpc_stride) + + (sm_num * smpc_stride) + + (smpc_unique_base + sm_offset)); +} + +static bool pri_is_smpc_addr_in_etpc_shared(struct gk20a *g, u32 addr) +{ + u32 smpc_shared_base = nvgpu_get_litter_value(g, + GPU_LIT_SMPC_PRI_SHARED_BASE); + u32 smpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_SMPC_PRI_STRIDE); + + return (addr >= smpc_shared_base) && + (addr < smpc_shared_base + smpc_stride); +} + bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) { u32 egpc_addr = 0; @@ -3476,6 +3515,7 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) { u32 gpc_addr; + u32 tpc_addr; if (g->ops.gr.is_egpc_addr(g, addr)) { nvgpu_log_info(g, "addr=0x%x is egpc", addr); @@ -3501,6 +3541,9 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); nvgpu_log_info(g, "tpc=0x%x", *tpc_num); } + tpc_addr = pri_tpccs_addr_mask(addr); + if (pri_is_smpc_addr_in_etpc_shared(g, tpc_addr)) + *broadcast_flags |= PRI_BROADCAST_FLAGS_SMPC; } nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, @@ -3511,6 +3554,25 @@ int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, return -EINVAL; } +static void gv11b_gr_update_priv_addr_table_smpc(struct gk20a *g, u32 gpc_num, + u32 tpc_num, u32 addr, + u32 *priv_addr_table, u32 *t) +{ + u32 sm_per_tpc, sm_num; + + nvgpu_log_info(g, "broadcast flags smpc"); + + sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + for (sm_num = 0; sm_num < sm_per_tpc; sm_num++) { + priv_addr_table[*t] = pri_smpc_ext_addr(g, + pri_smpc_in_etpc_addr_mask(g, addr), + gpc_num, tpc_num, sm_num); + nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", + *t, priv_addr_table[*t]); + (*t)++; + } +} + void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) { @@ -3531,15 +3593,27 @@ void 
gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, for (tpc_num = 0; tpc_num < g->gr.gpc_tpc_count[gpc_num]; tpc_num++) { - priv_addr_table[*t] = - pri_etpc_addr(g, - pri_tpccs_addr_mask(addr), - gpc_num, tpc_num); - nvgpu_log_info(g, - "priv_addr_table[%d]:%#08x", - *t, priv_addr_table[*t]); - (*t)++; + if (broadcast_flags & + PRI_BROADCAST_FLAGS_SMPC) { + gv11b_gr_update_priv_addr_table_smpc( + g, gpc_num, tpc_num, addr, + priv_addr_table, t); + } else { + priv_addr_table[*t] = + pri_etpc_addr(g, + pri_tpccs_addr_mask(addr), + gpc_num, tpc_num); + nvgpu_log_info(g, + "priv_addr_table[%d]:%#08x", + *t, priv_addr_table[*t]); + (*t)++; + } } + } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) { + tpc_num = 0; + gv11b_gr_update_priv_addr_table_smpc( + g, gpc_num, tpc_num, addr, + priv_addr_table, t); } else { priv_addr_table[*t] = pri_egpc_addr(g, @@ -3553,10 +3627,17 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) { if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { nvgpu_log_info(g, "broadcast flags etpc but not egpc"); + gpc_num = 0; for (tpc_num = 0; tpc_num < g->gr.gpc_tpc_count[gpc]; tpc_num++) { - priv_addr_table[*t] = + if (broadcast_flags & + PRI_BROADCAST_FLAGS_SMPC) + gv11b_gr_update_priv_addr_table_smpc( + g, gpc_num, tpc_num, addr, + priv_addr_table, t); + else { + priv_addr_table[*t] = pri_etpc_addr(g, pri_tpccs_addr_mask(addr), gpc, tpc_num); @@ -3564,7 +3645,14 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, "priv_addr_table[%d]:%#08x", *t, priv_addr_table[*t]); (*t)++; + } } + } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) { + tpc_num = 0; + gpc_num = 0; + gv11b_gr_update_priv_addr_table_smpc( + g, gpc_num, tpc_num, addr, + priv_addr_table, t); } else { priv_addr_table[*t] = addr; nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", -- cgit v1.2.2 From 192afccf7c9982ea47b46fd4b7ace4114ff7b45e Mon Sep 17 00:00:00 2001 From: Deepak 
Goyal Date: Fri, 22 Sep 2017 15:36:36 +0530 Subject: gpu: nvgpu: gv11b: skip clk gating prog for pre-si For pre-silicon platforms, clock gating should be skipped as it is not supported. Added new flags "can_"x"lcg" to check platform capability before programming SLCG,BLCG and ELCG. Bug 200314250 Change-Id: Iec7564b00b988cdd50a02f3130662727839c5047 Signed-off-by: Deepak Goyal Reviewed-on: https://git-master.nvidia.com/r/1566251 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f209012c..da632aac 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2352,6 +2352,9 @@ void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) { u32 gate_ctrl; + if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_ELCG)) + return; + gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); switch (mode) { -- cgit v1.2.2 From 6fe9bdeb9af81d42d6f5d8edcc98487d3fb155ea Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Mon, 25 Sep 2017 15:39:09 -0700 Subject: gpu: nvgpu: gv11b: track init veid bundle Add debug prints to track veid bundle init and also return err for subctx init failure. 
Bug 1983643 Change-Id: I9e6a32e76b1c7deba3a47157ba253976d88b2324 Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1568070 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index da632aac..da683af6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2023,7 +2023,7 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) return 0; } -static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) +static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) { struct av_list_gk20a *sw_veid_bundle_init = &g->gr.ctx_vars.sw_veid_bundle_init; @@ -2033,7 +2033,7 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) num_subctx = g->fifo.t19x.max_subctx_count; for (j = 0; j < num_subctx; j++) { - + nvgpu_log_fn(g, "write bundle_address_r for subctx: %d", j); gk20a_writel(g, gr_pipe_bundle_address_r(), sw_veid_bundle_init->l[index].addr | gr_pipe_bundle_address_veid_f(j)); @@ -2041,6 +2041,7 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g), GR_IDLE_CHECK_DEFAULT); } + return err; } int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) @@ -2051,30 +2052,34 @@ int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) u32 last_bundle_data = 0; u32 err = 0; - gk20a_dbg_fn(""); for (i = 0; i < sw_veid_bundle_init->count; i++) { + nvgpu_log_fn(g, "veid bundle count: %d", i); if (i == 0 || last_bundle_data != sw_veid_bundle_init->l[i].value) { gk20a_writel(g, gr_pipe_bundle_data_r(), sw_veid_bundle_init->l[i].value); last_bundle_data = sw_veid_bundle_init->l[i].value; + nvgpu_log_fn(g, "last_bundle_data : 0x%08x", + last_bundle_data); } if 
(gr_pipe_bundle_address_value_v( sw_veid_bundle_init->l[i].addr) == GR_GO_IDLE_BUNDLE) { + nvgpu_log_fn(g, "go idle bundle"); gk20a_writel(g, gr_pipe_bundle_address_r(), sw_veid_bundle_init->l[i].addr); err |= gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), GR_IDLE_CHECK_DEFAULT); } else - gv11b_write_bundle_veid_state(g, i); + err = gv11b_write_bundle_veid_state(g, i); - if (err) + if (err) { + nvgpu_err(g, "failed to init sw veid bundle"); break; + } } - gk20a_dbg_fn("done"); return err; } -- cgit v1.2.2 From 6647e5c9569258fbf3db096275a79f86f86ed3a6 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Thu, 5 Oct 2017 10:25:51 -0700 Subject: gpu: nvgpu: gv11b: disable cycle stat Feature will be enabled after it is verified. To disable cycle stat, do not set NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS and NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT Bug 200352825 Change-Id: I3f0d58a8095f3a0996964056029c12cff45f0a5b Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1573760 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index da683af6..c3f19fa2 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1688,12 +1688,7 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, void gr_gv11b_init_cyclestats(struct gk20a *g) { #if defined(CONFIG_GK20A_CYCLE_STATS) - g->gpu_characteristics.flags |= - NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; - g->gpu_characteristics.flags |= - NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT; -#else - (void)g; + /* TODO: waiting for cycle stats to be implemented for gv11b */ #endif } -- cgit v1.2.2 From a693acc5b45020b359bf11f0370b7dd63d4f994c Mon Sep 17 00:00:00 2001 From: Timo Alho Date: Thu, 12 Oct 2017 10:22:48 -0700 Subject: Revert "gpu: nvgpu: 
gv11b: disable cycle stat" This reverts commit 6647e5c9569258fbf3db096275a79f86f86ed3a6. Bug 200352825 Change-Id: Ia44d61eafce78f99be2271e0afaf69cd5c102080 Signed-off-by: Timo Alho Reviewed-on: https://git-master.nvidia.com/r/1577920 Reviewed-by: svc-mobile-coverity Reviewed-by: Terje Bergstrom Reviewed-by: Seshendra Gadagottu Tested-by: Seshendra Gadagottu GVS: Gerrit_Virtual_Submit Reviewed-by: Srikar Srimath Tirumala --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index c3f19fa2..da683af6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1688,7 +1688,12 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, void gr_gv11b_init_cyclestats(struct gk20a *g) { #if defined(CONFIG_GK20A_CYCLE_STATS) - /* TODO: waiting for cycle stats to be implemented for gv11b */ + g->gpu_characteristics.flags |= + NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; + g->gpu_characteristics.flags |= + NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT; +#else + (void)g; #endif } -- cgit v1.2.2 From 99cae3dff71433c21f85bb7f03e42050db8a33dc Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 13 Oct 2017 08:12:58 -0700 Subject: gpu: nvgpu: gv11b: Use internal nvgpu_warpstate Replace use of ioctl structure warpstate with internal nvgpu_warpstate. 
JIRA NVGPU-259 Change-Id: I003c15152042e566124c04d6124e515e36157c88 Signed-off-by: Terje Bergstrom Reviewed-on: https://git-master.nvidia.com/r/1578683 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index da683af6..aac6cba3 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2504,7 +2504,7 @@ int gv11b_gr_sm_trigger_suspend(struct gk20a *g) return 0; } -void gv11b_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) +void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) { /* Check if we have at least one valid warp * get paused state on maxwell -- cgit v1.2.2 From 387ecf8a6360f463a129ab569aaef921fe0a2b0e Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 18 Oct 2017 11:40:46 -0700 Subject: gpu: nvgpu: gv1xx: Remove HAL for restore_context_header gr restore_context_header is not required any more after enabling per context va mode for subcontext. Cleaning-up unused function pointers from gv100 and gv11b HAL. 
Change-Id: I65cc7d12d3c96726d323defd99726c3e259e7e63 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/1581432 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index aac6cba3..154088d6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2306,24 +2306,6 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, return 0; } -void gv11b_restore_context_header(struct gk20a *g, - struct nvgpu_mem *ctxheader) -{ - u32 va_lo, va_hi; - struct gr_gk20a *gr = &g->gr; - - va_hi = nvgpu_mem_rd(g, ctxheader, - ctxsw_prog_main_image_context_buffer_ptr_hi_o()); - va_lo = nvgpu_mem_rd(g, ctxheader, - ctxsw_prog_main_image_context_buffer_ptr_o()); - nvgpu_mem_wr_n(g, ctxheader, 0, - gr->ctx_vars.local_golden_image, - gr->ctx_vars.golden_image_size); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_context_buffer_ptr_hi_o(), va_hi); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_context_buffer_ptr_o(), va_lo); -} void gr_gv11b_write_zcull_ptr(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { -- cgit v1.2.2 From c6ccb5f2a1e9a8999436f6c28ed5c416c5418ae3 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Mon, 23 Oct 2017 10:20:12 -0700 Subject: gpu: nvgpu: gv11b: use scg perf for smid numbering For SCG to work, smid numbering needs to be done based on scg performance of tpcs. For gv11b and gv11b vgpu, reuse gv100 function "gr_gv100_init_sm_id_table" to do this. Used local variable "index" to avoid multiple computations in the function: gr_gv100_init_sm_id_table index = sm_id + sm Add debug info for printing initialized gpc/tpc/sm/global_tpc indices. 
Bug 1842197 Change-Id: Ibf10f47f10a8ca58b86c307a22e159b2cc0d0f43 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/1583916 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 154088d6..fc894908 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2125,33 +2125,6 @@ void gr_gv11b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_warp_count_v(v); } -void gr_gv11b_init_sm_id_table(struct gk20a *g) -{ - u32 gpc, tpc, sm; - u32 sm_id = 0; - u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - - /* TODO populate smids based on power efficiency */ - for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { - - if (tpc >= g->gr.gpc_tpc_count[gpc]) - continue; - - for (sm = 0; sm < sm_per_tpc; sm++) { - g->gr.sm_to_cluster[sm_id].tpc_index = tpc; - g->gr.sm_to_cluster[sm_id].gpc_index = gpc; - g->gr.sm_to_cluster[sm_id].sm_index = sm_id % 2; - g->gr.sm_to_cluster[sm_id].global_tpc_index = - tpc; - sm_id++; - } - } - } - g->gr.no_of_sm = sm_id; - nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm); -} - void gr_gv11b_program_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid) { -- cgit v1.2.2 From 1cbb5ea0235f15180b1d4299499cab85db23c5ce Mon Sep 17 00:00:00 2001 From: Peter Daifuku Date: Fri, 13 Oct 2017 17:15:46 -0700 Subject: gpu: nvgpu: init_cyclestats fixes - in the native case, replace calls for init_cyclestats with the gm20b version, as each chip had identical versions of the code. - in the virtual case, use the vgpu version of the function in order to get the new max_css_buffer_size characteristic set to the mempool size. 
JIRA ESRM-54 Bug 200296210 Change-Id: I475876cb392978fb1350ede58e37d0962ae095c3 Signed-off-by: Peter Daifuku Reviewed-on: https://git-master.nvidia.com/r/1578934 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index fc894908..59a04d2c 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1684,19 +1684,6 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } - -void gr_gv11b_init_cyclestats(struct gk20a *g) -{ -#if defined(CONFIG_GK20A_CYCLE_STATS) - g->gpu_characteristics.flags |= - NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; - g->gpu_characteristics.flags |= - NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT; -#else - (void)g; -#endif -} - void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -- cgit v1.2.2 From 33c707d60b116845c953b91c2693a82a4d1eb968 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Wed, 25 Oct 2017 14:33:05 -0700 Subject: gpu: nvgpu: Linux specific sm_error_state_record Create an nvgpu internal nvgpu_gr_sm_error_state to store and propagate SM error state within driver. Use nvgpu_dbg_gpu_sm_error_state_record only in Linux code. 
JIRA NVGPU-259 Change-Id: Ia2b347d0054365bdc790b4d6f2653a568935bdb0 Signed-off-by: Terje Bergstrom Reviewed-on: https://git-master.nvidia.com/r/1585646 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 59a04d2c..2f33301d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2516,7 +2516,7 @@ void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) int gv11b_gr_update_sm_error_state(struct gk20a *g, struct channel_gk20a *ch, u32 sm_id, - struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) + struct nvgpu_gr_sm_error_state *sm_error_state) { u32 gpc, tpc, sm, offset; struct gr_gk20a *gr = &g->gr; -- cgit v1.2.2 From d393d3294ff8f1771484435d4edbb61df1e4f433 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 30 Oct 2017 13:37:27 +0530 Subject: gpu: nvgpu: use nvgpu_* APIs to allocate/free memory Use nvgpu specific nvgpu_kcalloc()/nvgpu_kfree() calls instead of linux specific kcalloc()/kfree() Jira NVGPU-259 Change-Id: I73034ea23561d1269230b9ac10360f8b171b8d41 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1588221 Reviewed-by: Konsta Holtta Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 2f33301d..568673aa 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2138,7 +2138,7 @@ int gr_gv11b_load_smid_config(struct gk20a *g) u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); int 
num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); - tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); + tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32)); if (!tpc_sm_id) return -ENOMEM; @@ -2173,7 +2173,7 @@ int gr_gv11b_load_smid_config(struct gk20a *g) for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); - kfree(tpc_sm_id); + nvgpu_kfree(g, tpc_sm_id); return 0; } -- cgit v1.2.2 From 075852f042b9b3a3d48180378e6d2a709708cc41 Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Mon, 6 Nov 2017 13:44:44 +0200 Subject: gpu: nvgpu: Switch to newer NVGPU_AS_MAP_BUFFER flags Switch two cases using the old NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_* flags to the newer definitions, that is, NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE. The legacy NVGPU_MAP_BUFFER_FLAGS_* definitions have been deleted. Bug 1902982 Change-Id: Ifbd2678b10005b4af2375600888469b01dd09f4e Signed-off-by: Sami Kiminki Reviewed-on: https://git-master.nvidia.com/r/1592655 Reviewed-by: Alex Waterman Reviewed-by: Konsta Holtta Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 568673aa..22377522 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1349,7 +1349,7 @@ int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, - NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE, gk20a_mem_flag_none, false, mem->aperture); -- cgit v1.2.2 From 01e5b17e08cc9bde4d8bfbefb09828ae897a6df0 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 7 Nov 2017 10:06:45 -0800 Subject: gpu: nvgpu: gv11b: Move sm_arch to nvgpu_gpu_params Move sm_arch_* fields to 
nvgpu_gpu_params to make them available from common code without accessing Linux specific GPU characteristics. JIRA NVGPU-259 Change-Id: I8e7b542642b620f161d62954400777079065f49d Signed-off-by: Terje Bergstrom Reviewed-on: https://git-master.nvidia.com/r/1593692 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 22377522..9d97a61f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2104,11 +2104,11 @@ void gr_gv11b_detect_sm_arch(struct gk20a *g) { u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); - g->gpu_characteristics.sm_arch_spa_version = + g->params.sm_arch_spa_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v); - g->gpu_characteristics.sm_arch_sm_version = + g->params.sm_arch_sm_version = gr_gpc0_tpc0_sm_arch_sm_version_v(v); - g->gpu_characteristics.sm_arch_warp_count = + g->params.sm_arch_warp_count = gr_gpc0_tpc0_sm_arch_warp_count_v(v); } -- cgit v1.2.2 From d64241cb5a0ca21ae2c88419d34ad79715a4588a Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 10 Nov 2017 11:41:17 -0800 Subject: gpu: nvgpu: Include UAPI explicitly Add explicit #includes for the UAPI header for source code files that depend on it. 
JIRA NVGPU-388 Change-Id: I5d834e6f3b413cee9b1e4e055d710fc9f2c8f7c2 Signed-off-by: Terje Bergstrom Reviewed-on: https://git-master.nvidia.com/r/1596246 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9d97a61f..4da2ef59 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include -- cgit v1.2.2 From 0f5202368781c5398e3d026dc408d79a37ad5aed Mon Sep 17 00:00:00 2001 From: Peter Daifuku Date: Wed, 8 Nov 2017 18:32:26 -0800 Subject: gpu: nvgpu: ctx_patch_write fixes - Update commit_global_timeslice to remove unused patch parameter - Update calls to ctx_patch_write_begin/end to add update_patch_count param JIRA ESRM-74 Bug 2012077 Change-Id: Ie2e640dfa0ab7193a062a58f588575f220e5efd3 Signed-off-by: Peter Daifuku Reviewed-on: https://git-master.nvidia.com/r/1594791 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 4da2ef59..3a3406f9 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2214,8 +2214,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) -int gr_gv11b_commit_global_timeslice(struct gk20a *g, - struct channel_gk20a *c, bool patch) +int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) { struct channel_ctx_gk20a *ch_ctx = NULL; u32 pd_ab_dist_cfg0; @@ -2230,15 +2229,6 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, ds_debug = gk20a_readl(g, 
gr_ds_debug_r()); mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r()); - if (patch) { - int err; - - ch_ctx = &c->ch_ctx; - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); - if (err) - return err; - } - pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r()); pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r()); @@ -2252,17 +2242,14 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, mpc_vtg_debug; gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, - patch); + false); gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), - pe_vsc_vpc, patch); + pe_vsc_vpc, false); gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), - pd_ab_dist_cfg0, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch); + pd_ab_dist_cfg0, false); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), - mpc_vtg_debug, patch); - - if (patch) - gr_gk20a_ctx_patch_write_end(g, ch_ctx); + mpc_vtg_debug, false); return 0; } @@ -2568,7 +2555,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g, gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset, gr->sm_error_states[sm_id].hww_warp_esr_report_mask); } else { - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); if (err) goto enable_ctxsw; @@ -2583,7 +2570,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g, gr->sm_error_states[sm_id].hww_warp_esr_report_mask, true); - gr_gk20a_ctx_patch_write_end(g, ch_ctx); + gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); } enable_ctxsw: -- cgit v1.2.2 From 4dbf6f7bd600750461d6e747c00df99999e2be2f Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 13 Nov 2017 13:05:13 +0530 Subject: gpu: nvgpu: define preemption modes in common code Use common preemption modes in common code instead of using linux specific definitions Jira NVGPU-392 Change-Id: Iff65ab4278973f2e2d7db33f6fedb561b2164c42 
Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1596931 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3a3406f9..3d817d7e 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1765,7 +1765,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, if (fault_ch) cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == - NVGPU_COMPUTE_PREEMPTION_MODE_CILP); + NVGPU_PREEMPTION_MODE_COMPUTE_CILP); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d sm %d = 0x%08x", -- cgit v1.2.2