From 4b5b67d6d83430d8d670660b1dfc9cf024d60d88 Mon Sep 17 00:00:00 2001 From: Sunny He Date: Thu, 17 Aug 2017 16:11:34 -0700 Subject: gpu: nvgpu: Reorg gr HAL initialization Reorganize HAL initialization to remove inheritance and construct the gpu_ops struct at compile time. This patch only covers the gr sub-module of the gpu_ops struct. Perform HAL function assignments in hal_gxxxx.c through the population of a chip-specific copy of gpu_ops. Jira NVGPU-74 Change-Id: Ie37638f442fd68aca8a7ade5f297118447bdc91e Signed-off-by: Sunny He Reviewed-on: https://git-master.nvidia.com/r/1542989 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svc-mobile-coverity Reviewed-by: svccoveritychecker Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 222 +++++++++--------------------------- drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 87 +++++++++++++- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 120 ++++++++++++++++++- drivers/gpu/nvgpu/gp106/gr_gp106.c | 26 +---- drivers/gpu/nvgpu/gp106/gr_gp106.h | 11 +- drivers/gpu/nvgpu/gp106/hal_gp106.c | 127 ++++++++++++++++++++- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 136 +++++++--------------- drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 86 +++++++++++++- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 129 ++++++++++++++++++++- 9 files changed, 649 insertions(+), 295 deletions(-) diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 5fcc3f7b..30991102 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -36,7 +36,7 @@ #include #include -static void gr_gm20b_init_gpc_mmu(struct gk20a *g) +void gr_gm20b_init_gpc_mmu(struct gk20a *g) { u32 temp; @@ -73,7 +73,7 @@ static void gr_gm20b_init_gpc_mmu(struct gk20a *g) gk20a_readl(g, fb_fbhub_num_active_ltcs_r())); } -static void gr_gm20b_bundle_cb_defaults(struct gk20a *g) +void gr_gm20b_bundle_cb_defaults(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -85,7 +85,7 @@ 
static void gr_gm20b_bundle_cb_defaults(struct gk20a *g) gr_pd_ab_dist_cfg2_token_limit_init_v(); } -static void gr_gm20b_cb_size_default(struct gk20a *g) +void gr_gm20b_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -96,7 +96,7 @@ static void gr_gm20b_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); } -static int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) +int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int size; @@ -134,7 +134,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); } -static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, +void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u64 size, bool patch) { @@ -170,7 +170,7 @@ static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, } -static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, +int gr_gm20b_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct gr_gk20a *gr = &g->gr; @@ -250,7 +250,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, return 0; } -static void gr_gm20b_commit_global_pagepool(struct gk20a *g, +void gr_gm20b_commit_global_pagepool(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u32 size, bool patch) { @@ -276,7 +276,7 @@ void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data) gk20a_dbg_fn("done"); } -static int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, +int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { gk20a_dbg_fn(""); @@ -318,7 +318,7 @@ fail: return -EINVAL; } -static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -368,7 +368,7 @@ static void 
gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) } } -static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) +void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -423,7 +423,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) } } -static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) +void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) { /* setup sm warp esr report masks */ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), @@ -456,7 +456,7 @@ static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f()); } -static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) +bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -476,7 +476,7 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) +bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) { if (class_num == MAXWELL_B) return true; @@ -484,7 +484,7 @@ static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) return false; } -static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) +bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) { if (class_num == MAXWELL_COMPUTE_B) return true; @@ -502,7 +502,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 2; static u32 *_sm_dsm_perf_regs; static u32 _sm_dsm_perf_ctrl_regs[2]; -static void gr_gm20b_init_sm_dsm_reg_info(void) +void gr_gm20b_init_sm_dsm_reg_info(void) { if (_sm_dsm_perf_ctrl_regs[0] != 0) return; @@ -513,7 +513,7 @@ static void gr_gm20b_init_sm_dsm_reg_info(void) gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); } -static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, +void 
gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, u32 *num_sm_dsm_perf_regs, u32 **sm_dsm_perf_regs, u32 *perf_register_stride) @@ -523,7 +523,7 @@ static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, *perf_register_stride = 0; } -static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, +void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs, u32 *ctrl_register_stride) @@ -535,7 +535,7 @@ static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); } -static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { u32 val; struct gr_gk20a *gr = &g->gr; @@ -546,7 +546,7 @@ static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); } -static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_access_sw(g, 0x0); @@ -563,7 +563,7 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) } } -static void gr_gm20b_load_tpc_mask(struct gk20a *g) +void gr_gm20b_load_tpc_mask(struct gk20a *g) { u32 pes_tpc_mask = 0, fuse_tpc_mask; u32 gpc, pes; @@ -588,7 +588,7 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g) } } -static void gr_gm20b_program_sm_id_numbering(struct gk20a *g, +void gr_gm20b_program_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid) { u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -604,7 +604,7 @@ static void gr_gm20b_program_sm_id_numbering(struct gk20a *g, gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); } -static int gr_gm20b_load_smid_config(struct gk20a *g) +int gr_gm20b_load_smid_config(struct gk20a *g) { u32 *tpc_sm_id; u32 i, j; @@ -669,7 +669,7 @@ int gr_gm20b_init_fs_state(struct gk20a *g) 
return 0; } -static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, +int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) { gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), @@ -697,7 +697,7 @@ static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr) tpc_in_gpc_stride)); } -static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) +bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) { u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); @@ -708,7 +708,7 @@ static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) || gr_gm20b_is_tpc_addr_shared(g, addr); } -static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) +u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) { u32 i, start; u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); @@ -738,7 +738,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) gr_fecs_falcon_hwcfg_r()); } -static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) +int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) { u32 err, flags; u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - @@ -819,14 +819,14 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) } #else -static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) +int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) { return -EPERM; } #endif -static void gr_gm20b_detect_sm_arch(struct gk20a *g) +void gr_gm20b_detect_sm_arch(struct gk20a *g) { u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); @@ -838,12 +838,12 @@ static void gr_gm20b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_warp_count_v(v); } -static u32 gr_gm20b_pagepool_default_size(struct gk20a *g) +u32 gr_gm20b_pagepool_default_size(struct gk20a *g) { return gr_scc_pagepool_total_pages_hwmax_value_v(); } -static int gr_gm20b_alloc_gr_ctx(struct gk20a *g, +int gr_gm20b_alloc_gr_ctx(struct gk20a 
*g, struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) @@ -864,7 +864,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g, return 0; } -static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, +void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, struct nvgpu_mem *mem) { @@ -884,7 +884,7 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, gk20a_dbg_fn("done"); } -static int gr_gm20b_dump_gr_status_regs(struct gk20a *g, +int gr_gm20b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { struct gr_gk20a *gr = &g->gr; @@ -1022,7 +1022,7 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g, return 0; } -static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, +int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, bool enable) { struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; @@ -1051,7 +1051,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, return 0; } -static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) +u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) { u32 fbp_en_mask, opt_fbio; u32 tmp, max_fbps_count; @@ -1066,7 +1066,7 @@ static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) return fbp_en_mask; } -static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) +u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) { u32 ltc_per_fbp, reg; reg = gk20a_readl(g, top_ltc_per_fbp_r()); @@ -1074,7 +1074,7 @@ static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) return ltc_per_fbp; } -static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) +u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) { u32 lts_per_ltc, reg; reg = gk20a_readl(g, top_slices_per_ltc_r()); @@ -1082,7 +1082,7 @@ static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) return lts_per_ltc; } -static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) +u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; u32 i, tmp, max_fbps_count, max_ltc_per_fbp; @@ -1102,7 
+1102,7 @@ static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) return gr->fbp_rop_l2_en_mask; } -static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) +u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) { u32 tmp, max_fbps_count; tmp = gk20a_readl(g, top_num_fbps_r()); @@ -1110,7 +1110,7 @@ static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) return max_fbps_count; } -static void gr_gm20b_init_cyclestats(struct gk20a *g) +void gr_gm20b_init_cyclestats(struct gk20a *g) { #if defined(CONFIG_GK20A_CYCLE_STATS) g->gpu_characteristics.flags |= @@ -1122,7 +1122,7 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g) #endif } -static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) +void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) { u32 cde_v; @@ -1131,7 +1131,7 @@ static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); } -static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) +void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) { /* Check if we have at least one valid warp */ /* get paused state on maxwell */ @@ -1210,7 +1210,7 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) } } -static void gr_gm20b_get_access_map(struct gk20a *g, +void gr_gm20b_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries) { static u32 wl_addr_gm20b[] = { @@ -1251,7 +1251,7 @@ static void gr_gm20b_get_access_map(struct gk20a *g, *num_entries = ARRAY_SIZE(wl_addr_gm20b); } -static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) +int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) { int sm_id; struct gr_gk20a *gr = &g->gr; @@ -1281,7 +1281,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) return 0; } -static int gm20b_gr_update_sm_error_state(struct gk20a *g, +int gm20b_gr_update_sm_error_state(struct 
gk20a *g, struct channel_gk20a *ch, u32 sm_id, struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) { @@ -1353,7 +1353,7 @@ fail: return err; } -static int gm20b_gr_clear_sm_error_state(struct gk20a *g, +int gm20b_gr_clear_sm_error_state(struct gk20a *g, struct channel_gk20a *ch, u32 sm_id) { u32 gpc, tpc, offset; @@ -1394,7 +1394,7 @@ fail: return err; } -static int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, +int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, struct nvgpu_preemption_modes_rec *preemption_modes_rec) { preemption_modes_rec->graphics_preemption_mode_flags = @@ -1421,7 +1421,7 @@ int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask) return 0; } -static int gm20b_gr_fuse_override(struct gk20a *g) +int gm20b_gr_fuse_override(struct gk20a *g) { struct device_node *np = dev_from_gk20a(g)->of_node; u32 *fuses; @@ -1457,7 +1457,7 @@ static int gm20b_gr_fuse_override(struct gk20a *g) return 0; } -static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) +bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) { u32 ltc_shared_base = ltc_ltcs_ltss_v(); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); @@ -1466,7 +1466,7 @@ static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) (addr < (ltc_shared_base + lts_stride)); } -static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) +bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) { u32 lts_shared_base = ltc_ltc0_ltss_v(); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); @@ -1498,7 +1498,7 @@ static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, *priv_addr_table_index = index; } -static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, +void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, u32 *priv_addr_table, u32 *priv_addr_table_index) { @@ -1518,7 +1518,7 @@ static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, priv_addr_table_index); } 
-static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, +void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, u32 *priv_addr_table, u32 *priv_addr_table_index) { @@ -1530,7 +1530,7 @@ static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, priv_addr_table, priv_addr_table_index); } -static void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, +void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr) { u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); @@ -1559,121 +1559,3 @@ void gm20a_gr_disable_rd_coalesce(struct gk20a *g) gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); } - -void gm20b_init_gr(struct gk20a *g) -{ - struct gpu_ops *gops = &g->ops; - - gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; - gops->gr.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults; - gops->gr.cb_size_default = gr_gm20b_cb_size_default; - gops->gr.calc_global_ctx_buffer_size = - gr_gm20b_calc_global_ctx_buffer_size; - gops->gr.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb; - gops->gr.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb; - gops->gr.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager; - gops->gr.commit_global_pagepool = gr_gm20b_commit_global_pagepool; - gops->gr.handle_sw_method = gr_gm20b_handle_sw_method; - gops->gr.set_alpha_circular_buffer_size = gr_gm20b_set_alpha_circular_buffer_size; - gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; - gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; - gops->gr.is_valid_class = gr_gm20b_is_valid_class; - gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class; - gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class; - gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; - gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; - gops->gr.init_fs_state = gr_gm20b_init_fs_state; - 
gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; - gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; - if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) - gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; - else - gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; - gops->gr.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask; - gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask; - gops->gr.free_channel_ctx = gk20a_free_channel_ctx; - gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; - gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; - gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; - gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr; - gops->gr.get_tpc_num = gr_gm20b_get_tpc_num; - gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch; - gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; - gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; - gops->gr.zbc_set_table = gk20a_gr_zbc_set_table; - gops->gr.zbc_query_table = gr_gk20a_query_zbc; - gops->gr.pmu_save_zbc = gk20a_pmu_save_zbc; - gops->gr.add_zbc = gr_gk20a_add_zbc; - gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; - gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; - gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx; - gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; - gops->gr.update_ctxsw_preemption_mode = - gr_gm20b_update_ctxsw_preemption_mode; - gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs; - gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling; - gops->gr.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask; - gops->gr.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp; - gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; - gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; - gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; - gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; - gops->gr.wait_empty = gr_gk20a_wait_idle; - gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; - gops->gr.set_sm_debug_mode = 
gr_gk20a_set_sm_debug_mode; - gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; - gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info; - gops->gr.get_access_map = gr_gm20b_get_access_map; - gops->gr.handle_fecs_error = gk20a_gr_handle_fecs_error; - gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception; - gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception; - gops->gr.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions; - gops->gr.enable_exceptions = gk20a_gr_enable_exceptions; - gops->gr.get_lrf_tex_ltc_dram_override = NULL; - gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; - gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; - gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state; - gops->gr.update_sm_error_state = gm20b_gr_update_sm_error_state; - gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state; - gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; - gops->gr.resume_contexts = gr_gk20a_resume_contexts; - gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; - gops->gr.fuse_override = gm20b_gr_fuse_override; - gops->gr.init_sm_id_table = gr_gk20a_init_sm_id_table; - gops->gr.load_smid_config = gr_gm20b_load_smid_config; - gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; - gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr; - gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr; - gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr; - gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr; - gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping; - gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping; - gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice; - gops->gr.commit_inst = gr_gk20a_commit_inst; - gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; - gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; - gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; - 
gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask; - gops->gr.inval_icache = gr_gk20a_inval_icache; - gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; - gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; - gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; - gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; - gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; - gops->gr.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel; - gops->gr.sm_debugger_attached = gk20a_gr_sm_debugger_attached; - gops->gr.suspend_single_sm = gk20a_gr_suspend_single_sm; - gops->gr.suspend_all_sms = gk20a_gr_suspend_all_sms; - gops->gr.resume_single_sm = gk20a_gr_resume_single_sm; - gops->gr.resume_all_sms = gk20a_gr_resume_all_sms; - gops->gr.get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr; - gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr; - gops->gr.get_sm_no_lock_down_hww_global_esr_mask = - gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask; - gops->gr.lock_down_sm = gk20a_gr_lock_down_sm; - gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down; - gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww; - gops->gr.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf; - gops->gr.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs; - gops->gr.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce; -} diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 116a92f4..f81aa728 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -34,12 +34,94 @@ enum { #define NVB1C0_SET_RD_COALESCE 0x0228 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 -void gm20b_init_gr(struct gk20a *g); + void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, bool patch); int gr_gm20b_init_fs_state(struct gk20a *g); int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data); - +void gm20a_gr_disable_rd_coalesce(struct
gk20a *g); +void gr_gm20b_init_gpc_mmu(struct gk20a *g); +void gr_gm20b_bundle_cb_defaults(struct gk20a *g); +void gr_gm20b_cb_size_default(struct gk20a *g); +int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); +void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u64 size, bool patch); +int gr_gm20b_commit_global_cb_manager(struct gk20a *g, + struct channel_gk20a *c, bool patch); +void gr_gm20b_commit_global_pagepool(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch); +int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data); +void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); +void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data); +void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g); +bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num); +bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num); +bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num); +void gr_gm20b_init_sm_dsm_reg_info(void); +void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride); +void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_ctrl_regs, + u32 **sm_dsm_perf_ctrl_regs, + u32 *ctrl_register_stride); +u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); +void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); +void gr_gm20b_load_tpc_mask(struct gk20a *g); +void gr_gm20b_program_sm_id_numbering(struct gk20a *g, + u32 gpc, u32 tpc, u32 smid); +int gr_gm20b_load_smid_config(struct gk20a *g); +int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); +bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr); +u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr); +int gr_gm20b_load_ctxsw_ucode(struct gk20a
*g); +void gr_gm20b_detect_sm_arch(struct gk20a *g); +u32 gr_gm20b_pagepool_default_size(struct gk20a *g); +int gr_gm20b_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + u32 class, + u32 flags); +void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_mem *mem); +int gr_gm20b_dump_gr_status_regs(struct gk20a *g, + struct gk20a_debug_output *o); +int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, + bool enable); +u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g); +u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g); +u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g); +u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g); +u32 gr_gm20b_get_max_fbps_count(struct gk20a *g); +void gr_gm20b_init_cyclestats(struct gk20a *g); +void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem); +void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state); +void gr_gm20b_get_access_map(struct gk20a *g, + u32 **whitelist, int *num_entries); +int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc); +int gm20b_gr_update_sm_error_state(struct gk20a *g, + struct channel_gk20a *ch, u32 sm_id, + struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state); +int gm20b_gr_clear_sm_error_state(struct gk20a *g, + struct channel_gk20a *ch, u32 sm_id); +int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, + struct nvgpu_preemption_modes_rec *preemption_modes_rec); +int gm20b_gr_fuse_override(struct gk20a *g); +bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr); +bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr); +void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, + u32 *priv_addr_table, + u32 *priv_addr_table_index); +void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, + u32 *priv_addr_table, + u32 *priv_addr_table_index); +void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32
tpc, u32 sm, + u32 global_esr); #endif diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index aa953ca5..b77f10d2 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -27,6 +27,7 @@ #include "gk20a/priv_ring_gk20a.h" #include "gk20a/regops_gk20a.h" #include "gk20a/pmu_gk20a.h" +#include "gk20a/gr_gk20a.h" #include "ltc_gm20b.h" #include "gr_gm20b.h" @@ -170,6 +171,118 @@ static const struct gpu_ops gm20b_ops = { .isr_stall = gk20a_ce2_isr, .isr_nonstall = gk20a_ce2_nonstall_isr, }, + .gr = { + .init_gpc_mmu = gr_gm20b_init_gpc_mmu, + .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, + .cb_size_default = gr_gm20b_cb_size_default, + .calc_global_ctx_buffer_size = + gr_gm20b_calc_global_ctx_buffer_size, + .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb, + .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb, + .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager, + .commit_global_pagepool = gr_gm20b_commit_global_pagepool, + .handle_sw_method = gr_gm20b_handle_sw_method, + .set_alpha_circular_buffer_size = + gr_gm20b_set_alpha_circular_buffer_size, + .set_circular_buffer_size = gr_gm20b_set_circular_buffer_size, + .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, + .is_valid_class = gr_gm20b_is_valid_class, + .is_valid_gfx_class = gr_gm20b_is_valid_gfx_class, + .is_valid_compute_class = gr_gm20b_is_valid_compute_class, + .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, + .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, + .init_fs_state = gr_gm20b_init_fs_state, + .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, + .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, + .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, + .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, + .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, + .free_channel_ctx = gk20a_free_channel_ctx, + .alloc_obj_ctx = gk20a_alloc_obj_ctx, + .bind_ctxsw_zcull = 
gr_gk20a_bind_ctxsw_zcull, + .get_zcull_info = gr_gk20a_get_zcull_info, + .is_tpc_addr = gr_gm20b_is_tpc_addr, + .get_tpc_num = gr_gm20b_get_tpc_num, + .detect_sm_arch = gr_gm20b_detect_sm_arch, + .add_zbc_color = gr_gk20a_add_zbc_color, + .add_zbc_depth = gr_gk20a_add_zbc_depth, + .zbc_set_table = gk20a_gr_zbc_set_table, + .zbc_query_table = gr_gk20a_query_zbc, + .pmu_save_zbc = gk20a_pmu_save_zbc, + .add_zbc = gr_gk20a_add_zbc, + .pagepool_default_size = gr_gm20b_pagepool_default_size, + .init_ctx_state = gr_gk20a_init_ctx_state, + .alloc_gr_ctx = gr_gm20b_alloc_gr_ctx, + .free_gr_ctx = gr_gk20a_free_gr_ctx, + .update_ctxsw_preemption_mode = + gr_gm20b_update_ctxsw_preemption_mode, + .dump_gr_regs = gr_gm20b_dump_gr_status_regs, + .update_pc_sampling = gr_gm20b_update_pc_sampling, + .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, + .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp, + .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc, + .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, + .get_max_fbps_count = gr_gm20b_get_max_fbps_count, + .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, + .wait_empty = gr_gk20a_wait_idle, + .init_cyclestats = gr_gm20b_init_cyclestats, + .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode, + .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, + .bpt_reg_info = gr_gm20b_bpt_reg_info, + .get_access_map = gr_gm20b_get_access_map, + .handle_fecs_error = gk20a_gr_handle_fecs_error, + .handle_sm_exception = gr_gk20a_handle_sm_exception, + .handle_tex_exception = gr_gk20a_handle_tex_exception, + .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, + .enable_exceptions = gk20a_gr_enable_exceptions, + .get_lrf_tex_ltc_dram_override = NULL, + .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, + .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, + .record_sm_error_state = gm20b_gr_record_sm_error_state, + .update_sm_error_state = gm20b_gr_update_sm_error_state, + .clear_sm_error_state = gm20b_gr_clear_sm_error_state, + 
.suspend_contexts = gr_gk20a_suspend_contexts, + .resume_contexts = gr_gk20a_resume_contexts, + .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags, + .fuse_override = gm20b_gr_fuse_override, + .init_sm_id_table = gr_gk20a_init_sm_id_table, + .load_smid_config = gr_gm20b_load_smid_config, + .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, + .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, + .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, + .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, + .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, + .setup_rop_mapping = gr_gk20a_setup_rop_mapping, + .program_zcull_mapping = gr_gk20a_program_zcull_mapping, + .commit_global_timeslice = gr_gk20a_commit_global_timeslice, + .commit_inst = gr_gk20a_commit_inst, + .write_zcull_ptr = gr_gk20a_write_zcull_ptr, + .write_pm_ptr = gr_gk20a_write_pm_ptr, + .init_elcg_mode = gr_gk20a_init_elcg_mode, + .load_tpc_mask = gr_gm20b_load_tpc_mask, + .inval_icache = gr_gk20a_inval_icache, + .trigger_suspend = gr_gk20a_trigger_suspend, + .wait_for_pause = gr_gk20a_wait_for_pause, + .resume_from_pause = gr_gk20a_resume_from_pause, + .clear_sm_errors = gr_gk20a_clear_sm_errors, + .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, + .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, + .sm_debugger_attached = gk20a_gr_sm_debugger_attached, + .suspend_single_sm = gk20a_gr_suspend_single_sm, + .suspend_all_sms = gk20a_gr_suspend_all_sms, + .resume_single_sm = gk20a_gr_resume_single_sm, + .resume_all_sms = gk20a_gr_resume_all_sms, + .get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr, + .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr, + .get_sm_no_lock_down_hww_global_esr_mask = + gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, + .lock_down_sm = gk20a_gr_lock_down_sm, + .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, + .clear_sm_hww = gm20b_gr_clear_sm_hww, + .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, + 
.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, + .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, + }, .fb = { .reset = fb_gk20a_reset, .init_hw = gk20a_fb_init_hw, @@ -448,6 +561,7 @@ int gm20b_init_hal(struct gk20a *g) gops->ltc = gm20b_ops.ltc; gops->ce2 = gm20b_ops.ce2; + gops->gr = gm20b_ops.gr; gops->fb = gm20b_ops.fb; gops->clock_gating = gm20b_ops.clock_gating; gops->fifo = gm20b_ops.fifo; @@ -538,6 +652,8 @@ int gm20b_init_hal(struct gk20a *g) gops->pmu.init_wpr_region = gm20b_pmu_init_acr; gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode; + + gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; } else { /* Inherit from gk20a */ gops->pmu.is_pmu_supported = gk20a_is_pmu_supported; @@ -547,14 +663,14 @@ int gm20b_init_hal(struct gk20a *g) gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.init_wpr_region = NULL; + + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; } __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); g->pmu_lsf_pmu_wpr_init_done = 0; g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; - gm20b_init_gr(g); - gm20b_init_uncompressed_kind_map(); gm20b_init_kind_attr(); diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index 76e5cf89..00d6432f 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -24,7 +24,7 @@ #include -static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num) +bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -53,7 +53,7 @@ static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -static u32 gr_gp106_pagepool_default_size(struct gk20a *g) +u32 gr_gp106_pagepool_default_size(struct gk20a *g) { return gr_scc_pagepool_total_pages_hwmax_value_v(); } @@ -63,7 +63,7 @@ static void gr_gp106_set_go_idle_timeout(struct gk20a *g, u32 data) gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); } -static int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, +int 
gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { gk20a_dbg_fn(""); @@ -111,7 +111,7 @@ fail: return -EINVAL; } -static void gr_gp106_cb_size_default(struct gk20a *g) +void gr_gp106_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -121,7 +121,7 @@ static void gr_gp106_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); } -static int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, +int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, struct gr_ctx_desc *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, @@ -233,19 +233,3 @@ fail_free_preempt: fail: return err; } - -void gp106_init_gr(struct gk20a *g) -{ - struct gpu_ops *gops = &g->ops; - - gp10b_init_gr(g); - gops->gr.is_valid_class = gr_gp106_is_valid_class; - gops->gr.pagepool_default_size = gr_gp106_pagepool_default_size; - gops->gr.handle_sw_method = gr_gp106_handle_sw_method; - gops->gr.cb_size_default = gr_gp106_cb_size_default; - gops->gr.init_preemption_state = NULL; - gops->gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode; - gops->gr.create_gr_sysfs = NULL; - gops->gr.set_boosted_ctx = NULL; - gops->gr.update_boosted_ctx = NULL; -} diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h index 3f49aac6..28ff56a9 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.h +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h @@ -23,6 +23,15 @@ enum { PASCAL_COMPUTE_B = 0xC1C0, }; -void gp106_init_gr(struct gk20a *g); +bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num); +u32 gr_gp106_pagepool_default_size(struct gk20a *g); +int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data); +void gr_gp106_cb_size_default(struct gk20a *g); +int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, + struct gr_ctx_desc *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode); #endif diff --git 
a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 21d5fee3..7e7fc195 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -27,6 +27,7 @@ #include "gk20a/mc_gk20a.h" #include "gk20a/fb_gk20a.h" #include "gk20a/pmu_gk20a.h" +#include "gk20a/gr_gk20a.h" #include "gp10b/ltc_gp10b.h" #include "gp10b/gr_gp10b.h" @@ -40,6 +41,7 @@ #include "gp10b/fifo_gp10b.h" #include "gp10b/fb_gp10b.h" #include "gp10b/pmu_gp10b.h" +#include "gp10b/gr_gp10b.h" #include "gp106/fifo_gp106.h" #include "gp106/regops_gp106.h" @@ -51,6 +53,7 @@ #include "gm20b/pmu_gm20b.h" #include "gm20b/fb_gm20b.h" #include "gm20b/acr_gm20b.h" +#include "gm20b/gr_gm20b.h" #include "gp106/acr_gp106.h" #include "gp106/sec2_gp106.h" @@ -221,6 +224,128 @@ static const struct gpu_ops gp106_ops = { .isr_stall = gp10b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, }, + .gr = { + .init_gpc_mmu = gr_gm20b_init_gpc_mmu, + .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, + .cb_size_default = gr_gp106_cb_size_default, + .calc_global_ctx_buffer_size = + gr_gp10b_calc_global_ctx_buffer_size, + .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, + .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, + .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, + .commit_global_pagepool = gr_gp10b_commit_global_pagepool, + .handle_sw_method = gr_gp106_handle_sw_method, + .set_alpha_circular_buffer_size = + gr_gp10b_set_alpha_circular_buffer_size, + .set_circular_buffer_size = gr_gp10b_set_circular_buffer_size, + .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, + .is_valid_class = gr_gp106_is_valid_class, + .is_valid_gfx_class = gr_gp10b_is_valid_gfx_class, + .is_valid_compute_class = gr_gp10b_is_valid_compute_class, + .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, + .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, + .init_fs_state = gr_gp10b_init_fs_state, + .set_hww_esr_report_mask = 
gr_gm20b_set_hww_esr_report_mask, + .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, + .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, + .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, + .free_channel_ctx = gk20a_free_channel_ctx, + .alloc_obj_ctx = gk20a_alloc_obj_ctx, + .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, + .get_zcull_info = gr_gk20a_get_zcull_info, + .is_tpc_addr = gr_gm20b_is_tpc_addr, + .get_tpc_num = gr_gm20b_get_tpc_num, + .detect_sm_arch = gr_gm20b_detect_sm_arch, + .add_zbc_color = gr_gp10b_add_zbc_color, + .add_zbc_depth = gr_gp10b_add_zbc_depth, + .zbc_set_table = gk20a_gr_zbc_set_table, + .zbc_query_table = gr_gk20a_query_zbc, + .pmu_save_zbc = gk20a_pmu_save_zbc, + .add_zbc = gr_gk20a_add_zbc, + .pagepool_default_size = gr_gp106_pagepool_default_size, + .init_ctx_state = gr_gp10b_init_ctx_state, + .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx, + .free_gr_ctx = gr_gp10b_free_gr_ctx, + .update_ctxsw_preemption_mode = + gr_gp10b_update_ctxsw_preemption_mode, + .dump_gr_regs = gr_gp10b_dump_gr_status_regs, + .update_pc_sampling = gr_gm20b_update_pc_sampling, + .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, + .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp, + .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc, + .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, + .get_max_fbps_count = gr_gm20b_get_max_fbps_count, + .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, + .wait_empty = gr_gp10b_wait_empty, + .init_cyclestats = gr_gp10b_init_cyclestats, + .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode, + .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, + .bpt_reg_info = gr_gm20b_bpt_reg_info, + .get_access_map = gr_gp10b_get_access_map, + .handle_fecs_error = gr_gp10b_handle_fecs_error, + .handle_sm_exception = gr_gp10b_handle_sm_exception, + .handle_tex_exception = gr_gp10b_handle_tex_exception, + .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, + .enable_exceptions = gk20a_gr_enable_exceptions, + .get_lrf_tex_ltc_dram_override = 
get_ecc_override_val, + .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, + .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, + .record_sm_error_state = gm20b_gr_record_sm_error_state, + .update_sm_error_state = gm20b_gr_update_sm_error_state, + .clear_sm_error_state = gm20b_gr_clear_sm_error_state, + .suspend_contexts = gr_gp10b_suspend_contexts, + .resume_contexts = gr_gk20a_resume_contexts, + .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, + .fuse_override = gp10b_gr_fuse_override, + .init_sm_id_table = gr_gk20a_init_sm_id_table, + .load_smid_config = gr_gp10b_load_smid_config, + .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, + .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, + .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, + .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, + .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, + .setup_rop_mapping = gr_gk20a_setup_rop_mapping, + .program_zcull_mapping = gr_gk20a_program_zcull_mapping, + .commit_global_timeslice = gr_gk20a_commit_global_timeslice, + .commit_inst = gr_gk20a_commit_inst, + .write_zcull_ptr = gr_gk20a_write_zcull_ptr, + .write_pm_ptr = gr_gk20a_write_pm_ptr, + .init_elcg_mode = gr_gk20a_init_elcg_mode, + .load_tpc_mask = gr_gm20b_load_tpc_mask, + .inval_icache = gr_gk20a_inval_icache, + .trigger_suspend = gr_gk20a_trigger_suspend, + .wait_for_pause = gr_gk20a_wait_for_pause, + .resume_from_pause = gr_gk20a_resume_from_pause, + .clear_sm_errors = gr_gk20a_clear_sm_errors, + .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, + .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, + .sm_debugger_attached = gk20a_gr_sm_debugger_attached, + .suspend_single_sm = gk20a_gr_suspend_single_sm, + .suspend_all_sms = gk20a_gr_suspend_all_sms, + .resume_single_sm = gk20a_gr_resume_single_sm, + .resume_all_sms = gk20a_gr_resume_all_sms, + .get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr, + .get_sm_hww_global_esr = 
gk20a_gr_get_sm_hww_global_esr, + .get_sm_no_lock_down_hww_global_esr_mask = + gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, + .lock_down_sm = gk20a_gr_lock_down_sm, + .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, + .clear_sm_hww = gm20b_gr_clear_sm_hww, + .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, + .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, + .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, + .set_boosted_ctx = NULL, + .set_preemption_mode = gr_gp10b_set_preemption_mode, + .set_czf_bypass = gr_gp10b_set_czf_bypass, + .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception, + .set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va, + .init_preemption_state = NULL, + .update_boosted_ctx = NULL, + .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + .create_gr_sysfs = NULL, + .set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode, + .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode + }, .fb = { .reset = gp106_fb_reset, .init_hw = gk20a_fb_init_hw, @@ -569,6 +694,7 @@ int gp106_init_hal(struct gk20a *g) gops->ltc = gp106_ops.ltc; gops->ce2 = gp106_ops.ce2; + gops->gr = gp106_ops.gr; gops->fb = gp106_ops.fb; gops->clock_gating = gp106_ops.clock_gating; gops->fifo = gp106_ops.fifo; @@ -618,7 +744,6 @@ int gp106_init_hal(struct gk20a *g) g->pmu_lsf_pmu_wpr_init_done = 0; g->bootstrap_owner = LSF_FALCON_ID_SEC2; - gp106_init_gr(g); gp10b_init_uncompressed_kind_map(); gp10b_init_kind_attr(); diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 05fbeb21..74af9817 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -41,7 +41,7 @@ #define NVGPU_GFXP_WFI_TIMEOUT_US 100LL -static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) +bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -67,7 +67,7 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -static bool 
gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) +bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) { if (class_num == PASCAL_A || class_num == MAXWELL_B) return true; @@ -75,7 +75,7 @@ static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) return false; } -static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num) +bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num) { if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B) return true; @@ -119,7 +119,7 @@ static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, *count_to_adjust = 0; } -static int gr_gp10b_handle_sm_exception(struct gk20a *g, +int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, bool *post_event, struct channel_gk20a *fault_ch, u32 *hww_global_esr) @@ -244,7 +244,7 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, return ret; } -static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, +int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event) { int ret = 0; @@ -380,7 +380,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, return ret; } -static int gr_gp10b_commit_global_cb_manager(struct gk20a *g, +int gr_gp10b_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct gr_gk20a *gr = &g->gr; @@ -481,7 +481,7 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g, return 0; } -static void gr_gp10b_commit_global_pagepool(struct gk20a *g, +void gr_gp10b_commit_global_pagepool(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u32 size, bool patch) { @@ -499,7 +499,7 @@ static void gr_gp10b_commit_global_pagepool(struct gk20a *g, gr_gpcs_gcc_pagepool_total_pages_f(size), patch); } -static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, +int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *color_val, u32 
index) { u32 i; @@ -554,7 +554,7 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, return 0; } -static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, +int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *depth_val, u32 index) { u32 zbc_z; @@ -592,12 +592,12 @@ static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, return 0; } -static u32 gr_gp10b_pagepool_default_size(struct gk20a *g) +u32 gr_gp10b_pagepool_default_size(struct gk20a *g) { return gr_scc_pagepool_total_pages_hwmax_value_v(); } -static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g) +int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int size; @@ -642,7 +642,7 @@ static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data) gk20a_dbg_fn("done"); } -static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data) +void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data) { u32 val; @@ -667,7 +667,7 @@ static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data) gk20a_writel(g, gr_bes_crop_debug3_r(), val); } -static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, +int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { gk20a_dbg_fn(""); @@ -718,7 +718,7 @@ fail: return -EINVAL; } -static void gr_gp10b_cb_size_default(struct gk20a *g) +void gr_gp10b_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -728,7 +728,7 @@ static void gr_gp10b_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); } -static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -776,7 +776,7 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) } } -static void 
gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) +void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -843,7 +843,7 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) } } -static int gr_gp10b_init_ctx_state(struct gk20a *g) +int gr_gp10b_init_ctx_state(struct gk20a *g) { struct fecs_method_op_gk20a op = { .mailbox = { .id = 0, .data = 0, @@ -910,7 +910,7 @@ fail_free: return err; } -static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, +int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, struct gr_ctx_desc *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, @@ -1034,7 +1034,7 @@ fail: return err; } -static int gr_gp10b_alloc_gr_ctx(struct gk20a *g, +int gr_gp10b_alloc_gr_ctx(struct gk20a *g, struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) @@ -1131,7 +1131,7 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, nvgpu_mem_end(g, mem); } -static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, +void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) { gk20a_dbg_fn(""); @@ -1151,7 +1151,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, } -static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, +void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, struct nvgpu_mem *mem) { @@ -1256,7 +1256,7 @@ out: gk20a_dbg_fn("done"); } -static int gr_gp10b_dump_gr_status_regs(struct gk20a *g, +int gr_gp10b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { struct gr_gk20a *gr = &g->gr; @@ -1402,7 +1402,7 @@ static bool gr_activity_empty_or_preempted(u32 val) return true; } -static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, +int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, u32 expect_delay) { u32 delay 
= expect_delay; @@ -1453,7 +1453,7 @@ static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, return -EAGAIN; } -static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, +void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, bool patch) { @@ -1481,7 +1481,7 @@ static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } -static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, +void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u64 size, bool patch) { @@ -1516,7 +1516,7 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, gr_pd_ab_dist_cfg2_state_limit_f(data), patch); } -static int gr_gp10b_load_smid_config(struct gk20a *g) +int gr_gp10b_load_smid_config(struct gk20a *g) { u32 *tpc_sm_id; u32 i, j; @@ -1586,7 +1586,7 @@ int gr_gp10b_init_fs_state(struct gk20a *g) return gr_gm20b_init_fs_state(g); } -static void gr_gp10b_init_cyclestats(struct gk20a *g) +void gr_gp10b_init_cyclestats(struct gk20a *g) { #if defined(CONFIG_GK20A_CYCLE_STATS) g->gpu_characteristics.flags |= @@ -1598,7 +1598,7 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g) #endif } -static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_access_sw(g, 0x0); @@ -1611,7 +1611,7 @@ static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); } -static void gr_gp10b_get_access_map(struct gk20a *g, +void gr_gp10b_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries) { static u32 wl_addr_gp10b[] = { @@ -1801,7 +1801,7 @@ static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, * * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing */ -static 
int gr_gp10b_pre_process_sm_exception(struct gk20a *g, +int gr_gp10b_pre_process_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, bool sm_debugger_attached, struct channel_gk20a *fault_ch, bool *early_exit, bool *ignore_debugger) @@ -1988,7 +1988,7 @@ clean_up: return gk20a_gr_handle_fecs_error(g, __ch, isr_data); } -static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g, +u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm) { u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); @@ -2003,7 +2003,7 @@ static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g, return hww_warp_esr; } -static u32 get_ecc_override_val(struct gk20a *g) +u32 get_ecc_override_val(struct gk20a *g) { u32 val; @@ -2046,7 +2046,7 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch, return ctx_resident; } -static int gr_gp10b_suspend_contexts(struct gk20a *g, +int gr_gp10b_suspend_contexts(struct gk20a *g, struct dbg_session_gk20a *dbg_s, int *ctx_resident_ch_fd) { @@ -2122,7 +2122,7 @@ clean_up: return err; } -static int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, +int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, bool boost) { struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; @@ -2156,7 +2156,7 @@ unmap_ctx: return err; } -static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, +void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, struct gr_ctx_desc *gr_ctx) { u32 v; @@ -2165,7 +2165,7 @@ static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v); } -static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, +int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, u32 graphics_preempt_mode, u32 compute_preempt_mode) { @@ -2261,7 +2261,7 @@ unamp_ctx_header: return err; } -static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g, +int 
gr_gp10b_get_preemption_mode_flags(struct gk20a *g, struct nvgpu_preemption_modes_rec *preemption_modes_rec) { preemption_modes_rec->graphics_preemption_mode_flags = ( @@ -2279,7 +2279,7 @@ static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g, return 0; } -static int gp10b_gr_fuse_override(struct gk20a *g) +int gp10b_gr_fuse_override(struct gk20a *g) { struct device_node *np = dev_from_gk20a(g)->of_node; u32 *fuses; @@ -2319,7 +2319,7 @@ static int gp10b_gr_fuse_override(struct gk20a *g) return 0; } -static int gr_gp10b_init_preemption_state(struct gk20a *g) +int gr_gp10b_init_preemption_state(struct gk20a *g) { u32 debug_2; u64 sysclk_rate; @@ -2341,7 +2341,7 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g) return 0; } -static void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, +void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { u32 va = u64_lo32(gpu_va >> 8); @@ -2367,59 +2367,3 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch) return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false); } - -void gp10b_init_gr(struct gk20a *g) -{ - struct gpu_ops *gops = &g->ops; - - gm20b_init_gr(g); - gops->gr.init_fs_state = gr_gp10b_init_fs_state; - gops->gr.init_preemption_state = gr_gp10b_init_preemption_state; - gops->gr.is_valid_class = gr_gp10b_is_valid_class; - gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class; - gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class; - gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager; - gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool; - gops->gr.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va; - gops->gr.add_zbc_color = gr_gp10b_add_zbc_color; - gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth; - gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size; - gops->gr.calc_global_ctx_buffer_size = - gr_gp10b_calc_global_ctx_buffer_size; - gops->gr.commit_global_attrib_cb = 
gr_gp10b_commit_global_attrib_cb; - gops->gr.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb; - gops->gr.handle_sw_method = gr_gp10b_handle_sw_method; - gops->gr.cb_size_default = gr_gp10b_cb_size_default; - gops->gr.set_alpha_circular_buffer_size = - gr_gp10b_set_alpha_circular_buffer_size; - gops->gr.set_circular_buffer_size = - gr_gp10b_set_circular_buffer_size; - gops->gr.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3; - gops->gr.init_ctx_state = gr_gp10b_init_ctx_state; - gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx; - gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx; - gops->gr.update_ctxsw_preemption_mode = - gr_gp10b_update_ctxsw_preemption_mode; - gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs; - gops->gr.wait_empty = gr_gp10b_wait_empty; - gops->gr.init_cyclestats = gr_gp10b_init_cyclestats; - gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask; - gops->gr.get_access_map = gr_gp10b_get_access_map; - gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception; - gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception; - gops->gr.pre_process_sm_exception = - gr_gp10b_pre_process_sm_exception; - gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error; - gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs; - gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val; - gops->gr.suspend_contexts = gr_gp10b_suspend_contexts; - gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode; - gops->gr.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode; - gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags; - gops->gr.fuse_override = gp10b_gr_fuse_override; - gops->gr.load_smid_config = gr_gp10b_load_smid_config; - gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx; - gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx; - gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass; - gops->gr.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr; -} diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h 
b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index ac53e231..ce1ca01f 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -20,6 +20,10 @@ struct gk20a; struct gr_gk20a_isr_data; +struct channel_ctx_gk20a; +struct zbc_entry; +struct gr_ctx_desc; +struct nvgpu_preemption_modes_rec; enum { PASCAL_CHANNEL_GPFIFO_A = 0xC06F, @@ -39,7 +43,6 @@ enum { #define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528 #define NVC0C0_SET_RD_COALESCE 0x0228 -void gp10b_init_gr(struct gk20a *g); int gr_gp10b_init_fs_state(struct gk20a *g); int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, struct nvgpu_mem *mem); @@ -50,6 +53,87 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g, int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch); +bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num); +bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num); +bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num); +int gr_gp10b_handle_sm_exception(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, + bool *post_event, struct channel_gk20a *fault_ch, + u32 *hww_global_esr); +int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event); +int gr_gp10b_commit_global_cb_manager(struct gk20a *g, + struct channel_gk20a *c, bool patch); +void gr_gp10b_commit_global_pagepool(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch); +int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *color_val, u32 index); +int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *depth_val, u32 index); +u32 gr_gp10b_pagepool_default_size(struct gk20a *g); +int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g); +void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data); +int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data); +void gr_gp10b_cb_size_default(struct gk20a *g); +void 
gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); +void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); +int gr_gp10b_init_ctx_state(struct gk20a *g); +int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, + struct gr_ctx_desc *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode); +int gr_gp10b_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + u32 class, + u32 flags); +void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct gr_ctx_desc *gr_ctx); +void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_mem *mem); +int gr_gp10b_dump_gr_status_regs(struct gk20a *g, + struct gk20a_debug_output *o); +int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, + u32 expect_delay); +void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, bool patch); +void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u64 size, bool patch); +int gr_gp10b_load_smid_config(struct gk20a *g); +void gr_gp10b_init_cyclestats(struct gk20a *g); +void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); +void gr_gp10b_get_access_map(struct gk20a *g, + u32 **whitelist, int *num_entries); +int gr_gp10b_pre_process_sm_exception(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, + bool sm_debugger_attached, struct channel_gk20a *fault_ch, + bool *early_exit, bool *ignore_debugger); +u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm); +u32 get_ecc_override_val(struct gk20a *g); +int gr_gp10b_suspend_contexts(struct gk20a *g, + struct dbg_session_gk20a *dbg_s, + int *ctx_resident_ch_fd); +int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, + bool boost); +void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, + struct gr_ctx_desc *gr_ctx); 
+int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, + u32 graphics_preempt_mode, + u32 compute_preempt_mode); +int gr_gp10b_get_preemption_mode_flags(struct gk20a *g, + struct nvgpu_preemption_modes_rec *preemption_modes_rec); +int gp10b_gr_fuse_override(struct gk20a *g); +int gr_gp10b_init_preemption_state(struct gk20a *g); +void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, + struct nvgpu_mem *mem, u64 gpu_va); +int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch); + struct gr_t18x { struct { u32 preempt_image_size; diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 69a90031..d0f07a2b 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -27,6 +27,7 @@ #include "gk20a/mc_gk20a.h" #include "gk20a/fb_gk20a.h" #include "gk20a/pmu_gk20a.h" +#include "gk20a/gr_gk20a.h" #include "gp10b/gr_gp10b.h" #include "gp10b/fecs_trace_gp10b.h" @@ -179,6 +180,128 @@ static const struct gpu_ops gp10b_ops = { .isr_stall = gp10b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, }, + .gr = { + .init_gpc_mmu = gr_gm20b_init_gpc_mmu, + .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, + .cb_size_default = gr_gp10b_cb_size_default, + .calc_global_ctx_buffer_size = + gr_gp10b_calc_global_ctx_buffer_size, + .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, + .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, + .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, + .commit_global_pagepool = gr_gp10b_commit_global_pagepool, + .handle_sw_method = gr_gp10b_handle_sw_method, + .set_alpha_circular_buffer_size = + gr_gp10b_set_alpha_circular_buffer_size, + .set_circular_buffer_size = gr_gp10b_set_circular_buffer_size, + .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, + .is_valid_class = gr_gp10b_is_valid_class, + .is_valid_gfx_class = gr_gp10b_is_valid_gfx_class, + .is_valid_compute_class = gr_gp10b_is_valid_compute_class, + .get_sm_dsm_perf_regs = 
gr_gm20b_get_sm_dsm_perf_regs, + .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, + .init_fs_state = gr_gp10b_init_fs_state, + .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, + .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, + .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, + .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, + .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, + .free_channel_ctx = gk20a_free_channel_ctx, + .alloc_obj_ctx = gk20a_alloc_obj_ctx, + .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, + .get_zcull_info = gr_gk20a_get_zcull_info, + .is_tpc_addr = gr_gm20b_is_tpc_addr, + .get_tpc_num = gr_gm20b_get_tpc_num, + .detect_sm_arch = gr_gm20b_detect_sm_arch, + .add_zbc_color = gr_gp10b_add_zbc_color, + .add_zbc_depth = gr_gp10b_add_zbc_depth, + .zbc_set_table = gk20a_gr_zbc_set_table, + .zbc_query_table = gr_gk20a_query_zbc, + .pmu_save_zbc = gk20a_pmu_save_zbc, + .add_zbc = gr_gk20a_add_zbc, + .pagepool_default_size = gr_gp10b_pagepool_default_size, + .init_ctx_state = gr_gp10b_init_ctx_state, + .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx, + .free_gr_ctx = gr_gp10b_free_gr_ctx, + .update_ctxsw_preemption_mode = + gr_gp10b_update_ctxsw_preemption_mode, + .dump_gr_regs = gr_gp10b_dump_gr_status_regs, + .update_pc_sampling = gr_gm20b_update_pc_sampling, + .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, + .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp, + .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc, + .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, + .get_max_fbps_count = gr_gm20b_get_max_fbps_count, + .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, + .wait_empty = gr_gp10b_wait_empty, + .init_cyclestats = gr_gp10b_init_cyclestats, + .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode, + .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, + .bpt_reg_info = gr_gm20b_bpt_reg_info, + .get_access_map = gr_gp10b_get_access_map, + .handle_fecs_error = gr_gp10b_handle_fecs_error, + .handle_sm_exception = 
gr_gp10b_handle_sm_exception, + .handle_tex_exception = gr_gp10b_handle_tex_exception, + .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, + .enable_exceptions = gk20a_gr_enable_exceptions, + .get_lrf_tex_ltc_dram_override = get_ecc_override_val, + .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, + .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, + .record_sm_error_state = gm20b_gr_record_sm_error_state, + .update_sm_error_state = gm20b_gr_update_sm_error_state, + .clear_sm_error_state = gm20b_gr_clear_sm_error_state, + .suspend_contexts = gr_gp10b_suspend_contexts, + .resume_contexts = gr_gk20a_resume_contexts, + .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, + .fuse_override = gp10b_gr_fuse_override, + .init_sm_id_table = gr_gk20a_init_sm_id_table, + .load_smid_config = gr_gp10b_load_smid_config, + .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, + .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, + .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, + .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, + .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, + .setup_rop_mapping = gr_gk20a_setup_rop_mapping, + .program_zcull_mapping = gr_gk20a_program_zcull_mapping, + .commit_global_timeslice = gr_gk20a_commit_global_timeslice, + .commit_inst = gr_gk20a_commit_inst, + .write_zcull_ptr = gr_gk20a_write_zcull_ptr, + .write_pm_ptr = gr_gk20a_write_pm_ptr, + .init_elcg_mode = gr_gk20a_init_elcg_mode, + .load_tpc_mask = gr_gm20b_load_tpc_mask, + .inval_icache = gr_gk20a_inval_icache, + .trigger_suspend = gr_gk20a_trigger_suspend, + .wait_for_pause = gr_gk20a_wait_for_pause, + .resume_from_pause = gr_gk20a_resume_from_pause, + .clear_sm_errors = gr_gk20a_clear_sm_errors, + .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, + .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, + .sm_debugger_attached = gk20a_gr_sm_debugger_attached, + .suspend_single_sm = gk20a_gr_suspend_single_sm, + 
.suspend_all_sms = gk20a_gr_suspend_all_sms, + .resume_single_sm = gk20a_gr_resume_single_sm, + .resume_all_sms = gk20a_gr_resume_all_sms, + .get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr, + .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr, + .get_sm_no_lock_down_hww_global_esr_mask = + gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, + .lock_down_sm = gk20a_gr_lock_down_sm, + .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, + .clear_sm_hww = gm20b_gr_clear_sm_hww, + .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, + .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, + .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, + .set_boosted_ctx = gr_gp10b_set_boosted_ctx, + .set_preemption_mode = gr_gp10b_set_preemption_mode, + .set_czf_bypass = gr_gp10b_set_czf_bypass, + .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception, + .set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va, + .init_preemption_state = gr_gp10b_init_preemption_state, + .update_boosted_ctx = gr_gp10b_update_boosted_ctx, + .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + .create_gr_sysfs = gr_gp10b_create_sysfs, + .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, + }, .fb = { .reset = fb_gk20a_reset, .init_hw = gk20a_fb_init_hw, @@ -474,6 +597,7 @@ int gp10b_init_hal(struct gk20a *g) gops->ltc = gp10b_ops.ltc; gops->ce2 = gp10b_ops.ce2; + gops->gr = gp10b_ops.gr; gops->fb = gp10b_ops.fb; gops->clock_gating = gp10b_ops.clock_gating; gops->fifo = gp10b_ops.fifo; @@ -564,6 +688,8 @@ int gp10b_init_hal(struct gk20a *g) gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap; gops->pmu.is_priv_load = gp10b_is_priv_load; + + gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; } else { /* Inherit from gk20a */ gops->pmu.is_pmu_supported = gk20a_is_pmu_supported, @@ -574,12 +700,13 @@ int gp10b_init_hal(struct gk20a *g) gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.init_wpr_region = 
NULL; gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1; + + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; } __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); g->pmu_lsf_pmu_wpr_init_done = 0; g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; - gp10b_init_gr(g); gp10b_init_uncompressed_kind_map(); gp10b_init_kind_attr(); -- cgit v1.2.2