summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sunny He <suhe@nvidia.com> 2017-08-17 19:11:34 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-08-24 12:34:43 -0400
commit: 4b5b67d6d83430d8d670660b1dfc9cf024d60d88 (patch)
tree: 541a421438fe849ee4b1ab9e6bdfa9e8b6ee4485
parent: 82ba1277f3da7379ed6b8288c04bb91db008549c (diff)
gpu: nvgpu: Reorg gr HAL initialization
Reorganize HAL initialization to remove inheritance and construct the gpu_ops struct at compile time. This patch only covers the gr sub-module of the gpu_ops struct.

Perform HAL function assignments in hal_gxxxx.c through the population of a chip-specific copy of gpu_ops.

Jira NVGPU-74

Change-Id: Ie37638f442fd68aca8a7ade5f297118447bdc91e
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1542989
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 222
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.h 87
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hal_gm20b.c 120
-rw-r--r-- drivers/gpu/nvgpu/gp106/gr_gp106.c 26
-rw-r--r-- drivers/gpu/nvgpu/gp106/gr_gp106.h 11
-rw-r--r-- drivers/gpu/nvgpu/gp106/hal_gp106.c 127
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gr_gp10b.c 136
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gr_gp10b.h 86
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hal_gp10b.c 129
9 files changed, 649 insertions, 295 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 5fcc3f7b..30991102 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -36,7 +36,7 @@
36#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h> 36#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> 37#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
38 38
39static void gr_gm20b_init_gpc_mmu(struct gk20a *g) 39void gr_gm20b_init_gpc_mmu(struct gk20a *g)
40{ 40{
41 u32 temp; 41 u32 temp;
42 42
@@ -73,7 +73,7 @@ static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
73 gk20a_readl(g, fb_fbhub_num_active_ltcs_r())); 73 gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
74} 74}
75 75
76static void gr_gm20b_bundle_cb_defaults(struct gk20a *g) 76void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
77{ 77{
78 struct gr_gk20a *gr = &g->gr; 78 struct gr_gk20a *gr = &g->gr;
79 79
@@ -85,7 +85,7 @@ static void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
85 gr_pd_ab_dist_cfg2_token_limit_init_v(); 85 gr_pd_ab_dist_cfg2_token_limit_init_v();
86} 86}
87 87
88static void gr_gm20b_cb_size_default(struct gk20a *g) 88void gr_gm20b_cb_size_default(struct gk20a *g)
89{ 89{
90 struct gr_gk20a *gr = &g->gr; 90 struct gr_gk20a *gr = &g->gr;
91 91
@@ -96,7 +96,7 @@ static void gr_gm20b_cb_size_default(struct gk20a *g)
96 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); 96 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
97} 97}
98 98
99static int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) 99int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
100{ 100{
101 struct gr_gk20a *gr = &g->gr; 101 struct gr_gk20a *gr = &g->gr;
102 int size; 102 int size;
@@ -134,7 +134,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
134 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); 134 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
135} 135}
136 136
137static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, 137void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
138 struct channel_ctx_gk20a *ch_ctx, 138 struct channel_ctx_gk20a *ch_ctx,
139 u64 addr, u64 size, bool patch) 139 u64 addr, u64 size, bool patch)
140{ 140{
@@ -170,7 +170,7 @@ static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
170 170
171} 171}
172 172
173static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, 173int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
174 struct channel_gk20a *c, bool patch) 174 struct channel_gk20a *c, bool patch)
175{ 175{
176 struct gr_gk20a *gr = &g->gr; 176 struct gr_gk20a *gr = &g->gr;
@@ -250,7 +250,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
250 return 0; 250 return 0;
251} 251}
252 252
253static void gr_gm20b_commit_global_pagepool(struct gk20a *g, 253void gr_gm20b_commit_global_pagepool(struct gk20a *g,
254 struct channel_ctx_gk20a *ch_ctx, 254 struct channel_ctx_gk20a *ch_ctx,
255 u64 addr, u32 size, bool patch) 255 u64 addr, u32 size, bool patch)
256{ 256{
@@ -276,7 +276,7 @@ void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data)
276 gk20a_dbg_fn("done"); 276 gk20a_dbg_fn("done");
277} 277}
278 278
279static int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, 279int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
280 u32 class_num, u32 offset, u32 data) 280 u32 class_num, u32 offset, u32 data)
281{ 281{
282 gk20a_dbg_fn(""); 282 gk20a_dbg_fn("");
@@ -318,7 +318,7 @@ fail:
318 return -EINVAL; 318 return -EINVAL;
319} 319}
320 320
321static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) 321void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
322{ 322{
323 struct gr_gk20a *gr = &g->gr; 323 struct gr_gk20a *gr = &g->gr;
324 u32 gpc_index, ppc_index, stride, val; 324 u32 gpc_index, ppc_index, stride, val;
@@ -368,7 +368,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
368 } 368 }
369} 369}
370 370
371static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) 371void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
372{ 372{
373 struct gr_gk20a *gr = &g->gr; 373 struct gr_gk20a *gr = &g->gr;
374 u32 gpc_index, ppc_index, stride, val; 374 u32 gpc_index, ppc_index, stride, val;
@@ -423,7 +423,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
423 } 423 }
424} 424}
425 425
426static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) 426void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
427{ 427{
428 /* setup sm warp esr report masks */ 428 /* setup sm warp esr report masks */
429 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 429 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
@@ -456,7 +456,7 @@ static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
456 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f()); 456 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f());
457} 457}
458 458
459static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) 459bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
460{ 460{
461 bool valid = false; 461 bool valid = false;
462 462
@@ -476,7 +476,7 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
476 return valid; 476 return valid;
477} 477}
478 478
479static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) 479bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
480{ 480{
481 if (class_num == MAXWELL_B) 481 if (class_num == MAXWELL_B)
482 return true; 482 return true;
@@ -484,7 +484,7 @@ static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
484 return false; 484 return false;
485} 485}
486 486
487static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) 487bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num)
488{ 488{
489 if (class_num == MAXWELL_COMPUTE_B) 489 if (class_num == MAXWELL_COMPUTE_B)
490 return true; 490 return true;
@@ -502,7 +502,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
502static u32 *_sm_dsm_perf_regs; 502static u32 *_sm_dsm_perf_regs;
503static u32 _sm_dsm_perf_ctrl_regs[2]; 503static u32 _sm_dsm_perf_ctrl_regs[2];
504 504
505static void gr_gm20b_init_sm_dsm_reg_info(void) 505void gr_gm20b_init_sm_dsm_reg_info(void)
506{ 506{
507 if (_sm_dsm_perf_ctrl_regs[0] != 0) 507 if (_sm_dsm_perf_ctrl_regs[0] != 0)
508 return; 508 return;
@@ -513,7 +513,7 @@ static void gr_gm20b_init_sm_dsm_reg_info(void)
513 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); 513 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
514} 514}
515 515
516static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, 516void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
517 u32 *num_sm_dsm_perf_regs, 517 u32 *num_sm_dsm_perf_regs,
518 u32 **sm_dsm_perf_regs, 518 u32 **sm_dsm_perf_regs,
519 u32 *perf_register_stride) 519 u32 *perf_register_stride)
@@ -523,7 +523,7 @@ static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
523 *perf_register_stride = 0; 523 *perf_register_stride = 0;
524} 524}
525 525
526static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, 526void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
527 u32 *num_sm_dsm_perf_ctrl_regs, 527 u32 *num_sm_dsm_perf_ctrl_regs,
528 u32 **sm_dsm_perf_ctrl_regs, 528 u32 **sm_dsm_perf_ctrl_regs,
529 u32 *ctrl_register_stride) 529 u32 *ctrl_register_stride)
@@ -535,7 +535,7 @@ static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
535 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); 535 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
536} 536}
537 537
538static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) 538u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
539{ 539{
540 u32 val; 540 u32 val;
541 struct gr_gk20a *gr = &g->gr; 541 struct gr_gk20a *gr = &g->gr;
@@ -546,7 +546,7 @@ static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
546 return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); 546 return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1);
547} 547}
548 548
549static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) 549void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
550{ 550{
551 nvgpu_tegra_fuse_write_bypass(g, 0x1); 551 nvgpu_tegra_fuse_write_bypass(g, 0x1);
552 nvgpu_tegra_fuse_write_access_sw(g, 0x0); 552 nvgpu_tegra_fuse_write_access_sw(g, 0x0);
@@ -563,7 +563,7 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
563 } 563 }
564} 564}
565 565
566static void gr_gm20b_load_tpc_mask(struct gk20a *g) 566void gr_gm20b_load_tpc_mask(struct gk20a *g)
567{ 567{
568 u32 pes_tpc_mask = 0, fuse_tpc_mask; 568 u32 pes_tpc_mask = 0, fuse_tpc_mask;
569 u32 gpc, pes; 569 u32 gpc, pes;
@@ -588,7 +588,7 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g)
588 } 588 }
589} 589}
590 590
591static void gr_gm20b_program_sm_id_numbering(struct gk20a *g, 591void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
592 u32 gpc, u32 tpc, u32 smid) 592 u32 gpc, u32 tpc, u32 smid)
593{ 593{
594 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 594 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
@@ -604,7 +604,7 @@ static void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
604 gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); 604 gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
605} 605}
606 606
607static int gr_gm20b_load_smid_config(struct gk20a *g) 607int gr_gm20b_load_smid_config(struct gk20a *g)
608{ 608{
609 u32 *tpc_sm_id; 609 u32 *tpc_sm_id;
610 u32 i, j; 610 u32 i, j;
@@ -669,7 +669,7 @@ int gr_gm20b_init_fs_state(struct gk20a *g)
669 return 0; 669 return 0;
670} 670}
671 671
672static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, 672int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
673 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) 673 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
674{ 674{
675 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), 675 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
@@ -697,7 +697,7 @@ static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
697 tpc_in_gpc_stride)); 697 tpc_in_gpc_stride));
698} 698}
699 699
700static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) 700bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
701{ 701{
702 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); 702 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
703 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 703 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -708,7 +708,7 @@ static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
708 || gr_gm20b_is_tpc_addr_shared(g, addr); 708 || gr_gm20b_is_tpc_addr_shared(g, addr);
709} 709}
710 710
711static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) 711u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
712{ 712{
713 u32 i, start; 713 u32 i, start;
714 u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); 714 u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
@@ -738,7 +738,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
738 gr_fecs_falcon_hwcfg_r()); 738 gr_fecs_falcon_hwcfg_r());
739} 739}
740 740
741static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) 741int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
742{ 742{
743 u32 err, flags; 743 u32 err, flags;
744 u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - 744 u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
@@ -819,14 +819,14 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
819} 819}
820#else 820#else
821 821
822static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) 822int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
823{ 823{
824 return -EPERM; 824 return -EPERM;
825} 825}
826 826
827#endif 827#endif
828 828
829static void gr_gm20b_detect_sm_arch(struct gk20a *g) 829void gr_gm20b_detect_sm_arch(struct gk20a *g)
830{ 830{
831 u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); 831 u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
832 832
@@ -838,12 +838,12 @@ static void gr_gm20b_detect_sm_arch(struct gk20a *g)
838 gr_gpc0_tpc0_sm_arch_warp_count_v(v); 838 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
839} 839}
840 840
841static u32 gr_gm20b_pagepool_default_size(struct gk20a *g) 841u32 gr_gm20b_pagepool_default_size(struct gk20a *g)
842{ 842{
843 return gr_scc_pagepool_total_pages_hwmax_value_v(); 843 return gr_scc_pagepool_total_pages_hwmax_value_v();
844} 844}
845 845
846static int gr_gm20b_alloc_gr_ctx(struct gk20a *g, 846int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
847 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, 847 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
848 u32 class, 848 u32 class,
849 u32 flags) 849 u32 flags)
@@ -864,7 +864,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
864 return 0; 864 return 0;
865} 865}
866 866
867static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, 867void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
868 struct channel_ctx_gk20a *ch_ctx, 868 struct channel_ctx_gk20a *ch_ctx,
869 struct nvgpu_mem *mem) 869 struct nvgpu_mem *mem)
870{ 870{
@@ -884,7 +884,7 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
884 gk20a_dbg_fn("done"); 884 gk20a_dbg_fn("done");
885} 885}
886 886
887static int gr_gm20b_dump_gr_status_regs(struct gk20a *g, 887int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
888 struct gk20a_debug_output *o) 888 struct gk20a_debug_output *o)
889{ 889{
890 struct gr_gk20a *gr = &g->gr; 890 struct gr_gk20a *gr = &g->gr;
@@ -1022,7 +1022,7 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
1022 return 0; 1022 return 0;
1023} 1023}
1024 1024
1025static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, 1025int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1026 bool enable) 1026 bool enable)
1027{ 1027{
1028 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1028 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
@@ -1051,7 +1051,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1051 return 0; 1051 return 0;
1052} 1052}
1053 1053
1054static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) 1054u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
1055{ 1055{
1056 u32 fbp_en_mask, opt_fbio; 1056 u32 fbp_en_mask, opt_fbio;
1057 u32 tmp, max_fbps_count; 1057 u32 tmp, max_fbps_count;
@@ -1066,7 +1066,7 @@ static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
1066 return fbp_en_mask; 1066 return fbp_en_mask;
1067} 1067}
1068 1068
1069static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) 1069u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
1070{ 1070{
1071 u32 ltc_per_fbp, reg; 1071 u32 ltc_per_fbp, reg;
1072 reg = gk20a_readl(g, top_ltc_per_fbp_r()); 1072 reg = gk20a_readl(g, top_ltc_per_fbp_r());
@@ -1074,7 +1074,7 @@ static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
1074 return ltc_per_fbp; 1074 return ltc_per_fbp;
1075} 1075}
1076 1076
1077static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) 1077u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
1078{ 1078{
1079 u32 lts_per_ltc, reg; 1079 u32 lts_per_ltc, reg;
1080 reg = gk20a_readl(g, top_slices_per_ltc_r()); 1080 reg = gk20a_readl(g, top_slices_per_ltc_r());
@@ -1082,7 +1082,7 @@ static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
1082 return lts_per_ltc; 1082 return lts_per_ltc;
1083} 1083}
1084 1084
1085static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) 1085u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
1086{ 1086{
1087 struct gr_gk20a *gr = &g->gr; 1087 struct gr_gk20a *gr = &g->gr;
1088 u32 i, tmp, max_fbps_count, max_ltc_per_fbp; 1088 u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
@@ -1102,7 +1102,7 @@ static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
1102 return gr->fbp_rop_l2_en_mask; 1102 return gr->fbp_rop_l2_en_mask;
1103} 1103}
1104 1104
1105static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) 1105u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
1106{ 1106{
1107 u32 tmp, max_fbps_count; 1107 u32 tmp, max_fbps_count;
1108 tmp = gk20a_readl(g, top_num_fbps_r()); 1108 tmp = gk20a_readl(g, top_num_fbps_r());
@@ -1110,7 +1110,7 @@ static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
1110 return max_fbps_count; 1110 return max_fbps_count;
1111} 1111}
1112 1112
1113static void gr_gm20b_init_cyclestats(struct gk20a *g) 1113void gr_gm20b_init_cyclestats(struct gk20a *g)
1114{ 1114{
1115#if defined(CONFIG_GK20A_CYCLE_STATS) 1115#if defined(CONFIG_GK20A_CYCLE_STATS)
1116 g->gpu_characteristics.flags |= 1116 g->gpu_characteristics.flags |=
@@ -1122,7 +1122,7 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g)
1122#endif 1122#endif
1123} 1123}
1124 1124
1125static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) 1125void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
1126{ 1126{
1127 u32 cde_v; 1127 u32 cde_v;
1128 1128
@@ -1131,7 +1131,7 @@ static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
1131 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); 1131 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
1132} 1132}
1133 1133
1134static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) 1134void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
1135{ 1135{
1136 /* Check if we have at least one valid warp */ 1136 /* Check if we have at least one valid warp */
1137 /* get paused state on maxwell */ 1137 /* get paused state on maxwell */
@@ -1210,7 +1210,7 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
1210 } 1210 }
1211} 1211}
1212 1212
1213static void gr_gm20b_get_access_map(struct gk20a *g, 1213void gr_gm20b_get_access_map(struct gk20a *g,
1214 u32 **whitelist, int *num_entries) 1214 u32 **whitelist, int *num_entries)
1215{ 1215{
1216 static u32 wl_addr_gm20b[] = { 1216 static u32 wl_addr_gm20b[] = {
@@ -1251,7 +1251,7 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
1251 *num_entries = ARRAY_SIZE(wl_addr_gm20b); 1251 *num_entries = ARRAY_SIZE(wl_addr_gm20b);
1252} 1252}
1253 1253
1254static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) 1254int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1255{ 1255{
1256 int sm_id; 1256 int sm_id;
1257 struct gr_gk20a *gr = &g->gr; 1257 struct gr_gk20a *gr = &g->gr;
@@ -1281,7 +1281,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1281 return 0; 1281 return 0;
1282} 1282}
1283 1283
1284static int gm20b_gr_update_sm_error_state(struct gk20a *g, 1284int gm20b_gr_update_sm_error_state(struct gk20a *g,
1285 struct channel_gk20a *ch, u32 sm_id, 1285 struct channel_gk20a *ch, u32 sm_id,
1286 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) 1286 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state)
1287{ 1287{
@@ -1353,7 +1353,7 @@ fail:
1353 return err; 1353 return err;
1354} 1354}
1355 1355
1356static int gm20b_gr_clear_sm_error_state(struct gk20a *g, 1356int gm20b_gr_clear_sm_error_state(struct gk20a *g,
1357 struct channel_gk20a *ch, u32 sm_id) 1357 struct channel_gk20a *ch, u32 sm_id)
1358{ 1358{
1359 u32 gpc, tpc, offset; 1359 u32 gpc, tpc, offset;
@@ -1394,7 +1394,7 @@ fail:
1394 return err; 1394 return err;
1395} 1395}
1396 1396
1397static int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, 1397int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
1398 struct nvgpu_preemption_modes_rec *preemption_modes_rec) 1398 struct nvgpu_preemption_modes_rec *preemption_modes_rec)
1399{ 1399{
1400 preemption_modes_rec->graphics_preemption_mode_flags = 1400 preemption_modes_rec->graphics_preemption_mode_flags =
@@ -1421,7 +1421,7 @@ int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask)
1421 return 0; 1421 return 0;
1422} 1422}
1423 1423
1424static int gm20b_gr_fuse_override(struct gk20a *g) 1424int gm20b_gr_fuse_override(struct gk20a *g)
1425{ 1425{
1426 struct device_node *np = dev_from_gk20a(g)->of_node; 1426 struct device_node *np = dev_from_gk20a(g)->of_node;
1427 u32 *fuses; 1427 u32 *fuses;
@@ -1457,7 +1457,7 @@ static int gm20b_gr_fuse_override(struct gk20a *g)
1457 return 0; 1457 return 0;
1458} 1458}
1459 1459
1460static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) 1460bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
1461{ 1461{
1462 u32 ltc_shared_base = ltc_ltcs_ltss_v(); 1462 u32 ltc_shared_base = ltc_ltcs_ltss_v();
1463 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); 1463 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
@@ -1466,7 +1466,7 @@ static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
1466 (addr < (ltc_shared_base + lts_stride)); 1466 (addr < (ltc_shared_base + lts_stride));
1467} 1467}
1468 1468
1469static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) 1469bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
1470{ 1470{
1471 u32 lts_shared_base = ltc_ltc0_ltss_v(); 1471 u32 lts_shared_base = ltc_ltc0_ltss_v();
1472 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); 1472 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
@@ -1498,7 +1498,7 @@ static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
1498 *priv_addr_table_index = index; 1498 *priv_addr_table_index = index;
1499} 1499}
1500 1500
1501static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, 1501void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
1502 u32 *priv_addr_table, 1502 u32 *priv_addr_table,
1503 u32 *priv_addr_table_index) 1503 u32 *priv_addr_table_index)
1504{ 1504{
@@ -1518,7 +1518,7 @@ static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
1518 priv_addr_table_index); 1518 priv_addr_table_index);
1519} 1519}
1520 1520
1521static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, 1521void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
1522 u32 *priv_addr_table, 1522 u32 *priv_addr_table,
1523 u32 *priv_addr_table_index) 1523 u32 *priv_addr_table_index)
1524{ 1524{
@@ -1530,7 +1530,7 @@ static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
1530 priv_addr_table, priv_addr_table_index); 1530 priv_addr_table, priv_addr_table_index);
1531} 1531}
1532 1532
1533static void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, 1533void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
1534 u32 global_esr) 1534 u32 global_esr)
1535{ 1535{
1536 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); 1536 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
@@ -1559,121 +1559,3 @@ void gm20a_gr_disable_rd_coalesce(struct gk20a *g)
1559 1559
1560 gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); 1560 gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg);
1561} 1561}
1562
1563void gm20b_init_gr(struct gk20a *g)
1564{
1565 struct gpu_ops *gops = &g->ops;
1566
1567 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
1568 gops->gr.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults;
1569 gops->gr.cb_size_default = gr_gm20b_cb_size_default;
1570 gops->gr.calc_global_ctx_buffer_size =
1571 gr_gm20b_calc_global_ctx_buffer_size;
1572 gops->gr.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb;
1573 gops->gr.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb;
1574 gops->gr.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager;
1575 gops->gr.commit_global_pagepool = gr_gm20b_commit_global_pagepool;
1576 gops->gr.handle_sw_method = gr_gm20b_handle_sw_method;
1577 gops->gr.set_alpha_circular_buffer_size = gr_gm20b_set_alpha_circular_buffer_size;
1578 gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size;
1579 gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
1580 gops->gr.is_valid_class = gr_gm20b_is_valid_class;
1581 gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class;
1582 gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class;
1583 gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs;
1584 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
1585 gops->gr.init_fs_state = gr_gm20b_init_fs_state;
1586 gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
1587 gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments;
1588 if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY))
1589 gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
1590 else
1591 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
1592 gops->gr.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask;
1593 gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask;
1594 gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
1595 gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
1596 gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
1597 gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
1598 gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr;
1599 gops->gr.get_tpc_num = gr_gm20b_get_tpc_num;
1600 gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch;
1601 gops->gr.add_zbc_color = gr_gk20a_add_zbc_color;
1602 gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth;
1603 gops->gr.zbc_set_table = gk20a_gr_zbc_set_table;
1604 gops->gr.zbc_query_table = gr_gk20a_query_zbc;
1605 gops->gr.pmu_save_zbc = gk20a_pmu_save_zbc;
1606 gops->gr.add_zbc = gr_gk20a_add_zbc;
1607 gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size;
1608 gops->gr.init_ctx_state = gr_gk20a_init_ctx_state;
1609 gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx;
1610 gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx;
1611 gops->gr.update_ctxsw_preemption_mode =
1612 gr_gm20b_update_ctxsw_preemption_mode;
1613 gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
1614 gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
1615 gops->gr.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask;
1616 gops->gr.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp;
1617 gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc;
1618 gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask;
1619 gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count;
1620 gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info;
1621 gops->gr.wait_empty = gr_gk20a_wait_idle;
1622 gops->gr.init_cyclestats = gr_gm20b_init_cyclestats;
1623 gops->gr.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode;
1624 gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs;
1625 gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info;
1626 gops->gr.get_access_map = gr_gm20b_get_access_map;
1627 gops->gr.handle_fecs_error = gk20a_gr_handle_fecs_error;
1628 gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception;
1629 gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception;
1630 gops->gr.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions;
1631 gops->gr.enable_exceptions = gk20a_gr_enable_exceptions;
1632 gops->gr.get_lrf_tex_ltc_dram_override = NULL;
1633 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
1634 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
1635 gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
1636 gops->gr.update_sm_error_state = gm20b_gr_update_sm_error_state;
1637 gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state;
1638 gops->gr.suspend_contexts = gr_gk20a_suspend_contexts;
1639 gops->gr.resume_contexts = gr_gk20a_resume_contexts;
1640 gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags;
1641 gops->gr.fuse_override = gm20b_gr_fuse_override;
1642 gops->gr.init_sm_id_table = gr_gk20a_init_sm_id_table;
1643 gops->gr.load_smid_config = gr_gm20b_load_smid_config;
1644 gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
1645 gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr;
1646 gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr;
1647 gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr;
1648 gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr;
1649 gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping;
1650 gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
1651 gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;
1652 gops->gr.commit_inst = gr_gk20a_commit_inst;
1653 gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
1654 gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
1655 gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode;
1656 gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask;
1657 gops->gr.inval_icache = gr_gk20a_inval_icache;
1658 gops->gr.trigger_suspend = gr_gk20a_trigger_suspend;
1659 gops->gr.wait_for_pause = gr_gk20a_wait_for_pause;
1660 gops->gr.resume_from_pause = gr_gk20a_resume_from_pause;
1661 gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors;
1662 gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions;
1663 gops->gr.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel;
1664 gops->gr.sm_debugger_attached = gk20a_gr_sm_debugger_attached;
1665 gops->gr.suspend_single_sm = gk20a_gr_suspend_single_sm;
1666 gops->gr.suspend_all_sms = gk20a_gr_suspend_all_sms;
1667 gops->gr.resume_single_sm = gk20a_gr_resume_single_sm;
1668 gops->gr.resume_all_sms = gk20a_gr_resume_all_sms;
1669 gops->gr.get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr;
1670 gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
1671 gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
1672 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
1673 gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
1674 gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
1675 gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww;
1676 gops->gr.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf;
1677 gops->gr.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs;
1678 gops->gr.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce;
1679}
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index 116a92f4..f81aa728 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -34,12 +34,95 @@ enum {
34#define NVB1C0_SET_RD_COALESCE 0x0228 34#define NVB1C0_SET_RD_COALESCE 0x0228
35 35
36#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 36#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
37void gm20b_init_gr(struct gk20a *g); 37
38void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, 38void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
39 struct channel_ctx_gk20a *ch_ctx, 39 struct channel_ctx_gk20a *ch_ctx,
40 u64 addr, bool patch); 40 u64 addr, bool patch);
41int gr_gm20b_init_fs_state(struct gk20a *g); 41int gr_gm20b_init_fs_state(struct gk20a *g);
42int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); 42int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
43void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data); 43void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
44 44void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
45void gr_gm20b_init_gpc_mmu(struct gk20a *g);
46void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
47void gr_gm20b_cb_size_default(struct gk20a *g);
48int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
49void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
50 struct channel_ctx_gk20a *ch_ctx,
51 u64 addr, u64 size, bool patch);
52int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
53 struct channel_gk20a *c, bool patch);
54void gr_gm20b_commit_global_pagepool(struct gk20a *g,
55 struct channel_ctx_gk20a *ch_ctx,
56 u64 addr, u32 size, bool patch);
57int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
58 u32 class_num, u32 offset, u32 data);
59void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
60void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data);
61void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g);
62bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num);
63bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
64bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num);
65void gr_gm20b_init_sm_dsm_reg_info(void);
66void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
67 u32 *num_sm_dsm_perf_regs,
68 u32 **sm_dsm_perf_regs,
69 u32 *perf_register_stride);
70void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
71 u32 *num_sm_dsm_perf_ctrl_regs,
72 u32 **sm_dsm_perf_ctrl_regs,
73 u32 *ctrl_register_stride);
74u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
75void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
76void gr_gm20b_load_tpc_mask(struct gk20a *g);
77void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
78 u32 gpc, u32 tpc, u32 smid);
79int gr_gm20b_load_smid_config(struct gk20a *g);
80int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
81 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
82bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
83u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
84int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
85int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
86void gr_gm20b_detect_sm_arch(struct gk20a *g);
87u32 gr_gm20b_pagepool_default_size(struct gk20a *g);
88int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
89 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
90 u32 class,
91 u32 flags);
92void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
93 struct channel_ctx_gk20a *ch_ctx,
94 struct nvgpu_mem *mem);
95int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
96 struct gk20a_debug_output *o);
97int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
98 bool enable);
99u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g);
100u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g);
101u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g);
102u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g);
103u32 gr_gm20b_get_max_fbps_count(struct gk20a *g);
104void gr_gm20b_init_cyclestats(struct gk20a *g);
105void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem);
106void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state);
107void gr_gm20b_get_access_map(struct gk20a *g,
108 u32 **whitelist, int *num_entries);
109int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc);
110int gm20b_gr_update_sm_error_state(struct gk20a *g,
111 struct channel_gk20a *ch, u32 sm_id,
112 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state);
113int gm20b_gr_clear_sm_error_state(struct gk20a *g,
114 struct channel_gk20a *ch, u32 sm_id);
115int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
116 struct nvgpu_preemption_modes_rec *preemption_modes_rec);
117int gm20b_gr_fuse_override(struct gk20a *g);
118bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr);
119bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr);
120void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
121 u32 *priv_addr_table,
122 u32 *priv_addr_table_index);
123void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
124 u32 *priv_addr_table,
125 u32 *priv_addr_table_index);
126void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
127 u32 global_esr);
45#endif 128#endif
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index aa953ca5..b77f10d2 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -27,6 +27,7 @@
27#include "gk20a/priv_ring_gk20a.h" 27#include "gk20a/priv_ring_gk20a.h"
28#include "gk20a/regops_gk20a.h" 28#include "gk20a/regops_gk20a.h"
29#include "gk20a/pmu_gk20a.h" 29#include "gk20a/pmu_gk20a.h"
30#include "gk20a/gr_gk20a.h"
30 31
31#include "ltc_gm20b.h" 32#include "ltc_gm20b.h"
32#include "gr_gm20b.h" 33#include "gr_gm20b.h"
@@ -170,6 +171,118 @@ static const struct gpu_ops gm20b_ops = {
170 .isr_stall = gk20a_ce2_isr, 171 .isr_stall = gk20a_ce2_isr,
171 .isr_nonstall = gk20a_ce2_nonstall_isr, 172 .isr_nonstall = gk20a_ce2_nonstall_isr,
172 }, 173 },
174 .gr = {
175 .init_gpc_mmu = gr_gm20b_init_gpc_mmu,
176 .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
177 .cb_size_default = gr_gm20b_cb_size_default,
178 .calc_global_ctx_buffer_size =
179 gr_gm20b_calc_global_ctx_buffer_size,
180 .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
181 .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
182 .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
183 .commit_global_pagepool = gr_gm20b_commit_global_pagepool,
184 .handle_sw_method = gr_gm20b_handle_sw_method,
185 .set_alpha_circular_buffer_size =
186 gr_gm20b_set_alpha_circular_buffer_size,
187 .set_circular_buffer_size = gr_gm20b_set_circular_buffer_size,
188 .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
189 .is_valid_class = gr_gm20b_is_valid_class,
190 .is_valid_gfx_class = gr_gm20b_is_valid_gfx_class,
191 .is_valid_compute_class = gr_gm20b_is_valid_compute_class,
192 .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
193 .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
194 .init_fs_state = gr_gm20b_init_fs_state,
195 .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
196 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
197 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
198 .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
199 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
200 .free_channel_ctx = gk20a_free_channel_ctx,
201 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
202 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
203 .get_zcull_info = gr_gk20a_get_zcull_info,
204 .is_tpc_addr = gr_gm20b_is_tpc_addr,
205 .get_tpc_num = gr_gm20b_get_tpc_num,
206 .detect_sm_arch = gr_gm20b_detect_sm_arch,
207 .add_zbc_color = gr_gk20a_add_zbc_color,
208 .add_zbc_depth = gr_gk20a_add_zbc_depth,
209 .zbc_set_table = gk20a_gr_zbc_set_table,
210 .zbc_query_table = gr_gk20a_query_zbc,
211 .pmu_save_zbc = gk20a_pmu_save_zbc,
212 .add_zbc = gr_gk20a_add_zbc,
213 .pagepool_default_size = gr_gm20b_pagepool_default_size,
214 .init_ctx_state = gr_gk20a_init_ctx_state,
215 .alloc_gr_ctx = gr_gm20b_alloc_gr_ctx,
216 .free_gr_ctx = gr_gk20a_free_gr_ctx,
217 .update_ctxsw_preemption_mode =
218 gr_gm20b_update_ctxsw_preemption_mode,
219 .dump_gr_regs = gr_gm20b_dump_gr_status_regs,
220 .update_pc_sampling = gr_gm20b_update_pc_sampling,
221 .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
222 .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
223 .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
224 .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
225 .get_max_fbps_count = gr_gm20b_get_max_fbps_count,
226 .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
227 .wait_empty = gr_gk20a_wait_idle,
228 .init_cyclestats = gr_gm20b_init_cyclestats,
229 .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
230 .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
231 .bpt_reg_info = gr_gm20b_bpt_reg_info,
232 .get_access_map = gr_gm20b_get_access_map,
233 .handle_fecs_error = gk20a_gr_handle_fecs_error,
234 .handle_sm_exception = gr_gk20a_handle_sm_exception,
235 .handle_tex_exception = gr_gk20a_handle_tex_exception,
236 .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
237 .enable_exceptions = gk20a_gr_enable_exceptions,
238 .get_lrf_tex_ltc_dram_override = NULL,
239 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
240 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
241 .record_sm_error_state = gm20b_gr_record_sm_error_state,
242 .update_sm_error_state = gm20b_gr_update_sm_error_state,
243 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
244 .suspend_contexts = gr_gk20a_suspend_contexts,
245 .resume_contexts = gr_gk20a_resume_contexts,
246 .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
247 .fuse_override = gm20b_gr_fuse_override,
248 .init_sm_id_table = gr_gk20a_init_sm_id_table,
249 .load_smid_config = gr_gm20b_load_smid_config,
250 .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
251 .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
252 .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
253 .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
254 .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
255 .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
256 .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
257 .commit_global_timeslice = gr_gk20a_commit_global_timeslice,
258 .commit_inst = gr_gk20a_commit_inst,
259 .write_zcull_ptr = gr_gk20a_write_zcull_ptr,
260 .write_pm_ptr = gr_gk20a_write_pm_ptr,
261 .init_elcg_mode = gr_gk20a_init_elcg_mode,
262 .load_tpc_mask = gr_gm20b_load_tpc_mask,
263 .inval_icache = gr_gk20a_inval_icache,
264 .trigger_suspend = gr_gk20a_trigger_suspend,
265 .wait_for_pause = gr_gk20a_wait_for_pause,
266 .resume_from_pause = gr_gk20a_resume_from_pause,
267 .clear_sm_errors = gr_gk20a_clear_sm_errors,
268 .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
269 .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
270 .sm_debugger_attached = gk20a_gr_sm_debugger_attached,
271 .suspend_single_sm = gk20a_gr_suspend_single_sm,
272 .suspend_all_sms = gk20a_gr_suspend_all_sms,
273 .resume_single_sm = gk20a_gr_resume_single_sm,
274 .resume_all_sms = gk20a_gr_resume_all_sms,
275 .get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr,
276 .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
277 .get_sm_no_lock_down_hww_global_esr_mask =
278 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
279 .lock_down_sm = gk20a_gr_lock_down_sm,
280 .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
281 .clear_sm_hww = gm20b_gr_clear_sm_hww,
282 .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
283 .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
284 .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
285 },
173 .fb = { 286 .fb = {
174 .reset = fb_gk20a_reset, 287 .reset = fb_gk20a_reset,
175 .init_hw = gk20a_fb_init_hw, 288 .init_hw = gk20a_fb_init_hw,
@@ -448,6 +561,7 @@ int gm20b_init_hal(struct gk20a *g)
448 561
449 gops->ltc = gm20b_ops.ltc; 562 gops->ltc = gm20b_ops.ltc;
450 gops->ce2 = gm20b_ops.ce2; 563 gops->ce2 = gm20b_ops.ce2;
564 gops->gr = gm20b_ops.gr;
451 gops->fb = gm20b_ops.fb; 565 gops->fb = gm20b_ops.fb;
452 gops->clock_gating = gm20b_ops.clock_gating; 566 gops->clock_gating = gm20b_ops.clock_gating;
453 gops->fifo = gm20b_ops.fifo; 567 gops->fifo = gm20b_ops.fifo;
@@ -538,6 +652,8 @@ int gm20b_init_hal(struct gk20a *g)
538 652
539 gops->pmu.init_wpr_region = gm20b_pmu_init_acr; 653 gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
540 gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode; 654 gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;
655
656 gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
541 } else { 657 } else {
542 /* Inherit from gk20a */ 658 /* Inherit from gk20a */
543 gops->pmu.is_pmu_supported = gk20a_is_pmu_supported; 659 gops->pmu.is_pmu_supported = gk20a_is_pmu_supported;
@@ -547,14 +663,14 @@ int gm20b_init_hal(struct gk20a *g)
547 663
548 gops->pmu.load_lsfalcon_ucode = NULL; 664 gops->pmu.load_lsfalcon_ucode = NULL;
549 gops->pmu.init_wpr_region = NULL; 665 gops->pmu.init_wpr_region = NULL;
666
667 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
550 } 668 }
551 669
552 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); 670 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
553 g->pmu_lsf_pmu_wpr_init_done = 0; 671 g->pmu_lsf_pmu_wpr_init_done = 0;
554 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; 672 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
555 673
556 gm20b_init_gr(g);
557
558 gm20b_init_uncompressed_kind_map(); 674 gm20b_init_uncompressed_kind_map();
559 gm20b_init_kind_attr(); 675 gm20b_init_kind_attr();
560 676
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index 76e5cf89..00d6432f 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -24,7 +24,7 @@
24 24
25#include <nvgpu/hw/gp106/hw_gr_gp106.h> 25#include <nvgpu/hw/gp106/hw_gr_gp106.h>
26 26
27static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num) 27bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num)
28{ 28{
29 bool valid = false; 29 bool valid = false;
30 30
@@ -53,7 +53,7 @@ static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num)
53 return valid; 53 return valid;
54} 54}
55 55
56static u32 gr_gp106_pagepool_default_size(struct gk20a *g) 56u32 gr_gp106_pagepool_default_size(struct gk20a *g)
57{ 57{
58 return gr_scc_pagepool_total_pages_hwmax_value_v(); 58 return gr_scc_pagepool_total_pages_hwmax_value_v();
59} 59}
@@ -63,7 +63,7 @@ static void gr_gp106_set_go_idle_timeout(struct gk20a *g, u32 data)
63 gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); 63 gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
64} 64}
65 65
66static int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, 66int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
67 u32 class_num, u32 offset, u32 data) 67 u32 class_num, u32 offset, u32 data)
68{ 68{
69 gk20a_dbg_fn(""); 69 gk20a_dbg_fn("");
@@ -111,7 +111,7 @@ fail:
111 return -EINVAL; 111 return -EINVAL;
112} 112}
113 113
114static void gr_gp106_cb_size_default(struct gk20a *g) 114void gr_gp106_cb_size_default(struct gk20a *g)
115{ 115{
116 struct gr_gk20a *gr = &g->gr; 116 struct gr_gk20a *gr = &g->gr;
117 117
@@ -121,7 +121,7 @@ static void gr_gp106_cb_size_default(struct gk20a *g)
121 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); 121 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
122} 122}
123 123
124static int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, 124int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
125 struct gr_ctx_desc *gr_ctx, 125 struct gr_ctx_desc *gr_ctx,
126 struct vm_gk20a *vm, u32 class, 126 struct vm_gk20a *vm, u32 class,
127 u32 graphics_preempt_mode, 127 u32 graphics_preempt_mode,
@@ -233,19 +233,3 @@ fail_free_preempt:
233fail: 233fail:
234 return err; 234 return err;
235} 235}
236
237void gp106_init_gr(struct gk20a *g)
238{
239 struct gpu_ops *gops = &g->ops;
240
241 gp10b_init_gr(g);
242 gops->gr.is_valid_class = gr_gp106_is_valid_class;
243 gops->gr.pagepool_default_size = gr_gp106_pagepool_default_size;
244 gops->gr.handle_sw_method = gr_gp106_handle_sw_method;
245 gops->gr.cb_size_default = gr_gp106_cb_size_default;
246 gops->gr.init_preemption_state = NULL;
247 gops->gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode;
248 gops->gr.create_gr_sysfs = NULL;
249 gops->gr.set_boosted_ctx = NULL;
250 gops->gr.update_boosted_ctx = NULL;
251}
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h
index 3f49aac6..28ff56a9 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h
@@ -23,6 +23,15 @@ enum {
23 PASCAL_COMPUTE_B = 0xC1C0, 23 PASCAL_COMPUTE_B = 0xC1C0,
24}; 24};
25 25
26void gp106_init_gr(struct gk20a *g); 26bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num);
27u32 gr_gp106_pagepool_default_size(struct gk20a *g);
28int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
29 u32 class_num, u32 offset, u32 data);
30void gr_gp106_cb_size_default(struct gk20a *g);
31int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
32 struct gr_ctx_desc *gr_ctx,
33 struct vm_gk20a *vm, u32 class,
34 u32 graphics_preempt_mode,
35 u32 compute_preempt_mode);
27 36
28#endif 37#endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 21d5fee3..7e7fc195 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -27,6 +27,7 @@
27#include "gk20a/mc_gk20a.h" 27#include "gk20a/mc_gk20a.h"
28#include "gk20a/fb_gk20a.h" 28#include "gk20a/fb_gk20a.h"
29#include "gk20a/pmu_gk20a.h" 29#include "gk20a/pmu_gk20a.h"
30#include "gk20a/gr_gk20a.h"
30 31
31#include "gp10b/ltc_gp10b.h" 32#include "gp10b/ltc_gp10b.h"
32#include "gp10b/gr_gp10b.h" 33#include "gp10b/gr_gp10b.h"
@@ -40,6 +41,7 @@
40#include "gp10b/fifo_gp10b.h" 41#include "gp10b/fifo_gp10b.h"
41#include "gp10b/fb_gp10b.h" 42#include "gp10b/fb_gp10b.h"
42#include "gp10b/pmu_gp10b.h" 43#include "gp10b/pmu_gp10b.h"
44#include "gp10b/gr_gp10b.h"
43 45
44#include "gp106/fifo_gp106.h" 46#include "gp106/fifo_gp106.h"
45#include "gp106/regops_gp106.h" 47#include "gp106/regops_gp106.h"
@@ -51,6 +53,7 @@
51#include "gm20b/pmu_gm20b.h" 53#include "gm20b/pmu_gm20b.h"
52#include "gm20b/fb_gm20b.h" 54#include "gm20b/fb_gm20b.h"
53#include "gm20b/acr_gm20b.h" 55#include "gm20b/acr_gm20b.h"
56#include "gm20b/gr_gm20b.h"
54 57
55#include "gp106/acr_gp106.h" 58#include "gp106/acr_gp106.h"
56#include "gp106/sec2_gp106.h" 59#include "gp106/sec2_gp106.h"
@@ -221,6 +224,128 @@ static const struct gpu_ops gp106_ops = {
221 .isr_stall = gp10b_ce_isr, 224 .isr_stall = gp10b_ce_isr,
222 .isr_nonstall = gp10b_ce_nonstall_isr, 225 .isr_nonstall = gp10b_ce_nonstall_isr,
223 }, 226 },
227 .gr = {
228 .init_gpc_mmu = gr_gm20b_init_gpc_mmu,
229 .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
230 .cb_size_default = gr_gp106_cb_size_default,
231 .calc_global_ctx_buffer_size =
232 gr_gp10b_calc_global_ctx_buffer_size,
233 .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
234 .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
235 .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
236 .commit_global_pagepool = gr_gp10b_commit_global_pagepool,
237 .handle_sw_method = gr_gp106_handle_sw_method,
238 .set_alpha_circular_buffer_size =
239 gr_gp10b_set_alpha_circular_buffer_size,
240 .set_circular_buffer_size = gr_gp10b_set_circular_buffer_size,
241 .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
242 .is_valid_class = gr_gp106_is_valid_class,
243 .is_valid_gfx_class = gr_gp10b_is_valid_gfx_class,
244 .is_valid_compute_class = gr_gp10b_is_valid_compute_class,
245 .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
246 .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
247 .init_fs_state = gr_gp10b_init_fs_state,
248 .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
249 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
250 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
251 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
252 .free_channel_ctx = gk20a_free_channel_ctx,
253 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
254 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
255 .get_zcull_info = gr_gk20a_get_zcull_info,
256 .is_tpc_addr = gr_gm20b_is_tpc_addr,
257 .get_tpc_num = gr_gm20b_get_tpc_num,
258 .detect_sm_arch = gr_gm20b_detect_sm_arch,
259 .add_zbc_color = gr_gp10b_add_zbc_color,
260 .add_zbc_depth = gr_gp10b_add_zbc_depth,
261 .zbc_set_table = gk20a_gr_zbc_set_table,
262 .zbc_query_table = gr_gk20a_query_zbc,
263 .pmu_save_zbc = gk20a_pmu_save_zbc,
264 .add_zbc = gr_gk20a_add_zbc,
265 .pagepool_default_size = gr_gp106_pagepool_default_size,
266 .init_ctx_state = gr_gp10b_init_ctx_state,
267 .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
268 .free_gr_ctx = gr_gp10b_free_gr_ctx,
269 .update_ctxsw_preemption_mode =
270 gr_gp10b_update_ctxsw_preemption_mode,
271 .dump_gr_regs = gr_gp10b_dump_gr_status_regs,
272 .update_pc_sampling = gr_gm20b_update_pc_sampling,
273 .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
274 .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
275 .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
276 .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
277 .get_max_fbps_count = gr_gm20b_get_max_fbps_count,
278 .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
279 .wait_empty = gr_gp10b_wait_empty,
280 .init_cyclestats = gr_gp10b_init_cyclestats,
281 .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
282 .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
283 .bpt_reg_info = gr_gm20b_bpt_reg_info,
284 .get_access_map = gr_gp10b_get_access_map,
285 .handle_fecs_error = gr_gp10b_handle_fecs_error,
286 .handle_sm_exception = gr_gp10b_handle_sm_exception,
287 .handle_tex_exception = gr_gp10b_handle_tex_exception,
288 .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
289 .enable_exceptions = gk20a_gr_enable_exceptions,
290 .get_lrf_tex_ltc_dram_override = get_ecc_override_val,
291 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
292 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
293 .record_sm_error_state = gm20b_gr_record_sm_error_state,
294 .update_sm_error_state = gm20b_gr_update_sm_error_state,
295 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
296 .suspend_contexts = gr_gp10b_suspend_contexts,
297 .resume_contexts = gr_gk20a_resume_contexts,
298 .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
299 .fuse_override = gp10b_gr_fuse_override,
300 .init_sm_id_table = gr_gk20a_init_sm_id_table,
301 .load_smid_config = gr_gp10b_load_smid_config,
302 .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
303 .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
304 .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
305 .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
306 .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
307 .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
308 .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
309 .commit_global_timeslice = gr_gk20a_commit_global_timeslice,
310 .commit_inst = gr_gk20a_commit_inst,
311 .write_zcull_ptr = gr_gk20a_write_zcull_ptr,
312 .write_pm_ptr = gr_gk20a_write_pm_ptr,
313 .init_elcg_mode = gr_gk20a_init_elcg_mode,
314 .load_tpc_mask = gr_gm20b_load_tpc_mask,
315 .inval_icache = gr_gk20a_inval_icache,
316 .trigger_suspend = gr_gk20a_trigger_suspend,
317 .wait_for_pause = gr_gk20a_wait_for_pause,
318 .resume_from_pause = gr_gk20a_resume_from_pause,
319 .clear_sm_errors = gr_gk20a_clear_sm_errors,
320 .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
321 .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
322 .sm_debugger_attached = gk20a_gr_sm_debugger_attached,
323 .suspend_single_sm = gk20a_gr_suspend_single_sm,
324 .suspend_all_sms = gk20a_gr_suspend_all_sms,
325 .resume_single_sm = gk20a_gr_resume_single_sm,
326 .resume_all_sms = gk20a_gr_resume_all_sms,
327 .get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr,
328 .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
329 .get_sm_no_lock_down_hww_global_esr_mask =
330 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
331 .lock_down_sm = gk20a_gr_lock_down_sm,
332 .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
333 .clear_sm_hww = gm20b_gr_clear_sm_hww,
334 .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
335 .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
336 .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
337 .set_boosted_ctx = NULL,
338 .set_preemption_mode = gr_gp10b_set_preemption_mode,
339 .set_czf_bypass = gr_gp10b_set_czf_bypass,
340 .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception,
341 .set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va,
342 .init_preemption_state = NULL,
343 .update_boosted_ctx = NULL,
344 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
345 .create_gr_sysfs = NULL,
346 .set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
347 .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode
348 },
224 .fb = { 349 .fb = {
225 .reset = gp106_fb_reset, 350 .reset = gp106_fb_reset,
226 .init_hw = gk20a_fb_init_hw, 351 .init_hw = gk20a_fb_init_hw,
@@ -569,6 +694,7 @@ int gp106_init_hal(struct gk20a *g)
569 694
570 gops->ltc = gp106_ops.ltc; 695 gops->ltc = gp106_ops.ltc;
571 gops->ce2 = gp106_ops.ce2; 696 gops->ce2 = gp106_ops.ce2;
697 gops->gr = gp106_ops.gr;
572 gops->fb = gp106_ops.fb; 698 gops->fb = gp106_ops.fb;
573 gops->clock_gating = gp106_ops.clock_gating; 699 gops->clock_gating = gp106_ops.clock_gating;
574 gops->fifo = gp106_ops.fifo; 700 gops->fifo = gp106_ops.fifo;
@@ -618,7 +744,6 @@ int gp106_init_hal(struct gk20a *g)
618 744
619 g->pmu_lsf_pmu_wpr_init_done = 0; 745 g->pmu_lsf_pmu_wpr_init_done = 0;
620 g->bootstrap_owner = LSF_FALCON_ID_SEC2; 746 g->bootstrap_owner = LSF_FALCON_ID_SEC2;
621 gp106_init_gr(g);
622 747
623 gp10b_init_uncompressed_kind_map(); 748 gp10b_init_uncompressed_kind_map();
624 gp10b_init_kind_attr(); 749 gp10b_init_kind_attr();
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 05fbeb21..74af9817 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -41,7 +41,7 @@
41 41
42#define NVGPU_GFXP_WFI_TIMEOUT_US 100LL 42#define NVGPU_GFXP_WFI_TIMEOUT_US 100LL
43 43
44static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) 44bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
45{ 45{
46 bool valid = false; 46 bool valid = false;
47 47
@@ -67,7 +67,7 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
67 return valid; 67 return valid;
68} 68}
69 69
70static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) 70bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
71{ 71{
72 if (class_num == PASCAL_A || class_num == MAXWELL_B) 72 if (class_num == PASCAL_A || class_num == MAXWELL_B)
73 return true; 73 return true;
@@ -75,7 +75,7 @@ static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
75 return false; 75 return false;
76} 76}
77 77
78static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num) 78bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
79{ 79{
80 if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B) 80 if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B)
81 return true; 81 return true;
@@ -119,7 +119,7 @@ static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
119 *count_to_adjust = 0; 119 *count_to_adjust = 0;
120} 120}
121 121
122static int gr_gp10b_handle_sm_exception(struct gk20a *g, 122int gr_gp10b_handle_sm_exception(struct gk20a *g,
123 u32 gpc, u32 tpc, u32 sm, 123 u32 gpc, u32 tpc, u32 sm,
124 bool *post_event, struct channel_gk20a *fault_ch, 124 bool *post_event, struct channel_gk20a *fault_ch,
125 u32 *hww_global_esr) 125 u32 *hww_global_esr)
@@ -244,7 +244,7 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g,
244 return ret; 244 return ret;
245} 245}
246 246
247static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, 247int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
248 bool *post_event) 248 bool *post_event)
249{ 249{
250 int ret = 0; 250 int ret = 0;
@@ -380,7 +380,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
380 return ret; 380 return ret;
381} 381}
382 382
383static int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 383int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
384 struct channel_gk20a *c, bool patch) 384 struct channel_gk20a *c, bool patch)
385{ 385{
386 struct gr_gk20a *gr = &g->gr; 386 struct gr_gk20a *gr = &g->gr;
@@ -481,7 +481,7 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
481 return 0; 481 return 0;
482} 482}
483 483
484static void gr_gp10b_commit_global_pagepool(struct gk20a *g, 484void gr_gp10b_commit_global_pagepool(struct gk20a *g,
485 struct channel_ctx_gk20a *ch_ctx, 485 struct channel_ctx_gk20a *ch_ctx,
486 u64 addr, u32 size, bool patch) 486 u64 addr, u32 size, bool patch)
487{ 487{
@@ -499,7 +499,7 @@ static void gr_gp10b_commit_global_pagepool(struct gk20a *g,
499 gr_gpcs_gcc_pagepool_total_pages_f(size), patch); 499 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
500} 500}
501 501
502static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, 502int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
503 struct zbc_entry *color_val, u32 index) 503 struct zbc_entry *color_val, u32 index)
504{ 504{
505 u32 i; 505 u32 i;
@@ -554,7 +554,7 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
554 return 0; 554 return 0;
555} 555}
556 556
557static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, 557int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
558 struct zbc_entry *depth_val, u32 index) 558 struct zbc_entry *depth_val, u32 index)
559{ 559{
560 u32 zbc_z; 560 u32 zbc_z;
@@ -592,12 +592,12 @@ static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
592 return 0; 592 return 0;
593} 593}
594 594
595static u32 gr_gp10b_pagepool_default_size(struct gk20a *g) 595u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
596{ 596{
597 return gr_scc_pagepool_total_pages_hwmax_value_v(); 597 return gr_scc_pagepool_total_pages_hwmax_value_v();
598} 598}
599 599
600static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g) 600int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
601{ 601{
602 struct gr_gk20a *gr = &g->gr; 602 struct gr_gk20a *gr = &g->gr;
603 int size; 603 int size;
@@ -642,7 +642,7 @@ static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
642 gk20a_dbg_fn("done"); 642 gk20a_dbg_fn("done");
643} 643}
644 644
645static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data) 645void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
646{ 646{
647 u32 val; 647 u32 val;
648 648
@@ -667,7 +667,7 @@ static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
667 gk20a_writel(g, gr_bes_crop_debug3_r(), val); 667 gk20a_writel(g, gr_bes_crop_debug3_r(), val);
668} 668}
669 669
670static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, 670int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
671 u32 class_num, u32 offset, u32 data) 671 u32 class_num, u32 offset, u32 data)
672{ 672{
673 gk20a_dbg_fn(""); 673 gk20a_dbg_fn("");
@@ -718,7 +718,7 @@ fail:
718 return -EINVAL; 718 return -EINVAL;
719} 719}
720 720
721static void gr_gp10b_cb_size_default(struct gk20a *g) 721void gr_gp10b_cb_size_default(struct gk20a *g)
722{ 722{
723 struct gr_gk20a *gr = &g->gr; 723 struct gr_gk20a *gr = &g->gr;
724 724
@@ -728,7 +728,7 @@ static void gr_gp10b_cb_size_default(struct gk20a *g)
728 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); 728 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
729} 729}
730 730
731static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) 731void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
732{ 732{
733 struct gr_gk20a *gr = &g->gr; 733 struct gr_gk20a *gr = &g->gr;
734 u32 gpc_index, ppc_index, stride, val; 734 u32 gpc_index, ppc_index, stride, val;
@@ -776,7 +776,7 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
776 } 776 }
777} 777}
778 778
779static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) 779void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
780{ 780{
781 struct gr_gk20a *gr = &g->gr; 781 struct gr_gk20a *gr = &g->gr;
782 u32 gpc_index, ppc_index, stride, val; 782 u32 gpc_index, ppc_index, stride, val;
@@ -843,7 +843,7 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
843 } 843 }
844} 844}
845 845
846static int gr_gp10b_init_ctx_state(struct gk20a *g) 846int gr_gp10b_init_ctx_state(struct gk20a *g)
847{ 847{
848 struct fecs_method_op_gk20a op = { 848 struct fecs_method_op_gk20a op = {
849 .mailbox = { .id = 0, .data = 0, 849 .mailbox = { .id = 0, .data = 0,
@@ -910,7 +910,7 @@ fail_free:
910 return err; 910 return err;
911} 911}
912 912
913static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, 913int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
914 struct gr_ctx_desc *gr_ctx, 914 struct gr_ctx_desc *gr_ctx,
915 struct vm_gk20a *vm, u32 class, 915 struct vm_gk20a *vm, u32 class,
916 u32 graphics_preempt_mode, 916 u32 graphics_preempt_mode,
@@ -1034,7 +1034,7 @@ fail:
1034 return err; 1034 return err;
1035} 1035}
1036 1036
1037static int gr_gp10b_alloc_gr_ctx(struct gk20a *g, 1037int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
1038 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, 1038 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
1039 u32 class, 1039 u32 class,
1040 u32 flags) 1040 u32 flags)
@@ -1131,7 +1131,7 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
1131 nvgpu_mem_end(g, mem); 1131 nvgpu_mem_end(g, mem);
1132} 1132}
1133 1133
1134static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, 1134void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
1135 struct gr_ctx_desc *gr_ctx) 1135 struct gr_ctx_desc *gr_ctx)
1136{ 1136{
1137 gk20a_dbg_fn(""); 1137 gk20a_dbg_fn("");
@@ -1151,7 +1151,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
1151} 1151}
1152 1152
1153 1153
1154static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, 1154void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1155 struct channel_ctx_gk20a *ch_ctx, 1155 struct channel_ctx_gk20a *ch_ctx,
1156 struct nvgpu_mem *mem) 1156 struct nvgpu_mem *mem)
1157{ 1157{
@@ -1256,7 +1256,7 @@ out:
1256 gk20a_dbg_fn("done"); 1256 gk20a_dbg_fn("done");
1257} 1257}
1258 1258
1259static int gr_gp10b_dump_gr_status_regs(struct gk20a *g, 1259int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
1260 struct gk20a_debug_output *o) 1260 struct gk20a_debug_output *o)
1261{ 1261{
1262 struct gr_gk20a *gr = &g->gr; 1262 struct gr_gk20a *gr = &g->gr;
@@ -1402,7 +1402,7 @@ static bool gr_activity_empty_or_preempted(u32 val)
1402 return true; 1402 return true;
1403} 1403}
1404 1404
1405static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, 1405int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
1406 u32 expect_delay) 1406 u32 expect_delay)
1407{ 1407{
1408 u32 delay = expect_delay; 1408 u32 delay = expect_delay;
@@ -1453,7 +1453,7 @@ static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
1453 return -EAGAIN; 1453 return -EAGAIN;
1454} 1454}
1455 1455
1456static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, 1456void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
1457 struct channel_ctx_gk20a *ch_ctx, 1457 struct channel_ctx_gk20a *ch_ctx,
1458 u64 addr, bool patch) 1458 u64 addr, bool patch)
1459{ 1459{
@@ -1481,7 +1481,7 @@ static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
1481 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); 1481 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1482} 1482}
1483 1483
1484static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, 1484void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1485 struct channel_ctx_gk20a *ch_ctx, 1485 struct channel_ctx_gk20a *ch_ctx,
1486 u64 addr, u64 size, bool patch) 1486 u64 addr, u64 size, bool patch)
1487{ 1487{
@@ -1516,7 +1516,7 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1516 gr_pd_ab_dist_cfg2_state_limit_f(data), patch); 1516 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1517} 1517}
1518 1518
1519static int gr_gp10b_load_smid_config(struct gk20a *g) 1519int gr_gp10b_load_smid_config(struct gk20a *g)
1520{ 1520{
1521 u32 *tpc_sm_id; 1521 u32 *tpc_sm_id;
1522 u32 i, j; 1522 u32 i, j;
@@ -1586,7 +1586,7 @@ int gr_gp10b_init_fs_state(struct gk20a *g)
1586 return gr_gm20b_init_fs_state(g); 1586 return gr_gm20b_init_fs_state(g);
1587} 1587}
1588 1588
1589static void gr_gp10b_init_cyclestats(struct gk20a *g) 1589void gr_gp10b_init_cyclestats(struct gk20a *g)
1590{ 1590{
1591#if defined(CONFIG_GK20A_CYCLE_STATS) 1591#if defined(CONFIG_GK20A_CYCLE_STATS)
1592 g->gpu_characteristics.flags |= 1592 g->gpu_characteristics.flags |=
@@ -1598,7 +1598,7 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g)
1598#endif 1598#endif
1599} 1599}
1600 1600
1601static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) 1601void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
1602{ 1602{
1603 nvgpu_tegra_fuse_write_bypass(g, 0x1); 1603 nvgpu_tegra_fuse_write_bypass(g, 0x1);
1604 nvgpu_tegra_fuse_write_access_sw(g, 0x0); 1604 nvgpu_tegra_fuse_write_access_sw(g, 0x0);
@@ -1611,7 +1611,7 @@ static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
1611 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); 1611 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
1612} 1612}
1613 1613
1614static void gr_gp10b_get_access_map(struct gk20a *g, 1614void gr_gp10b_get_access_map(struct gk20a *g,
1615 u32 **whitelist, int *num_entries) 1615 u32 **whitelist, int *num_entries)
1616{ 1616{
1617 static u32 wl_addr_gp10b[] = { 1617 static u32 wl_addr_gp10b[] = {
@@ -1801,7 +1801,7 @@ static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
1801 * 1801 *
1802 * On Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing 1802 * On Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing
1803 */ 1803 */
1804static int gr_gp10b_pre_process_sm_exception(struct gk20a *g, 1804int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
1805 u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, 1805 u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
1806 bool sm_debugger_attached, struct channel_gk20a *fault_ch, 1806 bool sm_debugger_attached, struct channel_gk20a *fault_ch,
1807 bool *early_exit, bool *ignore_debugger) 1807 bool *early_exit, bool *ignore_debugger)
@@ -1988,7 +1988,7 @@ clean_up:
1988 return gk20a_gr_handle_fecs_error(g, __ch, isr_data); 1988 return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
1989} 1989}
1990 1990
1991static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g, 1991u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
1992 u32 gpc, u32 tpc, u32 sm) 1992 u32 gpc, u32 tpc, u32 sm)
1993{ 1993{
1994 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); 1994 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
@@ -2003,7 +2003,7 @@ static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
2003 return hww_warp_esr; 2003 return hww_warp_esr;
2004} 2004}
2005 2005
2006static u32 get_ecc_override_val(struct gk20a *g) 2006u32 get_ecc_override_val(struct gk20a *g)
2007{ 2007{
2008 u32 val; 2008 u32 val;
2009 2009
@@ -2046,7 +2046,7 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
2046 return ctx_resident; 2046 return ctx_resident;
2047} 2047}
2048 2048
2049static int gr_gp10b_suspend_contexts(struct gk20a *g, 2049int gr_gp10b_suspend_contexts(struct gk20a *g,
2050 struct dbg_session_gk20a *dbg_s, 2050 struct dbg_session_gk20a *dbg_s,
2051 int *ctx_resident_ch_fd) 2051 int *ctx_resident_ch_fd)
2052{ 2052{
@@ -2122,7 +2122,7 @@ clean_up:
2122 return err; 2122 return err;
2123} 2123}
2124 2124
2125static int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, 2125int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
2126 bool boost) 2126 bool boost)
2127{ 2127{
2128 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; 2128 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
@@ -2156,7 +2156,7 @@ unmap_ctx:
2156 return err; 2156 return err;
2157} 2157}
2158 2158
2159static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, 2159void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
2160 struct gr_ctx_desc *gr_ctx) { 2160 struct gr_ctx_desc *gr_ctx) {
2161 u32 v; 2161 u32 v;
2162 2162
@@ -2165,7 +2165,7 @@ static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
2165 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v); 2165 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
2166} 2166}
2167 2167
2168static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, 2168int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2169 u32 graphics_preempt_mode, 2169 u32 graphics_preempt_mode,
2170 u32 compute_preempt_mode) 2170 u32 compute_preempt_mode)
2171{ 2171{
@@ -2261,7 +2261,7 @@ unamp_ctx_header:
2261 return err; 2261 return err;
2262} 2262}
2263 2263
2264static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g, 2264int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
2265 struct nvgpu_preemption_modes_rec *preemption_modes_rec) 2265 struct nvgpu_preemption_modes_rec *preemption_modes_rec)
2266{ 2266{
2267 preemption_modes_rec->graphics_preemption_mode_flags = ( 2267 preemption_modes_rec->graphics_preemption_mode_flags = (
@@ -2279,7 +2279,7 @@ static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
2279 2279
2280 return 0; 2280 return 0;
2281} 2281}
2282static int gp10b_gr_fuse_override(struct gk20a *g) 2282int gp10b_gr_fuse_override(struct gk20a *g)
2283{ 2283{
2284 struct device_node *np = dev_from_gk20a(g)->of_node; 2284 struct device_node *np = dev_from_gk20a(g)->of_node;
2285 u32 *fuses; 2285 u32 *fuses;
@@ -2319,7 +2319,7 @@ static int gp10b_gr_fuse_override(struct gk20a *g)
2319 return 0; 2319 return 0;
2320} 2320}
2321 2321
2322static int gr_gp10b_init_preemption_state(struct gk20a *g) 2322int gr_gp10b_init_preemption_state(struct gk20a *g)
2323{ 2323{
2324 u32 debug_2; 2324 u32 debug_2;
2325 u64 sysclk_rate; 2325 u64 sysclk_rate;
@@ -2341,7 +2341,7 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g)
2341 return 0; 2341 return 0;
2342} 2342}
2343 2343
2344static void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, 2344void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
2345 struct nvgpu_mem *mem, u64 gpu_va) 2345 struct nvgpu_mem *mem, u64 gpu_va)
2346{ 2346{
2347 u32 va = u64_lo32(gpu_va >> 8); 2347 u32 va = u64_lo32(gpu_va >> 8);
@@ -2367,59 +2367,3 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
2367 2367
2368 return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false); 2368 return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
2369} 2369}
2370
2371void gp10b_init_gr(struct gk20a *g)
2372{
2373 struct gpu_ops *gops = &g->ops;
2374
2375 gm20b_init_gr(g);
2376 gops->gr.init_fs_state = gr_gp10b_init_fs_state;
2377 gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
2378 gops->gr.is_valid_class = gr_gp10b_is_valid_class;
2379 gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class;
2380 gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class;
2381 gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
2382 gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
2383 gops->gr.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va;
2384 gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
2385 gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
2386 gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
2387 gops->gr.calc_global_ctx_buffer_size =
2388 gr_gp10b_calc_global_ctx_buffer_size;
2389 gops->gr.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb;
2390 gops->gr.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb;
2391 gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
2392 gops->gr.cb_size_default = gr_gp10b_cb_size_default;
2393 gops->gr.set_alpha_circular_buffer_size =
2394 gr_gp10b_set_alpha_circular_buffer_size;
2395 gops->gr.set_circular_buffer_size =
2396 gr_gp10b_set_circular_buffer_size;
2397 gops->gr.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3;
2398 gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
2399 gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
2400 gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
2401 gops->gr.update_ctxsw_preemption_mode =
2402 gr_gp10b_update_ctxsw_preemption_mode;
2403 gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
2404 gops->gr.wait_empty = gr_gp10b_wait_empty;
2405 gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
2406 gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
2407 gops->gr.get_access_map = gr_gp10b_get_access_map;
2408 gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
2409 gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
2410 gops->gr.pre_process_sm_exception =
2411 gr_gp10b_pre_process_sm_exception;
2412 gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
2413 gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
2414 gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
2415 gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
2416 gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
2417 gops->gr.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode;
2418 gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
2419 gops->gr.fuse_override = gp10b_gr_fuse_override;
2420 gops->gr.load_smid_config = gr_gp10b_load_smid_config;
2421 gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
2422 gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
2423 gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
2424 gops->gr.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr;
2425}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index ac53e231..ce1ca01f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -20,6 +20,10 @@
20 20
21struct gk20a; 21struct gk20a;
22struct gr_gk20a_isr_data; 22struct gr_gk20a_isr_data;
23struct channel_ctx_gk20a;
24struct zbc_entry;
25struct gr_ctx_desc;
26struct nvgpu_preemption_modes_rec;
23 27
24enum { 28enum {
25 PASCAL_CHANNEL_GPFIFO_A = 0xC06F, 29 PASCAL_CHANNEL_GPFIFO_A = 0xC06F,
@@ -39,7 +43,6 @@ enum {
39#define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528 43#define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528
40#define NVC0C0_SET_RD_COALESCE 0x0228 44#define NVC0C0_SET_RD_COALESCE 0x0228
41 45
42void gp10b_init_gr(struct gk20a *g);
43int gr_gp10b_init_fs_state(struct gk20a *g); 46int gr_gp10b_init_fs_state(struct gk20a *g);
44int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, 47int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
45 struct nvgpu_mem *mem); 48 struct nvgpu_mem *mem);
@@ -50,6 +53,87 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
50int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, 53int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
51 struct channel_gk20a *fault_ch); 54 struct channel_gk20a *fault_ch);
52 55
56bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num);
57bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
58bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num);
59int gr_gp10b_handle_sm_exception(struct gk20a *g,
60 u32 gpc, u32 tpc, u32 sm,
61 bool *post_event, struct channel_gk20a *fault_ch,
62 u32 *hww_global_esr);
63int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
64 bool *post_event);
65int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
66 struct channel_gk20a *c, bool patch);
67void gr_gp10b_commit_global_pagepool(struct gk20a *g,
68 struct channel_ctx_gk20a *ch_ctx,
69 u64 addr, u32 size, bool patch);
70int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
71 struct zbc_entry *color_val, u32 index);
72int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
73 struct zbc_entry *depth_val, u32 index);
74u32 gr_gp10b_pagepool_default_size(struct gk20a *g);
75int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g);
76void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);
77int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
78 u32 class_num, u32 offset, u32 data);
79void gr_gp10b_cb_size_default(struct gk20a *g);
80void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
81void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
82int gr_gp10b_init_ctx_state(struct gk20a *g);
83int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
84 struct gr_ctx_desc *gr_ctx,
85 struct vm_gk20a *vm, u32 class,
86 u32 graphics_preempt_mode,
87 u32 compute_preempt_mode);
88int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
89 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
90 u32 class,
91 u32 flags);
92void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
93 struct gr_ctx_desc *gr_ctx);
94void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
95 struct channel_ctx_gk20a *ch_ctx,
96 struct nvgpu_mem *mem);
97int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
98 struct gk20a_debug_output *o);
99int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
100 u32 expect_delay);
101void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
102 struct channel_ctx_gk20a *ch_ctx,
103 u64 addr, bool patch);
104void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
105 struct channel_ctx_gk20a *ch_ctx,
106 u64 addr, u64 size, bool patch);
107int gr_gp10b_load_smid_config(struct gk20a *g);
108void gr_gp10b_init_cyclestats(struct gk20a *g);
109void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
110void gr_gp10b_get_access_map(struct gk20a *g,
111 u32 **whitelist, int *num_entries);
112int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
113 u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
114 bool sm_debugger_attached, struct channel_gk20a *fault_ch,
115 bool *early_exit, bool *ignore_debugger);
116u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
117 u32 gpc, u32 tpc, u32 sm);
118u32 get_ecc_override_val(struct gk20a *g);
119int gr_gp10b_suspend_contexts(struct gk20a *g,
120 struct dbg_session_gk20a *dbg_s,
121 int *ctx_resident_ch_fd);
122int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
123 bool boost);
124void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
125 struct gr_ctx_desc *gr_ctx);
126int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
127 u32 graphics_preempt_mode,
128 u32 compute_preempt_mode);
129int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
130 struct nvgpu_preemption_modes_rec *preemption_modes_rec);
131int gp10b_gr_fuse_override(struct gk20a *g);
132int gr_gp10b_init_preemption_state(struct gk20a *g);
133void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
134 struct nvgpu_mem *mem, u64 gpu_va);
135int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch);
136
53struct gr_t18x { 137struct gr_t18x {
54 struct { 138 struct {
55 u32 preempt_image_size; 139 u32 preempt_image_size;
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 69a90031..d0f07a2b 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -27,6 +27,7 @@
27#include "gk20a/mc_gk20a.h" 27#include "gk20a/mc_gk20a.h"
28#include "gk20a/fb_gk20a.h" 28#include "gk20a/fb_gk20a.h"
29#include "gk20a/pmu_gk20a.h" 29#include "gk20a/pmu_gk20a.h"
30#include "gk20a/gr_gk20a.h"
30 31
31#include "gp10b/gr_gp10b.h" 32#include "gp10b/gr_gp10b.h"
32#include "gp10b/fecs_trace_gp10b.h" 33#include "gp10b/fecs_trace_gp10b.h"
@@ -179,6 +180,128 @@ static const struct gpu_ops gp10b_ops = {
179 .isr_stall = gp10b_ce_isr, 180 .isr_stall = gp10b_ce_isr,
180 .isr_nonstall = gp10b_ce_nonstall_isr, 181 .isr_nonstall = gp10b_ce_nonstall_isr,
181 }, 182 },
183 .gr = {
184 .init_gpc_mmu = gr_gm20b_init_gpc_mmu,
185 .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
186 .cb_size_default = gr_gp10b_cb_size_default,
187 .calc_global_ctx_buffer_size =
188 gr_gp10b_calc_global_ctx_buffer_size,
189 .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
190 .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
191 .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
192 .commit_global_pagepool = gr_gp10b_commit_global_pagepool,
193 .handle_sw_method = gr_gp10b_handle_sw_method,
194 .set_alpha_circular_buffer_size =
195 gr_gp10b_set_alpha_circular_buffer_size,
196 .set_circular_buffer_size = gr_gp10b_set_circular_buffer_size,
197 .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
198 .is_valid_class = gr_gp10b_is_valid_class,
199 .is_valid_gfx_class = gr_gp10b_is_valid_gfx_class,
200 .is_valid_compute_class = gr_gp10b_is_valid_compute_class,
201 .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
202 .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
203 .init_fs_state = gr_gp10b_init_fs_state,
204 .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
205 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
206 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
207 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
208 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
209 .free_channel_ctx = gk20a_free_channel_ctx,
210 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
211 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
212 .get_zcull_info = gr_gk20a_get_zcull_info,
213 .is_tpc_addr = gr_gm20b_is_tpc_addr,
214 .get_tpc_num = gr_gm20b_get_tpc_num,
215 .detect_sm_arch = gr_gm20b_detect_sm_arch,
216 .add_zbc_color = gr_gp10b_add_zbc_color,
217 .add_zbc_depth = gr_gp10b_add_zbc_depth,
218 .zbc_set_table = gk20a_gr_zbc_set_table,
219 .zbc_query_table = gr_gk20a_query_zbc,
220 .pmu_save_zbc = gk20a_pmu_save_zbc,
221 .add_zbc = gr_gk20a_add_zbc,
222 .pagepool_default_size = gr_gp10b_pagepool_default_size,
223 .init_ctx_state = gr_gp10b_init_ctx_state,
224 .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
225 .free_gr_ctx = gr_gp10b_free_gr_ctx,
226 .update_ctxsw_preemption_mode =
227 gr_gp10b_update_ctxsw_preemption_mode,
228 .dump_gr_regs = gr_gp10b_dump_gr_status_regs,
229 .update_pc_sampling = gr_gm20b_update_pc_sampling,
230 .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
231 .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
232 .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
233 .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
234 .get_max_fbps_count = gr_gm20b_get_max_fbps_count,
235 .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
236 .wait_empty = gr_gp10b_wait_empty,
237 .init_cyclestats = gr_gp10b_init_cyclestats,
238 .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
239 .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
240 .bpt_reg_info = gr_gm20b_bpt_reg_info,
241 .get_access_map = gr_gp10b_get_access_map,
242 .handle_fecs_error = gr_gp10b_handle_fecs_error,
243 .handle_sm_exception = gr_gp10b_handle_sm_exception,
244 .handle_tex_exception = gr_gp10b_handle_tex_exception,
245 .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
246 .enable_exceptions = gk20a_gr_enable_exceptions,
247 .get_lrf_tex_ltc_dram_override = get_ecc_override_val,
248 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
249 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
250 .record_sm_error_state = gm20b_gr_record_sm_error_state,
251 .update_sm_error_state = gm20b_gr_update_sm_error_state,
252 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
253 .suspend_contexts = gr_gp10b_suspend_contexts,
254 .resume_contexts = gr_gk20a_resume_contexts,
255 .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
256 .fuse_override = gp10b_gr_fuse_override,
257 .init_sm_id_table = gr_gk20a_init_sm_id_table,
258 .load_smid_config = gr_gp10b_load_smid_config,
259 .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
260 .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
261 .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
262 .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
263 .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
264 .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
265 .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
266 .commit_global_timeslice = gr_gk20a_commit_global_timeslice,
267 .commit_inst = gr_gk20a_commit_inst,
268 .write_zcull_ptr = gr_gk20a_write_zcull_ptr,
269 .write_pm_ptr = gr_gk20a_write_pm_ptr,
270 .init_elcg_mode = gr_gk20a_init_elcg_mode,
271 .load_tpc_mask = gr_gm20b_load_tpc_mask,
272 .inval_icache = gr_gk20a_inval_icache,
273 .trigger_suspend = gr_gk20a_trigger_suspend,
274 .wait_for_pause = gr_gk20a_wait_for_pause,
275 .resume_from_pause = gr_gk20a_resume_from_pause,
276 .clear_sm_errors = gr_gk20a_clear_sm_errors,
277 .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
278 .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
279 .sm_debugger_attached = gk20a_gr_sm_debugger_attached,
280 .suspend_single_sm = gk20a_gr_suspend_single_sm,
281 .suspend_all_sms = gk20a_gr_suspend_all_sms,
282 .resume_single_sm = gk20a_gr_resume_single_sm,
283 .resume_all_sms = gk20a_gr_resume_all_sms,
284 .get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr,
285 .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
286 .get_sm_no_lock_down_hww_global_esr_mask =
287 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
288 .lock_down_sm = gk20a_gr_lock_down_sm,
289 .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
290 .clear_sm_hww = gm20b_gr_clear_sm_hww,
291 .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
292 .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
293 .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
294 .set_boosted_ctx = gr_gp10b_set_boosted_ctx,
295 .set_preemption_mode = gr_gp10b_set_preemption_mode,
296 .set_czf_bypass = gr_gp10b_set_czf_bypass,
297 .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception,
298 .set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va,
299 .init_preemption_state = gr_gp10b_init_preemption_state,
300 .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
301 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
302 .create_gr_sysfs = gr_gp10b_create_sysfs,
303 .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
304 },
182 .fb = { 305 .fb = {
183 .reset = fb_gk20a_reset, 306 .reset = fb_gk20a_reset,
184 .init_hw = gk20a_fb_init_hw, 307 .init_hw = gk20a_fb_init_hw,
@@ -474,6 +597,7 @@ int gp10b_init_hal(struct gk20a *g)
474 597
475 gops->ltc = gp10b_ops.ltc; 598 gops->ltc = gp10b_ops.ltc;
476 gops->ce2 = gp10b_ops.ce2; 599 gops->ce2 = gp10b_ops.ce2;
600 gops->gr = gp10b_ops.gr;
477 gops->fb = gp10b_ops.fb; 601 gops->fb = gp10b_ops.fb;
478 gops->clock_gating = gp10b_ops.clock_gating; 602 gops->clock_gating = gp10b_ops.clock_gating;
479 gops->fifo = gp10b_ops.fifo; 603 gops->fifo = gp10b_ops.fifo;
@@ -564,6 +688,8 @@ int gp10b_init_hal(struct gk20a *g)
564 gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; 688 gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
565 gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap; 689 gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap;
566 gops->pmu.is_priv_load = gp10b_is_priv_load; 690 gops->pmu.is_priv_load = gp10b_is_priv_load;
691
692 gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
567 } else { 693 } else {
568 /* Inherit from gk20a */ 694 /* Inherit from gk20a */
569 gops->pmu.is_pmu_supported = gk20a_is_pmu_supported, 695 gops->pmu.is_pmu_supported = gk20a_is_pmu_supported,
@@ -574,12 +700,13 @@ int gp10b_init_hal(struct gk20a *g)
574 gops->pmu.load_lsfalcon_ucode = NULL; 700 gops->pmu.load_lsfalcon_ucode = NULL;
575 gops->pmu.init_wpr_region = NULL; 701 gops->pmu.init_wpr_region = NULL;
576 gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1; 702 gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
703
704 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
577 } 705 }
578 706
579 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); 707 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
580 g->pmu_lsf_pmu_wpr_init_done = 0; 708 g->pmu_lsf_pmu_wpr_init_done = 0;
581 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; 709 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
582 gp10b_init_gr(g);
583 710
584 gp10b_init_uncompressed_kind_map(); 711 gp10b_init_uncompressed_kind_map();
585 gp10b_init_kind_attr(); 712 gp10b_init_kind_attr();