 drivers/gpu/nvgpu/gk20a/gk20a.h    |  2 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 60 +++++++++++++++++++++++++++++-----------------------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  9 +++++----
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  4 ++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  8 ++++----
 5 files changed, 37 insertions(+), 46 deletions(-)
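
The change below makes the patch-count bookkeeping explicit: gr_gk20a_ctx_patch_write_begin()/gr_gk20a_ctx_patch_write_end() gain an update_patch_count argument that replaces the implicit ch_ctx->gr_ctx->mem.cpu_va check, so each call site states whether the patch count stored in the gr context image should be refreshed, and commit_global_timeslice() loses its always-false patch parameter. A minimal sketch of the resulting calling convention, assuming only the signatures visible in the hunks below (commit_regs() is a hypothetical caller, not part of this patch):

	static int commit_regs(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx)
	{
		int err;

		/* false: leave the patch count in the gr context image
		 * untouched (the image may not be CPU-mapped here) */
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
		if (err)
			return err;

		/* final argument true: buffer the write in the patch
		 * context rather than writing the register directly */
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(),
					 gr_ds_debug_timeslice_mode_enable_f(),
					 true);

		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
		return 0;
	}
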
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 2dbe8b02..4bc5c04b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -401,7 +401,7 @@ struct gpu_ops {
 		void (*program_zcull_mapping)(struct gk20a *g,
 				u32 zcull_alloc_num, u32 *zcull_map_tiles);
 		int (*commit_global_timeslice)(struct gk20a *g,
-				struct channel_gk20a *c, bool patch);
+				struct channel_gk20a *c);
 		int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
 		void (*write_zcull_ptr)(struct gk20a *g,
 				struct nvgpu_mem *mem, u64 gpu_va);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ddce0716..5c13fec2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,8 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
  */
 
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-				   struct channel_ctx_gk20a *ch_ctx)
+				   struct channel_ctx_gk20a *ch_ctx,
+				   bool update_patch_count)
 {
 	int err = 0;
 
@@ -681,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 	if (err)
 		return err;
 
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
 		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
 					&ch_ctx->gr_ctx->mem,
@@ -693,12 +694,13 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 }
 
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-				  struct channel_ctx_gk20a *ch_ctx)
+				  struct channel_ctx_gk20a *ch_ctx,
+				  bool update_patch_count)
 {
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -876,7 +878,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	gk20a_dbg_fn("");
 	if (patch) {
 		int err;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			return err;
 	}
@@ -922,13 +924,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	g->ops.gr.commit_global_cb_manager(g, c, patch);
 
 	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 
 	return 0;
 }
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
-			bool patch)
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct channel_ctx_gk20a *ch_ctx = NULL;
@@ -946,14 +947,6 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 	ds_debug = gk20a_readl(g, gr_ds_debug_r());
 	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
 
-	if (patch) {
-		int err;
-		ch_ctx = &c->ch_ctx;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-		if (err)
-			return err;
-	}
-
 	if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
 		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
 		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
@@ -965,27 +958,24 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	} else {
 		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
 		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
 		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	}
 
-	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-
 	return 0;
 }
 
@@ -1489,7 +1479,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		goto clean_up;
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, c, false);
+	g->ops.gr.commit_global_timeslice(g, c);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -3031,7 +3021,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 		lockboost = (lockboost & ~lockboost_mask) |
 			gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 
 		if (!err) {
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
@@ -3040,7 +3030,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_sch_macro_sched_r(),
 				lockboost, true);
-			gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+			gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 		} else {
 			nvgpu_err(g,
 				"failed to set texlock for compute class");
@@ -4528,7 +4518,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		 gr_fe_go_idle_timeout_count_disabled_f());
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, NULL, false);
+	g->ops.gr.commit_global_timeslice(g, NULL);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -7851,7 +7841,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	}
 	offset_addrs = offsets + max_offsets;
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 	if (err)
 		goto cleanup;
 
@@ -7989,7 +7979,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	nvgpu_kfree(g, offsets);
 
 	if (ch_ctx->patch_ctx.mem.cpu_va)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready);
 	if (gr_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem);
 	if (pm_ctx_ready)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2b3b74bc..b437838f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -619,9 +619,11 @@ struct channel_ctx_gk20a;
 void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
 				    u32 addr, u32 data, bool patch);
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-					  struct channel_ctx_gk20a *ch_ctx);
+					  struct channel_ctx_gk20a *ch_ctx,
+					  bool update_patch_count);
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx);
+					struct channel_ctx_gk20a *ch_ctx,
+					bool update_patch_count);
 void gr_gk20a_commit_global_pagepool(struct gk20a *g,
 				     struct channel_ctx_gk20a *ch_ctx,
 				     u64 addr, u32 size, bool patch);
@@ -745,8 +747,7 @@ int gr_gk20a_resume_from_pause(struct gk20a *g);
 int gr_gk20a_clear_sm_errors(struct gk20a *g);
 u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g,
-		struct channel_gk20a *c, bool patch);
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
 
 void gr_gk20a_init_sm_id_table(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4c49f734..c692d975 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1339,7 +1339,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
 	} else {
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			goto enable_ctxsw;
 
@@ -1352,7 +1352,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 	}
 
 enable_ctxsw:
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 24b22a7d..c9b2f859 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1224,7 +1224,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
 	}
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 	if (err) {
 		nvgpu_err(g, "can't map patch context");
 		goto out;
@@ -1277,7 +1277,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 					cbes_reserve),
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 out:
@@ -2254,13 +2254,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g,
 				ch_ctx, mem);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto enable_ch;
 		}
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 enable_ch:
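
Across the call sites, the new flag tracks whether the gr context image is CPU-mapped when the patch sequence runs: the gp10b preemption paths pass true, the gk20a and gm20b paths pass false, and __gr_gk20a_exec_ctx_ops() forwards gr_ctx_ready. A sketch of the gp10b-style sequence under that assumption (commit_cb_state() is a hypothetical wrapper, not part of this patch):

	static int commit_cb_state(struct gk20a *g, struct channel_gk20a *ch,
				   struct channel_ctx_gk20a *ch_ctx)
	{
		int err;

		/* true: the gr context image is mapped here, so re-read
		 * the patch count on begin and write it back on end */
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
		if (err) {
			nvgpu_err(g, "can't map patch context");
			return err;
		}
		g->ops.gr.commit_global_cb_manager(g, ch, true);
		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
		return 0;
	}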