summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gm20b
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2014-11-07 07:55:48 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:12:10 -0400
commitff1b2fc1e84fbac147e144d3c8d80104b7eca5e9 (patch)
tree59b405b844a5c1434fb7cd9fab3d1a006fb8306d /drivers/gpu/nvgpu/gm20b
parentf8f6b298848ed05ad83ce107ff8a4fff0b37dd2d (diff)
gpu: nvgpu: fix gm20b floorsweep API
Rewrite gr_gm20b_ctx_state_floorsweep() to include necessary register writes for gm20b tpc floorsweeping. This includes:
- update the loop to write gr_gpc0_tpc0_sm_cfg_r() and gr_gpc0_gpm_pd_sm_id_r()
- for gr_pd_num_tpc_per_gpc_r(i), we just need to write register with i = 0 and the value being written is tpc count in that gpc
- gr_fe_tpc_fs_r() needs to have logical list of TPCs after floorsweeping. Get this value from pes_tpc_mask.
- gr_cwd_gpc_tpc_id_tpc0_f() and gr_cwd_sm_id_tpc0_f() also refer to logical ids and hence no need to check tpc_fs_mask to configure these registers

Bug 1513685

Change-Id: I82dc36a223fbd21e814e58e4d67738d7c63f04a7
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/601117
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c92
1 file changed, 40 insertions, 52 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index a6b54ea5..93ad5e8e 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -519,52 +519,38 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
519 struct gr_gk20a *gr = &g->gr; 519 struct gr_gk20a *gr = &g->gr;
520 u32 tpc_index, gpc_index; 520 u32 tpc_index, gpc_index;
521 u32 tpc_offset, gpc_offset; 521 u32 tpc_offset, gpc_offset;
522 u32 sm_id = 0, gpc_id = 0; 522 u32 sm_id = 0;
523 u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; 523 u32 tpc_per_gpc = 0;
524 u32 tpc_per_gpc; 524 u32 tpc_sm_id = 0, gpc_tpc_id = 0;
525 u32 tpc_fs_mask = 0, tpc_sm_id = 0, gpc_tpc_id = 0; 525 u32 pes_tpc_mask = 0, pes_index;
526 526
527 gk20a_dbg_fn(""); 527 gk20a_dbg_fn("");
528 528
529 for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { 529 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
530 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 530 gpc_offset = proj_gpc_stride_v() * gpc_index;
531 gpc_offset = proj_gpc_stride_v() * gpc_index; 531 for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index];
532 if (tpc_index < gr->gpc_tpc_count[gpc_index]) { 532 tpc_index++) {
533 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; 533 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
534 534
535 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, 535 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r()
536 gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); 536 + gpc_offset + tpc_offset,
537 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, 537 gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
538 gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); 538 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index)
539 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, 539 + gpc_offset,
540 gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); 540 gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
541 541 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r()
542 sm_id_to_gpc_id[sm_id] = gpc_index; 542 + gpc_offset + tpc_offset,
543 sm_id++; 543 gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
544 } 544
545 sm_id++;
545 } 546 }
546 } 547 }
547 548
548 for (tpc_index = 0, gpc_id = 0; 549 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
549 tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); 550 tpc_per_gpc |= gr->gpc_tpc_count[gpc_index]
550 tpc_index++, gpc_id += 8) { 551 << (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index);
551 552 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(0), tpc_per_gpc);
552 if (gpc_id >= gr->gpc_count) 553 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(0), tpc_per_gpc);
553 gpc_id = 0;
554
555 tpc_per_gpc =
556 gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
557 gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
558 gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
559 gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
560 gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
561 gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
562 gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
563 gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);
564
565 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
566 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
567 }
568 554
569 /* gr__setup_pd_mapping stubbed for gk20a */ 555 /* gr__setup_pd_mapping stubbed for gk20a */
570 gr_gk20a_setup_rop_mapping(g, gr); 556 gr_gk20a_setup_rop_mapping(g, gr);
@@ -593,20 +579,22 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
593 gk20a_readl(g, gr_be0_crop_debug3_r()) | 579 gk20a_readl(g, gr_be0_crop_debug3_r()) |
594 gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); 580 gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
595 581
596 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 582 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
597 tpc_fs_mask |= gr->gpc_tpc_mask[gpc_index] << 583 for (pes_index = 0; pes_index < gr->pe_count_per_gpc;
598 (gr->max_tpc_per_gpc_count * gpc_index); 584 pes_index++)
585 pes_tpc_mask |= gr->pes_tpc_mask[pes_index][gpc_index];
586 gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
587
588 for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) {
589 if (tpc_index == 0) {
590 gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
591 tpc_sm_id |= gr_cwd_sm_id_tpc0_f(tpc_index);
592 } else if (tpc_index == 1) {
593 gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(tpc_index);
594 tpc_sm_id |= gr_cwd_sm_id_tpc1_f(tpc_index);
595 }
599 } 596 }
600 gk20a_writel(g, gr_fe_tpc_fs_r(), tpc_fs_mask);
601 597
602 if (tpc_fs_mask & (0x1 << 0)) {
603 tpc_sm_id |= gr_cwd_sm_id_tpc0_f(0);
604 gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(0);
605 }
606 if (tpc_fs_mask & (0x1 << 1)) {
607 gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(1);
608 tpc_sm_id |= gr_cwd_sm_id_tpc1_f(1);
609 }
610 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. 598 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.
611 * Since we know TPC number is less than 5. We select 599 * Since we know TPC number is less than 5. We select
612 * index 0 directly. */ 600 * index 0 directly. */