diff options
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 92 |
1 files changed, 40 insertions, 52 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index a6b54ea5..93ad5e8e 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -519,52 +519,38 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
519 | struct gr_gk20a *gr = &g->gr; | 519 | struct gr_gk20a *gr = &g->gr; |
520 | u32 tpc_index, gpc_index; | 520 | u32 tpc_index, gpc_index; |
521 | u32 tpc_offset, gpc_offset; | 521 | u32 tpc_offset, gpc_offset; |
522 | u32 sm_id = 0, gpc_id = 0; | 522 | u32 sm_id = 0; |
523 | u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; | 523 | u32 tpc_per_gpc = 0; |
524 | u32 tpc_per_gpc; | 524 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; |
525 | u32 tpc_fs_mask = 0, tpc_sm_id = 0, gpc_tpc_id = 0; | 525 | u32 pes_tpc_mask = 0, pes_index; |
526 | 526 | ||
527 | gk20a_dbg_fn(""); | 527 | gk20a_dbg_fn(""); |
528 | 528 | ||
529 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | 529 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
530 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 530 | gpc_offset = proj_gpc_stride_v() * gpc_index; |
531 | gpc_offset = proj_gpc_stride_v() * gpc_index; | 531 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; |
532 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | 532 | tpc_index++) { |
533 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | 533 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; |
534 | 534 | ||
535 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | 535 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() |
536 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | 536 | + gpc_offset + tpc_offset, |
537 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, | 537 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); |
538 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | 538 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) |
539 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | 539 | + gpc_offset, |
540 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | 540 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); |
541 | 541 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() | |
542 | sm_id_to_gpc_id[sm_id] = gpc_index; | 542 | + gpc_offset + tpc_offset, |
543 | sm_id++; | 543 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); |
544 | } | 544 | |
545 | sm_id++; | ||
545 | } | 546 | } |
546 | } | 547 | } |
547 | 548 | ||
548 | for (tpc_index = 0, gpc_id = 0; | 549 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) |
549 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); | 550 | tpc_per_gpc |= gr->gpc_tpc_count[gpc_index] |
550 | tpc_index++, gpc_id += 8) { | 551 | << (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index); |
551 | 552 | gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(0), tpc_per_gpc); | |
552 | if (gpc_id >= gr->gpc_count) | 553 | gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(0), tpc_per_gpc); |
553 | gpc_id = 0; | ||
554 | |||
555 | tpc_per_gpc = | ||
556 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | | ||
557 | gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) | | ||
558 | gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) | | ||
559 | gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) | | ||
560 | gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) | | ||
561 | gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) | | ||
562 | gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) | | ||
563 | gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]); | ||
564 | |||
565 | gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); | ||
566 | gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); | ||
567 | } | ||
568 | 554 | ||
569 | /* gr__setup_pd_mapping stubbed for gk20a */ | 555 | /* gr__setup_pd_mapping stubbed for gk20a */ |
570 | gr_gk20a_setup_rop_mapping(g, gr); | 556 | gr_gk20a_setup_rop_mapping(g, gr); |
@@ -593,20 +579,22 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
593 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | 579 | gk20a_readl(g, gr_be0_crop_debug3_r()) | |
594 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | 580 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); |
595 | 581 | ||
596 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 582 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) |
597 | tpc_fs_mask |= gr->gpc_tpc_mask[gpc_index] << | 583 | for (pes_index = 0; pes_index < gr->pe_count_per_gpc; |
598 | (gr->max_tpc_per_gpc_count * gpc_index); | 584 | pes_index++) |
585 | pes_tpc_mask |= gr->pes_tpc_mask[pes_index][gpc_index]; | ||
586 | gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); | ||
587 | |||
588 | for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) { | ||
589 | if (tpc_index == 0) { | ||
590 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | ||
591 | tpc_sm_id |= gr_cwd_sm_id_tpc0_f(tpc_index); | ||
592 | } else if (tpc_index == 1) { | ||
593 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(tpc_index); | ||
594 | tpc_sm_id |= gr_cwd_sm_id_tpc1_f(tpc_index); | ||
595 | } | ||
599 | } | 596 | } |
600 | gk20a_writel(g, gr_fe_tpc_fs_r(), tpc_fs_mask); | ||
601 | 597 | ||
602 | if (tpc_fs_mask & (0x1 << 0)) { | ||
603 | tpc_sm_id |= gr_cwd_sm_id_tpc0_f(0); | ||
604 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(0); | ||
605 | } | ||
606 | if (tpc_fs_mask & (0x1 << 1)) { | ||
607 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(1); | ||
608 | tpc_sm_id |= gr_cwd_sm_id_tpc1_f(1); | ||
609 | } | ||
610 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. | 598 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. |
611 | * Since we know TPC number is less than 5. We select | 599 | * Since we know TPC number is less than 5. We select |
612 | * index 0 directly. */ | 600 | * index 0 directly. */ |