diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-06 18:13:54 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-16 13:57:48 -0400 |
commit | 211edaefb71d06d34c2835a93249da58673bff8a (patch) | |
tree | 3bd5eed1cc9020fcc8af4e4ffd9653268d59eb9b /drivers/gpu/nvgpu/gm20b | |
parent | 3a1321ddcd33accd6a8a6efee2921ebf088b0f50 (diff) |
gpu: nvgpu: Fix CWD floorsweep programming
Program CWD TPC and SM registers correctly. The old code did not work
when there are more than 4 TPCs.
Refactor init_fs_state to reduce code duplication.
Change-Id: Id93c1f8df24f1b7ee60314c3204e288b91951a88
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1143697
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 133 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | 16 |
3 files changed, 75 insertions, 79 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 2197bae5..0659eefd 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -552,79 +552,71 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g) | |||
552 | } | 552 | } |
553 | } | 553 | } |
554 | 554 | ||
555 | int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | 555 | static void gr_gm20b_program_sm_id_numbering(struct gk20a *g, |
556 | u32 gpc, u32 tpc, u32 smid) | ||
556 | { | 557 | { |
557 | struct gr_gk20a *gr = &g->gr; | ||
558 | u32 tpc_index, gpc_index; | ||
559 | u32 tpc_offset, gpc_offset; | ||
560 | u32 sm_id = 0; | ||
561 | u32 tpc_per_gpc = 0; | ||
562 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; | ||
563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 558 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
564 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 559 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
565 | u32 fuse_tpc_mask; | 560 | u32 gpc_offset = gpc_stride * gpc; |
561 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
562 | |||
563 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
564 | gr_gpc0_tpc0_sm_cfg_sm_id_f(smid)); | ||
565 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
566 | gr_gpc0_gpm_pd_sm_id_id_f(smid)); | ||
567 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
568 | gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); | ||
569 | } | ||
566 | 570 | ||
567 | gk20a_dbg_fn(""); | 571 | static int gr_gm20b_load_smid_config(struct gk20a *g) |
572 | { | ||
573 | u32 *tpc_sm_id; | ||
574 | u32 i, j; | ||
575 | u32 tpc_index, gpc_index; | ||
568 | 576 | ||
569 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 577 | tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); |
570 | gpc_offset = gpc_stride * gpc_index; | 578 | if (!tpc_sm_id) |
571 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; | 579 | return -ENOMEM; |
572 | tpc_index++) { | ||
573 | tpc_offset = tpc_in_gpc_stride * tpc_index; | ||
574 | |||
575 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() | ||
576 | + gpc_offset + tpc_offset, | ||
577 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
578 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) | ||
579 | + gpc_offset, | ||
580 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
581 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() | ||
582 | + gpc_offset + tpc_offset, | ||
583 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
584 | |||
585 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index; | ||
586 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index; | ||
587 | |||
588 | sm_id++; | ||
589 | } | ||
590 | } | ||
591 | 580 | ||
592 | gr->no_of_sm = sm_id; | 581 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ |
582 | for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { | ||
583 | u32 reg = 0; | ||
584 | u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + | ||
585 | gr_cwd_gpc_tpc_id_tpc0_s(); | ||
593 | 586 | ||
594 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) | 587 | for (j = 0; j < 4; j++) { |
595 | tpc_per_gpc |= gr->gpc_tpc_count[gpc_index] | 588 | u32 sm_id = (i / 4) + j; |
596 | << (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index); | 589 | u32 bits; |
597 | gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(0), tpc_per_gpc); | ||
598 | gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(0), tpc_per_gpc); | ||
599 | 590 | ||
600 | /* gr__setup_pd_mapping stubbed for gk20a */ | 591 | if (sm_id >= g->gr.tpc_count) |
601 | gr_gk20a_setup_rop_mapping(g, gr); | 592 | break; |
602 | 593 | ||
603 | for (gpc_index = 0; | 594 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; |
604 | gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; | 595 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; |
605 | gpc_index += 4) { | ||
606 | 596 | ||
607 | gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), | 597 | bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | |
608 | gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) | | 598 | gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); |
609 | gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) | | 599 | reg |= bits << (j * bit_stride); |
610 | gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) | | ||
611 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | ||
612 | } | ||
613 | 600 | ||
614 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); | 601 | tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride; |
615 | if (g->tpc_fs_mask_user && | 602 | } |
616 | fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) { | 603 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); |
617 | u32 val = g->tpc_fs_mask_user; | ||
618 | val &= (0x1 << gr->max_tpc_count) - 1; | ||
619 | gk20a_writel(g, gr_cwd_fs_r(), | ||
620 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
621 | gr_cwd_fs_num_tpcs_f(hweight32(val))); | ||
622 | } else { | ||
623 | gk20a_writel(g, gr_cwd_fs_r(), | ||
624 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
625 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
626 | } | 604 | } |
627 | 605 | ||
606 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) | ||
607 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); | ||
608 | |||
609 | kfree(tpc_sm_id); | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | int gr_gm20b_init_fs_state(struct gk20a *g) | ||
615 | { | ||
616 | gk20a_dbg_fn(""); | ||
617 | |||
618 | gr_gk20a_init_fs_state(g); | ||
619 | |||
628 | gr_gm20b_load_tpc_mask(g); | 620 | gr_gm20b_load_tpc_mask(g); |
629 | 621 | ||
630 | gk20a_writel(g, gr_bes_zrop_settings_r(), | 622 | gk20a_writel(g, gr_bes_zrop_settings_r(), |
@@ -636,22 +628,7 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
636 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | 628 | gk20a_readl(g, gr_be0_crop_debug3_r()) | |
637 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | 629 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); |
638 | 630 | ||
639 | for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) { | 631 | g->ops.gr.load_smid_config(g); |
640 | if (tpc_index == 0) { | ||
641 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | ||
642 | tpc_sm_id |= gr_cwd_sm_id_tpc0_f(tpc_index); | ||
643 | } else if (tpc_index == 1) { | ||
644 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(tpc_index); | ||
645 | tpc_sm_id |= gr_cwd_sm_id_tpc1_f(tpc_index); | ||
646 | } | ||
647 | } | ||
648 | |||
649 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. | ||
650 | * Since we know TPC number is less than 5. We select | ||
651 | * index 0 directly. */ | ||
652 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gpc_tpc_id); | ||
653 | |||
654 | gk20a_writel(g, gr_cwd_sm_id_r(0), tpc_sm_id); | ||
655 | 632 | ||
656 | return 0; | 633 | return 0; |
657 | } | 634 | } |
@@ -1443,7 +1420,7 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1443 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; | 1420 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; |
1444 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; | 1421 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; |
1445 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; | 1422 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; |
1446 | gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; | 1423 | gops->gr.init_fs_state = gr_gm20b_init_fs_state; |
1447 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; | 1424 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; |
1448 | gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; | 1425 | gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; |
1449 | if (gops->privsecurity) | 1426 | if (gops->privsecurity) |
@@ -1499,4 +1476,6 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1499 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | 1476 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; |
1500 | gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; | 1477 | gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; |
1501 | gops->gr.fuse_override = gm20b_gr_fuse_override; | 1478 | gops->gr.fuse_override = gm20b_gr_fuse_override; |
1479 | gops->gr.load_smid_config = gr_gm20b_load_smid_config; | ||
1480 | gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; | ||
1502 | } | 1481 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 90f933bd..2a585e63 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B GPC MMU | 2 | * GM20B GPC MMU |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -46,5 +46,6 @@ void gm20b_init_gr(struct gpu_ops *gops); | |||
46 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | 46 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, |
47 | struct channel_ctx_gk20a *ch_ctx, | 47 | struct channel_ctx_gk20a *ch_ctx, |
48 | u64 addr, bool patch); | 48 | u64 addr, bool patch); |
49 | int gr_gm20b_ctx_state_floorsweep(struct gk20a *g); | 49 | int gr_gm20b_init_fs_state(struct gk20a *g); |
50 | |||
50 | #endif | 51 | #endif |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h index 73861c07..45240e97 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | |||
@@ -1962,10 +1962,22 @@ static inline u32 gr_cwd_gpc_tpc_id_r(u32 i) | |||
1962 | { | 1962 | { |
1963 | return 0x00405b60 + i*4; | 1963 | return 0x00405b60 + i*4; |
1964 | } | 1964 | } |
1965 | static inline u32 gr_cwd_gpc_tpc_id_tpc0_s(void) | ||
1966 | { | ||
1967 | return 4; | ||
1968 | } | ||
1965 | static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v) | 1969 | static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v) |
1966 | { | 1970 | { |
1967 | return (v & 0xf) << 0; | 1971 | return (v & 0xf) << 0; |
1968 | } | 1972 | } |
1973 | static inline u32 gr_cwd_gpc_tpc_id_gpc0_s(void) | ||
1974 | { | ||
1975 | return 4; | ||
1976 | } | ||
1977 | static inline u32 gr_cwd_gpc_tpc_id_gpc0_f(u32 v) | ||
1978 | { | ||
1979 | return (v & 0xf) << 4; | ||
1980 | } | ||
1969 | static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v) | 1981 | static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v) |
1970 | { | 1982 | { |
1971 | return (v & 0xf) << 8; | 1983 | return (v & 0xf) << 8; |
@@ -1974,6 +1986,10 @@ static inline u32 gr_cwd_sm_id_r(u32 i) | |||
1974 | { | 1986 | { |
1975 | return 0x00405ba0 + i*4; | 1987 | return 0x00405ba0 + i*4; |
1976 | } | 1988 | } |
1989 | static inline u32 gr_cwd_sm_id__size_1_v(void) | ||
1990 | { | ||
1991 | return 0x00000006; | ||
1992 | } | ||
1977 | static inline u32 gr_cwd_sm_id_tpc0_f(u32 v) | 1993 | static inline u32 gr_cwd_sm_id_tpc0_f(u32 v) |
1978 | { | 1994 | { |
1979 | return (v & 0xff) << 0; | 1995 | return (v & 0xff) << 0; |