diff options
author | Peng Liu <pengliu@nvidia.com> | 2018-10-30 16:45:43 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-04-01 18:27:17 -0400 |
commit | 3a11883f7f4399ae8dffbea00c1842e3c2095937 (patch) | |
tree | 82d36197046e73c13432250ec4ebce0da21791d5 /drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |
parent | f1be222687a853b0218a5700a213f3d34d8ccc4f (diff) |
gpu: nvgpu: use PMU counters for load estimation
PMU counters #0 and #4 are used to count total cycles and busy cycles.
These counts are used by podgov to estimate GPU load.
The PMU idle intr status register is used to monitor overflow. Overflow
rarely occurs because the frequency governor reads and resets the counters
at a high cadence. When overflow does occur, a 100% workload is reported
to the frequency governor.
Bug 1963732
Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69
Signed-off-by: Peng Liu <pengliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1939547
(cherry picked from commit 34df0035194e0203f68f679acdd84e5533a48149)
Reviewed-on: https://git-master.nvidia.com/r/1979495
Reviewed-by: Aaron Tian <atian@nvidia.com>
Tested-by: Aaron Tian <atian@nvidia.com>
Reviewed-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/pmu_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 45 |
1 file changed, 45 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 6eecc4fa..050423b0 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -798,6 +798,39 @@ void gk20a_pmu_init_perfmon_counter(struct gk20a *g) | |||
798 | pwr_pmu_idle_ctrl_value_always_f() | | 798 | pwr_pmu_idle_ctrl_value_always_f() | |
799 | pwr_pmu_idle_ctrl_filter_disabled_f()); | 799 | pwr_pmu_idle_ctrl_filter_disabled_f()); |
800 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); | 800 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); |
801 | |||
802 | /* | ||
803 | * use counters 4 and 0 for perfmon to log busy cycles and total cycles | ||
804 | * counter #0 overflow sets pmu idle intr status bit | ||
805 | */ | ||
806 | gk20a_writel(g, pwr_pmu_idle_intr_r(), | ||
807 | pwr_pmu_idle_intr_en_f(0)); | ||
808 | |||
809 | gk20a_writel(g, pwr_pmu_idle_threshold_r(0), | ||
810 | pwr_pmu_idle_threshold_value_f(0x7FFFFFFF)); | ||
811 | |||
812 | data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0)); | ||
813 | data = set_field(data, pwr_pmu_idle_ctrl_value_m() | | ||
814 | pwr_pmu_idle_ctrl_filter_m(), | ||
815 | pwr_pmu_idle_ctrl_value_always_f() | | ||
816 | pwr_pmu_idle_ctrl_filter_disabled_f()); | ||
817 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data); | ||
818 | |||
819 | gk20a_writel(g, pwr_pmu_idle_mask_r(4), | ||
820 | pwr_pmu_idle_mask_gr_enabled_f() | | ||
821 | pwr_pmu_idle_mask_ce_2_enabled_f()); | ||
822 | |||
823 | data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4)); | ||
824 | data = set_field(data, pwr_pmu_idle_ctrl_value_m() | | ||
825 | pwr_pmu_idle_ctrl_filter_m(), | ||
826 | pwr_pmu_idle_ctrl_value_busy_f() | | ||
827 | pwr_pmu_idle_ctrl_filter_disabled_f()); | ||
828 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data); | ||
829 | |||
830 | gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1)); | ||
831 | gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1)); | ||
832 | gk20a_writel(g, pwr_pmu_idle_intr_status_r(), | ||
833 | pwr_pmu_idle_intr_status_intr_f(1)); | ||
801 | } | 834 | } |
802 | 835 | ||
803 | u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) | 836 | u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) |
@@ -812,6 +845,18 @@ void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id) | |||
812 | pwr_pmu_idle_count_reset_f(1)); | 845 | pwr_pmu_idle_count_reset_f(1)); |
813 | } | 846 | } |
814 | 847 | ||
848 | u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g) | ||
849 | { | ||
850 | return pwr_pmu_idle_intr_status_intr_v( | ||
851 | gk20a_readl(g, pwr_pmu_idle_intr_status_r())); | ||
852 | } | ||
853 | |||
854 | void gk20a_pmu_clear_idle_intr_status(struct gk20a *g) | ||
855 | { | ||
856 | gk20a_writel(g, pwr_pmu_idle_intr_status_r(), | ||
857 | pwr_pmu_idle_intr_status_intr_f(1)); | ||
858 | } | ||
859 | |||
815 | void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, | 860 | void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, |
816 | struct pmu_pg_stats_data *pg_stat_data) | 861 | struct pmu_pg_stats_data *pg_stat_data) |
817 | { | 862 | { |