summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
author: Peng Liu <pengliu@nvidia.com> 2018-10-30 16:45:43 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2019-04-01 18:27:17 -0400
commit: 3a11883f7f4399ae8dffbea00c1842e3c2095937 (patch)
tree: 82d36197046e73c13432250ec4ebce0da21791d5 /drivers/gpu/nvgpu/gk20a
parent: f1be222687a853b0218a5700a213f3d34d8ccc4f (diff)
gpu: nvgpu: using pmu counters for load estimate
PMU counters #0 and #4 are used to count total cycles and busy cycles. These counts are used by podgov to estimate GPU load. The PMU idle interrupt status register is used to monitor overflow. Overflow rarely occurs because the frequency governor reads and resets the counters at a high cadence. When overflow occurs, a 100% workload is reported to the frequency governor. Bug 1963732 Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69 Signed-off-by: Peng Liu <pengliu@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1939547 (cherry picked from commit 34df0035194e0203f68f679acdd84e5533a48149) Reviewed-on: https://git-master.nvidia.com/r/1979495 Reviewed-by: Aaron Tian <atian@nvidia.com> Tested-by: Aaron Tian <atian@nvidia.com> Reviewed-by: Rajkumar Kasirajan <rkasirajan@nvidia.com> Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com> Reviewed-by: Bibek Basu <bbasu@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 45
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 3
2 files changed, 48 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 6eecc4fa..050423b0 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -798,6 +798,39 @@ void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
798 pwr_pmu_idle_ctrl_value_always_f() | 798 pwr_pmu_idle_ctrl_value_always_f() |
799 pwr_pmu_idle_ctrl_filter_disabled_f()); 799 pwr_pmu_idle_ctrl_filter_disabled_f());
800 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); 800 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
801
802 /*
803 * use counters 4 and 0 for perfmon to log busy cycles and total cycles
804 * counter #0 overflow sets pmu idle intr status bit
805 */
806 gk20a_writel(g, pwr_pmu_idle_intr_r(),
807 pwr_pmu_idle_intr_en_f(0));
808
809 gk20a_writel(g, pwr_pmu_idle_threshold_r(0),
810 pwr_pmu_idle_threshold_value_f(0x7FFFFFFF));
811
812 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0));
813 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
814 pwr_pmu_idle_ctrl_filter_m(),
815 pwr_pmu_idle_ctrl_value_always_f() |
816 pwr_pmu_idle_ctrl_filter_disabled_f());
817 gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data);
818
819 gk20a_writel(g, pwr_pmu_idle_mask_r(4),
820 pwr_pmu_idle_mask_gr_enabled_f() |
821 pwr_pmu_idle_mask_ce_2_enabled_f());
822
823 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4));
824 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
825 pwr_pmu_idle_ctrl_filter_m(),
826 pwr_pmu_idle_ctrl_value_busy_f() |
827 pwr_pmu_idle_ctrl_filter_disabled_f());
828 gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data);
829
830 gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1));
831 gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1));
832 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
833 pwr_pmu_idle_intr_status_intr_f(1));
801} 834}
802 835
803u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) 836u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
@@ -812,6 +845,18 @@ void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
812 pwr_pmu_idle_count_reset_f(1)); 845 pwr_pmu_idle_count_reset_f(1));
813} 846}
814 847
848u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g)
849{
850 return pwr_pmu_idle_intr_status_intr_v(
851 gk20a_readl(g, pwr_pmu_idle_intr_status_r()));
852}
853
854void gk20a_pmu_clear_idle_intr_status(struct gk20a *g)
855{
856 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
857 pwr_pmu_idle_intr_status_intr_f(1));
858}
859
815void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, 860void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
816 struct pmu_pg_stats_data *pg_stat_data) 861 struct pmu_pg_stats_data *pg_stat_data)
817{ 862{
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 35b80eaf..65ffd636 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -58,6 +58,9 @@ void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
58u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id); 58u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
59void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id); 59void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
60 60
61u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g);
62void gk20a_pmu_clear_idle_intr_status(struct gk20a *g);
63
61void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr); 64void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr);
62bool gk20a_is_pmu_supported(struct gk20a *g); 65bool gk20a_is_pmu_supported(struct gk20a *g);
63 66