summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Liu <pengliu@nvidia.com> 2018-10-30 16:45:43 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com> 2019-04-01 18:27:17 -0400
commit 3a11883f7f4399ae8dffbea00c1842e3c2095937 (patch)
tree 82d36197046e73c13432250ec4ebce0da21791d5
parent f1be222687a853b0218a5700a213f3d34d8ccc4f (diff)
gpu: nvgpu: using pmu counters for load estimate
PMU counters #0 and #4 are used to count total cycles and busy cycles.
These counts are used by podgov to estimate GPU load.

PMU idle intr status register is used to monitor overflow. Overflow
rarely occurs because frequency governor reads and resets the counters
at a high cadence. When overflow occurs, 100% work load is reported to
frequency governor.

Bug 1963732

Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69
Signed-off-by: Peng Liu <pengliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1939547
(cherry picked from commit 34df0035194e0203f68f679acdd84e5533a48149)
Reviewed-on: https://git-master.nvidia.com/r/1979495
Reviewed-by: Aaron Tian <atian@nvidia.com>
Tested-by: Aaron Tian <atian@nvidia.com>
Reviewed-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c 42
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c 45
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h 3
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hal_gm20b.c 2
-rw-r--r-- drivers/gpu/nvgpu/gp106/hal_gp106.c 2
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hal_gp10b.c 2
-rw-r--r-- drivers/gpu/nvgpu/gv100/hal_gv100.c 2
-rw-r--r-- drivers/gpu/nvgpu/gv11b/hal_gv11b.c 2
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/gk20a.h 2
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h 40
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h 48
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h 48
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h 48
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h 48
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h 48
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/pmu.h 4
-rw-r--r-- drivers/gpu/nvgpu/os/linux/scale.c 18
-rw-r--r-- drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c 2
-rw-r--r-- drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c 2
19 files changed, 398 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
index 57a4ea40..bf07bd79 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
@@ -236,6 +236,48 @@ int nvgpu_pmu_load_update(struct gk20a *g)
236 return 0; 236 return 0;
237} 237}
238 238
239int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm)
240{
241 u64 busy_cycles, total_cycles;
242 u32 intr_status;
243
244 gk20a_busy_noresume(g);
245 if (!g->power_on) {
246 *norm = 0;
247 goto exit;
248 }
249
250 if (g->ops.pmu.pmu_read_idle_counter == NULL ||
251 g->ops.pmu.pmu_reset_idle_counter == NULL ||
252 g->ops.pmu.pmu_read_idle_intr_status == NULL ||
253 g->ops.pmu.pmu_clear_idle_intr_status == NULL) {
254 *norm = PMU_BUSY_CYCLES_NORM_MAX;
255 goto exit;
256 }
257
258 busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 4);
259 total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 0);
260 intr_status = g->ops.pmu.pmu_read_idle_intr_status(g);
261
262 g->ops.pmu.pmu_reset_idle_counter(g, 4);
263 g->ops.pmu.pmu_reset_idle_counter(g, 0);
264
265 if (intr_status != 0UL) {
266 *norm = PMU_BUSY_CYCLES_NORM_MAX;
267 g->ops.pmu.pmu_clear_idle_intr_status(g);
268 } else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
269 *norm = PMU_BUSY_CYCLES_NORM_MAX;
270 } else {
271 *norm = (u32)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX
272 / total_cycles);
273 }
274
275exit:
276 gk20a_idle_nosuspend(g);
277
278 return 0;
279}
280
239void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, 281void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
240 u32 *total_cycles) 282 u32 *total_cycles)
241{ 283{
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 6eecc4fa..050423b0 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -798,6 +798,39 @@ void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
798 pwr_pmu_idle_ctrl_value_always_f() | 798 pwr_pmu_idle_ctrl_value_always_f() |
799 pwr_pmu_idle_ctrl_filter_disabled_f()); 799 pwr_pmu_idle_ctrl_filter_disabled_f());
800 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); 800 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
801
802 /*
803 * use counters 4 and 0 for perfmon to log busy cycles and total cycles
804 * counter #0 overflow sets pmu idle intr status bit
805 */
806 gk20a_writel(g, pwr_pmu_idle_intr_r(),
807 pwr_pmu_idle_intr_en_f(0));
808
809 gk20a_writel(g, pwr_pmu_idle_threshold_r(0),
810 pwr_pmu_idle_threshold_value_f(0x7FFFFFFF));
811
812 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0));
813 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
814 pwr_pmu_idle_ctrl_filter_m(),
815 pwr_pmu_idle_ctrl_value_always_f() |
816 pwr_pmu_idle_ctrl_filter_disabled_f());
817 gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data);
818
819 gk20a_writel(g, pwr_pmu_idle_mask_r(4),
820 pwr_pmu_idle_mask_gr_enabled_f() |
821 pwr_pmu_idle_mask_ce_2_enabled_f());
822
823 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4));
824 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
825 pwr_pmu_idle_ctrl_filter_m(),
826 pwr_pmu_idle_ctrl_value_busy_f() |
827 pwr_pmu_idle_ctrl_filter_disabled_f());
828 gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data);
829
830 gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1));
831 gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1));
832 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
833 pwr_pmu_idle_intr_status_intr_f(1));
801} 834}
802 835
803u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) 836u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
@@ -812,6 +845,18 @@ void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
812 pwr_pmu_idle_count_reset_f(1)); 845 pwr_pmu_idle_count_reset_f(1));
813} 846}
814 847
848u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g)
849{
850 return pwr_pmu_idle_intr_status_intr_v(
851 gk20a_readl(g, pwr_pmu_idle_intr_status_r()));
852}
853
854void gk20a_pmu_clear_idle_intr_status(struct gk20a *g)
855{
856 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
857 pwr_pmu_idle_intr_status_intr_f(1));
858}
859
815void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, 860void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
816 struct pmu_pg_stats_data *pg_stat_data) 861 struct pmu_pg_stats_data *pg_stat_data)
817{ 862{
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 35b80eaf..65ffd636 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -58,6 +58,9 @@ void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
58u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id); 58u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
59void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id); 59void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
60 60
61u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g);
62void gk20a_pmu_clear_idle_intr_status(struct gk20a *g);
63
61void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr); 64void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr);
62bool gk20a_is_pmu_supported(struct gk20a *g); 65bool gk20a_is_pmu_supported(struct gk20a *g);
63 66
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 7eaf6bff..dbfbc3d7 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -530,6 +530,8 @@ static const struct gpu_ops gm20b_ops = {
530 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, 530 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
531 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, 531 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
532 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, 532 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
533 .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
534 .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
533 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, 535 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
534 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, 536 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
535 .pmu_enable_irq = gk20a_pmu_enable_irq, 537 .pmu_enable_irq = gk20a_pmu_enable_irq,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 86892d23..fea2ffa0 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -631,6 +631,8 @@ static const struct gpu_ops gp106_ops = {
631 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, 631 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
632 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, 632 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
633 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, 633 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
634 .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
635 .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
634 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, 636 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
635 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, 637 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
636 .pmu_enable_irq = gk20a_pmu_enable_irq, 638 .pmu_enable_irq = gk20a_pmu_enable_irq,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 85051c11..b3379253 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -595,6 +595,8 @@ static const struct gpu_ops gp10b_ops = {
595 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, 595 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
596 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, 596 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
597 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, 597 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
598 .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
599 .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
598 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, 600 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
599 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, 601 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
600 .pmu_enable_irq = gk20a_pmu_enable_irq, 602 .pmu_enable_irq = gk20a_pmu_enable_irq,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 5f1a18a0..9621aaa4 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -756,6 +756,8 @@ static const struct gpu_ops gv100_ops = {
756 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, 756 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
757 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, 757 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
758 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, 758 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
759 .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
760 .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
759 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, 761 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
760 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, 762 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
761 .pmu_enable_irq = gk20a_pmu_enable_irq, 763 .pmu_enable_irq = gk20a_pmu_enable_irq,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index cf6a7e2c..d52d1c7e 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -713,6 +713,8 @@ static const struct gpu_ops gv11b_ops = {
713 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, 713 .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
714 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, 714 .pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
715 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, 715 .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
716 .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
717 .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
716 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, 718 .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
717 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, 719 .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
718 .pmu_enable_irq = gk20a_pmu_enable_irq, 720 .pmu_enable_irq = gk20a_pmu_enable_irq,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index dfa4aaf2..aa435638 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1050,6 +1050,8 @@ struct gpu_ops {
1050 void (*pmu_init_perfmon_counter)(struct gk20a *g); 1050 void (*pmu_init_perfmon_counter)(struct gk20a *g);
1051 void (*pmu_pg_idle_counter_config)(struct gk20a *g, u32 pg_engine_id); 1051 void (*pmu_pg_idle_counter_config)(struct gk20a *g, u32 pg_engine_id);
1052 u32 (*pmu_read_idle_counter)(struct gk20a *g, u32 counter_id); 1052 u32 (*pmu_read_idle_counter)(struct gk20a *g, u32 counter_id);
1053 u32 (*pmu_read_idle_intr_status)(struct gk20a *g);
1054 void (*pmu_clear_idle_intr_status)(struct gk20a *g);
1053 void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id); 1055 void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id);
1054 void (*pmu_dump_elpg_stats)(struct nvgpu_pmu *pmu); 1056 void (*pmu_dump_elpg_stats)(struct nvgpu_pmu *pmu);
1055 void (*pmu_dump_falcon_stats)(struct nvgpu_pmu *pmu); 1057 void (*pmu_dump_falcon_stats)(struct nvgpu_pmu *pmu);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h
index 71b73d2a..28457634 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h
@@ -672,6 +672,46 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
672{ 672{
673 return 0x0U; 673 return 0x0U;
674} 674}
675static inline u32 pwr_pmu_idle_threshold_r(u32 i)
676{
677 return 0x0010a8a0U + i*4U;
678}
679static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
680{
681 return (v & 0x7fffffffU) << 0U;
682}
683static inline u32 pwr_pmu_idle_intr_r(void)
684{
685 return 0x0010a9e8U;
686}
687static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
688{
689 return (v & 0x1U) << 0U;
690}
691static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
692{
693 return 0x00000000U;
694}
695static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
696{
697 return 0x00000001U;
698}
699static inline u32 pwr_pmu_idle_intr_status_r(void)
700{
701 return 0x0010a9ecU;
702}
703static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
704{
705 return (v & 0x1U) << 0U;
706}
707static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
708{
709 return U32(0x1U) << 0U;
710}
711static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
712{
713 return (r >> 0U) & 0x1U;
714}
675static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) 715static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
676{ 716{
677 return 0x0010a9f0U + i*8U; 717 return 0x0010a9f0U + i*8U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h
index fa232644..2ca1f02b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h
@@ -716,6 +716,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
716{ 716{
717 return 0x0U; 717 return 0x0U;
718} 718}
719static inline u32 pwr_pmu_idle_threshold_r(u32 i)
720{
721 return 0x0010a8a0U + i*4U;
722}
723static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
724{
725 return (v & 0x7fffffffU) << 0U;
726}
727static inline u32 pwr_pmu_idle_intr_r(void)
728{
729 return 0x0010a9e8U;
730}
731static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
732{
733 return (v & 0x1U) << 0U;
734}
735static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
736{
737 return 0x00000000U;
738}
739static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
740{
741 return 0x00000001U;
742}
743static inline u32 pwr_pmu_idle_intr_status_r(void)
744{
745 return 0x0010a9ecU;
746}
747static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
748{
749 return (v & 0x1U) << 0U;
750}
751static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
752{
753 return U32(0x1U) << 0U;
754}
755static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
756{
757 return (r >> 0U) & 0x1U;
758}
759static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
760{
761 return 0x00000001U;
762}
763static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
764{
765 return 0x00000001U;
766}
719static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) 767static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
720{ 768{
721 return 0x0010a9f0U + i*8U; 769 return 0x0010a9f0U + i*8U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h
index a9fbbd10..2e75fa6e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h
@@ -724,6 +724,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
724{ 724{
725 return 0x0U; 725 return 0x0U;
726} 726}
727static inline u32 pwr_pmu_idle_threshold_r(u32 i)
728{
729 return 0x0010a8a0U + i*4U;
730}
731static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
732{
733 return (v & 0x7fffffffU) << 0U;
734}
735static inline u32 pwr_pmu_idle_intr_r(void)
736{
737 return 0x0010a9e8U;
738}
739static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
740{
741 return (v & 0x1U) << 0U;
742}
743static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
744{
745 return 0x00000000U;
746}
747static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
748{
749 return 0x00000001U;
750}
751static inline u32 pwr_pmu_idle_intr_status_r(void)
752{
753 return 0x0010a9ecU;
754}
755static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
756{
757 return (v & 0x1U) << 0U;
758}
759static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
760{
761 return U32(0x1U) << 0U;
762}
763static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
764{
765 return (r >> 0U) & 0x1U;
766}
767static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
768{
769 return 0x00000001U;
770}
771static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
772{
773 return 0x00000001U;
774}
727static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) 775static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
728{ 776{
729 return 0x0010a9f0U + i*8U; 777 return 0x0010a9f0U + i*8U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h
index 73a5c45c..c160e897 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h
@@ -720,6 +720,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
720{ 720{
721 return 0x0U; 721 return 0x0U;
722} 722}
723static inline u32 pwr_pmu_idle_threshold_r(u32 i)
724{
725 return 0x0010a8a0U + i*4U;
726}
727static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
728{
729 return (v & 0x7fffffffU) << 0U;
730}
731static inline u32 pwr_pmu_idle_intr_r(void)
732{
733 return 0x0010a9e8U;
734}
735static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
736{
737 return (v & 0x1U) << 0U;
738}
739static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
740{
741 return 0x00000000U;
742}
743static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
744{
745 return 0x00000001U;
746}
747static inline u32 pwr_pmu_idle_intr_status_r(void)
748{
749 return 0x0010a9ecU;
750}
751static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
752{
753 return (v & 0x1U) << 0U;
754}
755static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
756{
757 return U32(0x1U) << 0U;
758}
759static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
760{
761 return (r >> 0U) & 0x1U;
762}
763static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
764{
765 return 0x00000001U;
766}
767static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
768{
769 return 0x00000001U;
770}
723static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) 771static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
724{ 772{
725 return 0x0010a9f0U + i*8U; 773 return 0x0010a9f0U + i*8U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h
index 4b0b0326..c719226c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h
@@ -824,6 +824,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
824{ 824{
825 return 0x0U; 825 return 0x0U;
826} 826}
827static inline u32 pwr_pmu_idle_threshold_r(u32 i)
828{
829 return 0x0010a8a0U + i*4U;
830}
831static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
832{
833 return (v & 0x7fffffffU) << 0U;
834}
835static inline u32 pwr_pmu_idle_intr_r(void)
836{
837 return 0x0010a9e8U;
838}
839static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
840{
841 return (v & 0x1U) << 0U;
842}
843static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
844{
845 return 0x00000000U;
846}
847static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
848{
849 return 0x00000001U;
850}
851static inline u32 pwr_pmu_idle_intr_status_r(void)
852{
853 return 0x0010a9ecU;
854}
855static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
856{
857 return (v & 0x1U) << 0U;
858}
859static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
860{
861 return U32(0x1U) << 0U;
862}
863static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
864{
865 return (r >> 0U) & 0x1U;
866}
867static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
868{
869 return 0x00000001U;
870}
871static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
872{
873 return 0x00000001U;
874}
827static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) 875static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
828{ 876{
829 return 0x0010a9f0U + i*8U; 877 return 0x0010a9f0U + i*8U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h
index c16d44f1..295c6e95 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h
@@ -880,6 +880,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
880{ 880{
881 return 0x0U; 881 return 0x0U;
882} 882}
883static inline u32 pwr_pmu_idle_threshold_r(u32 i)
884{
885 return 0x0010a8a0U + i*4U;
886}
887static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
888{
889 return (v & 0x7fffffffU) << 0U;
890}
891static inline u32 pwr_pmu_idle_intr_r(void)
892{
893 return 0x0010a9e8U;
894}
895static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
896{
897 return (v & 0x1U) << 0U;
898}
899static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
900{
901 return 0x00000000U;
902}
903static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
904{
905 return 0x00000001U;
906}
907static inline u32 pwr_pmu_idle_intr_status_r(void)
908{
909 return 0x0010a9ecU;
910}
911static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
912{
913 return (v & 0x1U) << 0U;
914}
915static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
916{
917 return U32(0x1U) << 0U;
918}
919static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
920{
921 return (r >> 0U) & 0x1U;
922}
923static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
924{
925 return 0x00000001U;
926}
927static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
928{
929 return 0x00000001U;
930}
883static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) 931static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
884{ 932{
885 return 0x0010a9f0U + i*8U; 933 return 0x0010a9f0U + i*8U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
index 7283755a..00194ec0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
@@ -144,6 +144,9 @@ enum {
144#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000) 144#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000)
145#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (200) 145#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (200)
146 146
147/* pmu load const defines */
148#define PMU_BUSY_CYCLES_NORM_MAX (1000U)
149
147/* RPC */ 150/* RPC */
148#define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)\ 151#define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)\
149 do { \ 152 do { \
@@ -449,6 +452,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
449int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); 452int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
450int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); 453int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
451int nvgpu_pmu_load_update(struct gk20a *g); 454int nvgpu_pmu_load_update(struct gk20a *g);
455int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
452void nvgpu_pmu_reset_load_counters(struct gk20a *g); 456void nvgpu_pmu_reset_load_counters(struct gk20a *g);
453void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, 457void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
454 u32 *total_cycles); 458 u32 *total_cycles);
diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c
index ee5b6861..ef7996e6 100644
--- a/drivers/gpu/nvgpu/os/linux/scale.c
+++ b/drivers/gpu/nvgpu/os/linux/scale.c
@@ -211,18 +211,18 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,
211} 211}
212 212
213/* 213/*
214 * update_load_estimate_gpmu(profile) 214 * update_load_estimate_busy_cycles(dev)
215 * 215 *
216 * Update load estimate using gpmu. The gpmu value is normalised 216 * Update load estimate using pmu idle counters. Result is normalised
217 * based on the time it was asked last time. 217 * based on the time it was asked last time.
218 */ 218 */
219 219
220static void update_load_estimate_gpmu(struct device *dev) 220static void update_load_estimate_busy_cycles(struct device *dev)
221{ 221{
222 struct gk20a *g = get_gk20a(dev); 222 struct gk20a *g = get_gk20a(dev);
223 struct gk20a_scale_profile *profile = g->scale_profile; 223 struct gk20a_scale_profile *profile = g->scale_profile;
224 unsigned long dt; 224 unsigned long dt;
225 u32 busy_time; 225 u32 busy_cycles_norm;
226 ktime_t t; 226 ktime_t t;
227 227
228 t = ktime_get(); 228 t = ktime_get();
@@ -230,8 +230,9 @@ static void update_load_estimate_gpmu(struct device *dev)
230 230
231 profile->dev_stat.total_time = dt; 231 profile->dev_stat.total_time = dt;
232 profile->last_event_time = t; 232 profile->last_event_time = t;
233 nvgpu_pmu_load_norm(g, &busy_time); 233 nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm);
234 profile->dev_stat.busy_time = (busy_time * dt) / 1000; 234 profile->dev_stat.busy_time =
235 (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;
235} 236}
236 237
237/* 238/*
@@ -284,9 +285,6 @@ static int gk20a_scale_get_dev_status(struct device *dev,
284 struct gk20a_scale_profile *profile = g->scale_profile; 285 struct gk20a_scale_profile *profile = g->scale_profile;
285 struct gk20a_platform *platform = dev_get_drvdata(dev); 286 struct gk20a_platform *platform = dev_get_drvdata(dev);
286 287
287 /* update the software shadow */
288 nvgpu_pmu_load_update(g);
289
290 /* inform edp about new constraint */ 288 /* inform edp about new constraint */
291 if (platform->prescale) 289 if (platform->prescale)
292 platform->prescale(dev); 290 platform->prescale(dev);
@@ -296,7 +294,7 @@ static int gk20a_scale_get_dev_status(struct device *dev,
296 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); 294 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
297 295
298 /* Update load estimate */ 296 /* Update load estimate */
299 update_load_estimate_gpmu(dev); 297 update_load_estimate_busy_cycles(dev);
300 298
301 /* Copy the contents of the current device status */ 299 /* Copy the contents of the current device status */
302 *stat = profile->dev_stat; 300 *stat = profile->dev_stat;
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index eae0ba9e..7c800d5f 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -436,6 +436,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
436 .pmu_pg_idle_counter_config = NULL, 436 .pmu_pg_idle_counter_config = NULL,
437 .pmu_read_idle_counter = NULL, 437 .pmu_read_idle_counter = NULL,
438 .pmu_reset_idle_counter = NULL, 438 .pmu_reset_idle_counter = NULL,
439 .pmu_read_idle_intr_status = NULL,
440 .pmu_clear_idle_intr_status = NULL,
439 .pmu_dump_elpg_stats = NULL, 441 .pmu_dump_elpg_stats = NULL,
440 .pmu_dump_falcon_stats = NULL, 442 .pmu_dump_falcon_stats = NULL,
441 .pmu_enable_irq = NULL, 443 .pmu_enable_irq = NULL,
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
index de006b1e..78ea5643 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -504,6 +504,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
504 .pmu_pg_idle_counter_config = NULL, 504 .pmu_pg_idle_counter_config = NULL,
505 .pmu_read_idle_counter = NULL, 505 .pmu_read_idle_counter = NULL,
506 .pmu_reset_idle_counter = NULL, 506 .pmu_reset_idle_counter = NULL,
507 .pmu_read_idle_intr_status = NULL,
508 .pmu_clear_idle_intr_status = NULL,
507 .pmu_dump_elpg_stats = NULL, 509 .pmu_dump_elpg_stats = NULL,
508 .pmu_dump_falcon_stats = NULL, 510 .pmu_dump_falcon_stats = NULL,
509 .pmu_enable_irq = NULL, 511 .pmu_enable_irq = NULL,