diff options
author | Peng Liu <pengliu@nvidia.com> | 2018-10-30 16:45:43 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-04-01 18:27:17 -0400 |
commit | 3a11883f7f4399ae8dffbea00c1842e3c2095937 (patch) | |
tree | 82d36197046e73c13432250ec4ebce0da21791d5 /drivers/gpu/nvgpu/common | |
parent | f1be222687a853b0218a5700a213f3d34d8ccc4f (diff) |
gpu: nvgpu: using pmu counters for load estimate
PMU counters #0 and #4 are used to count total cycles and busy cycles.
These counts are used by podgov to estimate GPU load.
PMU idle intr status register is used to monitor overflow. Overflow
rarely occurs because frequency governor reads and resets the counters
at a high cadence. When overflow occurs, 100% workload is reported to
frequency governor.
Bug 1963732
Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69
Signed-off-by: Peng Liu <pengliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1939547
(cherry picked from commit 34df0035194e0203f68f679acdd84e5533a48149)
Reviewed-on: https://git-master.nvidia.com/r/1979495
Reviewed-by: Aaron Tian <atian@nvidia.com>
Tested-by: Aaron Tian <atian@nvidia.com>
Reviewed-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c index 57a4ea40..bf07bd79 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | |||
@@ -236,6 +236,48 @@ int nvgpu_pmu_load_update(struct gk20a *g) | |||
236 | return 0; | 236 | return 0; |
237 | } | 237 | } |
238 | 238 | ||
239 | int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm) | ||
240 | { | ||
241 | u64 busy_cycles, total_cycles; | ||
242 | u32 intr_status; | ||
243 | |||
244 | gk20a_busy_noresume(g); | ||
245 | if (!g->power_on) { | ||
246 | *norm = 0; | ||
247 | goto exit; | ||
248 | } | ||
249 | |||
250 | if (g->ops.pmu.pmu_read_idle_counter == NULL || | ||
251 | g->ops.pmu.pmu_reset_idle_counter == NULL || | ||
252 | g->ops.pmu.pmu_read_idle_intr_status == NULL || | ||
253 | g->ops.pmu.pmu_clear_idle_intr_status == NULL) { | ||
254 | *norm = PMU_BUSY_CYCLES_NORM_MAX; | ||
255 | goto exit; | ||
256 | } | ||
257 | |||
258 | busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 4); | ||
259 | total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 0); | ||
260 | intr_status = g->ops.pmu.pmu_read_idle_intr_status(g); | ||
261 | |||
262 | g->ops.pmu.pmu_reset_idle_counter(g, 4); | ||
263 | g->ops.pmu.pmu_reset_idle_counter(g, 0); | ||
264 | |||
265 | if (intr_status != 0UL) { | ||
266 | *norm = PMU_BUSY_CYCLES_NORM_MAX; | ||
267 | g->ops.pmu.pmu_clear_idle_intr_status(g); | ||
268 | } else if (total_cycles == 0ULL || busy_cycles > total_cycles) { | ||
269 | *norm = PMU_BUSY_CYCLES_NORM_MAX; | ||
270 | } else { | ||
271 | *norm = (u32)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX | ||
272 | / total_cycles); | ||
273 | } | ||
274 | |||
275 | exit: | ||
276 | gk20a_idle_nosuspend(g); | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | |||
239 | void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, | 281 | void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, |
240 | u32 *total_cycles) | 282 | u32 *total_cycles) |
241 | { | 283 | { |