From e0dbf3a784f6cb1a6e1c41a23123b19ec73b8708 Mon Sep 17 00:00:00 2001 From: Deepak Goyal Date: Thu, 18 Jan 2018 11:44:47 +0530 Subject: gpu: nvgpu: gv11b: Enable perfmon. t19x PMU ucode uses RPC mechanism for PERFMON commands. - Declared "pmu_init_perfmon", "pmu_perfmon_start_sampling", "pmu_perfmon_stop_sampling" and "pmu_perfmon_get_samples" in pmu ops to differentiate for chips using RPC & legacy cmd/msg mechanism. - Defined and used PERFMON RPC commands for t19x - INIT - START - STOP - QUERY - Adds RPC handler for PERFMON RPC commands. - For querying GPU utilization/load, we need to send PERFMON_QUERY RPC command for gv11b. - Enables perfmon for gv11b. Bug 2039013 Change-Id: Ic32326f81d48f11bc772afb8fee2dee6e427a699 Signed-off-by: Deepak Goyal Reviewed-on: https://git-master.nvidia.com/r/1614114 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Seshendra Gadagottu Tested-by: Seshendra Gadagottu Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/debug_pmu.c | 4 +- .../gpu/nvgpu/common/linux/platform_gv11b_tegra.c | 1 + .../nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | 3 + .../nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | 4 + drivers/gpu/nvgpu/common/pmu/pmu_ipc.c | 35 +++++- drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | 136 ++++++++++++++++++++- drivers/gpu/nvgpu/gk20a/gk20a.h | 4 + drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 3 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 3 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 3 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 3 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 4 + drivers/gpu/nvgpu/include/nvgpu/pmu.h | 7 ++ .../gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h | 84 +++++++++++++ 14 files changed, 286 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c index ec997e28..a8a8870e 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c +++ 
b/drivers/gpu/nvgpu/common/linux/debug_pmu.c @@ -352,11 +352,11 @@ static ssize_t perfmon_events_enable_write(struct file *file, if (val && !g->pmu.perfmon_sampling_enabled && nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { g->pmu.perfmon_sampling_enabled = true; - nvgpu_pmu_perfmon_start_sampling(&(g->pmu)); + g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); } else if (!val && g->pmu.perfmon_sampling_enabled && nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { g->pmu.perfmon_sampling_enabled = false; - nvgpu_pmu_perfmon_stop_sampling(&(g->pmu)); + g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); } gk20a_idle(g); } else { diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c index d972b88a..a452896f 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c @@ -208,6 +208,7 @@ struct gk20a_platform t19x_gpu_tegra_platform = { .can_slcg = false, .can_blcg = false, .can_elcg = false, + .enable_perfmon = true, /* power management callbacks */ .suspend = gv11b_tegra_suspend, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index d5fd5102..3f99eb9e 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c @@ -417,6 +417,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { .pmu_mutex_release = gk20a_pmu_mutex_release, .write_dmatrfbase = gp10b_write_dmatrfbase, .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, .pmu_pg_init_param = gp10b_pg_gr_init, .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, diff --git 
a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 132ce6e5..c8752f91 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c @@ -461,6 +461,10 @@ static const struct gpu_ops vgpu_gv11b_ops = { .pmu_mutex_release = gk20a_pmu_mutex_release, .write_dmatrfbase = gp10b_write_dmatrfbase, .pmu_elpg_statistics = gp106_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc, + .pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc, .pmu_pg_init_param = gv11b_pg_gr_init, .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c index 829fee19..2811a4b0 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c @@ -925,8 +925,9 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu) nvgpu_pmu_process_init_msg(pmu, &msg); if (g->ops.pmu.init_wpr_region != NULL) g->ops.pmu.init_wpr_region(g); + if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) - nvgpu_pmu_init_perfmon(pmu); + g->ops.pmu.pmu_init_perfmon(pmu); return 0; } @@ -978,6 +979,8 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { struct nv_pmu_rpc_header rpc; + struct nvgpu_pmu *pmu = &g->pmu; + struct nv_pmu_rpc_struct_perfmon_query *rpc_param; memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header)); if (param) @@ -990,10 +993,36 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg, } switch (msg->hdr.unit_id) { + case PMU_UNIT_PERFMON_T18X: + case PMU_UNIT_PERFMON: + switch (rpc.function) { + case NV_PMU_RPC_ID_PERFMON_T18X_INIT: + nvgpu_pmu_dbg(g, + "reply 
NV_PMU_RPC_ID_PERFMON_INIT"); + pmu->perfmon_ready = 1; + break; + case NV_PMU_RPC_ID_PERFMON_T18X_START: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_START"); + break; + case NV_PMU_RPC_ID_PERFMON_T18X_STOP: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_STOP"); + break; + case NV_PMU_RPC_ID_PERFMON_T18X_QUERY: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_QUERY"); + rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)param; + pmu->load = rpc_param->sample_buffer[0]; + pmu->perfmon_query = 1; + /* set perfmon_query to 1 after load is copied */ + break; + } + break; /* TBD case will be added */ default: - nvgpu_err(g, " Invalid RPC response, stats 0x%x", - rpc.flcn_status); + nvgpu_err(g, " Invalid RPC response, stats 0x%x", + rpc.flcn_status); break; } diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c index 2b952868..25d81b60 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c @@ -221,11 +221,18 @@ int nvgpu_pmu_load_update(struct gk20a *g) if (!pmu->perfmon_ready) { pmu->load_shadow = 0; + pmu->load = 0; return 0; } - nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer, - (u8 *)&load, 2, 0); + if (g->ops.pmu.pmu_perfmon_get_samples_rpc) { + nvgpu_pmu_perfmon_get_samples_rpc(pmu); + load = pmu->load; + } else { + nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer, + (u8 *)&load, 2 * 1, 0); + } + pmu->load_shadow = load / 10; pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); @@ -288,6 +295,129 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, /* restart sampling */ if (pmu->perfmon_sampling_enabled) - return nvgpu_pmu_perfmon_start_sampling(pmu); + return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); + return 0; } + +/* Perfmon RPC */ +int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct nv_pmu_rpc_struct_perfmon_init rpc; + int status = 0; + + if 
(!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) + return 0; + + nvgpu_log_fn(g, " "); + + memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init)); + pmu->perfmon_ready = 0; + + gk20a_pmu_init_perfmon_counter(g); + + /* microseconds interval between pmu polls perf counters */ + rpc.sample_periodus = 16700; + /* number of sample periods below lower threshold + * before pmu triggers perfmon decrease event + */ + rpc.to_decrease_count = 15; + /* index of base counter, aka. always ticking counter */ + rpc.base_counter_id = 6; + /* moving average window for sample periods */ + rpc.samples_in_moving_avg = 17; + /* number of perfmon counters + * counter #3 (GR and CE2) for gk20a + */ + rpc.num_counters = 1; + + memset(rpc.counter, 0, sizeof(struct pmu_perfmon_counter_v3) * + NV_PMU_PERFMON_MAX_COUNTERS); + /* Counter used to count GR busy cycles */ + rpc.counter[0].index = 3; + + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_INIT"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, INIT, &rpc, 0); + if (status) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + goto exit; + } + +exit: + return 0; +} + +int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct nv_pmu_rpc_struct_perfmon_start rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) + return 0; + + nvgpu_log_fn(g, " "); + + memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start)); + rpc.group_id = PMU_DOMAIN_GROUP_PSTATE; + rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]; + rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE | + PMU_PERFMON_FLAG_ENABLE_DECREASE | + PMU_PERFMON_FLAG_CLEAR_PREV; + + rpc.counter[0].upper_threshold = 3000; + rpc.counter[0].lower_threshold = 1000; + + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_START\n"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, START, &rpc, 0); + if (status) + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + + return status; +} + +int 
nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct nv_pmu_rpc_struct_perfmon_stop rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) + return 0; + + nvgpu_log_fn(g, " "); + + memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_stop)); + /* PERFMON Stop */ + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_STOP\n"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, STOP, &rpc, 0); + if (status) + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + + return status; +} + +int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct nv_pmu_rpc_struct_perfmon_query rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) + return 0; + + nvgpu_log_fn(g, " "); + pmu->perfmon_query = 0; + memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query)); + /* PERFMON QUERY */ + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, QUERY, &rpc, 0); + if (status) + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + + pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), + &pmu->perfmon_query, 1); + + return status; +} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 685976b1..0ce3b50d 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -836,6 +836,10 @@ struct gpu_ops { int (*prepare_ucode)(struct gk20a *g); int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g); int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu); + int (*pmu_init_perfmon)(struct nvgpu_pmu *pmu); + int (*pmu_perfmon_start_sampling)(struct nvgpu_pmu *pmu); + int (*pmu_perfmon_stop_sampling)(struct nvgpu_pmu *pmu); + int (*pmu_perfmon_get_samples_rpc)(struct nvgpu_pmu *pmu); int (*pmu_setup_elpg)(struct gk20a *g); u32 (*pmu_get_queue_head)(u32 i); u32 (*pmu_get_queue_head_size)(void); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c 
b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 3ee22ed1..8a5c1278 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -483,6 +483,9 @@ static const struct gpu_ops gm20b_ops = { .pmu_mutex_release = gk20a_pmu_mutex_release, .write_dmatrfbase = gm20b_write_dmatrfbase, .pmu_elpg_statistics = gk20a_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, .pmu_pg_init_param = NULL, .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 3073668e..d0458420 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -583,6 +583,9 @@ static const struct gpu_ops gp106_ops = { .is_pmu_supported = gp106_is_pmu_supported, .pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list, .pmu_elpg_statistics = gp106_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, .pmu_is_lpwr_feature_supported = gp106_pmu_is_lpwr_feature_supported, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 7041c5bd..5e9096e2 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -544,6 +544,9 @@ static const struct gpu_ops gp10b_ops = { .pmu_mutex_release = gk20a_pmu_mutex_release, .write_dmatrfbase = gp10b_write_dmatrfbase, .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, .pmu_pg_init_param = 
gp10b_pg_gr_init, .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 95d1f076..dbf32b20 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -591,6 +591,9 @@ static const struct gpu_ops gv100_ops = { .is_pmu_supported = gp106_is_pmu_supported, .pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list, .pmu_elpg_statistics = gp106_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, .pmu_is_lpwr_feature_supported = gp106_pmu_is_lpwr_feature_supported, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 0a552f5b..80ed29b8 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -606,6 +606,10 @@ static const struct gpu_ops gv11b_ops = { .pmu_mutex_release = gk20a_pmu_mutex_release, .write_dmatrfbase = gp10b_write_dmatrfbase, .pmu_elpg_statistics = gp106_pmu_elpg_statistics, + .pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc, + .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc, + .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc, + .pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc, .pmu_pg_init_param = gv11b_pg_gr_init, .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index cd7e1879..5e9983b0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -329,6 +329,8 @@ struct nvgpu_pmu { u32 *ucode_image; bool pmu_ready; + u32 perfmon_query; + u32 zbc_save_done; u32 
stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE]; @@ -362,6 +364,7 @@ struct nvgpu_pmu { u32 sample_buffer; u32 load_shadow; u32 load_avg; + u32 load; struct nvgpu_mutex isr_mutex; bool isr_enabled; @@ -432,8 +435,12 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu); int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu); int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu); int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, struct pmu_perfmon_msg *msg); +int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); int nvgpu_pmu_load_update(struct gk20a *g); void nvgpu_pmu_reset_load_counters(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h index f8c15324..bcf4c8b6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h @@ -32,6 +32,8 @@ #define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002) #define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004) +#define NV_PMU_PERFMON_MAX_COUNTERS 10 + enum pmu_perfmon_cmd_start_fields { COUNTER_ALLOC }; @@ -61,6 +63,15 @@ struct pmu_perfmon_counter_v2 { u32 scale; }; +struct pmu_perfmon_counter_v3 { + u8 index; + u8 group_id; + u16 flags; + u16 upper_threshold; /* units of 0.01% */ + u16 lower_threshold; /* units of 0.01% */ + u32 scale; +}; + struct pmu_perfmon_cmd_start_v3 { u8 cmd_type; u8 group_id; @@ -184,4 +195,77 @@ struct pmu_perfmon_msg { }; }; +/* PFERMON RPC interface*/ +/* + * RPC calls serviced by PERFMON unit. 
+ */ +#define NV_PMU_RPC_ID_PERFMON_T18X_INIT 0x00 +#define NV_PMU_RPC_ID_PERFMON_T18X_DEINIT 0x01 +#define NV_PMU_RPC_ID_PERFMON_T18X_START 0x02 +#define NV_PMU_RPC_ID_PERFMON_T18X_STOP 0x03 +#define NV_PMU_RPC_ID_PERFMON_T18X_QUERY 0x04 +#define NV_PMU_RPC_ID_PERFMON_T18X__COUNT 0x05 + +/* + * structure that holds data used to + * execute Perfmon INIT RPC. + * hdr - RPC header + * sample_periodus - Desired period in between samples. + * to_decrease_count - Consecutive samples before decrease event. + * base_counter_id - Index of the base counter. + * samples_in_moving_avg - Number of values in moving average. + * num_counters - Num of counters PMU should use. + * counter - Counters. + */ +struct nv_pmu_rpc_struct_perfmon_init { + struct nv_pmu_rpc_header hdr; + u32 sample_periodus; + u8 to_decrease_count; + u8 base_counter_id; + u8 samples_in_moving_avg; + u8 num_counters; + struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS]; + u32 scratch[1]; +}; + +/* + * structure that holds data used to + * execute Perfmon START RPC. + * hdr - RPC header + * group_id - NV group ID + * state_id - NV state ID + * flags - PMU_PERFON flags + * counters - Counters. + */ +struct nv_pmu_rpc_struct_perfmon_start { + struct nv_pmu_rpc_header hdr; + u8 group_id; + u8 state_id; + u8 flags; + struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS]; + u32 scratch[1]; +}; + +/* + * structure that holds data used to + * execute Perfmon STOP RPC. + * hdr - RPC header + */ +struct nv_pmu_rpc_struct_perfmon_stop { + struct nv_pmu_rpc_header hdr; + u32 scratch[1]; +}; + +/* + * structure that holds data used to + * execute QUERY RPC. + * hdr - RPC header + * sample_buffer - Output buffer from pmu containing utilization samples. + */ +struct nv_pmu_rpc_struct_perfmon_query { + struct nv_pmu_rpc_header hdr; + u16 sample_buffer[NV_PMU_PERFMON_MAX_COUNTERS]; + u32 scratch[1]; +}; + #endif /* _GPMUIFPERFMON_H_ */ -- cgit v1.2.2