author      Deepak Goyal <dgoyal@nvidia.com>                     2018-01-18 01:14:47 -0500
committer   mobile promotions <svcmobile_promotions@nvidia.com>  2018-01-19 02:40:02 -0500
commit      e0dbf3a784f6cb1a6e1c41a23123b19ec73b8708 (patch)
tree        d089cf84f16980034b82c53f2913bcdda452151d
parent      a57258e9b18f2f336457165391572bc477371e94 (diff)
gpu: nvgpu: gv11b: Enable perfmon.
t19x PMU ucode uses the RPC mechanism for PERFMON commands.
- Declared "pmu_init_perfmon", "pmu_perfmon_start_sampling",
  "pmu_perfmon_stop_sampling" and "pmu_perfmon_get_samples" in pmu ops
  to differentiate between chips using the RPC mechanism and those
  using the legacy cmd/msg mechanism.
- Defined and used PERFMON RPC commands for t19x:
  - INIT
  - START
  - STOP
  - QUERY
- Added an RPC handler for PERFMON RPC commands.
- For querying GPU utilization/load, the PERFMON_QUERY RPC command
  must be sent on gv11b.
- Enabled perfmon for gv11b.

Bug 2039013

Change-Id: Ic32326f81d48f11bc772afb8fee2dee6e427a699
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1614114
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
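For context, below is a minimal, self-contained sketch of the dispatch pattern this change introduces. It is not driver code: the demo_* names, stub bodies and printouts are invented for illustration, and the real driver uses struct gpu_ops / struct nvgpu_pmu from the nvgpu headers. Each chip's HAL table fills the perfmon ops with either the legacy cmd/msg helpers or the new *_rpc helpers, and common code only calls through the ops pointers.

/* Stand-alone model of the per-chip perfmon ops dispatch (illustrative only). */
#include <stdio.h>
#include <stddef.h>

struct demo_pmu;                        /* stands in for struct nvgpu_pmu */

struct demo_pmu_ops {
        int (*pmu_init_perfmon)(struct demo_pmu *pmu);
        int (*pmu_perfmon_start_sampling)(struct demo_pmu *pmu);
        int (*pmu_perfmon_stop_sampling)(struct demo_pmu *pmu);
        int (*pmu_perfmon_get_samples_rpc)(struct demo_pmu *pmu); /* NULL on legacy chips */
};

struct demo_pmu { const struct demo_pmu_ops *ops; };

/* Legacy cmd/msg path (gm20b/gp10b/gp106/gv100 in this patch). */
static int legacy_init(struct demo_pmu *pmu)  { (void)pmu; puts("PERFMON INIT via cmd/msg");  return 0; }
static int legacy_start(struct demo_pmu *pmu) { (void)pmu; puts("PERFMON START via cmd/msg"); return 0; }
static int legacy_stop(struct demo_pmu *pmu)  { (void)pmu; puts("PERFMON STOP via cmd/msg");  return 0; }

/* RPC path (gv11b in this patch). */
static int rpc_init(struct demo_pmu *pmu)  { (void)pmu; puts("PERFMON INIT via RPC");  return 0; }
static int rpc_start(struct demo_pmu *pmu) { (void)pmu; puts("PERFMON START via RPC"); return 0; }
static int rpc_stop(struct demo_pmu *pmu)  { (void)pmu; puts("PERFMON STOP via RPC");  return 0; }
static int rpc_query(struct demo_pmu *pmu) { (void)pmu; puts("PERFMON QUERY via RPC"); return 0; }

static const struct demo_pmu_ops legacy_ops = { legacy_init, legacy_start, legacy_stop, NULL };
static const struct demo_pmu_ops rpc_ops    = { rpc_init, rpc_start, rpc_stop, rpc_query };

/* Common code never names a chip: it checks for the QUERY op, exactly like
 * nvgpu_pmu_load_update() checks g->ops.pmu.pmu_perfmon_get_samples_rpc. */
static int demo_load_update(struct demo_pmu *pmu)
{
        if (pmu->ops->pmu_perfmon_get_samples_rpc)
                return pmu->ops->pmu_perfmon_get_samples_rpc(pmu);
        /* else: read the sample buffer from falcon DMEM as before */
        return 0;
}

int main(void)
{
        struct demo_pmu gv11b = { &rpc_ops }, gp10b = { &legacy_ops };

        gv11b.ops->pmu_init_perfmon(&gv11b);
        demo_load_update(&gv11b);       /* takes the RPC QUERY branch */
        gp10b.ops->pmu_init_perfmon(&gp10b);
        demo_load_update(&gp10b);       /* falls back to the DMEM-read branch */
        return 0;
}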
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_pmu.c                   |   4
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c        |   1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c   |   3
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c   |   4
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_ipc.c                       |  35
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c                   | 136
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                              |   4
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.c                          |   3
-rw-r--r--  drivers/gpu/nvgpu/gp106/hal_gp106.c                          |   3
-rw-r--r--  drivers/gpu/nvgpu/gp10b/hal_gp10b.c                          |   3
-rw-r--r--  drivers/gpu/nvgpu/gv100/hal_gv100.c                          |   3
-rw-r--r--  drivers/gpu/nvgpu/gv11b/hal_gv11b.c                          |   4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/pmu.h                        |   7
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h       |  84
14 files changed, 286 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
index ec997e28..a8a8870e 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -352,11 +352,11 @@ static ssize_t perfmon_events_enable_write(struct file *file,
 		if (val && !g->pmu.perfmon_sampling_enabled &&
 			nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
 			g->pmu.perfmon_sampling_enabled = true;
-			nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
+			g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
 		} else if (!val && g->pmu.perfmon_sampling_enabled &&
 			nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
 			g->pmu.perfmon_sampling_enabled = false;
-			nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
+			g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
 		}
 		gk20a_idle(g);
 	} else {
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
index d972b88a..a452896f 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
@@ -208,6 +208,7 @@ struct gk20a_platform t19x_gpu_tegra_platform = {
 	.can_slcg = false,
 	.can_blcg = false,
 	.can_elcg = false,
+	.enable_perfmon = true,
 
 	/* power management callbacks */
 	.suspend = gv11b_tegra_suspend,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
index d5fd5102..3f99eb9e 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -417,6 +417,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp10b_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_pg_init_param = gp10b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c
index 132ce6e5..c8752f91 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -461,6 +461,10 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc,
+		.pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc,
 		.pmu_pg_init_param = gv11b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
index 829fee19..2811a4b0 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
@@ -925,8 +925,9 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu)
 		nvgpu_pmu_process_init_msg(pmu, &msg);
 		if (g->ops.pmu.init_wpr_region != NULL)
 			g->ops.pmu.init_wpr_region(g);
+
 		if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
-			nvgpu_pmu_init_perfmon(pmu);
+			g->ops.pmu.pmu_init_perfmon(pmu);
 
 		return 0;
 	}
@@ -978,6 +979,8 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg,
 		void *param, u32 handle, u32 status)
 {
 	struct nv_pmu_rpc_header rpc;
+	struct nvgpu_pmu *pmu = &g->pmu;
+	struct nv_pmu_rpc_struct_perfmon_query *rpc_param;
 
 	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header));
 	if (param)
@@ -990,10 +993,36 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg,
 	}
 
 	switch (msg->hdr.unit_id) {
+	case PMU_UNIT_PERFMON_T18X:
+	case PMU_UNIT_PERFMON:
+		switch (rpc.function) {
+		case NV_PMU_RPC_ID_PERFMON_T18X_INIT:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_INIT");
+			pmu->perfmon_ready = 1;
+			break;
+		case NV_PMU_RPC_ID_PERFMON_T18X_START:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_START");
+			break;
+		case NV_PMU_RPC_ID_PERFMON_T18X_STOP:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_STOP");
+			break;
+		case NV_PMU_RPC_ID_PERFMON_T18X_QUERY:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_QUERY");
+			rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)param;
+			pmu->load = rpc_param->sample_buffer[0];
+			pmu->perfmon_query = 1;
+			/* set perfmon_query to 1 after load is copied */
+			break;
+		}
+		break;
 	/* TBD case will be added */
 	default:
 		nvgpu_err(g, " Invalid RPC response, stats 0x%x",
 			rpc.flcn_status);
 		break;
 	}
 
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
index 2b952868..25d81b60 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
@@ -221,11 +221,18 @@ int nvgpu_pmu_load_update(struct gk20a *g)
 
 	if (!pmu->perfmon_ready) {
 		pmu->load_shadow = 0;
+		pmu->load = 0;
 		return 0;
 	}
 
-	nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer,
-		(u8 *)&load, 2, 0);
+	if (g->ops.pmu.pmu_perfmon_get_samples_rpc) {
+		nvgpu_pmu_perfmon_get_samples_rpc(pmu);
+		load = pmu->load;
+	} else {
+		nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer,
+			(u8 *)&load, 2 * 1, 0);
+	}
+
 	pmu->load_shadow = load / 10;
 	pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
 
@@ -288,6 +295,129 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 
 	/* restart sampling */
 	if (pmu->perfmon_sampling_enabled)
-		return nvgpu_pmu_perfmon_start_sampling(pmu);
+		return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
+
 	return 0;
 }
+
+/* Perfmon RPC */
+int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_init rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init));
+	pmu->perfmon_ready = 0;
+
+	gk20a_pmu_init_perfmon_counter(g);
+
+	/* microseconds interval between pmu polls perf counters */
+	rpc.sample_periodus = 16700;
+	/* number of sample periods below lower threshold
+	 * before pmu triggers perfmon decrease event
+	 */
+	rpc.to_decrease_count = 15;
+	/* index of base counter, aka. always ticking counter */
+	rpc.base_counter_id = 6;
+	/* moving average window for sample periods */
+	rpc.samples_in_moving_avg = 17;
+	/* number of perfmon counters
+	 * counter #3 (GR and CE2) for gk20a
+	 */
+	rpc.num_counters = 1;
+
+	memset(rpc.counter, 0, sizeof(struct pmu_perfmon_counter_v3) *
+		NV_PMU_PERFMON_MAX_COUNTERS);
+	/* Counter used to count GR busy cycles */
+	rpc.counter[0].index = 3;
+
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_INIT");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, INIT, &rpc, 0);
+	if (status) {
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+		goto exit;
+	}
+
+exit:
+	return 0;
+}
+
+int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_start rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start));
+	rpc.group_id = PMU_DOMAIN_GROUP_PSTATE;
+	rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
+	rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE |
+		PMU_PERFMON_FLAG_ENABLE_DECREASE |
+		PMU_PERFMON_FLAG_CLEAR_PREV;
+
+	rpc.counter[0].upper_threshold = 3000;
+	rpc.counter[0].lower_threshold = 1000;
+
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_START\n");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, START, &rpc, 0);
+	if (status)
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+
+	return status;
+}
+
+int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_stop rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_stop));
+	/* PERFMON Stop */
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_STOP\n");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, STOP, &rpc, 0);
+	if (status)
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+
+	return status;
+}
+
+int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_query rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+	pmu->perfmon_query = 0;
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query));
+	/* PERFMON QUERY */
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, QUERY, &rpc, 0);
+	if (status)
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+
+	pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
+		&pmu->perfmon_query, 1);
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 685976b1..0ce3b50d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -836,6 +836,10 @@ struct gpu_ops {
 		int (*prepare_ucode)(struct gk20a *g);
 		int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
 		int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu);
+		int (*pmu_init_perfmon)(struct nvgpu_pmu *pmu);
+		int (*pmu_perfmon_start_sampling)(struct nvgpu_pmu *pmu);
+		int (*pmu_perfmon_stop_sampling)(struct nvgpu_pmu *pmu);
+		int (*pmu_perfmon_get_samples_rpc)(struct nvgpu_pmu *pmu);
 		int (*pmu_setup_elpg)(struct gk20a *g);
 		u32 (*pmu_get_queue_head)(u32 i);
 		u32 (*pmu_get_queue_head_size)(void);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 3ee22ed1..8a5c1278 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -483,6 +483,9 @@ static const struct gpu_ops gm20b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gm20b_write_dmatrfbase,
 		.pmu_elpg_statistics = gk20a_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_pg_init_param = NULL,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 3073668e..d0458420 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -583,6 +583,9 @@ static const struct gpu_ops gp106_ops = {
 		.is_pmu_supported = gp106_is_pmu_supported,
 		.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
 		.pmu_is_lpwr_feature_supported =
 			gp106_pmu_is_lpwr_feature_supported,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 7041c5bd..5e9096e2 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -544,6 +544,9 @@ static const struct gpu_ops gp10b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp10b_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_pg_init_param = gp10b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 95d1f076..dbf32b20 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -591,6 +591,9 @@ static const struct gpu_ops gv100_ops = {
 		.is_pmu_supported = gp106_is_pmu_supported,
 		.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
 		.pmu_is_lpwr_feature_supported =
 			gp106_pmu_is_lpwr_feature_supported,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 0a552f5b..80ed29b8 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -606,6 +606,10 @@ static const struct gpu_ops gv11b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc,
+		.pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc,
 		.pmu_pg_init_param = gv11b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
index cd7e1879..5e9983b0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
@@ -329,6 +329,8 @@ struct nvgpu_pmu {
 	u32 *ucode_image;
 	bool pmu_ready;
 
+	u32 perfmon_query;
+
 	u32 zbc_save_done;
 
 	u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE];
@@ -362,6 +364,7 @@ struct nvgpu_pmu {
 	u32 sample_buffer;
 	u32 load_shadow;
 	u32 load_avg;
+	u32 load;
 
 	struct nvgpu_mutex isr_mutex;
 	bool isr_enabled;
@@ -432,8 +435,12 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 	struct pmu_perfmon_msg *msg);
+int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
 int nvgpu_pmu_load_update(struct gk20a *g);
 void nvgpu_pmu_reset_load_counters(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h
index f8c15324..bcf4c8b6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h
@@ -32,6 +32,8 @@
 #define PMU_PERFMON_FLAG_ENABLE_DECREASE	(0x00000002)
 #define PMU_PERFMON_FLAG_CLEAR_PREV		(0x00000004)
 
+#define NV_PMU_PERFMON_MAX_COUNTERS		10
+
 enum pmu_perfmon_cmd_start_fields {
 	COUNTER_ALLOC
 };
@@ -61,6 +63,15 @@ struct pmu_perfmon_counter_v2 {
 	u32 scale;
 };
 
+struct pmu_perfmon_counter_v3 {
+	u8 index;
+	u8 group_id;
+	u16 flags;
+	u16 upper_threshold; /* units of 0.01% */
+	u16 lower_threshold; /* units of 0.01% */
+	u32 scale;
+};
+
 struct pmu_perfmon_cmd_start_v3 {
 	u8 cmd_type;
 	u8 group_id;
@@ -184,4 +195,77 @@ struct pmu_perfmon_msg {
 	};
 };
 
+/* PERFMON RPC interface */
+/*
+ * RPC calls serviced by the PERFMON unit.
+ */
+#define NV_PMU_RPC_ID_PERFMON_T18X_INIT		0x00
+#define NV_PMU_RPC_ID_PERFMON_T18X_DEINIT	0x01
+#define NV_PMU_RPC_ID_PERFMON_T18X_START	0x02
+#define NV_PMU_RPC_ID_PERFMON_T18X_STOP		0x03
+#define NV_PMU_RPC_ID_PERFMON_T18X_QUERY	0x04
+#define NV_PMU_RPC_ID_PERFMON_T18X__COUNT	0x05
+
+/*
+ * Structure that holds data used to execute the Perfmon INIT RPC.
+ * hdr - RPC header
+ * sample_periodus - Desired period in between samples.
+ * to_decrease_count - Consecutive samples before decrease event.
+ * base_counter_id - Index of the base counter.
+ * samples_in_moving_avg - Number of values in moving average.
+ * num_counters - Number of counters PMU should use.
+ * counter - Counters.
+ */
+struct nv_pmu_rpc_struct_perfmon_init {
+	struct nv_pmu_rpc_header hdr;
+	u32 sample_periodus;
+	u8 to_decrease_count;
+	u8 base_counter_id;
+	u8 samples_in_moving_avg;
+	u8 num_counters;
+	struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS];
+	u32 scratch[1];
+};
+
+/*
+ * Structure that holds data used to execute the Perfmon START RPC.
+ * hdr - RPC header
+ * group_id - NV group ID
+ * state_id - NV state ID
+ * flags - PMU_PERFMON flags
+ * counter - Counters.
+ */
+struct nv_pmu_rpc_struct_perfmon_start {
+	struct nv_pmu_rpc_header hdr;
+	u8 group_id;
+	u8 state_id;
+	u8 flags;
+	struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS];
+	u32 scratch[1];
+};
+
+/*
+ * Structure that holds data used to execute the Perfmon STOP RPC.
+ * hdr - RPC header
+ */
+struct nv_pmu_rpc_struct_perfmon_stop {
+	struct nv_pmu_rpc_header hdr;
+	u32 scratch[1];
+};
+
+/*
+ * Structure that holds data used to execute the Perfmon QUERY RPC.
+ * hdr - RPC header
+ * sample_buffer - Output buffer from the pmu containing utilization samples.
+ */
+struct nv_pmu_rpc_struct_perfmon_query {
+	struct nv_pmu_rpc_header hdr;
+	u16 sample_buffer[NV_PMU_PERFMON_MAX_COUNTERS];
+	u32 scratch[1];
+};
+
 #endif /* _GPMUIFPERFMON_H_ */
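
As a footnote on how the QUERY structure above is consumed, here is a simplified, stand-alone model of the query handshake. It is not driver code: the demo_* names and the synchronous handler call are invented for illustration, and the real driver posts the RPC with PMU_RPC_EXECUTE() and waits with pmu_wait_message_cond() while pmu_rpc_handler() runs from the message path. The caller clears a completion flag, posts QUERY, and the handler copies sample_buffer[0] into pmu->load before setting the flag.

/* Stand-alone model of the PERFMON QUERY completion handshake (illustrative only). */
#include <stdint.h>
#include <stdio.h>

struct demo_query_reply { uint16_t sample_buffer[10]; };
struct demo_pmu { uint32_t load; uint32_t perfmon_query; };

/* Mirrors what the RPC handler does for NV_PMU_RPC_ID_PERFMON_T18X_QUERY. */
static void demo_query_handler(struct demo_pmu *pmu,
			       const struct demo_query_reply *reply)
{
	pmu->load = reply->sample_buffer[0];  /* raw utilization sample from the PMU */
	pmu->perfmon_query = 1;               /* set only after load is copied */
}

/* Mirrors the shape of the get-samples call, minus the RPC plumbing. */
static uint32_t demo_get_samples(struct demo_pmu *pmu)
{
	pmu->perfmon_query = 0;
	/* ...the QUERY RPC would be posted here; the reply arrives asynchronously... */
	struct demo_query_reply reply = { .sample_buffer = { 4321 } };
	demo_query_handler(pmu, &reply);      /* normally runs from the message/ISR path */
	while (!pmu->perfmon_query)
		;                             /* real code waits with a timeout instead */
	return pmu->load;
}

int main(void)
{
	struct demo_pmu pmu = { 0, 0 };

	printf("load sample = %u\n", (unsigned)demo_get_samples(&pmu));
	return 0;
}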