diff options
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/debug_pmu.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/pmu/pmu_ipc.c | 35 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | 136 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/pmu.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h | 84 |
14 files changed, 286 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c index ec997e28..a8a8870e 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c +++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c | |||
@@ -352,11 +352,11 @@ static ssize_t perfmon_events_enable_write(struct file *file, | |||
352 | if (val && !g->pmu.perfmon_sampling_enabled && | 352 | if (val && !g->pmu.perfmon_sampling_enabled && |
353 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | 353 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { |
354 | g->pmu.perfmon_sampling_enabled = true; | 354 | g->pmu.perfmon_sampling_enabled = true; |
355 | nvgpu_pmu_perfmon_start_sampling(&(g->pmu)); | 355 | g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); |
356 | } else if (!val && g->pmu.perfmon_sampling_enabled && | 356 | } else if (!val && g->pmu.perfmon_sampling_enabled && |
357 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | 357 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { |
358 | g->pmu.perfmon_sampling_enabled = false; | 358 | g->pmu.perfmon_sampling_enabled = false; |
359 | nvgpu_pmu_perfmon_stop_sampling(&(g->pmu)); | 359 | g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); |
360 | } | 360 | } |
361 | gk20a_idle(g); | 361 | gk20a_idle(g); |
362 | } else { | 362 | } else { |
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c index d972b88a..a452896f 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c | |||
@@ -208,6 +208,7 @@ struct gk20a_platform t19x_gpu_tegra_platform = { | |||
208 | .can_slcg = false, | 208 | .can_slcg = false, |
209 | .can_blcg = false, | 209 | .can_blcg = false, |
210 | .can_elcg = false, | 210 | .can_elcg = false, |
211 | .enable_perfmon = true, | ||
211 | 212 | ||
212 | /* power management callbacks */ | 213 | /* power management callbacks */ |
213 | .suspend = gv11b_tegra_suspend, | 214 | .suspend = gv11b_tegra_suspend, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index d5fd5102..3f99eb9e 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | |||
@@ -417,6 +417,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { | |||
417 | .pmu_mutex_release = gk20a_pmu_mutex_release, | 417 | .pmu_mutex_release = gk20a_pmu_mutex_release, |
418 | .write_dmatrfbase = gp10b_write_dmatrfbase, | 418 | .write_dmatrfbase = gp10b_write_dmatrfbase, |
419 | .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, | 419 | .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, |
420 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon, | ||
421 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, | ||
422 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, | ||
420 | .pmu_pg_init_param = gp10b_pg_gr_init, | 423 | .pmu_pg_init_param = gp10b_pg_gr_init, |
421 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | 424 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, |
422 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | 425 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 132ce6e5..c8752f91 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | |||
@@ -461,6 +461,10 @@ static const struct gpu_ops vgpu_gv11b_ops = { | |||
461 | .pmu_mutex_release = gk20a_pmu_mutex_release, | 461 | .pmu_mutex_release = gk20a_pmu_mutex_release, |
462 | .write_dmatrfbase = gp10b_write_dmatrfbase, | 462 | .write_dmatrfbase = gp10b_write_dmatrfbase, |
463 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, | 463 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, |
464 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc, | ||
465 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc, | ||
466 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc, | ||
467 | .pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc, | ||
464 | .pmu_pg_init_param = gv11b_pg_gr_init, | 468 | .pmu_pg_init_param = gv11b_pg_gr_init, |
465 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | 469 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, |
466 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | 470 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, |
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c index 829fee19..2811a4b0 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c | |||
@@ -925,8 +925,9 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu) | |||
925 | nvgpu_pmu_process_init_msg(pmu, &msg); | 925 | nvgpu_pmu_process_init_msg(pmu, &msg); |
926 | if (g->ops.pmu.init_wpr_region != NULL) | 926 | if (g->ops.pmu.init_wpr_region != NULL) |
927 | g->ops.pmu.init_wpr_region(g); | 927 | g->ops.pmu.init_wpr_region(g); |
928 | |||
928 | if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) | 929 | if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) |
929 | nvgpu_pmu_init_perfmon(pmu); | 930 | g->ops.pmu.pmu_init_perfmon(pmu); |
930 | 931 | ||
931 | return 0; | 932 | return 0; |
932 | } | 933 | } |
@@ -978,6 +979,8 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg, | |||
978 | void *param, u32 handle, u32 status) | 979 | void *param, u32 handle, u32 status) |
979 | { | 980 | { |
980 | struct nv_pmu_rpc_header rpc; | 981 | struct nv_pmu_rpc_header rpc; |
982 | struct nvgpu_pmu *pmu = &g->pmu; | ||
983 | struct nv_pmu_rpc_struct_perfmon_query *rpc_param; | ||
981 | 984 | ||
982 | memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header)); | 985 | memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header)); |
983 | if (param) | 986 | if (param) |
@@ -990,10 +993,36 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg, | |||
990 | } | 993 | } |
991 | 994 | ||
992 | switch (msg->hdr.unit_id) { | 995 | switch (msg->hdr.unit_id) { |
996 | case PMU_UNIT_PERFMON_T18X: | ||
997 | case PMU_UNIT_PERFMON: | ||
998 | switch (rpc.function) { | ||
999 | case NV_PMU_RPC_ID_PERFMON_T18X_INIT: | ||
1000 | nvgpu_pmu_dbg(g, | ||
1001 | "reply NV_PMU_RPC_ID_PERFMON_INIT"); | ||
1002 | pmu->perfmon_ready = 1; | ||
1003 | break; | ||
1004 | case NV_PMU_RPC_ID_PERFMON_T18X_START: | ||
1005 | nvgpu_pmu_dbg(g, | ||
1006 | "reply NV_PMU_RPC_ID_PERFMON_START"); | ||
1007 | break; | ||
1008 | case NV_PMU_RPC_ID_PERFMON_T18X_STOP: | ||
1009 | nvgpu_pmu_dbg(g, | ||
1010 | "reply NV_PMU_RPC_ID_PERFMON_STOP"); | ||
1011 | break; | ||
1012 | case NV_PMU_RPC_ID_PERFMON_T18X_QUERY: | ||
1013 | nvgpu_pmu_dbg(g, | ||
1014 | "reply NV_PMU_RPC_ID_PERFMON_QUERY"); | ||
1015 | rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)param; | ||
1016 | pmu->load = rpc_param->sample_buffer[0]; | ||
1017 | pmu->perfmon_query = 1; | ||
1018 | /* set perfmon_query to 1 after load is copied */ | ||
1019 | break; | ||
1020 | } | ||
1021 | break; | ||
993 | /* TBD case will be added */ | 1022 | /* TBD case will be added */ |
994 | default: | 1023 | default: |
995 | nvgpu_err(g, " Invalid RPC response, stats 0x%x", | 1024 | nvgpu_err(g, " Invalid RPC response, stats 0x%x", |
996 | rpc.flcn_status); | 1025 | rpc.flcn_status); |
997 | break; | 1026 | break; |
998 | } | 1027 | } |
999 | 1028 | ||
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c index 2b952868..25d81b60 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | |||
@@ -221,11 +221,18 @@ int nvgpu_pmu_load_update(struct gk20a *g) | |||
221 | 221 | ||
222 | if (!pmu->perfmon_ready) { | 222 | if (!pmu->perfmon_ready) { |
223 | pmu->load_shadow = 0; | 223 | pmu->load_shadow = 0; |
224 | pmu->load = 0; | ||
224 | return 0; | 225 | return 0; |
225 | } | 226 | } |
226 | 227 | ||
227 | nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer, | 228 | if (g->ops.pmu.pmu_perfmon_get_samples_rpc) { |
228 | (u8 *)&load, 2, 0); | 229 | nvgpu_pmu_perfmon_get_samples_rpc(pmu); |
230 | load = pmu->load; | ||
231 | } else { | ||
232 | nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer, | ||
233 | (u8 *)&load, 2 * 1, 0); | ||
234 | } | ||
235 | |||
229 | pmu->load_shadow = load / 10; | 236 | pmu->load_shadow = load / 10; |
230 | pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); | 237 | pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); |
231 | 238 | ||
@@ -288,6 +295,129 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, | |||
288 | 295 | ||
289 | /* restart sampling */ | 296 | /* restart sampling */ |
290 | if (pmu->perfmon_sampling_enabled) | 297 | if (pmu->perfmon_sampling_enabled) |
291 | return nvgpu_pmu_perfmon_start_sampling(pmu); | 298 | return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); |
299 | |||
292 | return 0; | 300 | return 0; |
293 | } | 301 | } |
302 | |||
303 | /* Perfmon RPC */ | ||
304 | int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu) | ||
305 | { | ||
306 | struct gk20a *g = gk20a_from_pmu(pmu); | ||
307 | struct nv_pmu_rpc_struct_perfmon_init rpc; | ||
308 | int status = 0; | ||
309 | |||
310 | if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) | ||
311 | return 0; | ||
312 | |||
313 | nvgpu_log_fn(g, " "); | ||
314 | |||
315 | memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init)); | ||
316 | pmu->perfmon_ready = 0; | ||
317 | |||
318 | gk20a_pmu_init_perfmon_counter(g); | ||
319 | |||
320 | /* microseconds interval between pmu polls perf counters */ | ||
321 | rpc.sample_periodus = 16700; | ||
322 | /* number of sample periods below lower threshold | ||
323 | * before pmu triggers perfmon decrease event | ||
324 | */ | ||
325 | rpc.to_decrease_count = 15; | ||
326 | /* index of base counter, aka. always ticking counter */ | ||
327 | rpc.base_counter_id = 6; | ||
328 | /* moving average window for sample periods */ | ||
329 | rpc.samples_in_moving_avg = 17; | ||
330 | /* number of perfmon counters | ||
331 | * counter #3 (GR and CE2) for gk20a | ||
332 | */ | ||
333 | rpc.num_counters = 1; | ||
334 | |||
335 | memset(rpc.counter, 0, sizeof(struct pmu_perfmon_counter_v3) * | ||
336 | NV_PMU_PERFMON_MAX_COUNTERS); | ||
337 | /* Counter used to count GR busy cycles */ | ||
338 | rpc.counter[0].index = 3; | ||
339 | |||
340 | nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_INIT"); | ||
341 | PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, INIT, &rpc, 0); | ||
342 | if (status) { | ||
343 | nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); | ||
344 | goto exit; | ||
345 | } | ||
346 | |||
347 | exit: | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu) | ||
352 | { | ||
353 | struct gk20a *g = gk20a_from_pmu(pmu); | ||
354 | struct nv_pmu_rpc_struct_perfmon_start rpc; | ||
355 | int status = 0; | ||
356 | |||
357 | if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) | ||
358 | return 0; | ||
359 | |||
360 | nvgpu_log_fn(g, " "); | ||
361 | |||
362 | memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start)); | ||
363 | rpc.group_id = PMU_DOMAIN_GROUP_PSTATE; | ||
364 | rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]; | ||
365 | rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE | | ||
366 | PMU_PERFMON_FLAG_ENABLE_DECREASE | | ||
367 | PMU_PERFMON_FLAG_CLEAR_PREV; | ||
368 | |||
369 | rpc.counter[0].upper_threshold = 3000; | ||
370 | rpc.counter[0].lower_threshold = 1000; | ||
371 | |||
372 | nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_START\n"); | ||
373 | PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, START, &rpc, 0); | ||
374 | if (status) | ||
375 | nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); | ||
376 | |||
377 | return status; | ||
378 | } | ||
379 | |||
380 | int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu) | ||
381 | { | ||
382 | struct gk20a *g = gk20a_from_pmu(pmu); | ||
383 | struct nv_pmu_rpc_struct_perfmon_stop rpc; | ||
384 | int status = 0; | ||
385 | |||
386 | if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) | ||
387 | return 0; | ||
388 | |||
389 | nvgpu_log_fn(g, " "); | ||
390 | |||
391 | memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_stop)); | ||
392 | /* PERFMON Stop */ | ||
393 | nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_STOP\n"); | ||
394 | PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, STOP, &rpc, 0); | ||
395 | if (status) | ||
396 | nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); | ||
397 | |||
398 | return status; | ||
399 | } | ||
400 | |||
401 | int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu) | ||
402 | { | ||
403 | struct gk20a *g = gk20a_from_pmu(pmu); | ||
404 | struct nv_pmu_rpc_struct_perfmon_query rpc; | ||
405 | int status = 0; | ||
406 | |||
407 | if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) | ||
408 | return 0; | ||
409 | |||
410 | nvgpu_log_fn(g, " "); | ||
411 | pmu->perfmon_query = 0; | ||
412 | memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query)); | ||
413 | /* PERFMON QUERY */ | ||
414 | nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n"); | ||
415 | PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, QUERY, &rpc, 0); | ||
416 | if (status) | ||
417 | nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); | ||
418 | |||
419 | pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), | ||
420 | &pmu->perfmon_query, 1); | ||
421 | |||
422 | return status; | ||
423 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 685976b1..0ce3b50d 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -836,6 +836,10 @@ struct gpu_ops { | |||
836 | int (*prepare_ucode)(struct gk20a *g); | 836 | int (*prepare_ucode)(struct gk20a *g); |
837 | int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g); | 837 | int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g); |
838 | int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu); | 838 | int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu); |
839 | int (*pmu_init_perfmon)(struct nvgpu_pmu *pmu); | ||
840 | int (*pmu_perfmon_start_sampling)(struct nvgpu_pmu *pmu); | ||
841 | int (*pmu_perfmon_stop_sampling)(struct nvgpu_pmu *pmu); | ||
842 | int (*pmu_perfmon_get_samples_rpc)(struct nvgpu_pmu *pmu); | ||
839 | int (*pmu_setup_elpg)(struct gk20a *g); | 843 | int (*pmu_setup_elpg)(struct gk20a *g); |
840 | u32 (*pmu_get_queue_head)(u32 i); | 844 | u32 (*pmu_get_queue_head)(u32 i); |
841 | u32 (*pmu_get_queue_head_size)(void); | 845 | u32 (*pmu_get_queue_head_size)(void); |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 3ee22ed1..8a5c1278 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -483,6 +483,9 @@ static const struct gpu_ops gm20b_ops = { | |||
483 | .pmu_mutex_release = gk20a_pmu_mutex_release, | 483 | .pmu_mutex_release = gk20a_pmu_mutex_release, |
484 | .write_dmatrfbase = gm20b_write_dmatrfbase, | 484 | .write_dmatrfbase = gm20b_write_dmatrfbase, |
485 | .pmu_elpg_statistics = gk20a_pmu_elpg_statistics, | 485 | .pmu_elpg_statistics = gk20a_pmu_elpg_statistics, |
486 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon, | ||
487 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, | ||
488 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, | ||
486 | .pmu_pg_init_param = NULL, | 489 | .pmu_pg_init_param = NULL, |
487 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | 490 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, |
488 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | 491 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 3073668e..d0458420 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -583,6 +583,9 @@ static const struct gpu_ops gp106_ops = { | |||
583 | .is_pmu_supported = gp106_is_pmu_supported, | 583 | .is_pmu_supported = gp106_is_pmu_supported, |
584 | .pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list, | 584 | .pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list, |
585 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, | 585 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, |
586 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon, | ||
587 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, | ||
588 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, | ||
586 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, | 589 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, |
587 | .pmu_is_lpwr_feature_supported = | 590 | .pmu_is_lpwr_feature_supported = |
588 | gp106_pmu_is_lpwr_feature_supported, | 591 | gp106_pmu_is_lpwr_feature_supported, |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 7041c5bd..5e9096e2 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c | |||
@@ -544,6 +544,9 @@ static const struct gpu_ops gp10b_ops = { | |||
544 | .pmu_mutex_release = gk20a_pmu_mutex_release, | 544 | .pmu_mutex_release = gk20a_pmu_mutex_release, |
545 | .write_dmatrfbase = gp10b_write_dmatrfbase, | 545 | .write_dmatrfbase = gp10b_write_dmatrfbase, |
546 | .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, | 546 | .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, |
547 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon, | ||
548 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, | ||
549 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, | ||
547 | .pmu_pg_init_param = gp10b_pg_gr_init, | 550 | .pmu_pg_init_param = gp10b_pg_gr_init, |
548 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | 551 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, |
549 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | 552 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 95d1f076..dbf32b20 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -591,6 +591,9 @@ static const struct gpu_ops gv100_ops = { | |||
591 | .is_pmu_supported = gp106_is_pmu_supported, | 591 | .is_pmu_supported = gp106_is_pmu_supported, |
592 | .pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list, | 592 | .pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list, |
593 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, | 593 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, |
594 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon, | ||
595 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling, | ||
596 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling, | ||
594 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, | 597 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, |
595 | .pmu_is_lpwr_feature_supported = | 598 | .pmu_is_lpwr_feature_supported = |
596 | gp106_pmu_is_lpwr_feature_supported, | 599 | gp106_pmu_is_lpwr_feature_supported, |
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 0a552f5b..80ed29b8 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -606,6 +606,10 @@ static const struct gpu_ops gv11b_ops = { | |||
606 | .pmu_mutex_release = gk20a_pmu_mutex_release, | 606 | .pmu_mutex_release = gk20a_pmu_mutex_release, |
607 | .write_dmatrfbase = gp10b_write_dmatrfbase, | 607 | .write_dmatrfbase = gp10b_write_dmatrfbase, |
608 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, | 608 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, |
609 | .pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc, | ||
610 | .pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc, | ||
611 | .pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc, | ||
612 | .pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc, | ||
609 | .pmu_pg_init_param = gv11b_pg_gr_init, | 613 | .pmu_pg_init_param = gv11b_pg_gr_init, |
610 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | 614 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, |
611 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | 615 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index cd7e1879..5e9983b0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h | |||
@@ -329,6 +329,8 @@ struct nvgpu_pmu { | |||
329 | u32 *ucode_image; | 329 | u32 *ucode_image; |
330 | bool pmu_ready; | 330 | bool pmu_ready; |
331 | 331 | ||
332 | u32 perfmon_query; | ||
333 | |||
332 | u32 zbc_save_done; | 334 | u32 zbc_save_done; |
333 | 335 | ||
334 | u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE]; | 336 | u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE]; |
@@ -362,6 +364,7 @@ struct nvgpu_pmu { | |||
362 | u32 sample_buffer; | 364 | u32 sample_buffer; |
363 | u32 load_shadow; | 365 | u32 load_shadow; |
364 | u32 load_avg; | 366 | u32 load_avg; |
367 | u32 load; | ||
365 | 368 | ||
366 | struct nvgpu_mutex isr_mutex; | 369 | struct nvgpu_mutex isr_mutex; |
367 | bool isr_enabled; | 370 | bool isr_enabled; |
@@ -432,8 +435,12 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu); | |||
432 | int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu); | 435 | int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu); |
433 | int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); | 436 | int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); |
434 | int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); | 437 | int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); |
438 | int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu); | ||
439 | int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu); | ||
440 | int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu); | ||
435 | int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, | 441 | int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, |
436 | struct pmu_perfmon_msg *msg); | 442 | struct pmu_perfmon_msg *msg); |
443 | int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); | ||
437 | int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); | 444 | int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); |
438 | int nvgpu_pmu_load_update(struct gk20a *g); | 445 | int nvgpu_pmu_load_update(struct gk20a *g); |
439 | void nvgpu_pmu_reset_load_counters(struct gk20a *g); | 446 | void nvgpu_pmu_reset_load_counters(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h index f8c15324..bcf4c8b6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuif_perfmon.h | |||
@@ -32,6 +32,8 @@ | |||
32 | #define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002) | 32 | #define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002) |
33 | #define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004) | 33 | #define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004) |
34 | 34 | ||
35 | #define NV_PMU_PERFMON_MAX_COUNTERS 10 | ||
36 | |||
35 | enum pmu_perfmon_cmd_start_fields { | 37 | enum pmu_perfmon_cmd_start_fields { |
36 | COUNTER_ALLOC | 38 | COUNTER_ALLOC |
37 | }; | 39 | }; |
@@ -61,6 +63,15 @@ struct pmu_perfmon_counter_v2 { | |||
61 | u32 scale; | 63 | u32 scale; |
62 | }; | 64 | }; |
63 | 65 | ||
66 | struct pmu_perfmon_counter_v3 { | ||
67 | u8 index; | ||
68 | u8 group_id; | ||
69 | u16 flags; | ||
70 | u16 upper_threshold; /* units of 0.01% */ | ||
71 | u16 lower_threshold; /* units of 0.01% */ | ||
72 | u32 scale; | ||
73 | }; | ||
74 | |||
64 | struct pmu_perfmon_cmd_start_v3 { | 75 | struct pmu_perfmon_cmd_start_v3 { |
65 | u8 cmd_type; | 76 | u8 cmd_type; |
66 | u8 group_id; | 77 | u8 group_id; |
@@ -184,4 +195,77 @@ struct pmu_perfmon_msg { | |||
184 | }; | 195 | }; |
185 | }; | 196 | }; |
186 | 197 | ||
198 | /* PERFMON RPC interface */ | ||
199 | /* | ||
200 | * RPC calls serviced by PERFMON unit. | ||
201 | */ | ||
202 | #define NV_PMU_RPC_ID_PERFMON_T18X_INIT 0x00 | ||
203 | #define NV_PMU_RPC_ID_PERFMON_T18X_DEINIT 0x01 | ||
204 | #define NV_PMU_RPC_ID_PERFMON_T18X_START 0x02 | ||
205 | #define NV_PMU_RPC_ID_PERFMON_T18X_STOP 0x03 | ||
206 | #define NV_PMU_RPC_ID_PERFMON_T18X_QUERY 0x04 | ||
207 | #define NV_PMU_RPC_ID_PERFMON_T18X__COUNT 0x05 | ||
208 | |||
209 | /* | ||
210 | * structure that holds data used to | ||
211 | * execute Perfmon INIT RPC. | ||
212 | * hdr - RPC header | ||
213 | * sample_periodus - Desired period in between samples. | ||
214 | * to_decrease_count - Consecutive samples before decrease event. | ||
215 | * base_counter_id - Index of the base counter. | ||
216 | * samples_in_moving_avg - Number of values in moving average. | ||
217 | * num_counters - Num of counters PMU should use. | ||
218 | * counter - Counters. | ||
219 | */ | ||
220 | struct nv_pmu_rpc_struct_perfmon_init { | ||
221 | struct nv_pmu_rpc_header hdr; | ||
222 | u32 sample_periodus; | ||
223 | u8 to_decrease_count; | ||
224 | u8 base_counter_id; | ||
225 | u8 samples_in_moving_avg; | ||
226 | u8 num_counters; | ||
227 | struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS]; | ||
228 | u32 scratch[1]; | ||
229 | }; | ||
230 | |||
231 | /* | ||
232 | * structure that holds data used to | ||
233 | * execute Perfmon START RPC. | ||
234 | * hdr - RPC header | ||
235 | * group_id - NV group ID | ||
236 | * state_id - NV state ID | ||
237 | * flags - PMU_PERFMON flags | ||
238 | * counters - Counters. | ||
239 | */ | ||
240 | struct nv_pmu_rpc_struct_perfmon_start { | ||
241 | struct nv_pmu_rpc_header hdr; | ||
242 | u8 group_id; | ||
243 | u8 state_id; | ||
244 | u8 flags; | ||
245 | struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS]; | ||
246 | u32 scratch[1]; | ||
247 | }; | ||
248 | |||
249 | /* | ||
250 | * structure that holds data used to | ||
251 | * execute Perfmon STOP RPC. | ||
252 | * hdr - RPC header | ||
253 | */ | ||
254 | struct nv_pmu_rpc_struct_perfmon_stop { | ||
255 | struct nv_pmu_rpc_header hdr; | ||
256 | u32 scratch[1]; | ||
257 | }; | ||
258 | |||
259 | /* | ||
260 | * structure that holds data used to | ||
261 | * execute QUERY RPC. | ||
262 | * hdr - RPC header | ||
263 | * sample_buffer - Output buffer from pmu containing utilization samples. | ||
264 | */ | ||
265 | struct nv_pmu_rpc_struct_perfmon_query { | ||
266 | struct nv_pmu_rpc_header hdr; | ||
267 | u16 sample_buffer[NV_PMU_PERFMON_MAX_COUNTERS]; | ||
268 | u32 scratch[1]; | ||
269 | }; | ||
270 | |||
187 | #endif /* _GPMUIFPERFMON_H_ */ | 271 | #endif /* _GPMUIFPERFMON_H_ */ |