summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVaibhav Kachore <vkachore@nvidia.com>2018-07-03 07:51:13 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-10 21:14:16 -0400
commit503d489dba278136ea0e4241d000018682989da5 (patch)
tree29b95d43aaf47b50f2abda9a4e16153afe070c25
parente14fdcd8f1f4125da697433b1744b1e4e4f15b09 (diff)
gpu: nvgpu: Initialize hwpm perfmons (engine_sel)
- For Mode-E ctxsw it is required that engine_sel is set to 0xFFFFFFFF. - Default 0 is a valid signal and causes problems. Bug 2106999 Change-Id: I5cdb4441a8e6d7e8133c31a9e361b54611dd2995 Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1770755 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c5
-rw-r--r--drivers/gpu/nvgpu/gv100/gr_gv100.c87
-rw-r--r--drivers/gpu/nvgpu/gv100/gr_gv100.h1
-rw-r--r--drivers/gpu/nvgpu/gv100/hal_gv100.c1
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h24
6 files changed, 119 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ce0a6563..b677419c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -312,6 +312,7 @@ struct gpu_ops {
312 struct channel_gk20a *c, 312 struct channel_gk20a *c,
313 u64 gpu_va, 313 u64 gpu_va,
314 u32 mode); 314 u32 mode);
315 void (*init_hwpm_pmm_register)(struct gk20a *g);
315 int (*dump_gr_regs)(struct gk20a *g, 316 int (*dump_gr_regs)(struct gk20a *g,
316 struct gk20a_debug_output *o); 317 struct gk20a_debug_output *o);
317 int (*update_pc_sampling)(struct channel_gk20a *ch, 318 int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3f49fbf7..c70c1cd4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1782,6 +1782,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1782 return -ENOMEM; 1782 return -ENOMEM;
1783 } 1783 }
1784 } 1784 }
1785
1786 if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW &&
1787 g->ops.gr.init_hwpm_pmm_register) {
1788 g->ops.gr.init_hwpm_pmm_register(g);
1789 }
1785 } 1790 }
1786 1791
1787 data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); 1792 data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index b1b1cee6..5f891ce5 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -39,6 +39,7 @@
39#include <nvgpu/hw/gv100/hw_fuse_gv100.h> 39#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
40#include <nvgpu/hw/gv100/hw_top_gv100.h> 40#include <nvgpu/hw/gv100/hw_top_gv100.h>
41#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h> 41#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
42#include <nvgpu/hw/gv100/hw_perf_gv100.h>
42 43
43 44
44/* 45/*
@@ -459,3 +460,89 @@ u32 gr_gv100_get_hw_accessor_stream_out_mode()
459{ 460{
460 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); 461 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
461} 462}
463
464static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
465 u32 num_chiplets, u32 num_perfmons)
466{
467 u32 perfmon_index = 0;
468 u32 chiplet_index = 0;
469 u32 reg_offset = 0;
470 u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset();
471
472 for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) {
473 for (perfmon_index = 0; perfmon_index < num_perfmons;
474 perfmon_index++) {
475 reg_offset = offset + perfmon_index * perf_pmmgpc_perdomain_offset_v() +
476 chiplet_index * chiplet_stride;
477 nvgpu_writel(g, reg_offset, val);
478 }
479 }
480
481}
482
483static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon,
484 int *num_fbp_perfmon, int *num_gpc_perfmon)
485{
486 int err;
487 u32 buf_offset_lo, buf_offset_addr, num_offsets;
488 u32 perfmon_index = 0;
489
490 for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v();
491 perfmon_index++) {
492 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
493 perf_pmmsys_engine_sel_r(perfmon_index),
494 1,
495 &buf_offset_lo,
496 &buf_offset_addr,
497 &num_offsets);
498 if (err) {
499 break;
500 }
501 }
502 *num_sys_perfmon = perfmon_index;
503
504 for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v();
505 perfmon_index++) {
506 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
507 perf_pmmfbp_engine_sel_r(perfmon_index),
508 1,
509 &buf_offset_lo,
510 &buf_offset_addr,
511 &num_offsets);
512 if (err) {
513 break;
514 }
515 }
516 *num_fbp_perfmon = perfmon_index;
517
518 for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v();
519 perfmon_index++) {
520 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
521 perf_pmmgpc_engine_sel_r(perfmon_index),
522 1,
523 &buf_offset_lo,
524 &buf_offset_addr,
525 &num_offsets);
526 if (err) {
527 break;
528 }
529 }
530 *num_gpc_perfmon = perfmon_index;
531}
532
533void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
534{
535 int num_sys_perfmon = 0;
536 int num_fbp_perfmon = 0;
537 int num_gpc_perfmon = 0;
538
539 gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon,
540 &num_fbp_perfmon, &num_gpc_perfmon);
541
542 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
543 1, 0xFFFFFFFF, num_sys_perfmon);
544 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
545 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon);
546 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
547 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon);
548}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 457bd701..81bf7e38 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -48,4 +48,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
48 u32 *priv_addr_table, u32 *t); 48 u32 *priv_addr_table, u32 *t);
49void gr_gv100_init_gpc_mmu(struct gk20a *g); 49void gr_gv100_init_gpc_mmu(struct gk20a *g);
50u32 gr_gv100_get_hw_accessor_stream_out_mode(void); 50u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
51void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
51#endif 52#endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index c84778dd..3cfda7ca 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -361,6 +361,7 @@ static const struct gpu_ops gv100_ops = {
361 .get_hw_accessor_stream_out_mode = 361 .get_hw_accessor_stream_out_mode =
362 gr_gv100_get_hw_accessor_stream_out_mode, 362 gr_gv100_get_hw_accessor_stream_out_mode,
363 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, 363 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
364 .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
364 .record_sm_error_state = gv11b_gr_record_sm_error_state, 365 .record_sm_error_state = gv11b_gr_record_sm_error_state,
365 .update_sm_error_state = gv11b_gr_update_sm_error_state, 366 .update_sm_error_state = gv11b_gr_update_sm_error_state,
366 .clear_sm_error_state = gm20b_gr_clear_sm_error_state, 367 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
index 268efc52..a7ba460e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
@@ -232,4 +232,28 @@ static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
232{ 232{
233 return 0x10U; 233 return 0x10U;
234} 234}
235static inline u32 perf_pmmsys_engine_sel_r(u32 i)
236{
237 return 0x0024006cU + i*512U;
238}
239static inline u32 perf_pmmsys_engine_sel__size_1_v(void)
240{
241 return 0x00000020U;
242}
243static inline u32 perf_pmmfbp_engine_sel_r(u32 i)
244{
245 return 0x0020006cU + i*512U;
246}
247static inline u32 perf_pmmfbp_engine_sel__size_1_v(void)
248{
249 return 0x00000020U;
250}
251static inline u32 perf_pmmgpc_engine_sel_r(u32 i)
252{
253 return 0x0018006cU + i*512U;
254}
255static inline u32 perf_pmmgpc_engine_sel__size_1_v(void)
256{
257 return 0x00000020U;
258}
235#endif 259#endif