summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vaibhav Kachore <vkachore@nvidia.com> 2018-07-25 07:42:38 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-09-26 02:03:55 -0400
commit: 3f83528d769169fdaf25912f425226eaa07427f0 (patch)
tree: 16258e86f78234ff05a0722dbe7f366d5e9f788e
parent: b96a6506d0095ef7271b9fadaba2e0dc6ca33484 (diff)
gpu: nvgpu: correct parameters in set_pmm_register
- This patch corrects parameters in set_pmm_register
- As FBP 6 and 7 are floorswept for GV100, GPU_LIT_NUM_FBPS should not be used
- halify get_num_hwpm_perfmon and set_pmm_register

Bug 2106999

Change-Id: Ib285b25d0c836c93b529dfe4e26c078159a3e6dd
Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1785620
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gv100/gr_gv100.c 87
-rw-r--r-- drivers/gpu/nvgpu/gv100/gr_gv100.h 4
-rw-r--r-- drivers/gpu/nvgpu/gv100/hal_gv100.c 2
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/gk20a.h 4
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h 4
5 files changed, 59 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 0d5692e1..5e8a99df 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -420,12 +420,12 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
420 } 420 }
421} 421}
422 422
423u32 gr_gv100_get_hw_accessor_stream_out_mode() 423u32 gr_gv100_get_hw_accessor_stream_out_mode(void)
424{ 424{
425 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); 425 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
426} 426}
427 427
428static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, 428void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
429 u32 num_chiplets, u32 num_perfmons) 429 u32 num_chiplets, u32 num_perfmons)
430{ 430{
431 u32 perfmon_index = 0; 431 u32 perfmon_index = 0;
@@ -434,60 +434,63 @@ static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
434 u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); 434 u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset();
435 435
436 for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { 436 for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) {
437 for (perfmon_index = 0; perfmon_index < num_perfmons; 437 for (perfmon_index = 0; perfmon_index < num_perfmons;
438 perfmon_index++) { 438 perfmon_index++) {
439 reg_offset = offset + perfmon_index * perf_pmmgpc_perdomain_offset_v() + 439 reg_offset = offset + perfmon_index *
440 chiplet_index * chiplet_stride; 440 perf_pmmsys_perdomain_offset_v() +
441 chiplet_index * chiplet_stride;
441 nvgpu_writel(g, reg_offset, val); 442 nvgpu_writel(g, reg_offset, val);
442 } 443 }
443 } 444 }
444
445} 445}
446 446
447static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, 447void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
448 int *num_fbp_perfmon, int *num_gpc_perfmon) 448 u32 *num_fbp_perfmon, u32 *num_gpc_perfmon)
449{ 449{
450 int err; 450 int err;
451 u32 buf_offset_lo, buf_offset_addr, num_offsets; 451 u32 buf_offset_lo, buf_offset_addr, num_offsets;
452 u32 perfmon_index = 0; 452 u32 perfmon_index = 0;
453 453
454 for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v(); 454 for (perfmon_index = 0; perfmon_index <
455 perfmon_index++) { 455 perf_pmmsys_engine_sel__size_1_v();
456 perfmon_index++) {
456 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 457 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
457 perf_pmmsys_engine_sel_r(perfmon_index), 458 perf_pmmsys_engine_sel_r(perfmon_index),
458 1, 459 1,
459 &buf_offset_lo, 460 &buf_offset_lo,
460 &buf_offset_addr, 461 &buf_offset_addr,
461 &num_offsets); 462 &num_offsets);
462 if (err) { 463 if (err != 0) {
463 break; 464 break;
464 } 465 }
465 } 466 }
466 *num_sys_perfmon = perfmon_index; 467 *num_sys_perfmon = perfmon_index;
467 468
468 for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v(); 469 for (perfmon_index = 0; perfmon_index <
469 perfmon_index++) { 470 perf_pmmfbp_engine_sel__size_1_v();
471 perfmon_index++) {
470 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 472 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
471 perf_pmmfbp_engine_sel_r(perfmon_index), 473 perf_pmmfbp_engine_sel_r(perfmon_index),
472 1, 474 1,
473 &buf_offset_lo, 475 &buf_offset_lo,
474 &buf_offset_addr, 476 &buf_offset_addr,
475 &num_offsets); 477 &num_offsets);
476 if (err) { 478 if (err != 0) {
477 break; 479 break;
478 } 480 }
479 } 481 }
480 *num_fbp_perfmon = perfmon_index; 482 *num_fbp_perfmon = perfmon_index;
481 483
482 for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v(); 484 for (perfmon_index = 0; perfmon_index <
483 perfmon_index++) { 485 perf_pmmgpc_engine_sel__size_1_v();
486 perfmon_index++) {
484 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 487 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
485 perf_pmmgpc_engine_sel_r(perfmon_index), 488 perf_pmmgpc_engine_sel_r(perfmon_index),
486 1, 489 1,
487 &buf_offset_lo, 490 &buf_offset_lo,
488 &buf_offset_addr, 491 &buf_offset_addr,
489 &num_offsets); 492 &num_offsets);
490 if (err) { 493 if (err != 0) {
491 break; 494 break;
492 } 495 }
493 } 496 }
@@ -496,17 +499,17 @@ static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon,
496 499
497void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) 500void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
498{ 501{
499 int num_sys_perfmon = 0; 502 u32 num_sys_perfmon = 0;
500 int num_fbp_perfmon = 0; 503 u32 num_fbp_perfmon = 0;
501 int num_gpc_perfmon = 0; 504 u32 num_gpc_perfmon = 0;
502 505
503 gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon, 506 g->ops.gr.get_num_hwpm_perfmon(g, &num_sys_perfmon,
504 &num_fbp_perfmon, &num_gpc_perfmon); 507 &num_fbp_perfmon, &num_gpc_perfmon);
505 508
506 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 509 g->ops.gr.set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
507 1, 0xFFFFFFFF, num_sys_perfmon); 510 0xFFFFFFFFU, 1U, num_sys_perfmon);
508 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 511 g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
509 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon); 512 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon);
510 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 513 g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
511 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon); 514 0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon);
512} 515}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index aae87f09..d0d76737 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -48,4 +48,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
48 u32 *priv_addr_table, u32 *t); 48 u32 *priv_addr_table, u32 *t);
49u32 gr_gv100_get_hw_accessor_stream_out_mode(void); 49u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
50void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); 50void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
51void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
52 u32 num_chiplets, u32 num_perfmons);
53void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
54 u32 *num_fbp_perfmon, u32 *num_gpc_perfmon);
51#endif /* NVGPU_GR_GV100_H */ 55#endif /* NVGPU_GR_GV100_H */
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 26584bb4..3c3e5742 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -376,6 +376,8 @@ static const struct gpu_ops gv100_ops = {
376 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, 376 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
377 .get_hw_accessor_stream_out_mode = 377 .get_hw_accessor_stream_out_mode =
378 gr_gv100_get_hw_accessor_stream_out_mode, 378 gr_gv100_get_hw_accessor_stream_out_mode,
379 .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
380 .set_pmm_register = gr_gv100_set_pmm_register,
379 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, 381 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
380 .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, 382 .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
381 .record_sm_error_state = gv11b_gr_record_sm_error_state, 383 .record_sm_error_state = gv11b_gr_record_sm_error_state,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index fa31d0e1..a7fe1c2f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -330,6 +330,10 @@ struct gpu_ops {
330 u64 gpu_va, 330 u64 gpu_va,
331 u32 mode); 331 u32 mode);
332 void (*init_hwpm_pmm_register)(struct gk20a *g); 332 void (*init_hwpm_pmm_register)(struct gk20a *g);
333 void (*get_num_hwpm_perfmon)(struct gk20a *g, u32 *num_sys_perfmon,
334 u32 *num_fbp_perfmon, u32 *num_gpc_perfmon);
335 void (*set_pmm_register)(struct gk20a *g, u32 offset, u32 val,
336 u32 num_chiplets, u32 num_perfmons);
333 int (*dump_gr_regs)(struct gk20a *g, 337 int (*dump_gr_regs)(struct gk20a *g,
334 struct gk20a_debug_output *o); 338 struct gk20a_debug_output *o);
335 int (*update_pc_sampling)(struct channel_gk20a *ch, 339 int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
index a7ba460e..40107ee8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h
@@ -60,6 +60,10 @@ static inline u32 perf_pmmgpc_perdomain_offset_v(void)
60{ 60{
61 return 0x00000200U; 61 return 0x00000200U;
62} 62}
63static inline u32 perf_pmmsys_perdomain_offset_v(void)
64{
65 return 0x00000200U;
66}
63static inline u32 perf_pmmgpc_base_v(void) 67static inline u32 perf_pmmgpc_base_v(void)
64{ 68{
65 return 0x00180000U; 69 return 0x00180000U;