summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gv100/gr_gv100.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv100/gr_gv100.c')
-rw-r--r-- drivers/gpu/nvgpu/gv100/gr_gv100.c 87
1 files changed, 45 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 0d5692e1..5e8a99df 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -420,12 +420,12 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
420 } 420 }
421} 421}
422 422
423u32 gr_gv100_get_hw_accessor_stream_out_mode() 423u32 gr_gv100_get_hw_accessor_stream_out_mode(void)
424{ 424{
425 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); 425 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
426} 426}
427 427
428static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, 428void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
429 u32 num_chiplets, u32 num_perfmons) 429 u32 num_chiplets, u32 num_perfmons)
430{ 430{
431 u32 perfmon_index = 0; 431 u32 perfmon_index = 0;
@@ -434,60 +434,63 @@ static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
434 u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); 434 u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset();
435 435
436 for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { 436 for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) {
437 for (perfmon_index = 0; perfmon_index < num_perfmons; 437 for (perfmon_index = 0; perfmon_index < num_perfmons;
438 perfmon_index++) { 438 perfmon_index++) {
439 reg_offset = offset + perfmon_index * perf_pmmgpc_perdomain_offset_v() + 439 reg_offset = offset + perfmon_index *
440 chiplet_index * chiplet_stride; 440 perf_pmmsys_perdomain_offset_v() +
441 chiplet_index * chiplet_stride;
441 nvgpu_writel(g, reg_offset, val); 442 nvgpu_writel(g, reg_offset, val);
442 } 443 }
443 } 444 }
444
445} 445}
446 446
447static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, 447void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
448 int *num_fbp_perfmon, int *num_gpc_perfmon) 448 u32 *num_fbp_perfmon, u32 *num_gpc_perfmon)
449{ 449{
450 int err; 450 int err;
451 u32 buf_offset_lo, buf_offset_addr, num_offsets; 451 u32 buf_offset_lo, buf_offset_addr, num_offsets;
452 u32 perfmon_index = 0; 452 u32 perfmon_index = 0;
453 453
454 for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v(); 454 for (perfmon_index = 0; perfmon_index <
455 perfmon_index++) { 455 perf_pmmsys_engine_sel__size_1_v();
456 perfmon_index++) {
456 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 457 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
457 perf_pmmsys_engine_sel_r(perfmon_index), 458 perf_pmmsys_engine_sel_r(perfmon_index),
458 1, 459 1,
459 &buf_offset_lo, 460 &buf_offset_lo,
460 &buf_offset_addr, 461 &buf_offset_addr,
461 &num_offsets); 462 &num_offsets);
462 if (err) { 463 if (err != 0) {
463 break; 464 break;
464 } 465 }
465 } 466 }
466 *num_sys_perfmon = perfmon_index; 467 *num_sys_perfmon = perfmon_index;
467 468
468 for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v(); 469 for (perfmon_index = 0; perfmon_index <
469 perfmon_index++) { 470 perf_pmmfbp_engine_sel__size_1_v();
471 perfmon_index++) {
470 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 472 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
471 perf_pmmfbp_engine_sel_r(perfmon_index), 473 perf_pmmfbp_engine_sel_r(perfmon_index),
472 1, 474 1,
473 &buf_offset_lo, 475 &buf_offset_lo,
474 &buf_offset_addr, 476 &buf_offset_addr,
475 &num_offsets); 477 &num_offsets);
476 if (err) { 478 if (err != 0) {
477 break; 479 break;
478 } 480 }
479 } 481 }
480 *num_fbp_perfmon = perfmon_index; 482 *num_fbp_perfmon = perfmon_index;
481 483
482 for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v(); 484 for (perfmon_index = 0; perfmon_index <
483 perfmon_index++) { 485 perf_pmmgpc_engine_sel__size_1_v();
486 perfmon_index++) {
484 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 487 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
485 perf_pmmgpc_engine_sel_r(perfmon_index), 488 perf_pmmgpc_engine_sel_r(perfmon_index),
486 1, 489 1,
487 &buf_offset_lo, 490 &buf_offset_lo,
488 &buf_offset_addr, 491 &buf_offset_addr,
489 &num_offsets); 492 &num_offsets);
490 if (err) { 493 if (err != 0) {
491 break; 494 break;
492 } 495 }
493 } 496 }
@@ -496,17 +499,17 @@ static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon,
496 499
497void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) 500void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
498{ 501{
499 int num_sys_perfmon = 0; 502 u32 num_sys_perfmon = 0;
500 int num_fbp_perfmon = 0; 503 u32 num_fbp_perfmon = 0;
501 int num_gpc_perfmon = 0; 504 u32 num_gpc_perfmon = 0;
502 505
503 gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon, 506 g->ops.gr.get_num_hwpm_perfmon(g, &num_sys_perfmon,
504 &num_fbp_perfmon, &num_gpc_perfmon); 507 &num_fbp_perfmon, &num_gpc_perfmon);
505 508
506 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 509 g->ops.gr.set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
507 1, 0xFFFFFFFF, num_sys_perfmon); 510 0xFFFFFFFFU, 1U, num_sys_perfmon);
508 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 511 g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
509 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon); 512 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon);
510 gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 513 g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
511 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon); 514 0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon);
512} 515}