diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.c | 87 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | 4 |
5 files changed, 59 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 0d5692e1..5e8a99df 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c | |||
@@ -420,12 +420,12 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | |||
420 | } | 420 | } |
421 | } | 421 | } |
422 | 422 | ||
423 | u32 gr_gv100_get_hw_accessor_stream_out_mode() | 423 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void) |
424 | { | 424 | { |
425 | return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); | 425 | return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); |
426 | } | 426 | } |
427 | 427 | ||
428 | static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, | 428 | void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, |
429 | u32 num_chiplets, u32 num_perfmons) | 429 | u32 num_chiplets, u32 num_perfmons) |
430 | { | 430 | { |
431 | u32 perfmon_index = 0; | 431 | u32 perfmon_index = 0; |
@@ -434,60 +434,63 @@ static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, | |||
434 | u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); | 434 | u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); |
435 | 435 | ||
436 | for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { | 436 | for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { |
437 | for (perfmon_index = 0; perfmon_index < num_perfmons; | 437 | for (perfmon_index = 0; perfmon_index < num_perfmons; |
438 | perfmon_index++) { | 438 | perfmon_index++) { |
439 | reg_offset = offset + perfmon_index * perf_pmmgpc_perdomain_offset_v() + | 439 | reg_offset = offset + perfmon_index * |
440 | chiplet_index * chiplet_stride; | 440 | perf_pmmsys_perdomain_offset_v() + |
441 | chiplet_index * chiplet_stride; | ||
441 | nvgpu_writel(g, reg_offset, val); | 442 | nvgpu_writel(g, reg_offset, val); |
442 | } | 443 | } |
443 | } | 444 | } |
444 | |||
445 | } | 445 | } |
446 | 446 | ||
447 | static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, | 447 | void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon, |
448 | int *num_fbp_perfmon, int *num_gpc_perfmon) | 448 | u32 *num_fbp_perfmon, u32 *num_gpc_perfmon) |
449 | { | 449 | { |
450 | int err; | 450 | int err; |
451 | u32 buf_offset_lo, buf_offset_addr, num_offsets; | 451 | u32 buf_offset_lo, buf_offset_addr, num_offsets; |
452 | u32 perfmon_index = 0; | 452 | u32 perfmon_index = 0; |
453 | 453 | ||
454 | for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v(); | 454 | for (perfmon_index = 0; perfmon_index < |
455 | perfmon_index++) { | 455 | perf_pmmsys_engine_sel__size_1_v(); |
456 | perfmon_index++) { | ||
456 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 457 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
457 | perf_pmmsys_engine_sel_r(perfmon_index), | 458 | perf_pmmsys_engine_sel_r(perfmon_index), |
458 | 1, | 459 | 1, |
459 | &buf_offset_lo, | 460 | &buf_offset_lo, |
460 | &buf_offset_addr, | 461 | &buf_offset_addr, |
461 | &num_offsets); | 462 | &num_offsets); |
462 | if (err) { | 463 | if (err != 0) { |
463 | break; | 464 | break; |
464 | } | 465 | } |
465 | } | 466 | } |
466 | *num_sys_perfmon = perfmon_index; | 467 | *num_sys_perfmon = perfmon_index; |
467 | 468 | ||
468 | for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v(); | 469 | for (perfmon_index = 0; perfmon_index < |
469 | perfmon_index++) { | 470 | perf_pmmfbp_engine_sel__size_1_v(); |
471 | perfmon_index++) { | ||
470 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 472 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
471 | perf_pmmfbp_engine_sel_r(perfmon_index), | 473 | perf_pmmfbp_engine_sel_r(perfmon_index), |
472 | 1, | 474 | 1, |
473 | &buf_offset_lo, | 475 | &buf_offset_lo, |
474 | &buf_offset_addr, | 476 | &buf_offset_addr, |
475 | &num_offsets); | 477 | &num_offsets); |
476 | if (err) { | 478 | if (err != 0) { |
477 | break; | 479 | break; |
478 | } | 480 | } |
479 | } | 481 | } |
480 | *num_fbp_perfmon = perfmon_index; | 482 | *num_fbp_perfmon = perfmon_index; |
481 | 483 | ||
482 | for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v(); | 484 | for (perfmon_index = 0; perfmon_index < |
483 | perfmon_index++) { | 485 | perf_pmmgpc_engine_sel__size_1_v(); |
486 | perfmon_index++) { | ||
484 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 487 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
485 | perf_pmmgpc_engine_sel_r(perfmon_index), | 488 | perf_pmmgpc_engine_sel_r(perfmon_index), |
486 | 1, | 489 | 1, |
487 | &buf_offset_lo, | 490 | &buf_offset_lo, |
488 | &buf_offset_addr, | 491 | &buf_offset_addr, |
489 | &num_offsets); | 492 | &num_offsets); |
490 | if (err) { | 493 | if (err != 0) { |
491 | break; | 494 | break; |
492 | } | 495 | } |
493 | } | 496 | } |
@@ -496,17 +499,17 @@ static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, | |||
496 | 499 | ||
497 | void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) | 500 | void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) |
498 | { | 501 | { |
499 | int num_sys_perfmon = 0; | 502 | u32 num_sys_perfmon = 0; |
500 | int num_fbp_perfmon = 0; | 503 | u32 num_fbp_perfmon = 0; |
501 | int num_gpc_perfmon = 0; | 504 | u32 num_gpc_perfmon = 0; |
502 | 505 | ||
503 | gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon, | 506 | g->ops.gr.get_num_hwpm_perfmon(g, &num_sys_perfmon, |
504 | &num_fbp_perfmon, &num_gpc_perfmon); | 507 | &num_fbp_perfmon, &num_gpc_perfmon); |
505 | 508 | ||
506 | gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), | 509 | g->ops.gr.set_pmm_register(g, perf_pmmsys_engine_sel_r(0), |
507 | 1, 0xFFFFFFFF, num_sys_perfmon); | 510 | 0xFFFFFFFFU, 1U, num_sys_perfmon); |
508 | gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), | 511 | g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0), |
509 | nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon); | 512 | 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon); |
510 | gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), | 513 | g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0), |
511 | nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon); | 514 | 0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon); |
512 | } | 515 | } |
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index aae87f09..d0d76737 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h | |||
@@ -48,4 +48,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | |||
48 | u32 *priv_addr_table, u32 *t); | 48 | u32 *priv_addr_table, u32 *t); |
49 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void); | 49 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void); |
50 | void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); | 50 | void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); |
51 | void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, | ||
52 | u32 num_chiplets, u32 num_perfmons); | ||
53 | void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon, | ||
54 | u32 *num_fbp_perfmon, u32 *num_gpc_perfmon); | ||
51 | #endif /* NVGPU_GR_GV100_H */ | 55 | #endif /* NVGPU_GR_GV100_H */ |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 26584bb4..3c3e5742 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -376,6 +376,8 @@ static const struct gpu_ops gv100_ops = { | |||
376 | .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, | 376 | .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, |
377 | .get_hw_accessor_stream_out_mode = | 377 | .get_hw_accessor_stream_out_mode = |
378 | gr_gv100_get_hw_accessor_stream_out_mode, | 378 | gr_gv100_get_hw_accessor_stream_out_mode, |
379 | .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon, | ||
380 | .set_pmm_register = gr_gv100_set_pmm_register, | ||
379 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, | 381 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, |
380 | .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, | 382 | .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, |
381 | .record_sm_error_state = gv11b_gr_record_sm_error_state, | 383 | .record_sm_error_state = gv11b_gr_record_sm_error_state, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index fa31d0e1..a7fe1c2f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h | |||
@@ -330,6 +330,10 @@ struct gpu_ops { | |||
330 | u64 gpu_va, | 330 | u64 gpu_va, |
331 | u32 mode); | 331 | u32 mode); |
332 | void (*init_hwpm_pmm_register)(struct gk20a *g); | 332 | void (*init_hwpm_pmm_register)(struct gk20a *g); |
333 | void (*get_num_hwpm_perfmon)(struct gk20a *g, u32 *num_sys_perfmon, | ||
334 | u32 *num_fbp_perfmon, u32 *num_gpc_perfmon); | ||
335 | void (*set_pmm_register)(struct gk20a *g, u32 offset, u32 val, | ||
336 | u32 num_chiplets, u32 num_perfmons); | ||
333 | int (*dump_gr_regs)(struct gk20a *g, | 337 | int (*dump_gr_regs)(struct gk20a *g, |
334 | struct gk20a_debug_output *o); | 338 | struct gk20a_debug_output *o); |
335 | int (*update_pc_sampling)(struct channel_gk20a *ch, | 339 | int (*update_pc_sampling)(struct channel_gk20a *ch, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h index a7ba460e..40107ee8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | |||
@@ -60,6 +60,10 @@ static inline u32 perf_pmmgpc_perdomain_offset_v(void) | |||
60 | { | 60 | { |
61 | return 0x00000200U; | 61 | return 0x00000200U; |
62 | } | 62 | } |
63 | static inline u32 perf_pmmsys_perdomain_offset_v(void) | ||
64 | { | ||
65 | return 0x00000200U; | ||
66 | } | ||
63 | static inline u32 perf_pmmgpc_base_v(void) | 67 | static inline u32 perf_pmmgpc_base_v(void) |
64 | { | 68 | { |
65 | return 0x00180000U; | 69 | return 0x00180000U; |