diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.c | 87 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | 24 |
6 files changed, 119 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ce0a6563..b677419c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -312,6 +312,7 @@ struct gpu_ops { | |||
312 | struct channel_gk20a *c, | 312 | struct channel_gk20a *c, |
313 | u64 gpu_va, | 313 | u64 gpu_va, |
314 | u32 mode); | 314 | u32 mode); |
315 | void (*init_hwpm_pmm_register)(struct gk20a *g); | ||
315 | int (*dump_gr_regs)(struct gk20a *g, | 316 | int (*dump_gr_regs)(struct gk20a *g, |
316 | struct gk20a_debug_output *o); | 317 | struct gk20a_debug_output *o); |
317 | int (*update_pc_sampling)(struct channel_gk20a *ch, | 318 | int (*update_pc_sampling)(struct channel_gk20a *ch, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3f49fbf7..c70c1cd4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1782,6 +1782,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1782 | return -ENOMEM; | 1782 | return -ENOMEM; |
1783 | } | 1783 | } |
1784 | } | 1784 | } |
1785 | |||
1786 | if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW && | ||
1787 | g->ops.gr.init_hwpm_pmm_register) { | ||
1788 | g->ops.gr.init_hwpm_pmm_register(g); | ||
1789 | } | ||
1785 | } | 1790 | } |
1786 | 1791 | ||
1787 | data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); | 1792 | data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); |
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index b1b1cee6..5f891ce5 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> | 39 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> |
40 | #include <nvgpu/hw/gv100/hw_top_gv100.h> | 40 | #include <nvgpu/hw/gv100/hw_top_gv100.h> |
41 | #include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h> | 41 | #include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h> |
42 | #include <nvgpu/hw/gv100/hw_perf_gv100.h> | ||
42 | 43 | ||
43 | 44 | ||
44 | /* | 45 | /* |
@@ -459,3 +460,89 @@ u32 gr_gv100_get_hw_accessor_stream_out_mode() | |||
459 | { | 460 | { |
460 | return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); | 461 | return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); |
461 | } | 462 | } |
463 | |||
464 | static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, | ||
465 | u32 num_chiplets, u32 num_perfmons) | ||
466 | { | ||
467 | u32 perfmon_index = 0; | ||
468 | u32 chiplet_index = 0; | ||
469 | u32 reg_offset = 0; | ||
470 | u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); | ||
471 | |||
472 | for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { | ||
473 | for (perfmon_index = 0; perfmon_index < num_perfmons; | ||
474 | perfmon_index++) { | ||
475 | reg_offset = offset + perfmon_index * perf_pmmgpc_perdomain_offset_v() + | ||
476 | chiplet_index * chiplet_stride; | ||
477 | nvgpu_writel(g, reg_offset, val); | ||
478 | } | ||
479 | } | ||
480 | |||
481 | } | ||
482 | |||
483 | static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon, | ||
484 | int *num_fbp_perfmon, int *num_gpc_perfmon) | ||
485 | { | ||
486 | int err; | ||
487 | u32 buf_offset_lo, buf_offset_addr, num_offsets; | ||
488 | u32 perfmon_index = 0; | ||
489 | |||
490 | for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v(); | ||
491 | perfmon_index++) { | ||
492 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | ||
493 | perf_pmmsys_engine_sel_r(perfmon_index), | ||
494 | 1, | ||
495 | &buf_offset_lo, | ||
496 | &buf_offset_addr, | ||
497 | &num_offsets); | ||
498 | if (err) { | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | *num_sys_perfmon = perfmon_index; | ||
503 | |||
504 | for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v(); | ||
505 | perfmon_index++) { | ||
506 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | ||
507 | perf_pmmfbp_engine_sel_r(perfmon_index), | ||
508 | 1, | ||
509 | &buf_offset_lo, | ||
510 | &buf_offset_addr, | ||
511 | &num_offsets); | ||
512 | if (err) { | ||
513 | break; | ||
514 | } | ||
515 | } | ||
516 | *num_fbp_perfmon = perfmon_index; | ||
517 | |||
518 | for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v(); | ||
519 | perfmon_index++) { | ||
520 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | ||
521 | perf_pmmgpc_engine_sel_r(perfmon_index), | ||
522 | 1, | ||
523 | &buf_offset_lo, | ||
524 | &buf_offset_addr, | ||
525 | &num_offsets); | ||
526 | if (err) { | ||
527 | break; | ||
528 | } | ||
529 | } | ||
530 | *num_gpc_perfmon = perfmon_index; | ||
531 | } | ||
532 | |||
533 | void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) | ||
534 | { | ||
535 | int num_sys_perfmon = 0; | ||
536 | int num_fbp_perfmon = 0; | ||
537 | int num_gpc_perfmon = 0; | ||
538 | |||
539 | gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon, | ||
540 | &num_fbp_perfmon, &num_gpc_perfmon); | ||
541 | |||
542 | gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), | ||
543 | 1, 0xFFFFFFFF, num_sys_perfmon); | ||
544 | gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), | ||
545 | nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon); | ||
546 | gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0), | ||
547 | nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon); | ||
548 | } | ||
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 457bd701..81bf7e38 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h | |||
@@ -48,4 +48,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | |||
48 | u32 *priv_addr_table, u32 *t); | 48 | u32 *priv_addr_table, u32 *t); |
49 | void gr_gv100_init_gpc_mmu(struct gk20a *g); | 49 | void gr_gv100_init_gpc_mmu(struct gk20a *g); |
50 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void); | 50 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void); |
51 | void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); | ||
51 | #endif | 52 | #endif |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index c84778dd..3cfda7ca 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -361,6 +361,7 @@ static const struct gpu_ops gv100_ops = { | |||
361 | .get_hw_accessor_stream_out_mode = | 361 | .get_hw_accessor_stream_out_mode = |
362 | gr_gv100_get_hw_accessor_stream_out_mode, | 362 | gr_gv100_get_hw_accessor_stream_out_mode, |
363 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, | 363 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, |
364 | .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, | ||
364 | .record_sm_error_state = gv11b_gr_record_sm_error_state, | 365 | .record_sm_error_state = gv11b_gr_record_sm_error_state, |
365 | .update_sm_error_state = gv11b_gr_update_sm_error_state, | 366 | .update_sm_error_state = gv11b_gr_update_sm_error_state, |
366 | .clear_sm_error_state = gm20b_gr_clear_sm_error_state, | 367 | .clear_sm_error_state = gm20b_gr_clear_sm_error_state, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h index 268efc52..a7ba460e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_perf_gv100.h | |||
@@ -232,4 +232,28 @@ static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void) | |||
232 | { | 232 | { |
233 | return 0x10U; | 233 | return 0x10U; |
234 | } | 234 | } |
/*
 * Register offset of perf_pmmsys_engine_sel for perfmon index i:
 * 0x0024006c for index 0, advancing 512 bytes per index.
 */
static inline u32 perf_pmmsys_engine_sel_r(u32 i)
{
	const u32 base = 0x0024006cU;
	const u32 stride = 512U;

	return base + (i * stride);
}
/* Number of perf_pmmsys_engine_sel register instances (0x20, i.e. 32). */
static inline u32 perf_pmmsys_engine_sel__size_1_v(void)
{
	const u32 count = 0x00000020U;

	return count;
}
/*
 * Register offset of perf_pmmfbp_engine_sel for perfmon index i:
 * 0x0020006c for index 0, advancing 512 bytes per index.
 */
static inline u32 perf_pmmfbp_engine_sel_r(u32 i)
{
	const u32 base = 0x0020006cU;
	const u32 stride = 512U;

	return base + (i * stride);
}
/* Number of perf_pmmfbp_engine_sel register instances (0x20, i.e. 32). */
static inline u32 perf_pmmfbp_engine_sel__size_1_v(void)
{
	const u32 count = 0x00000020U;

	return count;
}
/*
 * Register offset of perf_pmmgpc_engine_sel for perfmon index i:
 * 0x0018006c for index 0, advancing 512 bytes per index.
 */
static inline u32 perf_pmmgpc_engine_sel_r(u32 i)
{
	const u32 base = 0x0018006cU;
	const u32 stride = 512U;

	return base + (i * stride);
}
/* Number of perf_pmmgpc_engine_sel register instances (0x20, i.e. 32). */
static inline u32 perf_pmmgpc_engine_sel__size_1_v(void)
{
	const u32 count = 0x00000020U;

	return count;
}
235 | #endif | 259 | #endif |