diff options
author | Thomas Fleury <tfleury@nvidia.com> | 2016-08-29 14:05:00 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-04 18:53:54 -0500 |
commit | b82d27e38490dc1155ece7d433fbcb6713b5a53b (patch) | |
tree | 929adc133e4f79d6e407a9596ab34d7a10ee78a4 /drivers | |
parent | 7feff293e4f88ddae92ccf05ff86386c8d1f5c0e (diff) |
gpu: nvgpu: FECS trace support on gp106
Enable FECS ctxsw tracing for gp106. Ensure that FECS records
have been written to memory before accessing the ring. Update
read index only once all records have been processed.
Jira EVLR-424
Change-Id: I1a21f841fcce1588397408906d77e2c3bf4a8c01
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1258243
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 25 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 |
2 files changed, 23 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 4bfbf503..a07faa93 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -258,6 +258,12 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) | |||
258 | return -EINVAL; | 258 | return -EINVAL; |
259 | } | 259 | } |
260 | 260 | ||
261 | /* Clear magic_hi to detect cases where CPU could read write index | ||
262 | * before FECS record is actually written to DRAM. This should not happen | ||
263 | * as we force FECS writes to SYSMEM by reading through PRAMIN. | ||
264 | */ | ||
265 | r->magic_hi = 0; | ||
266 | |||
261 | cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); | 267 | cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); |
262 | new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); | 268 | new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); |
263 | 269 | ||
@@ -349,15 +355,21 @@ static int gk20a_fecs_trace_poll(struct gk20a *g) | |||
349 | "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", | 355 | "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", |
350 | read, gk20a_fecs_trace_get_read_index(g), write, cnt); | 356 | read, gk20a_fecs_trace_get_read_index(g), write, cnt); |
351 | 357 | ||
352 | /* consume all records */ | 358 | /* Ensure all FECS writes have made it to SYSMEM */ |
359 | g->ops.mm.fb_flush(g); | ||
360 | |||
353 | while (read != write) { | 361 | while (read != write) { |
354 | gk20a_fecs_trace_ring_read(g, read); | 362 | /* Ignore error code, as we want to consume all records */ |
363 | (void)gk20a_fecs_trace_ring_read(g, read); | ||
355 | 364 | ||
356 | /* Get to next record. */ | 365 | /* Get to next record. */ |
357 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); | 366 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); |
358 | gk20a_fecs_trace_set_read_index(g, read); | ||
359 | } | 367 | } |
360 | 368 | ||
369 | /* ensure FECS records have been updated before incrementing read index */ | ||
370 | wmb(); | ||
371 | gk20a_fecs_trace_set_read_index(g, read); | ||
372 | |||
361 | done: | 373 | done: |
362 | mutex_unlock(&trace->poll_lock); | 374 | mutex_unlock(&trace->poll_lock); |
363 | gk20a_idle(g->dev); | 375 | gk20a_idle(g->dev); |
@@ -597,6 +609,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
597 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; | 609 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; |
598 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); | 610 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); |
599 | pid_t pid; | 611 | pid_t pid; |
612 | u32 aperture; | ||
600 | 613 | ||
601 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | 614 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, |
602 | "hw_chid=%d context_ptr=%x inst_block=%llx", | 615 | "hw_chid=%d context_ptr=%x inst_block=%llx", |
@@ -609,6 +622,9 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
609 | pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); | 622 | pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); |
610 | if (!pa) | 623 | if (!pa) |
611 | return -ENOMEM; | 624 | return -ENOMEM; |
625 | aperture = gk20a_aperture_mask(g, &trace->trace_buf, | ||
626 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), | ||
627 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); | ||
612 | 628 | ||
613 | if (gk20a_mem_begin(g, mem)) | 629 | if (gk20a_mem_begin(g, mem)) |
614 | return -ENOMEM; | 630 | return -ENOMEM; |
@@ -624,7 +640,8 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
624 | lo); | 640 | lo); |
625 | gk20a_mem_wr(g, mem, | 641 | gk20a_mem_wr(g, mem, |
626 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), | 642 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), |
627 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); | 643 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | |
644 | aperture); | ||
628 | gk20a_mem_wr(g, mem, | 645 | gk20a_mem_wr(g, mem, |
629 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | 646 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), |
630 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | 647 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index ee361953..0badd0a1 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "gk20a/gk20a.h" | 21 | #include "gk20a/gk20a.h" |
22 | 22 | ||
23 | #include "gp10b/gr_gp10b.h" | 23 | #include "gp10b/gr_gp10b.h" |
24 | #include "gp10b/fecs_trace_gp10b.h" | ||
24 | #include "gp10b/mc_gp10b.h" | 25 | #include "gp10b/mc_gp10b.h" |
25 | #include "gp106/ltc_gp106.h" | 26 | #include "gp106/ltc_gp106.h" |
26 | #include "gp10b/mm_gp10b.h" | 27 | #include "gp10b/mm_gp10b.h" |
@@ -219,6 +220,7 @@ int gp106_init_hal(struct gk20a *g) | |||
219 | gops->pmupstate = true; | 220 | gops->pmupstate = true; |
220 | gp10b_init_mc(gops); | 221 | gp10b_init_mc(gops); |
221 | gp106_init_gr(gops); | 222 | gp106_init_gr(gops); |
223 | gp10b_init_fecs_trace_ops(gops); | ||
222 | gp106_init_ltc(gops); | 224 | gp106_init_ltc(gops); |
223 | gp106_init_fb(gops); | 225 | gp106_init_fb(gops); |
224 | gp106_init_fifo(gops); | 226 | gp106_init_fifo(gops); |