summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
diff options
context:
space:
mode:
author: Thomas Fleury <tfleury@nvidia.com> 2016-08-29 14:05:00 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-01-04 18:53:54 -0500
commit: b82d27e38490dc1155ece7d433fbcb6713b5a53b (patch)
tree: 929adc133e4f79d6e407a9596ab34d7a10ee78a4 /drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
parent: 7feff293e4f88ddae92ccf05ff86386c8d1f5c0e (diff)
gpu: nvgpu: FECS trace support on gp106
Enable FECS ctxsw tracing for gp106. Ensure that FECS records have been written to memory before accessing the ring. Update read index only once all records have been processed.

Jira EVLR-424

Change-Id: I1a21f841fcce1588397408906d77e2c3bf4a8c01
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1258243
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c 25
1 files changed, 21 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 4bfbf503..a07faa93 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -258,6 +258,12 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
258 return -EINVAL; 258 return -EINVAL;
259 } 259 }
260 260
261 /* Clear magic_hi to detect cases where CPU could read write index
262 * before FECS record is actually written to DRAM. This should not happen
263 * as we force FECS writes to SYSMEM by reading through PRAMIN.
264 */
265 r->magic_hi = 0;
266
261 cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); 267 cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
262 new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); 268 new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
263 269
@@ -349,15 +355,21 @@ static int gk20a_fecs_trace_poll(struct gk20a *g)
349 "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", 355 "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
350 read, gk20a_fecs_trace_get_read_index(g), write, cnt); 356 read, gk20a_fecs_trace_get_read_index(g), write, cnt);
351 357
352 /* consume all records */ 358 /* Ensure all FECS writes have made it to SYSMEM */
359 g->ops.mm.fb_flush(g);
360
353 while (read != write) { 361 while (read != write) {
354 gk20a_fecs_trace_ring_read(g, read); 362 /* Ignore error code, as we want to consume all records */
363 (void)gk20a_fecs_trace_ring_read(g, read);
355 364
356 /* Get to next record. */ 365 /* Get to next record. */
357 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); 366 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
358 gk20a_fecs_trace_set_read_index(g, read);
359 } 367 }
360 368
369 /* ensure FECS records have been updated before incrementing read index */
370 wmb();
371 gk20a_fecs_trace_set_read_index(g, read);
372
361done: 373done:
362 mutex_unlock(&trace->poll_lock); 374 mutex_unlock(&trace->poll_lock);
363 gk20a_idle(g->dev); 375 gk20a_idle(g->dev);
@@ -597,6 +609,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
597 struct mem_desc *mem = &ch_ctx->gr_ctx->mem; 609 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
598 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); 610 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
599 pid_t pid; 611 pid_t pid;
612 u32 aperture;
600 613
601 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, 614 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
602 "hw_chid=%d context_ptr=%x inst_block=%llx", 615 "hw_chid=%d context_ptr=%x inst_block=%llx",
@@ -609,6 +622,9 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
609 pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); 622 pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf);
610 if (!pa) 623 if (!pa)
611 return -ENOMEM; 624 return -ENOMEM;
625 aperture = gk20a_aperture_mask(g, &trace->trace_buf,
626 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
627 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
612 628
613 if (gk20a_mem_begin(g, mem)) 629 if (gk20a_mem_begin(g, mem))
614 return -ENOMEM; 630 return -ENOMEM;
@@ -624,7 +640,8 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
624 lo); 640 lo);
625 gk20a_mem_wr(g, mem, 641 gk20a_mem_wr(g, mem,
626 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 642 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
627 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); 643 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
644 aperture);
628 gk20a_mem_wr(g, mem, 645 gk20a_mem_wr(g, mem,
629 ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 646 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
630 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( 647 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(