diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 25 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 |
2 files changed, 23 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 4bfbf503..a07faa93 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -258,6 +258,12 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) | |||
258 | return -EINVAL; | 258 | return -EINVAL; |
259 | } | 259 | } |
260 | 260 | ||
261 | /* Clear magic_hi to detect cases where CPU could read write index | ||
262 | * before FECS record is actually written to DRAM. This should not | ||
263 | * happen as we force FECS writes to SYSMEM by reading through PRAMIN. | ||
264 | */ | ||
265 | r->magic_hi = 0; | ||
266 | |||
261 | cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); | 267 | cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); |
262 | new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); | 268 | new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); |
263 | 269 | ||
@@ -349,15 +355,21 @@ static int gk20a_fecs_trace_poll(struct gk20a *g) | |||
349 | "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", | 355 | "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", |
350 | read, gk20a_fecs_trace_get_read_index(g), write, cnt); | 356 | read, gk20a_fecs_trace_get_read_index(g), write, cnt); |
351 | 357 | ||
352 | /* consume all records */ | 358 | /* Ensure all FECS writes have made it to SYSMEM */ |
359 | g->ops.mm.fb_flush(g); | ||
360 | |||
353 | while (read != write) { | 361 | while (read != write) { |
354 | gk20a_fecs_trace_ring_read(g, read); | 362 | /* Ignore error code, as we want to consume all records */ |
363 | (void)gk20a_fecs_trace_ring_read(g, read); | ||
355 | 364 | ||
356 | /* Get to next record. */ | 365 | /* Get to next record. */ |
357 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); | 366 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); |
358 | gk20a_fecs_trace_set_read_index(g, read); | ||
359 | } | 367 | } |
360 | 368 | ||
369 | /* ensure FECS records have been updated before incrementing read index */ | ||
370 | wmb(); | ||
371 | gk20a_fecs_trace_set_read_index(g, read); | ||
372 | |||
361 | done: | 373 | done: |
362 | mutex_unlock(&trace->poll_lock); | 374 | mutex_unlock(&trace->poll_lock); |
363 | gk20a_idle(g->dev); | 375 | gk20a_idle(g->dev); |
@@ -597,6 +609,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
597 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; | 609 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; |
598 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); | 610 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); |
599 | pid_t pid; | 611 | pid_t pid; |
612 | u32 aperture; | ||
600 | 613 | ||
601 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | 614 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, |
602 | "hw_chid=%d context_ptr=%x inst_block=%llx", | 615 | "hw_chid=%d context_ptr=%x inst_block=%llx", |
@@ -609,6 +622,9 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
609 | pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); | 622 | pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); |
610 | if (!pa) | 623 | if (!pa) |
611 | return -ENOMEM; | 624 | return -ENOMEM; |
625 | aperture = gk20a_aperture_mask(g, &trace->trace_buf, | ||
626 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), | ||
627 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); | ||
612 | 628 | ||
613 | if (gk20a_mem_begin(g, mem)) | 629 | if (gk20a_mem_begin(g, mem)) |
614 | return -ENOMEM; | 630 | return -ENOMEM; |
@@ -624,7 +640,8 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
624 | lo); | 640 | lo); |
625 | gk20a_mem_wr(g, mem, | 641 | gk20a_mem_wr(g, mem, |
626 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), | 642 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), |
627 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); | 643 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | |
644 | aperture); | ||
628 | gk20a_mem_wr(g, mem, | 645 | gk20a_mem_wr(g, mem, |
629 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | 646 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), |
630 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | 647 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index ee361953..0badd0a1 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "gk20a/gk20a.h" | 21 | #include "gk20a/gk20a.h" |
22 | 22 | ||
23 | #include "gp10b/gr_gp10b.h" | 23 | #include "gp10b/gr_gp10b.h" |
24 | #include "gp10b/fecs_trace_gp10b.h" | ||
24 | #include "gp10b/mc_gp10b.h" | 25 | #include "gp10b/mc_gp10b.h" |
25 | #include "gp106/ltc_gp106.h" | 26 | #include "gp106/ltc_gp106.h" |
26 | #include "gp10b/mm_gp10b.h" | 27 | #include "gp10b/mm_gp10b.h" |
@@ -219,6 +220,7 @@ int gp106_init_hal(struct gk20a *g) | |||
219 | gops->pmupstate = true; | 220 | gops->pmupstate = true; |
220 | gp10b_init_mc(gops); | 221 | gp10b_init_mc(gops); |
221 | gp106_init_gr(gops); | 222 | gp106_init_gr(gops); |
223 | gp10b_init_fecs_trace_ops(gops); | ||
222 | gp106_init_ltc(gops); | 224 | gp106_init_ltc(gops); |
223 | gp106_init_fb(gops); | 225 | gp106_init_fb(gops); |
224 | gp106_init_fifo(gops); | 226 | gp106_init_fifo(gops); |