summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
diff options
context:
space:
mode:
authorVaibhav Kachore <vkachore@nvidia.com>2018-02-22 06:15:30 -0500
committerTejal Kudav <tkudav@nvidia.com>2018-06-14 09:44:08 -0400
commitca3215c6b23c7d855ced899d8090aaa8ce9a9fa3 (patch)
tree710114451d4838f82a9e9998db52b81cf76d68c9 /drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
parent97d697a8481ca0c348102f04165903e3205302ed (diff)
gpu: nvgpu: add support for FECS VA
- On t186, ucode expects a physical address to be programmed for the FECS trace buffer. - On t194, ucode expects a GPU VA to be programmed for the FECS trace buffer. This patch adds extra support to handle this change for Linux native. - Increase the size of the FECS trace buffer (a few entries were getting dropped due to overflow of the FECS trace buffer.) - This moves FECS trace buffer handling into the global context buffer. - This adds an extra check for the update of the mailbox1 register. (Bug 200417403) EVLR-2077 Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723 Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1536028 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Nirav Patel <nipatel@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c102
1 files changed, 54 insertions, 48 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index c9d7ea06..117920da 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -28,6 +28,7 @@
28 28
29#include <nvgpu/kmem.h> 29#include <nvgpu/kmem.h>
30#include <nvgpu/dma.h> 30#include <nvgpu/dma.h>
31#include <nvgpu/enabled.h>
31#include <nvgpu/bug.h> 32#include <nvgpu/bug.h>
32#include <nvgpu/hashtable.h> 33#include <nvgpu/hashtable.h>
33#include <nvgpu/circ_buf.h> 34#include <nvgpu/circ_buf.h>
@@ -51,7 +52,7 @@
51 * If HW circular buffer is getting too many "buffer full" conditions, 52 * If HW circular buffer is getting too many "buffer full" conditions,
52 * increasing this constant should help (it drives Linux' internal buffer size). 53 * increasing this constant should help (it drives Linux' internal buffer size).
53 */ 54 */
54#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) 55#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
55#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ 56#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
56#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) 57#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
57#define GK20A_FECS_TRACE_PTIMER_SHIFT 5 58#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent {
74 75
75struct gk20a_fecs_trace { 76struct gk20a_fecs_trace {
76 77
77 struct nvgpu_mem trace_buf;
78 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); 78 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
79 struct nvgpu_mutex hash_lock; 79 struct nvgpu_mutex hash_lock;
80 struct nvgpu_mutex poll_lock; 80 struct nvgpu_mutex poll_lock;
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void)
106} 106}
107 107
108static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( 108static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
109 struct gk20a_fecs_trace *trace, int idx) 109 struct gk20a *g, int idx)
110{ 110{
111 struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
112
111 return (struct gk20a_fecs_trace_record *) 113 return (struct gk20a_fecs_trace_record *)
112 ((u8 *) trace->trace_buf.cpu_va 114 ((u8 *) mem->cpu_va
113 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); 115 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
114} 116}
115 117
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
258 struct gk20a_fecs_trace *trace = g->fecs_trace; 260 struct gk20a_fecs_trace *trace = g->fecs_trace;
259 pid_t cur_pid; 261 pid_t cur_pid;
260 pid_t new_pid; 262 pid_t new_pid;
263 int count = 0;
261 264
262 /* for now, only one VM */ 265 /* for now, only one VM */
263 const int vmid = 0; 266 const int vmid = 0;
264 267
265 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( 268 struct gk20a_fecs_trace_record *r =
266 trace, index); 269 gk20a_fecs_trace_get_record(g, index);
267 270
268 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, 271 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
269 "consuming record trace=%p read=%d record=%p", trace, index, r); 272 "consuming record trace=%p read=%d record=%p", trace, index, r);
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
334 continue; 337 continue;
335 338
336 gk20a_ctxsw_trace_write(g, &entry); 339 gk20a_ctxsw_trace_write(g, &entry);
340 count++;
337 } 341 }
338 342
339 gk20a_ctxsw_trace_wake_up(g, vmid); 343 gk20a_ctxsw_trace_wake_up(g, vmid);
340 return 0; 344 return count;
341} 345}
342 346
343int gk20a_fecs_trace_poll(struct gk20a *g) 347int gk20a_fecs_trace_poll(struct gk20a *g)
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
376 g->ops.mm.fb_flush(g); 380 g->ops.mm.fb_flush(g);
377 381
378 while (read != write) { 382 while (read != write) {
379 /* Ignore error code, as we want to consume all records */ 383 cnt = gk20a_fecs_trace_ring_read(g, read);
380 (void)gk20a_fecs_trace_ring_read(g, read); 384 if (cnt <= 0)
385 break;
381 386
382 /* Get to next record. */ 387 /* Get to next record. */
383 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); 388 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
384 } 389 }
385 390
386 /* ensure FECS records has been updated before incrementing read index */ 391 /* ensure FECS records has been updated before incrementing read index */
387 nvgpu_smp_wmb(); 392 nvgpu_wmb();
388 gk20a_fecs_trace_set_read_index(g, read); 393 gk20a_fecs_trace_set_read_index(g, read);
389 394
390done: 395done:
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
411 return 0; 416 return 0;
412} 417}
413 418
414static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) 419size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
415{ 420{
416 struct gk20a_fecs_trace *trace = g->fecs_trace; 421 return GK20A_FECS_TRACE_NUM_RECORDS
417 422 * ctxsw_prog_record_timestamp_record_size_in_bytes_v();
418 return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
419 * ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
420 &trace->trace_buf);
421}
422
423static void gk20a_fecs_trace_free_ring(struct gk20a *g)
424{
425 struct gk20a_fecs_trace *trace = g->fecs_trace;
426
427 nvgpu_dma_free(g, &trace->trace_buf);
428} 423}
429 424
430#ifdef CONFIG_DEBUG_FS 425#ifdef CONFIG_DEBUG_FS
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
460{ 455{
461 loff_t *pos = (loff_t *) v; 456 loff_t *pos = (loff_t *) v;
462 struct gk20a *g = *(struct gk20a **)s->private; 457 struct gk20a *g = *(struct gk20a **)s->private;
463 struct gk20a_fecs_trace *trace = g->fecs_trace; 458 struct gk20a_fecs_trace_record *r =
464 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); 459 gk20a_fecs_trace_get_record(g, *pos);
465 int i; 460 int i;
466 const u32 invalid_tag = 461 const u32 invalid_tag =
467 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); 462 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
588 goto clean_poll_lock; 583 goto clean_poll_lock;
589 584
590 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); 585 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
591 err = gk20a_fecs_trace_alloc_ring(g);
592 if (err) {
593 nvgpu_warn(g, "failed to allocate FECS ring");
594 goto clean_hash_lock;
595 }
596
597 hash_init(trace->pid_hash_table); 586 hash_init(trace->pid_hash_table);
598 587
599 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); 588 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
604 593
605 return 0; 594 return 0;
606 595
607clean_hash_lock:
608 nvgpu_mutex_destroy(&trace->hash_lock);
609clean_poll_lock: 596clean_poll_lock:
610 nvgpu_mutex_destroy(&trace->poll_lock); 597 nvgpu_mutex_destroy(&trace->poll_lock);
611clean: 598clean:
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
624 611
625 u32 lo; 612 u32 lo;
626 u32 hi; 613 u32 hi;
627 u64 pa; 614 u64 addr;
628 struct tsg_gk20a *tsg; 615 struct tsg_gk20a *tsg;
629 struct nvgpu_gr_ctx *ch_ctx; 616 struct nvgpu_gr_ctx *ch_ctx;
630 struct gk20a_fecs_trace *trace = g->fecs_trace; 617 struct gk20a_fecs_trace *trace = g->fecs_trace;
631 struct nvgpu_mem *mem; 618 struct nvgpu_mem *mem;
632 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); 619 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
633 pid_t pid; 620 pid_t pid;
634 u32 aperture; 621 u32 aperture_mask;
635 622
636 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, 623 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
637 "chid=%d context_ptr=%x inst_block=%llx", 624 "chid=%d context_ptr=%x inst_block=%llx",
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
648 if (!trace) 635 if (!trace)
649 return -ENOMEM; 636 return -ENOMEM;
650 637
651 pa = nvgpu_inst_block_addr(g, &trace->trace_buf); 638 mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
652 if (!pa) 639
653 return -ENOMEM; 640 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 641 addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
642 nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
643 aperture_mask = 0;
644 } else {
645 addr = nvgpu_inst_block_addr(g, mem);
646 nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
647 aperture_mask = nvgpu_aperture_mask(g, mem,
655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), 648 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), 649 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
657 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); 650 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
651 }
652 if (!addr)
653 return -ENOMEM;
654
655 lo = u64_lo32(addr);
656 hi = u64_hi32(addr);
657
658 mem = &ch_ctx->mem;
658 659
659 if (nvgpu_mem_begin(g, mem)) 660 if (nvgpu_mem_begin(g, mem))
660 return -ENOMEM; 661 return -ENOMEM;
661 662
662 lo = u64_lo32(pa);
663 hi = u64_hi32(pa);
664
665 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, 663 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
666 lo, GK20A_FECS_TRACE_NUM_RECORDS); 664 lo, GK20A_FECS_TRACE_NUM_RECORDS);
667 665
668 nvgpu_mem_wr(g, mem, 666 nvgpu_mem_wr(g, mem,
667 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
668 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
669 GK20A_FECS_TRACE_NUM_RECORDS));
670
671 nvgpu_mem_end(g, mem);
672
673 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
674 mem = &ch->ctx_header.mem;
675
676 if (nvgpu_mem_begin(g, mem))
677 return -ENOMEM;
678
679 nvgpu_mem_wr(g, mem,
669 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 680 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
670 lo); 681 lo);
671 nvgpu_mem_wr(g, mem, 682 nvgpu_mem_wr(g, mem,
672 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 683 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
673 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | 684 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
674 aperture); 685 aperture_mask);
675 nvgpu_mem_wr(g, mem,
676 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
677 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
678 GK20A_FECS_TRACE_NUM_RECORDS));
679 686
680 nvgpu_mem_end(g, mem); 687 nvgpu_mem_end(g, mem);
681 688
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
728 return 0; 735 return 0;
729 736
730 nvgpu_thread_stop(&trace->poll_task); 737 nvgpu_thread_stop(&trace->poll_task);
731 gk20a_fecs_trace_free_ring(g);
732 gk20a_fecs_trace_free_hash_table(g); 738 gk20a_fecs_trace_free_hash_table(g);
733 739
734 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); 740 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);