summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVaibhav Kachore <vkachore@nvidia.com>2018-02-22 06:15:30 -0500
committerTejal Kudav <tkudav@nvidia.com>2018-06-14 09:44:08 -0400
commitca3215c6b23c7d855ced899d8090aaa8ce9a9fa3 (patch)
tree710114451d4838f82a9e9998db52b81cf76d68c9
parent97d697a8481ca0c348102f04165903e3205302ed (diff)
gpu: nvgpu: add support for FECS VA
- On t186, the ucode expects a physical address to be programmed for the FECS trace buffer. - On t194, the ucode expects a GPU VA to be programmed for the FECS trace buffer. This patch adds extra support to handle this change for Linux native. - Increase the size of the FECS trace buffer (as a few entries were getting dropped due to overflow of the FECS trace buffer.) - This moves FECS trace buffer handling into the global context buffer. - This adds an extra check for the update of the mailbox1 register. (Bug 200417403) EVLR-2077 Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723 Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1536028 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Nirav Patel <nipatel@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c102
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c42
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c1
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c1
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c27
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/enabled.h1
8 files changed, 119 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index c9d7ea06..117920da 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -28,6 +28,7 @@
28 28
29#include <nvgpu/kmem.h> 29#include <nvgpu/kmem.h>
30#include <nvgpu/dma.h> 30#include <nvgpu/dma.h>
31#include <nvgpu/enabled.h>
31#include <nvgpu/bug.h> 32#include <nvgpu/bug.h>
32#include <nvgpu/hashtable.h> 33#include <nvgpu/hashtable.h>
33#include <nvgpu/circ_buf.h> 34#include <nvgpu/circ_buf.h>
@@ -51,7 +52,7 @@
51 * If HW circular buffer is getting too many "buffer full" conditions, 52 * If HW circular buffer is getting too many "buffer full" conditions,
52 * increasing this constant should help (it drives Linux' internal buffer size). 53 * increasing this constant should help (it drives Linux' internal buffer size).
53 */ 54 */
54#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) 55#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
55#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ 56#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
56#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) 57#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
57#define GK20A_FECS_TRACE_PTIMER_SHIFT 5 58#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent {
74 75
75struct gk20a_fecs_trace { 76struct gk20a_fecs_trace {
76 77
77 struct nvgpu_mem trace_buf;
78 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); 78 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
79 struct nvgpu_mutex hash_lock; 79 struct nvgpu_mutex hash_lock;
80 struct nvgpu_mutex poll_lock; 80 struct nvgpu_mutex poll_lock;
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void)
106} 106}
107 107
108static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( 108static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
109 struct gk20a_fecs_trace *trace, int idx) 109 struct gk20a *g, int idx)
110{ 110{
111 struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
112
111 return (struct gk20a_fecs_trace_record *) 113 return (struct gk20a_fecs_trace_record *)
112 ((u8 *) trace->trace_buf.cpu_va 114 ((u8 *) mem->cpu_va
113 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); 115 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
114} 116}
115 117
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
258 struct gk20a_fecs_trace *trace = g->fecs_trace; 260 struct gk20a_fecs_trace *trace = g->fecs_trace;
259 pid_t cur_pid; 261 pid_t cur_pid;
260 pid_t new_pid; 262 pid_t new_pid;
263 int count = 0;
261 264
262 /* for now, only one VM */ 265 /* for now, only one VM */
263 const int vmid = 0; 266 const int vmid = 0;
264 267
265 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( 268 struct gk20a_fecs_trace_record *r =
266 trace, index); 269 gk20a_fecs_trace_get_record(g, index);
267 270
268 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, 271 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
269 "consuming record trace=%p read=%d record=%p", trace, index, r); 272 "consuming record trace=%p read=%d record=%p", trace, index, r);
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
334 continue; 337 continue;
335 338
336 gk20a_ctxsw_trace_write(g, &entry); 339 gk20a_ctxsw_trace_write(g, &entry);
340 count++;
337 } 341 }
338 342
339 gk20a_ctxsw_trace_wake_up(g, vmid); 343 gk20a_ctxsw_trace_wake_up(g, vmid);
340 return 0; 344 return count;
341} 345}
342 346
343int gk20a_fecs_trace_poll(struct gk20a *g) 347int gk20a_fecs_trace_poll(struct gk20a *g)
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
376 g->ops.mm.fb_flush(g); 380 g->ops.mm.fb_flush(g);
377 381
378 while (read != write) { 382 while (read != write) {
379 /* Ignore error code, as we want to consume all records */ 383 cnt = gk20a_fecs_trace_ring_read(g, read);
380 (void)gk20a_fecs_trace_ring_read(g, read); 384 if (cnt <= 0)
385 break;
381 386
382 /* Get to next record. */ 387 /* Get to next record. */
383 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); 388 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
384 } 389 }
385 390
386 /* ensure FECS records has been updated before incrementing read index */ 391 /* ensure FECS records has been updated before incrementing read index */
387 nvgpu_smp_wmb(); 392 nvgpu_wmb();
388 gk20a_fecs_trace_set_read_index(g, read); 393 gk20a_fecs_trace_set_read_index(g, read);
389 394
390done: 395done:
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
411 return 0; 416 return 0;
412} 417}
413 418
414static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) 419size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
415{ 420{
416 struct gk20a_fecs_trace *trace = g->fecs_trace; 421 return GK20A_FECS_TRACE_NUM_RECORDS
417 422 * ctxsw_prog_record_timestamp_record_size_in_bytes_v();
418 return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
419 * ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
420 &trace->trace_buf);
421}
422
423static void gk20a_fecs_trace_free_ring(struct gk20a *g)
424{
425 struct gk20a_fecs_trace *trace = g->fecs_trace;
426
427 nvgpu_dma_free(g, &trace->trace_buf);
428} 423}
429 424
430#ifdef CONFIG_DEBUG_FS 425#ifdef CONFIG_DEBUG_FS
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
460{ 455{
461 loff_t *pos = (loff_t *) v; 456 loff_t *pos = (loff_t *) v;
462 struct gk20a *g = *(struct gk20a **)s->private; 457 struct gk20a *g = *(struct gk20a **)s->private;
463 struct gk20a_fecs_trace *trace = g->fecs_trace; 458 struct gk20a_fecs_trace_record *r =
464 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); 459 gk20a_fecs_trace_get_record(g, *pos);
465 int i; 460 int i;
466 const u32 invalid_tag = 461 const u32 invalid_tag =
467 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); 462 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
588 goto clean_poll_lock; 583 goto clean_poll_lock;
589 584
590 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); 585 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
591 err = gk20a_fecs_trace_alloc_ring(g);
592 if (err) {
593 nvgpu_warn(g, "failed to allocate FECS ring");
594 goto clean_hash_lock;
595 }
596
597 hash_init(trace->pid_hash_table); 586 hash_init(trace->pid_hash_table);
598 587
599 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); 588 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
604 593
605 return 0; 594 return 0;
606 595
607clean_hash_lock:
608 nvgpu_mutex_destroy(&trace->hash_lock);
609clean_poll_lock: 596clean_poll_lock:
610 nvgpu_mutex_destroy(&trace->poll_lock); 597 nvgpu_mutex_destroy(&trace->poll_lock);
611clean: 598clean:
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
624 611
625 u32 lo; 612 u32 lo;
626 u32 hi; 613 u32 hi;
627 u64 pa; 614 u64 addr;
628 struct tsg_gk20a *tsg; 615 struct tsg_gk20a *tsg;
629 struct nvgpu_gr_ctx *ch_ctx; 616 struct nvgpu_gr_ctx *ch_ctx;
630 struct gk20a_fecs_trace *trace = g->fecs_trace; 617 struct gk20a_fecs_trace *trace = g->fecs_trace;
631 struct nvgpu_mem *mem; 618 struct nvgpu_mem *mem;
632 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); 619 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
633 pid_t pid; 620 pid_t pid;
634 u32 aperture; 621 u32 aperture_mask;
635 622
636 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, 623 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
637 "chid=%d context_ptr=%x inst_block=%llx", 624 "chid=%d context_ptr=%x inst_block=%llx",
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
648 if (!trace) 635 if (!trace)
649 return -ENOMEM; 636 return -ENOMEM;
650 637
651 pa = nvgpu_inst_block_addr(g, &trace->trace_buf); 638 mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
652 if (!pa) 639
653 return -ENOMEM; 640 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 641 addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
642 nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
643 aperture_mask = 0;
644 } else {
645 addr = nvgpu_inst_block_addr(g, mem);
646 nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
647 aperture_mask = nvgpu_aperture_mask(g, mem,
655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), 648 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), 649 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
657 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); 650 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
651 }
652 if (!addr)
653 return -ENOMEM;
654
655 lo = u64_lo32(addr);
656 hi = u64_hi32(addr);
657
658 mem = &ch_ctx->mem;
658 659
659 if (nvgpu_mem_begin(g, mem)) 660 if (nvgpu_mem_begin(g, mem))
660 return -ENOMEM; 661 return -ENOMEM;
661 662
662 lo = u64_lo32(pa);
663 hi = u64_hi32(pa);
664
665 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, 663 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
666 lo, GK20A_FECS_TRACE_NUM_RECORDS); 664 lo, GK20A_FECS_TRACE_NUM_RECORDS);
667 665
668 nvgpu_mem_wr(g, mem, 666 nvgpu_mem_wr(g, mem,
667 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
668 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
669 GK20A_FECS_TRACE_NUM_RECORDS));
670
671 nvgpu_mem_end(g, mem);
672
673 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
674 mem = &ch->ctx_header.mem;
675
676 if (nvgpu_mem_begin(g, mem))
677 return -ENOMEM;
678
679 nvgpu_mem_wr(g, mem,
669 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 680 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
670 lo); 681 lo);
671 nvgpu_mem_wr(g, mem, 682 nvgpu_mem_wr(g, mem,
672 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 683 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
673 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | 684 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
674 aperture); 685 aperture_mask);
675 nvgpu_mem_wr(g, mem,
676 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
677 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
678 GK20A_FECS_TRACE_NUM_RECORDS));
679 686
680 nvgpu_mem_end(g, mem); 687 nvgpu_mem_end(g, mem);
681 688
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
728 return 0; 735 return 0;
729 736
730 nvgpu_thread_stop(&trace->poll_task); 737 nvgpu_thread_stop(&trace->poll_task);
731 gk20a_fecs_trace_free_ring(g);
732 gk20a_fecs_trace_free_hash_table(g); 738 gk20a_fecs_trace_free_hash_table(g);
733 739
734 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); 740 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
index e310a18a..acac14c6 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g,
39int gk20a_fecs_trace_enable(struct gk20a *g); 39int gk20a_fecs_trace_enable(struct gk20a *g);
40int gk20a_fecs_trace_disable(struct gk20a *g); 40int gk20a_fecs_trace_disable(struct gk20a *g);
41bool gk20a_fecs_trace_is_enabled(struct gk20a *g); 41bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
42size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
42 43
43#endif /* __FECS_TRACE_GK20A_H */ 44#endif /* __FECS_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index a082cd92..7c51afca 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -41,6 +41,7 @@
41 41
42#include "gk20a.h" 42#include "gk20a.h"
43#include "gr_gk20a.h" 43#include "gr_gk20a.h"
44#include "gk20a/fecs_trace_gk20a.h"
44#include "gr_ctx_gk20a.h" 45#include "gr_ctx_gk20a.h"
45#include "gr_pri_gk20a.h" 46#include "gr_pri_gk20a.h"
46#include "regops_gk20a.h" 47#include "regops_gk20a.h"
@@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
2499 return ret; 2500 return ret;
2500 } 2501 }
2501 g->gr.ctx_vars.priv_access_map_size = 512 * 1024; 2502 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
2503#ifdef CONFIG_GK20A_CTXSW_TRACE
2504 g->gr.ctx_vars.fecs_trace_buffer_size =
2505 gk20a_fecs_trace_buffer_size(g);
2506#endif
2502 } 2507 }
2503 2508
2504 nvgpu_log_fn(g, "done"); 2509 nvgpu_log_fn(g, "done");
@@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2630 if (err) 2635 if (err)
2631 goto clean_up; 2636 goto clean_up;
2632 2637
2638#ifdef CONFIG_GK20A_CTXSW_TRACE
2639 nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
2640 gr->ctx_vars.fecs_trace_buffer_size);
2641
2642 err = nvgpu_dma_alloc_sys(g,
2643 gr->ctx_vars.fecs_trace_buffer_size,
2644 &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
2645 if (err)
2646 goto clean_up;
2647
2648 gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
2649 gk20a_gr_destroy_ctx_buffer;
2650#endif
2651
2633 nvgpu_log_fn(g, "done"); 2652 nvgpu_log_fn(g, "done");
2634 return 0; 2653 return 0;
2635 2654
@@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2769 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; 2788 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
2770 2789
2771 tsg->gr_ctx.global_ctx_buffer_mapped = true; 2790 tsg->gr_ctx.global_ctx_buffer_mapped = true;
2791
2792#ifdef CONFIG_GK20A_CTXSW_TRACE
2793 /* FECS trace buffer */
2794 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
2795 mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
2796 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2797 gk20a_mem_flag_none, true, mem->aperture);
2798 if (!gpu_va)
2799 goto clean_up;
2800 g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
2801 g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
2802 g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
2803 }
2804#endif
2805
2772 return 0; 2806 return 0;
2773 2807
2774clean_up: 2808clean_up:
@@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3050 "fail to commit gr ctx buffer"); 3084 "fail to commit gr ctx buffer");
3051 goto out; 3085 goto out;
3052 } 3086 }
3087#ifdef CONFIG_GK20A_CTXSW_TRACE
3088 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3089 err = g->ops.fecs_trace.bind_channel(g, c);
3090 if (err)
3091 nvgpu_warn(g,
3092 "fail to bind channel for ctxsw trace");
3093 }
3094#endif
3053 } 3095 }
3054 3096
3055 nvgpu_log_fn(g, "done"); 3097 nvgpu_log_fn(g, "done");
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 01c7f43d..66d3c22e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ {
79 ATTRIBUTE_VPR = 5, 79 ATTRIBUTE_VPR = 5,
80 GOLDEN_CTX = 6, 80 GOLDEN_CTX = 6,
81 PRIV_ACCESS_MAP = 7, 81 PRIV_ACCESS_MAP = 7,
82 FECS_TRACE_BUFFER = 8,
82 NR_GLOBAL_CTX_BUF = 9 83 NR_GLOBAL_CTX_BUF = 9
83}; 84};
84 85
@@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ {
89 ATTRIBUTE_VA = 2, 90 ATTRIBUTE_VA = 2,
90 GOLDEN_CTX_VA = 3, 91 GOLDEN_CTX_VA = 3,
91 PRIV_ACCESS_MAP_VA = 4, 92 PRIV_ACCESS_MAP_VA = 4,
93 FECS_TRACE_BUFFER_VA = 5,
92 NR_GLOBAL_CTX_BUF_VA = 6 94 NR_GLOBAL_CTX_BUF_VA = 6
93}; 95};
94 96
@@ -290,6 +292,8 @@ struct gr_gk20a {
290 292
291 u32 priv_access_map_size; 293 u32 priv_access_map_size;
292 294
295 u32 fecs_trace_buffer_size;
296
293 struct gr_ucode_gk20a ucode; 297 struct gr_ucode_gk20a ucode;
294 298
295 struct av_list_gk20a sw_bundle_init; 299 struct av_list_gk20a sw_bundle_init;
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 61c6cb0f..632f1063 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g)
834 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true); 834 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true);
835 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); 835 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
836 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); 836 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
837 __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
837 838
838 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ 839 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */
839 if (gops->fuse.check_priv_security(g)) 840 if (gops->fuse.check_priv_security(g))
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 62164d16..80e07b78 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g)
732 732
733 __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); 733 __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
734 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); 734 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
735 __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
735 736
736 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ 737 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */
737 if (gops->fuse.check_priv_security(g)) 738 if (gops->fuse.check_priv_security(g))
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index f9ac1f2a..9d7dca95 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = {
586 }, 586 },
587#ifdef CONFIG_GK20A_CTXSW_TRACE 587#ifdef CONFIG_GK20A_CTXSW_TRACE
588 .fecs_trace = { 588 .fecs_trace = {
589 .alloc_user_buffer = NULL, 589 .alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc,
590 .free_user_buffer = NULL, 590 .free_user_buffer = gk20a_ctxsw_dev_ring_free,
591 .mmap_user_buffer = NULL, 591 .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
592 .init = NULL, 592 .init = gk20a_fecs_trace_init,
593 .deinit = NULL, 593 .deinit = gk20a_fecs_trace_deinit,
594 .enable = NULL, 594 .enable = gk20a_fecs_trace_enable,
595 .disable = NULL, 595 .disable = gk20a_fecs_trace_disable,
596 .is_enabled = NULL, 596 .is_enabled = gk20a_fecs_trace_is_enabled,
597 .reset = NULL, 597 .reset = gk20a_fecs_trace_reset,
598 .flush = NULL, 598 .flush = NULL,
599 .poll = NULL, 599 .poll = gk20a_fecs_trace_poll,
600 .bind_channel = NULL, 600 .bind_channel = gk20a_fecs_trace_bind_channel,
601 .unbind_channel = NULL, 601 .unbind_channel = gk20a_fecs_trace_unbind_channel,
602 .max_entries = NULL, 602 .max_entries = gk20a_gr_max_entries,
603 }, 603 },
604#endif /* CONFIG_GK20A_CTXSW_TRACE */ 604#endif /* CONFIG_GK20A_CTXSW_TRACE */
605 .mm = { 605 .mm = {
@@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g)
843 } 843 }
844 844
845 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); 845 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
846 __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
846 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; 847 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
847 848
848 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); 849 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 0ffb0488..c352488c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -34,6 +34,7 @@ struct gk20a;
34#define NVGPU_IS_FMODEL 1 34#define NVGPU_IS_FMODEL 1
35#define NVGPU_DRIVER_IS_DYING 2 35#define NVGPU_DRIVER_IS_DYING 2
36#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 36#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
37#define NVGPU_FECS_TRACE_VA 4
37 38
38/* 39/*
39 * ECC flags 40 * ECC flags