summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVaibhav Kachore <vkachore@nvidia.com>2018-02-22 06:15:30 -0500
committerTejal Kudav <tkudav@nvidia.com>2018-06-14 09:44:08 -0400
commitca3215c6b23c7d855ced899d8090aaa8ce9a9fa3 (patch)
tree710114451d4838f82a9e9998db52b81cf76d68c9
parent97d697a8481ca0c348102f04165903e3205302ed (diff)
gpu: nvgpu: add support for FECS VA
- On t186, the ucode expects a physical address to be programmed for the FECS trace buffer. - On t194, the ucode expects a GPU VA to be programmed for the FECS trace buffer. This patch adds extra support to handle this change for Linux native. - Increase the size of the FECS trace buffer (as a few entries were getting dropped due to overflow of the FECS trace buffer.) - This moves FECS trace buffer handling into the global context buffer. - This adds an extra check for the update of the mailbox1 register. (Bug 200417403) EVLR-2077 Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723 Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1536028 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Nirav Patel <nipatel@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c102
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c42
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c1
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c1
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c27
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/enabled.h1
8 files changed, 119 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index c9d7ea06..117920da 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -28,6 +28,7 @@
28 28
29#include <nvgpu/kmem.h> 29#include <nvgpu/kmem.h>
30#include <nvgpu/dma.h> 30#include <nvgpu/dma.h>
31#include <nvgpu/enabled.h>
31#include <nvgpu/bug.h> 32#include <nvgpu/bug.h>
32#include <nvgpu/hashtable.h> 33#include <nvgpu/hashtable.h>
33#include <nvgpu/circ_buf.h> 34#include <nvgpu/circ_buf.h>
@@ -51,7 +52,7 @@
51 * If HW circular buffer is getting too many "buffer full" conditions, 52 * If HW circular buffer is getting too many "buffer full" conditions,
52 * increasing this constant should help (it drives Linux' internal buffer size). 53 * increasing this constant should help (it drives Linux' internal buffer size).
53 */ 54 */
54#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) 55#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
55#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ 56#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
56#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) 57#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
57#define GK20A_FECS_TRACE_PTIMER_SHIFT 5 58#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent {
74 75
75struct gk20a_fecs_trace { 76struct gk20a_fecs_trace {
76 77
77 struct nvgpu_mem trace_buf;
78 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); 78 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
79 struct nvgpu_mutex hash_lock; 79 struct nvgpu_mutex hash_lock;
80 struct nvgpu_mutex poll_lock; 80 struct nvgpu_mutex poll_lock;
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void)
106} 106}
107 107
108static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( 108static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
109 struct gk20a_fecs_trace *trace, int idx) 109 struct gk20a *g, int idx)
110{ 110{
111 struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
112
111 return (struct gk20a_fecs_trace_record *) 113 return (struct gk20a_fecs_trace_record *)
112 ((u8 *) trace->trace_buf.cpu_va 114 ((u8 *) mem->cpu_va
113 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); 115 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
114} 116}
115 117
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
258 struct gk20a_fecs_trace *trace = g->fecs_trace; 260 struct gk20a_fecs_trace *trace = g->fecs_trace;
259 pid_t cur_pid; 261 pid_t cur_pid;
260 pid_t new_pid; 262 pid_t new_pid;
263 int count = 0;
261 264
262 /* for now, only one VM */ 265 /* for now, only one VM */
263 const int vmid = 0; 266 const int vmid = 0;
264 267
265 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( 268 struct gk20a_fecs_trace_record *r =
266 trace, index); 269 gk20a_fecs_trace_get_record(g, index);
267 270
268 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, 271 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
269 "consuming record trace=%p read=%d record=%p", trace, index, r); 272 "consuming record trace=%p read=%d record=%p", trace, index, r);
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
334 continue; 337 continue;
335 338
336 gk20a_ctxsw_trace_write(g, &entry); 339 gk20a_ctxsw_trace_write(g, &entry);
340 count++;
337 } 341 }
338 342
339 gk20a_ctxsw_trace_wake_up(g, vmid); 343 gk20a_ctxsw_trace_wake_up(g, vmid);
340 return 0; 344 return count;
341} 345}
342 346
343int gk20a_fecs_trace_poll(struct gk20a *g) 347int gk20a_fecs_trace_poll(struct gk20a *g)
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
376 g->ops.mm.fb_flush(g); 380 g->ops.mm.fb_flush(g);
377 381
378 while (read != write) { 382 while (read != write) {
379 /* Ignore error code, as we want to consume all records */ 383 cnt = gk20a_fecs_trace_ring_read(g, read);
380 (void)gk20a_fecs_trace_ring_read(g, read); 384 if (cnt <= 0)
385 break;
381 386
382 /* Get to next record. */ 387 /* Get to next record. */
383 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); 388 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
384 } 389 }
385 390
386 /* ensure FECS records has been updated before incrementing read index */ 391 /* ensure FECS records has been updated before incrementing read index */
387 nvgpu_smp_wmb(); 392 nvgpu_wmb();
388 gk20a_fecs_trace_set_read_index(g, read); 393 gk20a_fecs_trace_set_read_index(g, read);
389 394
390done: 395done:
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
411 return 0; 416 return 0;
412} 417}
413 418
414static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) 419size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
415{ 420{
416 struct gk20a_fecs_trace *trace = g->fecs_trace; 421 return GK20A_FECS_TRACE_NUM_RECORDS
417 422 * ctxsw_prog_record_timestamp_record_size_in_bytes_v();
418 return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
419 * ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
420 &trace->trace_buf);
421}
422
423static void gk20a_fecs_trace_free_ring(struct gk20a *g)
424{
425 struct gk20a_fecs_trace *trace = g->fecs_trace;
426
427 nvgpu_dma_free(g, &trace->trace_buf);
428} 423}
429 424
430#ifdef CONFIG_DEBUG_FS 425#ifdef CONFIG_DEBUG_FS
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
460{ 455{
461 loff_t *pos = (loff_t *) v; 456 loff_t *pos = (loff_t *) v;
462 struct gk20a *g = *(struct gk20a **)s->private; 457 struct gk20a *g = *(struct gk20a **)s->private;
463 struct gk20a_fecs_trace *trace = g->fecs_trace; 458 struct gk20a_fecs_trace_record *r =
464 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); 459 gk20a_fecs_trace_get_record(g, *pos);
465 int i; 460 int i;
466 const u32 invalid_tag = 461 const u32 invalid_tag =
467 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); 462 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
588 goto clean_poll_lock; 583 goto clean_poll_lock;
589 584
590 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); 585 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
591 err = gk20a_fecs_trace_alloc_ring(g);
592 if (err) {
593 nvgpu_warn(g, "failed to allocate FECS ring");
594 goto clean_hash_lock;
595 }
596
597 hash_init(trace->pid_hash_table); 586 hash_init(trace->pid_hash_table);
598 587
599 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); 588 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
604 593
605 return 0; 594 return 0;
606 595
607clean_hash_lock:
608 nvgpu_mutex_destroy(&trace->hash_lock);
609clean_poll_lock: 596clean_poll_lock:
610 nvgpu_mutex_destroy(&trace->poll_lock); 597 nvgpu_mutex_destroy(&trace->poll_lock);
611clean: 598clean:
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
624 611
625 u32 lo; 612 u32 lo;
626 u32 hi; 613 u32 hi;
627 u64 pa; 614 u64 addr;
628 struct tsg_gk20a *tsg; 615 struct tsg_gk20a *tsg;
629 struct nvgpu_gr_ctx *ch_ctx; 616 struct nvgpu_gr_ctx *ch_ctx;
630 struct gk20a_fecs_trace *trace = g->fecs_trace; 617 struct gk20a_fecs_trace *trace = g->fecs_trace;
631 struct nvgpu_mem *mem; 618 struct nvgpu_mem *mem;
632 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); 619 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
633 pid_t pid; 620 pid_t pid;
634 u32 aperture; 621 u32 aperture_mask;
635 622
636 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, 623 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
637 "chid=%d context_ptr=%x inst_block=%llx", 624 "chid=%d context_ptr=%x inst_block=%llx",
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
648 if (!trace) 635 if (!trace)
649 return -ENOMEM; 636 return -ENOMEM;
650 637
651 pa = nvgpu_inst_block_addr(g, &trace->trace_buf); 638 mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
652 if (!pa) 639
653 return -ENOMEM; 640 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 641 addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
642 nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
643 aperture_mask = 0;
644 } else {
645 addr = nvgpu_inst_block_addr(g, mem);
646 nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
647 aperture_mask = nvgpu_aperture_mask(g, mem,
655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), 648 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), 649 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
657 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); 650 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
651 }
652 if (!addr)
653 return -ENOMEM;
654
655 lo = u64_lo32(addr);
656 hi = u64_hi32(addr);
657
658 mem = &ch_ctx->mem;
658 659
659 if (nvgpu_mem_begin(g, mem)) 660 if (nvgpu_mem_begin(g, mem))
660 return -ENOMEM; 661 return -ENOMEM;
661 662
662 lo = u64_lo32(pa);
663 hi = u64_hi32(pa);
664
665 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, 663 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
666 lo, GK20A_FECS_TRACE_NUM_RECORDS); 664 lo, GK20A_FECS_TRACE_NUM_RECORDS);
667 665
668 nvgpu_mem_wr(g, mem, 666 nvgpu_mem_wr(g, mem,
667 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
668 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
669 GK20A_FECS_TRACE_NUM_RECORDS));
670
671 nvgpu_mem_end(g, mem);
672
673 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
674 mem = &ch->ctx_header.mem;
675
676 if (nvgpu_mem_begin(g, mem))
677 return -ENOMEM;
678
679 nvgpu_mem_wr(g, mem,
669 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 680 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
670 lo); 681 lo);
671 nvgpu_mem_wr(g, mem, 682 nvgpu_mem_wr(g, mem,
672 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 683 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
673 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | 684 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
674 aperture); 685 aperture_mask);
675 nvgpu_mem_wr(g, mem,
676 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
677 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
678 GK20A_FECS_TRACE_NUM_RECORDS));
679 686
680 nvgpu_mem_end(g, mem); 687 nvgpu_mem_end(g, mem);
681 688
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
728 return 0; 735 return 0;
729 736
730 nvgpu_thread_stop(&trace->poll_task); 737 nvgpu_thread_stop(&trace->poll_task);
731 gk20a_fecs_trace_free_ring(g);
732 gk20a_fecs_trace_free_hash_table(g); 738 gk20a_fecs_trace_free_hash_table(g);
733 739
734 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); 740 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
index e310a18a..acac14c6 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g,
39int gk20a_fecs_trace_enable(struct gk20a *g); 39int gk20a_fecs_trace_enable(struct gk20a *g);
40int gk20a_fecs_trace_disable(struct gk20a *g); 40int gk20a_fecs_trace_disable(struct gk20a *g);
41bool gk20a_fecs_trace_is_enabled(struct gk20a *g); 41bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
42size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
42 43
43#endif /* __FECS_TRACE_GK20A_H */ 44#endif /* __FECS_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index a082cd92..7c51afca 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -41,6 +41,7 @@
41 41
42#include "gk20a.h" 42#include "gk20a.h"
43#include "gr_gk20a.h" 43#include "gr_gk20a.h"
44#include "gk20a/fecs_trace_gk20a.h"
44#include "gr_ctx_gk20a.h" 45#include "gr_ctx_gk20a.h"
45#include "gr_pri_gk20a.h" 46#include "gr_pri_gk20a.h"
46#include "regops_gk20a.h" 47#include "regops_gk20a.h"
@@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
2499 return ret; 2500 return ret;
2500 } 2501 }
2501 g->gr.ctx_vars.priv_access_map_size = 512 * 1024; 2502 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
2503#ifdef CONFIG_GK20A_CTXSW_TRACE
2504 g->gr.ctx_vars.fecs_trace_buffer_size =
2505 gk20a_fecs_trace_buffer_size(g);
2506#endif
2502 } 2507 }
2503 2508
2504 nvgpu_log_fn(g, "done"); 2509 nvgpu_log_fn(g, "done");
@@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2630 if (err) 2635 if (err)
2631 goto clean_up; 2636 goto clean_up;
2632 2637
2638#ifdef CONFIG_GK20A_CTXSW_TRACE
2639 nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
2640 gr->ctx_vars.fecs_trace_buffer_size);
2641
2642 err = nvgpu_dma_alloc_sys(g,
2643 gr->ctx_vars.fecs_trace_buffer_size,
2644 &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
2645 if (err)
2646 goto clean_up;
2647
2648 gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
2649 gk20a_gr_destroy_ctx_buffer;
2650#endif
2651
2633 nvgpu_log_fn(g, "done"); 2652 nvgpu_log_fn(g, "done");
2634 return 0; 2653 return 0;
2635 2654
@@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2769 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; 2788 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
2770 2789
2771 tsg->gr_ctx.global_ctx_buffer_mapped = true; 2790 tsg->gr_ctx.global_ctx_buffer_mapped = true;
2791
2792#ifdef CONFIG_GK20A_CTXSW_TRACE
2793 /* FECS trace buffer */
2794 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
2795 mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
2796 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2797 gk20a_mem_flag_none, true, mem->aperture);
2798 if (!gpu_va)
2799 goto clean_up;
2800 g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
2801 g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
2802 g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
2803 }
2804#endif
2805
2772 return 0; 2806 return 0;
2773 2807
2774clean_up: 2808clean_up:
@@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3050 "fail to commit gr ctx buffer"); 3084 "fail to commit gr ctx buffer");
3051 goto out; 3085 goto out;
3052 } 3086 }
3087#ifdef CONFIG_GK20A_CTXSW_TRACE
3088 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3089 err = g->ops.fecs_trace.bind_channel(g, c);
3090 if (err)
3091 nvgpu_warn(g,
3092 "fail to bind channel for ctxsw trace");
3093 }
3094#endif
3053 } 3095 }
3054 3096
3055 nvgpu_log_fn(g, "done"); 3097 nvgpu_log_fn(g, "done");
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 01c7f43d..66d3c22e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ {
79 ATTRIBUTE_VPR = 5, 79 ATTRIBUTE_VPR = 5,
80 GOLDEN_CTX = 6, 80 GOLDEN_CTX = 6,
81 PRIV_ACCESS_MAP = 7, 81 PRIV_ACCESS_MAP = 7,
82 FECS_TRACE_BUFFER = 8,
82 NR_GLOBAL_CTX_BUF = 9 83 NR_GLOBAL_CTX_BUF = 9
83}; 84};
84 85
@@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ {
89 ATTRIBUTE_VA = 2, 90 ATTRIBUTE_VA = 2,
90 GOLDEN_CTX_VA = 3, 91 GOLDEN_CTX_VA = 3,
91 PRIV_ACCESS_MAP_VA = 4, 92 PRIV_ACCESS_MAP_VA = 4,
93 FECS_TRACE_BUFFER_VA = 5,
92 NR_GLOBAL_CTX_BUF_VA = 6 94 NR_GLOBAL_CTX_BUF_VA = 6
93}; 95};
94 96
@@ -290,6 +292,8 @@ struct gr_gk20a {
290 292
291 u32 priv_access_map_size; 293 u32 priv_access_map_size;
292 294
295 u32 fecs_trace_buffer_size;
296
293 struct gr_ucode_gk20a ucode; 297 struct gr_ucode_gk20a ucode;
294 298
295 struct av_list_gk20a sw_bundle_init; 299 struct av_list_gk20a sw_bundle_init;
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 61c6cb0f..632f1063 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g)
834 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true); 834 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true);
835 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); 835 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
836 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); 836 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
837 __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
837 838
838 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ 839 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */
839 if (gops->fuse.check_priv_security(g)) 840 if (gops->fuse.check_priv_security(g))
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 62164d16..80e07b78 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g)
732 732
733 __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); 733 __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
734 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); 734 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
735 __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
735 736
736 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ 737 /* Read fuses to check if gpu needs to boot in secure/non-secure mode */
737 if (gops->fuse.check_priv_security(g)) 738 if (gops->fuse.check_priv_security(g))
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index f9ac1f2a..9d7dca95 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = {
586 }, 586 },
587#ifdef CONFIG_GK20A_CTXSW_TRACE 587#ifdef CONFIG_GK20A_CTXSW_TRACE
588 .fecs_trace = { 588 .fecs_trace = {
589 .alloc_user_buffer = NULL, 589 .alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc,
590 .free_user_buffer = NULL, 590 .free_user_buffer = gk20a_ctxsw_dev_ring_free,
591 .mmap_user_buffer = NULL, 591 .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
592 .init = NULL, 592 .init = gk20a_fecs_trace_init,
593 .deinit = NULL, 593 .deinit = gk20a_fecs_trace_deinit,
594 .enable = NULL, 594 .enable = gk20a_fecs_trace_enable,
595 .disable = NULL, 595 .disable = gk20a_fecs_trace_disable,
596 .is_enabled = NULL, 596 .is_enabled = gk20a_fecs_trace_is_enabled,
597 .reset = NULL, 597 .reset = gk20a_fecs_trace_reset,
598 .flush = NULL, 598 .flush = NULL,
599 .poll = NULL, 599 .poll = gk20a_fecs_trace_poll,
600 .bind_channel = NULL, 600 .bind_channel = gk20a_fecs_trace_bind_channel,
601 .unbind_channel = NULL, 601 .unbind_channel = gk20a_fecs_trace_unbind_channel,
602 .max_entries = NULL, 602 .max_entries = gk20a_gr_max_entries,
603 }, 603 },
604#endif /* CONFIG_GK20A_CTXSW_TRACE */ 604#endif /* CONFIG_GK20A_CTXSW_TRACE */
605 .mm = { 605 .mm = {
@@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g)
843 } 843 }
844 844
845 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); 845 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
846 __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
846 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; 847 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
847 848
848 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); 849 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 0ffb0488..c352488c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -34,6 +34,7 @@ struct gk20a;
34#define NVGPU_IS_FMODEL 1 34#define NVGPU_IS_FMODEL 1
35#define NVGPU_DRIVER_IS_DYING 2 35#define NVGPU_DRIVER_IS_DYING 2
36#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 36#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
37#define NVGPU_FECS_TRACE_VA 4
37 38
38/* 39/*
39 * ECC flags 40 * ECC flags