From ca3215c6b23c7d855ced899d8090aaa8ce9a9fa3 Mon Sep 17 00:00:00 2001 From: Vaibhav Kachore Date: Thu, 22 Feb 2018 16:45:30 +0530 Subject: gpu: nvgpu: add support for FECS VA - On t186, ucode expects a physical address to be programmed for the FECS trace buffer. - On t194, ucode expects a GPU VA to be programmed for the FECS trace buffer. This patch adds extra support to handle this change for native Linux. - Increase the size of the FECS trace buffer (as a few entries were getting dropped due to overflow of the FECS trace buffer). - This moves FECS trace buffer handling into the global context buffer. - This adds an extra check for updating the mailbox1 register. (Bug 200417403) EVLR-2077 Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723 Signed-off-by: Vaibhav Kachore Reviewed-on: https://git-master.nvidia.com/r/1536028 Reviewed-by: svc-mobile-coverity Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit Reviewed-by: Nirav Patel Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 102 +++++++++++++++-------------- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h | 3 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 42 ++++++++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 ++ drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 27 ++++---- drivers/gpu/nvgpu/include/nvgpu/enabled.h | 1 + 8 files changed, 119 insertions(+), 62 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index c9d7ea06..117920da 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -51,7 +52,7 @@ * If HW circular buffer is getting too many "buffer full" conditions, * increasing this constant should help (it drives Linux' internal buffer size). 
*/ -#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) +#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10) #define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ #define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) #define GK20A_FECS_TRACE_PTIMER_SHIFT 5 @@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent { struct gk20a_fecs_trace { - struct nvgpu_mem trace_buf; DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); struct nvgpu_mutex hash_lock; struct nvgpu_mutex poll_lock; @@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void) } static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( - struct gk20a_fecs_trace *trace, int idx) + struct gk20a *g, int idx) { + struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; + return (struct gk20a_fecs_trace_record *) - ((u8 *) trace->trace_buf.cpu_va + ((u8 *) mem->cpu_va + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); } @@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) struct gk20a_fecs_trace *trace = g->fecs_trace; pid_t cur_pid; pid_t new_pid; + int count = 0; /* for now, only one VM */ const int vmid = 0; - struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( - trace, index); + struct gk20a_fecs_trace_record *r = + gk20a_fecs_trace_get_record(g, index); nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "consuming record trace=%p read=%d record=%p", trace, index, r); @@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) continue; gk20a_ctxsw_trace_write(g, &entry); + count++; } gk20a_ctxsw_trace_wake_up(g, vmid); - return 0; + return count; } int gk20a_fecs_trace_poll(struct gk20a *g) @@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g) g->ops.mm.fb_flush(g); while (read != write) { - /* Ignore error code, as we want to consume all records */ - (void)gk20a_fecs_trace_ring_read(g, read); + cnt = gk20a_fecs_trace_ring_read(g, read); + if (cnt <= 0) + break; /* Get to next record. 
*/ read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); } /* ensure FECS records has been updated before incrementing read index */ - nvgpu_smp_wmb(); + nvgpu_wmb(); gk20a_fecs_trace_set_read_index(g, read); done: @@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg) return 0; } -static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) +size_t gk20a_fecs_trace_buffer_size(struct gk20a *g) { - struct gk20a_fecs_trace *trace = g->fecs_trace; - - return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS - * ctxsw_prog_record_timestamp_record_size_in_bytes_v(), - &trace->trace_buf); -} - -static void gk20a_fecs_trace_free_ring(struct gk20a *g) -{ - struct gk20a_fecs_trace *trace = g->fecs_trace; - - nvgpu_dma_free(g, &trace->trace_buf); + return GK20A_FECS_TRACE_NUM_RECORDS + * ctxsw_prog_record_timestamp_record_size_in_bytes_v(); } #ifdef CONFIG_DEBUG_FS @@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show( { loff_t *pos = (loff_t *) v; struct gk20a *g = *(struct gk20a **)s->private; - struct gk20a_fecs_trace *trace = g->fecs_trace; - struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); + struct gk20a_fecs_trace_record *r = + gk20a_fecs_trace_get_record(g, *pos); int i; const u32 invalid_tag = ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); @@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g) goto clean_poll_lock; BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); - err = gk20a_fecs_trace_alloc_ring(g); - if (err) { - nvgpu_warn(g, "failed to allocate FECS ring"); - goto clean_hash_lock; - } - hash_init(trace->pid_hash_table); __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); @@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g) return 0; -clean_hash_lock: - nvgpu_mutex_destroy(&trace->hash_lock); clean_poll_lock: nvgpu_mutex_destroy(&trace->poll_lock); clean: @@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, u32 lo; u32 
hi; - u64 pa; + u64 addr; struct tsg_gk20a *tsg; struct nvgpu_gr_ctx *ch_ctx; struct gk20a_fecs_trace *trace = g->fecs_trace; struct nvgpu_mem *mem; u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); pid_t pid; - u32 aperture; + u32 aperture_mask; nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "chid=%d context_ptr=%x inst_block=%llx", @@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, if (!trace) return -ENOMEM; - pa = nvgpu_inst_block_addr(g, &trace->trace_buf); - if (!pa) - return -ENOMEM; - aperture = nvgpu_aperture_mask(g, &trace->trace_buf, + mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { + addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA]; + nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr); + aperture_mask = 0; + } else { + addr = nvgpu_inst_block_addr(g, mem); + nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr); + aperture_mask = nvgpu_aperture_mask(g, mem, ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); + } + if (!addr) + return -ENOMEM; + + lo = u64_lo32(addr); + hi = u64_hi32(addr); + + mem = &ch_ctx->mem; if (nvgpu_mem_begin(g, mem)) return -ENOMEM; - lo = u64_lo32(pa); - hi = u64_hi32(pa); - nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, lo, GK20A_FECS_TRACE_NUM_RECORDS); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), + ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( + GK20A_FECS_TRACE_NUM_RECORDS)); + + nvgpu_mem_end(g, mem); + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) + mem = &ch->ctx_header.mem; + + if (nvgpu_mem_begin(g, mem)) + return -ENOMEM; + nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), lo); nvgpu_mem_wr(g, mem, 
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | - aperture); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_control_o(), - ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( - GK20A_FECS_TRACE_NUM_RECORDS)); + aperture_mask); nvgpu_mem_end(g, mem); @@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g) return 0; nvgpu_thread_stop(&trace->poll_task); - gk20a_fecs_trace_free_ring(g); gk20a_fecs_trace_free_hash_table(g); nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h index e310a18a..acac14c6 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g, int gk20a_fecs_trace_enable(struct gk20a *g); int gk20a_fecs_trace_disable(struct gk20a *g); bool gk20a_fecs_trace_is_enabled(struct gk20a *g); +size_t gk20a_fecs_trace_buffer_size(struct gk20a *g); #endif /* __FECS_TRACE_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index a082cd92..7c51afca 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -41,6 +41,7 @@ #include "gk20a.h" #include "gr_gk20a.h" +#include "gk20a/fecs_trace_gk20a.h" #include "gr_ctx_gk20a.h" #include "gr_pri_gk20a.h" #include "regops_gk20a.h" @@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) return ret; } g->gr.ctx_vars.priv_access_map_size = 512 * 1024; +#ifdef CONFIG_GK20A_CTXSW_TRACE + 
g->gr.ctx_vars.fecs_trace_buffer_size = + gk20a_fecs_trace_buffer_size(g); +#endif } nvgpu_log_fn(g, "done"); @@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) if (err) goto clean_up; +#ifdef CONFIG_GK20A_CTXSW_TRACE + nvgpu_log_info(g, "fecs_trace_buffer_size : %d", + gr->ctx_vars.fecs_trace_buffer_size); + + err = nvgpu_dma_alloc_sys(g, + gr->ctx_vars.fecs_trace_buffer_size, + &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem); + if (err) + goto clean_up; + + gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy = + gk20a_gr_destroy_ctx_buffer; +#endif + nvgpu_log_fn(g, "done"); return 0; @@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; tsg->gr_ctx.global_ctx_buffer_mapped = true; + +#ifdef CONFIG_GK20A_CTXSW_TRACE + /* FECS trace buffer */ + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { + mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem; + gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, + gk20a_mem_flag_none, true, mem->aperture); + if (!gpu_va) + goto clean_up; + g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va; + g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size; + g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER; + } +#endif + return 0; clean_up: @@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) "fail to commit gr ctx buffer"); goto out; } +#ifdef CONFIG_GK20A_CTXSW_TRACE + if (g->ops.fecs_trace.bind_channel && !c->vpr) { + err = g->ops.fecs_trace.bind_channel(g, c); + if (err) + nvgpu_warn(g, + "fail to bind channel for ctxsw trace"); + } +#endif } nvgpu_log_fn(g, "done"); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 01c7f43d..66d3c22e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ { ATTRIBUTE_VPR = 5, GOLDEN_CTX = 6, PRIV_ACCESS_MAP = 7, + FECS_TRACE_BUFFER = 8, NR_GLOBAL_CTX_BUF = 9 
}; @@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ { ATTRIBUTE_VA = 2, GOLDEN_CTX_VA = 3, PRIV_ACCESS_MAP_VA = 4, + FECS_TRACE_BUFFER_VA = 5, NR_GLOBAL_CTX_BUF_VA = 6 }; @@ -290,6 +292,8 @@ struct gr_gk20a { u32 priv_access_map_size; + u32 fecs_trace_buffer_size; + struct gr_ucode_gk20a ucode; struct av_list_gk20a sw_bundle_init; diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 61c6cb0f..632f1063 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true); __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); + __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false); /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ if (gops->fuse.check_priv_security(g)) diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 62164d16..80e07b78 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); + __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false); /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ if (gops->fuse.check_priv_security(g)) diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f9ac1f2a..9d7dca95 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = { }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { - .alloc_user_buffer = NULL, - .free_user_buffer = NULL, - .mmap_user_buffer = NULL, - .init = NULL, - .deinit = NULL, - .enable = NULL, - .disable = NULL, - .is_enabled = NULL, - .reset = NULL, + .alloc_user_buffer 
= gk20a_ctxsw_dev_ring_alloc, + .free_user_buffer = gk20a_ctxsw_dev_ring_free, + .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer, + .init = gk20a_fecs_trace_init, + .deinit = gk20a_fecs_trace_deinit, + .enable = gk20a_fecs_trace_enable, + .disable = gk20a_fecs_trace_disable, + .is_enabled = gk20a_fecs_trace_is_enabled, + .reset = gk20a_fecs_trace_reset, .flush = NULL, - .poll = NULL, - .bind_channel = NULL, - .unbind_channel = NULL, - .max_entries = NULL, + .poll = gk20a_fecs_trace_poll, + .bind_channel = gk20a_fecs_trace_bind_channel, + .unbind_channel = gk20a_fecs_trace_unbind_channel, + .max_entries = gk20a_gr_max_entries, }, #endif /* CONFIG_GK20A_CTXSW_TRACE */ .mm = { @@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g) } __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); + __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true); g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 0ffb0488..c352488c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -34,6 +34,7 @@ struct gk20a; #define NVGPU_IS_FMODEL 1 #define NVGPU_DRIVER_IS_DYING 2 #define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 +#define NVGPU_FECS_TRACE_VA 4 /* * ECC flags -- cgit v1.2.2