From 01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Wed, 28 Jun 2023 18:24:25 -0400
Subject: Include nvgpu headers

These are needed to build on NVIDIA's Jetson boards for the time being.
Only a couple structs are required, so it should be fairly easy to remove
this dependency at some point in the future.
---
 include/gk20a/dbg_gpu_gk20a.c | 388 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 388 insertions(+)
 create mode 100644 include/gk20a/dbg_gpu_gk20a.c

diff --git a/include/gk20a/dbg_gpu_gk20a.c b/include/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 0000000..1686d01
--- /dev/null
+++ b/include/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,388 @@
+/*
+ * Tegra GK20A GPU Debugger/Profiler Driver
+ *
+ * Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gk20a.h"
+#include "gr_gk20a.h"
+#include "dbg_gpu_gk20a.h"
+#include "regops_gk20a.h"
+
+#include
+#include
+
+static void gk20a_perfbuf_reset_streaming(struct gk20a *g)
+{
+        u32 engine_status;
+        u32 num_unread_bytes;
+
+        g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
+
+        engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
+        WARN_ON(0u ==
+                (engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));
+
+        gk20a_writel(g, perf_pmasys_control_r(),
+                perf_pmasys_control_membuf_clear_status_doit_f());
+
+        num_unread_bytes = gk20a_readl(g, perf_pmasys_mem_bytes_r());
+        if (num_unread_bytes != 0u) {
+                gk20a_writel(g, perf_pmasys_mem_bump_r(), num_unread_bytes);
+        }
+}
+
+/*
+ * API to get first channel from the list of all channels
+ * bound to the debug session
+ */
+struct channel_gk20a *
+nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
+{
+        struct dbg_session_channel_data *ch_data;
+        struct channel_gk20a *ch;
+        struct gk20a *g = dbg_s->g;
+
+        nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
+        if (nvgpu_list_empty(&dbg_s->ch_list)) {
+                nvgpu_mutex_release(&dbg_s->ch_list_lock);
+                return NULL;
+        }
+
+        ch_data = nvgpu_list_first_entry(&dbg_s->ch_list,
+                        dbg_session_channel_data,
+                        ch_entry);
+        ch = g->fifo.channel + ch_data->chid;
+
+        nvgpu_mutex_release(&dbg_s->ch_list_lock);
+
+        return ch;
+}
+
+void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
+{
+        struct dbg_session_data *session_data;
+        struct dbg_session_gk20a *dbg_s;
+        struct gk20a *g = ch->g;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+
+        /* guard against the session list being modified */
+        nvgpu_mutex_acquire(&ch->dbg_s_lock);
+
+        nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
+                        dbg_session_data, dbg_s_entry) {
+                dbg_s = session_data->dbg_s;
+                if (dbg_s->dbg_events.events_enabled) {
+                        nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d",
+                                        dbg_s->id);
+                        nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
+                                        dbg_s->dbg_events.num_pending_events);
+
+                        dbg_s->dbg_events.num_pending_events++;
+
+                        nvgpu_dbg_session_post_event(dbg_s);
+                }
+        }
+
+        nvgpu_mutex_release(&ch->dbg_s_lock);
+}
+
+bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
+{
+        struct dbg_session_data *session_data;
+        struct dbg_session_gk20a *dbg_s;
+        bool broadcast = false;
+        struct gk20a *g = ch->g;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
+
+        /* guard against the session list being modified */
+        nvgpu_mutex_acquire(&ch->dbg_s_lock);
+
+        nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
+                        dbg_session_data, dbg_s_entry) {
+                dbg_s = session_data->dbg_s;
+                if (dbg_s->broadcast_stop_trigger) {
+                        nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
+                                        "stop trigger broadcast enabled");
+                        broadcast = true;
+                        break;
+                }
+        }
+
+        nvgpu_mutex_release(&ch->dbg_s_lock);
+
+        return broadcast;
+}
+
+int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch)
+{
+        struct dbg_session_data *session_data;
+        struct dbg_session_gk20a *dbg_s;
+        struct gk20a *g = ch->g;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
+
+        /* guard against the session list being modified */
+        nvgpu_mutex_acquire(&ch->dbg_s_lock);
+
+        nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
+                        dbg_session_data, dbg_s_entry) {
+                dbg_s = session_data->dbg_s;
+                if (dbg_s->broadcast_stop_trigger) {
+                        nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
+                                        "stop trigger broadcast disabled");
+                        dbg_s->broadcast_stop_trigger = false;
+                }
+        }
+
+        nvgpu_mutex_release(&ch->dbg_s_lock);
+
+        return 0;
+}
+
+u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
+                                bool mode)
+{
+        u32 err = 0U;
+        struct gk20a *g = dbg_s->g;
+
+        if (dbg_s->is_pg_disabled != mode) {
+                if (mode == false) {
+                        g->dbg_powergating_disabled_refcount--;
+                }
+
+                /*
+                 * Allow powergate disable or enable only if
+                 * the global pg disabled refcount is zero
+                 */
+                if (g->dbg_powergating_disabled_refcount == 0) {
+                        err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
+                                        mode);
+                }
+
+                if (mode) {
+                        g->dbg_powergating_disabled_refcount++;
+                }
+
+                dbg_s->is_pg_disabled = mode;
+        }
+
+        return err;
+}
+
+int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate)
+{
+        int err = 0;
+        struct gk20a *g = dbg_s->g;
+
+        /* This function must be called with g->dbg_sessions_lock held */
+
+        nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s",
+                  g->name, disable_powergate ? "disable" : "enable");
+
+        /*
+         * Powergate mode here refers to railgate+powergate+clockgate
+         * so in case slcg/blcg/elcg are disabled and railgating is enabled,
+         * disable railgating and then set is_pg_disabled = true
+         * Similarly re-enable railgating and not other features if they are not
+         * enabled when powermode=MODE_ENABLE
+         */
+        if (disable_powergate) {
+                /* save off current powergate, clk state.
+                 * set gpu module's can_powergate = 0.
+                 * set gpu module's clk to max.
+                 * while *a* debug session is active there will be no power or
+                 * clocking state changes allowed from mainline code (but they
+                 * should be saved).
+                 */
+
+                nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
+                                "module busy");
+                err = gk20a_busy(g);
+                if (err) {
+                        return err;
+                }
+
+                err = nvgpu_cg_pg_disable(g);
+
+                if (err == 0) {
+                        dbg_s->is_pg_disabled = true;
+                        nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
+                                        "pg disabled");
+                }
+        } else {
+                /* restore (can) powergate, clk state */
+                /* release pending exceptions to fault/be handled as usual */
+                /*TBD: ordering of these? */
+
+                err = nvgpu_cg_pg_enable(g);
+
+                nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
+                gk20a_idle(g);
+
+                if (err == 0) {
+                        dbg_s->is_pg_disabled = false;
+                        nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
+                                        "pg enabled");
+                }
+        }
+
+        nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done",
+                g->name, disable_powergate ? "disable" : "enable");
+        return err;
+}
+
+bool nvgpu_check_and_set_global_reservation(
+                struct dbg_session_gk20a *dbg_s,
+                struct dbg_profiler_object_data *prof_obj)
+{
+        struct gk20a *g = dbg_s->g;
+
+        if (g->profiler_reservation_count == 0) {
+                g->global_profiler_reservation_held = true;
+                g->profiler_reservation_count = 1;
+                dbg_s->has_profiler_reservation = true;
+                prof_obj->has_reservation = true;
+                return true;
+        }
+        return false;
+}
+
+bool nvgpu_check_and_set_context_reservation(
+                struct dbg_session_gk20a *dbg_s,
+                struct dbg_profiler_object_data *prof_obj)
+{
+        struct gk20a *g = dbg_s->g;
+
+        /* Assumes that we've already checked that no global reservation
+         * is in effect.
+         */
+        g->profiler_reservation_count++;
+        dbg_s->has_profiler_reservation = true;
+        prof_obj->has_reservation = true;
+        return true;
+}
+
+void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s,
+                struct dbg_profiler_object_data *prof_obj)
+{
+        struct gk20a *g = dbg_s->g;
+
+        g->profiler_reservation_count--;
+        if (g->profiler_reservation_count < 0) {
+                nvgpu_err(g, "Negative reservation count!");
+        }
+        dbg_s->has_profiler_reservation = false;
+        prof_obj->has_reservation = false;
+        if (prof_obj->ch == NULL) {
+                g->global_profiler_reservation_held = false;
+        }
+}
+
+int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
+{
+        struct mm_gk20a *mm = &g->mm;
+        u32 virt_addr_lo;
+        u32 virt_addr_hi;
+        u32 inst_pa_page;
+        int err;
+
+        err = gk20a_busy(g);
+        if (err) {
+                nvgpu_err(g, "failed to poweron");
+                return err;
+        }
+
+        err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
+        if (err) {
+                return err;
+        }
+
+        g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
+
+        gk20a_perfbuf_reset_streaming(g);
+
+        virt_addr_lo = u64_lo32(offset);
+        virt_addr_hi = u64_hi32(offset);
+
+        /* address and size are aligned to 32 bytes, the lowest bits read back
+         * as zeros */
+        gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
+        gk20a_writel(g, perf_pmasys_outbaseupper_r(),
+                        perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
+        gk20a_writel(g, perf_pmasys_outsize_r(), size);
+
+        /* this field is aligned to 4K */
+        inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
+
+        /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
+         * should be written last */
+        gk20a_writel(g, perf_pmasys_mem_block_r(),
+                        perf_pmasys_mem_block_base_f(inst_pa_page) |
+                        nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
+                                perf_pmasys_mem_block_target_sys_ncoh_f(),
+                                perf_pmasys_mem_block_target_sys_coh_f(),
+                                perf_pmasys_mem_block_target_lfb_f()) |
+                        perf_pmasys_mem_block_valid_true_f());
+
+        gk20a_idle(g);
+        return 0;
+}
+
+/* must be called with dbg_sessions_lock held */
+int gk20a_perfbuf_disable_locked(struct gk20a *g)
+{
+        int err = gk20a_busy(g);
+        if (err) {
+                nvgpu_err(g, "failed to poweron");
+                return err;
+        }
+
+        gk20a_perfbuf_reset_streaming(g);
+
+        gk20a_writel(g, perf_pmasys_outbase_r(), 0);
+        gk20a_writel(g, perf_pmasys_outbaseupper_r(),
+                        perf_pmasys_outbaseupper_ptr_f(0));
+        gk20a_writel(g, perf_pmasys_outsize_r(), 0);
+
+        gk20a_writel(g, perf_pmasys_mem_block_r(),
+                        perf_pmasys_mem_block_base_f(0) |
+                        perf_pmasys_mem_block_valid_false_f() |
+                        perf_pmasys_mem_block_target_f(0));
+
+        gk20a_idle(g);
+
+        return 0;
+}
--
cgit v1.2.2
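
The three profiler-reservation helpers in this patch (nvgpu_check_and_set_global_reservation, nvgpu_check_and_set_context_reservation, and nvgpu_release_profiler_reservation) all manipulate the same per-GPU bookkeeping: a reservation count, a global-reservation flag, and per-session/per-object flags. The standalone sketch below models that accounting outside the driver so the invariant is easier to see in one place: a global reservation is granted only when nothing is reserved, per-context reservations simply stack, and release flags a count below zero as a bug. All names here (gpu_model, model_take_global, etc.) are illustrative assumptions, not nvgpu code and not part of this patch.

/* Illustrative model of the reservation accounting only; not nvgpu code. */
#include <stdbool.h>
#include <stdio.h>

struct gpu_model {
        int  reservation_count;       /* mirrors g->profiler_reservation_count */
        bool global_reservation_held; /* mirrors g->global_profiler_reservation_held */
};

/* A global reservation is granted only when nothing else is reserved. */
static bool model_take_global(struct gpu_model *g)
{
        if (g->reservation_count == 0) {
                g->global_reservation_held = true;
                g->reservation_count = 1;
                return true;
        }
        return false;
}

/* Per-context reservations stack; the real driver checks that no global
 * reservation is in effect before calling its equivalent helper. */
static bool model_take_context(struct gpu_model *g)
{
        g->reservation_count++;
        return true;
}

/* Release decrements the count; a negative count indicates a bug, matching
 * the "Negative reservation count!" warning in the driver. */
static void model_release(struct gpu_model *g, bool was_global)
{
        g->reservation_count--;
        if (g->reservation_count < 0)
                fprintf(stderr, "negative reservation count\n");
        if (was_global)
                g->global_reservation_held = false;
}

int main(void)
{
        struct gpu_model g = { 0, false };

        /* Two context reservations coexist... */
        model_take_context(&g);
        model_take_context(&g);
        /* ...but a global reservation is refused while they are held. */
        printf("global grant: %d\n", model_take_global(&g)); /* prints 0 */

        model_release(&g, false);
        model_release(&g, false);
        printf("global grant: %d\n", model_take_global(&g)); /* prints 1 */
        model_release(&g, true);
        return 0;
}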