path: root/include/gk20a/dbg_gpu_gk20a.c
author    Joshua Bakita <bakitajoshua@gmail.com>  2023-06-28 18:24:25 -0400
committer Joshua Bakita <bakitajoshua@gmail.com>  2023-06-28 18:24:25 -0400
commit    01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch)
tree      4ef34501728a087be24f4ba0af90f91486bf780b /include/gk20a/dbg_gpu_gk20a.c
parent    306a03d18b305e4e573be3b2931978fa10679eb9 (diff)
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time being. Only a couple structs are required, so it should be fairly easy to remove this dependency at some point in the future.
Diffstat (limited to 'include/gk20a/dbg_gpu_gk20a.c')
-rw-r--r--    include/gk20a/dbg_gpu_gk20a.c    388
1 file changed, 388 insertions(+), 0 deletions(-)
diff --git a/include/gk20a/dbg_gpu_gk20a.c b/include/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 0000000..1686d01
--- /dev/null
+++ b/include/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,388 @@
/*
 * Tegra GK20A GPU Debugger/Profiler Driver
 *
 * Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/vm.h>
#include <nvgpu/atomic.h>
#include <nvgpu/mm.h>
#include <nvgpu/bug.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>
#include <nvgpu/unit.h>
#include <nvgpu/power_features/power_features.h>

#include "gk20a.h"
#include "gr_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "regops_gk20a.h"

#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_perf_gk20a.h>

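/*
 * Reset the PERFMON unit and clear any stale perf buffer (PMA) streaming
 * state: warn if the ring buffer is not yet empty, clear the membuf
 * status, and bump the read pointer past any unread bytes.
 */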
static void gk20a_perfbuf_reset_streaming(struct gk20a *g)
{
	u32 engine_status;
	u32 num_unread_bytes;

	g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));

	engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
	WARN_ON(0u ==
		(engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));

	gk20a_writel(g, perf_pmasys_control_r(),
		perf_pmasys_control_membuf_clear_status_doit_f());

	num_unread_bytes = gk20a_readl(g, perf_pmasys_mem_bytes_r());
	if (num_unread_bytes != 0u) {
		gk20a_writel(g, perf_pmasys_mem_bump_r(), num_unread_bytes);
	}
}

/*
 * API to get first channel from the list of all channels
 * bound to the debug session
 */
struct channel_gk20a *
nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
{
	struct dbg_session_channel_data *ch_data;
	struct channel_gk20a *ch;
	struct gk20a *g = dbg_s->g;

	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
	if (nvgpu_list_empty(&dbg_s->ch_list)) {
		nvgpu_mutex_release(&dbg_s->ch_list_lock);
		return NULL;
	}

	ch_data = nvgpu_list_first_entry(&dbg_s->ch_list,
				dbg_session_channel_data,
				ch_entry);
	ch = g->fifo.channel + ch_data->chid;

	nvgpu_mutex_release(&dbg_s->ch_list_lock);

	return ch;
}

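/*
 * Post an event to every debug session bound to this channel that has
 * events enabled, bumping each session's pending-event count.
 */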
void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
{
	struct dbg_session_data *session_data;
	struct dbg_session_gk20a *dbg_s;
	struct gk20a *g = ch->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

	/* guard against the session list being modified */
	nvgpu_mutex_acquire(&ch->dbg_s_lock);

	nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
				dbg_session_data, dbg_s_entry) {
		dbg_s = session_data->dbg_s;
		if (dbg_s->dbg_events.events_enabled) {
			nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d",
					dbg_s->id);
			nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
					dbg_s->dbg_events.num_pending_events);

			dbg_s->dbg_events.num_pending_events++;

			nvgpu_dbg_session_post_event(dbg_s);
		}
	}

	nvgpu_mutex_release(&ch->dbg_s_lock);
}

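/*
 * Returns true if any debug session bound to this channel has requested
 * that stop triggers be broadcast.
 */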
bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
{
	struct dbg_session_data *session_data;
	struct dbg_session_gk20a *dbg_s;
	bool broadcast = false;
	struct gk20a *g = ch->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");

	/* guard against the session list being modified */
	nvgpu_mutex_acquire(&ch->dbg_s_lock);

	nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
			dbg_session_data, dbg_s_entry) {
		dbg_s = session_data->dbg_s;
		if (dbg_s->broadcast_stop_trigger) {
			nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
					"stop trigger broadcast enabled");
			broadcast = true;
			break;
		}
	}

	nvgpu_mutex_release(&ch->dbg_s_lock);

	return broadcast;
}

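/*
 * Clear the broadcast-stop-trigger flag on every debug session bound to
 * this channel.
 */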
int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch)
{
	struct dbg_session_data *session_data;
	struct dbg_session_gk20a *dbg_s;
	struct gk20a *g = ch->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");

	/* guard against the session list being modified */
	nvgpu_mutex_acquire(&ch->dbg_s_lock);

	nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
			dbg_session_data, dbg_s_entry) {
		dbg_s = session_data->dbg_s;
		if (dbg_s->broadcast_stop_trigger) {
			nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
					"stop trigger broadcast disabled");
			dbg_s->broadcast_stop_trigger = false;
		}
	}

	nvgpu_mutex_release(&ch->dbg_s_lock);

	return 0;
}

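/*
 * Reference-counted powergate control: the hardware powergate state is
 * only toggled when the global disable refcount is zero, so several
 * debug sessions may hold powergating disabled simultaneously.
 */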
u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
				bool mode)
{
	u32 err = 0U;
	struct gk20a *g = dbg_s->g;

	if (dbg_s->is_pg_disabled != mode) {
		if (mode == false) {
			g->dbg_powergating_disabled_refcount--;
		}

		/*
		 * Allow powergate disable or enable only if
		 * the global pg disabled refcount is zero
		 */
		if (g->dbg_powergating_disabled_refcount == 0) {
			err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
								mode);
		}

		if (mode) {
			g->dbg_powergating_disabled_refcount++;
		}

		dbg_s->is_pg_disabled = mode;
	}

	return err;
}

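/*
 * Actually disable or re-enable power/clock gating on behalf of a debug
 * session, keeping the GPU busy (gk20a_busy) while gating is disabled
 * and idling it again on re-enable.
 */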
int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate)
{
	int err = 0;
	struct gk20a *g = dbg_s->g;

	/* This function must be called with g->dbg_sessions_lock held */

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s",
		  g->name, disable_powergate ? "disable" : "enable");

	/*
	 * Powergate mode here refers to railgate+powergate+clockgate
	 * so in case slcg/blcg/elcg are disabled and railgating is enabled,
	 * disable railgating and then set is_pg_disabled = true
	 * Similarly re-enable railgating and not other features if they are not
	 * enabled when powermode=MODE_ENABLE
	 */
	if (disable_powergate) {
		/* save off current powergate, clk state.
		 * set gpu module's can_powergate = 0.
		 * set gpu module's clk to max.
		 * while *a* debug session is active there will be no power or
		 * clocking state changes allowed from mainline code (but they
		 * should be saved).
		 */

		nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
			  "module busy");
		err = gk20a_busy(g);
		if (err) {
			return err;
		}

		err = nvgpu_cg_pg_disable(g);

		if (err == 0) {
			dbg_s->is_pg_disabled = true;
			nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
				  "pg disabled");
		}
	} else {
		/* restore (can) powergate, clk state */
		/* release pending exceptions to fault/be handled as usual */
		/* TBD: ordering of these? */

		err = nvgpu_cg_pg_enable(g);

		nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
		gk20a_idle(g);

		if (err == 0) {
			dbg_s->is_pg_disabled = false;
			nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
				  "pg enabled");
		}
	}

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done",
		  g->name, disable_powergate ? "disable" : "enable");
	return err;
}

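/*
 * Attempt to take the single global profiler reservation; fails if any
 * reservation, global or per-context, is already outstanding.
 */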
bool nvgpu_check_and_set_global_reservation(
		struct dbg_session_gk20a *dbg_s,
		struct dbg_profiler_object_data *prof_obj)
{
	struct gk20a *g = dbg_s->g;

	if (g->profiler_reservation_count == 0) {
		g->global_profiler_reservation_held = true;
		g->profiler_reservation_count = 1;
		dbg_s->has_profiler_reservation = true;
		prof_obj->has_reservation = true;
		return true;
	}
	return false;
}

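/*
 * Take an additional per-context profiler reservation for this session
 * and profiler object.
 */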
bool nvgpu_check_and_set_context_reservation(
		struct dbg_session_gk20a *dbg_s,
		struct dbg_profiler_object_data *prof_obj)
{
	struct gk20a *g = dbg_s->g;

	/* Assumes that we've already checked that no global reservation
	 * is in effect.
	 */
	g->profiler_reservation_count++;
	dbg_s->has_profiler_reservation = true;
	prof_obj->has_reservation = true;
	return true;
}

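/*
 * Drop a profiler reservation; a profiler object with no channel bound
 * (prof_obj->ch == NULL) holds the global reservation, so releasing it
 * clears the global-held flag as well.
 */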
void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s,
		struct dbg_profiler_object_data *prof_obj)
{
	struct gk20a *g = dbg_s->g;

	g->profiler_reservation_count--;
	if (g->profiler_reservation_count < 0) {
		nvgpu_err(g, "Negative reservation count!");
	}
	dbg_s->has_profiler_reservation = false;
	prof_obj->has_reservation = false;
	if (prof_obj->ch == NULL) {
		g->global_profiler_reservation_held = false;
	}
}

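/*
 * Enable perf buffer streaming to the given GPU virtual address: set up
 * an instance block for the perfbuf VM, reset streaming, then program
 * the PMA output base/size and, last, the MEM_BLOCK register whose
 * write triggers the bind.
 */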
int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
{
	struct mm_gk20a *mm = &g->mm;
	u32 virt_addr_lo;
	u32 virt_addr_hi;
	u32 inst_pa_page;
	int err;

	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to poweron");
		return err;
	}

	err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
	if (err) {
		return err;
	}

	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);

	gk20a_perfbuf_reset_streaming(g);

	virt_addr_lo = u64_lo32(offset);
	virt_addr_hi = u64_hi32(offset);

	/* address and size are aligned to 32 bytes, the lowest bits read back
	 * as zeros */
	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
	gk20a_writel(g, perf_pmasys_outsize_r(), size);

	/* this field is aligned to 4K */
	inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;

	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
	 * should be written last */
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(inst_pa_page) |
			nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
				perf_pmasys_mem_block_target_sys_ncoh_f(),
				perf_pmasys_mem_block_target_sys_coh_f(),
				perf_pmasys_mem_block_target_lfb_f()) |
			perf_pmasys_mem_block_valid_true_f());

	gk20a_idle(g);
	return 0;
}

/* must be called with dbg_sessions_lock held */
int gk20a_perfbuf_disable_locked(struct gk20a *g)
{
	int err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to poweron");
		return err;
	}

	gk20a_perfbuf_reset_streaming(g);

	gk20a_writel(g, perf_pmasys_outbase_r(), 0);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(0));
	gk20a_writel(g, perf_pmasys_outsize_r(), 0);

	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(0) |
			perf_pmasys_mem_block_valid_false_f() |
			perf_pmasys_mem_block_target_f(0));

	gk20a_idle(g);

	return 0;
}