diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-28 18:24:25 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-28 18:24:25 -0400 |
commit | 01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch) | |
tree | 4ef34501728a087be24f4ba0af90f91486bf780b /include/gk20a/dbg_gpu_gk20a.c | |
parent | 306a03d18b305e4e573be3b2931978fa10679eb9 (diff) |
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time
being. Only a couple structs are required, so it should be fairly
easy to remove this dependency at some point in the future.
Diffstat (limited to 'include/gk20a/dbg_gpu_gk20a.c')
-rw-r--r-- | include/gk20a/dbg_gpu_gk20a.c | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/include/gk20a/dbg_gpu_gk20a.c b/include/gk20a/dbg_gpu_gk20a.c new file mode 100644 index 0000000..1686d01 --- /dev/null +++ b/include/gk20a/dbg_gpu_gk20a.c | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger/Profiler Driver | ||
3 | * | ||
4 | * Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/log.h> | ||
27 | #include <nvgpu/vm.h> | ||
28 | #include <nvgpu/atomic.h> | ||
29 | #include <nvgpu/mm.h> | ||
30 | #include <nvgpu/bug.h> | ||
31 | #include <nvgpu/io.h> | ||
32 | #include <nvgpu/utils.h> | ||
33 | #include <nvgpu/channel.h> | ||
34 | #include <nvgpu/unit.h> | ||
35 | #include <nvgpu/power_features/power_features.h> | ||
36 | |||
37 | #include "gk20a.h" | ||
38 | #include "gr_gk20a.h" | ||
39 | #include "dbg_gpu_gk20a.h" | ||
40 | #include "regops_gk20a.h" | ||
41 | |||
42 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
43 | #include <nvgpu/hw/gk20a/hw_perf_gk20a.h> | ||
44 | |||
45 | static void gk20a_perfbuf_reset_streaming(struct gk20a *g) | ||
46 | { | ||
47 | u32 engine_status; | ||
48 | u32 num_unread_bytes; | ||
49 | |||
50 | g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON)); | ||
51 | |||
52 | engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r()); | ||
53 | WARN_ON(0u == | ||
54 | (engine_status & perf_pmasys_enginestatus_rbufempty_empty_f())); | ||
55 | |||
56 | gk20a_writel(g, perf_pmasys_control_r(), | ||
57 | perf_pmasys_control_membuf_clear_status_doit_f()); | ||
58 | |||
59 | num_unread_bytes = gk20a_readl(g, perf_pmasys_mem_bytes_r()); | ||
60 | if (num_unread_bytes != 0u) { | ||
61 | gk20a_writel(g, perf_pmasys_mem_bump_r(), num_unread_bytes); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * API to get first channel from the list of all channels | ||
67 | * bound to the debug session | ||
68 | */ | ||
69 | struct channel_gk20a * | ||
70 | nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s) | ||
71 | { | ||
72 | struct dbg_session_channel_data *ch_data; | ||
73 | struct channel_gk20a *ch; | ||
74 | struct gk20a *g = dbg_s->g; | ||
75 | |||
76 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
77 | if (nvgpu_list_empty(&dbg_s->ch_list)) { | ||
78 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
79 | return NULL; | ||
80 | } | ||
81 | |||
82 | ch_data = nvgpu_list_first_entry(&dbg_s->ch_list, | ||
83 | dbg_session_channel_data, | ||
84 | ch_entry); | ||
85 | ch = g->fifo.channel + ch_data->chid; | ||
86 | |||
87 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
88 | |||
89 | return ch; | ||
90 | } | ||
91 | |||
92 | void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch) | ||
93 | { | ||
94 | struct dbg_session_data *session_data; | ||
95 | struct dbg_session_gk20a *dbg_s; | ||
96 | struct gk20a *g = ch->g; | ||
97 | |||
98 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
99 | |||
100 | /* guard against the session list being modified */ | ||
101 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
102 | |||
103 | nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list, | ||
104 | dbg_session_data, dbg_s_entry) { | ||
105 | dbg_s = session_data->dbg_s; | ||
106 | if (dbg_s->dbg_events.events_enabled) { | ||
107 | nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d", | ||
108 | dbg_s->id); | ||
109 | nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", | ||
110 | dbg_s->dbg_events.num_pending_events); | ||
111 | |||
112 | dbg_s->dbg_events.num_pending_events++; | ||
113 | |||
114 | nvgpu_dbg_session_post_event(dbg_s); | ||
115 | } | ||
116 | } | ||
117 | |||
118 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
119 | } | ||
120 | |||
121 | bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch) | ||
122 | { | ||
123 | struct dbg_session_data *session_data; | ||
124 | struct dbg_session_gk20a *dbg_s; | ||
125 | bool broadcast = false; | ||
126 | struct gk20a *g = ch->g; | ||
127 | |||
128 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " "); | ||
129 | |||
130 | /* guard against the session list being modified */ | ||
131 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
132 | |||
133 | nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list, | ||
134 | dbg_session_data, dbg_s_entry) { | ||
135 | dbg_s = session_data->dbg_s; | ||
136 | if (dbg_s->broadcast_stop_trigger) { | ||
137 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr, | ||
138 | "stop trigger broadcast enabled"); | ||
139 | broadcast = true; | ||
140 | break; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
145 | |||
146 | return broadcast; | ||
147 | } | ||
148 | |||
149 | int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch) | ||
150 | { | ||
151 | struct dbg_session_data *session_data; | ||
152 | struct dbg_session_gk20a *dbg_s; | ||
153 | struct gk20a *g = ch->g; | ||
154 | |||
155 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " "); | ||
156 | |||
157 | /* guard against the session list being modified */ | ||
158 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
159 | |||
160 | nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list, | ||
161 | dbg_session_data, dbg_s_entry) { | ||
162 | dbg_s = session_data->dbg_s; | ||
163 | if (dbg_s->broadcast_stop_trigger) { | ||
164 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr, | ||
165 | "stop trigger broadcast disabled"); | ||
166 | dbg_s->broadcast_stop_trigger = false; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s, | ||
176 | bool mode) | ||
177 | { | ||
178 | u32 err = 0U; | ||
179 | struct gk20a *g = dbg_s->g; | ||
180 | |||
181 | if (dbg_s->is_pg_disabled != mode) { | ||
182 | if (mode == false) { | ||
183 | g->dbg_powergating_disabled_refcount--; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Allow powergate disable or enable only if | ||
188 | * the global pg disabled refcount is zero | ||
189 | */ | ||
190 | if (g->dbg_powergating_disabled_refcount == 0) { | ||
191 | err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, | ||
192 | mode); | ||
193 | } | ||
194 | |||
195 | if (mode) { | ||
196 | g->dbg_powergating_disabled_refcount++; | ||
197 | } | ||
198 | |||
199 | dbg_s->is_pg_disabled = mode; | ||
200 | } | ||
201 | |||
202 | return err; | ||
203 | } | ||
204 | |||
205 | int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate) | ||
206 | { | ||
207 | int err = 0; | ||
208 | struct gk20a *g = dbg_s->g; | ||
209 | |||
210 | /* This function must be called with g->dbg_sessions_lock held */ | ||
211 | |||
212 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s", | ||
213 | g->name, disable_powergate ? "disable" : "enable"); | ||
214 | |||
215 | /* | ||
216 | * Powergate mode here refers to railgate+powergate+clockgate | ||
217 | * so in case slcg/blcg/elcg are disabled and railgating is enabled, | ||
218 | * disable railgating and then set is_pg_disabled = true | ||
219 | * Similarly re-enable railgating and not other features if they are not | ||
220 | * enabled when powermode=MODE_ENABLE | ||
221 | */ | ||
222 | if (disable_powergate) { | ||
223 | /* save off current powergate, clk state. | ||
224 | * set gpu module's can_powergate = 0. | ||
225 | * set gpu module's clk to max. | ||
226 | * while *a* debug session is active there will be no power or | ||
227 | * clocking state changes allowed from mainline code (but they | ||
228 | * should be saved). | ||
229 | */ | ||
230 | |||
231 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, | ||
232 | "module busy"); | ||
233 | err = gk20a_busy(g); | ||
234 | if (err) { | ||
235 | return err; | ||
236 | } | ||
237 | |||
238 | err = nvgpu_cg_pg_disable(g); | ||
239 | |||
240 | if (err == 0) { | ||
241 | dbg_s->is_pg_disabled = true; | ||
242 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, | ||
243 | "pg disabled"); | ||
244 | } | ||
245 | } else { | ||
246 | /* restore (can) powergate, clk state */ | ||
247 | /* release pending exceptions to fault/be handled as usual */ | ||
248 | /*TBD: ordering of these? */ | ||
249 | |||
250 | err = nvgpu_cg_pg_enable(g); | ||
251 | |||
252 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle"); | ||
253 | gk20a_idle(g); | ||
254 | |||
255 | if (err == 0) { | ||
256 | dbg_s->is_pg_disabled = false; | ||
257 | nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, | ||
258 | "pg enabled"); | ||
259 | } | ||
260 | } | ||
261 | |||
262 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done", | ||
263 | g->name, disable_powergate ? "disable" : "enable"); | ||
264 | return err; | ||
265 | } | ||
266 | |||
267 | bool nvgpu_check_and_set_global_reservation( | ||
268 | struct dbg_session_gk20a *dbg_s, | ||
269 | struct dbg_profiler_object_data *prof_obj) | ||
270 | { | ||
271 | struct gk20a *g = dbg_s->g; | ||
272 | |||
273 | if (g->profiler_reservation_count == 0) { | ||
274 | g->global_profiler_reservation_held = true; | ||
275 | g->profiler_reservation_count = 1; | ||
276 | dbg_s->has_profiler_reservation = true; | ||
277 | prof_obj->has_reservation = true; | ||
278 | return true; | ||
279 | } | ||
280 | return false; | ||
281 | } | ||
282 | |||
283 | bool nvgpu_check_and_set_context_reservation( | ||
284 | struct dbg_session_gk20a *dbg_s, | ||
285 | struct dbg_profiler_object_data *prof_obj) | ||
286 | { | ||
287 | struct gk20a *g = dbg_s->g; | ||
288 | |||
289 | /* Assumes that we've already checked that no global reservation | ||
290 | * is in effect. | ||
291 | */ | ||
292 | g->profiler_reservation_count++; | ||
293 | dbg_s->has_profiler_reservation = true; | ||
294 | prof_obj->has_reservation = true; | ||
295 | return true; | ||
296 | } | ||
297 | |||
298 | void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s, | ||
299 | struct dbg_profiler_object_data *prof_obj) | ||
300 | { | ||
301 | struct gk20a *g = dbg_s->g; | ||
302 | |||
303 | g->profiler_reservation_count--; | ||
304 | if (g->profiler_reservation_count < 0) { | ||
305 | nvgpu_err(g, "Negative reservation count!"); | ||
306 | } | ||
307 | dbg_s->has_profiler_reservation = false; | ||
308 | prof_obj->has_reservation = false; | ||
309 | if (prof_obj->ch == NULL) { | ||
310 | g->global_profiler_reservation_held = false; | ||
311 | } | ||
312 | } | ||
313 | |||
314 | int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | ||
315 | { | ||
316 | struct mm_gk20a *mm = &g->mm; | ||
317 | u32 virt_addr_lo; | ||
318 | u32 virt_addr_hi; | ||
319 | u32 inst_pa_page; | ||
320 | int err; | ||
321 | |||
322 | err = gk20a_busy(g); | ||
323 | if (err) { | ||
324 | nvgpu_err(g, "failed to poweron"); | ||
325 | return err; | ||
326 | } | ||
327 | |||
328 | err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block); | ||
329 | if (err) { | ||
330 | return err; | ||
331 | } | ||
332 | |||
333 | g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); | ||
334 | |||
335 | gk20a_perfbuf_reset_streaming(g); | ||
336 | |||
337 | virt_addr_lo = u64_lo32(offset); | ||
338 | virt_addr_hi = u64_hi32(offset); | ||
339 | |||
340 | /* address and size are aligned to 32 bytes, the lowest bits read back | ||
341 | * as zeros */ | ||
342 | gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); | ||
343 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
344 | perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); | ||
345 | gk20a_writel(g, perf_pmasys_outsize_r(), size); | ||
346 | |||
347 | /* this field is aligned to 4K */ | ||
348 | inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; | ||
349 | |||
350 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | ||
351 | * should be written last */ | ||
352 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
353 | perf_pmasys_mem_block_base_f(inst_pa_page) | | ||
354 | nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, | ||
355 | perf_pmasys_mem_block_target_sys_ncoh_f(), | ||
356 | perf_pmasys_mem_block_target_sys_coh_f(), | ||
357 | perf_pmasys_mem_block_target_lfb_f()) | | ||
358 | perf_pmasys_mem_block_valid_true_f()); | ||
359 | |||
360 | gk20a_idle(g); | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | /* must be called with dbg_sessions_lock held */ | ||
/*
 * Disable PMA streaming output: reset the streaming path, zero the
 * output buffer registers and unbind the instance block.
 *
 * Must be called with dbg_sessions_lock held.
 * Returns 0 on success or a negative error code.
 */
int gk20a_perfbuf_disable_locked(struct gk20a *g)
{
	int err = gk20a_busy(g);

	if (err != 0) {
		nvgpu_err(g, "failed to poweron");
		return err;
	}

	gk20a_perfbuf_reset_streaming(g);

	/* Clear the output buffer base/size registers. */
	gk20a_writel(g, perf_pmasys_outbase_r(), 0);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(0));
	gk20a_writel(g, perf_pmasys_outsize_r(), 0);

	/* Unbind the instance block (MEM_BLOCK written last, valid=false). */
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(0) |
			perf_pmasys_mem_block_valid_false_f() |
			perf_pmasys_mem_block_target_f(0));

	gk20a_idle(g);

	return 0;
}