author	Joshua Bakita <bakitajoshua@gmail.com>	2023-06-28 18:24:25 -0400
committer	Joshua Bakita <bakitajoshua@gmail.com>	2023-06-28 18:24:25 -0400
commit	01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch)
tree	4ef34501728a087be24f4ba0af90f91486bf780b /include/gk20a/regops_gk20a.c
parent	306a03d18b305e4e573be3b2931978fa10679eb9 (diff)
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time
being. Only a couple structs are required, so it should be fairly
easy to remove this dependency at some point in the future.
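One plausible way to drop that dependency later (a sketch with hypothetical shims, not something this commit does): the only <nvgpu/...> headers this file pulls in are <nvgpu/log.h>, <nvgpu/bsearch.h>, <nvgpu/bug.h>, and <nvgpu/io.h>, so thin local wrappers over standard kernel primitives could stand in for most of what it uses, e.g.:

	/* hypothetical shim standing in for the <nvgpu/...> includes */
	#include <linux/bug.h>      /* BUG() */
	#include <linux/bsearch.h>  /* bsearch() */
	#include <linux/printk.h>   /* pr_debug()/pr_err()/pr_warn() */
	#define nvgpu_log(g, mask, fmt, ...)  pr_debug(fmt "\n", ##__VA_ARGS__)
	#define nvgpu_err(g, fmt, ...)        pr_err(fmt "\n", ##__VA_ARGS__)
	#define nvgpu_warn(g, fmt, ...)       pr_warn(fmt "\n", ##__VA_ARGS__)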
Diffstat (limited to 'include/gk20a/regops_gk20a.c')
-rw-r--r--	include/gk20a/regops_gk20a.c	472
1 file changed, 472 insertions, 0 deletions
diff --git a/include/gk20a/regops_gk20a.c b/include/gk20a/regops_gk20a.c
new file mode 100644
index 0000000..0aec4f8
--- /dev/null
+++ b/include/gk20a/regops_gk20a.c
@@ -0,0 +1,472 @@
/*
 * Tegra GK20A GPU Debugger Driver Register Ops
 *
 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gk20a.h"
#include "gr_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "regops_gk20a.h"

#include <nvgpu/log.h>
#include <nvgpu/bsearch.h>
#include <nvgpu/bug.h>
#include <nvgpu/io.h>

static int regop_bsearch_range_cmp(const void *pkey, const void *pelem)
{
	u32 key = *(u32 *)pkey;
	struct regop_offset_range *prange = (struct regop_offset_range *)pelem;
	if (key < prange->base) {
		return -1;
	} else if (prange->base <= key && key < (prange->base +
						 (prange->count * 4U))) {
		return 0;
	}
	return 1;
}
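/*
 * Illustration (hypothetical range, not from the original file): each
 * regop_offset_range covers [base, base + count * 4). With a range of
 * { .base = 0x100, .count = 4 }, the comparator above matches the keys
 * 0x100, 0x104, 0x108, and 0x10c, so a bsearch() over a table sorted by
 * base resolves an offset to its containing whitelist range:
 *
 *	found = bsearch(&offset, ranges, nranges, sizeof(*ranges),
 *			regop_bsearch_range_cmp) != NULL;
 */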

static inline bool linear_search(u32 offset, const u32 *list, int size)
{
	int i;
	for (i = 0; i < size; i++) {
		if (list[i] == offset) {
			return true;
		}
	}
	return false;
}

/*
 * In order to perform a context relative op the context has
 * to be created already... which would imply that the
 * context switch mechanism has already been put in place.
 * So by the time we perform such an operation it should always
 * be possible to query for the appropriate context offsets, etc.
 *
 * But note: while the dbg_gpu bind requires a channel fd,
 * it doesn't require an allocated gr/compute obj at that point...
 */
static bool gr_context_info_available(struct gr_gk20a *gr)
{
	int err;

	nvgpu_mutex_acquire(&gr->ctx_mutex);
	err = !gr->ctx_vars.golden_image_initialized;
	nvgpu_mutex_release(&gr->ctx_mutex);
	if (err) {
		return false;
	}

	return true;
}

static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
			     u32 *ctx_rd_count, u32 *ctx_wr_count,
			     struct nvgpu_dbg_reg_op *ops,
			     u32 op_count);

int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
		      struct nvgpu_dbg_reg_op *ops,
		      u64 num_ops,
		      bool *is_current_ctx)
{
	int err = 0;
	unsigned int i;
	struct channel_gk20a *ch = NULL;
	struct gk20a *g = dbg_s->g;
	/*struct gr_gk20a *gr = &g->gr;*/
	u32 data32_lo = 0, data32_hi = 0;
	u32 ctx_rd_count = 0, ctx_wr_count = 0;
	bool skip_read_lo, skip_read_hi;
	bool ok;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);

	/* For vgpu, the regops routines need to be handled in the
	 * context of the server and support for that does not exist.
	 *
	 * The two users of the regops interface are the compute driver
	 * and tools. The compute driver will work without a functional
	 * regops implementation, so we return -ENOSYS. This will allow
	 * compute apps to run with vgpu. Tools will not work in this
	 * configuration and are not required to work at this time. */
	if (g->is_virtual) {
		return -ENOSYS;
	}

	ok = validate_reg_ops(dbg_s,
			      &ctx_rd_count, &ctx_wr_count,
			      ops, num_ops);
	if (!ok) {
		nvgpu_err(g, "invalid op(s)");
		err = -EINVAL;
		/* each op has its own err/status */
		goto clean_up;
	}

	/* be sure that ctx info is in place if there are ctx ops */
	if (ctx_wr_count | ctx_rd_count) {
		if (!gr_context_info_available(&g->gr)) {
			nvgpu_err(g, "gr context data not available");
			return -ENODEV;
		}
	}

	for (i = 0; i < num_ops; i++) {
		/* if it isn't global then it is done in the ctx ops... */
		if (ops[i].type != REGOP(TYPE_GLOBAL)) {
			continue;
		}

		switch (ops[i].op) {

		case REGOP(READ_32):
			ops[i].value_hi = 0;
			ops[i].value_lo = gk20a_readl(g, ops[i].offset);
			nvgpu_log(g, gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
				  ops[i].value_lo, ops[i].offset);

			break;

		case REGOP(READ_64):
			ops[i].value_lo = gk20a_readl(g, ops[i].offset);
			ops[i].value_hi =
				gk20a_readl(g, ops[i].offset + 4);

			nvgpu_log(g, gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
				  ops[i].value_hi, ops[i].value_lo,
				  ops[i].offset);
			break;

		case REGOP(WRITE_32):
		case REGOP(WRITE_64):
			/* some of this appears wonky/unnecessary but
			   we've kept it for compat with existing
			   debugger code. just in case... */
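			/* Illustration (hypothetical values): with
			 * and_n_mask_lo = 0x0000ffff, value_lo = 0x00001234,
			 * and a register currently reading 0xabcd5678, the
			 * read-modify-write below computes
			 * (0xabcd5678 & ~0x0000ffff) | 0x00001234
			 * = 0xabcd1234. A mask of ~0 skips the read and
			 * writes value_lo verbatim. */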
			skip_read_lo = skip_read_hi = false;
			if (ops[i].and_n_mask_lo == ~(u32)0) {
				data32_lo = ops[i].value_lo;
				skip_read_lo = true;
			}

			if ((ops[i].op == REGOP(WRITE_64)) &&
			    (ops[i].and_n_mask_hi == ~(u32)0)) {
				data32_hi = ops[i].value_hi;
				skip_read_hi = true;
			}

			/* read first 32bits */
			if (skip_read_lo == false) {
				data32_lo = gk20a_readl(g, ops[i].offset);
				data32_lo &= ~ops[i].and_n_mask_lo;
				data32_lo |= ops[i].value_lo;
			}

			/* if desired, read second 32bits */
			if ((ops[i].op == REGOP(WRITE_64)) &&
			    !skip_read_hi) {
				data32_hi = gk20a_readl(g, ops[i].offset + 4);
				data32_hi &= ~ops[i].and_n_mask_hi;
				data32_hi |= ops[i].value_hi;
			}

			/* now update first 32bits */
			gk20a_writel(g, ops[i].offset, data32_lo);
			nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
				  data32_lo, ops[i].offset);
			/* if desired, update second 32bits */
			if (ops[i].op == REGOP(WRITE_64)) {
				gk20a_writel(g, ops[i].offset + 4, data32_hi);
				nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
					  data32_hi, ops[i].offset + 4);
			}

			break;

		/* shouldn't happen as we've already screened */
		default:
			BUG();
			err = -EINVAL;
			goto clean_up;
			break;
		}
	}

	if (ctx_wr_count | ctx_rd_count) {
		err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
					    ctx_wr_count, ctx_rd_count,
					    is_current_ctx);
		if (err) {
			nvgpu_warn(g, "failed to perform ctx ops\n");
			goto clean_up;
		}
	}

clean_up:
	nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err);
	return err;
}

static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s,
				struct nvgpu_dbg_reg_op *op)
{
	int err = 0;

	op->status = REGOP(STATUS_SUCCESS);

	switch (op->op) {
	case REGOP(READ_32):
	case REGOP(READ_64):
	case REGOP(WRITE_32):
	case REGOP(WRITE_64):
		break;
	default:
		op->status |= REGOP(STATUS_UNSUPPORTED_OP);
		err = -EINVAL;
		break;
	}

	switch (op->type) {
	case REGOP(TYPE_GLOBAL):
	case REGOP(TYPE_GR_CTX):
	case REGOP(TYPE_GR_CTX_TPC):
	case REGOP(TYPE_GR_CTX_SM):
	case REGOP(TYPE_GR_CTX_CROP):
	case REGOP(TYPE_GR_CTX_ZROP):
	case REGOP(TYPE_GR_CTX_QUAD):
		break;
	/*
	case NVGPU_DBG_GPU_REG_OP_TYPE_FB:
	*/
	default:
		op->status |= REGOP(STATUS_INVALID_TYPE);
		err = -EINVAL;
		break;
	}

	return err;
}

static bool check_whitelists(struct dbg_session_gk20a *dbg_s,
			     struct nvgpu_dbg_reg_op *op, u32 offset)
{
	struct gk20a *g = dbg_s->g;
	bool valid = false;
	struct channel_gk20a *ch;

	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);

	if (op->type == REGOP(TYPE_GLOBAL)) {
		/* search global list */
		valid = g->ops.regops.get_global_whitelist_ranges &&
			!!bsearch(&offset,
				  g->ops.regops.get_global_whitelist_ranges(),
				  g->ops.regops.get_global_whitelist_ranges_count(),
				  sizeof(*g->ops.regops.get_global_whitelist_ranges()),
				  regop_bsearch_range_cmp);
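		/* (bsearch() assumes the whitelist table is sorted by base) */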

		/* if this is a debug session and a channel is bound,
		 * search the context list */
		if ((!valid) && (!dbg_s->is_profiler && ch)) {
			/* binary search context list */
			valid = g->ops.regops.get_context_whitelist_ranges &&
				!!bsearch(&offset,
					  g->ops.regops.get_context_whitelist_ranges(),
					  g->ops.regops.get_context_whitelist_ranges_count(),
					  sizeof(*g->ops.regops.get_context_whitelist_ranges()),
					  regop_bsearch_range_cmp);
		}

		/* if this is a debug session and a channel is bound,
		 * search the runcontrol list */
		if ((!valid) && (!dbg_s->is_profiler && ch)) {
			valid = g->ops.regops.get_runcontrol_whitelist &&
				linear_search(offset,
					      g->ops.regops.get_runcontrol_whitelist(),
					      g->ops.regops.get_runcontrol_whitelist_count());
		}
	} else if (op->type == REGOP(TYPE_GR_CTX)) {
		/* it's a context-relative op */
		if (!ch) {
			nvgpu_err(dbg_s->g, "can't perform ctx regop unless bound");
			op->status = REGOP(STATUS_UNSUPPORTED_OP);
			return valid;
		}

		/* binary search context list */
		valid = g->ops.regops.get_context_whitelist_ranges &&
			!!bsearch(&offset,
				  g->ops.regops.get_context_whitelist_ranges(),
				  g->ops.regops.get_context_whitelist_ranges_count(),
				  sizeof(*g->ops.regops.get_context_whitelist_ranges()),
				  regop_bsearch_range_cmp);

		/* if this is a debug session and a channel is bound,
		 * search the runcontrol list */
		if ((!valid) && (!dbg_s->is_profiler && ch)) {
			valid = g->ops.regops.get_runcontrol_whitelist &&
				linear_search(offset,
					      g->ops.regops.get_runcontrol_whitelist(),
					      g->ops.regops.get_runcontrol_whitelist_count());
		}

	} else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) {
		valid = g->ops.regops.get_qctl_whitelist &&
			linear_search(offset,
				      g->ops.regops.get_qctl_whitelist(),
				      g->ops.regops.get_qctl_whitelist_count());
	}

	return valid;
}

/* note: the op here has already been through validate_reg_op_info */
static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
				  struct nvgpu_dbg_reg_op *op)
{
	int err;
	u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
	bool valid = false;

	op->status = 0;
	offset = op->offset;

	/* support only 24-bit 4-byte aligned offsets */
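	/* e.g. 0x00419ab0 passes (fits in 24 bits, word-aligned), while
	 * 0x01000000 (bit 24 set) or 0x00419ab2 (misaligned) trip the mask
	 * below (hypothetical offsets for illustration) */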
	if (offset & 0xFF000003) {
		nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
		op->status |= REGOP(STATUS_INVALID_OFFSET);
		return -EINVAL;
	}

	valid = check_whitelists(dbg_s, op, offset);
	if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
		valid = check_whitelists(dbg_s, op, offset + 4);
	}

	if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
		err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
						      op->offset,
						      1,
						      &buf_offset_lo,
						      &buf_offset_addr,
						      &num_offsets,
						      op->type == REGOP(TYPE_GR_CTX_QUAD),
						      op->quad);
		if (err) {
			err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g,
								 op->offset,
								 1,
								 &buf_offset_lo,
								 &buf_offset_addr,
								 &num_offsets);

			if (err) {
				op->status |= REGOP(STATUS_INVALID_OFFSET);
				return -EINVAL;
			}
		}
		if (!num_offsets) {
			op->status |= REGOP(STATUS_INVALID_OFFSET);
			return -EINVAL;
		}
	}

	if (!valid) {
		nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
		op->status |= REGOP(STATUS_INVALID_OFFSET);
		return -EINVAL;
	}

	return 0;
}

static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
			     u32 *ctx_rd_count, u32 *ctx_wr_count,
			     struct nvgpu_dbg_reg_op *ops,
			     u32 op_count)
{
	u32 i;
	bool ok = true;
	struct gk20a *g = dbg_s->g;

	/* keep going until the end so every op can get
	 * a separate error code if needed */
	for (i = 0; i < op_count; i++) {

		if (validate_reg_op_info(dbg_s, &ops[i]) != 0) {
			ok = false;
		}

		if (reg_op_is_gr_ctx(ops[i].type)) {
			if (reg_op_is_read(ops[i].op)) {
				(*ctx_rd_count)++;
			} else {
				(*ctx_wr_count)++;
			}
		}

		/* if the "allow_all" flag is enabled, don't validate the offset */
		if (!g->allow_all) {
			if (validate_reg_op_offset(dbg_s, &ops[i]) != 0) {
				ok = false;
			}
		}
	}

	nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
		  *ctx_wr_count, *ctx_rd_count);

	return ok;
}

/* exported for tools like cyclestats, etc */
bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset)
{
	bool valid = !!bsearch(&offset,
			       g->ops.regops.get_global_whitelist_ranges(),
			       g->ops.regops.get_global_whitelist_ranges_count(),
			       sizeof(*g->ops.regops.get_global_whitelist_ranges()),
			       regop_bsearch_range_cmp);
	return valid;
}

bool reg_op_is_gr_ctx(u8 type)
{
	return type == REGOP(TYPE_GR_CTX) ||
	       type == REGOP(TYPE_GR_CTX_TPC) ||
	       type == REGOP(TYPE_GR_CTX_SM) ||
	       type == REGOP(TYPE_GR_CTX_CROP) ||
	       type == REGOP(TYPE_GR_CTX_ZROP) ||
	       type == REGOP(TYPE_GR_CTX_QUAD);
}

bool reg_op_is_read(u8 op)
{
	return op == REGOP(READ_32) ||
	       op == REGOP(READ_64);
}