author      Joshua Bakita <bakitajoshua@gmail.com>    2024-09-25 16:09:09 -0400
committer   Joshua Bakita <bakitajoshua@gmail.com>    2024-09-25 16:09:09 -0400
commit      f347fde22f1297e4f022600d201780d5ead78114 (patch)
tree        76be305d6187003a1e0486ff6e91efb1062ae118    /include/gk20a/regops_gk20a.c
parent      8340d234d78a7d0f46c11a584de538148b78b7cb (diff)
Delete no-longer-needed nvgpu headers (HEAD, master, jbakita-wip)
The dependency on these was removed in commit 8340d234.
Diffstat (limited to 'include/gk20a/regops_gk20a.c')
-rw-r--r--    include/gk20a/regops_gk20a.c    472
1 file changed, 0 insertions(+), 472 deletions(-)
diff --git a/include/gk20a/regops_gk20a.c b/include/gk20a/regops_gk20a.c
deleted file mode 100644
index 0aec4f8..0000000
--- a/include/gk20a/regops_gk20a.c
+++ /dev/null
@@ -1,472 +0,0 @@
/*
 * Tegra GK20A GPU Debugger Driver Register Ops
 *
 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gk20a.h"
#include "gr_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "regops_gk20a.h"

#include <nvgpu/log.h>
#include <nvgpu/bsearch.h>
#include <nvgpu/bug.h>
#include <nvgpu/io.h>

static int regop_bsearch_range_cmp(const void *pkey, const void *pelem)
{
        u32 key = *(u32 *)pkey;
        struct regop_offset_range *prange = (struct regop_offset_range *)pelem;
        if (key < prange->base) {
                return -1;
        } else if (prange->base <= key && key < (prange->base +
                                                 (prange->count * 4U))) {
                return 0;
        }
        return 1;
}

static inline bool linear_search(u32 offset, const u32 *list, int size)
{
        int i;
        for (i = 0; i < size; i++) {
                if (list[i] == offset) {
                        return true;
                }
        }
        return false;
}

/*
 * In order to perform a context relative op, the context has
 * to be created already... which would imply that the
 * context switch mechanism has already been put in place.
 * So by the time we perform such an operation it should always
 * be possible to query for the appropriate context offsets, etc.
 *
 * But note: while the dbg_gpu bind requires a channel fd,
 * it doesn't require an allocated gr/compute obj at that point...
 */
static bool gr_context_info_available(struct gr_gk20a *gr)
{
        int err;

        nvgpu_mutex_acquire(&gr->ctx_mutex);
        err = !gr->ctx_vars.golden_image_initialized;
        nvgpu_mutex_release(&gr->ctx_mutex);
        if (err) {
                return false;
        }

        return true;

}

static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
                             u32 *ctx_rd_count, u32 *ctx_wr_count,
                             struct nvgpu_dbg_reg_op *ops,
                             u32 op_count);


int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
                      struct nvgpu_dbg_reg_op *ops,
                      u64 num_ops,
                      bool *is_current_ctx)
{
        int err = 0;
        unsigned int i;
        struct channel_gk20a *ch = NULL;
        struct gk20a *g = dbg_s->g;
        /*struct gr_gk20a *gr = &g->gr;*/
        u32 data32_lo = 0, data32_hi = 0;
        u32 ctx_rd_count = 0, ctx_wr_count = 0;
        bool skip_read_lo, skip_read_hi;
        bool ok;

        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

        ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);

        /* For vgpu, the regops routines need to be handled in the
         * context of the server and support for that does not exist.
         *
         * The two users of the regops interface are the compute driver
         * and tools. The compute driver will work without a functional
         * regops implementation, so we return -ENOSYS. This will allow
         * compute apps to run with vgpu. Tools will not work in this
         * configuration and are not required to work at this time. */
        if (g->is_virtual) {
                return -ENOSYS;
        }

        ok = validate_reg_ops(dbg_s,
                              &ctx_rd_count, &ctx_wr_count,
                              ops, num_ops);
        if (!ok) {
                nvgpu_err(g, "invalid op(s)");
                err = -EINVAL;
                /* each op has its own err/status */
                goto clean_up;
        }

        /* be sure that ctx info is in place if there are ctx ops */
        if (ctx_wr_count | ctx_rd_count) {
                if (!gr_context_info_available(&g->gr)) {
                        nvgpu_err(g, "gr context data not available");
                        return -ENODEV;
                }
        }

        for (i = 0; i < num_ops; i++) {
                /* if it isn't global then it is done in the ctx ops... */
                if (ops[i].type != REGOP(TYPE_GLOBAL)) {
                        continue;
                }

                switch (ops[i].op) {

                case REGOP(READ_32):
                        ops[i].value_hi = 0;
                        ops[i].value_lo = gk20a_readl(g, ops[i].offset);
                        nvgpu_log(g, gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
                                  ops[i].value_lo, ops[i].offset);

                        break;

                case REGOP(READ_64):
                        ops[i].value_lo = gk20a_readl(g, ops[i].offset);
                        ops[i].value_hi =
                                gk20a_readl(g, ops[i].offset + 4);

                        nvgpu_log(g, gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
                                  ops[i].value_hi, ops[i].value_lo,
                                  ops[i].offset);
                        break;

                case REGOP(WRITE_32):
                case REGOP(WRITE_64):
                        /* some of this appears wonky/unnecessary but
                           we've kept it for compat with existing
                           debugger code. just in case... */
                        skip_read_lo = skip_read_hi = false;
                        if (ops[i].and_n_mask_lo == ~(u32)0) {
                                data32_lo = ops[i].value_lo;
                                skip_read_lo = true;
                        }

                        if ((ops[i].op == REGOP(WRITE_64)) &&
                            (ops[i].and_n_mask_hi == ~(u32)0)) {
                                data32_hi = ops[i].value_hi;
                                skip_read_hi = true;
                        }

                        /* read first 32bits */
                        if (skip_read_lo == false) {
                                data32_lo = gk20a_readl(g, ops[i].offset);
                                data32_lo &= ~ops[i].and_n_mask_lo;
                                data32_lo |= ops[i].value_lo;
                        }

                        /* if desired, read second 32bits */
                        if ((ops[i].op == REGOP(WRITE_64)) &&
                            !skip_read_hi) {
                                data32_hi = gk20a_readl(g, ops[i].offset + 4);
                                data32_hi &= ~ops[i].and_n_mask_hi;
                                data32_hi |= ops[i].value_hi;
                        }

                        /* now update first 32bits */
                        gk20a_writel(g, ops[i].offset, data32_lo);
                        nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
                                  data32_lo, ops[i].offset);
                        /* if desired, update second 32bits */
                        if (ops[i].op == REGOP(WRITE_64)) {
                                gk20a_writel(g, ops[i].offset + 4, data32_hi);
                                nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
                                          data32_hi, ops[i].offset + 4);

                        }


                        break;

                /* shouldn't happen as we've already screened */
                default:
                        BUG();
                        err = -EINVAL;
                        goto clean_up;
                        break;
                }
        }

        if (ctx_wr_count | ctx_rd_count) {
                err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
                                            ctx_wr_count, ctx_rd_count,
                                            is_current_ctx);
                if (err) {
                        nvgpu_warn(g, "failed to perform ctx ops\n");
                        goto clean_up;
                }
        }

 clean_up:
        nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err);
        return err;

}


static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s,
                                struct nvgpu_dbg_reg_op *op)
{
        int err = 0;

        op->status = REGOP(STATUS_SUCCESS);

        switch (op->op) {
        case REGOP(READ_32):
        case REGOP(READ_64):
        case REGOP(WRITE_32):
        case REGOP(WRITE_64):
                break;
        default:
                op->status |= REGOP(STATUS_UNSUPPORTED_OP);
                err = -EINVAL;
                break;
        }

        switch (op->type) {
        case REGOP(TYPE_GLOBAL):
        case REGOP(TYPE_GR_CTX):
        case REGOP(TYPE_GR_CTX_TPC):
        case REGOP(TYPE_GR_CTX_SM):
        case REGOP(TYPE_GR_CTX_CROP):
        case REGOP(TYPE_GR_CTX_ZROP):
        case REGOP(TYPE_GR_CTX_QUAD):
                break;
        /*
        case NVGPU_DBG_GPU_REG_OP_TYPE_FB:
        */
        default:
                op->status |= REGOP(STATUS_INVALID_TYPE);
                err = -EINVAL;
                break;
        }

        return err;
}

static bool check_whitelists(struct dbg_session_gk20a *dbg_s,
                             struct nvgpu_dbg_reg_op *op, u32 offset)
{
        struct gk20a *g = dbg_s->g;
        bool valid = false;
        struct channel_gk20a *ch;

        ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);

        if (op->type == REGOP(TYPE_GLOBAL)) {
                /* search global list */
                valid = g->ops.regops.get_global_whitelist_ranges &&
                        !!bsearch(&offset,
                                  g->ops.regops.get_global_whitelist_ranges(),
                                  g->ops.regops.get_global_whitelist_ranges_count(),
                                  sizeof(*g->ops.regops.get_global_whitelist_ranges()),
                                  regop_bsearch_range_cmp);

                /* if debug session and channel is bound search context list */
                if ((!valid) && (!dbg_s->is_profiler && ch)) {
                        /* binary search context list */
                        valid = g->ops.regops.get_context_whitelist_ranges &&
                                !!bsearch(&offset,
                                          g->ops.regops.get_context_whitelist_ranges(),
                                          g->ops.regops.get_context_whitelist_ranges_count(),
                                          sizeof(*g->ops.regops.get_context_whitelist_ranges()),
                                          regop_bsearch_range_cmp);
                }

                /* if debug session and channel is bound search runcontrol list */
                if ((!valid) && (!dbg_s->is_profiler && ch)) {
                        valid = g->ops.regops.get_runcontrol_whitelist &&
                                linear_search(offset,
                                              g->ops.regops.get_runcontrol_whitelist(),
                                              g->ops.regops.get_runcontrol_whitelist_count());
                }
        } else if (op->type == REGOP(TYPE_GR_CTX)) {
                /* it's a context-relative op */
                if (!ch) {
                        nvgpu_err(dbg_s->g, "can't perform ctx regop unless bound");
                        op->status = REGOP(STATUS_UNSUPPORTED_OP);
                        return valid;
                }

                /* binary search context list */
                valid = g->ops.regops.get_context_whitelist_ranges &&
                        !!bsearch(&offset,
                                  g->ops.regops.get_context_whitelist_ranges(),
                                  g->ops.regops.get_context_whitelist_ranges_count(),
                                  sizeof(*g->ops.regops.get_context_whitelist_ranges()),
                                  regop_bsearch_range_cmp);

                /* if debug session and channel is bound search runcontrol list */
                if ((!valid) && (!dbg_s->is_profiler && ch)) {
                        valid = g->ops.regops.get_runcontrol_whitelist &&
                                linear_search(offset,
                                              g->ops.regops.get_runcontrol_whitelist(),
                                              g->ops.regops.get_runcontrol_whitelist_count());
                }

        } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) {
                valid = g->ops.regops.get_qctl_whitelist &&
                        linear_search(offset,
                                      g->ops.regops.get_qctl_whitelist(),
                                      g->ops.regops.get_qctl_whitelist_count());
        }

        return valid;
}

/* note: the op here has already been through validate_reg_op_info */
static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
                                  struct nvgpu_dbg_reg_op *op)
{
        int err;
        u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
        bool valid = false;

        op->status = 0;
        offset = op->offset;

        /* support only 24-bit 4-byte aligned offsets */
        if (offset & 0xFF000003) {
                nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
                op->status |= REGOP(STATUS_INVALID_OFFSET);
                return -EINVAL;
        }

        valid = check_whitelists(dbg_s, op, offset);
        if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
                valid = check_whitelists(dbg_s, op, offset + 4);
        }

        if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
                err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
                                                      op->offset,
                                                      1,
                                                      &buf_offset_lo,
                                                      &buf_offset_addr,
                                                      &num_offsets,
                                                      op->type == REGOP(TYPE_GR_CTX_QUAD),
                                                      op->quad);
                if (err) {
                        err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g,
                                                                 op->offset,
                                                                 1,
                                                                 &buf_offset_lo,
                                                                 &buf_offset_addr,
                                                                 &num_offsets);

                        if (err) {
                                op->status |= REGOP(STATUS_INVALID_OFFSET);
                                return -EINVAL;
                        }
                }
                if (!num_offsets) {
                        op->status |= REGOP(STATUS_INVALID_OFFSET);
                        return -EINVAL;
                }
        }

        if (!valid) {
                nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
                op->status |= REGOP(STATUS_INVALID_OFFSET);
                return -EINVAL;
        }

        return 0;
}

static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
                             u32 *ctx_rd_count, u32 *ctx_wr_count,
                             struct nvgpu_dbg_reg_op *ops,
                             u32 op_count)
{
        u32 i;
        bool ok = true;
        struct gk20a *g = dbg_s->g;

        /* keep going until the end so every op can get
         * a separate error code if needed */
        for (i = 0; i < op_count; i++) {

                if (validate_reg_op_info(dbg_s, &ops[i]) != 0) {
                        ok = false;
                }

                if (reg_op_is_gr_ctx(ops[i].type)) {
                        if (reg_op_is_read(ops[i].op)) {
                                (*ctx_rd_count)++;
                        } else {
                                (*ctx_wr_count)++;
                        }
                }

433 /* if "allow_all" flag enabled, dont validate offset */
                if (!g->allow_all) {
                        if (validate_reg_op_offset(dbg_s, &ops[i]) != 0) {
                                ok = false;
                        }
                }
        }

        nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
                  *ctx_wr_count, *ctx_rd_count);

        return ok;
}

/* exported for tools like cyclestats, etc */
bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset)
{
        bool valid = !!bsearch(&offset,
                               g->ops.regops.get_global_whitelist_ranges(),
                               g->ops.regops.get_global_whitelist_ranges_count(),
                               sizeof(*g->ops.regops.get_global_whitelist_ranges()),
                               regop_bsearch_range_cmp);
        return valid;
}

bool reg_op_is_gr_ctx(u8 type)
{
        return type == REGOP(TYPE_GR_CTX) ||
               type == REGOP(TYPE_GR_CTX_TPC) ||
               type == REGOP(TYPE_GR_CTX_SM) ||
               type == REGOP(TYPE_GR_CTX_CROP) ||
               type == REGOP(TYPE_GR_CTX_ZROP) ||
               type == REGOP(TYPE_GR_CTX_QUAD);
}

bool reg_op_is_read(u8 op)
{
        return op == REGOP(READ_32) ||
               op == REGOP(READ_64);
}