Diffstat (limited to 'include/gk20a')
-rw-r--r--  include/gk20a/ce2_gk20a.c         576
-rw-r--r--  include/gk20a/ce2_gk20a.h         156
-rw-r--r--  include/gk20a/clk_gk20a.h         134
-rw-r--r--  include/gk20a/css_gr_gk20a.c      636
-rw-r--r--  include/gk20a/css_gr_gk20a.h      151
-rw-r--r--  include/gk20a/dbg_gpu_gk20a.c     388
-rw-r--r--  include/gk20a/dbg_gpu_gk20a.h     147
-rw-r--r--  include/gk20a/fecs_trace_gk20a.c  744
-rw-r--r--  include/gk20a/fecs_trace_gk20a.h   45
-rw-r--r--  include/gk20a/fence_gk20a.c       319
-rw-r--r--  include/gk20a/fence_gk20a.h       100
-rw-r--r--  include/gk20a/fifo_gk20a.c       4641
-rw-r--r--  include/gk20a/fifo_gk20a.h        472
-rw-r--r--  include/gk20a/flcn_gk20a.c        759
-rw-r--r--  include/gk20a/flcn_gk20a.h         29
-rw-r--r--  include/gk20a/gk20a.c             595
-rw-r--r--  include/gk20a/gk20a.h              33
-rw-r--r--  include/gk20a/gr_ctx_gk20a.c      486
-rw-r--r--  include/gk20a/gr_ctx_gk20a.h      206
-rw-r--r--  include/gk20a/gr_ctx_gk20a_sim.c  356
-rw-r--r--  include/gk20a/gr_gk20a.c         9090
-rw-r--r--  include/gk20a/gr_gk20a.h          852
-rw-r--r--  include/gk20a/gr_pri_gk20a.h      261
-rw-r--r--  include/gk20a/mm_gk20a.c          654
-rw-r--r--  include/gk20a/mm_gk20a.h          155
-rw-r--r--  include/gk20a/pmu_gk20a.c         879
-rw-r--r--  include/gk20a/pmu_gk20a.h          80
-rw-r--r--  include/gk20a/regops_gk20a.c      472
-rw-r--r--  include/gk20a/regops_gk20a.h       90
29 files changed, 0 insertions, 23506 deletions
diff --git a/include/gk20a/ce2_gk20a.c b/include/gk20a/ce2_gk20a.c
deleted file mode 100644
index 2a40b08..0000000
--- a/include/gk20a/ce2_gk20a.c
+++ /dev/null
@@ -1,576 +0,0 @@
1/*
2 * GK20A Graphics Copy Engine (gr host)
3 *
4 * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/dma.h>
27#include <nvgpu/os_sched.h>
28#include <nvgpu/log.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/io.h>
31#include <nvgpu/utils.h>
32#include <nvgpu/channel.h>
33#include <nvgpu/power_features/cg.h>
34
35#include "gk20a.h"
36#include "gk20a/fence_gk20a.h"
37
38#include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
39#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
40#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
41#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
42#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
44#include <nvgpu/barrier.h>
45
46/*
47 * Copy engine defines line size in pixels
48 */
49#define MAX_CE_SHIFT 31 /* 4Gpixels -1 */
50#define MAX_CE_MASK ((u32) (~(~0U << MAX_CE_SHIFT)))
51#define MAX_CE_ALIGN(a) (a & MAX_CE_MASK)
52
53
54static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
55{
56 nvgpu_log(g, gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
57
58 return ce2_intr_status_nonblockpipe_pending_f();
59}
60
61static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
62{
63 nvgpu_log(g, gpu_dbg_intr, "ce2 blocking pipe interrupt\n");
64
65 return ce2_intr_status_blockpipe_pending_f();
66}
67
68static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
69{
70 nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt\n");
71
72 return ce2_intr_status_launcherr_pending_f();
73}
74
75void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
76{
77 u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
78 u32 clear_intr = 0;
79
80 nvgpu_log(g, gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr);
81
82	/* clear blocking interrupts: they exhibit broken behavior */
83 if (ce2_intr & ce2_intr_status_blockpipe_pending_f()) {
84 clear_intr |= ce2_blockpipe_isr(g, ce2_intr);
85 }
86
87 if (ce2_intr & ce2_intr_status_launcherr_pending_f()) {
88 clear_intr |= ce2_launcherr_isr(g, ce2_intr);
89 }
90
91 gk20a_writel(g, ce2_intr_status_r(), clear_intr);
92 return;
93}
94
95u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
96{
97 u32 ops = 0;
98 u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
99
100 nvgpu_log(g, gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);
101
102 if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) {
103 gk20a_writel(g, ce2_intr_status_r(),
104 ce2_nonblockpipe_isr(g, ce2_intr));
105 ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE |
106 GK20A_NONSTALL_OPS_POST_EVENTS);
107 }
108 return ops;
109}
110
111/* static CE app api */
112static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
113{
114 u32 i;
115
116 for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
117 struct gk20a_fence **fence = &ce_ctx->postfences[i];
118 if (*fence) {
119 gk20a_fence_put(*fence);
120 }
121 *fence = NULL;
122 }
123}
124
125/* this api is expected to be called under nvgpu_mutex_acquire(&ce_app->app_mutex) */
126static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
127{
128 struct nvgpu_list_node *list = &ce_ctx->list;
129
130 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
131
132 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
133
134 if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
135 gk20a_ce_put_fences(ce_ctx);
136 nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
137 }
138
139 /*
140 * free the channel
141 * gk20a_channel_close() will also unbind the channel from TSG
142 */
143 gk20a_channel_close(ce_ctx->ch);
144 nvgpu_ref_put(&ce_ctx->tsg->refcount, gk20a_tsg_release);
145
146 /* housekeeping on app */
147 if (list->prev && list->next) {
148 nvgpu_list_del(list);
149 }
150
151 nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
152 nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);
153
154 nvgpu_kfree(ce_ctx->g, ce_ctx);
155}
156
157static inline unsigned int gk20a_ce_get_method_size(int request_operation,
158 u64 size)
159{
160 /* failure size */
161 unsigned int methodsize = UINT_MAX;
162 unsigned int iterations = 0;
163 u32 shift;
164 u64 chunk = size;
165 u32 height, width;
166
167 while (chunk) {
168 iterations++;
169
170 shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
171 MAX_CE_SHIFT;
172 width = chunk >> shift;
173 height = 1 << shift;
174 width = MAX_CE_ALIGN(width);
175
176 chunk -= (u64) height * width;
177 }
178
179 if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
180 methodsize = (2 + (16 * iterations)) * sizeof(u32);
181 } else if (request_operation & NVGPU_CE_MEMSET) {
182 methodsize = (2 + (15 * iterations)) * sizeof(u32);
183 }
184
185 return methodsize;
186}
187
188int gk20a_ce_prepare_submit(u64 src_buf,
189 u64 dst_buf,
190 u64 size,
191 u32 *cmd_buf_cpu_va,
192 u32 max_cmd_buf_size,
193 unsigned int payload,
194 int launch_flags,
195 int request_operation,
196 u32 dma_copy_class)
197{
198 u32 launch = 0;
199 u32 methodSize = 0;
200 u64 offset = 0;
201 u64 chunk_size = 0;
202 u64 chunk = size;
203
204 /* failure case handling */
205 if ((gk20a_ce_get_method_size(request_operation, size) >
206 max_cmd_buf_size) || (!size) ||
207 (request_operation > NVGPU_CE_MEMSET)) {
208 return 0;
209 }
210
211 /* set the channel object */
212 cmd_buf_cpu_va[methodSize++] = 0x20018000;
213 cmd_buf_cpu_va[methodSize++] = dma_copy_class;
214
215 /*
216	 * The purpose is to clear the memory in 2D rectangles. We use ffs to
217	 * determine the number of lines to copy. The only constraint is that the
218	 * maximum number of pixels per line is 4Gpix - 1, which is awkward for
219	 * calculation, so we settle on 2Gpix per line to make the calculation
220	 * more agreeable
221 */
222
223	/* The copy engine in 2D mode can have (2^32 - 1) x (2^32 - 1) pixels in
224	 * a single submit; we are going to clear a range of up to 2Gpix across
225	 * multiple lines. Because we want the copy to be byte aligned we will be
226	 * using 1-byte pixels */
227
228 /*
229 * per iteration
230 * <------------------------- 40 bits ------------------------------>
231 * 1 <------ ffs ------->
232 * <-----------up to 30 bits----------->
233 */
234 while (chunk) {
235 u32 width, height, shift;
236
237 /*
238 * We will be aligning to bytes, making the maximum number of
239		 * pixels per line 2G
240 */
241
242 shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
243 MAX_CE_SHIFT;
244 height = chunk >> shift;
245 width = 1 << shift;
246 height = MAX_CE_ALIGN(height);
247
248 chunk_size = (u64) height * width;
249
250 /* reset launch flag */
251 launch = 0;
252
253 if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
254 /* setup the source */
255 cmd_buf_cpu_va[methodSize++] = 0x20028100;
256 cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf +
257 offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
258 cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf +
259 offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
260
261 cmd_buf_cpu_va[methodSize++] = 0x20018098;
262 if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) {
263 cmd_buf_cpu_va[methodSize++] = 0x00000000;
264 } else if (launch_flags &
265 NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) {
266 cmd_buf_cpu_va[methodSize++] = 0x00000002;
267 } else {
268 cmd_buf_cpu_va[methodSize++] = 0x00000001;
269 }
270
271 launch |= 0x00001000;
272 } else if (request_operation & NVGPU_CE_MEMSET) {
273 /* Remap from component A on 1 byte wide pixels */
274 cmd_buf_cpu_va[methodSize++] = 0x200181c2;
275 cmd_buf_cpu_va[methodSize++] = 0x00000004;
276
277 cmd_buf_cpu_va[methodSize++] = 0x200181c0;
278 cmd_buf_cpu_va[methodSize++] = payload;
279
280 launch |= 0x00000400;
281 } else {
282 /* Illegal size */
283 return 0;
284 }
285
286 /* setup the destination/output */
287 cmd_buf_cpu_va[methodSize++] = 0x20068102;
288 cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf +
289 offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
290 cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf +
291 offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
292 /* Pitch in/out */
293 cmd_buf_cpu_va[methodSize++] = width;
294 cmd_buf_cpu_va[methodSize++] = width;
295 /* width and line count */
296 cmd_buf_cpu_va[methodSize++] = width;
297 cmd_buf_cpu_va[methodSize++] = height;
298
299 cmd_buf_cpu_va[methodSize++] = 0x20018099;
300 if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) {
301 cmd_buf_cpu_va[methodSize++] = 0x00000000;
302 } else if (launch_flags &
303 NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) {
304 cmd_buf_cpu_va[methodSize++] = 0x00000002;
305 } else {
306 cmd_buf_cpu_va[methodSize++] = 0x00000001;
307 }
308
309 launch |= 0x00002005;
310
311 if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR) {
312 launch |= 0x00000000;
313 } else {
314 launch |= 0x00000080;
315 }
316
317 if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR) {
318 launch |= 0x00000000;
319 } else {
320 launch |= 0x00000100;
321 }
322
323 cmd_buf_cpu_va[methodSize++] = 0x200180c0;
324 cmd_buf_cpu_va[methodSize++] = launch;
325 offset += chunk_size;
326 chunk -= chunk_size;
327 }
328
329 return methodSize;
330}
331
332/* global CE app related apis */
333int gk20a_init_ce_support(struct gk20a *g)
334{
335 struct gk20a_ce_app *ce_app = &g->ce_app;
336 int err;
337 u32 ce_reset_mask;
338
339 ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g);
340
341 g->ops.mc.reset(g, ce_reset_mask);
342
343 nvgpu_cg_slcg_ce2_load_enable(g);
344
345 nvgpu_cg_blcg_ce_load_enable(g);
346
347 if (ce_app->initialised) {
348		/* this is assumed to happen during the GPU poweron/poweroff sequence */
349 ce_app->app_state = NVGPU_CE_ACTIVE;
350 return 0;
351 }
352
353 nvgpu_log(g, gpu_dbg_fn, "ce: init");
354
355 err = nvgpu_mutex_init(&ce_app->app_mutex);
356 if (err) {
357 return err;
358 }
359
360 nvgpu_mutex_acquire(&ce_app->app_mutex);
361
362 nvgpu_init_list_node(&ce_app->allocated_contexts);
363 ce_app->ctx_count = 0;
364 ce_app->next_ctx_id = 0;
365 ce_app->initialised = true;
366 ce_app->app_state = NVGPU_CE_ACTIVE;
367
368 nvgpu_mutex_release(&ce_app->app_mutex);
369
370 if (g->ops.ce2.init_prod_values != NULL) {
371 g->ops.ce2.init_prod_values(g);
372 }
373
374 nvgpu_log(g, gpu_dbg_cde_ctx, "ce: init finished");
375
376 return 0;
377}
378
379void gk20a_ce_destroy(struct gk20a *g)
380{
381 struct gk20a_ce_app *ce_app = &g->ce_app;
382 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
383
384 if (!ce_app->initialised) {
385 return;
386 }
387
388 ce_app->app_state = NVGPU_CE_SUSPEND;
389 ce_app->initialised = false;
390
391 nvgpu_mutex_acquire(&ce_app->app_mutex);
392
393 nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
394 &ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
395 gk20a_ce_delete_gpu_context(ce_ctx);
396 }
397
398 nvgpu_init_list_node(&ce_app->allocated_contexts);
399 ce_app->ctx_count = 0;
400 ce_app->next_ctx_id = 0;
401
402 nvgpu_mutex_release(&ce_app->app_mutex);
403
404 nvgpu_mutex_destroy(&ce_app->app_mutex);
405}
406
407void gk20a_ce_suspend(struct gk20a *g)
408{
409 struct gk20a_ce_app *ce_app = &g->ce_app;
410
411 if (!ce_app->initialised) {
412 return;
413 }
414
415 ce_app->app_state = NVGPU_CE_SUSPEND;
416
417 return;
418}
419
420/* CE app utility functions */
421u32 gk20a_ce_create_context(struct gk20a *g,
422 int runlist_id,
423 int timeslice,
424 int runlist_level)
425{
426 struct gk20a_gpu_ctx *ce_ctx;
427 struct gk20a_ce_app *ce_app = &g->ce_app;
428 struct nvgpu_setup_bind_args setup_bind_args;
429 u32 ctx_id = ~0;
430 int err = 0;
431
432 if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
433 return ctx_id;
434 }
435
436 ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx));
437 if (!ce_ctx) {
438 return ctx_id;
439 }
440
441 err = nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex);
442 if (err) {
443 nvgpu_kfree(g, ce_ctx);
444 return ctx_id;
445 }
446
447 ce_ctx->g = g;
448
449 ce_ctx->cmd_buf_read_queue_offset = 0;
450
451 ce_ctx->vm = g->mm.ce.vm;
452
453 /* allocate a tsg if needed */
454 ce_ctx->tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
455 if (!ce_ctx->tsg) {
456 nvgpu_err(g, "ce: gk20a tsg not available");
457 err = -ENOMEM;
458 goto end;
459 }
460
461	/* a kernel client always needs a privileged channel */
462 ce_ctx->ch = gk20a_open_new_channel(g, runlist_id, true,
463 nvgpu_current_pid(g), nvgpu_current_tid(g));
464 if (!ce_ctx->ch) {
465 nvgpu_err(g, "ce: gk20a channel not available");
466 err = -ENOMEM;
467 goto end;
468 }
469 ce_ctx->ch->timeout.enabled = false;
470
471 /* bind the channel to the vm */
472 err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
473 if (err) {
474 nvgpu_err(g, "ce: could not bind vm");
475 goto end;
476 }
477
478 err = gk20a_tsg_bind_channel(ce_ctx->tsg, ce_ctx->ch);
479 if (err) {
480 nvgpu_err(g, "ce: unable to bind to tsg");
481 goto end;
482 }
483
484 setup_bind_args.num_gpfifo_entries = 1024;
485 setup_bind_args.num_inflight_jobs = 0;
486 setup_bind_args.flags = 0;
487 /* allocate gpfifo (1024 should be more than enough) */
488 err = nvgpu_channel_setup_bind(ce_ctx->ch, &setup_bind_args);
489 if (err) {
490 nvgpu_err(g, "ce: unable to setup and bind channel");
491 goto end;
492 }
493
494 /* allocate command buffer from sysmem */
495 err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
496 NVGPU_CE_MAX_INFLIGHT_JOBS *
497 NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
498 &ce_ctx->cmd_buf_mem);
499 if (err) {
500 nvgpu_err(g,
501 "ce: could not allocate command buffer for CE context");
502 goto end;
503 }
504
505 memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);
506
507 /* -1 means default channel timeslice value */
508 if (timeslice != -1) {
509 err = gk20a_fifo_tsg_set_timeslice(ce_ctx->tsg, timeslice);
510 if (err) {
511 nvgpu_err(g,
512 "ce: could not set the channel timeslice value for CE context");
513 goto end;
514 }
515 }
516
517 /* -1 means default channel runlist level */
518 if (runlist_level != -1) {
519 err = gk20a_tsg_set_runlist_interleave(ce_ctx->tsg,
520 runlist_level);
521 if (err) {
522 nvgpu_err(g,
523 "ce: could not set the runlist interleave for CE context");
524 goto end;
525 }
526 }
527
528 nvgpu_mutex_acquire(&ce_app->app_mutex);
529 ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
530 nvgpu_list_add(&ce_ctx->list, &ce_app->allocated_contexts);
531 ++ce_app->next_ctx_id;
532 ++ce_app->ctx_count;
533 nvgpu_mutex_release(&ce_app->app_mutex);
534
535 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
536
537end:
538 if (ctx_id == (u32)~0) {
539 nvgpu_mutex_acquire(&ce_app->app_mutex);
540 gk20a_ce_delete_gpu_context(ce_ctx);
541 nvgpu_mutex_release(&ce_app->app_mutex);
542 }
543 return ctx_id;
544
545}
546
547void gk20a_ce_delete_context(struct gk20a *g,
548 u32 ce_ctx_id)
549{
550 gk20a_ce_delete_context_priv(g, ce_ctx_id);
551}
552
553void gk20a_ce_delete_context_priv(struct gk20a *g,
554 u32 ce_ctx_id)
555{
556 struct gk20a_ce_app *ce_app = &g->ce_app;
557 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
558
559 if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
560 return;
561 }
562
563 nvgpu_mutex_acquire(&ce_app->app_mutex);
564
565 nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
566 &ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
567 if (ce_ctx->ctx_id == ce_ctx_id) {
568 gk20a_ce_delete_gpu_context(ce_ctx);
569 --ce_app->ctx_count;
570 break;
571 }
572 }
573
574 nvgpu_mutex_release(&ce_app->app_mutex);
575 return;
576}
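
For reference, the 2D-rectangle chunking used above by gk20a_ce_get_method_size() and gk20a_ce_prepare_submit() can be reproduced in a few lines. The following is a minimal standalone sketch, not driver code: it assumes a hosted C environment, uses __builtin_ctz in place of the kernel's __ffs, and picks an arbitrary example size.

```c
/*
 * Standalone sketch of the 2D-rectangle chunking in gk20a_ce_prepare_submit().
 * Assumptions: hosted C, __builtin_ctz stands in for the kernel's __ffs.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_CE_SHIFT	31u	/* 4Gpixels - 1 */
#define MAX_CE_MASK	((uint32_t)(~(~0u << MAX_CE_SHIFT)))
#define MAX_CE_ALIGN(a)	((a) & MAX_CE_MASK)

static unsigned int ce_chunk_iterations(uint64_t size)
{
	uint64_t chunk = size;
	unsigned int iterations = 0;

	while (chunk != 0) {
		/* lowest set bit of the 31-bit-aligned remainder picks the line width */
		uint32_t low = (uint32_t)MAX_CE_ALIGN(chunk);
		uint32_t shift = low ? (uint32_t)__builtin_ctz(low) : MAX_CE_SHIFT;
		uint64_t width = 1ull << shift;			/* bytes per line (1-byte pixels) */
		uint64_t height = MAX_CE_ALIGN(chunk >> shift);	/* number of lines */

		iterations++;
		printf("iteration %u: width=%llu height=%llu covers %llu bytes\n",
		       iterations, (unsigned long long)width,
		       (unsigned long long)height,
		       (unsigned long long)(width * height));
		chunk -= width * height;
	}
	return iterations;
}

int main(void)
{
	/* e.g. a 1 TiB + 3 byte transfer is covered by two rectangles here */
	printf("total iterations: %u\n", ce_chunk_iterations((1ull << 40) + 3));
	return 0;
}
```

Each iteration clears one rectangle whose width is a power of two, which is what lets the method stream fit in the per-iteration budget counted by gk20a_ce_get_method_size().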
diff --git a/include/gk20a/ce2_gk20a.h b/include/gk20a/ce2_gk20a.h
deleted file mode 100644
index df3a0e8..0000000
--- a/include/gk20a/ce2_gk20a.h
+++ /dev/null
@@ -1,156 +0,0 @@
1/*
2 * drivers/video/tegra/host/gk20a/ce2_gk20a.h
3 *
4 * GK20A graphics copy engine (gr host)
5 *
6 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26#ifndef NVGPU_GK20A_CE2_GK20A_H
27#define NVGPU_GK20A_CE2_GK20A_H
28
29struct channel_gk20a;
30struct tsg_gk20a;
31
32void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
33u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
34
35/* CE command utility macros */
36#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
37#define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff
38
39#define NVGPU_CE_MAX_INFLIGHT_JOBS 32
40#define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256
41
42/* dma launch_flags */
43enum {
44 /* location */
45 NVGPU_CE_SRC_LOCATION_COHERENT_SYSMEM = (1 << 0),
46 NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM = (1 << 1),
47 NVGPU_CE_SRC_LOCATION_LOCAL_FB = (1 << 2),
48 NVGPU_CE_DST_LOCATION_COHERENT_SYSMEM = (1 << 3),
49 NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM = (1 << 4),
50 NVGPU_CE_DST_LOCATION_LOCAL_FB = (1 << 5),
51
52 /* memory layout */
53 NVGPU_CE_SRC_MEMORY_LAYOUT_PITCH = (1 << 6),
54 NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 7),
55 NVGPU_CE_DST_MEMORY_LAYOUT_PITCH = (1 << 8),
56 NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 9),
57
58 /* transfer type */
59 NVGPU_CE_DATA_TRANSFER_TYPE_PIPELINED = (1 << 10),
60 NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED = (1 << 11),
61};
62
63/* CE operation mode */
64enum {
65 NVGPU_CE_PHYS_MODE_TRANSFER = (1 << 0),
66 NVGPU_CE_MEMSET = (1 << 1),
67};
68
69/* CE app state machine flags */
70enum {
71 NVGPU_CE_ACTIVE = (1 << 0),
72 NVGPU_CE_SUSPEND = (1 << 1),
73};
74
75/* gpu context state machine flags */
76enum {
77 NVGPU_CE_GPU_CTX_ALLOCATED = (1 << 0),
78 NVGPU_CE_GPU_CTX_DELETED = (1 << 1),
79};
80
81/* global ce app db */
82struct gk20a_ce_app {
83 bool initialised;
84 struct nvgpu_mutex app_mutex;
85 int app_state;
86
87 struct nvgpu_list_node allocated_contexts;
88 u32 ctx_count;
89 u32 next_ctx_id;
90};
91
92/* ce context db */
93struct gk20a_gpu_ctx {
94 struct gk20a *g;
95 u32 ctx_id;
96 struct nvgpu_mutex gpu_ctx_mutex;
97 int gpu_ctx_state;
98
99 /* tsg related data */
100 struct tsg_gk20a *tsg;
101
102 /* channel related data */
103 struct channel_gk20a *ch;
104 struct vm_gk20a *vm;
105
106 /* cmd buf mem_desc */
107 struct nvgpu_mem cmd_buf_mem;
108 struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
109
110 struct nvgpu_list_node list;
111
112 u32 cmd_buf_read_queue_offset;
113};
114
115static inline struct gk20a_gpu_ctx *
116gk20a_gpu_ctx_from_list(struct nvgpu_list_node *node)
117{
118 return (struct gk20a_gpu_ctx *)
119 ((uintptr_t)node - offsetof(struct gk20a_gpu_ctx, list));
120};
121
122/* global CE app related apis */
123int gk20a_init_ce_support(struct gk20a *g);
124void gk20a_ce_suspend(struct gk20a *g);
125void gk20a_ce_destroy(struct gk20a *g);
126
127/* CE app utility functions */
128u32 gk20a_ce_create_context(struct gk20a *g,
129 int runlist_id,
130 int timeslice,
131 int runlist_level);
132int gk20a_ce_execute_ops(struct gk20a *g,
133 u32 ce_ctx_id,
134 u64 src_buf,
135 u64 dst_buf,
136 u64 size,
137 unsigned int payload,
138 int launch_flags,
139 int request_operation,
140 u32 submit_flags,
141 struct gk20a_fence **gk20a_fence_out);
142void gk20a_ce_delete_context_priv(struct gk20a *g,
143 u32 ce_ctx_id);
144void gk20a_ce_delete_context(struct gk20a *g,
145 u32 ce_ctx_id);
146int gk20a_ce_prepare_submit(u64 src_buf,
147 u64 dst_buf,
148 u64 size,
149 u32 *cmd_buf_cpu_va,
150 u32 max_cmd_buf_size,
151 unsigned int payload,
152 int launch_flags,
153 int request_operation,
154 u32 dma_copy_class);
155
156#endif /*NVGPU_GK20A_CE2_GK20A_H*/
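
The gk20a_gpu_ctx_from_list() helper above is the usual container-of pattern: an embedded nvgpu_list_node is converted back to its containing structure by subtracting the member offset. A standalone illustration follows; the demo types are hypothetical stand-ins, not the driver's.

```c
/* Standalone illustration of the container-of arithmetic used by
 * gk20a_gpu_ctx_from_list(); the types here are demo stand-ins. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_list_node {
	struct demo_list_node *prev, *next;
};

struct demo_gpu_ctx {
	unsigned int ctx_id;
	struct demo_list_node list;	/* embedded node, like gk20a_gpu_ctx::list */
};

static struct demo_gpu_ctx *demo_ctx_from_list(struct demo_list_node *node)
{
	/* subtract the offset of 'list' to recover the containing struct */
	return (struct demo_gpu_ctx *)
		((uintptr_t)node - offsetof(struct demo_gpu_ctx, list));
}

int main(void)
{
	struct demo_gpu_ctx ctx = { .ctx_id = 7, .list = { NULL, NULL } };

	printf("recovered ctx_id = %u\n", demo_ctx_from_list(&ctx.list)->ctx_id);
	return 0;
}
```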
diff --git a/include/gk20a/clk_gk20a.h b/include/gk20a/clk_gk20a.h
deleted file mode 100644
index b8ec942..0000000
--- a/include/gk20a/clk_gk20a.h
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * Copyright (c) 2011 - 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22#ifndef CLK_GK20A_H
23#define CLK_GK20A_H
24
25#include <nvgpu/lock.h>
26
27#if defined(CONFIG_COMMON_CLK)
28#include <linux/clk-provider.h>
29#endif
30
31#define GPUFREQ_TABLE_END ~(u32)1
32enum {
33 /* only one PLL for gk20a */
34 GK20A_GPC_PLL = 0,
35 /* 2 PLL revisions for gm20b */
36 GM20B_GPC_PLL_B1,
37 GM20B_GPC_PLL_C1,
38};
39
40enum gpc_pll_mode {
41 GPC_PLL_MODE_F = 0, /* fixed frequency mode a.k.a legacy mode */
42 GPC_PLL_MODE_DVFS, /* DVFS mode a.k.a NA mode */
43};
44
45struct na_dvfs {
46 u32 n_int;
47 u32 sdm_din;
48 int dfs_coeff;
49 int dfs_det_max;
50 int dfs_ext_cal;
51 int uv_cal;
52 int mv;
53};
54
55struct pll {
56 u32 id;
57 u32 clk_in; /* KHz */
58 u32 M;
59 u32 N;
60 u32 PL;
61 u32 freq; /* KHz */
62 bool enabled;
63 enum gpc_pll_mode mode;
64 struct na_dvfs dvfs;
65};
66
67struct pll_parms {
68 u32 min_freq, max_freq; /* KHz */
69 u32 min_vco, max_vco; /* KHz */
70 u32 min_u, max_u; /* KHz */
71 u32 min_M, max_M;
72 u32 min_N, max_N;
73 u32 min_PL, max_PL;
74 /* NA mode parameters*/
75 int coeff_slope, coeff_offs; /* coeff = slope * V + offs */
76 int uvdet_slope, uvdet_offs; /* uV = slope * det + offs */
77 u32 vco_ctrl;
78 /*
79 * Timing parameters in us. Lock timeout is applied to locking in fixed
80 * frequency mode and to dynamic ramp in any mode; does not affect lock
81	 * latency, since the lock/ramp done status bit is polled. NA mode lock
82	 * and IDDQ exit delays set the time of the respective operations with
83 * no status polling.
84 */
85 u32 lock_timeout;
86 u32 na_lock_delay;
87 u32 iddq_exit_delay;
88 /* NA mode DFS control */
89 u32 dfs_ctrl;
90};
91
92struct namemap_cfg;
93
94struct clk_gk20a {
95 struct gk20a *g;
96#if defined(CONFIG_COMMON_CLK)
97 struct clk *tegra_clk;
98 struct clk *tegra_clk_parent;
99 struct clk_hw hw;
100#endif
101 struct pll gpc_pll;
102 struct pll gpc_pll_last;
103 struct nvgpu_mutex clk_mutex;
104 struct namemap_cfg *clk_namemap;
105 u32 namemap_num;
106 u32 *namemap_xlat_table;
107 bool sw_ready;
108 bool clk_hw_on;
109 bool debugfs_set;
110 int pll_poweron_uv;
111 unsigned long dvfs_safe_max_freq;
112};
113
114#if defined(CONFIG_COMMON_CLK)
115#define to_clk_gk20a(_hw) container_of(_hw, struct clk_gk20a, hw)
116#endif
117
118struct gpu_ops;
119
120#define KHZ 1000
121#define MHZ 1000000
122
123static inline unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
124{
125 /* convert the kHz gpc2clk frequency to Hz gpcpll frequency */
126 return (rate * KHZ) / 2;
127}
128static inline unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
129{
130 /* convert the Hz gpcpll frequency to kHz gpc2clk frequency */
131 return (rate * 2) / KHZ;
132}
133
134#endif /* CLK_GK20A_H */
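
The two rate helpers at the end of this header encode a simple relationship: gpc2clk is expressed in kHz and runs at twice the gpcpll (GPU) frequency, which is expressed in Hz. A small standalone round-trip example, for illustration only:

```c
/* Standalone round-trip example for the gpc2clk <-> gpcpll helpers above. */
#include <stdio.h>

#define KHZ 1000UL

static unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
{
	/* convert the kHz gpc2clk frequency to Hz gpcpll frequency */
	return (rate * KHZ) / 2;
}

static unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
{
	/* convert the Hz gpcpll frequency to kHz gpc2clk frequency */
	return (rate * 2) / KHZ;
}

int main(void)
{
	unsigned long gpc2clk_khz = 2000000UL;	/* 2 GHz gpc2clk */
	unsigned long gpu_hz = rate_gpc2clk_to_gpu(gpc2clk_khz);

	printf("%lu kHz gpc2clk -> %lu Hz gpu\n", gpc2clk_khz, gpu_hz);
	printf("round trip -> %lu kHz gpc2clk\n", rate_gpu_to_gpc2clk(gpu_hz));
	return 0;
}
```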
diff --git a/include/gk20a/css_gr_gk20a.c b/include/gk20a/css_gr_gk20a.c
deleted file mode 100644
index 28a3d49..0000000
--- a/include/gk20a/css_gr_gk20a.c
+++ /dev/null
@@ -1,636 +0,0 @@
1/*
2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/bitops.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/lock.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/mm.h>
30#include <nvgpu/sizes.h>
31#include <nvgpu/barrier.h>
32#include <nvgpu/log.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/io.h>
35#include <nvgpu/utils.h>
36#include <nvgpu/channel.h>
37#include <nvgpu/unit.h>
38
39#include "gk20a.h"
40#include "css_gr_gk20a.h"
41
42#include <nvgpu/hw/gk20a/hw_perf_gk20a.h>
43
44/* check whether the client owns the given perfmon */
45#define CONTAINS_PERFMON(cl, pm) \
46 ((cl)->perfmon_start <= (pm) && \
47 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
48
49/* address of fifo entry by offset */
50#define CSS_FIFO_ENTRY(fifo, offs) \
51 ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
52
53/* calculate area capacity in number of fifo entries */
54#define CSS_FIFO_ENTRY_CAPACITY(s) \
55 (((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \
56 / sizeof(struct gk20a_cs_snapshot_fifo_entry))
57
58/* reserved to indicate failures with data */
59#define CSS_FIRST_PERFMON_ID 32
60/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
61#define CSS_MAX_PERFMON_IDS 256
62
63/* reports whether the hw queue overflowed */
64bool css_hw_get_overflow_status(struct gk20a *g)
65{
66 const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
67 return st == (gk20a_readl(g, perf_pmasys_control_r()) & st);
68}
69
70/* returns the number of snapshot entries pending in the hw fifo */
71u32 css_hw_get_pending_snapshots(struct gk20a *g)
72{
73 return gk20a_readl(g, perf_pmasys_mem_bytes_r()) /
74 sizeof(struct gk20a_cs_snapshot_fifo_entry);
75}
76
77/* informs hw how many snapshots have been processed (frees up fifo space) */
78void css_hw_set_handled_snapshots(struct gk20a *g, u32 done)
79{
80 if (done > 0) {
81 gk20a_writel(g, perf_pmasys_mem_bump_r(),
82 done * sizeof(struct gk20a_cs_snapshot_fifo_entry));
83 }
84}
85
86/* disable streaming to memory */
87static void css_hw_reset_streaming(struct gk20a *g)
88{
89 u32 engine_status;
90
91 /* reset the perfmon */
92 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
93
94	/* RBUFEMPTY must be set -- otherwise we'll pick up */
95	/* snapshots that have been queued up earlier */
96 engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
97 WARN_ON(0 == (engine_status
98 & perf_pmasys_enginestatus_rbufempty_empty_f()));
99
100 /* turn off writes */
101 gk20a_writel(g, perf_pmasys_control_r(),
102 perf_pmasys_control_membuf_clear_status_doit_f());
103
104	/* mark all pending snapshots as handled */
105 css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
106}
107
108/*
109 * WARNING: all css_gr_XXX functions are local and expected to be called
110 * from locked context (protected by cs_lock)
111 */
112
113static int css_gr_create_shared_data(struct gr_gk20a *gr)
114{
115 struct gk20a_cs_snapshot *data;
116
117 if (gr->cs_data)
118 return 0;
119
120 data = nvgpu_kzalloc(gr->g, sizeof(*data));
121 if (!data)
122 return -ENOMEM;
123
124 nvgpu_init_list_node(&data->clients);
125 gr->cs_data = data;
126
127 return 0;
128}
129
130int css_hw_enable_snapshot(struct channel_gk20a *ch,
131 struct gk20a_cs_snapshot_client *cs_client)
132{
133 struct gk20a *g = ch->g;
134 struct mm_gk20a *mm = &g->mm;
135 struct gr_gk20a *gr = &g->gr;
136 struct gk20a_cs_snapshot *data = gr->cs_data;
137 u32 snapshot_size = cs_client->snapshot_size;
138 int ret;
139
140 u32 virt_addr_lo;
141 u32 virt_addr_hi;
142 u32 inst_pa_page;
143
144 if (data->hw_snapshot)
145 return 0;
146
147 if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
148 snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;
149
150 ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
151 &data->hw_memdesc);
152 if (ret)
153 return ret;
154
155	/* the perf output buffer may not cross a 4GB boundary - with a separate */
156	/* VA space smaller than that it won't, but check anyway */
157 if (!data->hw_memdesc.cpu_va ||
158 data->hw_memdesc.size < snapshot_size ||
159 data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
160 ret = -EFAULT;
161 goto failed_allocation;
162 }
163
164 data->hw_snapshot =
165 (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
166 data->hw_end = data->hw_snapshot +
167 snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
168 data->hw_get = data->hw_snapshot;
169 memset(data->hw_snapshot, 0xff, snapshot_size);
170
171 /* address and size are aligned to 32 bytes, the lowest bits read back
172 * as zeros */
173 virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
174 virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);
175
176 css_hw_reset_streaming(g);
177
178 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
179 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
180 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
181 gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
182
183 /* this field is aligned to 4K */
184 inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
185
186 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
187 * should be written last */
188 gk20a_writel(g, perf_pmasys_mem_block_r(),
189 perf_pmasys_mem_block_base_f(inst_pa_page) |
190 nvgpu_aperture_mask(g, &mm->hwpm.inst_block,
191 perf_pmasys_mem_block_target_sys_ncoh_f(),
192 perf_pmasys_mem_block_target_sys_coh_f(),
193 perf_pmasys_mem_block_target_lfb_f()) |
194 perf_pmasys_mem_block_valid_true_f());
195
196 nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n");
197
198 return 0;
199
200failed_allocation:
201 if (data->hw_memdesc.size) {
202 nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
203 memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
204 }
205 data->hw_snapshot = NULL;
206
207 return ret;
208}
209
210void css_hw_disable_snapshot(struct gr_gk20a *gr)
211{
212 struct gk20a *g = gr->g;
213 struct gk20a_cs_snapshot *data = gr->cs_data;
214
215 if (!data->hw_snapshot)
216 return;
217
218 css_hw_reset_streaming(g);
219
220 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
221 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
222 perf_pmasys_outbaseupper_ptr_f(0));
223 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
224
225 gk20a_writel(g, perf_pmasys_mem_block_r(),
226 perf_pmasys_mem_block_base_f(0) |
227 perf_pmasys_mem_block_valid_false_f() |
228 perf_pmasys_mem_block_target_f(0));
229
230 nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
231 memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
232 data->hw_snapshot = NULL;
233
234 nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n");
235}
236
237static void css_gr_free_shared_data(struct gr_gk20a *gr)
238{
239 struct gk20a *g = gr->g;
240
241 if (gr->cs_data) {
242 /* the clients list is expected to be empty */
243 g->ops.css.disable_snapshot(gr);
244
245 /* release the objects */
246 nvgpu_kfree(gr->g, gr->cs_data);
247 gr->cs_data = NULL;
248 }
249}
250
251
252struct gk20a_cs_snapshot_client*
253css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon)
254{
255 struct gk20a_cs_snapshot_client *client;
256
257 nvgpu_list_for_each_entry(client, clients,
258 gk20a_cs_snapshot_client, list) {
259 if (CONTAINS_PERFMON(client, perfmon))
260 return client;
261 }
262
263 return NULL;
264}
265
266static int css_gr_flush_snapshots(struct channel_gk20a *ch)
267{
268 struct gk20a *g = ch->g;
269 struct gr_gk20a *gr = &g->gr;
270 struct gk20a_cs_snapshot *css = gr->cs_data;
271 struct gk20a_cs_snapshot_client *cur;
272 u32 pending, completed;
273 bool hw_overflow;
274 int err;
275
276 /* variables for iterating over HW entries */
277 u32 sid;
278 struct gk20a_cs_snapshot_fifo_entry *src;
279
280	/* because the fifo header is shared with userspace, we only update */
281	/* the overflow counters and the put field in it */
282 struct gk20a_cs_snapshot_fifo *dst;
283 struct gk20a_cs_snapshot_fifo_entry *dst_get;
284 struct gk20a_cs_snapshot_fifo_entry *dst_put;
285 struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
286 struct gk20a_cs_snapshot_fifo_entry *dst_head;
287 struct gk20a_cs_snapshot_fifo_entry *dst_tail;
288
289 if (!css)
290 return -EINVAL;
291
292 if (nvgpu_list_empty(&css->clients))
293 return -EBADF;
294
295 /* check data available */
296 err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
297 if (err)
298 return err;
299
300 if (!pending)
301 return 0;
302
303 if (hw_overflow) {
304 nvgpu_list_for_each_entry(cur, &css->clients,
305 gk20a_cs_snapshot_client, list) {
306 cur->snapshot->hw_overflow_events_occured++;
307 }
308
309 nvgpu_warn(g, "cyclestats: hardware overflow detected");
310 }
311
312 /* process all items in HW buffer */
313 sid = 0;
314 completed = 0;
315 cur = NULL;
316 dst = NULL;
317 dst_put = NULL;
318 src = css->hw_get;
319
320	/* process all completed records */
321 while (sid < pending && 0 == src->zero0) {
322		/* we may hit a new perfmon_id which requires us to */
323		/* switch to a new client -> forget the current one */
324 if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) {
325 dst->put = (char *)dst_put - (char *)dst;
326 dst = NULL;
327 cur = NULL;
328 }
329
330		/* now we have to select a new current client */
331		/* the client selection rate depends on experiment */
332		/* activity, but on Android it usually happens 1-2 times */
333 if (!cur) {
334 cur = css_gr_search_client(&css->clients,
335 src->perfmon_id);
336 if (cur) {
337 /* found - setup all required data */
338 dst = cur->snapshot;
339 dst_get = CSS_FIFO_ENTRY(dst, dst->get);
340 dst_put = CSS_FIFO_ENTRY(dst, dst->put);
341 dst_head = CSS_FIFO_ENTRY(dst, dst->start);
342 dst_tail = CSS_FIFO_ENTRY(dst, dst->end);
343
344 dst_nxt = dst_put + 1;
345 if (dst_nxt == dst_tail)
346 dst_nxt = dst_head;
347 } else {
348 /* client not found - skipping this entry */
349 nvgpu_warn(g, "cyclestats: orphaned perfmon %u",
350 src->perfmon_id);
351 goto next_hw_fifo_entry;
352 }
353 }
354
355 /* check for software overflows */
356 if (dst_nxt == dst_get) {
357 /* no data copy, no pointer updates */
358 dst->sw_overflow_events_occured++;
359 nvgpu_warn(g, "cyclestats: perfmon %u soft overflow",
360 src->perfmon_id);
361 } else {
362 *dst_put = *src;
363 completed++;
364
365 dst_put = dst_nxt++;
366
367 if (dst_nxt == dst_tail)
368 dst_nxt = dst_head;
369 }
370
371next_hw_fifo_entry:
372 sid++;
373 if (++src >= css->hw_end)
374 src = css->hw_snapshot;
375 }
376
377 /* update client put pointer if necessary */
378 if (cur && dst)
379 dst->put = (char *)dst_put - (char *)dst;
380
381	/* reset the HW buffer after processing, taking wrapping into account */
382 if (css->hw_get < src) {
383 memset(css->hw_get, 0xff, (src - css->hw_get) * sizeof(*src));
384 } else {
385 memset(css->hw_snapshot, 0xff,
386 (src - css->hw_snapshot) * sizeof(*src));
387 memset(css->hw_get, 0xff,
388 (css->hw_end - css->hw_get) * sizeof(*src));
389 }
390 gr->cs_data->hw_get = src;
391
392 if (g->ops.css.set_handled_snapshots)
393 g->ops.css.set_handled_snapshots(g, sid);
394
395 if (completed != sid) {
396		/* not all entries were processed correctly. some of the problems are */
397		/* reported as overflows, some as orphaned perfmons, */
398		/* but it is better to also give a summary notification */
399 nvgpu_warn(g, "cyclestats: completed %u from %u entries",
400 completed, pending);
401 }
402
403 return 0;
404}
405
406u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
407 u32 count)
408{
409 unsigned long *pids = data->perfmon_ids;
410 unsigned int f;
411
412 f = bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS,
413 CSS_FIRST_PERFMON_ID, count, 0);
414 if (f > CSS_MAX_PERFMON_IDS)
415 f = 0;
416 else
417 bitmap_set(pids, f, count);
418
419 return f;
420}
421
422u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
423 u32 start,
424 u32 count)
425{
426 unsigned long *pids = data->perfmon_ids;
427 u32 end = start + count;
428 u32 cnt = 0;
429
430 if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) {
431 bitmap_clear(pids, start, count);
432 cnt = count;
433 }
434
435 return cnt;
436}
437
438
439static int css_gr_free_client_data(struct gk20a *g,
440 struct gk20a_cs_snapshot *data,
441 struct gk20a_cs_snapshot_client *client)
442{
443 int ret = 0;
444
445 if (client->list.next && client->list.prev)
446 nvgpu_list_del(&client->list);
447
448 if (client->perfmon_start && client->perfmon_count
449 && g->ops.css.release_perfmon_ids) {
450 if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
451 client->perfmon_start, client->perfmon_count))
452 ret = -EINVAL;
453 }
454
455 return ret;
456}
457
458static int css_gr_create_client_data(struct gk20a *g,
459 struct gk20a_cs_snapshot *data,
460 u32 perfmon_count,
461 struct gk20a_cs_snapshot_client *cur)
462{
463 /*
464 * Special handling in-case of rm-server
465 *
466 * client snapshot buffer will not be mapped
467 * in-case of rm-server its only mapped in
468 * guest side
469 */
470 if (cur->snapshot) {
471 memset(cur->snapshot, 0, sizeof(*cur->snapshot));
472 cur->snapshot->start = sizeof(*cur->snapshot);
473		/* we should ensure that all fifo entries can fit here */
474 cur->snapshot->end =
475 CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size)
476 * sizeof(struct gk20a_cs_snapshot_fifo_entry)
477 + sizeof(struct gk20a_cs_snapshot_fifo);
478 cur->snapshot->get = cur->snapshot->start;
479 cur->snapshot->put = cur->snapshot->start;
480 }
481
482 cur->perfmon_count = perfmon_count;
483
484 /* In virtual case, perfmon ID allocation is handled by the server
485 * at the time of the attach (allocate_perfmon_ids is NULL in this case)
486 */
487 if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
488 cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
489 cur->perfmon_count);
490 if (!cur->perfmon_start)
491 return -ENOENT;
492 }
493
494 nvgpu_list_add_tail(&cur->list, &data->clients);
495
496 return 0;
497}
498
499
500int gr_gk20a_css_attach(struct channel_gk20a *ch,
501 u32 perfmon_count,
502 u32 *perfmon_start,
503 struct gk20a_cs_snapshot_client *cs_client)
504{
505 int ret = 0;
506 struct gk20a *g = ch->g;
507 struct gr_gk20a *gr;
508
509 /* we must have a placeholder to store pointer to client structure */
510 if (!cs_client)
511 return -EINVAL;
512
513 if (!perfmon_count ||
514 perfmon_count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID)
515 return -EINVAL;
516
517 nvgpu_speculation_barrier();
518
519 gr = &g->gr;
520
521 nvgpu_mutex_acquire(&gr->cs_lock);
522
523 ret = css_gr_create_shared_data(gr);
524 if (ret)
525 goto failed;
526
527 ret = css_gr_create_client_data(g, gr->cs_data,
528 perfmon_count,
529 cs_client);
530 if (ret)
531 goto failed;
532
533 ret = g->ops.css.enable_snapshot(ch, cs_client);
534 if (ret)
535 goto failed;
536
537 if (perfmon_start)
538 *perfmon_start = cs_client->perfmon_start;
539
540 nvgpu_mutex_release(&gr->cs_lock);
541
542 return 0;
543
544failed:
545 if (gr->cs_data) {
546 if (cs_client) {
547 css_gr_free_client_data(g, gr->cs_data, cs_client);
548 cs_client = NULL;
549 }
550
551 if (nvgpu_list_empty(&gr->cs_data->clients))
552 css_gr_free_shared_data(gr);
553 }
554 nvgpu_mutex_release(&gr->cs_lock);
555
556 if (perfmon_start)
557 *perfmon_start = 0;
558
559 return ret;
560}
561
562int gr_gk20a_css_detach(struct channel_gk20a *ch,
563 struct gk20a_cs_snapshot_client *cs_client)
564{
565 int ret = 0;
566 struct gk20a *g = ch->g;
567 struct gr_gk20a *gr;
568
569 if (!cs_client)
570 return -EINVAL;
571
572 gr = &g->gr;
573 nvgpu_mutex_acquire(&gr->cs_lock);
574 if (gr->cs_data) {
575 struct gk20a_cs_snapshot *data = gr->cs_data;
576
577 if (g->ops.css.detach_snapshot)
578 g->ops.css.detach_snapshot(ch, cs_client);
579
580 ret = css_gr_free_client_data(g, data, cs_client);
581 if (nvgpu_list_empty(&data->clients))
582 css_gr_free_shared_data(gr);
583 } else {
584 ret = -EBADF;
585 }
586 nvgpu_mutex_release(&gr->cs_lock);
587
588 return ret;
589}
590
591int gr_gk20a_css_flush(struct channel_gk20a *ch,
592 struct gk20a_cs_snapshot_client *cs_client)
593{
594 int ret = 0;
595 struct gk20a *g = ch->g;
596 struct gr_gk20a *gr;
597
598 if (!cs_client)
599 return -EINVAL;
600
601 gr = &g->gr;
602 nvgpu_mutex_acquire(&gr->cs_lock);
603 ret = css_gr_flush_snapshots(ch);
604 nvgpu_mutex_release(&gr->cs_lock);
605
606 return ret;
607}
608
609/* helper function with locking to clean up snapshot code in gr_gk20a.c */
610void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
611{
612 struct gr_gk20a *gr = &g->gr;
613
614 nvgpu_mutex_acquire(&gr->cs_lock);
615 css_gr_free_shared_data(gr);
616 nvgpu_mutex_release(&gr->cs_lock);
617 nvgpu_mutex_destroy(&gr->cs_lock);
618}
619
620int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
621 bool *hw_overflow)
622{
623 struct gk20a *g = ch->g;
624 struct gr_gk20a *gr = &g->gr;
625 struct gk20a_cs_snapshot *css = gr->cs_data;
626
627 if (!css->hw_snapshot)
628 return -EINVAL;
629
630 *pending = css_hw_get_pending_snapshots(g);
631 if (!*pending)
632 return 0;
633
634 *hw_overflow = css_hw_get_overflow_status(g);
635 return 0;
636}
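
css_gr_flush_snapshots() above copies hardware entries into a per-client ring described only by byte offsets (start/end/get/put) and counts a software overflow when the next put would collide with get. The following standalone sketch shows that ring discipline, simplified to array indices instead of byte offsets.

```c
/* Simplified standalone sketch of the put/get ring discipline used when
 * flushing snapshots into a client fifo; indices stand in for byte offsets. */
#include <stdio.h>

#define DEMO_ENTRIES 4	/* one slot is always left empty, so capacity is 3 */

struct demo_fifo {
	unsigned int get, put;
	unsigned int sw_overflow_events_occured;
	int entries[DEMO_ENTRIES];
};

static void demo_fifo_push(struct demo_fifo *f, int value)
{
	unsigned int nxt = (f->put + 1) % DEMO_ENTRIES;

	if (nxt == f->get) {
		/* no data copy, no pointer updates */
		f->sw_overflow_events_occured++;
		return;
	}
	f->entries[f->put] = value;
	f->put = nxt;
}

int main(void)
{
	struct demo_fifo f = { 0 };
	int i;

	for (i = 0; i < 6; i++)
		demo_fifo_push(&f, i);

	printf("put=%u get=%u sw overflows=%u\n",
	       f.put, f.get, f.sw_overflow_events_occured);
	return 0;
}
```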
diff --git a/include/gk20a/css_gr_gk20a.h b/include/gk20a/css_gr_gk20a.h
deleted file mode 100644
index bf8890b..0000000
--- a/include/gk20a/css_gr_gk20a.h
+++ /dev/null
@@ -1,151 +0,0 @@
1/*
2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
3 *
4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef CSS_GR_GK20A_H
26#define CSS_GR_GK20A_H
27
28#include <nvgpu/nvgpu_mem.h>
29#include <nvgpu/list.h>
30
31/* the minimal size of HW buffer - should be enough to avoid HW overflows */
32#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
33
34struct gk20a;
35struct gr_gk20a;
36struct channel_gk20a;
37
38/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
39struct gk20a_cs_snapshot_fifo {
40 /* layout description of the buffer */
41 u32 start;
42 u32 end;
43
44 /* snafu bits */
45 u32 hw_overflow_events_occured;
46 u32 sw_overflow_events_occured;
47
48 /* the kernel copies new entries to put and
49	 * increments put. if put == get then
50 * overflowEventsOccured++
51 */
52 u32 put;
53 u32 _reserved10;
54 u32 _reserved11;
55 u32 _reserved12;
56
57 /* the driver/client reads from get until
58 * put==get, get++ */
59 u32 get;
60 u32 _reserved20;
61 u32 _reserved21;
62 u32 _reserved22;
63
64 /* unused */
65 u32 _reserved30;
66 u32 _reserved31;
67 u32 _reserved32;
68 u32 _reserved33;
69};
70
71/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
72struct gk20a_cs_snapshot_fifo_entry {
73	/* global timestamp (bits 39:0) */
74 u32 timestamp31_00:32;
75 u32 timestamp39_32:8;
76
77 /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
78 u32 perfmon_id:8;
79
80 /* typically samples_counter is wired to #pmtrigger count */
81 u32 samples_counter:12;
82
83 /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
84 u32 ds:1;
85 u32 sz:1;
86 u32 zero0:1;
87 u32 zero1:1;
88
89 /* counter results */
90 u32 event_cnt:32;
91 u32 trigger0_cnt:32;
92 u32 trigger1_cnt:32;
93 u32 sample_cnt:32;
94
95 /* Local PmTrigger results for Maxwell+ or padding otherwise */
96 u16 local_trigger_b_count:16;
97 u16 book_mark_b:16;
98 u16 local_trigger_a_count:16;
99 u16 book_mark_a:16;
100};
101
102/* cycle stats snapshot client data (e.g. associated with channel) */
103struct gk20a_cs_snapshot_client {
104 struct nvgpu_list_node list;
105 struct gk20a_cs_snapshot_fifo *snapshot;
106 u32 snapshot_size;
107 u32 perfmon_start;
108 u32 perfmon_count;
109};
110
111static inline struct gk20a_cs_snapshot_client *
112gk20a_cs_snapshot_client_from_list(struct nvgpu_list_node *node)
113{
114 return (struct gk20a_cs_snapshot_client *)
115 ((uintptr_t)node - offsetof(struct gk20a_cs_snapshot_client, list));
116};
117
118/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
119#define CSS_MAX_PERFMON_IDS 256
120
121/* local definitions to avoid hardcoded sizes and shifts */
122#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
123
124/* cycle stats snapshot control structure for one HW entry and many clients */
125struct gk20a_cs_snapshot {
126 unsigned long perfmon_ids[PM_BITMAP_SIZE];
127 struct nvgpu_list_node clients;
128 struct nvgpu_mem hw_memdesc;
129 /* pointer to allocated cpu_va memory where GPU place data */
130 struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
131 struct gk20a_cs_snapshot_fifo_entry *hw_end;
132 struct gk20a_cs_snapshot_fifo_entry *hw_get;
133};
134
135bool css_hw_get_overflow_status(struct gk20a *g);
136u32 css_hw_get_pending_snapshots(struct gk20a *g);
137void css_hw_set_handled_snapshots(struct gk20a *g, u32 done);
138int css_hw_enable_snapshot(struct channel_gk20a *ch,
139 struct gk20a_cs_snapshot_client *cs_client);
140void css_hw_disable_snapshot(struct gr_gk20a *gr);
141u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
142 u32 count);
143u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
144 u32 start,
145 u32 count);
146int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
147 bool *hw_overflow);
148struct gk20a_cs_snapshot_client*
149css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon);
150
151#endif /* CSS_GR_GK20A_H */
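
As a rough sanity check of the layout above, CSS_FIFO_ENTRY_CAPACITY() is easy to evaluate for a concrete buffer size. The byte sizes in this sketch are assumptions worked out from the field lists (a 32-byte fifo entry, matching the "SZ ... 0=32B" note, and a 64-byte client fifo header), not values taken from a build of the driver; the minimum HW snapshot size is used only as a convenient example size.

```c
/* Back-of-the-envelope sketch for the structures above. The byte sizes are
 * assumptions derived from the field lists, not sizeof() from the driver. */
#include <stdio.h>

#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
#define ASSUMED_ENTRY_BYTES	32	/* gk20a_cs_snapshot_fifo_entry */
#define ASSUMED_HEADER_BYTES	64	/* gk20a_cs_snapshot_fifo */

int main(void)
{
	unsigned long hw_entries =
		CSS_MIN_HW_SNAPSHOT_SIZE / ASSUMED_ENTRY_BYTES;
	unsigned long client_capacity =
		(CSS_MIN_HW_SNAPSHOT_SIZE - ASSUMED_HEADER_BYTES) /
		ASSUMED_ENTRY_BYTES;

	printf("hw buffer entries:    %lu\n", hw_entries);
	printf("client fifo capacity: %lu (header subtracted)\n",
	       client_capacity);
	return 0;
}
```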
diff --git a/include/gk20a/dbg_gpu_gk20a.c b/include/gk20a/dbg_gpu_gk20a.c
deleted file mode 100644
index 1686d01..0000000
--- a/include/gk20a/dbg_gpu_gk20a.c
+++ /dev/null
@@ -1,388 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver
3 *
4 * Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/log.h>
27#include <nvgpu/vm.h>
28#include <nvgpu/atomic.h>
29#include <nvgpu/mm.h>
30#include <nvgpu/bug.h>
31#include <nvgpu/io.h>
32#include <nvgpu/utils.h>
33#include <nvgpu/channel.h>
34#include <nvgpu/unit.h>
35#include <nvgpu/power_features/power_features.h>
36
37#include "gk20a.h"
38#include "gr_gk20a.h"
39#include "dbg_gpu_gk20a.h"
40#include "regops_gk20a.h"
41
42#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_perf_gk20a.h>
44
45static void gk20a_perfbuf_reset_streaming(struct gk20a *g)
46{
47 u32 engine_status;
48 u32 num_unread_bytes;
49
50 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
51
52 engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
53 WARN_ON(0u ==
54 (engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));
55
56 gk20a_writel(g, perf_pmasys_control_r(),
57 perf_pmasys_control_membuf_clear_status_doit_f());
58
59 num_unread_bytes = gk20a_readl(g, perf_pmasys_mem_bytes_r());
60 if (num_unread_bytes != 0u) {
61 gk20a_writel(g, perf_pmasys_mem_bump_r(), num_unread_bytes);
62 }
63}
64
65/*
66 * API to get first channel from the list of all channels
67 * bound to the debug session
68 */
69struct channel_gk20a *
70nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
71{
72 struct dbg_session_channel_data *ch_data;
73 struct channel_gk20a *ch;
74 struct gk20a *g = dbg_s->g;
75
76 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
77 if (nvgpu_list_empty(&dbg_s->ch_list)) {
78 nvgpu_mutex_release(&dbg_s->ch_list_lock);
79 return NULL;
80 }
81
82 ch_data = nvgpu_list_first_entry(&dbg_s->ch_list,
83 dbg_session_channel_data,
84 ch_entry);
85 ch = g->fifo.channel + ch_data->chid;
86
87 nvgpu_mutex_release(&dbg_s->ch_list_lock);
88
89 return ch;
90}
91
92void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
93{
94 struct dbg_session_data *session_data;
95 struct dbg_session_gk20a *dbg_s;
96 struct gk20a *g = ch->g;
97
98 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
99
100 /* guard against the session list being modified */
101 nvgpu_mutex_acquire(&ch->dbg_s_lock);
102
103 nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
104 dbg_session_data, dbg_s_entry) {
105 dbg_s = session_data->dbg_s;
106 if (dbg_s->dbg_events.events_enabled) {
107 nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d",
108 dbg_s->id);
109 nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
110 dbg_s->dbg_events.num_pending_events);
111
112 dbg_s->dbg_events.num_pending_events++;
113
114 nvgpu_dbg_session_post_event(dbg_s);
115 }
116 }
117
118 nvgpu_mutex_release(&ch->dbg_s_lock);
119}
120
121bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
122{
123 struct dbg_session_data *session_data;
124 struct dbg_session_gk20a *dbg_s;
125 bool broadcast = false;
126 struct gk20a *g = ch->g;
127
128 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
129
130 /* guard against the session list being modified */
131 nvgpu_mutex_acquire(&ch->dbg_s_lock);
132
133 nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
134 dbg_session_data, dbg_s_entry) {
135 dbg_s = session_data->dbg_s;
136 if (dbg_s->broadcast_stop_trigger) {
137 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
138 "stop trigger broadcast enabled");
139 broadcast = true;
140 break;
141 }
142 }
143
144 nvgpu_mutex_release(&ch->dbg_s_lock);
145
146 return broadcast;
147}
148
149int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch)
150{
151 struct dbg_session_data *session_data;
152 struct dbg_session_gk20a *dbg_s;
153 struct gk20a *g = ch->g;
154
155 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
156
157 /* guard against the session list being modified */
158 nvgpu_mutex_acquire(&ch->dbg_s_lock);
159
160 nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
161 dbg_session_data, dbg_s_entry) {
162 dbg_s = session_data->dbg_s;
163 if (dbg_s->broadcast_stop_trigger) {
164 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
165 "stop trigger broadcast disabled");
166 dbg_s->broadcast_stop_trigger = false;
167 }
168 }
169
170 nvgpu_mutex_release(&ch->dbg_s_lock);
171
172 return 0;
173}
174
175u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
176 bool mode)
177{
178 u32 err = 0U;
179 struct gk20a *g = dbg_s->g;
180
181 if (dbg_s->is_pg_disabled != mode) {
182 if (mode == false) {
183 g->dbg_powergating_disabled_refcount--;
184 }
185
186 /*
187 * Allow powergate disable or enable only if
188 * the global pg disabled refcount is zero
189 */
190 if (g->dbg_powergating_disabled_refcount == 0) {
191 err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
192 mode);
193 }
194
195 if (mode) {
196 g->dbg_powergating_disabled_refcount++;
197 }
198
199 dbg_s->is_pg_disabled = mode;
200 }
201
202 return err;
203}
204
205int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate)
206{
207 int err = 0;
208 struct gk20a *g = dbg_s->g;
209
210 /* This function must be called with g->dbg_sessions_lock held */
211
212 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s",
213 g->name, disable_powergate ? "disable" : "enable");
214
215	/*
216	 * Powergate mode here refers to railgate+powergate+clockgate.
217	 * So if slcg/blcg/elcg are disabled and railgating is enabled,
218	 * disable railgating and then set is_pg_disabled = true.
219	 * Similarly, when powermode=MODE_ENABLE, re-enable only railgating
220	 * and leave the other features alone if they were not enabled.
221	 */
222 if (disable_powergate) {
223 /* save off current powergate, clk state.
224 * set gpu module's can_powergate = 0.
225 * set gpu module's clk to max.
226 * while *a* debug session is active there will be no power or
227 * clocking state changes allowed from mainline code (but they
228 * should be saved).
229 */
230
231 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
232 "module busy");
233 err = gk20a_busy(g);
234 if (err) {
235 return err;
236 }
237
238 err = nvgpu_cg_pg_disable(g);
239
240 if (err == 0) {
241 dbg_s->is_pg_disabled = true;
242 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
243 "pg disabled");
244 }
245 } else {
246 /* restore (can) powergate, clk state */
247 /* release pending exceptions to fault/be handled as usual */
248		/* TBD: ordering of these? */
249
250 err = nvgpu_cg_pg_enable(g);
251
252 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
253 gk20a_idle(g);
254
255 if (err == 0) {
256 dbg_s->is_pg_disabled = false;
257 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
258 "pg enabled");
259 }
260 }
261
262 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done",
263 g->name, disable_powergate ? "disable" : "enable");
264 return err;
265}
266
267bool nvgpu_check_and_set_global_reservation(
268 struct dbg_session_gk20a *dbg_s,
269 struct dbg_profiler_object_data *prof_obj)
270{
271 struct gk20a *g = dbg_s->g;
272
273 if (g->profiler_reservation_count == 0) {
274 g->global_profiler_reservation_held = true;
275 g->profiler_reservation_count = 1;
276 dbg_s->has_profiler_reservation = true;
277 prof_obj->has_reservation = true;
278 return true;
279 }
280 return false;
281}
282
283bool nvgpu_check_and_set_context_reservation(
284 struct dbg_session_gk20a *dbg_s,
285 struct dbg_profiler_object_data *prof_obj)
286{
287 struct gk20a *g = dbg_s->g;
288
289 /* Assumes that we've already checked that no global reservation
290 * is in effect.
291 */
292 g->profiler_reservation_count++;
293 dbg_s->has_profiler_reservation = true;
294 prof_obj->has_reservation = true;
295 return true;
296}
297
298void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s,
299 struct dbg_profiler_object_data *prof_obj)
300{
301 struct gk20a *g = dbg_s->g;
302
303 g->profiler_reservation_count--;
304 if (g->profiler_reservation_count < 0) {
305 nvgpu_err(g, "Negative reservation count!");
306 }
307 dbg_s->has_profiler_reservation = false;
308 prof_obj->has_reservation = false;
309 if (prof_obj->ch == NULL) {
310 g->global_profiler_reservation_held = false;
311 }
312}
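/*
 * A minimal sketch of how the three reservation helpers above are meant to
 * be combined by a caller holding dbg_sessions_lock (the branch structure
 * here is illustrative, not lifted from the original driver):
 *
 *   if (prof_obj->ch == NULL) {
 *           // global reservation: only allowed when nothing else is held
 *           ok = nvgpu_check_and_set_global_reservation(dbg_s, prof_obj);
 *   } else if (!g->global_profiler_reservation_held) {
 *           // per-context reservations may coexist with each other
 *           ok = nvgpu_check_and_set_context_reservation(dbg_s, prof_obj);
 *   }
 *   ...
 *   nvgpu_release_profiler_reservation(dbg_s, prof_obj);
 */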
313
314int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
315{
316 struct mm_gk20a *mm = &g->mm;
317 u32 virt_addr_lo;
318 u32 virt_addr_hi;
319 u32 inst_pa_page;
320 int err;
321
322 err = gk20a_busy(g);
323 if (err) {
324 nvgpu_err(g, "failed to poweron");
325 return err;
326 }
327
328 err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
329 if (err) {
330 return err;
331 }
332
333 g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
334
335 gk20a_perfbuf_reset_streaming(g);
336
337 virt_addr_lo = u64_lo32(offset);
338 virt_addr_hi = u64_hi32(offset);
339
340	/* address and size are aligned to 32 bytes; the lowest bits read back
341	 * as zeros */
342 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
343 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
344 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
345 gk20a_writel(g, perf_pmasys_outsize_r(), size);
346
347 /* this field is aligned to 4K */
348 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
349
350 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
351 * should be written last */
352 gk20a_writel(g, perf_pmasys_mem_block_r(),
353 perf_pmasys_mem_block_base_f(inst_pa_page) |
354 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
355 perf_pmasys_mem_block_target_sys_ncoh_f(),
356 perf_pmasys_mem_block_target_sys_coh_f(),
357 perf_pmasys_mem_block_target_lfb_f()) |
358 perf_pmasys_mem_block_valid_true_f());
359
360 gk20a_idle(g);
361 return 0;
362}
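/*
 * A short sketch of the address arithmetic used above (the numbers are
 * hypothetical, for illustration only): the 64-bit buffer offset is split
 * into lo/hi words for OUTBASE/OUTBASEUPPER, and the instance block
 * physical address is programmed as a 4K page index. MEM_BLOCK is written
 * last because writing it triggers the bind.
 *
 *   u64 offset       = 0x0000001234567820ULL;      // 32-byte aligned
 *   u32 virt_addr_lo = u64_lo32(offset);           // 0x34567820
 *   u32 virt_addr_hi = u64_hi32(offset);           // 0x00000012
 *   u32 inst_pa_page = (u32)(inst_block_pa >> 12); // 4K page index
 */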
363
364/* must be called with dbg_sessions_lock held */
365int gk20a_perfbuf_disable_locked(struct gk20a *g)
366{
367 int err = gk20a_busy(g);
368 if (err) {
369 nvgpu_err(g, "failed to poweron");
370 return err;
371 }
372
373 gk20a_perfbuf_reset_streaming(g);
374
375 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
376 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
377 perf_pmasys_outbaseupper_ptr_f(0));
378 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
379
380 gk20a_writel(g, perf_pmasys_mem_block_r(),
381 perf_pmasys_mem_block_base_f(0) |
382 perf_pmasys_mem_block_valid_false_f() |
383 perf_pmasys_mem_block_target_f(0));
384
385 gk20a_idle(g);
386
387 return 0;
388}
diff --git a/include/gk20a/dbg_gpu_gk20a.h b/include/gk20a/dbg_gpu_gk20a.h
deleted file mode 100644
index fb5ae1f..0000000
--- a/include/gk20a/dbg_gpu_gk20a.h
+++ /dev/null
@@ -1,147 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger Driver
3 *
4 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef DBG_GPU_H
25#define DBG_GPU_H
26
27#include <nvgpu/cond.h>
28#include <nvgpu/lock.h>
29#include <nvgpu/list.h>
30
31struct gk20a;
32struct channel_gk20a;
33struct dbg_session_gk20a;
34
35/* used by the interrupt handler to post events */
36void gk20a_dbg_gpu_post_events(struct channel_gk20a *fault_ch);
37
38struct channel_gk20a *
39nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s);
40
41struct dbg_gpu_session_events {
42 struct nvgpu_cond wait_queue;
43 bool events_enabled;
44 int num_pending_events;
45};
46
47struct dbg_session_gk20a {
48 /* dbg session id used for trace/prints */
49 int id;
50
51 /* profiler session, if any */
52 bool is_profiler;
53
54 /* has a valid profiler reservation */
55 bool has_profiler_reservation;
56
57 /* power enabled or disabled */
58 bool is_pg_disabled;
59
60 /* timeouts enabled or disabled */
61 bool is_timeout_disabled;
62
63 struct gk20a *g;
64
65 /* list of bound channels, if any */
66 struct nvgpu_list_node ch_list;
67 struct nvgpu_mutex ch_list_lock;
68
69 /* event support */
70 struct dbg_gpu_session_events dbg_events;
71
72 bool broadcast_stop_trigger;
73
74 struct nvgpu_mutex ioctl_lock;
75};
76
77struct dbg_session_data {
78 struct dbg_session_gk20a *dbg_s;
79 struct nvgpu_list_node dbg_s_entry;
80};
81
82static inline struct dbg_session_data *
83dbg_session_data_from_dbg_s_entry(struct nvgpu_list_node *node)
84{
85 return (struct dbg_session_data *)
86 ((uintptr_t)node - offsetof(struct dbg_session_data, dbg_s_entry));
87};
88
89struct dbg_session_channel_data {
90 int channel_fd;
91 u32 chid;
92 struct nvgpu_list_node ch_entry;
93 struct dbg_session_data *session_data;
94 int (*unbind_single_channel)(struct dbg_session_gk20a *dbg_s,
95 struct dbg_session_channel_data *ch_data);
96};
97
98static inline struct dbg_session_channel_data *
99dbg_session_channel_data_from_ch_entry(struct nvgpu_list_node *node)
100{
101 return (struct dbg_session_channel_data *)
102 ((uintptr_t)node - offsetof(struct dbg_session_channel_data, ch_entry));
103};
104
105struct dbg_profiler_object_data {
106 int session_id;
107 u32 prof_handle;
108 struct channel_gk20a *ch;
109 bool has_reservation;
110 struct nvgpu_list_node prof_obj_entry;
111};
112
113static inline struct dbg_profiler_object_data *
114dbg_profiler_object_data_from_prof_obj_entry(struct nvgpu_list_node *node)
115{
116 return (struct dbg_profiler_object_data *)
117 ((uintptr_t)node - offsetof(struct dbg_profiler_object_data, prof_obj_entry));
118};
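/*
 * The three *_from_*_entry() helpers above all follow the same intrusive
 * list pattern: the nvgpu_list_node is embedded in its parent struct and
 * the parent is recovered by subtracting the member offset. A minimal
 * usage sketch (names as declared in this header, loop shape assumed from
 * the nvgpu list API used elsewhere in this directory):
 *
 *   struct dbg_session_channel_data *ch_data;
 *
 *   nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
 *                             dbg_session_channel_data, ch_entry) {
 *           // ch_data points at the containing struct, not at the node
 *   }
 */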
119
120bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch);
121int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch);
122
123int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate);
124bool nvgpu_check_and_set_global_reservation(
125 struct dbg_session_gk20a *dbg_s,
126 struct dbg_profiler_object_data *prof_obj);
127bool nvgpu_check_and_set_context_reservation(
128 struct dbg_session_gk20a *dbg_s,
129 struct dbg_profiler_object_data *prof_obj);
130void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s,
131 struct dbg_profiler_object_data *prof_obj);
132int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size);
133int gk20a_perfbuf_disable_locked(struct gk20a *g);
134
135void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s);
136u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
137 bool mode);
138
139/* PM Context Switch Mode */
140/* This mode says that the PMs are not to be context switched. */
141#define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
142/* This mode says that the PMs in Mode-B are to be context switched. */
143#define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001)
144/* This mode says that the PMs in Mode-E (stream out) are to be context switched. */
145#define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
146
147#endif /* DBG_GPU_H */
diff --git a/include/gk20a/fecs_trace_gk20a.c b/include/gk20a/fecs_trace_gk20a.c
deleted file mode 100644
index 5c1c5e0..0000000
--- a/include/gk20a/fecs_trace_gk20a.c
+++ /dev/null
@@ -1,744 +0,0 @@
1/*
2 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/kmem.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/enabled.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/hashtable.h>
28#include <nvgpu/circ_buf.h>
29#include <nvgpu/thread.h>
30#include <nvgpu/barrier.h>
31#include <nvgpu/mm.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/ctxsw_trace.h>
34#include <nvgpu/io.h>
35#include <nvgpu/utils.h>
36#include <nvgpu/timers.h>
37#include <nvgpu/channel.h>
38
39#include "fecs_trace_gk20a.h"
40#include "gk20a.h"
41#include "gr_gk20a.h"
42
43#include <nvgpu/log.h>
44#include <nvgpu/fecs_trace.h>
45
46#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
47#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
48
49struct gk20a_fecs_trace_hash_ent {
50 u32 context_ptr;
51 pid_t pid;
52 struct hlist_node node;
53};
54
55struct gk20a_fecs_trace {
56
57 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
58 struct nvgpu_mutex hash_lock;
59 struct nvgpu_mutex poll_lock;
60 struct nvgpu_thread poll_task;
61 bool init;
62 struct nvgpu_mutex enable_lock;
63 u32 enable_count;
64};
65
66#ifdef CONFIG_GK20A_CTXSW_TRACE
67u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
68{
69 return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
70}
71
72u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
73{
74 return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
75}
76
77u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
78{
79 return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
80}
81
82static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
83{
84 return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
85}
86
87int gk20a_fecs_trace_num_ts(void)
88{
89 return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
90 - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
91}
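/*
 * A worked example of the layout computed above, with hypothetical sizes
 * for illustration: if the hardware record size is 128 bytes and the fixed
 * gk20a_fecs_trace_record header takes 32 bytes, the remaining 96 bytes
 * hold 96 / sizeof(u64) = 12 timestamp words, so ts[] has 12 entries.
 */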
92
93struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
94 struct gk20a *g, int idx)
95{
96 struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
97
98 return (struct gk20a_fecs_trace_record *)
99 ((u8 *) mem->cpu_va
100 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
101}
102
103bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
104{
105 /*
106 * testing magic_hi should suffice. magic_lo is sometimes used
107 * as a sequence number in experimental ucode.
108 */
109 return (r->magic_hi
110 == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
111}
112
113int gk20a_fecs_trace_get_read_index(struct gk20a *g)
114{
115 return gr_gk20a_elpg_protected_call(g,
116 gk20a_readl(g, gr_fecs_mailbox1_r()));
117}
118
119int gk20a_fecs_trace_get_write_index(struct gk20a *g)
120{
121 return gr_gk20a_elpg_protected_call(g,
122 gk20a_readl(g, gr_fecs_mailbox0_r()));
123}
124
125static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
126{
127 nvgpu_log(g, gpu_dbg_ctxsw, "set read=%d", index);
128 return gr_gk20a_elpg_protected_call(g,
129 (gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
130}
131
132void gk20a_fecs_trace_hash_dump(struct gk20a *g)
133{
134 u32 bkt;
135 struct gk20a_fecs_trace_hash_ent *ent;
136 struct gk20a_fecs_trace *trace = g->fecs_trace;
137
138 nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");
139
140 nvgpu_mutex_acquire(&trace->hash_lock);
141 hash_for_each(trace->pid_hash_table, bkt, ent, node)
142 {
143 nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
144 ent, bkt, ent->context_ptr, ent->pid);
145
146 }
147 nvgpu_mutex_release(&trace->hash_lock);
148}
149
150static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
151{
152 struct gk20a_fecs_trace_hash_ent *he;
153 struct gk20a_fecs_trace *trace = g->fecs_trace;
154
155 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
156 "adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
157
158 he = nvgpu_kzalloc(g, sizeof(*he));
159 if (unlikely(!he)) {
160 nvgpu_warn(g,
161 "can't alloc new hash entry for context_ptr=%x pid=%d",
162 context_ptr, pid);
163 return -ENOMEM;
164 }
165
166 he->context_ptr = context_ptr;
167 he->pid = pid;
168 nvgpu_mutex_acquire(&trace->hash_lock);
169 hash_add(trace->pid_hash_table, &he->node, context_ptr);
170 nvgpu_mutex_release(&trace->hash_lock);
171 return 0;
172}
173
174static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
175{
176 struct hlist_node *tmp;
177 struct gk20a_fecs_trace_hash_ent *ent;
178 struct gk20a_fecs_trace *trace = g->fecs_trace;
179
180 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
181 "freeing hash entry context_ptr=%x", context_ptr);
182
183 nvgpu_mutex_acquire(&trace->hash_lock);
184 hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
185 context_ptr) {
186 if (ent->context_ptr == context_ptr) {
187 hash_del(&ent->node);
188 nvgpu_log(g, gpu_dbg_ctxsw,
189 "freed hash entry=%p context_ptr=%x", ent,
190 ent->context_ptr);
191 nvgpu_kfree(g, ent);
192 break;
193 }
194 }
195 nvgpu_mutex_release(&trace->hash_lock);
196}
197
198static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
199{
200 u32 bkt;
201 struct hlist_node *tmp;
202 struct gk20a_fecs_trace_hash_ent *ent;
203 struct gk20a_fecs_trace *trace = g->fecs_trace;
204
205 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
206
207 nvgpu_mutex_acquire(&trace->hash_lock);
208 hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
209 hash_del(&ent->node);
210 nvgpu_kfree(g, ent);
211 }
212 nvgpu_mutex_release(&trace->hash_lock);
213
214}
215
216static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
217{
218 struct gk20a_fecs_trace_hash_ent *ent;
219 struct gk20a_fecs_trace *trace = g->fecs_trace;
220 pid_t pid = 0;
221
222 nvgpu_mutex_acquire(&trace->hash_lock);
223 hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
224 if (ent->context_ptr == context_ptr) {
225 nvgpu_log(g, gpu_dbg_ctxsw,
226 "found context_ptr=%x -> pid=%d",
227 ent->context_ptr, ent->pid);
228 pid = ent->pid;
229 break;
230 }
231 }
232 nvgpu_mutex_release(&trace->hash_lock);
233
234 return pid;
235}
236
237/*
238 * Converts HW entry format to userspace-facing format and pushes it to the
239 * queue.
240 */
241static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
242{
243 int i;
244 struct nvgpu_gpu_ctxsw_trace_entry entry = { };
245 struct gk20a_fecs_trace *trace = g->fecs_trace;
246 pid_t cur_pid;
247 pid_t new_pid;
248 int count = 0;
249
250 /* for now, only one VM */
251 const int vmid = 0;
252
253 struct gk20a_fecs_trace_record *r =
254 gk20a_fecs_trace_get_record(g, index);
255
256 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
257 "consuming record trace=%p read=%d record=%p", trace, index, r);
258
259 if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
260 nvgpu_warn(g,
261 "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
262 trace, index, r, r->magic_lo, r->magic_hi);
263 return -EINVAL;
264 }
265
266	/* Clear magic_hi to detect cases where CPU could read write index
267	 * before FECS record is actually written to DRAM. This should not
268	 * happen as we force FECS writes to SYSMEM by reading through PRAMIN.
269	 */
270 r->magic_hi = 0;
271
272 cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
273 new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
274
275 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
276 "context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
277 r->context_ptr, cur_pid, r->new_context_ptr, new_pid);
278
279 entry.context_id = r->context_id;
280 entry.vmid = vmid;
281
282 /* break out FECS record into trace events */
283 for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
284
285 entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
286 entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
287 entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
288
289 nvgpu_log(g, gpu_dbg_ctxsw,
290 "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
291 entry.tag, entry.timestamp, r->context_id,
292 r->new_context_id);
293
294 switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
295 case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
296 case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
297 entry.context_id = r->new_context_id;
298 entry.pid = new_pid;
299 break;
300
301 case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
302 case NVGPU_GPU_CTXSW_TAG_FE_ACK:
303 case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
304 case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
305 case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
306 case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
307 case NVGPU_GPU_CTXSW_TAG_SAVE_END:
308 entry.context_id = r->context_id;
309 entry.pid = cur_pid;
310 break;
311
312 default:
313 /* tags are not guaranteed to start at the beginning */
314 WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP));
315 continue;
316 }
317
318 nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
319 entry.tag, entry.context_id, entry.pid);
320
321 if (!entry.context_id)
322 continue;
323
324 gk20a_ctxsw_trace_write(g, &entry);
325 count++;
326 }
327
328 gk20a_ctxsw_trace_wake_up(g, vmid);
329 return count;
330}
331
332int gk20a_fecs_trace_poll(struct gk20a *g)
333{
334 struct gk20a_fecs_trace *trace = g->fecs_trace;
335
336 int read = 0;
337 int write = 0;
338 int cnt;
339 int err;
340
341 err = gk20a_busy(g);
342 if (unlikely(err))
343 return err;
344
345 nvgpu_mutex_acquire(&trace->poll_lock);
346 write = gk20a_fecs_trace_get_write_index(g);
347 if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
348 nvgpu_err(g,
349 "failed to acquire write index, write=%d", write);
350 err = write;
351 goto done;
352 }
353
354 read = gk20a_fecs_trace_get_read_index(g);
355
356 cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
357 if (!cnt)
358 goto done;
359
360 nvgpu_log(g, gpu_dbg_ctxsw,
361 "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
362 read, gk20a_fecs_trace_get_read_index(g), write, cnt);
363
364 /* Ensure all FECS writes have made it to SYSMEM */
365 g->ops.mm.fb_flush(g);
366
367 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
368		/* Bits 30:0 of MAILBOX1 represent the actual read pointer value */
369 read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
370 }
371
372 while (read != write) {
373 cnt = gk20a_fecs_trace_ring_read(g, read);
374 if (cnt > 0) {
375 nvgpu_log(g, gpu_dbg_ctxsw,
376 "number of trace entries added: %d", cnt);
377 }
378
379 /* Get to next record. */
380 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
381 }
382
383 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
384		/*
385		 * In the next step, the read pointer is going to be
386		 * updated. So, the MSB of the read pointer should be set
387		 * back to 1. This will keep FECS trace enabled.
388		 */
389 read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
390 }
391
392	/* ensure FECS records have been updated before incrementing read index */
393 nvgpu_wmb();
394 gk20a_fecs_trace_set_read_index(g, read);
395
396done:
397 nvgpu_mutex_release(&trace->poll_lock);
398 gk20a_idle(g);
399 return err;
400}
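/*
 * A minimal sketch of the ring arithmetic used above, assuming
 * GK20A_FECS_TRACE_NUM_RECORDS is a power of two (which the BUG_ON in
 * gk20a_fecs_trace_init() below enforces):
 *
 *   cnt  = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
 *   read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);   // wrap
 *
 * When NVGPU_FECS_TRACE_FEATURE_CONTROL is set, bit 31 of MAILBOX1 doubles
 * as a trace-enable flag, so it is masked off before the index is used and
 * OR'ed back in before the read pointer is written back.
 */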
401
402static int gk20a_fecs_trace_periodic_polling(void *arg)
403{
404 struct gk20a *g = (struct gk20a *)arg;
405 struct gk20a_fecs_trace *trace = g->fecs_trace;
406
407 pr_info("%s: running\n", __func__);
408
409 while (!nvgpu_thread_should_stop(&trace->poll_task)) {
410
411 nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
412 GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);
413
414 gk20a_fecs_trace_poll(g);
415 }
416
417 return 0;
418}
419
420size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
421{
422 return GK20A_FECS_TRACE_NUM_RECORDS
423 * ctxsw_prog_record_timestamp_record_size_in_bytes_v();
424}
425
426int gk20a_fecs_trace_init(struct gk20a *g)
427{
428 struct gk20a_fecs_trace *trace;
429 int err;
430
431 trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
432 if (!trace) {
433 nvgpu_warn(g, "failed to allocate fecs_trace");
434 return -ENOMEM;
435 }
436 g->fecs_trace = trace;
437
438 err = nvgpu_mutex_init(&trace->poll_lock);
439 if (err)
440 goto clean;
441 err = nvgpu_mutex_init(&trace->hash_lock);
442 if (err)
443 goto clean_poll_lock;
444
445 err = nvgpu_mutex_init(&trace->enable_lock);
446 if (err)
447 goto clean_hash_lock;
448
449 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
450 hash_init(trace->pid_hash_table);
451
452 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
453
454 trace->enable_count = 0;
455 trace->init = true;
456
457 return 0;
458
459clean_hash_lock:
460 nvgpu_mutex_destroy(&trace->hash_lock);
461
462clean_poll_lock:
463 nvgpu_mutex_destroy(&trace->poll_lock);
464clean:
465 nvgpu_kfree(g, trace);
466 g->fecs_trace = NULL;
467 return err;
468}
469
470int gk20a_fecs_trace_bind_channel(struct gk20a *g,
471 struct channel_gk20a *ch)
472{
473 /*
474 * map our circ_buf to the context space and store the GPU VA
475 * in the context header.
476 */
477
478 u32 lo;
479 u32 hi;
480 u64 addr;
481 struct tsg_gk20a *tsg;
482 struct nvgpu_gr_ctx *ch_ctx;
483 struct gk20a_fecs_trace *trace = g->fecs_trace;
484 struct nvgpu_mem *mem;
485 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
486 u32 aperture_mask;
487
488 tsg = tsg_gk20a_from_ch(ch);
489 if (tsg == NULL) {
490 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
491 return -EINVAL;
492 }
493
494 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
495 "chid=%d context_ptr=%x inst_block=%llx",
496 ch->chid, context_ptr,
497 nvgpu_inst_block_addr(g, &ch->inst_block));
498
499 tsg = tsg_gk20a_from_ch(ch);
500 if (!tsg)
501 return -EINVAL;
502
503 ch_ctx = &tsg->gr_ctx;
504 mem = &ch_ctx->mem;
505
506 if (!trace)
507 return -ENOMEM;
508
509 mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
510
511 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
512 addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
513 nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
514 aperture_mask = 0;
515 } else {
516 addr = nvgpu_inst_block_addr(g, mem);
517 nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
518 aperture_mask = nvgpu_aperture_mask(g, mem,
519 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
520 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
521 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
522 }
523 if (!addr)
524 return -ENOMEM;
525
526 lo = u64_lo32(addr);
527 hi = u64_hi32(addr);
528
529 mem = &ch_ctx->mem;
530
531 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
532 lo, GK20A_FECS_TRACE_NUM_RECORDS);
533
534 nvgpu_mem_wr(g, mem,
535 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
536 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
537 GK20A_FECS_TRACE_NUM_RECORDS));
538
539 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
540 mem = &ch->ctx_header;
541
542 nvgpu_mem_wr(g, mem,
543 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
544 lo);
545 nvgpu_mem_wr(g, mem,
546 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
547 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
548 aperture_mask);
549
550	/* The pid (process identifier) in user space corresponds to the tgid
551	 * (thread group id) in kernel space.
552	 */
553 gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);
554
555 return 0;
556}
557
558int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
559{
560 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
561
562 if (g->fecs_trace) {
563 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
564 "ch=%p context_ptr=%x", ch, context_ptr);
565
566 if (g->ops.fecs_trace.is_enabled(g)) {
567 if (g->ops.fecs_trace.flush)
568 g->ops.fecs_trace.flush(g);
569 gk20a_fecs_trace_poll(g);
570 }
571 gk20a_fecs_trace_hash_del(g, context_ptr);
572 }
573 return 0;
574}
575
576int gk20a_fecs_trace_reset(struct gk20a *g)
577{
578 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
579
580 if (!g->ops.fecs_trace.is_enabled(g))
581 return 0;
582
583 gk20a_fecs_trace_poll(g);
584 return gk20a_fecs_trace_set_read_index(g, 0);
585}
586
587int gk20a_fecs_trace_deinit(struct gk20a *g)
588{
589 struct gk20a_fecs_trace *trace = g->fecs_trace;
590
591 if (!trace->init)
592 return 0;
593
594 /*
595 * Check if tracer was enabled before attempting to stop the
596 * tracer thread.
597 */
598 if (trace->enable_count > 0) {
599 nvgpu_thread_stop(&trace->poll_task);
600 }
601 gk20a_fecs_trace_free_hash_table(g);
602
603 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
604 nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
605 nvgpu_mutex_destroy(&g->fecs_trace->enable_lock);
606
607 nvgpu_kfree(g, g->fecs_trace);
608 g->fecs_trace = NULL;
609 return 0;
610}
611
612int gk20a_gr_max_entries(struct gk20a *g,
613 struct nvgpu_gpu_ctxsw_trace_filter *filter)
614{
615 int n;
616 int tag;
617
618 /* Compute number of entries per record, with given filter */
619 for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
620 n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
621
622 /* Return max number of entries generated for the whole ring */
623 return n * GK20A_FECS_TRACE_NUM_RECORDS;
624}
625
626int gk20a_fecs_trace_enable(struct gk20a *g)
627{
628 struct gk20a_fecs_trace *trace = g->fecs_trace;
629 int write;
630 int err = 0;
631
632 if (!trace)
633 return -EINVAL;
634
635 nvgpu_mutex_acquire(&trace->enable_lock);
636 trace->enable_count++;
637
638 if (trace->enable_count == 1U) {
639 /* drop data in hw buffer */
640 if (g->ops.fecs_trace.flush)
641 g->ops.fecs_trace.flush(g);
642
643 write = gk20a_fecs_trace_get_write_index(g);
644
645 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
646			/*
647			 * For enabling FECS trace support, MAILBOX1's MSB
648			 * (Bit 31:31) should be set to 1. Bits 30:0 represent
649			 * the actual pointer value.
650			 */
651 write = write |
652 (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
653 }
654 gk20a_fecs_trace_set_read_index(g, write);
655
656 /*
657 * FECS ucode does a priv holdoff around the assertion of
658 * context reset. So, pri transactions (e.g. mailbox1 register
659		 * write) might fail due to this. Hence, do the write with an
660		 * ack, i.e. write and then read it back, to make sure the
661		 * mailbox1 write happened.
662 */
663 while (gk20a_fecs_trace_get_read_index(g) != write) {
664 nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
665 gk20a_fecs_trace_set_read_index(g, write);
666 }
667
668 err = nvgpu_thread_create(&trace->poll_task, g,
669 gk20a_fecs_trace_periodic_polling, __func__);
670 if (err) {
671 nvgpu_warn(g,
672 "failed to create FECS polling task");
673 goto done;
674 }
675 }
676
677done:
678 nvgpu_mutex_release(&trace->enable_lock);
679 return err;
680}
681
682int gk20a_fecs_trace_disable(struct gk20a *g)
683{
684 struct gk20a_fecs_trace *trace = g->fecs_trace;
685 int read = 0;
686
687 if (trace == NULL) {
688 return -EINVAL;
689 }
690
691 nvgpu_mutex_acquire(&trace->enable_lock);
692 if (trace->enable_count <= 0U) {
693 nvgpu_mutex_release(&trace->enable_lock);
694 return 0;
695 }
696 trace->enable_count--;
697 if (trace->enable_count == 0U) {
698 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
699 /*
700 * For disabling FECS trace support, MAILBOX1's MSB
701 * (Bit 31:31) should be set to 0.
702 */
703 read = gk20a_fecs_trace_get_read_index(g) &
704 (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
705
706 gk20a_fecs_trace_set_read_index(g, read);
707
708 /*
709 * FECS ucode does a priv holdoff around the assertion
710 * of context reset. So, pri transactions (e.g.
711			 * mailbox1 register write) might fail due to this.
712			 * Hence, do the write with an ack, i.e. write and then
713			 * read it back, to make sure the mailbox1 write happened.
714 */
715 while (gk20a_fecs_trace_get_read_index(g) != read) {
716 nvgpu_log(g, gpu_dbg_ctxsw,
717 "mailbox1 update failed");
718 gk20a_fecs_trace_set_read_index(g, read);
719 }
720 }
721
722 nvgpu_thread_stop(&trace->poll_task);
723
724 }
725 nvgpu_mutex_release(&trace->enable_lock);
726
727 return -EPERM;
728}
729
730bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
731{
732 struct gk20a_fecs_trace *trace = g->fecs_trace;
733
734 return (trace && nvgpu_thread_is_running(&trace->poll_task));
735}
736
737void gk20a_fecs_trace_reset_buffer(struct gk20a *g)
738{
739 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
740
741 gk20a_fecs_trace_set_read_index(g,
742 gk20a_fecs_trace_get_write_index(g));
743}
744#endif /* CONFIG_GK20A_CTXSW_TRACE */
diff --git a/include/gk20a/fecs_trace_gk20a.h b/include/gk20a/fecs_trace_gk20a.h
deleted file mode 100644
index d33e619..0000000
--- a/include/gk20a/fecs_trace_gk20a.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef NVGPU_GK20A_FECS_TRACE_GK20A_H
24#define NVGPU_GK20A_FECS_TRACE_GK20A_H
25
26struct gk20a;
27struct channel_gk20a;
28struct nvgpu_gpu_ctxsw_trace_filter;
29
30int gk20a_fecs_trace_poll(struct gk20a *g);
31int gk20a_fecs_trace_init(struct gk20a *g);
32int gk20a_fecs_trace_bind_channel(struct gk20a *g,
33 struct channel_gk20a *ch);
34int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch);
35int gk20a_fecs_trace_reset(struct gk20a *g);
36int gk20a_fecs_trace_deinit(struct gk20a *g);
37int gk20a_gr_max_entries(struct gk20a *g,
38 struct nvgpu_gpu_ctxsw_trace_filter *filter);
39int gk20a_fecs_trace_enable(struct gk20a *g);
40int gk20a_fecs_trace_disable(struct gk20a *g);
41bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
42size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
43void gk20a_fecs_trace_reset_buffer(struct gk20a *g);
44
45#endif /* NVGPU_GK20A_FECS_TRACE_GK20A_H */
diff --git a/include/gk20a/fence_gk20a.c b/include/gk20a/fence_gk20a.c
deleted file mode 100644
index af42130..0000000
--- a/include/gk20a/fence_gk20a.c
+++ /dev/null
@@ -1,319 +0,0 @@
1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include "fence_gk20a.h"
24
25#include <nvgpu/semaphore.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/soc.h>
28#include <nvgpu/nvhost.h>
29#include <nvgpu/barrier.h>
30#include <nvgpu/os_fence.h>
31#include <nvgpu/channel.h>
32
33#include "gk20a.h"
34
35struct gk20a_fence_ops {
36 int (*wait)(struct gk20a_fence *, long timeout);
37 bool (*is_expired)(struct gk20a_fence *);
38 void *(*free)(struct nvgpu_ref *);
39};
40
41static void gk20a_fence_free(struct nvgpu_ref *ref)
42{
43 struct gk20a_fence *f =
44 container_of(ref, struct gk20a_fence, ref);
45 struct gk20a *g = f->g;
46
47 if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
48 f->os_fence.ops->drop_ref(&f->os_fence);
49 }
50
51 if (f->semaphore) {
52 nvgpu_semaphore_put(f->semaphore);
53 }
54
55 if (f->allocator) {
56 if (nvgpu_alloc_initialized(f->allocator)) {
57 nvgpu_free(f->allocator, (u64)(uintptr_t)f);
58 }
59 } else {
60 nvgpu_kfree(g, f);
61 }
62}
63
64void gk20a_fence_put(struct gk20a_fence *f)
65{
66 if (f) {
67 nvgpu_ref_put(&f->ref, gk20a_fence_free);
68 }
69}
70
71struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
72{
73 if (f) {
74 nvgpu_ref_get(&f->ref);
75 }
76 return f;
77}
78
79inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
80{
81 bool valid = f->valid;
82
83 nvgpu_smp_rmb();
84 return valid;
85}
86
87int gk20a_fence_install_fd(struct gk20a_fence *f, int fd)
88{
89 if (!f || !gk20a_fence_is_valid(f) ||
90 !nvgpu_os_fence_is_initialized(&f->os_fence)) {
91 return -EINVAL;
92 }
93
94 f->os_fence.ops->install_fence(&f->os_fence, fd);
95
96 return 0;
97}
98
99int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
100 unsigned long timeout)
101{
102 if (f && gk20a_fence_is_valid(f)) {
103 if (!nvgpu_platform_is_silicon(g)) {
104 timeout = MAX_SCHEDULE_TIMEOUT;
105 }
106 return f->ops->wait(f, timeout);
107 }
108 return 0;
109}
110
111bool gk20a_fence_is_expired(struct gk20a_fence *f)
112{
113 if (f && gk20a_fence_is_valid(f) && f->ops) {
114 return f->ops->is_expired(f);
115 } else {
116 return true;
117 }
118}
119
120int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
121{
122 int err;
123 size_t size;
124 struct gk20a_fence *fence_pool = NULL;
125
126 size = sizeof(struct gk20a_fence);
127 if (count <= UINT_MAX / size) {
128 size = count * size;
129 fence_pool = nvgpu_vzalloc(c->g, size);
130 }
131
132 if (!fence_pool) {
133 return -ENOMEM;
134 }
135
136 err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator,
137 "fence_pool", (size_t)fence_pool, size,
138 sizeof(struct gk20a_fence), 0);
139 if (err) {
140 goto fail;
141 }
142
143 return 0;
144
145fail:
146 nvgpu_vfree(c->g, fence_pool);
147 return err;
148}
149
150void gk20a_free_fence_pool(struct channel_gk20a *c)
151{
152 if (nvgpu_alloc_initialized(&c->fence_allocator)) {
153 struct gk20a_fence *fence_pool;
154 fence_pool = (struct gk20a_fence *)(uintptr_t)
155 nvgpu_alloc_base(&c->fence_allocator);
156 nvgpu_alloc_destroy(&c->fence_allocator);
157 nvgpu_vfree(c->g, fence_pool);
158 }
159}
160
161struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
162{
163 struct gk20a_fence *fence = NULL;
164
165 if (channel_gk20a_is_prealloc_enabled(c)) {
166 if (nvgpu_alloc_initialized(&c->fence_allocator)) {
167 fence = (struct gk20a_fence *)(uintptr_t)
168 nvgpu_alloc(&c->fence_allocator,
169 sizeof(struct gk20a_fence));
170
171 /* clear the node and reset the allocator pointer */
172 if (fence) {
173 memset(fence, 0, sizeof(*fence));
174 fence->allocator = &c->fence_allocator;
175 }
176 }
177 } else {
178 fence = nvgpu_kzalloc(c->g, sizeof(struct gk20a_fence));
179 }
180
181 if (fence) {
182 nvgpu_ref_init(&fence->ref);
183 fence->g = c->g;
184 }
185
186 return fence;
187}
188
189void gk20a_init_fence(struct gk20a_fence *f,
190 const struct gk20a_fence_ops *ops,
191 struct nvgpu_os_fence os_fence)
192{
193 if (!f) {
194 return;
195 }
196 f->ops = ops;
197 f->syncpt_id = -1;
198 f->semaphore = NULL;
199 f->os_fence = os_fence;
200}
201
202/* Fences that are backed by GPU semaphores: */
203
204static int nvgpu_semaphore_fence_wait(struct gk20a_fence *f, long timeout)
205{
206 if (!nvgpu_semaphore_is_acquired(f->semaphore)) {
207 return 0;
208 }
209
210 return NVGPU_COND_WAIT_INTERRUPTIBLE(
211 f->semaphore_wq,
212 !nvgpu_semaphore_is_acquired(f->semaphore),
213 timeout);
214}
215
216static bool nvgpu_semaphore_fence_is_expired(struct gk20a_fence *f)
217{
218 return !nvgpu_semaphore_is_acquired(f->semaphore);
219}
220
221static const struct gk20a_fence_ops nvgpu_semaphore_fence_ops = {
222 .wait = &nvgpu_semaphore_fence_wait,
223 .is_expired = &nvgpu_semaphore_fence_is_expired,
224};
225
226/* This function takes ownership of the semaphore as well as the os_fence */
227int gk20a_fence_from_semaphore(
228 struct gk20a_fence *fence_out,
229 struct nvgpu_semaphore *semaphore,
230 struct nvgpu_cond *semaphore_wq,
231 struct nvgpu_os_fence os_fence)
232{
233 struct gk20a_fence *f = fence_out;
234
235 gk20a_init_fence(f, &nvgpu_semaphore_fence_ops, os_fence);
236 if (!f) {
237 return -EINVAL;
238 }
239
240
241 f->semaphore = semaphore;
242 f->semaphore_wq = semaphore_wq;
243
244 /* commit previous writes before setting the valid flag */
245 nvgpu_smp_wmb();
246 f->valid = true;
247
248 return 0;
249}
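/*
 * A minimal sketch of the publication pattern used above: the
 * nvgpu_smp_wmb() before setting f->valid pairs with the nvgpu_smp_rmb()
 * in gk20a_fence_is_valid(), so a reader that observes valid == true also
 * observes the semaphore fields written before it (illustration only):
 *
 *   writer:  f->semaphore = sema;  nvgpu_smp_wmb();  f->valid = true;
 *   reader:  if (gk20a_fence_is_valid(f))   // implies rmb
 *                   wait on f->semaphore_wq / check f->semaphore
 */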
250
251#ifdef CONFIG_TEGRA_GK20A_NVHOST
252/* Fences that are backed by host1x syncpoints: */
253
254static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
255{
256 return nvgpu_nvhost_syncpt_wait_timeout_ext(
257 f->nvhost_dev, f->syncpt_id, f->syncpt_value,
258 (u32)timeout, NULL, NULL);
259}
260
261static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
262{
263
264 /*
265 * In cases we don't register a notifier, we can't expect the
266 * syncpt value to be updated. For this case, we force a read
267 * of the value from HW, and then check for expiration.
268 */
269 if (!nvgpu_nvhost_syncpt_is_expired_ext(f->nvhost_dev, f->syncpt_id,
270 f->syncpt_value)) {
271 u32 val;
272
273 if (!nvgpu_nvhost_syncpt_read_ext_check(f->nvhost_dev,
274 f->syncpt_id, &val)) {
275 return nvgpu_nvhost_syncpt_is_expired_ext(
276 f->nvhost_dev,
277 f->syncpt_id, f->syncpt_value);
278 }
279 }
280
281 return true;
282}
283
284static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
285 .wait = &gk20a_syncpt_fence_wait,
286 .is_expired = &gk20a_syncpt_fence_is_expired,
287};
288
289/* This function takes the ownership of the os_fence */
290int gk20a_fence_from_syncpt(
291 struct gk20a_fence *fence_out,
292 struct nvgpu_nvhost_dev *nvhost_dev,
293 u32 id, u32 value, struct nvgpu_os_fence os_fence)
294{
295 struct gk20a_fence *f = fence_out;
296
297 gk20a_init_fence(f, &gk20a_syncpt_fence_ops, os_fence);
298 if (!f)
299 return -EINVAL;
300
301 f->nvhost_dev = nvhost_dev;
302 f->syncpt_id = id;
303 f->syncpt_value = value;
304
305 /* commit previous writes before setting the valid flag */
306 nvgpu_smp_wmb();
307 f->valid = true;
308
309 return 0;
310}
311#else
312int gk20a_fence_from_syncpt(
313 struct gk20a_fence *fence_out,
314 struct nvgpu_nvhost_dev *nvhost_dev,
315 u32 id, u32 value, struct nvgpu_os_fence os_fence)
316{
317 return -EINVAL;
318}
319#endif
diff --git a/include/gk20a/fence_gk20a.h b/include/gk20a/fence_gk20a.h
deleted file mode 100644
index 0311279..0000000
--- a/include/gk20a/fence_gk20a.h
+++ /dev/null
@@ -1,100 +0,0 @@
1/*
2 * drivers/video/tegra/host/gk20a/fence_gk20a.h
3 *
4 * GK20A Fences
5 *
6 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26#ifndef NVGPU_GK20A_FENCE_GK20A_H
27#define NVGPU_GK20A_FENCE_GK20A_H
28
29#include <nvgpu/types.h>
30#include <nvgpu/kref.h>
31#include <nvgpu/os_fence.h>
32
33struct platform_device;
34struct nvgpu_semaphore;
35struct channel_gk20a;
36struct gk20a;
37struct nvgpu_os_fence;
38
39struct gk20a_fence_ops;
40
41struct gk20a_fence {
42 struct gk20a *g;
43
44 /* Valid for all fence types: */
45 bool valid;
46 struct nvgpu_ref ref;
47 const struct gk20a_fence_ops *ops;
48
49 struct nvgpu_os_fence os_fence;
50
51 /* Valid for fences created from semaphores: */
52 struct nvgpu_semaphore *semaphore;
53 struct nvgpu_cond *semaphore_wq;
54
55 /* Valid for fences created from syncpoints: */
56 struct nvgpu_nvhost_dev *nvhost_dev;
57 u32 syncpt_id;
58 u32 syncpt_value;
59
60 /* Valid for fences part of a pre-allocated fence pool */
61 struct nvgpu_allocator *allocator;
62};
63
64/* Fences can be created from semaphores or syncpoint (id, value) pairs */
65int gk20a_fence_from_semaphore(
66 struct gk20a_fence *fence_out,
67 struct nvgpu_semaphore *semaphore,
68 struct nvgpu_cond *semaphore_wq,
69 struct nvgpu_os_fence os_fence);
70
71int gk20a_fence_from_syncpt(
72 struct gk20a_fence *fence_out,
73 struct nvgpu_nvhost_dev *nvhost_dev,
74 u32 id, u32 value,
75 struct nvgpu_os_fence os_fence);
76
77int gk20a_alloc_fence_pool(
78 struct channel_gk20a *c,
79 unsigned int count);
80
81void gk20a_free_fence_pool(
82 struct channel_gk20a *c);
83
84struct gk20a_fence *gk20a_alloc_fence(
85 struct channel_gk20a *c);
86
87void gk20a_init_fence(struct gk20a_fence *f,
88 const struct gk20a_fence_ops *ops,
89 struct nvgpu_os_fence os_fence);
90
91/* Fence operations */
92void gk20a_fence_put(struct gk20a_fence *f);
93struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f);
94int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
95 unsigned long timeout);
96bool gk20a_fence_is_expired(struct gk20a_fence *f);
97bool gk20a_fence_is_valid(struct gk20a_fence *f);
98int gk20a_fence_install_fd(struct gk20a_fence *f, int fd);
99
100#endif /* NVGPU_GK20A_FENCE_GK20A_H */
diff --git a/include/gk20a/fifo_gk20a.c b/include/gk20a/fifo_gk20a.c
deleted file mode 100644
index 77babc7..0000000
--- a/include/gk20a/fifo_gk20a.c
+++ /dev/null
@@ -1,4641 +0,0 @@
1/*
2 * GK20A Graphics FIFO (gr host)
3 *
4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/mm.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/timers.h>
30#include <nvgpu/semaphore.h>
31#include <nvgpu/enabled.h>
32#include <nvgpu/kmem.h>
33#include <nvgpu/log.h>
34#include <nvgpu/soc.h>
35#include <nvgpu/atomic.h>
36#include <nvgpu/bug.h>
37#include <nvgpu/log2.h>
38#include <nvgpu/debug.h>
39#include <nvgpu/nvhost.h>
40#include <nvgpu/barrier.h>
41#include <nvgpu/ctxsw_trace.h>
42#include <nvgpu/error_notifier.h>
43#include <nvgpu/ptimer.h>
44#include <nvgpu/io.h>
45#include <nvgpu/utils.h>
46#include <nvgpu/channel.h>
47#include <nvgpu/unit.h>
48#include <nvgpu/power_features/power_features.h>
49#include <nvgpu/power_features/cg.h>
50
51#include "gk20a.h"
52#include "mm_gk20a.h"
53
54#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
55#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
56#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
57#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
58#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
59#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
60
61#define FECS_METHOD_WFI_RESTORE 0x80000
62#define FECS_MAILBOX_0_ACK_RESTORE 0x4
63
64
65static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
66
67static const char *const pbdma_intr_fault_type_desc[] = {
68 "MEMREQ timeout", "MEMACK_TIMEOUT", "MEMACK_EXTRA acks",
69 "MEMDAT_TIMEOUT", "MEMDAT_EXTRA acks", "MEMFLUSH noack",
70 "MEMOP noack", "LBCONNECT noack", "NONE - was LBREQ",
71 "LBACK_TIMEOUT", "LBACK_EXTRA acks", "LBDAT_TIMEOUT",
72 "LBDAT_EXTRA acks", "GPFIFO won't fit", "GPPTR invalid",
73 "GPENTRY invalid", "GPCRC mismatch", "PBPTR get>put",
74 "PBENTRY invld", "PBCRC mismatch", "NONE - was XBARC",
75 "METHOD invld", "METHODCRC mismat", "DEVICE sw method",
76 "[ENGINE]", "SEMAPHORE invlid", "ACQUIRE timeout",
77 "PRI forbidden", "ILLEGAL SYNCPT", "[NO_CTXSW_SEG]",
78 "PBSEG badsplit", "SIGNATURE bad"
79};
80
81u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
82 u32 engine_id[], u32 engine_id_sz,
83 u32 engine_enum)
84{
85 struct fifo_gk20a *f = NULL;
86 u32 instance_cnt = 0;
87 u32 engine_id_idx;
88 u32 active_engine_id = 0;
89 struct fifo_engine_info_gk20a *info = NULL;
90
91 if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) {
92 f = &g->fifo;
93 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
94 active_engine_id = f->active_engines_list[engine_id_idx];
95 info = &f->engine_info[active_engine_id];
96
97 if (info->engine_enum == engine_enum) {
98 if (instance_cnt < engine_id_sz) {
99 engine_id[instance_cnt] = active_engine_id;
100 ++instance_cnt;
101 } else {
102 nvgpu_log_info(g, "warning engine_id table sz is small %d",
103 engine_id_sz);
104 }
105 }
106 }
107 }
108 return instance_cnt;
109}
110
111struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id)
112{
113 struct fifo_gk20a *f = NULL;
114 u32 engine_id_idx;
115 struct fifo_engine_info_gk20a *info = NULL;
116
117 if (!g) {
118 return info;
119 }
120
121 f = &g->fifo;
122
123 if (engine_id < f->max_engines) {
124 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
125 if (engine_id == f->active_engines_list[engine_id_idx]) {
126 info = &f->engine_info[engine_id];
127 break;
128 }
129 }
130 }
131
132 if (!info) {
133 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
134 }
135
136 return info;
137}
138
139bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id)
140{
141 struct fifo_gk20a *f = NULL;
142 u32 engine_id_idx;
143 bool valid = false;
144
145 if (!g) {
146 return valid;
147 }
148
149 f = &g->fifo;
150
151 if (engine_id < f->max_engines) {
152 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
153 if (engine_id == f->active_engines_list[engine_id_idx]) {
154 valid = true;
155 break;
156 }
157 }
158 }
159
160 if (!valid) {
161 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
162 }
163
164 return valid;
165}
166
167u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g)
168{
169 u32 gr_engine_cnt = 0;
170 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
171
172 /* Consider 1st available GR engine */
173 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
174 1, ENGINE_GR_GK20A);
175
176 if (!gr_engine_cnt) {
177 nvgpu_err(g, "No GR engine available on this device!");
178 }
179
180 return gr_engine_id;
181}
182
183u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
184{
185 u32 reset_mask = 0;
186 u32 engine_enum = ENGINE_INVAL_GK20A;
187 struct fifo_gk20a *f = NULL;
188 u32 engine_id_idx;
189 struct fifo_engine_info_gk20a *engine_info;
190 u32 active_engine_id = 0;
191
192 if (!g) {
193 return reset_mask;
194 }
195
196 f = &g->fifo;
197
198 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
199 active_engine_id = f->active_engines_list[engine_id_idx];
200 engine_info = &f->engine_info[active_engine_id];
201 engine_enum = engine_info->engine_enum;
202
203 if ((engine_enum == ENGINE_GRCE_GK20A) ||
204 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
205 reset_mask |= engine_info->reset_mask;
206 }
207 }
208
209 return reset_mask;
210}
211
212u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g)
213{
214 u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g);
215 u32 engine_enum = ENGINE_INVAL_GK20A;
216 struct fifo_gk20a *f = NULL;
217 u32 engine_id_idx;
218 struct fifo_engine_info_gk20a *engine_info;
219 u32 active_engine_id = 0;
220
221 if (!g) {
222 return ce_runlist_id;
223 }
224
225 f = &g->fifo;
226
227 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
228 active_engine_id = f->active_engines_list[engine_id_idx];
229 engine_info = &f->engine_info[active_engine_id];
230 engine_enum = engine_info->engine_enum;
231
232		/* select the last ASYNC_CE if available */
233 if (engine_enum == ENGINE_ASYNC_CE_GK20A) {
234 ce_runlist_id = engine_info->runlist_id;
235 }
236 }
237
238 return ce_runlist_id;
239}
240
241u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
242{
243 u32 gr_engine_cnt = 0;
244 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
245 struct fifo_engine_info_gk20a *engine_info;
246 u32 gr_runlist_id = ~0;
247
248 /* Consider 1st available GR engine */
249 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
250 1, ENGINE_GR_GK20A);
251
252 if (!gr_engine_cnt) {
253 nvgpu_err(g,
254 "No GR engine available on this device!");
255 goto end;
256 }
257
258 engine_info = gk20a_fifo_get_engine_info(g, gr_engine_id);
259
260 if (engine_info) {
261 gr_runlist_id = engine_info->runlist_id;
262 } else {
263 nvgpu_err(g,
264 "gr_engine_id is not in active list/invalid %d", gr_engine_id);
265 }
266
267end:
268 return gr_runlist_id;
269}
270
271bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
272{
273 struct fifo_gk20a *f = NULL;
274 u32 engine_id_idx;
275 u32 active_engine_id;
276 struct fifo_engine_info_gk20a *engine_info;
277
278 if (!g) {
279 return false;
280 }
281
282 f = &g->fifo;
283
284 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
285 active_engine_id = f->active_engines_list[engine_id_idx];
286 engine_info = gk20a_fifo_get_engine_info(g, active_engine_id);
287 if (engine_info && (engine_info->runlist_id == runlist_id)) {
288 return true;
289 }
290 }
291
292 return false;
293}
294
295/*
296 * Link engine IDs to MMU IDs and vice versa.
297 */
298
299static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
300{
301 u32 fault_id = FIFO_INVAL_ENGINE_ID;
302 struct fifo_engine_info_gk20a *engine_info;
303
304 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
305
306 if (engine_info) {
307 fault_id = engine_info->fault_id;
308 } else {
309 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
310 }
311 return fault_id;
312}
313
314static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
315{
316 u32 engine_id;
317 u32 active_engine_id;
318 struct fifo_engine_info_gk20a *engine_info;
319 struct fifo_gk20a *f = &g->fifo;
320
321 for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
322 active_engine_id = f->active_engines_list[engine_id];
323 engine_info = &g->fifo.engine_info[active_engine_id];
324
325 if (engine_info->fault_id == fault_id) {
326 break;
327 }
328 active_engine_id = FIFO_INVAL_ENGINE_ID;
329 }
330 return active_engine_id;
331}
332
333int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
334 u32 *inst_id)
335{
336 int ret = ENGINE_INVAL_GK20A;
337
338 nvgpu_log_info(g, "engine type %d", engine_type);
339 if (engine_type == top_device_info_type_enum_graphics_v()) {
340 ret = ENGINE_GR_GK20A;
341 } else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
342 (engine_type <= top_device_info_type_enum_copy2_v())) {
343			/* Assume all CE engines have a separate runlist at this point.
344			 * The ENGINE_GRCE_GK20A type CE is identified later by comparing
345			 * its runlist_id with the GR runlist_id in init_engine_info(). */
346 ret = ENGINE_ASYNC_CE_GK20A;
347		/* inst_id ranges from CE0 (0) to CE2 (2) */
348 if (inst_id) {
349 *inst_id = (engine_type - top_device_info_type_enum_copy0_v());
350 }
351 }
352
353 return ret;
354}
355
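/* Walk the top_device_info table and fill in engine_info[] and
 * active_engines_list[] for all engines found. */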
356int gk20a_fifo_init_engine_info(struct fifo_gk20a *f)
357{
358 struct gk20a *g = f->g;
359 u32 i;
360 u32 max_info_entries = top_device_info__size_1_v();
361 u32 engine_enum = ENGINE_INVAL_GK20A;
362 u32 engine_id = FIFO_INVAL_ENGINE_ID;
363 u32 runlist_id = ~0;
364 u32 pbdma_id = ~0;
365 u32 intr_id = ~0;
366 u32 reset_id = ~0;
367 u32 inst_id = 0;
368 u32 pri_base = 0;
369 u32 fault_id = 0;
370 u32 gr_runlist_id = ~0;
371 bool found_pbdma_for_runlist = false;
372
373 nvgpu_log_fn(g, " ");
374
375 f->num_engines = 0;
376
377 for (i = 0; i < max_info_entries; i++) {
378 u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
379 u32 entry = top_device_info_entry_v(table_entry);
380 u32 runlist_bit;
381
382 if (entry == top_device_info_entry_enum_v()) {
383 if (top_device_info_engine_v(table_entry)) {
384 engine_id =
385 top_device_info_engine_enum_v(table_entry);
386 nvgpu_log_info(g, "info: engine_id %d",
387 top_device_info_engine_enum_v(table_entry));
388 }
389
390
391 if (top_device_info_runlist_v(table_entry)) {
392 runlist_id =
393 top_device_info_runlist_enum_v(table_entry);
394 nvgpu_log_info(g, "gr info: runlist_id %d", runlist_id);
395
396 runlist_bit = BIT(runlist_id);
397
398 found_pbdma_for_runlist = false;
399 for (pbdma_id = 0; pbdma_id < f->num_pbdma;
400 pbdma_id++) {
401 if (f->pbdma_map[pbdma_id] &
402 runlist_bit) {
403 nvgpu_log_info(g,
404 "gr info: pbdma_map[%d]=%d",
405 pbdma_id,
406 f->pbdma_map[pbdma_id]);
407 found_pbdma_for_runlist = true;
408 break;
409 }
410 }
411
412 if (!found_pbdma_for_runlist) {
413 nvgpu_err(g, "busted pbdma map");
414 return -EINVAL;
415 }
416 }
417
418 if (top_device_info_intr_v(table_entry)) {
419 intr_id =
420 top_device_info_intr_enum_v(table_entry);
421 nvgpu_log_info(g, "gr info: intr_id %d", intr_id);
422 }
423
424 if (top_device_info_reset_v(table_entry)) {
425 reset_id =
426 top_device_info_reset_enum_v(table_entry);
427 nvgpu_log_info(g, "gr info: reset_id %d",
428 reset_id);
429 }
430 } else if (entry == top_device_info_entry_engine_type_v()) {
431 u32 engine_type =
432 top_device_info_type_enum_v(table_entry);
433 engine_enum =
434 g->ops.fifo.engine_enum_from_type(g,
435 engine_type, &inst_id);
436 } else if (entry == top_device_info_entry_data_v()) {
437 /* gk20a doesn't support device_info_data packet parsing */
438 if (g->ops.fifo.device_info_data_parse) {
439 g->ops.fifo.device_info_data_parse(g,
440 table_entry, &inst_id, &pri_base,
441 &fault_id);
442 }
443 }
444
445 if (!top_device_info_chain_v(table_entry)) {
446 if (engine_enum < ENGINE_INVAL_GK20A) {
447 struct fifo_engine_info_gk20a *info =
448 &g->fifo.engine_info[engine_id];
449
450 info->intr_mask |= BIT(intr_id);
451 info->reset_mask |= BIT(reset_id);
452 info->runlist_id = runlist_id;
453 info->pbdma_id = pbdma_id;
454 info->inst_id = inst_id;
455 info->pri_base = pri_base;
456
457 if (engine_enum == ENGINE_GR_GK20A) {
458 gr_runlist_id = runlist_id;
459 }
460
461				/* GR and GR_COPY share the same runlist_id */
462 if ((engine_enum == ENGINE_ASYNC_CE_GK20A) &&
463 (gr_runlist_id == runlist_id)) {
464 engine_enum = ENGINE_GRCE_GK20A;
465 }
466
467 info->engine_enum = engine_enum;
468
469 if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A)) {
470 fault_id = 0x1b;
471 }
472 info->fault_id = fault_id;
473
474				/* engine_id ranges from 0 to NV_HOST_NUM_ENGINES */
475 f->active_engines_list[f->num_engines] = engine_id;
476
477 ++f->num_engines;
478
479 engine_enum = ENGINE_INVAL_GK20A;
480 }
481 }
482 }
483
484 return 0;
485}
486
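/* Return the interrupt mask for a single active engine, or 0 if the engine id is invalid. */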
487u32 gk20a_fifo_act_eng_interrupt_mask(struct gk20a *g, u32 act_eng_id)
488{
489 struct fifo_engine_info_gk20a *engine_info = NULL;
490
491 engine_info = gk20a_fifo_get_engine_info(g, act_eng_id);
492 if (engine_info) {
493 return engine_info->intr_mask;
494 }
495
496 return 0;
497}
498
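/* Combine the interrupt masks of all active engines; CE engines are skipped
 * when no CE ISRs are hooked up. */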
499u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
500{
501 u32 eng_intr_mask = 0;
502 unsigned int i;
503 u32 active_engine_id = 0;
504 u32 engine_enum = ENGINE_INVAL_GK20A;
505
506 for (i = 0; i < g->fifo.num_engines; i++) {
507 u32 intr_mask;
508 active_engine_id = g->fifo.active_engines_list[i];
509 intr_mask = g->fifo.engine_info[active_engine_id].intr_mask;
510 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
511 if (((engine_enum == ENGINE_GRCE_GK20A) ||
512 (engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
513 (!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall)) {
514 continue;
515 }
516
517 eng_intr_mask |= intr_mask;
518 }
519
520 return eng_intr_mask;
521}
522
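/* Free per-runlist buffers, active channel/TSG bitmaps and locks, then
 * release the runlist_info array itself. */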
523void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
524{
525 u32 i;
526 u32 runlist_id;
527 struct fifo_runlist_info_gk20a *runlist;
528 struct gk20a *g = NULL;
529
530 if (!f || !f->runlist_info) {
531 return;
532 }
533
534 g = f->g;
535
536 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
537 runlist = &f->runlist_info[runlist_id];
538 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
539 nvgpu_dma_free(g, &runlist->mem[i]);
540 }
541
542 nvgpu_kfree(g, runlist->active_channels);
543 runlist->active_channels = NULL;
544
545 nvgpu_kfree(g, runlist->active_tsgs);
546 runlist->active_tsgs = NULL;
547
548 nvgpu_mutex_destroy(&runlist->runlist_lock);
549
550 }
551 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
552 f->max_runlists));
553
554 nvgpu_kfree(g, f->runlist_info);
555 f->runlist_info = NULL;
556 f->max_runlists = 0;
557}
558
559static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
560{
561 struct gk20a *g = f->g;
562 unsigned int i = 0;
563
564 nvgpu_log_fn(g, " ");
565
566 nvgpu_channel_worker_deinit(g);
567 /*
568 * Make sure all channels are closed before deleting them.
569 */
570 for (; i < f->num_channels; i++) {
571 struct channel_gk20a *c = f->channel + i;
572 struct tsg_gk20a *tsg = f->tsg + i;
573
574 /*
575		 * This could race, but the worst that happens is an error message
576		 * from gk20a_free_channel() complaining about multiple closes.
577 */
578 if (c->referenceable) {
579 __gk20a_channel_kill(c);
580 }
581
582 nvgpu_mutex_destroy(&tsg->event_id_list_lock);
583
584 nvgpu_mutex_destroy(&c->ioctl_lock);
585 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
586 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
587 nvgpu_mutex_destroy(&c->sync_lock);
588#if defined(CONFIG_GK20A_CYCLE_STATS)
589 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
590 nvgpu_mutex_destroy(&c->cs_client_mutex);
591#endif
592 nvgpu_mutex_destroy(&c->dbg_s_lock);
593
594 }
595
596 nvgpu_vfree(g, f->channel);
597 nvgpu_vfree(g, f->tsg);
598 if (g->ops.mm.is_bar1_supported(g)) {
599 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
600 } else {
601 nvgpu_dma_free(g, &f->userd);
602 }
603
604 gk20a_fifo_delete_runlist(f);
605
606 nvgpu_kfree(g, f->pbdma_map);
607 f->pbdma_map = NULL;
608 nvgpu_kfree(g, f->engine_info);
609 f->engine_info = NULL;
610 nvgpu_kfree(g, f->active_engines_list);
611 f->active_engines_list = NULL;
612}
613
614 /* reads info from hardware and fills in the pbdma exception info record */
615static inline void get_exception_pbdma_info(
616 struct gk20a *g,
617 struct fifo_engine_info_gk20a *eng_info)
618{
619 struct fifo_pbdma_exception_info_gk20a *e =
620 &eng_info->pbdma_exception_info;
621
622 u32 pbdma_status_r = e->status_r = gk20a_readl(g,
623 fifo_pbdma_status_r(eng_info->pbdma_id));
624 e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
625 e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
626 fifo_pbdma_status_id_type_chid_v();
627 e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
628 e->next_id_is_chid =
629 fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
630 fifo_pbdma_status_next_id_type_chid_v();
631 e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
632 e->chsw_in_progress =
633 fifo_pbdma_status_chsw_v(pbdma_status_r) ==
634 fifo_pbdma_status_chsw_in_progress_v();
635}
636
637static void fifo_pbdma_exception_status(struct gk20a *g,
638 struct fifo_engine_info_gk20a *eng_info)
639{
640 struct fifo_pbdma_exception_info_gk20a *e;
641 get_exception_pbdma_info(g, eng_info);
642 e = &eng_info->pbdma_exception_info;
643
644 nvgpu_log_fn(g, "pbdma_id %d, "
645 "id_type %s, id %d, chan_status %d, "
646 "next_id_type %s, next_id %d, "
647 "chsw_in_progress %d",
648 eng_info->pbdma_id,
649 e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
650 e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
651 e->chsw_in_progress);
652}
653
654 /* reads info from hardware and fills in the engine exception info record */
655static inline void get_exception_engine_info(
656 struct gk20a *g,
657 struct fifo_engine_info_gk20a *eng_info)
658{
659 struct fifo_engine_exception_info_gk20a *e =
660 &eng_info->engine_exception_info;
661 u32 engine_status_r = e->status_r =
662 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
663 e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
664 e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
665 fifo_engine_status_id_type_chid_v();
666 e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
667 e->faulted =
668 fifo_engine_status_faulted_v(engine_status_r) ==
669 fifo_engine_status_faulted_true_v();
670 e->idle =
671 fifo_engine_status_engine_v(engine_status_r) ==
672 fifo_engine_status_engine_idle_v();
673 e->ctxsw_in_progress =
674 fifo_engine_status_ctxsw_v(engine_status_r) ==
675 fifo_engine_status_ctxsw_in_progress_v();
676}
677
678static void fifo_engine_exception_status(struct gk20a *g,
679 struct fifo_engine_info_gk20a *eng_info)
680{
681 struct fifo_engine_exception_info_gk20a *e;
682 get_exception_engine_info(g, eng_info);
683 e = &eng_info->engine_exception_info;
684
685 nvgpu_log_fn(g, "engine_id %d, id_type %s, id %d, ctx_status %d, "
686 "faulted %d, idle %d, ctxsw_in_progress %d, ",
687 eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
688 e->id, e->ctx_status_v,
689 e->faulted, e->idle, e->ctxsw_in_progress);
690}
691
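/* Allocate per-runlist state: active channel/TSG bitmaps, runlist buffers,
 * the runlist lock, and the pbdma/engine bitmasks. */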
692static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
693{
694 struct fifo_runlist_info_gk20a *runlist;
695 struct fifo_engine_info_gk20a *engine_info;
696 unsigned int runlist_id;
697 u32 i;
698 size_t runlist_size;
699 u32 active_engine_id, pbdma_id, engine_id;
700 int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
701 NVGPU_DMA_FORCE_CONTIGUOUS : 0;
702 int err = 0;
703
704 nvgpu_log_fn(g, " ");
705
706 f->max_runlists = g->ops.fifo.eng_runlist_base_size();
707 f->runlist_info = nvgpu_kzalloc(g,
708 sizeof(struct fifo_runlist_info_gk20a) *
709 f->max_runlists);
710	if (!f->runlist_info) {
		err = -ENOMEM;
711		goto clean_up_runlist;
712	}
713
714 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
715 f->max_runlists));
716
717 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
718 runlist = &f->runlist_info[runlist_id];
719
720 runlist->active_channels =
721 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
722 BITS_PER_BYTE));
723		if (!runlist->active_channels) {
			err = -ENOMEM;
724			goto clean_up_runlist;
725		}
726
727 runlist->active_tsgs =
728 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
729 BITS_PER_BYTE));
730		if (!runlist->active_tsgs) {
			err = -ENOMEM;
731			goto clean_up_runlist;
732		}
733
734 runlist_size = f->runlist_entry_size * f->num_runlist_entries;
735 nvgpu_log(g, gpu_dbg_info,
736 "runlist_entries %d runlist size %zu",
737 f->num_runlist_entries, runlist_size);
738
739 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
740 err = nvgpu_dma_alloc_flags_sys(g, flags,
741 runlist_size,
742 &runlist->mem[i]);
743 if (err) {
744 nvgpu_err(g, "memory allocation failed");
745 goto clean_up_runlist;
746 }
747 }
748
749 err = nvgpu_mutex_init(&runlist->runlist_lock);
750 if (err != 0) {
751 nvgpu_err(g,
752 "Error in runlist_lock mutex initialization");
753 goto clean_up_runlist;
754 }
755
756		/* None of the buffers is pinned if this value doesn't change.
757		 * Otherwise, one of them (cur_buffer) must have been pinned. */
758 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
759
760 for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
761 if (f->pbdma_map[pbdma_id] & BIT(runlist_id)) {
762 runlist->pbdma_bitmask |= BIT(pbdma_id);
763 }
764 }
765 nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x",
766 runlist_id, runlist->pbdma_bitmask);
767
768 for (engine_id = 0; engine_id < f->num_engines; ++engine_id) {
769 active_engine_id = f->active_engines_list[engine_id];
770 engine_info = &f->engine_info[active_engine_id];
771
772 if (engine_info && engine_info->runlist_id == runlist_id) {
773 runlist->eng_bitmask |= BIT(active_engine_id);
774 }
775 }
776 nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x",
777 runlist_id, runlist->eng_bitmask);
778 }
779
780 nvgpu_log_fn(g, "done");
781 return 0;
782
783clean_up_runlist:
784 gk20a_fifo_delete_runlist(f);
785 nvgpu_log_fn(g, "fail");
786 return err;
787}
788
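/* Mask of fifo_intr_0 bits that are treated as errors. */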
789u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g)
790{
791 u32 intr_0_error_mask =
792 fifo_intr_0_bind_error_pending_f() |
793 fifo_intr_0_sched_error_pending_f() |
794 fifo_intr_0_chsw_error_pending_f() |
795 fifo_intr_0_fb_flush_timeout_pending_f() |
796 fifo_intr_0_dropped_mmu_fault_pending_f() |
797 fifo_intr_0_mmu_fault_pending_f() |
798 fifo_intr_0_lb_error_pending_f() |
799 fifo_intr_0_pio_error_pending_f();
800
801 return intr_0_error_mask;
802}
803
804static u32 gk20a_fifo_intr_0_en_mask(struct gk20a *g)
805{
806 u32 intr_0_en_mask;
807
808 intr_0_en_mask = g->ops.fifo.intr_0_error_mask(g);
809
810 intr_0_en_mask |= fifo_intr_0_runlist_event_pending_f() |
811 fifo_intr_0_pbdma_intr_pending_f();
812
813 return intr_0_en_mask;
814}
815
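/* Reset PFIFO, program FB, PBDMA and engine timeouts, and clear and enable
 * PBDMA and PFIFO interrupts. */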
816int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
817{
818 u32 intr_stall;
819 u32 mask;
820 u32 timeout;
821 unsigned int i;
822 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
823
824 nvgpu_log_fn(g, " ");
825
826 /* enable pmc pfifo */
827 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_FIFO));
828
829 nvgpu_cg_slcg_fifo_load_enable(g);
830
831 nvgpu_cg_blcg_fifo_load_enable(g);
832
833 timeout = gk20a_readl(g, fifo_fb_timeout_r());
834 timeout = set_field(timeout, fifo_fb_timeout_period_m(),
835 fifo_fb_timeout_period_max_f());
836 nvgpu_log_info(g, "fifo_fb_timeout reg val = 0x%08x", timeout);
837 gk20a_writel(g, fifo_fb_timeout_r(), timeout);
838
839 /* write pbdma timeout value */
840 for (i = 0; i < host_num_pbdma; i++) {
841 timeout = gk20a_readl(g, pbdma_timeout_r(i));
842 timeout = set_field(timeout, pbdma_timeout_period_m(),
843 pbdma_timeout_period_max_f());
844 nvgpu_log_info(g, "pbdma_timeout reg val = 0x%08x", timeout);
845 gk20a_writel(g, pbdma_timeout_r(i), timeout);
846 }
847 if (g->ops.fifo.apply_pb_timeout) {
848 g->ops.fifo.apply_pb_timeout(g);
849 }
850
851 if (g->ops.fifo.apply_ctxsw_timeout_intr) {
852 g->ops.fifo.apply_ctxsw_timeout_intr(g);
853 } else {
854 timeout = g->fifo_eng_timeout_us;
855 timeout = scale_ptimer(timeout,
856 ptimer_scalingfactor10x(g->ptimer_src_freq));
857 timeout |= fifo_eng_timeout_detection_enabled_f();
858 gk20a_writel(g, fifo_eng_timeout_r(), timeout);
859 }
860
861 /* clear and enable pbdma interrupt */
862 for (i = 0; i < host_num_pbdma; i++) {
863 gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
864 gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
865
866 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
867 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
868 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
869 nvgpu_log_info(g, "pbdma id:%u, intr_en_0 0x%08x", i, intr_stall);
870 gk20a_writel(g, pbdma_intr_en_0_r(i), intr_stall);
871 intr_stall = gk20a_readl(g, pbdma_intr_stall_1_r(i));
872 /*
873 * For bug 2082123
874 * Mask the unused HCE_RE_ILLEGAL_OP bit from the interrupt.
875 */
876 intr_stall &= ~pbdma_intr_stall_1_hce_illegal_op_enabled_f();
877 nvgpu_log_info(g, "pbdma id:%u, intr_en_1 0x%08x", i, intr_stall);
878 gk20a_writel(g, pbdma_intr_en_1_r(i), intr_stall);
879 }
880
881 /* reset runlist interrupts */
882 gk20a_writel(g, fifo_intr_runlist_r(), ~0);
883
884 /* clear and enable pfifo interrupt */
885 gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
886 mask = gk20a_fifo_intr_0_en_mask(g);
887 nvgpu_log_info(g, "fifo_intr_en_0 0x%08x", mask);
888 gk20a_writel(g, fifo_intr_en_0_r(), mask);
889 nvgpu_log_info(g, "fifo_intr_en_1 = 0x80000000");
890 gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);
891
892 nvgpu_log_fn(g, "done");
893
894 return 0;
895}
896
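/* Common SW init: allocate channel/TSG/engine tables, read the pbdma map,
 * build engine info and set up runlists and locks. */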
897int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
898{
899 struct fifo_gk20a *f = &g->fifo;
900 unsigned int chid, i;
901 int err = 0;
902
903 nvgpu_log_fn(g, " ");
904
905 f->g = g;
906
907 err = nvgpu_mutex_init(&f->intr.isr.mutex);
908 if (err) {
909 nvgpu_err(g, "failed to init isr.mutex");
910 return err;
911 }
912
913 err = nvgpu_mutex_init(&f->engines_reset_mutex);
914 if (err) {
915 nvgpu_err(g, "failed to init engines_reset_mutex");
916 return err;
917 }
918
919 g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
920
921 f->num_channels = g->ops.fifo.get_num_fifos(g);
922 f->runlist_entry_size = g->ops.fifo.runlist_entry_size();
923 f->num_runlist_entries = fifo_eng_runlist_length_max_v();
924 f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
925 f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
926
927 f->userd_entry_size = 1 << ram_userd_base_shift_v();
928
929 f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
930 f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg));
931 f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map));
932 f->engine_info = nvgpu_kzalloc(g, f->max_engines *
933 sizeof(*f->engine_info));
934 f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32));
935
936 if (!(f->channel && f->tsg && f->pbdma_map && f->engine_info &&
937 f->active_engines_list)) {
938 err = -ENOMEM;
939 goto clean_up;
940 }
941 memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32)));
942
943 /* pbdma map needs to be in place before calling engine info init */
944 for (i = 0; i < f->num_pbdma; ++i) {
945 f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
946 }
947
948	err = g->ops.fifo.init_engine_info(f);
	if (err != 0) {
		nvgpu_err(g, "failed to init engine info");
		goto clean_up;
	}
949
950 err = init_runlist(g, f);
951 if (err) {
952 nvgpu_err(g, "failed to init runlist");
953 goto clean_up;
954 }
955
956 nvgpu_init_list_node(&f->free_chs);
957
958 err = nvgpu_mutex_init(&f->free_chs_mutex);
959 if (err) {
960 nvgpu_err(g, "failed to init free_chs_mutex");
961 goto clean_up;
962 }
963
964 for (chid = 0; chid < f->num_channels; chid++) {
965 gk20a_init_channel_support(g, chid);
966 gk20a_init_tsg_support(g, chid);
967 }
968
969 err = nvgpu_mutex_init(&f->tsg_inuse_mutex);
970 if (err) {
971 nvgpu_err(g, "failed to init tsg_inuse_mutex");
972 goto clean_up;
973 }
974
975 f->remove_support = gk20a_remove_fifo_support;
976
977 f->deferred_reset_pending = false;
978
979 err = nvgpu_mutex_init(&f->deferred_reset_mutex);
980 if (err) {
981 nvgpu_err(g, "failed to init deferred_reset_mutex");
982 goto clean_up;
983 }
984
985 nvgpu_log_fn(g, "done");
986 return 0;
987
988clean_up:
989 nvgpu_err(g, "fail");
990
991 nvgpu_vfree(g, f->channel);
992 f->channel = NULL;
993 nvgpu_vfree(g, f->tsg);
994 f->tsg = NULL;
995 nvgpu_kfree(g, f->pbdma_map);
996 f->pbdma_map = NULL;
997 nvgpu_kfree(g, f->engine_info);
998 f->engine_info = NULL;
999 nvgpu_kfree(g, f->active_engines_list);
1000 f->active_engines_list = NULL;
1001
1002 return err;
1003}
1004
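/* SW setup entry point: run the common init, then allocate USERD and assign
 * per-channel USERD addresses. */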
1005int gk20a_init_fifo_setup_sw(struct gk20a *g)
1006{
1007 struct fifo_gk20a *f = &g->fifo;
1008 unsigned int chid;
1009 u64 userd_base;
1010 int err = 0;
1011
1012 nvgpu_log_fn(g, " ");
1013
1014 if (f->sw_ready) {
1015 nvgpu_log_fn(g, "skip init");
1016 return 0;
1017 }
1018
1019 err = gk20a_init_fifo_setup_sw_common(g);
1020 if (err) {
1021 nvgpu_err(g, "fail: err: %d", err);
1022 return err;
1023 }
1024
1025 if (g->ops.mm.is_bar1_supported(g)) {
1026 err = nvgpu_dma_alloc_map_sys(g->mm.bar1.vm,
1027 f->userd_entry_size * f->num_channels,
1028 &f->userd);
1029 } else {
1030 err = nvgpu_dma_alloc_sys(g, f->userd_entry_size *
1031 f->num_channels, &f->userd);
1032 }
1033 if (err) {
1034 nvgpu_err(g, "userd memory allocation failed");
1035 goto clean_up;
1036 }
1037 nvgpu_log(g, gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
1038
1039 userd_base = nvgpu_mem_get_addr(g, &f->userd);
1040 for (chid = 0; chid < f->num_channels; chid++) {
1041 f->channel[chid].userd_iova = userd_base +
1042 chid * f->userd_entry_size;
1043 f->channel[chid].userd_gpu_va =
1044 f->userd.gpu_va + chid * f->userd_entry_size;
1045 }
1046
1047 err = nvgpu_channel_worker_init(g);
1048 if (err) {
1049 goto clean_up;
1050 }
1051
1052 f->sw_ready = true;
1053
1054 nvgpu_log_fn(g, "done");
1055 return 0;
1056
1057clean_up:
1058 nvgpu_log_fn(g, "fail");
1059 if (nvgpu_mem_is_valid(&f->userd)) {
1060 if (g->ops.mm.is_bar1_supported(g)) {
1061 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
1062 } else {
1063 nvgpu_dma_free(g, &f->userd);
1064 }
1065 }
1066
1067 return err;
1068}
1069
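/* Acknowledge any pending runlist interrupts. */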
1070void gk20a_fifo_handle_runlist_event(struct gk20a *g)
1071{
1072 u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
1073
1074 nvgpu_log(g, gpu_dbg_intr, "runlist event %08x",
1075 runlist_event);
1076
1077 gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
1078}
1079
1080int gk20a_init_fifo_setup_hw(struct gk20a *g)
1081{
1082 struct fifo_gk20a *f = &g->fifo;
1083
1084 nvgpu_log_fn(g, " ");
1085
1086	/* test write/read through bar1 @ userd region before
1087	 * turning on snooping */
1088 {
1089 struct fifo_gk20a *f = &g->fifo;
1090 u32 v, v1 = 0x33, v2 = 0x55;
1091
1092 u32 bar1_vaddr = f->userd.gpu_va;
1093 volatile u32 *cpu_vaddr = f->userd.cpu_va;
1094
1095 nvgpu_log_info(g, "test bar1 @ vaddr 0x%x",
1096 bar1_vaddr);
1097
1098 v = gk20a_bar1_readl(g, bar1_vaddr);
1099
1100 *cpu_vaddr = v1;
1101 nvgpu_mb();
1102
1103 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
1104			nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, "
1105				"GPU read 0x%x", *cpu_vaddr, gk20a_bar1_readl(g, bar1_vaddr));
1106 return -EINVAL;
1107 }
1108
1109 gk20a_bar1_writel(g, bar1_vaddr, v2);
1110
1111 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
1112			nvgpu_err(g, "bar1 broken @ gk20a: GPU wrote 0x%x, "
1113				"CPU read 0x%x", gk20a_bar1_readl(g, bar1_vaddr), *cpu_vaddr);
1114 return -EINVAL;
1115 }
1116
1117 /* is it visible to the cpu? */
1118 if (*cpu_vaddr != v2) {
1119 nvgpu_err(g,
1120 "cpu didn't see bar1 write @ %p!",
1121 cpu_vaddr);
1122 }
1123
1124 /* put it back */
1125 gk20a_bar1_writel(g, bar1_vaddr, v);
1126 }
1127
1128 /*XXX all manner of flushes and caching worries, etc */
1129
1130 /* set the base for the userd region now */
1131 gk20a_writel(g, fifo_bar1_base_r(),
1132 fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
1133 fifo_bar1_base_valid_true_f());
1134
1135 nvgpu_log_fn(g, "done");
1136
1137 return 0;
1138}
1139
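/* Top-level FIFO init: SW setup followed by optional HW setup. */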
1140int gk20a_init_fifo_support(struct gk20a *g)
1141{
1142	int err;
1143
1144 err = g->ops.fifo.setup_sw(g);
1145 if (err) {
1146 return err;
1147 }
1148
1149 if (g->ops.fifo.init_fifo_setup_hw) {
1150 err = g->ops.fifo.init_fifo_setup_hw(g);
1151 }
1152 if (err) {
1153 return err;
1154 }
1155
1156 return err;
1157}
1158
1159/* return with a reference to the channel, caller must put it back */
1160struct channel_gk20a *
1161gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
1162{
1163 struct fifo_gk20a *f = &g->fifo;
1164 unsigned int ci;
1165 if (unlikely(!f->channel)) {
1166 return NULL;
1167 }
1168 for (ci = 0; ci < f->num_channels; ci++) {
1169 struct channel_gk20a *ch;
1170 u64 ch_inst_ptr;
1171
1172 ch = gk20a_channel_from_id(g, ci);
1173 /* only alive channels are searched */
1174 if (!ch) {
1175 continue;
1176 }
1177
1178 ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
1179 if (inst_ptr == ch_inst_ptr) {
1180 return ch;
1181 }
1182
1183 gk20a_channel_put(ch);
1184 }
1185 return NULL;
1186}
1187
1188/* fault info/descriptions.
1189 * tbd: move to setup
1190 */
1191static const char * const gk20a_fault_type_descs[] = {
1192 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
1193 "pde size",
1194 "pte",
1195 "va limit viol",
1196 "unbound inst",
1197 "priv viol",
1198 "ro viol",
1199 "wo viol",
1200 "pitch mask",
1201 "work creation",
1202 "bad aperture",
1203 "compression failure",
1204 "bad kind",
1205 "region viol",
1206 "dual ptes",
1207 "poisoned",
1208};
1209/* engine descriptions */
1210static const char * const engine_subid_descs[] = {
1211 "gpc",
1212 "hub",
1213};
1214
1215static const char * const gk20a_hub_client_descs[] = {
1216 "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
1217 "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
1218 "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
1219 "scc nb", "sec", "ssync", "gr copy", "xv", "mmu nb",
1220 "msenc", "d falcon", "sked", "a falcon", "n/a",
1221};
1222
1223static const char * const gk20a_gpc_client_descs[] = {
1224 "l1 0", "t1 0", "pe 0",
1225 "l1 1", "t1 1", "pe 1",
1226 "l1 2", "t1 2", "pe 2",
1227 "l1 3", "t1 3", "pe 3",
1228 "rast", "gcc", "gpccs",
1229 "prop 0", "prop 1", "prop 2", "prop 3",
1230 "l1 4", "t1 4", "pe 4",
1231 "l1 5", "t1 5", "pe 5",
1232 "l1 6", "t1 6", "pe 6",
1233 "l1 7", "t1 7", "pe 7",
1234};
1235
1236static const char * const does_not_exist[] = {
1237 "does not exist"
1238};
1239
1240/* fill in mmu fault desc */
1241void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault)
1242{
1243 if (mmfault->fault_type >= ARRAY_SIZE(gk20a_fault_type_descs)) {
1244 WARN_ON(mmfault->fault_type >=
1245 ARRAY_SIZE(gk20a_fault_type_descs));
1246 } else {
1247 mmfault->fault_type_desc =
1248 gk20a_fault_type_descs[mmfault->fault_type];
1249 }
1250}
1251
1252/* fill in mmu fault client description */
1253void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault)
1254{
1255 if (mmfault->client_id >= ARRAY_SIZE(gk20a_hub_client_descs)) {
1256 WARN_ON(mmfault->client_id >=
1257 ARRAY_SIZE(gk20a_hub_client_descs));
1258 } else {
1259 mmfault->client_id_desc =
1260 gk20a_hub_client_descs[mmfault->client_id];
1261 }
1262}
1263
1264/* fill in mmu fault gpc description */
1265void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault)
1266{
1267 if (mmfault->client_id >= ARRAY_SIZE(gk20a_gpc_client_descs)) {
1268 WARN_ON(mmfault->client_id >=
1269 ARRAY_SIZE(gk20a_gpc_client_descs));
1270 } else {
1271 mmfault->client_id_desc =
1272 gk20a_gpc_client_descs[mmfault->client_id];
1273 }
1274}
1275
1276static void get_exception_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1277 struct mmu_fault_info *mmfault)
1278{
1279 g->ops.fifo.get_mmu_fault_info(g, mmu_fault_id, mmfault);
1280
1281 /* parse info */
1282 mmfault->fault_type_desc = does_not_exist[0];
1283 if (g->ops.fifo.get_mmu_fault_desc) {
1284 g->ops.fifo.get_mmu_fault_desc(mmfault);
1285 }
1286
1287 if (mmfault->client_type >= ARRAY_SIZE(engine_subid_descs)) {
1288 WARN_ON(mmfault->client_type >= ARRAY_SIZE(engine_subid_descs));
1289 mmfault->client_type_desc = does_not_exist[0];
1290 } else {
1291 mmfault->client_type_desc =
1292 engine_subid_descs[mmfault->client_type];
1293 }
1294
1295 mmfault->client_id_desc = does_not_exist[0];
1296 if ((mmfault->client_type ==
1297 fifo_intr_mmu_fault_info_engine_subid_hub_v())
1298 && g->ops.fifo.get_mmu_fault_client_desc) {
1299 g->ops.fifo.get_mmu_fault_client_desc(mmfault);
1300 } else if ((mmfault->client_type ==
1301 fifo_intr_mmu_fault_info_engine_subid_gpc_v())
1302 && g->ops.fifo.get_mmu_fault_gpc_desc) {
1303 g->ops.fifo.get_mmu_fault_gpc_desc(mmfault);
1304 }
1305}
1306
1307/* reads info from hardware and fills in mmu fault info record */
1308void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1309 struct mmu_fault_info *mmfault)
1310{
1311 u32 fault_info;
1312 u32 addr_lo, addr_hi;
1313
1314 nvgpu_log_fn(g, "mmu_fault_id %d", mmu_fault_id);
1315
1316 memset(mmfault, 0, sizeof(*mmfault));
1317
1318 fault_info = gk20a_readl(g,
1319 fifo_intr_mmu_fault_info_r(mmu_fault_id));
1320 mmfault->fault_type =
1321 fifo_intr_mmu_fault_info_type_v(fault_info);
1322 mmfault->access_type =
1323 fifo_intr_mmu_fault_info_write_v(fault_info);
1324 mmfault->client_type =
1325 fifo_intr_mmu_fault_info_engine_subid_v(fault_info);
1326 mmfault->client_id =
1327 fifo_intr_mmu_fault_info_client_v(fault_info);
1328
1329 addr_lo = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(mmu_fault_id));
1330 addr_hi = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(mmu_fault_id));
1331 mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
1332	/* note: ignoring aperture on gk20a... */
1333 mmfault->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
1334 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(mmu_fault_id)));
1335 /* note: inst_ptr is a 40b phys addr. */
1336 mmfault->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
1337}
1338
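/* Reset a single engine. GR requires ELPG to be disabled, the FECS trace
 * buffer flushed and the GR pipe halted before a full GR reset; CE engines
 * are reset via mc. */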
1339void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
1340{
1341 u32 engine_enum = ENGINE_INVAL_GK20A;
1342 struct fifo_engine_info_gk20a *engine_info;
1343
1344 nvgpu_log_fn(g, " ");
1345
1346 if (!g) {
1347 return;
1348 }
1349
1350 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1351
1352 if (engine_info) {
1353 engine_enum = engine_info->engine_enum;
1354 }
1355
1356 if (engine_enum == ENGINE_INVAL_GK20A) {
1357 nvgpu_err(g, "unsupported engine_id %d", engine_id);
1358 }
1359
1360 if (engine_enum == ENGINE_GR_GK20A) {
1361 if (g->support_pmu) {
1362 if (nvgpu_pg_elpg_disable(g) != 0 ) {
1363 nvgpu_err(g, "failed to set disable elpg");
1364 }
1365 }
1366
1367#ifdef CONFIG_GK20A_CTXSW_TRACE
1368 /*
1369 * Resetting engine will alter read/write index. Need to flush
1370 * circular buffer before re-enabling FECS.
1371 */
1372 if (g->ops.fecs_trace.reset)
1373 g->ops.fecs_trace.reset(g);
1374#endif
1375 if (!nvgpu_platform_is_simulation(g)) {
1376 /*HALT_PIPELINE method, halt GR engine*/
1377 if (gr_gk20a_halt_pipe(g)) {
1378 nvgpu_err(g, "failed to HALT gr pipe");
1379 }
1380 /*
1381 * resetting engine using mc_enable_r() is not
1382 * enough, we do full init sequence
1383 */
1384 nvgpu_log(g, gpu_dbg_info, "resetting gr engine");
1385 gk20a_gr_reset(g);
1386 } else {
1387 nvgpu_log(g, gpu_dbg_info,
1388 "HALT gr pipe not supported and "
1389 "gr cannot be reset without halting gr pipe");
1390 }
1391 if (g->support_pmu) {
1392 if (nvgpu_pg_elpg_enable(g) != 0 ) {
1393 nvgpu_err(g, "failed to set enable elpg");
1394 }
1395 }
1396 }
1397 if ((engine_enum == ENGINE_GRCE_GK20A) ||
1398 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
1399 g->ops.mc.reset(g, engine_info->reset_mask);
1400 }
1401}
1402
1403static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
1404{
1405 u32 intr;
1406
1407 intr = gk20a_readl(g, fifo_intr_chsw_error_r());
1408 nvgpu_err(g, "chsw: %08x", intr);
1409 gk20a_fecs_dump_falcon_stats(g);
1410 gk20a_gpccs_dump_falcon_stats(g);
1411 gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
1412}
1413
1414static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
1415{
1416 u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1417 nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id);
1418}
1419
1420bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid)
1421{
1422 return (engine_subid == fifo_intr_mmu_fault_info_engine_subid_gpc_v());
1423}
1424
1425bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
1426 u32 engine_subid, bool fake_fault)
1427{
1428 u32 engine_enum = ENGINE_INVAL_GK20A;
1429 struct fifo_engine_info_gk20a *engine_info;
1430
1431 if (!g) {
1432 return false;
1433 }
1434
1435 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1436
1437 if (engine_info) {
1438 engine_enum = engine_info->engine_enum;
1439 }
1440
1441 if (engine_enum == ENGINE_INVAL_GK20A) {
1442 return false;
1443 }
1444
1445	/* channel recovery is only deferred if an SM debugger
1446	   is attached and MMU debug mode is enabled */
1447 if (!g->ops.gr.sm_debugger_attached(g) ||
1448 !g->ops.fb.is_debug_mode_enabled(g)) {
1449 return false;
1450 }
1451
1452 /* if this fault is fake (due to RC recovery), don't defer recovery */
1453 if (fake_fault) {
1454 return false;
1455 }
1456
1457 if (engine_enum != ENGINE_GR_GK20A) {
1458 return false;
1459 }
1460
1461 return g->ops.fifo.is_fault_engine_subid_gpc(g, engine_subid);
1462}
1463
1464/* caller must hold a channel reference */
1465static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
1466 struct channel_gk20a *refch)
1467{
1468 bool verbose = false;
1469 if (!refch) {
1470 return verbose;
1471 }
1472
1473 if (nvgpu_is_error_notifier_set(refch,
1474 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
1475 verbose = refch->timeout_debug_dump;
1476 }
1477
1478 return verbose;
1479}
1480
1481/* caller must hold a channel reference */
1482static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
1483 struct channel_gk20a *refch)
1484{
1485 if (refch) {
1486 /* mark channel as faulted */
1487 gk20a_channel_set_timedout(refch);
1488
1489 /* unblock pending waits */
1490 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
1491 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
1492 }
1493}
1494
1495/* caller must hold a channel reference */
1496bool gk20a_fifo_error_ch(struct gk20a *g,
1497 struct channel_gk20a *refch)
1498{
1499 bool verbose;
1500
1501 verbose = gk20a_fifo_ch_timeout_debug_dump_state(g, refch);
1502 gk20a_fifo_set_has_timedout_and_wake_up_wqs(g, refch);
1503
1504 return verbose;
1505}
1506
1507bool gk20a_fifo_error_tsg(struct gk20a *g,
1508 struct tsg_gk20a *tsg)
1509{
1510 struct channel_gk20a *ch = NULL;
1511 bool verbose = false;
1512
1513 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1514 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1515 if (gk20a_channel_get(ch)) {
1516 if (gk20a_fifo_error_ch(g, ch)) {
1517 verbose = true;
1518 }
1519 gk20a_channel_put(ch);
1520 }
1521 }
1522 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1523
1524 return verbose;
1525
1526}
1527/* caller must hold a channel reference */
1528void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
1529 struct channel_gk20a *refch)
1530{
1531 nvgpu_err(g,
1532 "channel %d generated a mmu fault", refch->chid);
1533 g->ops.fifo.set_error_notifier(refch,
1534 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
1535}
1536
1537void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
1538 struct tsg_gk20a *tsg)
1539{
1540 struct channel_gk20a *ch = NULL;
1541
1542 nvgpu_err(g,
1543 "TSG %d generated a mmu fault", tsg->tsgid);
1544
1545 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1546 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1547 if (gk20a_channel_get(ch)) {
1548 gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1549 gk20a_channel_put(ch);
1550 }
1551 }
1552 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1553
1554}
1555
1556void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
1557{
1558 struct channel_gk20a *ch = NULL;
1559
1560 nvgpu_log_fn(g, " ");
1561
1562 g->ops.fifo.disable_tsg(tsg);
1563
1564 if (preempt) {
1565 g->ops.fifo.preempt_tsg(g, tsg);
1566 }
1567
1568 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1569 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1570 if (gk20a_channel_get(ch)) {
1571 gk20a_channel_set_timedout(ch);
1572 if (ch->g->ops.fifo.ch_abort_clean_up) {
1573 ch->g->ops.fifo.ch_abort_clean_up(ch);
1574 }
1575 gk20a_channel_put(ch);
1576 }
1577 }
1578 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1579}
1580
1581int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
1582{
1583 unsigned long engine_id, engines = 0U;
1584 struct tsg_gk20a *tsg;
1585 bool deferred_reset_pending;
1586 struct fifo_gk20a *f = &g->fifo;
1587
1588 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1589
1590 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1591 deferred_reset_pending = g->fifo.deferred_reset_pending;
1592 nvgpu_mutex_release(&f->deferred_reset_mutex);
1593
1594 if (!deferred_reset_pending) {
1595 nvgpu_mutex_release(&g->dbg_sessions_lock);
1596 return 0;
1597 }
1598
1599 gr_gk20a_disable_ctxsw(g);
1600
1601 tsg = tsg_gk20a_from_ch(ch);
1602 if (tsg != NULL) {
1603 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
1604 } else {
1605 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
1606 engines = g->fifo.deferred_fault_engines;
1607 }
1608
1609 if (engines == 0U) {
1610 goto clean_up;
1611 }
1612
1613 /*
1614 * If deferred reset is set for an engine, and channel is running
1615 * on that engine, reset it
1616 */
1617 for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32) {
1618 if (BIT(engine_id) & engines) {
1619 gk20a_fifo_reset_engine(g, engine_id);
1620 }
1621 }
1622
1623 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1624 g->fifo.deferred_fault_engines = 0;
1625 g->fifo.deferred_reset_pending = false;
1626 nvgpu_mutex_release(&f->deferred_reset_mutex);
1627
1628clean_up:
1629 gr_gk20a_enable_ctxsw(g);
1630 nvgpu_mutex_release(&g->dbg_sessions_lock);
1631
1632 return 0;
1633}
1634
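/* Handle MMU faults on all faulted engines. Caller must hold
 * engines_reset_mutex and every runlist_lock. */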
1635static bool gk20a_fifo_handle_mmu_fault_locked(
1636 struct gk20a *g,
1637 u32 mmu_fault_engines, /* queried from HW if 0 */
1638 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1639 bool id_is_tsg)
1640{
1641 bool fake_fault;
1642 unsigned long fault_id;
1643 unsigned long engine_mmu_fault_id;
1644 bool verbose = true;
1645 u32 grfifo_ctl;
1646
1647 bool deferred_reset_pending = false;
1648 struct fifo_gk20a *f = &g->fifo;
1649
1650 nvgpu_log_fn(g, " ");
1651
1652 /* Disable power management */
1653 if (g->support_pmu) {
1654 if (nvgpu_cg_pg_disable(g) != 0) {
1655 nvgpu_warn(g, "fail to disable power mgmt");
1656 }
1657 }
1658
1659 /* Disable fifo access */
1660 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
1661 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
1662 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
1663
1664 gk20a_writel(g, gr_gpfifo_ctl_r(),
1665 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
1666 gr_gpfifo_ctl_semaphore_access_f(0));
1667
1668 if (mmu_fault_engines) {
1669 fault_id = mmu_fault_engines;
1670 fake_fault = true;
1671 } else {
1672 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1673 fake_fault = false;
1674 gk20a_debug_dump(g);
1675 }
1676
1677 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1678 g->fifo.deferred_reset_pending = false;
1679 nvgpu_mutex_release(&f->deferred_reset_mutex);
1680
1681 /* go through all faulted engines */
1682 for_each_set_bit(engine_mmu_fault_id, &fault_id, 32) {
1683		/* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
1684		 * engines. Convert engine_mmu_fault_id to engine_id */
1685 u32 engine_id = gk20a_mmu_id_to_engine_id(g,
1686 engine_mmu_fault_id);
1687 struct mmu_fault_info mmfault_info;
1688 struct channel_gk20a *ch = NULL;
1689 struct tsg_gk20a *tsg = NULL;
1690 struct channel_gk20a *refch = NULL;
1691 /* read and parse engine status */
1692 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1693 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1694 bool ctxsw = (ctx_status ==
1695 fifo_engine_status_ctx_status_ctxsw_switch_v()
1696 || ctx_status ==
1697 fifo_engine_status_ctx_status_ctxsw_save_v()
1698 || ctx_status ==
1699 fifo_engine_status_ctx_status_ctxsw_load_v());
1700
1701 get_exception_mmu_fault_info(g, engine_mmu_fault_id,
1702 &mmfault_info);
1703 trace_gk20a_mmu_fault(mmfault_info.fault_addr,
1704 mmfault_info.fault_type,
1705 mmfault_info.access_type,
1706 mmfault_info.inst_ptr,
1707 engine_id,
1708 mmfault_info.client_type_desc,
1709 mmfault_info.client_id_desc,
1710 mmfault_info.fault_type_desc);
1711 nvgpu_err(g, "%s mmu fault on engine %d, "
1712 "engine subid %d (%s), client %d (%s), "
1713 "addr 0x%llx, type %d (%s), access_type 0x%08x,"
1714 "inst_ptr 0x%llx",
1715 fake_fault ? "fake" : "",
1716 engine_id,
1717 mmfault_info.client_type,
1718 mmfault_info.client_type_desc,
1719 mmfault_info.client_id, mmfault_info.client_id_desc,
1720 mmfault_info.fault_addr,
1721 mmfault_info.fault_type,
1722 mmfault_info.fault_type_desc,
1723 mmfault_info.access_type, mmfault_info.inst_ptr);
1724
1725 if (ctxsw) {
1726 gk20a_fecs_dump_falcon_stats(g);
1727 gk20a_gpccs_dump_falcon_stats(g);
1728 nvgpu_err(g, "gr_status_r : 0x%x",
1729 gk20a_readl(g, gr_status_r()));
1730 }
1731
1732 /* get the channel/TSG */
1733 if (fake_fault) {
1734 /* use next_id if context load is failing */
1735 u32 id, type;
1736
1737 if (hw_id == ~(u32)0) {
1738 id = (ctx_status ==
1739 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1740 fifo_engine_status_next_id_v(status) :
1741 fifo_engine_status_id_v(status);
1742 type = (ctx_status ==
1743 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1744 fifo_engine_status_next_id_type_v(status) :
1745 fifo_engine_status_id_type_v(status);
1746 } else {
1747 id = hw_id;
1748 type = id_is_tsg ?
1749 fifo_engine_status_id_type_tsgid_v() :
1750 fifo_engine_status_id_type_chid_v();
1751 }
1752
1753 if (type == fifo_engine_status_id_type_tsgid_v()) {
1754 tsg = &g->fifo.tsg[id];
1755 } else if (type == fifo_engine_status_id_type_chid_v()) {
1756 ch = &g->fifo.channel[id];
1757 refch = gk20a_channel_get(ch);
1758 if (refch != NULL) {
1759 tsg = tsg_gk20a_from_ch(refch);
1760 }
1761 }
1762 } else {
1763 /* read channel based on instruction pointer */
1764 ch = gk20a_refch_from_inst_ptr(g,
1765 mmfault_info.inst_ptr);
1766 refch = ch;
1767 if (refch != NULL) {
1768 tsg = tsg_gk20a_from_ch(refch);
1769 }
1770 }
1771
1772 /* check if engine reset should be deferred */
1773 if (engine_id != FIFO_INVAL_ENGINE_ID) {
1774 bool defer = gk20a_fifo_should_defer_engine_reset(g,
1775 engine_id, mmfault_info.client_type,
1776 fake_fault);
1777 if ((ch || tsg) && defer) {
1778 g->fifo.deferred_fault_engines |= BIT(engine_id);
1779
1780 /* handled during channel free */
1781 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1782 g->fifo.deferred_reset_pending = true;
1783 nvgpu_mutex_release(&f->deferred_reset_mutex);
1784
1785 deferred_reset_pending = true;
1786
1787 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
1788 "sm debugger attached,"
1789 " deferring channel recovery to channel free");
1790 } else {
1791 gk20a_fifo_reset_engine(g, engine_id);
1792 }
1793 }
1794
1795#ifdef CONFIG_GK20A_CTXSW_TRACE
1796 if (tsg) {
1797 gk20a_ctxsw_trace_tsg_reset(g, tsg);
1798 }
1799#endif
1800 /*
1801 * Disable the channel/TSG from hw and increment syncpoints.
1802 */
1803 if (tsg) {
1804 if (deferred_reset_pending) {
1805 gk20a_disable_tsg(tsg);
1806 } else {
1807 if (!fake_fault) {
1808 gk20a_fifo_set_ctx_mmu_error_tsg(g,
1809 tsg);
1810 }
1811 verbose = gk20a_fifo_error_tsg(g, tsg);
1812 gk20a_fifo_abort_tsg(g, tsg, false);
1813 }
1814
1815 /* put back the ref taken early above */
1816 if (refch) {
1817 gk20a_channel_put(ch);
1818 }
1819 } else if (refch != NULL) {
1820 nvgpu_err(g, "mmu error in unbound channel %d",
1821 ch->chid);
1822 gk20a_channel_put(ch);
1823 } else if (mmfault_info.inst_ptr ==
1824 nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
1825 nvgpu_err(g, "mmu fault from bar1");
1826 } else if (mmfault_info.inst_ptr ==
1827 nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
1828 nvgpu_err(g, "mmu fault from pmu");
1829 } else {
1830 nvgpu_err(g, "couldn't locate channel for mmu fault");
1831 }
1832 }
1833
1834 /* clear interrupt */
1835 gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
1836
1837 /* resume scheduler */
1838 gk20a_writel(g, fifo_error_sched_disable_r(),
1839 gk20a_readl(g, fifo_error_sched_disable_r()));
1840
1841 /* Re-enable fifo access */
1842 gk20a_writel(g, gr_gpfifo_ctl_r(),
1843 gr_gpfifo_ctl_access_enabled_f() |
1844 gr_gpfifo_ctl_semaphore_access_enabled_f());
1845
1846 /* It is safe to enable ELPG again. */
1847 if (g->support_pmu) {
1848 if (nvgpu_cg_pg_enable(g) != 0) {
1849 nvgpu_warn(g, "fail to enable power mgmt");
1850 }
1851 }
1852
1853 return verbose;
1854}
1855
1856static bool gk20a_fifo_handle_mmu_fault(
1857 struct gk20a *g,
1858 u32 mmu_fault_engines, /* queried from HW if 0 */
1859 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1860 bool id_is_tsg)
1861{
1862 u32 rlid;
1863 bool verbose;
1864
1865 nvgpu_log_fn(g, " ");
1866
1867 nvgpu_log_info(g, "acquire engines_reset_mutex");
1868 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1869
1870 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
1871 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1872 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
1873 }
1874
1875 verbose = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
1876 hw_id, id_is_tsg);
1877
1878 nvgpu_log_info(g, "release runlist_lock for all runlists");
1879 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1880 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
1881 }
1882
1883 nvgpu_log_info(g, "release engines_reset_mutex");
1884 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
1885
1886 return verbose;
1887}
1888
1889static void gk20a_fifo_get_faulty_id_type(struct gk20a *g, int engine_id,
1890 u32 *id, u32 *type)
1891{
1892 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1893 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1894
1895 /* use next_id if context load is failing */
1896 *id = (ctx_status ==
1897 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1898 fifo_engine_status_next_id_v(status) :
1899 fifo_engine_status_id_v(status);
1900
1901 *type = (ctx_status ==
1902 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1903 fifo_engine_status_next_id_type_v(status) :
1904 fifo_engine_status_id_type_v(status);
1905}
1906
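/* Return a bitmask of busy engines whose current id (or next id, while a
 * context load is in progress) matches the given chid/tsgid. */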
1907static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1908{
1909 unsigned int i;
1910 u32 engines = 0;
1911
1912 for (i = 0; i < g->fifo.num_engines; i++) {
1913 u32 active_engine_id = g->fifo.active_engines_list[i];
1914 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
1915 u32 ctx_status =
1916 fifo_engine_status_ctx_status_v(status);
1917 u32 ctx_id = (ctx_status ==
1918 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1919 fifo_engine_status_next_id_v(status) :
1920 fifo_engine_status_id_v(status);
1921 u32 type = (ctx_status ==
1922 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1923 fifo_engine_status_next_id_type_v(status) :
1924 fifo_engine_status_id_type_v(status);
1925 bool busy = fifo_engine_status_engine_v(status) ==
1926 fifo_engine_status_engine_busy_v();
1927 if (busy && ctx_id == id) {
1928 if ((is_tsg && type ==
1929 fifo_engine_status_id_type_tsgid_v()) ||
1930 (!is_tsg && type ==
1931 fifo_engine_status_id_type_chid_v())) {
1932 engines |= BIT(active_engine_id);
1933 }
1934 }
1935 }
1936
1937 return engines;
1938}
1939
1940void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
1941 bool verbose, u32 rc_type)
1942{
1943 u32 engines;
1944
1945 /* stop context switching to prevent engine assignments from
1946 changing until channel is recovered */
1947 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1948 gr_gk20a_disable_ctxsw(g);
1949
1950 engines = gk20a_fifo_engines_on_id(g, ch->chid, false);
1951
1952 if (engines) {
1953 gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
1954 rc_type);
1955 } else {
1956 gk20a_channel_abort(ch, false);
1957
1958 if (gk20a_fifo_error_ch(g, ch)) {
1959 gk20a_debug_dump(g);
1960 }
1961 }
1962
1963 gr_gk20a_enable_ctxsw(g);
1964 nvgpu_mutex_release(&g->dbg_sessions_lock);
1965}
1966
1967void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1968 bool verbose, u32 rc_type)
1969{
1970 u32 engines = 0U;
1971 int err;
1972
1973 /* stop context switching to prevent engine assignments from
1974 changing until TSG is recovered */
1975 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1976
1977 /* disable tsg so that it does not get scheduled again */
1978 g->ops.fifo.disable_tsg(tsg);
1979
1980 /*
1981 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
1982 * fifo_engine_status register. Also while the engine is held in reset
1983 * h/w passes busy/idle straight through. fifo_engine_status registers
1984 * are correct in that there is no context switch outstanding
1985 * as the CTXSW is aborted when reset is asserted.
1986 */
1987 nvgpu_log_info(g, "acquire engines_reset_mutex");
1988 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1989
1990 /*
1991 * stop context switching to prevent engine assignments from
1992 * changing until engine status is checked to make sure tsg
1993 * being recovered is not loaded on the engines
1994 */
1995 err = gr_gk20a_disable_ctxsw(g);
1996
1997 if (err != 0) {
1998 /* if failed to disable ctxsw, just abort tsg */
1999 nvgpu_err(g, "failed to disable ctxsw");
2000 } else {
2001 /* recover engines if tsg is loaded on the engines */
2002 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
2003
2004 /*
2005		 * It is ok to enable ctxsw before the tsg is recovered. If engines
2006		 * is 0, no engine recovery is needed; if it is non-zero,
2007		 * gk20a_fifo_recover will call get_engines_mask_on_id again.
2008		 * By that time, if the tsg is no longer on the engine, the engine
2009		 * need not be reset.
2010 */
2011 err = gr_gk20a_enable_ctxsw(g);
2012 if (err != 0) {
2013 nvgpu_err(g, "failed to enable ctxsw");
2014 }
2015 }
2016
2017 nvgpu_log_info(g, "release engines_reset_mutex");
2018 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2019
2020 if (engines) {
2021 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
2022 rc_type);
2023 } else {
2024 if (gk20a_fifo_error_tsg(g, tsg) && verbose) {
2025 gk20a_debug_dump(g);
2026 }
2027
2028 gk20a_fifo_abort_tsg(g, tsg, false);
2029 }
2030
2031 nvgpu_mutex_release(&g->dbg_sessions_lock);
2032}
2033
2034void gk20a_fifo_teardown_mask_intr(struct gk20a *g)
2035{
2036 u32 val;
2037
2038 val = gk20a_readl(g, fifo_intr_en_0_r());
2039 val &= ~(fifo_intr_en_0_sched_error_m() |
2040 fifo_intr_en_0_mmu_fault_m());
2041 gk20a_writel(g, fifo_intr_en_0_r(), val);
2042 gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());
2043}
2044
2045void gk20a_fifo_teardown_unmask_intr(struct gk20a *g)
2046{
2047 u32 val;
2048
2049 val = gk20a_readl(g, fifo_intr_en_0_r());
2050 val |= fifo_intr_en_0_mmu_fault_f(1) | fifo_intr_en_0_sched_error_f(1);
2051 gk20a_writel(g, fifo_intr_en_0_r(), val);
2052
2053}
2054
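/* Core recovery path: collect the engines to reset, trigger a recovery MMU
 * fault on them and run the MMU fault handler, holding the
 * engines_reset_mutex and all runlist locks. */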
2055void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
2056 u32 hw_id, unsigned int id_type, unsigned int rc_type,
2057 struct mmu_fault_info *mmfault)
2058{
2059 unsigned long engine_id, i;
2060 unsigned long _engine_ids = __engine_ids;
2061 unsigned long engine_ids = 0;
2062 u32 mmu_fault_engines = 0;
2063 u32 ref_type;
2064 u32 ref_id;
2065 u32 ref_id_is_tsg = false;
2066	bool id_is_known = (id_type != ID_TYPE_UNKNOWN);
2067	bool id_is_tsg = (id_type == ID_TYPE_TSG);
2068 u32 rlid;
2069
2070 nvgpu_log_info(g, "acquire engines_reset_mutex");
2071 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
2072
2073 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
2074 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2075 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
2076 }
2077
2078 if (id_is_known) {
2079 engine_ids = gk20a_fifo_engines_on_id(g, hw_id, id_is_tsg);
2080 ref_id = hw_id;
2081 ref_type = id_is_tsg ?
2082 fifo_engine_status_id_type_tsgid_v() :
2083 fifo_engine_status_id_type_chid_v();
2084 ref_id_is_tsg = id_is_tsg;
2085		/* at least one engine will get passed during sched err */
2086 engine_ids |= __engine_ids;
2087 for_each_set_bit(engine_id, &engine_ids, 32) {
2088 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id);
2089
2090 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2091 mmu_fault_engines |= BIT(mmu_id);
2092 }
2093 }
2094 } else {
2095 /* store faulted engines in advance */
2096 for_each_set_bit(engine_id, &_engine_ids, 32) {
2097 gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id,
2098 &ref_type);
2099 if (ref_type == fifo_engine_status_id_type_tsgid_v()) {
2100 ref_id_is_tsg = true;
2101 } else {
2102 ref_id_is_tsg = false;
2103 }
2104			/* Reset *all* engines that use the
2105			 * same channel as the faulty engine */
2106 for (i = 0; i < g->fifo.num_engines; i++) {
2107 u32 active_engine_id = g->fifo.active_engines_list[i];
2108 u32 type;
2109 u32 id;
2110
2111 gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type);
2112 if (ref_type == type && ref_id == id) {
2113 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, active_engine_id);
2114
2115 engine_ids |= BIT(active_engine_id);
2116 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2117 mmu_fault_engines |= BIT(mmu_id);
2118 }
2119 }
2120 }
2121 }
2122 }
2123
2124 if (mmu_fault_engines) {
2125 g->ops.fifo.teardown_mask_intr(g);
2126 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
2127 gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id,
2128 ref_id_is_tsg);
2129
2130 g->ops.fifo.teardown_unmask_intr(g);
2131 }
2132
2133 nvgpu_log_info(g, "release runlist_lock for all runlists");
2134 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2135 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
2136 }
2137
2138 nvgpu_log_info(g, "release engines_reset_mutex");
2139 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2140}
2141
2142void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
2143 u32 hw_id, bool id_is_tsg,
2144 bool id_is_known, bool verbose, int rc_type)
2145{
2146 unsigned int id_type;
2147
2148 if (verbose) {
2149 gk20a_debug_dump(g);
2150 }
2151
2152 if (g->ops.ltc.flush) {
2153 g->ops.ltc.flush(g);
2154 }
2155
2156 if (id_is_known) {
2157 id_type = id_is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
2158 } else {
2159 id_type = ID_TYPE_UNKNOWN;
2160 }
2161
2162 g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type,
2163 rc_type, NULL);
2164}
2165
2166/* force reset channel and tsg */
2167int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
2168 u32 err_code, bool verbose)
2169{
2170 struct channel_gk20a *ch_tsg = NULL;
2171 struct gk20a *g = ch->g;
2172
2173 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2174
2175 if (tsg != NULL) {
2176 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2177
2178 nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
2179 channel_gk20a, ch_entry) {
2180 if (gk20a_channel_get(ch_tsg)) {
2181 g->ops.fifo.set_error_notifier(ch_tsg,
2182 err_code);
2183 gk20a_channel_put(ch_tsg);
2184 }
2185 }
2186
2187 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2188 gk20a_fifo_recover_tsg(g, tsg, verbose,
2189 RC_TYPE_FORCE_RESET);
2190 } else {
2191 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2192 }
2193
2194 return 0;
2195}
2196
2197int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
2198{
2199 struct gk20a *g = ch->g;
2200
2201 if (gk20a_fifo_channel_status_is_next(g, ch->chid)) {
2202 nvgpu_log_info(g, "Channel %d to be removed from TSG %d has NEXT set!",
2203 ch->chid, ch->tsgid);
2204 return -EAGAIN;
2205 }
2206
2207 if (g->ops.fifo.tsg_verify_status_ctx_reload) {
2208 g->ops.fifo.tsg_verify_status_ctx_reload(ch);
2209 }
2210
2211 if (g->ops.fifo.tsg_verify_status_faulted) {
2212 g->ops.fifo.tsg_verify_status_faulted(ch);
2213 }
2214
2215 return 0;
2216}
2217
2218static bool gk20a_fifo_tsg_is_multi_channel(struct tsg_gk20a *tsg)
2219{
2220 bool ret = false;
2221
2222 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2223 if (nvgpu_list_first_entry(&tsg->ch_list, channel_gk20a,
2224 ch_entry) !=
2225 nvgpu_list_last_entry(&tsg->ch_list, channel_gk20a,
2226 ch_entry)) {
2227 ret = true;
2228 }
2229 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2230
2231 return ret;
2232}
2233
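/* Unbind a channel from its TSG: disable and preempt the TSG, verify channel
 * state, update the runlist, drop MMU debug mode refs, then remove the
 * channel and re-enable the remaining ones. */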
2234int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
2235{
2236 struct gk20a *g = ch->g;
2237 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2238 int err;
2239 bool tsg_timedout = false;
2240
2241 if (tsg == NULL) {
2242 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2243 return 0;
2244 }
2245
2246 /* If one channel in TSG times out, we disable all channels */
2247 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2248 tsg_timedout = gk20a_channel_check_timedout(ch);
2249 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2250
2251 /* Disable TSG and examine status before unbinding channel */
2252 g->ops.fifo.disable_tsg(tsg);
2253
2254 err = g->ops.fifo.preempt_tsg(g, tsg);
2255 if (err != 0) {
2256 goto fail_enable_tsg;
2257 }
2258
2259 /*
2260 * State validation is only necessary if there are multiple channels in
2261 * the TSG.
2262 */
2263 if (gk20a_fifo_tsg_is_multi_channel(tsg) &&
2264 g->ops.fifo.tsg_verify_channel_status && !tsg_timedout) {
2265 err = g->ops.fifo.tsg_verify_channel_status(ch);
2266 if (err) {
2267 goto fail_enable_tsg;
2268 }
2269 }
2270
2271 /* Channel should be seen as TSG channel while updating runlist */
2272 err = channel_gk20a_update_runlist(ch, false);
2273 if (err) {
2274 goto fail_enable_tsg;
2275 }
2276
2277 while (ch->mmu_debug_mode_refcnt > 0U) {
2278 err = nvgpu_tsg_set_mmu_debug_mode(ch, false);
2279 if (err != 0) {
2280 nvgpu_err(g, "disable mmu debug mode failed ch:%u",
2281 ch->chid);
2282 break;
2283 }
2284 }
2285
2286 /* Remove channel from TSG and re-enable rest of the channels */
2287 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2288 nvgpu_list_del(&ch->ch_entry);
2289 ch->tsgid = NVGPU_INVALID_TSG_ID;
2290
2291 /* another thread could have re-enabled the channel because it was
2292 * still on the list at that time, so make sure it's truly disabled
2293 */
2294 g->ops.fifo.disable_channel(ch);
2295 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2296
2297 /*
2298 * Don't re-enable all channels if TSG has timed out already
2299 *
2300 * Note that we can skip disabling and preempting TSG too in case of
2301 * time out, but we keep that to ensure TSG is kicked out
2302 */
2303 if (!tsg_timedout) {
2304 g->ops.fifo.enable_tsg(tsg);
2305 }
2306
2307 if (ch->g->ops.fifo.ch_abort_clean_up) {
2308 ch->g->ops.fifo.ch_abort_clean_up(ch);
2309 }
2310
2311 return 0;
2312
2313fail_enable_tsg:
2314 if (!tsg_timedout) {
2315 g->ops.fifo.enable_tsg(tsg);
2316 }
2317 return err;
2318}
2319
2320u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
2321 int *__id, bool *__is_tsg)
2322{
2323 u32 engine_id;
2324 int id = -1;
2325 bool is_tsg = false;
2326 u32 mailbox2;
2327 u32 active_engine_id = FIFO_INVAL_ENGINE_ID;
2328
2329 for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) {
2330 u32 status;
2331 u32 ctx_status;
2332 bool failing_engine;
2333
2334 active_engine_id = g->fifo.active_engines_list[engine_id];
2335 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
2336 ctx_status = fifo_engine_status_ctx_status_v(status);
2337
2338 /* we are interested in busy engines */
2339 failing_engine = fifo_engine_status_engine_v(status) ==
2340 fifo_engine_status_engine_busy_v();
2341
2342 /* ..that are doing context switch */
2343 failing_engine = failing_engine &&
2344 (ctx_status ==
2345 fifo_engine_status_ctx_status_ctxsw_switch_v()
2346 || ctx_status ==
2347 fifo_engine_status_ctx_status_ctxsw_save_v()
2348 || ctx_status ==
2349 fifo_engine_status_ctx_status_ctxsw_load_v());
2350
2351 if (!failing_engine) {
2352 active_engine_id = FIFO_INVAL_ENGINE_ID;
2353 continue;
2354 }
2355
2356 if (ctx_status ==
2357 fifo_engine_status_ctx_status_ctxsw_load_v()) {
2358 id = fifo_engine_status_next_id_v(status);
2359 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2360 fifo_engine_status_next_id_type_chid_v();
2361 } else if (ctx_status ==
2362 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
2363 mailbox2 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(2));
2364 if (mailbox2 & FECS_METHOD_WFI_RESTORE) {
2365 id = fifo_engine_status_next_id_v(status);
2366 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2367 fifo_engine_status_next_id_type_chid_v();
2368 } else {
2369 id = fifo_engine_status_id_v(status);
2370 is_tsg = fifo_engine_status_id_type_v(status) !=
2371 fifo_engine_status_id_type_chid_v();
2372 }
2373 } else {
2374 id = fifo_engine_status_id_v(status);
2375 is_tsg = fifo_engine_status_id_type_v(status) !=
2376 fifo_engine_status_id_type_chid_v();
2377 }
2378 break;
2379 }
2380
2381 *__id = id;
2382 *__is_tsg = is_tsg;
2383
2384 return active_engine_id;
2385}
2386
2387bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
2388 bool *verbose, u32 *ms)
2389{
2390 bool recover = false;
2391 bool progress = false;
2392 struct gk20a *g = ch->g;
2393
2394 if (gk20a_channel_get(ch)) {
2395 recover = gk20a_channel_update_and_check_timeout(ch,
2396 g->fifo_eng_timeout_us / 1000,
2397 &progress);
2398 *verbose = ch->timeout_debug_dump;
2399 *ms = ch->timeout_accumulated_ms;
2400 if (recover) {
2401 g->ops.fifo.set_error_notifier(ch,
2402 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2403 }
2404
2405 gk20a_channel_put(ch);
2406 }
2407 return recover;
2408}
2409
2410bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
2411 bool *verbose, u32 *ms)
2412{
2413 struct channel_gk20a *ch;
2414 bool recover = false;
2415 bool progress = false;
2416 struct gk20a *g = tsg->g;
2417
2418 *verbose = false;
2419 *ms = g->fifo_eng_timeout_us / 1000;
2420
2421 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2422
2423 /* check if there was some progress on any of the TSG channels.
2424 * fifo recovery is needed if at least one channel reached the
2425 * maximum timeout without progress (update in gpfifo pointers).
2426 */
2427 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
2428 if (gk20a_channel_get(ch)) {
2429 recover = gk20a_channel_update_and_check_timeout(ch,
2430 *ms, &progress);
2431 if (progress || recover) {
2432 break;
2433 }
2434 gk20a_channel_put(ch);
2435 }
2436 }
2437
2438 if (recover) {
2439 /*
2440 * if one channel is presumed dead (no progress for too long),
2441 * then fifo recovery is needed. we can't really figure out
2442 * which channel caused the problem, so set timeout error
2443 * notifier for all channels.
2444 */
2445 nvgpu_log_info(g, "timeout on tsg=%d ch=%d",
2446 tsg->tsgid, ch->chid);
2447 *ms = ch->timeout_accumulated_ms;
2448 gk20a_channel_put(ch);
2449 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2450 channel_gk20a, ch_entry) {
2451 if (gk20a_channel_get(ch)) {
2452 ch->g->ops.fifo.set_error_notifier(ch,
2453 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2454 if (ch->timeout_debug_dump) {
2455 *verbose = true;
2456 }
2457 gk20a_channel_put(ch);
2458 }
2459 }
2460 } else if (progress) {
2461 /*
2462 * if at least one channel in the TSG made some progress, reset
2463 * accumulated timeout for all channels in the TSG. In
2464 * particular, this resets timeout for channels that already
2465 * completed their work
2466 */
2467 nvgpu_log_info(g, "progress on tsg=%d ch=%d",
2468 tsg->tsgid, ch->chid);
2469 gk20a_channel_put(ch);
2470 *ms = g->fifo_eng_timeout_us / 1000;
2471 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2472 channel_gk20a, ch_entry) {
2473 if (gk20a_channel_get(ch)) {
2474 ch->timeout_accumulated_ms = *ms;
2475 gk20a_channel_put(ch);
2476 }
2477 }
2478 }
2479
2480	/* if we could not detect progress on any of the channels, but none
2481 * of them has reached the timeout, there is nothing more to do:
2482 * timeout_accumulated_ms has been updated for all of them.
2483 */
2484 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2485 return recover;
2486}
2487
2488bool gk20a_fifo_handle_sched_error(struct gk20a *g)
2489{
2490 u32 sched_error;
2491 u32 engine_id;
2492 int id = -1;
2493 bool is_tsg = false;
2494 bool ret = false;
2495
2496 /* read the scheduler error register */
2497 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
2498
2499 engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);
2500 /*
2501 * Could not find the engine
2502 * Possible Causes:
2503 * a)
2504 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
2505 * fifo_engine_status register. Also while the engine is held in reset
2506 * h/w passes busy/idle straight through. fifo_engine_status registers
2507 * are correct in that there is no context switch outstanding
2508 * as the CTXSW is aborted when reset is asserted.
2509 * This is just a side effect of how gv100 and earlier versions of
2510 * ctxsw_timeout behave.
2511 * With gv11b and later, h/w snaps the context at the point of error
2512 * so that s/w can see the tsg_id which caused the HW timeout.
2513 * b)
2514 * If engines are not busy and ctxsw state is valid then intr occurred
2515 * in the past and if the ctxsw state has moved on to VALID from LOAD
2516 * or SAVE, it means that whatever timed out eventually finished
2517 * anyways. The problem with this is that s/w cannot conclude which
2518 * context caused the problem as maybe more switches occurred before
2519 * intr is handled.
2520 */
2521 if (engine_id == FIFO_INVAL_ENGINE_ID) {
2522 nvgpu_info(g, "fifo sched error: 0x%08x, failed to find engine "
2523 "that is busy doing ctxsw. "
2524			"Maybe ctxsw already happened", sched_error);
2525 ret = false;
2526 goto err;
2527 }
2528
2529 /* could not find the engine - should never happen */
2530 if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
2531 nvgpu_err(g, "fifo sched error : 0x%08x, failed to find engine",
2532 sched_error);
2533 ret = false;
2534 goto err;
2535 }
2536
2537 if (fifo_intr_sched_error_code_f(sched_error) ==
2538 fifo_intr_sched_error_code_ctxsw_timeout_v()) {
2539 struct fifo_gk20a *f = &g->fifo;
2540 u32 ms = 0;
2541 bool verbose = false;
2542
2543 if (is_tsg) {
2544 ret = g->ops.fifo.check_tsg_ctxsw_timeout(
2545 &f->tsg[id], &verbose, &ms);
2546 } else {
2547 ret = g->ops.fifo.check_ch_ctxsw_timeout(
2548 &f->channel[id], &verbose, &ms);
2549 }
2550
2551 if (ret) {
2552 nvgpu_err(g,
2553 "fifo sched ctxsw timeout error: "
2554 "engine=%u, %s=%d, ms=%u",
2555 engine_id, is_tsg ? "tsg" : "ch", id, ms);
2556 /*
2557 * Cancel all channels' timeout since SCHED error might
2558 * trigger multiple watchdogs at a time
2559 */
2560 gk20a_channel_timeout_restart_all_channels(g);
2561 gk20a_fifo_recover(g, BIT(engine_id), id,
2562 is_tsg, true, verbose,
2563 RC_TYPE_CTXSW_TIMEOUT);
2564 } else {
2565 nvgpu_log_info(g,
2566 "fifo is waiting for ctx switch for %d ms, "
2567 "%s=%d", ms, is_tsg ? "tsg" : "ch", id);
2568 }
2569 } else {
2570 nvgpu_err(g,
2571 "fifo sched error : 0x%08x, engine=%u, %s=%d",
2572 sched_error, engine_id, is_tsg ? "tsg" : "ch", id);
2573 }
2574
2575err:
2576 return ret;
2577}
2578
2579static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
2580{
2581 bool print_channel_reset_log = false;
2582 u32 handled = 0;
2583
2584 nvgpu_log_fn(g, "fifo_intr=0x%08x", fifo_intr);
2585
2586 if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
2587 /* pio mode is unused. this shouldn't happen, ever. */
2588 /* should we clear it or just leave it pending? */
2589 nvgpu_err(g, "fifo pio error!");
2590 BUG_ON(1);
2591 }
2592
2593 if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
2594 u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
2595 nvgpu_err(g, "fifo bind error: 0x%08x", bind_error);
2596 print_channel_reset_log = true;
2597 handled |= fifo_intr_0_bind_error_pending_f();
2598 }
2599
2600 if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
2601 print_channel_reset_log = g->ops.fifo.handle_sched_error(g);
2602 handled |= fifo_intr_0_sched_error_pending_f();
2603 }
2604
2605 if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
2606 gk20a_fifo_handle_chsw_fault(g);
2607 handled |= fifo_intr_0_chsw_error_pending_f();
2608 }
2609
2610 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
2611 if (gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false)) {
2612 print_channel_reset_log = true;
2613 }
2614 handled |= fifo_intr_0_mmu_fault_pending_f();
2615 }
2616
2617 if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
2618 gk20a_fifo_handle_dropped_mmu_fault(g);
2619 handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
2620 }
2621
2622 print_channel_reset_log = !g->fifo.deferred_reset_pending
2623 && print_channel_reset_log;
2624
2625 if (print_channel_reset_log) {
2626 unsigned int engine_id;
2627 nvgpu_err(g,
2628 "channel reset initiated from %s; intr=0x%08x",
2629 __func__, fifo_intr);
2630 for (engine_id = 0;
2631 engine_id < g->fifo.num_engines;
2632 engine_id++) {
2633 u32 active_engine_id = g->fifo.active_engines_list[engine_id];
2634 u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
2635 nvgpu_log_fn(g, "enum:%d -> engine_id:%d", engine_enum,
2636 active_engine_id);
2637 fifo_pbdma_exception_status(g,
2638 &g->fifo.engine_info[active_engine_id]);
2639 fifo_engine_exception_status(g,
2640 &g->fifo.engine_info[active_engine_id]);
2641 }
2642 }
2643
2644 return handled;
2645}
2646
2647static inline void gk20a_fifo_reset_pbdma_header(struct gk20a *g, int pbdma_id)
2648{
2649 gk20a_writel(g, pbdma_pb_header_r(pbdma_id),
2650 pbdma_pb_header_first_true_f() |
2651 pbdma_pb_header_type_non_inc_f());
2652}
2653
2654void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
2655 int pbdma_method_index)
2656{
2657 u32 pbdma_method_stride;
2658 u32 pbdma_method_reg;
2659
2660 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2661 pbdma_method0_r(pbdma_id);
2662
2663 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2664 (pbdma_method_index * pbdma_method_stride);
2665
2666 gk20a_writel(g, pbdma_method_reg,
2667 pbdma_method0_valid_true_f() |
2668 pbdma_method0_first_true_f() |
2669 pbdma_method0_addr_f(
2670 pbdma_udma_nop_r() >> 2));
2671}
2672
2673static bool gk20a_fifo_is_sw_method_subch(struct gk20a *g, int pbdma_id,
2674 int pbdma_method_index)
2675{
2676 u32 pbdma_method_stride;
2677 u32 pbdma_method_reg, pbdma_method_subch;
2678
2679 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2680 pbdma_method0_r(pbdma_id);
2681
2682 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2683 (pbdma_method_index * pbdma_method_stride);
2684
2685 pbdma_method_subch = pbdma_method0_subch_v(
2686 gk20a_readl(g, pbdma_method_reg));
2687
2688 if (pbdma_method_subch == 5 ||
2689 pbdma_method_subch == 6 ||
2690 pbdma_method_subch == 7) {
2691 return true;
2692 }
2693
2694 return false;
2695}
2696
2697unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
2698 u32 pbdma_intr_0, u32 *handled, u32 *error_notifier)
2699{
2700 struct fifo_gk20a *f = &g->fifo;
2701 unsigned int rc_type = RC_TYPE_NO_RC;
2702 int i;
2703 unsigned long pbdma_intr_err;
2704 u32 bit;
2705
2706 if ((f->intr.pbdma.device_fatal_0 |
2707 f->intr.pbdma.channel_fatal_0 |
2708 f->intr.pbdma.restartable_0) & pbdma_intr_0) {
2709
2710 pbdma_intr_err = (unsigned long)pbdma_intr_0;
2711 for_each_set_bit(bit, &pbdma_intr_err, 32) {
2712 nvgpu_err(g, "PBDMA intr %s Error",
2713 pbdma_intr_fault_type_desc[bit]);
2714 }
2715
2716 nvgpu_err(g,
2717 "pbdma_intr_0(%d):0x%08x PBH: %08x "
2718			"SHADOW: %08x gp shadow0: %08x gp shadow1: %08x "
2719 "M0: %08x %08x %08x %08x ",
2720 pbdma_id, pbdma_intr_0,
2721 gk20a_readl(g, pbdma_pb_header_r(pbdma_id)),
2722 gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id)),
2723 gk20a_readl(g, pbdma_gp_shadow_0_r(pbdma_id)),
2724 gk20a_readl(g, pbdma_gp_shadow_1_r(pbdma_id)),
2725 gk20a_readl(g, pbdma_method0_r(pbdma_id)),
2726 gk20a_readl(g, pbdma_method1_r(pbdma_id)),
2727 gk20a_readl(g, pbdma_method2_r(pbdma_id)),
2728 gk20a_readl(g, pbdma_method3_r(pbdma_id))
2729 );
2730
2731 rc_type = RC_TYPE_PBDMA_FAULT;
2732 *handled |= ((f->intr.pbdma.device_fatal_0 |
2733 f->intr.pbdma.channel_fatal_0 |
2734 f->intr.pbdma.restartable_0) &
2735 pbdma_intr_0);
2736 }
2737
2738 if (pbdma_intr_0 & pbdma_intr_0_acquire_pending_f()) {
2739 u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id));
2740
2741 val &= ~pbdma_acquire_timeout_en_enable_f();
2742 gk20a_writel(g, pbdma_acquire_r(pbdma_id), val);
2743 if (nvgpu_is_timeouts_enabled(g)) {
2744 rc_type = RC_TYPE_PBDMA_FAULT;
2745 nvgpu_err(g,
2746 "semaphore acquire timeout!");
2747 *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
2748 }
2749 *handled |= pbdma_intr_0_acquire_pending_f();
2750 }
2751
2752 if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) {
2753 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2754 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2755 rc_type = RC_TYPE_PBDMA_FAULT;
2756 }
2757
2758 if (pbdma_intr_0 & pbdma_intr_0_method_pending_f()) {
2759 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2760 rc_type = RC_TYPE_PBDMA_FAULT;
2761 }
2762
2763 if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
2764 *error_notifier =
2765 NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
2766 rc_type = RC_TYPE_PBDMA_FAULT;
2767 }
2768
2769 if (pbdma_intr_0 & pbdma_intr_0_device_pending_f()) {
2770 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2771
2772 for (i = 0; i < 4; i++) {
2773 if (gk20a_fifo_is_sw_method_subch(g,
2774 pbdma_id, i)) {
2775 gk20a_fifo_reset_pbdma_method(g,
2776 pbdma_id, i);
2777 }
2778 }
2779 rc_type = RC_TYPE_PBDMA_FAULT;
2780 }
2781
2782 return rc_type;
2783}
2784
2785unsigned int gk20a_fifo_handle_pbdma_intr_1(struct gk20a *g,
2786 u32 pbdma_id, u32 pbdma_intr_1,
2787 u32 *handled, u32 *error_notifier)
2788{
2789 unsigned int rc_type = RC_TYPE_PBDMA_FAULT;
2790
2791 /*
2792 * all of the interrupts in _intr_1 are "host copy engine"
2793 * related, which is not supported. For now just make them
2794 * channel fatal.
2795 */
2796 nvgpu_err(g, "hce err: pbdma_intr_1(%d):0x%08x",
2797 pbdma_id, pbdma_intr_1);
2798 *handled |= pbdma_intr_1;
2799
2800 return rc_type;
2801}
2802
2803static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2804 struct fifo_gk20a *f, u32 pbdma_id,
2805 u32 error_notifier, u32 status)
2806{
2807 u32 id;
2808
2809 nvgpu_log(g, gpu_dbg_info, "pbdma id %d error notifier %d",
2810 pbdma_id, error_notifier);
2811 /* Remove channel from runlist */
2812 id = fifo_pbdma_status_id_v(status);
2813 if (fifo_pbdma_status_id_type_v(status)
2814 == fifo_pbdma_status_id_type_chid_v()) {
2815 struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
2816
2817 if (ch != NULL) {
2818 g->ops.fifo.set_error_notifier(ch, error_notifier);
2819 gk20a_fifo_recover_ch(g, ch, true, RC_TYPE_PBDMA_FAULT);
2820 gk20a_channel_put(ch);
2821 }
2822 } else if (fifo_pbdma_status_id_type_v(status)
2823 == fifo_pbdma_status_id_type_tsgid_v()) {
2824 struct tsg_gk20a *tsg = &f->tsg[id];
2825 struct channel_gk20a *ch = NULL;
2826
2827 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2828 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2829 channel_gk20a, ch_entry) {
2830 if (gk20a_channel_get(ch)) {
2831 g->ops.fifo.set_error_notifier(ch,
2832 error_notifier);
2833 gk20a_channel_put(ch);
2834 }
2835 }
2836 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2837 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PBDMA_FAULT);
2838 }
2839}
2840
2841u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2842 u32 pbdma_id, unsigned int rc)
2843{
2844 u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
2845 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
2846
2847 u32 handled = 0;
2848 u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
2849 unsigned int rc_type = RC_TYPE_NO_RC;
2850 u32 pbdma_status_info = 0;
2851
2852 if (pbdma_intr_0) {
2853 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2854 "pbdma id %d intr_0 0x%08x pending",
2855 pbdma_id, pbdma_intr_0);
2856
2857 if (g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, pbdma_intr_0,
2858 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2859 rc_type = RC_TYPE_PBDMA_FAULT;
2860
2861 pbdma_status_info = gk20a_readl(g,
2862 fifo_pbdma_status_r(pbdma_id));
2863 }
2864 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
2865 }
2866
2867 if (pbdma_intr_1) {
2868 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2869 "pbdma id %d intr_1 0x%08x pending",
2870 pbdma_id, pbdma_intr_1);
2871
2872 if (g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, pbdma_intr_1,
2873 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2874 rc_type = RC_TYPE_PBDMA_FAULT;
2875
2876 pbdma_status_info = gk20a_readl(g,
2877 fifo_pbdma_status_r(pbdma_id));
2878 }
2879 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
2880 }
2881
2882 if (rc == RC_YES && rc_type == RC_TYPE_PBDMA_FAULT) {
2883 gk20a_fifo_pbdma_fault_rc(g, f, pbdma_id, error_notifier,
2884 pbdma_status_info);
2885 }
2886
2887 return handled;
2888}
2889
2890static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
2891{
2892 struct fifo_gk20a *f = &g->fifo;
2893 u32 clear_intr = 0, i;
2894 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
2895 u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());
2896
2897 for (i = 0; i < host_num_pbdma; i++) {
2898 if (fifo_intr_pbdma_id_status_v(pbdma_pending, i)) {
2899 nvgpu_log(g, gpu_dbg_intr, "pbdma id %d intr pending", i);
2900 clear_intr |=
2901 gk20a_fifo_handle_pbdma_intr(g, f, i, RC_YES);
2902 }
2903 }
2904 return fifo_intr_0_pbdma_intr_pending_f();
2905}
2906
2907void gk20a_fifo_isr(struct gk20a *g)
2908{
2909 u32 error_intr_mask;
2910 u32 clear_intr = 0;
2911 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2912
2913 error_intr_mask = g->ops.fifo.intr_0_error_mask(g);
2914
2915 if (g->fifo.sw_ready) {
2916 /* note we're not actually in an "isr", but rather
2917 * in a threaded interrupt context... */
2918 nvgpu_mutex_acquire(&g->fifo.intr.isr.mutex);
2919
2920 nvgpu_log(g, gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
2921
2922 /* handle runlist update */
2923 if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
2924 gk20a_fifo_handle_runlist_event(g);
2925 clear_intr |= fifo_intr_0_runlist_event_pending_f();
2926 }
2927 if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f()) {
2928 clear_intr |= fifo_pbdma_isr(g, fifo_intr);
2929 }
2930
2931 if (g->ops.fifo.handle_ctxsw_timeout) {
2932 g->ops.fifo.handle_ctxsw_timeout(g, fifo_intr);
2933 }
2934
2935 if (unlikely((fifo_intr & error_intr_mask) != 0U)) {
2936 clear_intr |= fifo_error_isr(g, fifo_intr);
2937 }
2938
2939 nvgpu_mutex_release(&g->fifo.intr.isr.mutex);
2940 }
2941 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2942
2943 return;
2944}
2945
2946u32 gk20a_fifo_nonstall_isr(struct gk20a *g)
2947{
2948 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2949 u32 clear_intr = 0;
2950
2951 nvgpu_log(g, gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);
2952
2953 if (fifo_intr & fifo_intr_0_channel_intr_pending_f()) {
2954 clear_intr = fifo_intr_0_channel_intr_pending_f();
2955 }
2956
2957 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2958
2959 return GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE;
2960}
2961
2962void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
2963{
2964 if (is_tsg) {
2965 gk20a_writel(g, fifo_preempt_r(),
2966 fifo_preempt_id_f(id) |
2967 fifo_preempt_type_tsg_f());
2968 } else {
2969 gk20a_writel(g, fifo_preempt_r(),
2970 fifo_preempt_chid_f(id) |
2971 fifo_preempt_type_channel_f());
2972 }
2973}
2974
2975static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g)
2976{
2977	/* Use fifo_eng_timeout converted to ms for preempt
2978	 * polling. gr_idle_timeout, i.e. 3000 ms, is not appropriate
2979	 * for polling preempt completion, as the context switch timeout
2980	 * gets triggered every 100 ms and context switch recovery
2981	 * happens every 3000 ms */
2982
2983 return g->fifo_eng_timeout_us / 1000;
2984}
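
A worked example of the conversion above, using a hypothetical value (the real fifo_eng_timeout_us is platform dependent):

	/*
	 * Illustration only, not from the original file: if
	 * g->fifo_eng_timeout_us were 100000 us, the poll budget returned
	 * here is 100000 / 1000 = 100 ms, i.e. one ctxsw-timeout period
	 * rather than the 3000 ms gr_idle_timeout mentioned above.
	 */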
2985
2986int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
2987 unsigned int id_type, bool preempt_retries_left)
2988{
2989 struct nvgpu_timeout timeout;
2990 u32 delay = GR_IDLE_CHECK_DEFAULT;
2991 int ret = -EBUSY;
2992
2993 nvgpu_timeout_init(g, &timeout, gk20a_fifo_get_preempt_timeout(g),
2994 NVGPU_TIMER_CPU_TIMER);
2995 do {
2996 if (!(gk20a_readl(g, fifo_preempt_r()) &
2997 fifo_preempt_pending_true_f())) {
2998 ret = 0;
2999 break;
3000 }
3001
3002 nvgpu_usleep_range(delay, delay * 2);
3003 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3004 } while (!nvgpu_timeout_expired(&timeout));
3005
3006 if (ret) {
3007 nvgpu_err(g, "preempt timeout: id: %u id_type: %d ",
3008 id, id_type);
3009 }
3010 return ret;
3011}
3012
3013void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3014{
3015 struct channel_gk20a *ch = NULL;
3016
3017 nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
3018
3019 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3020 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3021 channel_gk20a, ch_entry) {
3022 if (!gk20a_channel_get(ch)) {
3023 continue;
3024 }
3025 g->ops.fifo.set_error_notifier(ch,
3026 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3027 gk20a_channel_put(ch);
3028 }
3029 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3030 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
3031}
3032
3033void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
3034{
3035 nvgpu_err(g, "preempt channel %d timeout", ch->chid);
3036
3037 g->ops.fifo.set_error_notifier(ch,
3038 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3039 gk20a_fifo_recover_ch(g, ch, true,
3040 RC_TYPE_PREEMPT_TIMEOUT);
3041}
3042
3043int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg,
3044 bool preempt_retries_left)
3045{
3046 int ret;
3047 unsigned int id_type;
3048
3049 nvgpu_log_fn(g, "id: %d is_tsg: %d", id, is_tsg);
3050
3051 /* issue preempt */
3052 gk20a_fifo_issue_preempt(g, id, is_tsg);
3053
3054 id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
3055
3056 /*
3057	 * Poll for preempt done. If stalling interrupts are pending
3058	 * while the preempt is in progress, the caller polls for those
3059	 * interrupts to finish, based on the return value of this
3060	 * function, and then retries the preempt.
3061	 * If the HW is hung, on the last retry we try to identify the
3062	 * hung engines, set the runlist reset_eng_bitmask, and mark
3063	 * the preemption as complete.
3064 */
3065 ret = g->ops.fifo.is_preempt_pending(g, id, id_type,
3066 preempt_retries_left);
3067
3068 return ret;
3069}
3070
3071int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
3072{
3073 struct fifo_gk20a *f = &g->fifo;
3074 u32 ret = 0;
3075 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3076 u32 mutex_ret = 0;
3077 u32 i;
3078
3079 nvgpu_log_fn(g, "chid: %d", ch->chid);
3080
3081 /* we have no idea which runlist we are using. lock all */
3082 for (i = 0; i < g->fifo.max_runlists; i++) {
3083 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3084 }
3085
3086 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3087
3088 ret = __locked_fifo_preempt(g, ch->chid, false, false);
3089
3090 if (!mutex_ret) {
3091 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3092 }
3093
3094 for (i = 0; i < g->fifo.max_runlists; i++) {
3095 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3096 }
3097
3098 if (ret) {
3099 if (nvgpu_platform_is_silicon(g)) {
3100 nvgpu_err(g, "preempt timed out for chid: %u, "
3101 "ctxsw timeout will trigger recovery if needed",
3102 ch->chid);
3103 } else {
3104 gk20a_fifo_preempt_timeout_rc(g, ch);
3105 }
3106 }
3107
3108 return ret;
3109}
3110
3111int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3112{
3113 struct fifo_gk20a *f = &g->fifo;
3114 u32 ret = 0;
3115 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3116 u32 mutex_ret = 0;
3117 u32 i;
3118
3119 nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
3120
3121 /* we have no idea which runlist we are using. lock all */
3122 for (i = 0; i < g->fifo.max_runlists; i++) {
3123 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3124 }
3125
3126 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3127
3128 ret = __locked_fifo_preempt(g, tsg->tsgid, true, false);
3129
3130 if (!mutex_ret) {
3131 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3132 }
3133
3134 for (i = 0; i < g->fifo.max_runlists; i++) {
3135 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3136 }
3137
3138 if (ret) {
3139 if (nvgpu_platform_is_silicon(g)) {
3140 nvgpu_err(g, "preempt timed out for tsgid: %u, "
3141 "ctxsw timeout will trigger recovery if needed",
3142 tsg->tsgid);
3143 } else {
3144 gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
3145 }
3146 }
3147
3148 return ret;
3149}
3150
3151int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch)
3152{
3153 int err;
3154 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
3155
3156 if (tsg != NULL) {
3157 err = g->ops.fifo.preempt_tsg(ch->g, tsg);
3158 } else {
3159 err = g->ops.fifo.preempt_channel(ch->g, ch);
3160 }
3161
3162 return err;
3163}
3164
3165static void gk20a_fifo_sched_disable_rw(struct gk20a *g, u32 runlists_mask,
3166 u32 runlist_state)
3167{
3168 u32 reg_val;
3169
3170 reg_val = gk20a_readl(g, fifo_sched_disable_r());
3171
3172 if (runlist_state == RUNLIST_DISABLED) {
3173 reg_val |= runlists_mask;
3174 } else {
3175 reg_val &= (~runlists_mask);
3176 }
3177
3178 gk20a_writel(g, fifo_sched_disable_r(), reg_val);
3179
3180}
3181
3182void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
3183 u32 runlist_state)
3184{
3185 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3186 u32 mutex_ret;
3187
3188 nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
3189 runlists_mask, runlist_state);
3190
3191 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3192
3193 gk20a_fifo_sched_disable_rw(g, runlists_mask, runlist_state);
3194
3195 if (!mutex_ret) {
3196 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3197 }
3198}
3199
3200void gk20a_fifo_enable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3201{
3202 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3203 tsg->runlist_id), RUNLIST_ENABLED);
3204
3205}
3206
3207void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3208{
3209 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3210 tsg->runlist_id), RUNLIST_DISABLED);
3211}
3212
3213int gk20a_fifo_enable_engine_activity(struct gk20a *g,
3214 struct fifo_engine_info_gk20a *eng_info)
3215{
3216 nvgpu_log(g, gpu_dbg_info, "start");
3217
3218 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3219 eng_info->runlist_id), RUNLIST_ENABLED);
3220 return 0;
3221}
3222
3223int gk20a_fifo_enable_all_engine_activity(struct gk20a *g)
3224{
3225 unsigned int i;
3226 int err = 0, ret = 0;
3227
3228 for (i = 0; i < g->fifo.num_engines; i++) {
3229 u32 active_engine_id = g->fifo.active_engines_list[i];
3230 err = gk20a_fifo_enable_engine_activity(g,
3231 &g->fifo.engine_info[active_engine_id]);
3232 if (err) {
3233 nvgpu_err(g,
3234 "failed to enable engine %d activity", active_engine_id);
3235 ret = err;
3236 }
3237 }
3238
3239 return ret;
3240}
3241
3242int gk20a_fifo_disable_engine_activity(struct gk20a *g,
3243 struct fifo_engine_info_gk20a *eng_info,
3244 bool wait_for_idle)
3245{
3246 u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
3247 u32 pbdma_chid = FIFO_INVAL_CHANNEL_ID;
3248 u32 engine_chid = FIFO_INVAL_CHANNEL_ID;
3249 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3250 int mutex_ret;
3251 struct channel_gk20a *ch = NULL;
3252 int err = 0;
3253
3254 nvgpu_log_fn(g, " ");
3255
3256 gr_stat =
3257 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3258 if (fifo_engine_status_engine_v(gr_stat) ==
3259 fifo_engine_status_engine_busy_v() && !wait_for_idle) {
3260 return -EBUSY;
3261 }
3262
3263 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3264
3265 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3266 eng_info->runlist_id), RUNLIST_DISABLED);
3267
3268 /* chid from pbdma status */
3269 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
3270 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
3271 if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
3272 chan_stat == fifo_pbdma_status_chan_status_chsw_save_v()) {
3273 pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
3274 } else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
3275 chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v()) {
3276 pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
3277 }
3278
3279 if (pbdma_chid != FIFO_INVAL_CHANNEL_ID) {
3280 ch = gk20a_channel_from_id(g, pbdma_chid);
3281 if (ch != NULL) {
3282 err = g->ops.fifo.preempt_channel(g, ch);
3283 gk20a_channel_put(ch);
3284 }
3285 if (err != 0) {
3286 goto clean_up;
3287 }
3288 }
3289
3290 /* chid from engine status */
3291 eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3292 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
3293 if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
3294 ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v()) {
3295 engine_chid = fifo_engine_status_id_v(eng_stat);
3296 } else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
3297 ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3298 engine_chid = fifo_engine_status_next_id_v(eng_stat);
3299 }
3300
3301 if (engine_chid != FIFO_INVAL_ENGINE_ID && engine_chid != pbdma_chid) {
3302 ch = gk20a_channel_from_id(g, engine_chid);
3303 if (ch != NULL) {
3304 err = g->ops.fifo.preempt_channel(g, ch);
3305 gk20a_channel_put(ch);
3306 }
3307 if (err != 0) {
3308 goto clean_up;
3309 }
3310 }
3311
3312clean_up:
3313 if (!mutex_ret) {
3314 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3315 }
3316
3317 if (err) {
3318 nvgpu_log_fn(g, "failed");
3319 if (gk20a_fifo_enable_engine_activity(g, eng_info)) {
3320 nvgpu_err(g,
3321 "failed to enable gr engine activity");
3322 }
3323 } else {
3324 nvgpu_log_fn(g, "done");
3325 }
3326 return err;
3327}
3328
3329int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
3330 bool wait_for_idle)
3331{
3332 unsigned int i;
3333 int err = 0, ret = 0;
3334 u32 active_engine_id;
3335
3336 for (i = 0; i < g->fifo.num_engines; i++) {
3337 active_engine_id = g->fifo.active_engines_list[i];
3338 err = gk20a_fifo_disable_engine_activity(g,
3339 &g->fifo.engine_info[active_engine_id],
3340 wait_for_idle);
3341 if (err) {
3342 nvgpu_err(g, "failed to disable engine %d activity",
3343 active_engine_id);
3344 ret = err;
3345 break;
3346 }
3347 }
3348
3349 if (err) {
3350 while (i-- != 0) {
3351 active_engine_id = g->fifo.active_engines_list[i];
3352 err = gk20a_fifo_enable_engine_activity(g,
3353 &g->fifo.engine_info[active_engine_id]);
3354 if (err) {
3355 nvgpu_err(g,
3356 "failed to re-enable engine %d activity",
3357 active_engine_id);
3358 }
3359 }
3360 }
3361
3362 return ret;
3363}
3364
3365static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
3366{
3367 struct fifo_gk20a *f = &g->fifo;
3368 u32 engines = 0;
3369 unsigned int i;
3370
3371 for (i = 0; i < f->num_engines; i++) {
3372 u32 active_engine_id = g->fifo.active_engines_list[i];
3373 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
3374 bool engine_busy = fifo_engine_status_engine_v(status) ==
3375 fifo_engine_status_engine_busy_v();
3376
3377 if (engine_busy &&
3378 (f->engine_info[active_engine_id].runlist_id == runlist_id)) {
3379 engines |= BIT(active_engine_id);
3380 }
3381 }
3382
3383 if (engines) {
3384 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true,
3385 RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
3386 }
3387}
3388
3389int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
3390{
3391 struct nvgpu_timeout timeout;
3392 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3393 int ret = -ETIMEDOUT;
3394
3395 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3396 NVGPU_TIMER_CPU_TIMER);
3397
3398 do {
3399 if ((gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
3400 fifo_eng_runlist_pending_true_f()) == 0) {
3401 ret = 0;
3402 break;
3403 }
3404
3405 nvgpu_usleep_range(delay, delay * 2);
3406 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3407 } while (!nvgpu_timeout_expired(&timeout));
3408
3409 if (ret) {
3410 nvgpu_err(g, "runlist wait timeout: runlist id: %u",
3411 runlist_id);
3412 }
3413
3414 return ret;
3415}
3416
3417void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist)
3418{
3419
3420 u32 runlist_entry_0 = ram_rl_entry_id_f(tsg->tsgid) |
3421 ram_rl_entry_type_tsg_f() |
3422 ram_rl_entry_tsg_length_f(tsg->num_active_channels);
3423
3424 if (tsg->timeslice_timeout) {
3425 runlist_entry_0 |=
3426 ram_rl_entry_timeslice_scale_f(tsg->timeslice_scale) |
3427 ram_rl_entry_timeslice_timeout_f(tsg->timeslice_timeout);
3428 } else {
3429 runlist_entry_0 |=
3430 ram_rl_entry_timeslice_scale_f(
3431 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) |
3432 ram_rl_entry_timeslice_timeout_f(
3433 NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT);
3434 }
3435
3436 runlist[0] = runlist_entry_0;
3437 runlist[1] = 0;
3438
3439}
3440
3441u32 gk20a_fifo_default_timeslice_us(struct gk20a *g)
3442{
3443 return (((u64)(NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT <<
3444 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) *
3445 (u64)g->ptimer_src_freq) /
3446 (u64)PTIMER_REF_FREQ_HZ);
3447}
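
A short illustration of the scaling in gk20a_fifo_default_timeslice_us; the constants below are hypothetical placeholders, only the shape of the computation comes from the function:

	/*
	 * Sketch: if the default timeslice timeout were 128 and the default
	 * scale 3, then with ptimer_src_freq == PTIMER_REF_FREQ_HZ the
	 * result is (128 << 3) * freq / freq = 1024 us; a ptimer running at
	 * half the reference frequency would yield 512 us instead.
	 */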
3448
3449void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist)
3450{
3451 runlist[0] = ram_rl_entry_chid_f(ch->chid);
3452 runlist[1] = 0;
3453}
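
A minimal sketch of how the two entry builders above combine into a runlist buffer; it assumes runlist_entry_size is two 32-bit words (as the gk20a entry layout implies) and the tsg/ch pointers are hypothetical:

	{
		u32 entries[4]; /* one TSG header entry + one channel entry */

		/* the TSG header entry comes first ... */
		gk20a_get_tsg_runlist_entry(tsg, &entries[0]);
		/* ... followed by the runnable channels bound to that TSG */
		gk20a_get_ch_runlist_entry(ch, &entries[2]);
	}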
3454
3455/* recursively construct a runlist with interleaved bare channels and TSGs */
3456u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
3457 struct fifo_runlist_info_gk20a *runlist,
3458 u32 cur_level,
3459 u32 *runlist_entry,
3460 bool interleave_enabled,
3461 bool prev_empty,
3462 u32 *entries_left)
3463{
3464 bool last_level = cur_level == NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
3465 struct channel_gk20a *ch;
3466 bool skip_next = false;
3467 u32 tsgid, count = 0;
3468 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3469 struct gk20a *g = f->g;
3470
3471 nvgpu_log_fn(g, " ");
3472
3473 /* for each TSG, T, on this level, insert all higher-level channels
3474 and TSGs before inserting T. */
3475 for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
3476 struct tsg_gk20a *tsg = &f->tsg[tsgid];
3477
3478 if (tsg->interleave_level != cur_level) {
3479 continue;
3480 }
3481
3482 if (!last_level && !skip_next) {
3483 runlist_entry = gk20a_runlist_construct_locked(f,
3484 runlist,
3485 cur_level + 1,
3486 runlist_entry,
3487 interleave_enabled,
3488 false,
3489 entries_left);
3490 if (!interleave_enabled) {
3491 skip_next = true;
3492 }
3493 }
3494
3495 if (*entries_left == 0U) {
3496 return NULL;
3497 }
3498
3499 /* add TSG entry */
3500 nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
3501 f->g->ops.fifo.get_tsg_runlist_entry(tsg, runlist_entry);
3502 nvgpu_log_info(g, "tsg runlist count %d runlist [0] %x [1] %x\n",
3503 count, runlist_entry[0], runlist_entry[1]);
3504 runlist_entry += runlist_entry_words;
3505 count++;
3506 (*entries_left)--;
3507
3508 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3509 /* add runnable channels bound to this TSG */
3510 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3511 channel_gk20a, ch_entry) {
3512 if (!test_bit((int)ch->chid,
3513 runlist->active_channels)) {
3514 continue;
3515 }
3516
3517 if (*entries_left == 0U) {
3518 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3519 return NULL;
3520 }
3521
3522 nvgpu_log_info(g, "add channel %d to runlist",
3523 ch->chid);
3524 f->g->ops.fifo.get_ch_runlist_entry(ch, runlist_entry);
3525 nvgpu_log_info(g,
3526 "run list count %d runlist [0] %x [1] %x\n",
3527 count, runlist_entry[0], runlist_entry[1]);
3528 count++;
3529 runlist_entry += runlist_entry_words;
3530 (*entries_left)--;
3531 }
3532 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3533 }
3534
3535 /* append entries from higher level if this level is empty */
3536 if (!count && !last_level) {
3537 runlist_entry = gk20a_runlist_construct_locked(f,
3538 runlist,
3539 cur_level + 1,
3540 runlist_entry,
3541 interleave_enabled,
3542 true,
3543 entries_left);
3544 }
3545
3546 /*
3547 * if previous and this level have entries, append
3548 * entries from higher level.
3549 *
3550 * ex. dropping from MEDIUM to LOW, need to insert HIGH
3551 */
3552 if (interleave_enabled && count && !prev_empty && !last_level) {
3553 runlist_entry = gk20a_runlist_construct_locked(f,
3554 runlist,
3555 cur_level + 1,
3556 runlist_entry,
3557 interleave_enabled,
3558 false,
3559 entries_left);
3560 }
3561 return runlist_entry;
3562}
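
The interleaving above is easier to see with a concrete ordering; the example below is an illustration traced from the code, assuming interleaving is enabled and exactly one TSG per level:

	/*
	 * Illustration (not in the original file): with one TSG per level --
	 * H (high), M (medium), L (low) -- the construction emits roughly
	 *
	 *     H, M, H, L
	 *
	 * i.e. higher-priority levels are re-inserted before and between
	 * lower-priority TSGs, so they receive more runlist slots per pass.
	 */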
3563
3564int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
3565 u32 id,
3566 u32 runlist_id,
3567 u32 new_level)
3568{
3569 nvgpu_log_fn(g, " ");
3570
3571 g->fifo.tsg[id].interleave_level = new_level;
3572
3573 return 0;
3574}
3575
3576int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
3577{
3578 struct gk20a *g = tsg->g;
3579
3580 if (timeslice < g->min_timeslice_us ||
3581 timeslice > g->max_timeslice_us) {
3582 return -EINVAL;
3583 }
3584
3585 gk20a_channel_get_timescale_from_timeslice(g, timeslice,
3586 &tsg->timeslice_timeout, &tsg->timeslice_scale);
3587
3588 tsg->timeslice_us = timeslice;
3589
3590 return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
3591}
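
A hedged usage sketch for the helper above; the 2000 us request is hypothetical, and whether it is accepted depends on g->min_timeslice_us and g->max_timeslice_us:

	{
		int err;

		/* hypothetical request: a 2 ms timeslice for this TSG */
		err = gk20a_fifo_tsg_set_timeslice(tsg, 2000);
		if (err == -EINVAL) {
			/* outside [min_timeslice_us, max_timeslice_us] */
		}
	}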
3592
3593void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
3594 u32 count, u32 buffer_index)
3595{
3596 struct fifo_runlist_info_gk20a *runlist = NULL;
3597 u64 runlist_iova;
3598
3599 runlist = &g->fifo.runlist_info[runlist_id];
3600 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]);
3601
3602 if (count != 0) {
3603 gk20a_writel(g, fifo_runlist_base_r(),
3604 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3605 nvgpu_aperture_mask(g, &runlist->mem[buffer_index],
3606 fifo_runlist_base_target_sys_mem_ncoh_f(),
3607 fifo_runlist_base_target_sys_mem_coh_f(),
3608 fifo_runlist_base_target_vid_mem_f()));
3609 }
3610
3611 gk20a_writel(g, fifo_runlist_r(),
3612 fifo_runlist_engine_f(runlist_id) |
3613 fifo_eng_runlist_length_f(count));
3614}
3615
3616int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3617 u32 chid, bool add,
3618 bool wait_for_finish)
3619{
3620 int ret = 0;
3621 struct fifo_gk20a *f = &g->fifo;
3622 struct fifo_runlist_info_gk20a *runlist = NULL;
3623 u32 *runlist_entry_base = NULL;
3624 u64 runlist_iova;
3625 u32 new_buf;
3626 struct channel_gk20a *ch = NULL;
3627 struct tsg_gk20a *tsg = NULL;
3628 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3629
3630 runlist = &f->runlist_info[runlist_id];
3631
3632 /* valid channel, add/remove it from active list.
3633 Otherwise, keep active list untouched for suspend/resume. */
3634 if (chid != FIFO_INVAL_CHANNEL_ID) {
3635 ch = &f->channel[chid];
3636 tsg = tsg_gk20a_from_ch(ch);
3637
3638 if (add) {
3639 if (test_and_set_bit(chid,
3640 runlist->active_channels) == 1) {
3641 return 0;
3642 }
3643 if (tsg && ++tsg->num_active_channels) {
3644 set_bit((int)f->channel[chid].tsgid,
3645 runlist->active_tsgs);
3646 }
3647 } else {
3648 if (test_and_clear_bit(chid,
3649 runlist->active_channels) == 0) {
3650 return 0;
3651 }
3652 if (tsg && --tsg->num_active_channels == 0) {
3653 clear_bit((int)f->channel[chid].tsgid,
3654 runlist->active_tsgs);
3655 }
3656 }
3657 }
3658
3659 new_buf = !runlist->cur_buffer;
3660
3661 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[new_buf]);
3662
3663 nvgpu_log_info(g, "runlist_id : %d, switch to new buffer 0x%16llx",
3664 runlist_id, (u64)runlist_iova);
3665
3666 if (!runlist_iova) {
3667 ret = -EINVAL;
3668 goto clean_up;
3669 }
3670
3671 runlist_entry_base = runlist->mem[new_buf].cpu_va;
3672 if (!runlist_entry_base) {
3673 ret = -ENOMEM;
3674 goto clean_up;
3675 }
3676
3677 if (chid != FIFO_INVAL_CHANNEL_ID || /* add/remove a valid channel */
3678 add /* resume to add all channels back */) {
3679 u32 max_entries = f->num_runlist_entries;
3680 u32 *runlist_end;
3681
3682 runlist_end = gk20a_runlist_construct_locked(f,
3683 runlist,
3684 0,
3685 runlist_entry_base,
3686 g->runlist_interleave,
3687 true,
3688 &max_entries);
3689 if (!runlist_end) {
3690 ret = -E2BIG;
3691 goto clean_up;
3692 }
3693 runlist->count = (runlist_end - runlist_entry_base) /
3694 runlist_entry_words;
3695 WARN_ON(runlist->count > f->num_runlist_entries);
3696 } else {
3697 /* suspend to remove all channels */
3698 runlist->count = 0;
3699 }
3700
3701 g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
3702
3703 if (wait_for_finish) {
3704 ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
3705
3706 if (ret == -ETIMEDOUT) {
3707 nvgpu_err(g, "runlist %d update timeout", runlist_id);
3708 /* trigger runlist update timeout recovery */
3709 return ret;
3710
3711 } else if (ret == -EINTR) {
3712 nvgpu_err(g, "runlist update interrupted");
3713 }
3714 }
3715
3716 runlist->cur_buffer = new_buf;
3717
3718clean_up:
3719 return ret;
3720}
3721
3722int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 chid,
3723 bool add, bool wait_for_finish)
3724{
3725 u32 ret = -EINVAL;
3726 u32 runlist_id = 0;
3727 u32 errcode;
3728 unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
3729
3730 if (!g) {
3731 goto end;
3732 }
3733
3734 ret = 0;
3735 for_each_set_bit(runlist_id, &ulong_runlist_ids, 32) {
3736 /* Capture the last failure error code */
3737 errcode = g->ops.fifo.update_runlist(g, runlist_id, chid, add, wait_for_finish);
3738 if (errcode) {
3739 nvgpu_err(g,
3740 "failed to update_runlist %d %d", runlist_id, errcode);
3741 ret = errcode;
3742 }
3743 }
3744end:
3745 return ret;
3746}
3747
3748/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
3749static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
3750 bool wait_preempt)
3751{
3752 struct gk20a *g = ch->g;
3753 struct fifo_runlist_info_gk20a *runlist =
3754 &g->fifo.runlist_info[ch->runlist_id];
3755 int ret = 0;
3756 u32 gr_eng_id = 0;
3757 u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
3758 u32 preempt_id;
3759 u32 preempt_type = 0;
3760
3761 if (1 != gk20a_fifo_get_engine_ids(
3762 g, &gr_eng_id, 1, ENGINE_GR_GK20A)) {
3763 return ret;
3764 }
3765 if (!(runlist->eng_bitmask & (1 << gr_eng_id))) {
3766 return ret;
3767 }
3768
3769 if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
3770 fifo_preempt_pending_true_f()) {
3771 return ret;
3772 }
3773
3774 fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3775 engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
3776 ctxstat = fifo_engine_status_ctx_status_v(engstat);
3777 if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3778 /* host switching to next context, preempt that if needed */
3779 preempt_id = fifo_engine_status_next_id_v(engstat);
3780 preempt_type = fifo_engine_status_next_id_type_v(engstat);
3781 } else {
3782 return ret;
3783 }
3784 if (preempt_id == ch->tsgid && preempt_type) {
3785 return ret;
3786 }
3787 fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3788 if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
3789 fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
3790 /* preempt useless if FECS acked save and started restore */
3791 return ret;
3792 }
3793
3794 gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
3795#ifdef TRACEPOINTS_ENABLED
3796 trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
3797 fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
3798 gk20a_readl(g, fifo_preempt_r()));
3799#endif
3800 if (wait_preempt) {
3801 g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type, false);
3802 }
3803#ifdef TRACEPOINTS_ENABLED
3804 trace_gk20a_reschedule_preempted_next(ch->chid);
3805#endif
3806 return ret;
3807}
3808
3809int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
3810{
3811 return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
3812}
3813
3814/* trigger host to expire current timeslice and reschedule runlist from front */
3815int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
3816 bool wait_preempt)
3817{
3818 struct gk20a *g = ch->g;
3819 struct fifo_runlist_info_gk20a *runlist;
3820 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3821 u32 mutex_ret;
3822 int ret = 0;
3823
3824 runlist = &g->fifo.runlist_info[ch->runlist_id];
3825 if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
3826 return -EBUSY;
3827 }
3828
3829 mutex_ret = nvgpu_pmu_mutex_acquire(
3830 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3831
3832 g->ops.fifo.runlist_hw_submit(
3833 g, ch->runlist_id, runlist->count, runlist->cur_buffer);
3834
3835 if (preempt_next) {
3836 __locked_fifo_reschedule_preempt_next(ch, wait_preempt);
3837 }
3838
3839 gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
3840
3841 if (!mutex_ret) {
3842 nvgpu_pmu_mutex_release(
3843 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3844 }
3845 nvgpu_mutex_release(&runlist->runlist_lock);
3846
3847 return ret;
3848}
3849
3850/* add/remove a channel from runlist
3851 special cases below: runlist->active_channels will NOT be changed.
3852 (chid == ~0 && !add) means remove all active channels from runlist.
3853 (chid == ~0 && add) means restore all active channels on runlist. */
3854int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 chid,
3855 bool add, bool wait_for_finish)
3856{
3857 struct fifo_runlist_info_gk20a *runlist = NULL;
3858 struct fifo_gk20a *f = &g->fifo;
3859 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3860 u32 mutex_ret;
3861 int ret = 0;
3862
3863 nvgpu_log_fn(g, " ");
3864
3865 runlist = &f->runlist_info[runlist_id];
3866
3867 nvgpu_mutex_acquire(&runlist->runlist_lock);
3868
3869 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3870
3871 ret = gk20a_fifo_update_runlist_locked(g, runlist_id, chid, add,
3872 wait_for_finish);
3873
3874 if (!mutex_ret) {
3875 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3876 }
3877
3878 nvgpu_mutex_release(&runlist->runlist_lock);
3879
3880 if (ret == -ETIMEDOUT) {
3881 gk20a_fifo_runlist_reset_engines(g, runlist_id);
3882 }
3883
3884 return ret;
3885}
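
A small usage sketch of the special cases documented above gk20a_fifo_update_runlist; runlist id 0 is a hypothetical choice and return values are ignored for brevity:

	/* suspend path: remove all active channels from runlist 0 */
	(void) gk20a_fifo_update_runlist(g, 0, ~0U, false, true);

	/* resume path: restore all active channels on runlist 0 */
	(void) gk20a_fifo_update_runlist(g, 0, ~0U, true, true);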
3886
3887int gk20a_fifo_suspend(struct gk20a *g)
3888{
3889 nvgpu_log_fn(g, " ");
3890
3891 /* stop bar1 snooping */
3892 if (g->ops.mm.is_bar1_supported(g)) {
3893 gk20a_writel(g, fifo_bar1_base_r(),
3894 fifo_bar1_base_valid_false_f());
3895 }
3896
3897 /* disable fifo intr */
3898 gk20a_writel(g, fifo_intr_en_0_r(), 0);
3899 gk20a_writel(g, fifo_intr_en_1_r(), 0);
3900
3901 nvgpu_log_fn(g, "done");
3902 return 0;
3903}
3904
3905bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
3906{
3907 if (gk20a_readl(g, fifo_intr_0_r()) &
3908 fifo_intr_0_mmu_fault_pending_f()) {
3909 return true;
3910 } else {
3911 return false;
3912 }
3913}
3914
3915bool gk20a_fifo_is_engine_busy(struct gk20a *g)
3916{
3917 u32 i, host_num_engines;
3918
3919 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3920
3921 for (i = 0; i < host_num_engines; i++) {
3922 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3923 if (fifo_engine_status_engine_v(status) ==
3924 fifo_engine_status_engine_busy_v()) {
3925 return true;
3926 }
3927 }
3928 return false;
3929}
3930
3931int gk20a_fifo_wait_engine_idle(struct gk20a *g)
3932{
3933 struct nvgpu_timeout timeout;
3934 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3935 int ret = -ETIMEDOUT;
3936 u32 i, host_num_engines;
3937
3938 nvgpu_log_fn(g, " ");
3939
3940 host_num_engines =
3941 nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3942
3943 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3944 NVGPU_TIMER_CPU_TIMER);
3945
3946 for (i = 0; i < host_num_engines; i++) {
3947 do {
3948 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3949 if (!fifo_engine_status_engine_v(status)) {
3950 ret = 0;
3951 break;
3952 }
3953
3954 nvgpu_usleep_range(delay, delay * 2);
3955 delay = min_t(unsigned long,
3956 delay << 1, GR_IDLE_CHECK_MAX);
3957 } while (!nvgpu_timeout_expired(&timeout));
3958
3959 if (ret) {
3960 nvgpu_log_info(g, "cannot idle engine %u", i);
3961 break;
3962 }
3963 }
3964
3965 nvgpu_log_fn(g, "done");
3966
3967 return ret;
3968}
3969
3970u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
3971{
3972 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
3973}
3974
3975static const char * const ccsr_chan_status_str[] = {
3976 "idle",
3977 "pending",
3978 "pending_ctx_reload",
3979 "pending_acquire",
3980 "pending_acq_ctx_reload",
3981 "on_pbdma",
3982 "on_pbdma_and_eng",
3983 "on_eng",
3984 "on_eng_pending_acquire",
3985 "on_eng_pending",
3986 "on_pbdma_ctx_reload",
3987 "on_pbdma_and_eng_ctx_reload",
3988 "on_eng_ctx_reload",
3989 "on_eng_pending_ctx_reload",
3990 "on_eng_pending_acq_ctx_reload",
3991};
3992
3993static const char * const pbdma_chan_eng_ctx_status_str[] = {
3994 "invalid",
3995 "valid",
3996 "NA",
3997 "NA",
3998 "NA",
3999 "load",
4000 "save",
4001 "switch",
4002};
4003
4004static const char * const not_found_str[] = {
4005 "NOT FOUND"
4006};
4007
4008const char *gk20a_decode_ccsr_chan_status(u32 index)
4009{
4010 if (index >= ARRAY_SIZE(ccsr_chan_status_str)) {
4011 return not_found_str[0];
4012 } else {
4013 return ccsr_chan_status_str[index];
4014 }
4015}
4016
4017const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index)
4018{
4019 if (index >= ARRAY_SIZE(pbdma_chan_eng_ctx_status_str)) {
4020 return not_found_str[0];
4021 } else {
4022 return pbdma_chan_eng_ctx_status_str[index];
4023 }
4024}
4025
4026bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid)
4027{
4028 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4029
4030 return ccsr_channel_next_v(channel) == ccsr_channel_next_true_v();
4031}
4032
4033bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid)
4034{
4035 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4036 u32 status = ccsr_channel_status_v(channel);
4037
4038 return (status == ccsr_channel_status_pending_ctx_reload_v() ||
4039 status == ccsr_channel_status_pending_acq_ctx_reload_v() ||
4040 status == ccsr_channel_status_on_pbdma_ctx_reload_v() ||
4041 status == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() ||
4042 status == ccsr_channel_status_on_eng_ctx_reload_v() ||
4043 status == ccsr_channel_status_on_eng_pending_ctx_reload_v() ||
4044 status == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v());
4045}
4046
4047void gk20a_dump_channel_status_ramfc(struct gk20a *g,
4048 struct gk20a_debug_output *o,
4049 u32 chid,
4050 struct ch_state *ch_state)
4051{
4052 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4053 u32 status = ccsr_channel_status_v(channel);
4054 u32 syncpointa, syncpointb;
4055 u32 *inst_mem;
4056 struct channel_gk20a *c = g->fifo.channel + chid;
4057 struct nvgpu_semaphore_int *hw_sema = NULL;
4058
4059 if (c->hw_sema) {
4060 hw_sema = c->hw_sema;
4061 }
4062
4063 if (!ch_state) {
4064 return;
4065 }
4066
4067 inst_mem = &ch_state->inst_block[0];
4068
4069 syncpointa = inst_mem[ram_fc_syncpointa_w()];
4070 syncpointb = inst_mem[ram_fc_syncpointb_w()];
4071
4072 gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", chid,
4073 g->name,
4074 ch_state->pid,
4075 ch_state->refs,
4076 ch_state->deterministic ? ", deterministic" : "");
4077 gk20a_debug_output(o, "channel status: %s in use %s %s\n",
4078 ccsr_channel_enable_v(channel) ? "" : "not",
4079 gk20a_decode_ccsr_chan_status(status),
4080 ccsr_channel_busy_v(channel) ? "busy" : "not busy");
4081 gk20a_debug_output(o, "RAMFC : TOP: %016llx PUT: %016llx GET: %016llx "
4082 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
4083 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
4084 (u64)inst_mem[ram_fc_pb_top_level_get_w()] +
4085 ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
4086 (u64)inst_mem[ram_fc_pb_put_w()] +
4087 ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
4088 (u64)inst_mem[ram_fc_pb_get_w()] +
4089 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
4090 (u64)inst_mem[ram_fc_pb_fetch_w()] +
4091 ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
4092 inst_mem[ram_fc_pb_header_w()],
4093 inst_mem[ram_fc_pb_count_w()],
4094 syncpointa,
4095 syncpointb,
4096 inst_mem[ram_fc_semaphorea_w()],
4097 inst_mem[ram_fc_semaphoreb_w()],
4098 inst_mem[ram_fc_semaphorec_w()],
4099 inst_mem[ram_fc_semaphored_w()]);
4100 if (hw_sema) {
4101 gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
4102 "next_val: 0x%08x addr: 0x%010llx\n",
4103 __nvgpu_semaphore_read(hw_sema),
4104 nvgpu_atomic_read(&hw_sema->next_value),
4105 nvgpu_hw_sema_addr(hw_sema));
4106 }
4107
4108#ifdef CONFIG_TEGRA_GK20A_NVHOST
4109 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
4110 && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
4111 pbdma_syncpointb_wait_switch_en_v()))
4112 gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n",
4113 (status == 3 || status == 8) ? "Waiting" : "Waited",
4114 pbdma_syncpointb_syncpt_index_v(syncpointb),
4115 nvgpu_nvhost_syncpt_get_name(g->nvhost_dev,
4116 pbdma_syncpointb_syncpt_index_v(syncpointb)),
4117 pbdma_syncpointa_payload_v(syncpointa));
4118#endif
4119
4120 gk20a_debug_output(o, "\n");
4121}
4122
4123void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
4124 struct gk20a_debug_output *o)
4125{
4126 struct fifo_gk20a *f = &g->fifo;
4127 u32 chid;
4128 struct ch_state **ch_state;
4129
4130 ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
4131 if (!ch_state) {
4132 gk20a_debug_output(o, "cannot alloc memory for channels\n");
4133 return;
4134 }
4135
4136 for (chid = 0; chid < f->num_channels; chid++) {
4137 struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
4138 if (ch != NULL) {
4139 ch_state[chid] =
4140 nvgpu_kmalloc(g, sizeof(struct ch_state) +
4141 ram_in_alloc_size_v());
4142			/* on a successful alloc, the channel ref taken
4143			 * here is kept for the loops below */
4144 if (!ch_state[chid]) {
4145 gk20a_channel_put(ch);
4146 }
4147 }
4148 }
4149
4150 for (chid = 0; chid < f->num_channels; chid++) {
4151 struct channel_gk20a *ch = &f->channel[chid];
4152 if (!ch_state[chid]) {
4153 continue;
4154 }
4155
4156 ch_state[chid]->pid = ch->pid;
4157 ch_state[chid]->refs = nvgpu_atomic_read(&ch->ref_count);
4158 ch_state[chid]->deterministic = ch->deterministic;
4159 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
4160 &ch_state[chid]->inst_block[0],
4161 ram_in_alloc_size_v());
4162 gk20a_channel_put(ch);
4163 }
4164 for (chid = 0; chid < f->num_channels; chid++) {
4165 if (ch_state[chid]) {
4166 g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
4167 ch_state[chid]);
4168 nvgpu_kfree(g, ch_state[chid]);
4169 }
4170 }
4171 nvgpu_kfree(g, ch_state);
4172}
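/*
 * Note on the flow above: the dump is intentionally done in passes. The
 * first loop takes a channel reference and allocates a ch_state snapshot
 * buffer, the second copies each channel's RAMFC into that snapshot and
 * drops the reference, and only then does the final loop format the debug
 * output, so no channel references are held while the (potentially slow)
 * dump itself runs.
 */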
4173
4174void gk20a_dump_pbdma_status(struct gk20a *g,
4175 struct gk20a_debug_output *o)
4176{
4177 u32 i, host_num_pbdma;
4178
4179 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
4180
4181 for (i = 0; i < host_num_pbdma; i++) {
4182 u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
4183 u32 chan_status = fifo_pbdma_status_chan_status_v(status);
4184
4185 gk20a_debug_output(o, "%s pbdma %d: ", g->name, i);
4186 gk20a_debug_output(o,
4187 "id: %d (%s), next_id: %d (%s) chan status: %s\n",
4188 fifo_pbdma_status_id_v(status),
4189 fifo_pbdma_status_id_type_v(status) ?
4190 "tsg" : "channel",
4191 fifo_pbdma_status_next_id_v(status),
4192 fifo_pbdma_status_next_id_type_v(status) ?
4193 "tsg" : "channel",
4194 gk20a_decode_pbdma_chan_eng_ctx_status(chan_status));
4195 gk20a_debug_output(o, "PBDMA_PUT: %016llx PBDMA_GET: %016llx "
4196 "GP_PUT: %08x GP_GET: %08x "
4197 "FETCH: %08x HEADER: %08x\n"
4198 "HDR: %08x SHADOW0: %08x SHADOW1: %08x",
4199 (u64)gk20a_readl(g, pbdma_put_r(i)) +
4200 ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
4201 (u64)gk20a_readl(g, pbdma_get_r(i)) +
4202 ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
4203 gk20a_readl(g, pbdma_gp_put_r(i)),
4204 gk20a_readl(g, pbdma_gp_get_r(i)),
4205 gk20a_readl(g, pbdma_gp_fetch_r(i)),
4206 gk20a_readl(g, pbdma_pb_header_r(i)),
4207 gk20a_readl(g, pbdma_hdr_shadow_r(i)),
4208 gk20a_readl(g, pbdma_gp_shadow_0_r(i)),
4209 gk20a_readl(g, pbdma_gp_shadow_1_r(i)));
4210 }
4211 gk20a_debug_output(o, "\n");
4212}
4213
4214void gk20a_dump_eng_status(struct gk20a *g,
4215 struct gk20a_debug_output *o)
4216{
4217 u32 i, host_num_engines;
4218
4219 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
4220
4221 for (i = 0; i < host_num_engines; i++) {
4222 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
4223 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
4224
4225 gk20a_debug_output(o, "%s eng %d: ", g->name, i);
4226 gk20a_debug_output(o,
4227 "id: %d (%s), next_id: %d (%s), ctx status: %s ",
4228 fifo_engine_status_id_v(status),
4229 fifo_engine_status_id_type_v(status) ?
4230 "tsg" : "channel",
4231 fifo_engine_status_next_id_v(status),
4232 fifo_engine_status_next_id_type_v(status) ?
4233 "tsg" : "channel",
4234 gk20a_decode_pbdma_chan_eng_ctx_status(ctx_status));
4235
4236 if (fifo_engine_status_faulted_v(status)) {
4237 gk20a_debug_output(o, "faulted ");
4238 }
4239 if (fifo_engine_status_engine_v(status)) {
4240 gk20a_debug_output(o, "busy ");
4241 }
4242 gk20a_debug_output(o, "\n");
4243 }
4244 gk20a_debug_output(o, "\n");
4245}
4246
4247void gk20a_fifo_enable_channel(struct channel_gk20a *ch)
4248{
4249 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4250 gk20a_readl(ch->g, ccsr_channel_r(ch->chid)) |
4251 ccsr_channel_enable_set_true_f());
4252}
4253
4254void gk20a_fifo_disable_channel(struct channel_gk20a *ch)
4255{
4256 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4257 gk20a_readl(ch->g,
4258 ccsr_channel_r(ch->chid)) |
4259 ccsr_channel_enable_clr_true_f());
4260}
4261
4262void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
4263{
4264 struct gk20a *g = ch_gk20a->g;
4265
4266 nvgpu_log_fn(g, " ");
4267
4268 if (nvgpu_atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
4269 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->chid),
4270 ccsr_channel_inst_ptr_f(0) |
4271 ccsr_channel_inst_bind_false_f());
4272 }
4273}
4274
4275static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
4276{
4277 u32 addr_lo;
4278 u32 addr_hi;
4279 struct gk20a *g = c->g;
4280
4281 nvgpu_log_fn(g, " ");
4282
4283 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
4284 addr_hi = u64_hi32(c->userd_iova);
4285
4286 nvgpu_log_info(g, "channel %d : set ramfc userd 0x%16llx",
4287 c->chid, (u64)c->userd_iova);
4288
4289 nvgpu_mem_wr32(g, &c->inst_block,
4290 ram_in_ramfc_w() + ram_fc_userd_w(),
4291 nvgpu_aperture_mask(g, &g->fifo.userd,
4292 pbdma_userd_target_sys_mem_ncoh_f(),
4293 pbdma_userd_target_sys_mem_coh_f(),
4294 pbdma_userd_target_vid_mem_f()) |
4295 pbdma_userd_addr_f(addr_lo));
4296
4297 nvgpu_mem_wr32(g, &c->inst_block,
4298 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
4299 pbdma_userd_hi_addr_f(addr_hi));
4300
4301 return 0;
4302}
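/*
 * A worked example for the USERD address split above, assuming
 * ram_userd_base_shift_v() is 9 (i.e. 512-byte USERD alignment; the exact
 * value comes from the generated hw_ram headers):
 *
 *	userd_iova = 0x123456000
 *	addr_lo    = u64_lo32(0x123456000 >> 9) = 0x0091a2b0
 *	addr_hi    = u64_hi32(0x123456000)      = 0x1
 *
 * The low RAMFC word carries the IOVA in USERD-sized units plus the
 * aperture bits, while the high word carries the upper 32 bits unshifted.
 */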
4303
4304int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
4305 u64 gpfifo_base, u32 gpfifo_entries,
4306 unsigned long timeout,
4307 u32 flags)
4308{
4309 struct gk20a *g = c->g;
4310 struct nvgpu_mem *mem = &c->inst_block;
4311
4312 nvgpu_log_fn(g, " ");
4313
4314 nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v());
4315
4316 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
4317 pbdma_gp_base_offset_f(
4318 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
4319
4320 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
4321 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
4322 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
4323
4324 nvgpu_mem_wr32(g, mem, ram_fc_signature_w(),
4325 c->g->ops.fifo.get_pbdma_signature(c->g));
4326
4327 nvgpu_mem_wr32(g, mem, ram_fc_formats_w(),
4328 pbdma_formats_gp_fermi0_f() |
4329 pbdma_formats_pb_fermi1_f() |
4330 pbdma_formats_mp_fermi0_f());
4331
4332 nvgpu_mem_wr32(g, mem, ram_fc_pb_header_w(),
4333 pbdma_pb_header_priv_user_f() |
4334 pbdma_pb_header_method_zero_f() |
4335 pbdma_pb_header_subchannel_zero_f() |
4336 pbdma_pb_header_level_main_f() |
4337 pbdma_pb_header_first_true_f() |
4338 pbdma_pb_header_type_inc_f());
4339
4340 nvgpu_mem_wr32(g, mem, ram_fc_subdevice_w(),
4341 pbdma_subdevice_id_f(1) |
4342 pbdma_subdevice_status_active_f() |
4343 pbdma_subdevice_channel_dma_enable_f());
4344
4345 nvgpu_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
4346
4347 nvgpu_mem_wr32(g, mem, ram_fc_acquire_w(),
4348 g->ops.fifo.pbdma_acquire_val(timeout));
4349
4350 nvgpu_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
4351 fifo_runlist_timeslice_timeout_128_f() |
4352 fifo_runlist_timeslice_timescale_3_f() |
4353 fifo_runlist_timeslice_enable_true_f());
4354
4355 nvgpu_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
4356 fifo_pb_timeslice_timeout_16_f() |
4357 fifo_pb_timeslice_timescale_0_f() |
4358 fifo_pb_timeslice_enable_true_f());
4359
4360 nvgpu_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->chid));
4361
4362 if (c->is_privileged_channel) {
4363 gk20a_fifo_setup_ramfc_for_privileged_channel(c);
4364 }
4365
4366 return gk20a_fifo_commit_userd(c);
4367}
4368
4369void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
4370{
4371 struct gk20a *g = c->g;
4372 struct nvgpu_mem *mem = &c->inst_block;
4373
4374 nvgpu_log_info(g, "channel %d : set ramfc privileged_channel", c->chid);
4375
4376 /* Enable HCE priv mode for phys mode transfer */
4377 nvgpu_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
4378 pbdma_hce_ctrl_hce_priv_mode_yes_f());
4379}
4380
4381int gk20a_fifo_setup_userd(struct channel_gk20a *c)
4382{
4383 struct gk20a *g = c->g;
4384 struct nvgpu_mem *mem;
4385 u32 offset;
4386
4387 nvgpu_log_fn(g, " ");
4388
4389 if (nvgpu_mem_is_valid(&c->usermode_userd)) {
4390 mem = &c->usermode_userd;
4391 offset = 0;
4392 } else {
4393 mem = &g->fifo.userd;
4394 offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
4395 }
4396
4397 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
4398 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
4399 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
4400 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
4401 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
4402 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
4403 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
4404 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
4405 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
4406 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
4407
4408 return 0;
4409}
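/*
 * Illustration of the non-usermode offset computed above, assuming the
 * usual 512-byte USERD entry size: for chid 3 the offset is
 * 3 * 512 / sizeof(u32) = 384 u32 words, i.e. byte offset 1536 into the
 * global USERD buffer; each ram_userd_*_w() index is then relative to
 * that base.
 */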
4410
4411int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
4412{
4413 int err;
4414
4415 nvgpu_log_fn(g, " ");
4416
4417 err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
4418 if (err) {
4419 return err;
4420 }
4421
4422 nvgpu_log_info(g, "channel %d inst block physical addr: 0x%16llx",
4423 ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
4424
4425 nvgpu_log_fn(g, "done");
4426 return 0;
4427}
4428
4429void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
4430{
4431 nvgpu_free_inst_block(g, &ch->inst_block);
4432}
4433
4434u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
4435{
4436 return gk20a_bar1_readl(g,
4437 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
4438}
4439
4440u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c)
4441{
4442 u32 lo = gk20a_bar1_readl(g,
4443 c->userd_gpu_va + sizeof(u32) * ram_userd_get_w());
4444 u32 hi = gk20a_bar1_readl(g,
4445 c->userd_gpu_va + sizeof(u32) * ram_userd_get_hi_w());
4446
4447 return ((u64)hi << 32) | lo;
4448}
4449
4450void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
4451{
4452 gk20a_bar1_writel(g,
4453 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
4454 c->gpfifo.put);
4455}
4456
4457u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
4458{
4459 u32 val, exp, man;
4460 unsigned int val_len;
4461
4462 val = pbdma_acquire_retry_man_2_f() |
4463 pbdma_acquire_retry_exp_2_f();
4464
4465 if (!timeout) {
4466 return val;
4467 }
4468
4469 timeout *= 80UL;
4470 do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
4471 timeout *= 1000000UL; /* ms -> ns */
4472 do_div(timeout, 1024); /* in unit of 1024ns */
4473 val_len = fls(timeout >> 32) + 32;
4474 if (val_len == 32) {
4475 val_len = fls(timeout);
4476 }
4477 if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
4478 exp = pbdma_acquire_timeout_exp_max_v();
4479 man = pbdma_acquire_timeout_man_max_v();
4480 } else if (val_len > 16) {
4481 exp = val_len - 16;
4482 man = timeout >> exp;
4483 } else {
4484 exp = 0;
4485 man = timeout;
4486 }
4487
4488 val |= pbdma_acquire_timeout_exp_f(exp) |
4489 pbdma_acquire_timeout_man_f(man) |
4490 pbdma_acquire_timeout_en_enable_f();
4491
4492 return val;
4493}
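/*
 * A worked example of the mantissa/exponent encoding above, for a channel
 * watchdog timeout of 3000 ms:
 *
 *	3000 ms * 80 / 100   = 2400 ms         (80% of the watchdog)
 *	2400 ms * 1000000    = 2400000000 ns
 *	2400000000 ns / 1024 = 2343750 units   (1024 ns per unit)
 *	fls(2343750)         = 22, so exp = 22 - 16 = 6
 *	man                  = 2343750 >> 6 = 36621
 *
 * The hardware then waits man << exp = 2343744 units, roughly 2.4 seconds,
 * before the acquire times out.
 */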
4494
4495const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
4496{
4497 switch (interleave_level) {
4498 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
4499 return "LOW";
4500
4501 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
4502 return "MEDIUM";
4503
4504 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
4505 return "HIGH";
4506
4507 default:
4508 return "?";
4509 }
4510}
4511
4512u32 gk20a_fifo_get_sema_wait_cmd_size(void)
4513{
4514 return 8;
4515}
4516
4517u32 gk20a_fifo_get_sema_incr_cmd_size(void)
4518{
4519 return 10;
4520}
4521
4522void gk20a_fifo_add_sema_cmd(struct gk20a *g,
4523 struct nvgpu_semaphore *s, u64 sema_va,
4524 struct priv_cmd_entry *cmd,
4525 u32 off, bool acquire, bool wfi)
4526{
4527 nvgpu_log_fn(g, " ");
4528
4529 /* semaphore_a */
4530 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
4531 /* offset_upper */
4532 nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff);
4533 /* semaphore_b */
4534 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
4535 /* offset */
4536 nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff);
4537
4538 if (acquire) {
4539 /* semaphore_c */
4540 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4541 /* payload */
4542 nvgpu_mem_wr32(g, cmd->mem, off++,
4543 nvgpu_semaphore_get_value(s));
4544 /* semaphore_d */
4545 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4546 /* operation: acq_geq, switch_en */
4547 nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
4548 } else {
4549 /* semaphore_c */
4550 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4551 /* payload */
4552 nvgpu_mem_wr32(g, cmd->mem, off++,
4553 nvgpu_semaphore_get_value(s));
4554 /* semaphore_d */
4555 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4556 /* operation: release, wfi */
4557 nvgpu_mem_wr32(g, cmd->mem, off++,
4558 0x2 | ((wfi ? 0x0 : 0x1) << 20));
4559 /* non_stall_int */
4560 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
4561 /* ignored */
4562 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4563 }
4564}
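/*
 * Both paths above start with the same four words (SEMAPHORE_A/B carrying
 * the semaphore GPU VA). The acquire path then appends the payload and an
 * acq_geq + switch_en operation, 8 words in total, matching
 * gk20a_fifo_get_sema_wait_cmd_size(); the release path appends the
 * payload, a release operation (with or without WFI) and a trailing
 * non-stall interrupt, 10 words in total, matching
 * gk20a_fifo_get_sema_incr_cmd_size().
 */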
4565
4566#ifdef CONFIG_TEGRA_GK20A_NVHOST
4567void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
4568 struct priv_cmd_entry *cmd, u32 off,
4569 u32 id, u32 thresh, u64 gpu_va)
4570{
4571 nvgpu_log_fn(g, " ");
4572
4573 off = cmd->off + off;
4574 /* syncpoint_a */
4575 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4576 /* payload */
4577 nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
4578 /* syncpoint_b */
4579 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4580 /* syncpt_id, switch_en, wait */
4581 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
4582}
4583
4584u32 gk20a_fifo_get_syncpt_wait_cmd_size(void)
4585{
4586 return 4;
4587}
4588
4589u32 gk20a_fifo_get_syncpt_incr_per_release(void)
4590{
4591 return 2;
4592}
4593
4594void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
4595 bool wfi_cmd, struct priv_cmd_entry *cmd,
4596 u32 id, u64 gpu_va)
4597{
4598 u32 off = cmd->off;
4599
4600 nvgpu_log_fn(g, " ");
4601 if (wfi_cmd) {
4602 /* wfi */
4603 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001E);
4604 /* handle, ignored */
4605 nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000);
4606 }
4607 /* syncpoint_a */
4608 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4609 /* payload, ignored */
4610 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4611 /* syncpoint_b */
4612 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4613 /* syncpt_id, incr */
4614 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4615 /* syncpoint_b */
4616 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4617 /* syncpt_id, incr */
4618 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4619
4620}
4621
4622u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd)
4623{
4624	if (wfi_cmd) {
4625		return 8;
4626	}
4627	return 6;
4628}
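/*
 * The sizes returned above follow directly from
 * gk20a_fifo_add_syncpt_incr_cmd(): the optional WFI pair adds two words on
 * top of the six words carrying the two SYNCPOINT_B increments, which is
 * also why gk20a_fifo_get_syncpt_incr_per_release() reports 2.
 */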
4629
4630void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
4631 struct nvgpu_mem *syncpt_buf)
4632{
4633
4634}
4635
4636int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
4637 u32 syncpt_id, struct nvgpu_mem *syncpt_buf)
4638{
4639 return 0;
4640}
4641#endif
diff --git a/include/gk20a/fifo_gk20a.h b/include/gk20a/fifo_gk20a.h
deleted file mode 100644
index 078236d..0000000
--- a/include/gk20a/fifo_gk20a.h
+++ /dev/null
@@ -1,472 +0,0 @@
1/*
2 * GK20A graphics fifo (gr host)
3 *
4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef FIFO_GK20A_H
25#define FIFO_GK20A_H
26
27#include <nvgpu/kref.h>
28
29struct gk20a_debug_output;
30struct mmu_fault_info;
31struct nvgpu_semaphore;
32struct channel_gk20a;
33struct tsg_gk20a;
34
35enum {
36 NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW = 0,
37 NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM,
38 NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH,
39 NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS,
40};
41
42#define MAX_RUNLIST_BUFFERS 2
43
44#define FIFO_INVAL_ENGINE_ID ((u32)~0)
45#define FIFO_INVAL_CHANNEL_ID ((u32)~0)
46#define FIFO_INVAL_TSG_ID ((u32)~0)
47#define FIFO_INVAL_RUNLIST_ID ((u32)~0)
48
49#define ID_TYPE_CHANNEL 0
50#define ID_TYPE_TSG 1
51#define ID_TYPE_UNKNOWN ((u32)~0)
52
53#define RC_YES 1
54#define RC_NO 0
55
56#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000
57
58#define RC_TYPE_NO_RC 0
59#define RC_TYPE_MMU_FAULT 1
60#define RC_TYPE_PBDMA_FAULT 2
61#define RC_TYPE_GR_FAULT 3
62#define RC_TYPE_PREEMPT_TIMEOUT 4
63#define RC_TYPE_CTXSW_TIMEOUT 5
64#define RC_TYPE_RUNLIST_UPDATE_TIMEOUT 6
65#define RC_TYPE_FORCE_RESET 7
66#define RC_TYPE_SCHED_ERR 8
67
68#define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL
69#define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL
70
71/*
72 * Number of entries in the kickoff latency buffer, used to compute the
73 * profiling histogram. The value is chosen to be statistically
74 * significant for a histogram with 5% steps.
75 */
76#ifdef CONFIG_DEBUG_FS
77#define FIFO_PROFILING_ENTRIES 16384
78#endif
79
80#define RUNLIST_DISABLED 0
81#define RUNLIST_ENABLED 1
82
83/* generally corresponds to the "pbdma" engine */
84
85struct fifo_runlist_info_gk20a {
86 unsigned long *active_channels;
87 unsigned long *active_tsgs;
88 /* Each engine has its own SW and HW runlist buffer.*/
89 struct nvgpu_mem mem[MAX_RUNLIST_BUFFERS];
90 u32 cur_buffer;
91 u32 total_entries;
92 u32 pbdma_bitmask; /* pbdmas supported for this runlist*/
93 u32 eng_bitmask; /* engines using this runlist */
94 u32 reset_eng_bitmask; /* engines to be reset during recovery */
95 u32 count; /* cached runlist_hw_submit parameter */
96 bool stopped;
97 bool support_tsg;
98 /* protect ch/tsg/runlist preempt & runlist update */
99 struct nvgpu_mutex runlist_lock;
100};
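/*
 * Note on the buffers above: mem[] holds MAX_RUNLIST_BUFFERS (2) copies of
 * the runlist and cur_buffer records which copy was last handed to the
 * hardware, so an updated runlist can be built in the other buffer before
 * being submitted through gk20a_fifo_runlist_hw_submit(); count caches the
 * entry count used for that submit.
 */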
101
102enum {
103 ENGINE_GR_GK20A = 0U,
104 ENGINE_GRCE_GK20A = 1U,
105 ENGINE_ASYNC_CE_GK20A = 2U,
106 ENGINE_INVAL_GK20A = 3U,
107};
108
109struct fifo_pbdma_exception_info_gk20a {
110 u32 status_r; /* raw register value from hardware */
111 u32 id, next_id;
112 u32 chan_status_v; /* raw value from hardware */
113 bool id_is_chid, next_id_is_chid;
114 bool chsw_in_progress;
115};
116
117struct fifo_engine_exception_info_gk20a {
118 u32 status_r; /* raw register value from hardware */
119 u32 id, next_id;
120 u32 ctx_status_v; /* raw value from hardware */
121 bool id_is_chid, next_id_is_chid;
122 bool faulted, idle, ctxsw_in_progress;
123};
124
125struct fifo_engine_info_gk20a {
126 u32 engine_id;
127 u32 runlist_id;
128 u32 intr_mask;
129 u32 reset_mask;
130 u32 pbdma_id;
131 u32 inst_id;
132 u32 pri_base;
133 u32 fault_id;
134 u32 engine_enum;
135 struct fifo_pbdma_exception_info_gk20a pbdma_exception_info;
136 struct fifo_engine_exception_info_gk20a engine_exception_info;
137};
138
139enum {
140 PROFILE_IOCTL_ENTRY = 0U,
141 PROFILE_ENTRY,
142 PROFILE_JOB_TRACKING,
143 PROFILE_APPEND,
144 PROFILE_END,
145 PROFILE_IOCTL_EXIT,
146 PROFILE_MAX
147};
148
149struct fifo_profile_gk20a {
150 u64 timestamp[PROFILE_MAX];
151};
152
153struct fifo_gk20a {
154 struct gk20a *g;
155 unsigned int num_channels;
156 unsigned int runlist_entry_size;
157 unsigned int num_runlist_entries;
158
159 unsigned int num_pbdma;
160 u32 *pbdma_map;
161
162 struct fifo_engine_info_gk20a *engine_info;
163 u32 max_engines;
164 u32 num_engines;
165 u32 *active_engines_list;
166
167 struct fifo_runlist_info_gk20a *runlist_info;
168 u32 max_runlists;
169#ifdef CONFIG_DEBUG_FS
170 struct {
171 struct fifo_profile_gk20a *data;
172 nvgpu_atomic_t get;
173 bool enabled;
174 u64 *sorted;
175 struct nvgpu_ref ref;
176 struct nvgpu_mutex lock;
177 } profile;
178#endif
179 struct nvgpu_mem userd;
180 u32 userd_entry_size;
181
182 unsigned int used_channels;
183 struct channel_gk20a *channel;
184 /* zero-kref'd channels here */
185 struct nvgpu_list_node free_chs;
186 struct nvgpu_mutex free_chs_mutex;
187 struct nvgpu_mutex engines_reset_mutex;
188
189 struct tsg_gk20a *tsg;
190 struct nvgpu_mutex tsg_inuse_mutex;
191
192 void (*remove_support)(struct fifo_gk20a *);
193 bool sw_ready;
194 struct {
195 /* share info between isrs and non-isr code */
196 struct {
197 struct nvgpu_mutex mutex;
198 } isr;
199 struct {
200 u32 device_fatal_0;
201 u32 channel_fatal_0;
202 u32 restartable_0;
203 } pbdma;
204 struct {
205
206 } engine;
207
208
209 } intr;
210
211 unsigned long deferred_fault_engines;
212 bool deferred_reset_pending;
213 struct nvgpu_mutex deferred_reset_mutex;
214
215 u32 max_subctx_count;
216 u32 channel_base;
217};
218
219struct ch_state {
220 int pid;
221 int refs;
222 bool deterministic;
223 u32 inst_block[0];
224};
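/*
 * inst_block[] above is a zero-length trailing array: callers allocate
 * sizeof(struct ch_state) plus the instance block size and copy the
 * channel's RAMFC into the tail. A minimal sketch, mirroring the dump path
 * in fifo_gk20a.c:
 *
 *	struct ch_state *st = nvgpu_kmalloc(g,
 *			sizeof(*st) + ram_in_alloc_size_v());
 *	if (st != NULL) {
 *		nvgpu_mem_rd_n(g, &ch->inst_block, 0,
 *				&st->inst_block[0], ram_in_alloc_size_v());
 *	}
 */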
225
226int gk20a_init_fifo_support(struct gk20a *g);
227
228int gk20a_init_fifo_setup_hw(struct gk20a *g);
229
230void gk20a_fifo_isr(struct gk20a *g);
231u32 gk20a_fifo_nonstall_isr(struct gk20a *g);
232
233int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch);
234int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
235int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch);
236
237int gk20a_fifo_enable_engine_activity(struct gk20a *g,
238 struct fifo_engine_info_gk20a *eng_info);
239int gk20a_fifo_enable_all_engine_activity(struct gk20a *g);
240int gk20a_fifo_disable_engine_activity(struct gk20a *g,
241 struct fifo_engine_info_gk20a *eng_info,
242 bool wait_for_idle);
243int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
244 bool wait_for_idle);
245void gk20a_fifo_enable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
246void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
247
248u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
249
250int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
251int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
252 bool wait_preempt);
253
254int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
255 bool add, bool wait_for_finish);
256
257int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
258 u32 chid, bool add,
259 bool wait_for_finish);
260int gk20a_fifo_suspend(struct gk20a *g);
261
262bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
263
264void gk20a_fifo_recover(struct gk20a *g,
265 u32 engine_ids, /* if zero, will be queried from HW */
266 u32 hw_id, /* if ~0, will be queried from HW */
267 bool id_is_tsg, /* ignored if hw_id == ~0 */
268 bool id_is_known, bool verbose, int rc_type);
269void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
270 bool verbose, u32 rc_type);
271void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
272 bool verbose, u32 rc_type);
273int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
274 u32 err_code, bool verbose);
275void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id);
276int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
277int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch);
278
279void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
280 unsigned long fault_id);
281int gk20a_fifo_wait_engine_idle(struct gk20a *g);
282bool gk20a_fifo_is_engine_busy(struct gk20a *g);
283u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
284u32 gk20a_fifo_act_eng_interrupt_mask(struct gk20a *g, u32 act_eng_id);
285u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
286u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
287 int *__id, bool *__is_tsg);
288void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
289 struct tsg_gk20a *tsg);
290void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt);
291void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
292 struct channel_gk20a *refch);
293bool gk20a_fifo_error_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
294bool gk20a_fifo_error_ch(struct gk20a *g, struct channel_gk20a *refch);
295
296void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
297int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
298 u32 id,
299 u32 runlist_id,
300 u32 new_level);
301int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
302
303const char *gk20a_fifo_interleave_level_name(u32 interleave_level);
304
305int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
306 u32 *inst_id);
307
308u32 gk20a_fifo_get_engine_ids(struct gk20a *g, u32 engine_id[],
309 u32 engine_id_sz, u32 engine_enum);
310
311void gk20a_fifo_delete_runlist(struct fifo_gk20a *f);
312
313struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g,
314 u32 engine_id);
315
316bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id);
317
318u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g);
319
320int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch);
321
322u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g);
323
324u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g);
325
326u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g);
327
328bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id);
329
330int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 chid,
331 bool add, bool wait_for_finish);
332
333int gk20a_fifo_init_engine_info(struct fifo_gk20a *f);
334
335void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist);
336void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
337void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
338 u32 runlist_state);
339
340u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
341void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
342u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c);
343
344bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid);
345#ifdef CONFIG_DEBUG_FS
346struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g);
347void gk20a_fifo_profile_release(struct gk20a *g,
348 struct fifo_profile_gk20a *profile);
349void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx);
350#else
351static inline struct fifo_profile_gk20a *
352gk20a_fifo_profile_acquire(struct gk20a *g)
353{
354 return NULL;
355}
356static inline void gk20a_fifo_profile_release(struct gk20a *g,
357 struct fifo_profile_gk20a *profile)
358{
359}
360static inline void gk20a_fifo_profile_snapshot(
361 struct fifo_profile_gk20a *profile, int idx)
362{
363}
364#endif
365
366void gk20a_dump_channel_status_ramfc(struct gk20a *g,
367 struct gk20a_debug_output *o,
368 u32 chid,
369 struct ch_state *ch_state);
370void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
371 struct gk20a_debug_output *o);
372void gk20a_dump_pbdma_status(struct gk20a *g,
373 struct gk20a_debug_output *o);
374void gk20a_dump_eng_status(struct gk20a *g,
375 struct gk20a_debug_output *o);
376const char *gk20a_decode_ccsr_chan_status(u32 index);
377const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index);
378void gk20a_fifo_enable_channel(struct channel_gk20a *ch);
379void gk20a_fifo_disable_channel(struct channel_gk20a *ch);
380
381bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid);
382bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid);
383int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch);
384
385struct channel_gk20a *gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr);
386void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a);
387
388u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g);
389
390int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
391 unsigned int id_type, bool preempt_retries_left);
392int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg,
393 bool preempt_retries_left);
394void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
395void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch);
396int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
397 u64 gpfifo_base, u32 gpfifo_entries,
398 unsigned long timeout, u32 flags);
399void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c);
400int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
401void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch);
402int gk20a_fifo_setup_userd(struct channel_gk20a *c);
403u32 gk20a_fifo_pbdma_acquire_val(u64 timeout);
404
405
406u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
407 struct fifo_runlist_info_gk20a *runlist,
408 u32 cur_level,
409 u32 *runlist_entry,
410 bool interleave_enabled,
411 bool prev_empty,
412 u32 *entries_left);
413void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
414 u32 count, u32 buffer_index);
415int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id);
416int gk20a_init_fifo_setup_sw_common(struct gk20a *g);
417int gk20a_init_fifo_setup_sw(struct gk20a *g);
418void gk20a_fifo_handle_runlist_event(struct gk20a *g);
419bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
420 u32 engine_subid, bool fake_fault);
421
422void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
423 u32 hw_id, unsigned int id_type, unsigned int rc_type,
424 struct mmu_fault_info *mmfault);
425
426bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
427 bool *verbose, u32 *ms);
428bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
429 bool *verbose, u32 *ms);
430void gk20a_fifo_teardown_mask_intr(struct gk20a *g);
431void gk20a_fifo_teardown_unmask_intr(struct gk20a *g);
432bool gk20a_fifo_handle_sched_error(struct gk20a *g);
433
434void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
435 int pbdma_method_index);
436unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
437 u32 pbdma_intr_0, u32 *handled, u32 *error_notifier);
438unsigned int gk20a_fifo_handle_pbdma_intr_1(struct gk20a *g, u32 pbdma_id,
439 u32 pbdma_intr_1, u32 *handled, u32 *error_notifier);
440u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
441 u32 pbdma_id, unsigned int rc);
442
443u32 gk20a_fifo_default_timeslice_us(struct gk20a *g);
444
445#ifdef CONFIG_TEGRA_GK20A_NVHOST
446void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
447 struct priv_cmd_entry *cmd, u32 off,
448 u32 id, u32 thresh, u64 gpu_va);
449u32 gk20a_fifo_get_syncpt_wait_cmd_size(void);
450u32 gk20a_fifo_get_syncpt_incr_per_release(void);
451void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
452 bool wfi_cmd, struct priv_cmd_entry *cmd,
453 u32 id, u64 gpu_va);
454u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd);
455void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
456 struct nvgpu_mem *syncpt_buf);
457int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
458 u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
459#endif
460
461void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
462 struct mmu_fault_info *mmfault);
463void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault);
464void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault);
465void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault);
466u32 gk20a_fifo_get_sema_wait_cmd_size(void);
467u32 gk20a_fifo_get_sema_incr_cmd_size(void);
468void gk20a_fifo_add_sema_cmd(struct gk20a *g,
469 struct nvgpu_semaphore *s, u64 sema_va,
470 struct priv_cmd_entry *cmd,
471 u32 off, bool acquire, bool wfi);
472#endif /* FIFO_GK20A_H */
diff --git a/include/gk20a/flcn_gk20a.c b/include/gk20a/flcn_gk20a.c
deleted file mode 100644
index fdcaef9..0000000
--- a/include/gk20a/flcn_gk20a.c
+++ /dev/null
@@ -1,759 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22#include <nvgpu/falcon.h>
23#include <nvgpu/pmu.h>
24#include <nvgpu/io.h>
25
26#include "gk20a/gk20a.h"
27#include "gk20a/flcn_gk20a.h"
28
29#include <nvgpu/hw/gm20b/hw_falcon_gm20b.h>
30
31static int gk20a_flcn_reset(struct nvgpu_falcon *flcn)
32{
33 struct gk20a *g = flcn->g;
34 u32 base_addr = flcn->flcn_base;
35 u32 unit_status = 0;
36 int status = 0;
37
38 if (flcn->flcn_engine_dep_ops.reset_eng) {
39 /* falcon & engine reset */
40 status = flcn->flcn_engine_dep_ops.reset_eng(g);
41 } else {
42 /* do falcon CPU hard reset */
43 unit_status = gk20a_readl(g, base_addr +
44 falcon_falcon_cpuctl_r());
45 gk20a_writel(g, base_addr + falcon_falcon_cpuctl_r(),
46 (unit_status | falcon_falcon_cpuctl_hreset_f(1)));
47 }
48
49 return status;
50}
51
52static bool gk20a_flcn_clear_halt_interrupt_status(struct nvgpu_falcon *flcn)
53{
54 struct gk20a *g = flcn->g;
55 u32 base_addr = flcn->flcn_base;
56 u32 data = 0;
57 bool status = false;
58
59 gk20a_writel(g, base_addr + falcon_falcon_irqsclr_r(),
60 gk20a_readl(g, base_addr + falcon_falcon_irqsclr_r()) |
61 (0x10));
62 data = gk20a_readl(g, (base_addr + falcon_falcon_irqstat_r()));
63
64 if ((data & falcon_falcon_irqstat_halt_true_f()) !=
65 falcon_falcon_irqstat_halt_true_f()) {
66 /*halt irq is clear*/
67 status = true;
68 }
69
70 return status;
71}
72
73static void gk20a_flcn_set_irq(struct nvgpu_falcon *flcn, bool enable)
74{
75 struct gk20a *g = flcn->g;
76 u32 base_addr = flcn->flcn_base;
77
78 if (!flcn->is_interrupt_enabled) {
79 nvgpu_warn(g, "Interrupt not supported on flcn 0x%x ",
80 flcn->flcn_id);
81 /* Keep interrupt disabled */
82 enable = false;
83 }
84
85 if (enable) {
86 gk20a_writel(g, base_addr + falcon_falcon_irqmset_r(),
87 flcn->intr_mask);
88 gk20a_writel(g, base_addr + falcon_falcon_irqdest_r(),
89 flcn->intr_dest);
90 } else {
91 gk20a_writel(g, base_addr + falcon_falcon_irqmclr_r(),
92 0xffffffff);
93 }
94}
95
96static bool gk20a_is_falcon_cpu_halted(struct nvgpu_falcon *flcn)
97{
98 struct gk20a *g = flcn->g;
99 u32 base_addr = flcn->flcn_base;
100
101 return (gk20a_readl(g, base_addr + falcon_falcon_cpuctl_r()) &
102 falcon_falcon_cpuctl_halt_intr_m() ?
103 true : false);
104}
105
106static bool gk20a_is_falcon_idle(struct nvgpu_falcon *flcn)
107{
108 struct gk20a *g = flcn->g;
109 u32 base_addr = flcn->flcn_base;
110 u32 unit_status = 0;
111 bool status = false;
112
113 unit_status = gk20a_readl(g,
114 base_addr + falcon_falcon_idlestate_r());
115
116 if (falcon_falcon_idlestate_falcon_busy_v(unit_status) == 0 &&
117 falcon_falcon_idlestate_ext_busy_v(unit_status) == 0) {
118 status = true;
119 } else {
120 status = false;
121 }
122
123 return status;
124}
125
126static bool gk20a_is_falcon_scrubbing_done(struct nvgpu_falcon *flcn)
127{
128 struct gk20a *g = flcn->g;
129 u32 base_addr = flcn->flcn_base;
130 u32 unit_status = 0;
131 bool status = false;
132
133 unit_status = gk20a_readl(g,
134 base_addr + falcon_falcon_dmactl_r());
135
136 if (unit_status & (falcon_falcon_dmactl_dmem_scrubbing_m() |
137 falcon_falcon_dmactl_imem_scrubbing_m())) {
138 status = false;
139 } else {
140 status = true;
141 }
142
143 return status;
144}
145
146static u32 gk20a_falcon_get_mem_size(struct nvgpu_falcon *flcn,
147 enum flcn_mem_type mem_type)
148{
149 struct gk20a *g = flcn->g;
150 u32 mem_size = 0;
151 u32 hw_cfg_reg = gk20a_readl(g,
152 flcn->flcn_base + falcon_falcon_hwcfg_r());
153
154 if (mem_type == MEM_DMEM) {
155 mem_size = falcon_falcon_hwcfg_dmem_size_v(hw_cfg_reg)
156 << GK20A_PMU_DMEM_BLKSIZE2;
157 } else {
158 mem_size = falcon_falcon_hwcfg_imem_size_v(hw_cfg_reg)
159 << GK20A_PMU_DMEM_BLKSIZE2;
160 }
161
162 return mem_size;
163}
164
165static int flcn_mem_overflow_check(struct nvgpu_falcon *flcn,
166 u32 offset, u32 size, enum flcn_mem_type mem_type)
167{
168 struct gk20a *g = flcn->g;
169 u32 mem_size = 0;
170
171 if (size == 0) {
172 nvgpu_err(g, "size is zero");
173 return -EINVAL;
174 }
175
176 if (offset & 0x3) {
177 nvgpu_err(g, "offset (0x%08x) not 4-byte aligned", offset);
178 return -EINVAL;
179 }
180
181 mem_size = gk20a_falcon_get_mem_size(flcn, mem_type);
182 if (!(offset <= mem_size && (offset + size) <= mem_size)) {
183 nvgpu_err(g, "flcn-id 0x%x, copy overflow ",
184 flcn->flcn_id);
185 nvgpu_err(g, "total size 0x%x, offset 0x%x, copy size 0x%x",
186 mem_size, offset, size);
187 return -EINVAL;
188 }
189
190 return 0;
191}
192
193static int gk20a_flcn_copy_from_dmem(struct nvgpu_falcon *flcn,
194 u32 src, u8 *dst, u32 size, u8 port)
195{
196 struct gk20a *g = flcn->g;
197 u32 base_addr = flcn->flcn_base;
198 u32 i, words, bytes;
199 u32 data, addr_mask;
200 u32 *dst_u32 = (u32 *)dst;
201
202 nvgpu_log_fn(g, " src dmem offset - %x, size - %x", src, size);
203
204 if (flcn_mem_overflow_check(flcn, src, size, MEM_DMEM)) {
205 nvgpu_err(g, "incorrect parameters");
206 return -EINVAL;
207 }
208
209 nvgpu_mutex_acquire(&flcn->copy_lock);
210
211 words = size >> 2;
212 bytes = size & 0x3;
213
214 addr_mask = falcon_falcon_dmemc_offs_m() |
215 falcon_falcon_dmemc_blk_m();
216
217 src &= addr_mask;
218
219 gk20a_writel(g, base_addr + falcon_falcon_dmemc_r(port),
220 src | falcon_falcon_dmemc_aincr_f(1));
221
222 for (i = 0; i < words; i++) {
223 dst_u32[i] = gk20a_readl(g,
224 base_addr + falcon_falcon_dmemd_r(port));
225 }
226
227 if (bytes > 0) {
228 data = gk20a_readl(g, base_addr + falcon_falcon_dmemd_r(port));
229 for (i = 0; i < bytes; i++) {
230 dst[(words << 2) + i] = ((u8 *)&data)[i];
231 }
232 }
233
234 nvgpu_mutex_release(&flcn->copy_lock);
235 return 0;
236}
237
238static int gk20a_flcn_copy_to_dmem(struct nvgpu_falcon *flcn,
239 u32 dst, u8 *src, u32 size, u8 port)
240{
241 struct gk20a *g = flcn->g;
242 u32 base_addr = flcn->flcn_base;
243 u32 i, words, bytes;
244 u32 data, addr_mask;
245 u32 *src_u32 = (u32 *)src;
246
247 nvgpu_log_fn(g, "dest dmem offset - %x, size - %x", dst, size);
248
249 if (flcn_mem_overflow_check(flcn, dst, size, MEM_DMEM)) {
250 nvgpu_err(g, "incorrect parameters");
251 return -EINVAL;
252 }
253
254 nvgpu_mutex_acquire(&flcn->copy_lock);
255
256 words = size >> 2;
257 bytes = size & 0x3;
258
259 addr_mask = falcon_falcon_dmemc_offs_m() |
260 falcon_falcon_dmemc_blk_m();
261
262 dst &= addr_mask;
263
264 gk20a_writel(g, base_addr + falcon_falcon_dmemc_r(port),
265 dst | falcon_falcon_dmemc_aincw_f(1));
266
267 for (i = 0; i < words; i++) {
268 gk20a_writel(g,
269 base_addr + falcon_falcon_dmemd_r(port), src_u32[i]);
270 }
271
272 if (bytes > 0) {
273 data = 0;
274 for (i = 0; i < bytes; i++) {
275 ((u8 *)&data)[i] = src[(words << 2) + i];
276 }
277 gk20a_writel(g, base_addr + falcon_falcon_dmemd_r(port), data);
278 }
279
280 size = ALIGN(size, 4);
281 data = gk20a_readl(g,
282 base_addr + falcon_falcon_dmemc_r(port)) & addr_mask;
283 if (data != ((dst + size) & addr_mask)) {
284 nvgpu_warn(g, "copy failed. bytes written %d, expected %d",
285 data - dst, size);
286 }
287
288 nvgpu_mutex_release(&flcn->copy_lock);
289
290 return 0;
291}
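/*
 * A minimal usage sketch for the DMEM copy helpers above (the offset and
 * buffer size here are made up for illustration): copy a 16-byte blob into
 * DMEM offset 0 on port 0 and read it back.
 *
 *	u8 blob[16] = { 0 };
 *	u8 readback[16];
 *
 *	if (gk20a_flcn_copy_to_dmem(flcn, 0, blob, sizeof(blob), 0) == 0)
 *		(void) gk20a_flcn_copy_from_dmem(flcn, 0, readback,
 *						sizeof(readback), 0);
 *
 * Both helpers program DMEMC with auto-increment, so DMEMD can be accessed
 * back to back; the trailing DMEMC read in the write path only sanity
 * checks that the expected number of words was consumed.
 */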
292
293static int gk20a_flcn_copy_from_imem(struct nvgpu_falcon *flcn, u32 src,
294 u8 *dst, u32 size, u8 port)
295{
296 struct gk20a *g = flcn->g;
297 u32 base_addr = flcn->flcn_base;
298 u32 *dst_u32 = (u32 *)dst;
299 u32 words = 0;
300 u32 bytes = 0;
301 u32 data = 0;
302 u32 blk = 0;
303 u32 i = 0;
304
305 nvgpu_log_info(g, "download %d bytes from 0x%x", size, src);
306
307 if (flcn_mem_overflow_check(flcn, src, size, MEM_IMEM)) {
308 nvgpu_err(g, "incorrect parameters");
309 return -EINVAL;
310 }
311
312 nvgpu_mutex_acquire(&flcn->copy_lock);
313
314 words = size >> 2;
315 bytes = size & 0x3;
316 blk = src >> 8;
317
318 nvgpu_log_info(g, "download %d words from 0x%x block %d",
319 words, src, blk);
320
321 gk20a_writel(g, base_addr + falcon_falcon_imemc_r(port),
322 falcon_falcon_imemc_offs_f(src >> 2) |
323 falcon_falcon_imemc_blk_f(blk) |
324 falcon_falcon_dmemc_aincr_f(1));
325
326 for (i = 0; i < words; i++) {
327 dst_u32[i] = gk20a_readl(g,
328 base_addr + falcon_falcon_imemd_r(port));
329 }
330
331 if (bytes > 0) {
332 data = gk20a_readl(g, base_addr + falcon_falcon_imemd_r(port));
333 for (i = 0; i < bytes; i++) {
334 dst[(words << 2) + i] = ((u8 *)&data)[i];
335 }
336 }
337
338 nvgpu_mutex_release(&flcn->copy_lock);
339
340 return 0;
341}
342
343static int gk20a_flcn_copy_to_imem(struct nvgpu_falcon *flcn, u32 dst,
344 u8 *src, u32 size, u8 port, bool sec, u32 tag)
345{
346 struct gk20a *g = flcn->g;
347 u32 base_addr = flcn->flcn_base;
348 u32 *src_u32 = (u32 *)src;
349 u32 words = 0;
350 u32 blk = 0;
351 u32 i = 0;
352
353 nvgpu_log_info(g, "upload %d bytes to 0x%x", size, dst);
354
355 if (flcn_mem_overflow_check(flcn, dst, size, MEM_IMEM)) {
356 nvgpu_err(g, "incorrect parameters");
357 return -EINVAL;
358 }
359
360 nvgpu_mutex_acquire(&flcn->copy_lock);
361
362 words = size >> 2;
363 blk = dst >> 8;
364
365 nvgpu_log_info(g, "upload %d words to 0x%x block %d, tag 0x%x",
366 words, dst, blk, tag);
367
368 gk20a_writel(g, base_addr + falcon_falcon_imemc_r(port),
369 falcon_falcon_imemc_offs_f(dst >> 2) |
370 falcon_falcon_imemc_blk_f(blk) |
371 /* Set Auto-Increment on write */
372 falcon_falcon_imemc_aincw_f(1) |
373 falcon_falcon_imemc_secure_f(sec ? 1U : 0U));
374
375 for (i = 0; i < words; i++) {
376 if (i % 64 == 0) {
377 /* tag is always 256B aligned */
378 gk20a_writel(g, base_addr + falcon_falcon_imemt_r(0),
379 tag);
380 tag++;
381 }
382
383 gk20a_writel(g, base_addr + falcon_falcon_imemd_r(port),
384 src_u32[i]);
385 }
386
387 /* WARNING : setting remaining bytes in block to 0x0 */
388 while (i % 64) {
389 gk20a_writel(g, base_addr + falcon_falcon_imemd_r(port), 0);
390 i++;
391 }
392
393 nvgpu_mutex_release(&flcn->copy_lock);
394
395 return 0;
396}
397
398static int gk20a_falcon_bootstrap(struct nvgpu_falcon *flcn,
399 u32 boot_vector)
400{
401 struct gk20a *g = flcn->g;
402 u32 base_addr = flcn->flcn_base;
403
404 nvgpu_log_info(g, "boot vec 0x%x", boot_vector);
405
406 gk20a_writel(g, base_addr + falcon_falcon_dmactl_r(),
407 falcon_falcon_dmactl_require_ctx_f(0));
408
409 gk20a_writel(g, base_addr + falcon_falcon_bootvec_r(),
410 falcon_falcon_bootvec_vec_f(boot_vector));
411
412 gk20a_writel(g, base_addr + falcon_falcon_cpuctl_r(),
413 falcon_falcon_cpuctl_startcpu_f(1));
414
415 return 0;
416}
417
418static u32 gk20a_falcon_mailbox_read(struct nvgpu_falcon *flcn,
419 u32 mailbox_index)
420{
421 struct gk20a *g = flcn->g;
422 u32 data = 0;
423
424 if (mailbox_index < FALCON_MAILBOX_COUNT) {
425 data = gk20a_readl(g, flcn->flcn_base + (mailbox_index ?
426 falcon_falcon_mailbox1_r() :
427 falcon_falcon_mailbox0_r()));
428 } else {
429 nvgpu_err(g, "incorrect mailbox id %d", mailbox_index);
430 }
431
432 return data;
433}
434
435static void gk20a_falcon_mailbox_write(struct nvgpu_falcon *flcn,
436 u32 mailbox_index, u32 data)
437{
438 struct gk20a *g = flcn->g;
439
440 if (mailbox_index < FALCON_MAILBOX_COUNT) {
441 gk20a_writel(g, flcn->flcn_base + (mailbox_index ?
442 falcon_falcon_mailbox1_r() :
443 falcon_falcon_mailbox0_r()),
444 data);
445 } else {
446 nvgpu_err(g, "incorrect mailbox id %d", mailbox_index);
447 }
448}
449
450static int gk20a_falcon_bl_bootstrap(struct nvgpu_falcon *flcn,
451 struct nvgpu_falcon_bl_info *bl_info)
452{
453 struct gk20a *g = flcn->g;
454 u32 base_addr = flcn->flcn_base;
455 u32 virt_addr = 0;
456 u32 dst = 0;
457 int err = 0;
458
459	/* copy bootloader interface structure to DMEM */
460 err = gk20a_flcn_copy_to_dmem(flcn, 0, (u8 *)bl_info->bl_desc,
461 bl_info->bl_desc_size, (u8)0);
462 if (err != 0) {
463 goto exit;
464 }
465
466 /* copy bootloader to TOP of IMEM */
467 dst = (falcon_falcon_hwcfg_imem_size_v(gk20a_readl(g,
468 base_addr + falcon_falcon_hwcfg_r())) << 8) - bl_info->bl_size;
469
470 err = gk20a_flcn_copy_to_imem(flcn, dst, (u8 *)(bl_info->bl_src),
471 bl_info->bl_size, (u8)0, false, bl_info->bl_start_tag);
472 if (err != 0) {
473 goto exit;
474 }
475
476 gk20a_falcon_mailbox_write(flcn, FALCON_MAILBOX_0, 0xDEADA5A5U);
477
478 virt_addr = bl_info->bl_start_tag << 8;
479
480 err = gk20a_falcon_bootstrap(flcn, virt_addr);
481
482exit:
483 if (err != 0) {
484 nvgpu_err(g, "falcon id-0x%x bootstrap failed", flcn->flcn_id);
485 }
486
487 return err;
488}
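/*
 * Rough shape of the bootstrap sequence above: the bootloader descriptor is
 * copied to DMEM offset 0, the bootloader image is copied to the top of
 * IMEM (the hwcfg IMEM size is in 256-byte blocks, hence the << 8), mailbox
 * 0 is primed with the 0xDEADA5A5 handshake value, and the falcon CPU is
 * started at virt_addr = bl_start_tag << 8, i.e. at the first word of the
 * IMEM block that was tagged when the image was uploaded.
 */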
489
490static void gk20a_falcon_dump_imblk(struct nvgpu_falcon *flcn)
491{
492 struct gk20a *g = flcn->g;
493 u32 base_addr = flcn->flcn_base;
494 u32 i = 0, j = 0;
495 u32 data[8] = {0};
496 u32 block_count = 0;
497
498 block_count = falcon_falcon_hwcfg_imem_size_v(gk20a_readl(g,
499 flcn->flcn_base + falcon_falcon_hwcfg_r()));
500
501	/* round block_count down to a multiple of 8 */
502 block_count &= ~0x7;
503 nvgpu_err(g, "FALCON IMEM BLK MAPPING (PA->VA) (%d TOTAL):",
504 block_count);
505
506 for (i = 0; i < block_count; i += 8) {
507 for (j = 0; j < 8; j++) {
508 gk20a_writel(g, flcn->flcn_base +
509 falcon_falcon_imctl_debug_r(),
510 falcon_falcon_imctl_debug_cmd_f(0x2) |
511 falcon_falcon_imctl_debug_addr_blk_f(i + j));
512
513 data[j] = gk20a_readl(g, base_addr +
514 falcon_falcon_imstat_r());
515 }
516
517 nvgpu_err(g, " %#04x: %#010x %#010x %#010x %#010x",
518 i, data[0], data[1], data[2], data[3]);
519 nvgpu_err(g, " %#04x: %#010x %#010x %#010x %#010x",
520 i + 4, data[4], data[5], data[6], data[7]);
521 }
522}
523
524static void gk20a_falcon_dump_pc_trace(struct nvgpu_falcon *flcn)
525{
526 struct gk20a *g = flcn->g;
527 u32 base_addr = flcn->flcn_base;
528 u32 trace_pc_count = 0;
529 u32 pc = 0;
530 u32 i = 0;
531
532 if (gk20a_readl(g, base_addr + falcon_falcon_sctl_r()) & 0x02) {
533 nvgpu_err(g, " falcon is in HS mode, PC TRACE dump not supported");
534 return;
535 }
536
537 trace_pc_count = falcon_falcon_traceidx_maxidx_v(gk20a_readl(g,
538 base_addr + falcon_falcon_traceidx_r()));
539 nvgpu_err(g,
540 "PC TRACE (TOTAL %d ENTRIES. entry 0 is the most recent branch):",
541 trace_pc_count);
542
543 for (i = 0; i < trace_pc_count; i++) {
544 gk20a_writel(g, base_addr + falcon_falcon_traceidx_r(),
545 falcon_falcon_traceidx_idx_f(i));
546
547 pc = falcon_falcon_tracepc_pc_v(gk20a_readl(g,
548 base_addr + falcon_falcon_tracepc_r()));
549 nvgpu_err(g, "FALCON_TRACEPC(%d) : %#010x", i, pc);
550 }
551}
552
553void gk20a_falcon_dump_stats(struct nvgpu_falcon *flcn)
554{
555 struct gk20a *g = flcn->g;
556 u32 base_addr = flcn->flcn_base;
557 unsigned int i;
558
559 nvgpu_err(g, "<<< FALCON id-%d DEBUG INFORMATION - START >>>",
560 flcn->flcn_id);
561
562 /* imblk dump */
563 gk20a_falcon_dump_imblk(flcn);
564 /* PC trace dump */
565 gk20a_falcon_dump_pc_trace(flcn);
566
567 nvgpu_err(g, "FALCON ICD REGISTERS DUMP");
568
569 for (i = 0; i < 4; i++) {
570 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
571 falcon_falcon_icd_cmd_opc_rreg_f() |
572 falcon_falcon_icd_cmd_idx_f(FALCON_REG_PC));
573 nvgpu_err(g, "FALCON_REG_PC : 0x%x",
574 gk20a_readl(g, base_addr +
575 falcon_falcon_icd_rdata_r()));
576
577 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
578 falcon_falcon_icd_cmd_opc_rreg_f() |
579 falcon_falcon_icd_cmd_idx_f(FALCON_REG_SP));
580 nvgpu_err(g, "FALCON_REG_SP : 0x%x",
581 gk20a_readl(g, base_addr +
582 falcon_falcon_icd_rdata_r()));
583 }
584
585 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
586 falcon_falcon_icd_cmd_opc_rreg_f() |
587 falcon_falcon_icd_cmd_idx_f(FALCON_REG_IMB));
588 nvgpu_err(g, "FALCON_REG_IMB : 0x%x",
589 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
590
591 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
592 falcon_falcon_icd_cmd_opc_rreg_f() |
593 falcon_falcon_icd_cmd_idx_f(FALCON_REG_DMB));
594 nvgpu_err(g, "FALCON_REG_DMB : 0x%x",
595 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
596
597 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
598 falcon_falcon_icd_cmd_opc_rreg_f() |
599 falcon_falcon_icd_cmd_idx_f(FALCON_REG_CSW));
600 nvgpu_err(g, "FALCON_REG_CSW : 0x%x",
601 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
602
603 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
604 falcon_falcon_icd_cmd_opc_rreg_f() |
605 falcon_falcon_icd_cmd_idx_f(FALCON_REG_CTX));
606 nvgpu_err(g, "FALCON_REG_CTX : 0x%x",
607 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
608
609 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
610 falcon_falcon_icd_cmd_opc_rreg_f() |
611 falcon_falcon_icd_cmd_idx_f(FALCON_REG_EXCI));
612 nvgpu_err(g, "FALCON_REG_EXCI : 0x%x",
613 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
614
615 for (i = 0; i < 6; i++) {
616 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
617 falcon_falcon_icd_cmd_opc_rreg_f() |
618 falcon_falcon_icd_cmd_idx_f(
619 falcon_falcon_icd_cmd_opc_rstat_f()));
620 nvgpu_err(g, "FALCON_REG_RSTAT[%d] : 0x%x", i,
621 gk20a_readl(g, base_addr +
622 falcon_falcon_icd_rdata_r()));
623 }
624
625 nvgpu_err(g, " FALCON REGISTERS DUMP");
626 nvgpu_err(g, "falcon_falcon_os_r : %d",
627 gk20a_readl(g, base_addr + falcon_falcon_os_r()));
628 nvgpu_err(g, "falcon_falcon_cpuctl_r : 0x%x",
629 gk20a_readl(g, base_addr + falcon_falcon_cpuctl_r()));
630 nvgpu_err(g, "falcon_falcon_idlestate_r : 0x%x",
631 gk20a_readl(g, base_addr + falcon_falcon_idlestate_r()));
632 nvgpu_err(g, "falcon_falcon_mailbox0_r : 0x%x",
633 gk20a_readl(g, base_addr + falcon_falcon_mailbox0_r()));
634 nvgpu_err(g, "falcon_falcon_mailbox1_r : 0x%x",
635 gk20a_readl(g, base_addr + falcon_falcon_mailbox1_r()));
636 nvgpu_err(g, "falcon_falcon_irqstat_r : 0x%x",
637 gk20a_readl(g, base_addr + falcon_falcon_irqstat_r()));
638 nvgpu_err(g, "falcon_falcon_irqmode_r : 0x%x",
639 gk20a_readl(g, base_addr + falcon_falcon_irqmode_r()));
640 nvgpu_err(g, "falcon_falcon_irqmask_r : 0x%x",
641 gk20a_readl(g, base_addr + falcon_falcon_irqmask_r()));
642 nvgpu_err(g, "falcon_falcon_irqdest_r : 0x%x",
643 gk20a_readl(g, base_addr + falcon_falcon_irqdest_r()));
644 nvgpu_err(g, "falcon_falcon_debug1_r : 0x%x",
645 gk20a_readl(g, base_addr + falcon_falcon_debug1_r()));
646 nvgpu_err(g, "falcon_falcon_debuginfo_r : 0x%x",
647 gk20a_readl(g, base_addr + falcon_falcon_debuginfo_r()));
648 nvgpu_err(g, "falcon_falcon_bootvec_r : 0x%x",
649 gk20a_readl(g, base_addr + falcon_falcon_bootvec_r()));
650 nvgpu_err(g, "falcon_falcon_hwcfg_r : 0x%x",
651 gk20a_readl(g, base_addr + falcon_falcon_hwcfg_r()));
652 nvgpu_err(g, "falcon_falcon_engctl_r : 0x%x",
653 gk20a_readl(g, base_addr + falcon_falcon_engctl_r()));
654 nvgpu_err(g, "falcon_falcon_curctx_r : 0x%x",
655 gk20a_readl(g, base_addr + falcon_falcon_curctx_r()));
656 nvgpu_err(g, "falcon_falcon_nxtctx_r : 0x%x",
657 gk20a_readl(g, base_addr + falcon_falcon_nxtctx_r()));
658 nvgpu_err(g, "falcon_falcon_exterrstat_r : 0x%x",
659 gk20a_readl(g, base_addr + falcon_falcon_exterrstat_r()));
660 nvgpu_err(g, "falcon_falcon_exterraddr_r : 0x%x",
661 gk20a_readl(g, base_addr + falcon_falcon_exterraddr_r()));
662}
663
664static void gk20a_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn)
665{
666 struct gk20a *g = flcn->g;
667 struct nvgpu_falcon_engine_dependency_ops *flcn_eng_dep_ops =
668 &flcn->flcn_engine_dep_ops;
669
670 switch (flcn->flcn_id) {
671 case FALCON_ID_PMU:
672 flcn_eng_dep_ops->reset_eng = nvgpu_pmu_reset;
673 flcn_eng_dep_ops->queue_head = g->ops.pmu.pmu_queue_head;
674 flcn_eng_dep_ops->queue_tail = g->ops.pmu.pmu_queue_tail;
675 break;
676 default:
677		/* A NULL assignment makes sure the CPU hard reset in
678		 * gk20a_flcn_reset() gets executed when the falcon does
679		 * not need an engine-specific reset implementation
680 */
681 flcn_eng_dep_ops->reset_eng = NULL;
682 break;
683 }
684}
685
686void gk20a_falcon_ops(struct nvgpu_falcon *flcn)
687{
688 struct nvgpu_falcon_ops *flcn_ops = &flcn->flcn_ops;
689
690 flcn_ops->reset = gk20a_flcn_reset;
691 flcn_ops->set_irq = gk20a_flcn_set_irq;
692 flcn_ops->clear_halt_interrupt_status =
693 gk20a_flcn_clear_halt_interrupt_status;
694 flcn_ops->is_falcon_cpu_halted = gk20a_is_falcon_cpu_halted;
695 flcn_ops->is_falcon_idle = gk20a_is_falcon_idle;
696 flcn_ops->is_falcon_scrubbing_done = gk20a_is_falcon_scrubbing_done;
697 flcn_ops->copy_from_dmem = gk20a_flcn_copy_from_dmem;
698 flcn_ops->copy_to_dmem = gk20a_flcn_copy_to_dmem;
699 flcn_ops->copy_to_imem = gk20a_flcn_copy_to_imem;
700 flcn_ops->copy_from_imem = gk20a_flcn_copy_from_imem;
701 flcn_ops->bootstrap = gk20a_falcon_bootstrap;
702 flcn_ops->dump_falcon_stats = gk20a_falcon_dump_stats;
703 flcn_ops->mailbox_read = gk20a_falcon_mailbox_read;
704 flcn_ops->mailbox_write = gk20a_falcon_mailbox_write;
705 flcn_ops->bl_bootstrap = gk20a_falcon_bl_bootstrap;
706
707 gk20a_falcon_engine_dependency_ops(flcn);
708}
709
710int gk20a_falcon_hal_sw_init(struct nvgpu_falcon *flcn)
711{
712 struct gk20a *g = flcn->g;
713 int err = 0;
714
715 switch (flcn->flcn_id) {
716 case FALCON_ID_PMU:
717 flcn->flcn_base = FALCON_PWR_BASE;
718 flcn->is_falcon_supported = true;
719 flcn->is_interrupt_enabled = true;
720 break;
721 case FALCON_ID_SEC2:
722 flcn->flcn_base = FALCON_SEC_BASE;
723 flcn->is_falcon_supported = false;
724 flcn->is_interrupt_enabled = false;
725 break;
726 case FALCON_ID_FECS:
727 flcn->flcn_base = FALCON_FECS_BASE;
728 flcn->is_falcon_supported = true;
729 flcn->is_interrupt_enabled = false;
730 break;
731 case FALCON_ID_GPCCS:
732 flcn->flcn_base = FALCON_GPCCS_BASE;
733 flcn->is_falcon_supported = true;
734 flcn->is_interrupt_enabled = false;
735 break;
736 case FALCON_ID_NVDEC:
737 flcn->flcn_base = FALCON_NVDEC_BASE;
738 flcn->is_falcon_supported = false;
739 flcn->is_interrupt_enabled = false;
740 break;
741 default:
742 flcn->is_falcon_supported = false;
743 break;
744 }
745
746 if (flcn->is_falcon_supported) {
747 err = nvgpu_mutex_init(&flcn->copy_lock);
748 if (err != 0) {
749 nvgpu_err(g, "Error in flcn.copy_lock mutex initialization");
750 } else {
751 gk20a_falcon_ops(flcn);
752 }
753 } else {
754 nvgpu_log_info(g, "falcon 0x%x not supported on %s",
755 flcn->flcn_id, g->name);
756 }
757
758 return err;
759}
diff --git a/include/gk20a/flcn_gk20a.h b/include/gk20a/flcn_gk20a.h
deleted file mode 100644
index 9d27b38..0000000
--- a/include/gk20a/flcn_gk20a.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22#ifndef NVGPU_GK20A_FLCN_GK20A_H
23#define NVGPU_GK20A_FLCN_GK20A_H
24
25void gk20a_falcon_ops(struct nvgpu_falcon *flcn);
26int gk20a_falcon_hal_sw_init(struct nvgpu_falcon *flcn);
27void gk20a_falcon_dump_stats(struct nvgpu_falcon *flcn);
28
29#endif /* NVGPU_GK20A_FLCN_GK20A_H */
diff --git a/include/gk20a/gk20a.c b/include/gk20a/gk20a.c
deleted file mode 100644
index 1a11716..0000000
--- a/include/gk20a/gk20a.c
+++ /dev/null
@@ -1,595 +0,0 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/allocator.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/soc.h>
30#include <nvgpu/enabled.h>
31#include <nvgpu/pmu.h>
32#include <nvgpu/gmmu.h>
33#include <nvgpu/ltc.h>
34#include <nvgpu/vidmem.h>
35#include <nvgpu/mm.h>
36#include <nvgpu/ctxsw_trace.h>
37#include <nvgpu/soc.h>
38#include <nvgpu/clk_arb.h>
39#include <nvgpu/therm.h>
40#include <nvgpu/mc.h>
41#include <nvgpu/channel_sync.h>
42#include <nvgpu/nvgpu_err.h>
43
44#include <trace/events/gk20a.h>
45
46#include "gk20a.h"
47
48#include "dbg_gpu_gk20a.h"
49#include "pstate/pstate.h"
50
51void __nvgpu_check_gpu_state(struct gk20a *g)
52{
53 u32 boot_0 = 0xffffffff;
54
55 boot_0 = nvgpu_mc_boot_0(g, NULL, NULL, NULL);
56 if (boot_0 == 0xffffffff) {
57 nvgpu_err(g, "GPU has disappeared from bus!!");
58 nvgpu_err(g, "Rebooting system!!");
59 nvgpu_kernel_restart(NULL);
60 }
61}
62
63void __gk20a_warn_on_no_regs(void)
64{
65 WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
66}
67
68static void gk20a_mask_interrupts(struct gk20a *g)
69{
70 if (g->ops.mc.intr_mask != NULL) {
71 g->ops.mc.intr_mask(g);
72 }
73
74 if (g->ops.mc.log_pending_intrs != NULL) {
75 g->ops.mc.log_pending_intrs(g);
76 }
77}
78
79int gk20a_prepare_poweroff(struct gk20a *g)
80{
81 int ret = 0;
82
83 nvgpu_log_fn(g, " ");
84
85 if (g->ops.fifo.channel_suspend) {
86 ret = g->ops.fifo.channel_suspend(g);
87 if (ret) {
88 return ret;
89 }
90 }
91
92 /* disable elpg before gr or fifo suspend */
93 if (g->ops.pmu.is_pmu_supported(g)) {
94 ret |= nvgpu_pmu_destroy(g);
95 }
96
97 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
98 ret |= nvgpu_sec2_destroy(g);
99 }
100
101 ret |= gk20a_gr_suspend(g);
102 ret |= nvgpu_mm_suspend(g);
103 ret |= gk20a_fifo_suspend(g);
104
105 gk20a_ce_suspend(g);
106
107 /* Disable GPCPLL */
108 if (g->ops.clk.suspend_clk_support) {
109 ret |= g->ops.clk.suspend_clk_support(g);
110 }
111
112 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
113 gk20a_deinit_pstate_support(g);
114 }
115
116 gk20a_mask_interrupts(g);
117
118 g->power_on = false;
119
120 return ret;
121}
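
gk20a_prepare_poweroff() ORs the return codes of the individual suspend steps together so that a failure in one unit does not stop the remaining units from being quiesced. A small sketch of that pattern follows; the step functions are made up.

#include <stdio.h>

/* Hypothetical suspend steps; each returns 0 or a negative errno. */
static int suspend_channels(void) { return 0; }
static int suspend_gr(void)       { return 0; }
static int suspend_mm(void)       { return -5; /* pretend one step failed */ }
static int suspend_fifo(void)     { return 0; }

static int prepare_poweroff(void)
{
	int ret = 0;

	/* OR the results so every step runs even if an earlier one fails. */
	ret |= suspend_channels();
	ret |= suspend_gr();
	ret |= suspend_mm();
	ret |= suspend_fifo();

	return ret;
}

int main(void)
{
	printf("poweroff: %d\n", prepare_poweroff());
	return 0;
}

Note that ORing negative errnos only signals that "something failed"; the combined value is not a meaningful error code on its own.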
122
123int gk20a_finalize_poweron(struct gk20a *g)
124{
125 int err = 0;
126#if defined(CONFIG_TEGRA_GK20A_NVHOST)
127 u32 nr_pages;
128#endif
129
130 u32 fuse_status;
131
132 nvgpu_log_fn(g, " ");
133
134 if (g->power_on) {
135 return 0;
136 }
137
138 g->power_on = true;
139
140 /*
141 * Before probing the GPU make sure the GPU's state is cleared. This is
142 * relevant for rebind operations.
143 */
144 if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
145 g->ops.xve.reset_gpu(g);
146 g->gpu_reset_done = true;
147 }
148
149 if (g->ops.clock_gating.slcg_acb_load_gating_prod != NULL) {
150 g->ops.clock_gating.slcg_acb_load_gating_prod(g, true);
151 }
152
153 /*
154 * Do this early so any early VMs that get made are capable of mapping
155 * buffers.
156 */
157 err = nvgpu_pd_cache_init(g);
158 if (err) {
159 return err;
160 }
161
162 /* init interface layer support for PMU falcon */
163 err = nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
164 if (err != 0) {
165 nvgpu_err(g, "failed to sw init FALCON_ID_PMU");
166 goto done;
167 }
168 err = nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
169 if (err != 0) {
170 nvgpu_err(g, "failed to sw init FALCON_ID_SEC2");
171 goto done;
172 }
173 err = nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);
174 if (err != 0) {
175 nvgpu_err(g, "failed to sw init FALCON_ID_NVDEC");
176 goto done;
177 }
178 err = nvgpu_flcn_sw_init(g, FALCON_ID_GSPLITE);
179 if (err != 0) {
180 nvgpu_err(g, "failed to sw init FALCON_ID_GSPLITE");
181 goto done;
182 }
183
184 if (g->ops.acr.acr_sw_init != NULL &&
185 nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
186 g->ops.acr.acr_sw_init(g, &g->acr);
187 }
188
189 if (g->ops.bios.init) {
190 err = g->ops.bios.init(g);
191 }
192 if (err) {
193 goto done;
194 }
195
196 g->ops.bus.init_hw(g);
197
198 if (g->ops.clk.disable_slowboot) {
199 g->ops.clk.disable_slowboot(g);
200 }
201
202 g->ops.priv_ring.enable_priv_ring(g);
203
204	/* TBD: move this after graphics init, where blcg/slcg are enabled.
205	 This call removes SlowdownOnBoot, which applies a 32x divider on the
206	 gpcpll bypass path. The purpose of the slowdown is to save power during
207	 boot, but it also significantly slows down gk20a init on simulation and
208	 emulation. We should remove SOB after the graphics power-saving features
209	 (blcg/slcg) are enabled. For now, do it here. */
210 if (g->ops.clk.init_clk_support) {
211 err = g->ops.clk.init_clk_support(g);
212 if (err) {
213 nvgpu_err(g, "failed to init gk20a clk");
214 goto done;
215 }
216 }
217
218 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
219 err = g->ops.nvlink.init(g);
220 if (err) {
221 nvgpu_err(g, "failed to init nvlink");
222 goto done;
223 }
224 }
225
226 if (g->ops.fb.init_fbpa) {
227 err = g->ops.fb.init_fbpa(g);
228 if (err) {
229 nvgpu_err(g, "failed to init fbpa");
230 goto done;
231 }
232 }
233
234 if (g->ops.fb.mem_unlock) {
235 err = g->ops.fb.mem_unlock(g);
236 if (err) {
237 nvgpu_err(g, "failed to unlock memory");
238 goto done;
239 }
240 }
241
242 err = g->ops.fifo.reset_enable_hw(g);
243
244 if (err) {
245 nvgpu_err(g, "failed to reset gk20a fifo");
246 goto done;
247 }
248
249 err = nvgpu_init_ltc_support(g);
250 if (err) {
251 nvgpu_err(g, "failed to init ltc");
252 goto done;
253 }
254
255 err = nvgpu_init_mm_support(g);
256 if (err) {
257 nvgpu_err(g, "failed to init gk20a mm");
258 goto done;
259 }
260
261 err = gk20a_init_fifo_support(g);
262 if (err) {
263 nvgpu_err(g, "failed to init gk20a fifo");
264 goto done;
265 }
266
267 if (g->ops.therm.elcg_init_idle_filters) {
268 g->ops.therm.elcg_init_idle_filters(g);
269 }
270
271 g->ops.mc.intr_enable(g);
272
273 /*
274 * Power gate the chip as per the TPC PG mask
275 * and the fuse_status register.
276	 * If the TPC PG mask is invalid, halt the GPU poweron.
277 */
278 g->can_tpc_powergate = false;
279 fuse_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0);
280
281 if (g->ops.tpc.tpc_powergate) {
282 err = g->ops.tpc.tpc_powergate(g, fuse_status);
283 }
284
285 if (err) {
286 nvgpu_err(g, "failed to power ON GPU");
287 goto done;
288 }
289
290 nvgpu_mutex_acquire(&g->tpc_pg_lock);
291
292 if (g->can_tpc_powergate) {
293 if (g->ops.gr.powergate_tpc != NULL)
294 g->ops.gr.powergate_tpc(g);
295 }
296
297 err = gk20a_enable_gr_hw(g);
298 if (err) {
299 nvgpu_err(g, "failed to enable gr");
300 nvgpu_mutex_release(&g->tpc_pg_lock);
301 goto done;
302 }
303
304 if (g->ops.pmu.is_pmu_supported(g)) {
305 if (g->ops.pmu.prepare_ucode) {
306 err = g->ops.pmu.prepare_ucode(g);
307 }
308 if (err) {
309 nvgpu_err(g, "failed to init pmu ucode");
310 nvgpu_mutex_release(&g->tpc_pg_lock);
311 goto done;
312 }
313 }
314
315 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
316 err = gk20a_init_pstate_support(g);
317 if (err) {
318 nvgpu_err(g, "failed to init pstates");
319 nvgpu_mutex_release(&g->tpc_pg_lock);
320 goto done;
321 }
322 }
323
324 if (g->acr.bootstrap_hs_acr != NULL &&
325 nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
326 err = g->acr.bootstrap_hs_acr(g, &g->acr, &g->acr.acr);
327 if (err != 0) {
328 nvgpu_err(g, "ACR bootstrap failed");
329 nvgpu_mutex_release(&g->tpc_pg_lock);
330 goto done;
331 }
332 }
333
334 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
335 err = nvgpu_init_sec2_support(g);
336 if (err != 0) {
337 nvgpu_err(g, "failed to init sec2");
338 nvgpu_mutex_release(&g->tpc_pg_lock);
339 goto done;
340 }
341 }
342
343 if (g->ops.pmu.is_pmu_supported(g)) {
344 err = nvgpu_init_pmu_support(g);
345 if (err) {
346 nvgpu_err(g, "failed to init gk20a pmu");
347 nvgpu_mutex_release(&g->tpc_pg_lock);
348 goto done;
349 }
350 }
351
352 err = gk20a_init_gr_support(g);
353 if (err) {
354 nvgpu_err(g, "failed to init gk20a gr");
355 nvgpu_mutex_release(&g->tpc_pg_lock);
356 goto done;
357 }
358
359 nvgpu_mutex_release(&g->tpc_pg_lock);
360
361 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
362 err = gk20a_init_pstate_pmu_support(g);
363 if (err) {
364 nvgpu_err(g, "failed to init pstates");
365 goto done;
366 }
367 }
368
369 if (g->ops.pmu_ver.clk.clk_set_boot_clk && nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
370 g->ops.pmu_ver.clk.clk_set_boot_clk(g);
371 } else {
372 err = nvgpu_clk_arb_init_arbiter(g);
373 if (err) {
374 nvgpu_err(g, "failed to init clk arb");
375 goto done;
376 }
377 }
378
379 err = nvgpu_init_therm_support(g);
380 if (err) {
381 nvgpu_err(g, "failed to init gk20a therm");
382 goto done;
383 }
384
385 err = g->ops.chip_init_gpu_characteristics(g);
386 if (err) {
387 nvgpu_err(g, "failed to init gk20a gpu characteristics");
388 goto done;
389 }
390
391#ifdef CONFIG_GK20A_CTXSW_TRACE
392 err = gk20a_ctxsw_trace_init(g);
393 if (err)
394 nvgpu_warn(g, "could not initialize ctxsw tracing");
395#endif
396
397 /* Restore the debug setting */
398 g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);
399
400 gk20a_init_ce_support(g);
401
402 if (g->ops.xve.available_speeds) {
403 u32 speed;
404
405 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && g->ops.xve.disable_aspm) {
406 g->ops.xve.disable_aspm(g);
407 }
408
409 g->ops.xve.available_speeds(g, &speed);
410
411 /* Set to max speed */
412 speed = 1 << (fls(speed) - 1);
413 err = g->ops.xve.set_speed(g, speed);
414 if (err) {
415 nvgpu_err(g, "Failed to set PCIe bus speed!");
416 goto done;
417 }
418 }
419
420#if defined(CONFIG_TEGRA_GK20A_NVHOST)
421 if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) {
422 if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
423 nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
424 __nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
425 g->syncpt_unit_base, nr_pages);
426 }
427 }
428#endif
429
430 if (g->ops.fifo.channel_resume) {
431 g->ops.fifo.channel_resume(g);
432 }
433
434done:
435 if (err) {
436 g->power_on = false;
437 }
438
439 return err;
440}
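
The PCIe speed selection in the function above isolates the highest set bit of the supported-speeds mask with 1 << (fls(speed) - 1). A self-contained sketch of that bit trick is shown below; fls32() is a portable stand-in for the kernel's fls(), and the mask value is made up.

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for the kernel's fls(): 1-based index of the
 * highest set bit, 0 when the mask is empty. */
static int fls32(uint32_t x)
{
	int n = 0;

	while (x != 0) {
		x >>= 1;
		n++;
	}
	return n;
}

int main(void)
{
	/* Hypothetical bitmask of supported link speeds, one bit per gen. */
	uint32_t speeds = 0x7;                            /* gen1 | gen2 | gen3 */
	uint32_t fastest = (speeds != 0) ? 1u << (fls32(speeds) - 1) : 0;

	/* Mirrors 'speed = 1 << (fls(speed) - 1)' in gk20a_finalize_poweron(). */
	printf("fastest supported speed bit: 0x%x\n", fastest);
	return 0;
}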
441
442int gk20a_wait_for_idle(struct gk20a *g)
443{
444 int wait_length = 150; /* 3 second overall max wait. */
445 int target_usage_count = 0;
446
447 if (!g) {
448 return -ENODEV;
449 }
450
451 while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count)
452 && (wait_length-- >= 0)) {
453 nvgpu_msleep(20);
454 }
455
456 if (wait_length < 0) {
457 nvgpu_warn(g, "Timed out waiting for idle (%d)!\n",
458 nvgpu_atomic_read(&g->usage_count));
459 return -ETIMEDOUT;
460 }
461
462 return 0;
463}
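
gk20a_wait_for_idle() polls the usage counter up to roughly 150 times with a 20 ms sleep, i.e. about a 3 second budget. Here is a user-space sketch of the same poll-with-timeout shape, using a C11 atomic in place of g->usage_count.

#define _POSIX_C_SOURCE 199309L
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

/* Stand-in for g->usage_count. */
static atomic_int usage_count;

static void msleep(long ms)
{
	struct timespec ts = { ms / 1000, (ms % 1000) * 1000000L };

	nanosleep(&ts, NULL);
}

/* Poll until the counter drains to zero or the budget runs out,
 * mirroring the 150 x 20 ms loop in gk20a_wait_for_idle(). */
static int wait_for_idle(void)
{
	int tries = 150;

	while (atomic_load(&usage_count) != 0 && tries-- >= 0)
		msleep(20);

	return (tries < 0) ? -1 /* timed out */ : 0;
}

int main(void)
{
	atomic_store(&usage_count, 0);   /* pretend every user already left */
	printf("idle wait: %d\n", wait_for_idle());
	return 0;
}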
464
465int gk20a_init_gpu_characteristics(struct gk20a *g)
466{
467 __nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
468 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
469 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);
470
471 if (IS_ENABLED(CONFIG_SYNC)) {
472 __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);
473 }
474
475 if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) {
476 __nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);
477 }
478
479 /*
480 * Fast submits are supported as long as the user doesn't request
481 * anything that depends on job tracking. (Here, fast means strictly no
482 * metadata, just the gpfifo contents are copied and gp_put updated).
483 */
484 __nvgpu_set_enabled(g,
485 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
486 true);
487
488 /*
489 * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
490 * and other heavy stuff, which prevents deterministic submits. This is
491 * supported otherwise, provided that the user doesn't request anything
492 * that depends on deferred cleanup.
493 */
494 if (!nvgpu_channel_sync_needs_os_fence_framework(g)) {
495 __nvgpu_set_enabled(g,
496 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
497 true);
498 }
499
500 __nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
501
502 __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
503 __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
504
505 if (g->ops.clk_arb.get_arbiter_clk_domains != NULL &&
506 g->ops.clk.support_clk_freq_controller) {
507 __nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);
508 }
509
510 g->ops.gr.detect_sm_arch(g);
511
512 if (g->ops.gr.init_cyclestats) {
513 g->ops.gr.init_cyclestats(g);
514 }
515
516 g->ops.gr.get_rop_l2_en_mask(g);
517
518 return 0;
519}
520
521/*
522 * Free the gk20a struct.
523 */
524static void gk20a_free_cb(struct nvgpu_ref *refcount)
525{
526 struct gk20a *g = container_of(refcount,
527 struct gk20a, refcount);
528
529#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
530 nvgpu_deinit_ecc_reporting(g);
531#endif
532
533 nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!");
534
535 gk20a_ce_destroy(g);
536
537 if (g->remove_support) {
538 g->remove_support(g);
539 }
540
541 if (g->free) {
542 g->free(g);
543 }
544}
545
546/**
547 * gk20a_get() - Increment ref count on driver
548 *
549 * @g - The driver whose reference count to increment
550 * This will fail if the driver is in the process of being released. In that
551 * case it will return NULL. Otherwise a pointer to the driver passed in will
552 * be returned.
553 */
554struct gk20a * __must_check gk20a_get(struct gk20a *g)
555{
556 int success;
557
558 /*
559 * Handle the possibility we are still freeing the gk20a struct while
560 * gk20a_get() is called. Unlikely but plausible race condition. Ideally
561 * the code will never be in such a situation that this race is
562 * possible.
563 */
564 success = nvgpu_ref_get_unless_zero(&g->refcount);
565
566 nvgpu_log(g, gpu_dbg_shutdown, "GET: refs currently %d %s",
567 nvgpu_atomic_read(&g->refcount.refcount),
568 success ? "" : "(FAILED)");
569
570 return success ? g : NULL;
571}
572
573/**
574 * gk20a_put() - Decrement ref count on driver
575 *
576 * @g - The driver to decrement
577 *
578 * Decrement the driver ref-count. If necessary, also free the underlying
579 * driver memory.
580 */
581void gk20a_put(struct gk20a *g)
582{
583 /*
584	 * Note - this is racy: two instances of this could run before the
585	 * actual kref_put() runs, so you could see something like:
586 *
587 * ... PUT: refs currently 2
588 * ... PUT: refs currently 2
589 * ... Freeing GK20A struct!
590 */
591 nvgpu_log(g, gpu_dbg_shutdown, "PUT: refs currently %d",
592 nvgpu_atomic_read(&g->refcount.refcount));
593
594 nvgpu_ref_put(&g->refcount, gk20a_free_cb);
595}
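
gk20a_get()/gk20a_put() implement the usual get-unless-zero reference pattern: a lookup may only take a new reference while the count is still non-zero, and the last put runs the free callback. The following is a stand-alone sketch with C11 atomics standing in for nvgpu_ref; the names are illustrative, not the nvgpu API.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy driver object; the real code embeds an nvgpu_ref in struct gk20a. */
struct dev {
	atomic_int refcount;
};

/* Equivalent of nvgpu_ref_get_unless_zero(): only take a reference if
 * the object is not already being torn down. */
static struct dev *dev_get(struct dev *d)
{
	int old = atomic_load(&d->refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&d->refcount, &old, old + 1))
			return d;
	}
	return NULL; /* teardown already started */
}

/* Equivalent of nvgpu_ref_put(): free on the last reference. */
static void dev_put(struct dev *d)
{
	if (atomic_fetch_sub(&d->refcount, 1) == 1) {
		printf("freeing dev\n");
		free(d);
	}
}

int main(void)
{
	struct dev *d = malloc(sizeof(*d));
	struct dev *ref;

	atomic_init(&d->refcount, 1);

	ref = dev_get(d);     /* refcount 1 -> 2 */
	if (ref)
		dev_put(ref); /* 2 -> 1 */
	dev_put(d);           /* 1 -> 0, frees */
	return 0;
}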
diff --git a/include/gk20a/gk20a.h b/include/gk20a/gk20a.h
deleted file mode 100644
index 16a2453..0000000
--- a/include/gk20a/gk20a.h
+++ /dev/null
@@ -1,33 +0,0 @@
1/*
2 * This file is used as a temporary redirection header for <nvgpu/gk20a.h>
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * GK20A Graphics
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27#ifndef GK20A_GK20A_H
28#define GK20A_GK20A_H
29
30/* no new headers should be added here */
31#include <nvgpu/gk20a.h>
32
33#endif
diff --git a/include/gk20a/gr_ctx_gk20a.c b/include/gk20a/gr_ctx_gk20a.c
deleted file mode 100644
index 8b9ac32..0000000
--- a/include/gk20a/gr_ctx_gk20a.c
+++ /dev/null
@@ -1,486 +0,0 @@
1/*
2 * GK20A Graphics Context
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h>
28#include <nvgpu/firmware.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/io.h>
31
32#include "gk20a.h"
33#include "gr_ctx_gk20a.h"
34
35#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
36
37static int gr_gk20a_alloc_load_netlist_u32(struct gk20a *g, u32 *src, u32 len,
38 struct u32_list_gk20a *u32_list)
39{
40 u32_list->count = (len + sizeof(u32) - 1) / sizeof(u32);
41 if (!alloc_u32_list_gk20a(g, u32_list)) {
42 return -ENOMEM;
43 }
44
45 memcpy(u32_list->l, src, len);
46
47 return 0;
48}
49
50static int gr_gk20a_alloc_load_netlist_av(struct gk20a *g, u32 *src, u32 len,
51 struct av_list_gk20a *av_list)
52{
53 av_list->count = len / sizeof(struct av_gk20a);
54 if (!alloc_av_list_gk20a(g, av_list)) {
55 return -ENOMEM;
56 }
57
58 memcpy(av_list->l, src, len);
59
60 return 0;
61}
62
63static int gr_gk20a_alloc_load_netlist_av64(struct gk20a *g, u32 *src, u32 len,
64 struct av64_list_gk20a *av64_list)
65{
66 av64_list->count = len / sizeof(struct av64_gk20a);
67 if (!alloc_av64_list_gk20a(g, av64_list)) {
68 return -ENOMEM;
69 }
70
71 memcpy(av64_list->l, src, len);
72
73 return 0;
74}
75
76static int gr_gk20a_alloc_load_netlist_aiv(struct gk20a *g, u32 *src, u32 len,
77 struct aiv_list_gk20a *aiv_list)
78{
79 aiv_list->count = len / sizeof(struct aiv_gk20a);
80 if (!alloc_aiv_list_gk20a(g, aiv_list)) {
81 return -ENOMEM;
82 }
83
84 memcpy(aiv_list->l, src, len);
85
86 return 0;
87}
88
89static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
90{
91 struct nvgpu_firmware *netlist_fw;
92 struct netlist_image *netlist = NULL;
93 char name[MAX_NETLIST_NAME];
94 u32 i, major_v = ~0, major_v_hw, netlist_num;
95 int net, max, err = -ENOENT;
96
97 nvgpu_log_fn(g, " ");
98
99 if (g->ops.gr_ctx.is_fw_defined()) {
100 net = NETLIST_FINAL;
101 max = 0;
102 major_v_hw = ~0;
103 g->gr.ctx_vars.dynamic = false;
104 } else {
105 net = NETLIST_SLOT_A;
106 max = MAX_NETLIST;
107 major_v_hw = gk20a_readl(g,
108 gr_fecs_ctx_state_store_major_rev_id_r());
109 g->gr.ctx_vars.dynamic = true;
110 }
111
112 for (; net < max; net++) {
113 if (g->ops.gr_ctx.get_netlist_name(g, net, name) != 0) {
114 nvgpu_warn(g, "invalid netlist index %d", net);
115 continue;
116 }
117
118 netlist_fw = nvgpu_request_firmware(g, name, 0);
119 if (!netlist_fw) {
120 nvgpu_warn(g, "failed to load netlist %s", name);
121 continue;
122 }
123
124 netlist = (struct netlist_image *)netlist_fw->data;
125
126 for (i = 0; i < netlist->header.regions; i++) {
127 u32 *src = (u32 *)((u8 *)netlist + netlist->regions[i].data_offset);
128 u32 size = netlist->regions[i].data_size;
129
130 switch (netlist->regions[i].region_id) {
131 case NETLIST_REGIONID_FECS_UCODE_DATA:
132 nvgpu_log_info(g, "NETLIST_REGIONID_FECS_UCODE_DATA");
133 err = gr_gk20a_alloc_load_netlist_u32(g,
134 src, size, &g->gr.ctx_vars.ucode.fecs.data);
135 if (err) {
136 goto clean_up;
137 }
138 break;
139 case NETLIST_REGIONID_FECS_UCODE_INST:
140 nvgpu_log_info(g, "NETLIST_REGIONID_FECS_UCODE_INST");
141 err = gr_gk20a_alloc_load_netlist_u32(g,
142 src, size, &g->gr.ctx_vars.ucode.fecs.inst);
143 if (err) {
144 goto clean_up;
145 }
146 break;
147 case NETLIST_REGIONID_GPCCS_UCODE_DATA:
148 nvgpu_log_info(g, "NETLIST_REGIONID_GPCCS_UCODE_DATA");
149 err = gr_gk20a_alloc_load_netlist_u32(g,
150 src, size, &g->gr.ctx_vars.ucode.gpccs.data);
151 if (err) {
152 goto clean_up;
153 }
154 break;
155 case NETLIST_REGIONID_GPCCS_UCODE_INST:
156 nvgpu_log_info(g, "NETLIST_REGIONID_GPCCS_UCODE_INST");
157 err = gr_gk20a_alloc_load_netlist_u32(g,
158 src, size, &g->gr.ctx_vars.ucode.gpccs.inst);
159 if (err) {
160 goto clean_up;
161 }
162 break;
163 case NETLIST_REGIONID_SW_BUNDLE_INIT:
164 nvgpu_log_info(g, "NETLIST_REGIONID_SW_BUNDLE_INIT");
165 err = gr_gk20a_alloc_load_netlist_av(g,
166 src, size, &g->gr.ctx_vars.sw_bundle_init);
167 if (err) {
168 goto clean_up;
169 }
170 break;
171 case NETLIST_REGIONID_SW_METHOD_INIT:
172 nvgpu_log_info(g, "NETLIST_REGIONID_SW_METHOD_INIT");
173 err = gr_gk20a_alloc_load_netlist_av(g,
174 src, size, &g->gr.ctx_vars.sw_method_init);
175 if (err) {
176 goto clean_up;
177 }
178 break;
179 case NETLIST_REGIONID_SW_CTX_LOAD:
180 nvgpu_log_info(g, "NETLIST_REGIONID_SW_CTX_LOAD");
181 err = gr_gk20a_alloc_load_netlist_aiv(g,
182 src, size, &g->gr.ctx_vars.sw_ctx_load);
183 if (err) {
184 goto clean_up;
185 }
186 break;
187 case NETLIST_REGIONID_SW_NON_CTX_LOAD:
188 nvgpu_log_info(g, "NETLIST_REGIONID_SW_NON_CTX_LOAD");
189 err = gr_gk20a_alloc_load_netlist_av(g,
190 src, size, &g->gr.ctx_vars.sw_non_ctx_load);
191 if (err) {
192 goto clean_up;
193 }
194 break;
195 case NETLIST_REGIONID_SWVEIDBUNDLEINIT:
196 nvgpu_log_info(g,
197 "NETLIST_REGIONID_SW_VEID_BUNDLE_INIT");
198 err = gr_gk20a_alloc_load_netlist_av(g,
199 src, size,
200 &g->gr.ctx_vars.sw_veid_bundle_init);
201 if (err) {
202 goto clean_up;
203 }
204 break;
205 case NETLIST_REGIONID_CTXREG_SYS:
206 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_SYS");
207 err = gr_gk20a_alloc_load_netlist_aiv(g,
208 src, size, &g->gr.ctx_vars.ctxsw_regs.sys);
209 if (err) {
210 goto clean_up;
211 }
212 break;
213 case NETLIST_REGIONID_CTXREG_GPC:
214 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_GPC");
215 err = gr_gk20a_alloc_load_netlist_aiv(g,
216 src, size, &g->gr.ctx_vars.ctxsw_regs.gpc);
217 if (err) {
218 goto clean_up;
219 }
220 break;
221 case NETLIST_REGIONID_CTXREG_TPC:
222 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_TPC");
223 err = gr_gk20a_alloc_load_netlist_aiv(g,
224 src, size, &g->gr.ctx_vars.ctxsw_regs.tpc);
225 if (err) {
226 goto clean_up;
227 }
228 break;
229 case NETLIST_REGIONID_CTXREG_ZCULL_GPC:
230 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_ZCULL_GPC");
231 err = gr_gk20a_alloc_load_netlist_aiv(g,
232 src, size, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
233 if (err) {
234 goto clean_up;
235 }
236 break;
237 case NETLIST_REGIONID_CTXREG_PPC:
238 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PPC");
239 err = gr_gk20a_alloc_load_netlist_aiv(g,
240 src, size, &g->gr.ctx_vars.ctxsw_regs.ppc);
241 if (err) {
242 goto clean_up;
243 }
244 break;
245 case NETLIST_REGIONID_CTXREG_PM_SYS:
246 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_SYS");
247 err = gr_gk20a_alloc_load_netlist_aiv(g,
248 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_sys);
249 if (err) {
250 goto clean_up;
251 }
252 break;
253 case NETLIST_REGIONID_CTXREG_PM_GPC:
254 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_GPC");
255 err = gr_gk20a_alloc_load_netlist_aiv(g,
256 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_gpc);
257 if (err) {
258 goto clean_up;
259 }
260 break;
261 case NETLIST_REGIONID_CTXREG_PM_TPC:
262 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_TPC");
263 err = gr_gk20a_alloc_load_netlist_aiv(g,
264 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_tpc);
265 if (err) {
266 goto clean_up;
267 }
268 break;
269 case NETLIST_REGIONID_BUFFER_SIZE:
270 g->gr.ctx_vars.buffer_size = *src;
271 nvgpu_log_info(g, "NETLIST_REGIONID_BUFFER_SIZE : %d",
272 g->gr.ctx_vars.buffer_size);
273 break;
274 case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX:
275 g->gr.ctx_vars.regs_base_index = *src;
276 nvgpu_log_info(g, "NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %u",
277 g->gr.ctx_vars.regs_base_index);
278 break;
279 case NETLIST_REGIONID_MAJORV:
280 major_v = *src;
281 nvgpu_log_info(g, "NETLIST_REGIONID_MAJORV : %d",
282 major_v);
283 break;
284 case NETLIST_REGIONID_NETLIST_NUM:
285 netlist_num = *src;
286 nvgpu_log_info(g, "NETLIST_REGIONID_NETLIST_NUM : %d",
287 netlist_num);
288 break;
289 case NETLIST_REGIONID_CTXREG_PMPPC:
290 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMPPC");
291 err = gr_gk20a_alloc_load_netlist_aiv(g,
292 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ppc);
293 if (err) {
294 goto clean_up;
295 }
296 break;
297 case NETLIST_REGIONID_NVPERF_CTXREG_SYS:
298 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_CTXREG_SYS");
299 err = gr_gk20a_alloc_load_netlist_aiv(g,
300 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys);
301 if (err) {
302 goto clean_up;
303 }
304 break;
305 case NETLIST_REGIONID_NVPERF_FBP_CTXREGS:
306 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_CTXREGS");
307 err = gr_gk20a_alloc_load_netlist_aiv(g,
308 src, size, &g->gr.ctx_vars.ctxsw_regs.fbp);
309 if (err) {
310 goto clean_up;
311 }
312 break;
313 case NETLIST_REGIONID_NVPERF_CTXREG_GPC:
314 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_CTXREG_GPC");
315 err = gr_gk20a_alloc_load_netlist_aiv(g,
316 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_gpc);
317 if (err) {
318 goto clean_up;
319 }
320 break;
321 case NETLIST_REGIONID_NVPERF_FBP_ROUTER:
322 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_ROUTER");
323 err = gr_gk20a_alloc_load_netlist_aiv(g,
324 src, size, &g->gr.ctx_vars.ctxsw_regs.fbp_router);
325 if (err) {
326 goto clean_up;
327 }
328 break;
329 case NETLIST_REGIONID_NVPERF_GPC_ROUTER:
330 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_GPC_ROUTER");
331 err = gr_gk20a_alloc_load_netlist_aiv(g,
332 src, size, &g->gr.ctx_vars.ctxsw_regs.gpc_router);
333 if (err) {
334 goto clean_up;
335 }
336 break;
337 case NETLIST_REGIONID_CTXREG_PMLTC:
338 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMLTC");
339 err = gr_gk20a_alloc_load_netlist_aiv(g,
340 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ltc);
341 if (err) {
342 goto clean_up;
343 }
344 break;
345 case NETLIST_REGIONID_CTXREG_PMFBPA:
346 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMFBPA");
347 err = gr_gk20a_alloc_load_netlist_aiv(g,
348 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_fbpa);
349 if (err) {
350 goto clean_up;
351 }
352 break;
353 case NETLIST_REGIONID_NVPERF_SYS_ROUTER:
354 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_SYS_ROUTER");
355 err = gr_gk20a_alloc_load_netlist_aiv(g,
356 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router);
357 if (err) {
358 goto clean_up;
359 }
360 break;
361 case NETLIST_REGIONID_NVPERF_PMA:
362 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMA");
363 err = gr_gk20a_alloc_load_netlist_aiv(g,
364 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_pma);
365 if (err) {
366 goto clean_up;
367 }
368 break;
369 case NETLIST_REGIONID_CTXREG_PMROP:
370 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMROP");
371 err = gr_gk20a_alloc_load_netlist_aiv(g,
372 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_rop);
373 if (err) {
374 goto clean_up;
375 }
376 break;
377 case NETLIST_REGIONID_CTXREG_PMUCGPC:
378 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMUCGPC");
379 err = gr_gk20a_alloc_load_netlist_aiv(g,
380 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc);
381 if (err) {
382 goto clean_up;
383 }
384 break;
385 case NETLIST_REGIONID_CTXREG_ETPC:
386 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_ETPC");
387 err = gr_gk20a_alloc_load_netlist_aiv(g,
388 src, size, &g->gr.ctx_vars.ctxsw_regs.etpc);
389 if (err) {
390 goto clean_up;
391 }
392 break;
393 case NETLIST_REGIONID_SW_BUNDLE64_INIT:
394 nvgpu_log_info(g, "NETLIST_REGIONID_SW_BUNDLE64_INIT");
395 err = gr_gk20a_alloc_load_netlist_av64(g,
396 src, size,
397 &g->gr.ctx_vars.sw_bundle64_init);
398 if (err) {
399 goto clean_up;
400 }
401 break;
402 case NETLIST_REGIONID_NVPERF_PMCAU:
403 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMCAU");
404 err = gr_gk20a_alloc_load_netlist_aiv(g,
405 src, size,
406 &g->gr.ctx_vars.ctxsw_regs.pm_cau);
407 if (err) {
408 goto clean_up;
409 }
410 break;
411
412 default:
413 nvgpu_log_info(g, "unrecognized region %d skipped", i);
414 break;
415 }
416 }
417
418 if (net != NETLIST_FINAL && major_v != major_v_hw) {
419 nvgpu_log_info(g, "skip %s: major_v 0x%08x doesn't match hw 0x%08x",
420 name, major_v, major_v_hw);
421 goto clean_up;
422 }
423
424 g->gr.ctx_vars.valid = true;
425 g->gr.netlist = net;
426
427 nvgpu_release_firmware(g, netlist_fw);
428 nvgpu_log_fn(g, "done");
429 goto done;
430
431clean_up:
432 g->gr.ctx_vars.valid = false;
433 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.inst.l);
434 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.data.l);
435 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.inst.l);
436 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.data.l);
437 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle_init.l);
438 nvgpu_kfree(g, g->gr.ctx_vars.sw_method_init.l);
439 nvgpu_kfree(g, g->gr.ctx_vars.sw_ctx_load.l);
440 nvgpu_kfree(g, g->gr.ctx_vars.sw_non_ctx_load.l);
441 nvgpu_kfree(g, g->gr.ctx_vars.sw_veid_bundle_init.l);
442 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.sys.l);
443 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.gpc.l);
444 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.tpc.l);
445 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l);
446 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.ppc.l);
447 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
448 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
449 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
450 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_ppc.l);
451 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_sys.l);
452 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.fbp.l);
453 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_gpc.l);
454 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.fbp_router.l);
455 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.gpc_router.l);
456 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_ltc.l);
457 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
458 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_sys_router.l);
459 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_pma.l);
460 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_rop.l);
461 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_ucgpc.l);
462 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.etpc.l);
463 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle64_init.l);
464 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_cau.l);
465 nvgpu_release_firmware(g, netlist_fw);
466 err = -ENOENT;
467 }
468
469done:
470 if (g->gr.ctx_vars.valid) {
471 nvgpu_log_info(g, "netlist image %s loaded", name);
472 return 0;
473 } else {
474 nvgpu_err(g, "failed to load netlist image!!");
475 return err;
476 }
477}
478
479int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr)
480{
481 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
482 return gr_gk20a_init_ctx_vars_sim(g, gr);
483 } else {
484 return gr_gk20a_init_ctx_vars_fw(g, gr);
485 }
486}
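
gr_gk20a_init_ctx_vars_fw() walks a self-describing firmware image: a header gives the region count, each region record carries an ID, size and offset, and a switch dispatches on the ID. Below is a compact sketch of that parse loop over a fake in-memory image; the region IDs and layout are simplified stand-ins, not the real netlist format.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified mirror of struct netlist_region / netlist_image. */
struct region { uint32_t id, size, offset; };
struct image  { uint32_t version, nregions; struct region r[2]; };

/* Hypothetical region IDs, loosely modelled on the NETLIST_REGIONID_* set. */
enum { REGION_UCODE = 0, REGION_BUFFER_SIZE = 16 };

static void parse(const uint8_t *blob)
{
	const struct image *img = (const struct image *)blob;
	uint32_t i;

	for (i = 0; i < img->nregions; i++) {
		const uint8_t *src = blob + img->r[i].offset;

		switch (img->r[i].id) {
		case REGION_BUFFER_SIZE: {
			uint32_t sz;

			memcpy(&sz, src, sizeof(sz)); /* scalar region: one word */
			printf("buffer_size = %u\n", sz);
			break;
		}
		case REGION_UCODE:
			printf("ucode region: %u bytes at offset %u\n",
			       img->r[i].size, img->r[i].offset);
			break;
		default:
			printf("unrecognized region id %u skipped\n", img->r[i].id);
			break;
		}
	}
}

int main(void)
{
	/* A tiny fake image: header, two region records, one 4-byte payload. */
	struct {
		struct image img;
		uint32_t payload;
	} f = {
		.img = {
			.version  = 1,
			.nregions = 2,
			.r = {
				{ REGION_UCODE,       4, sizeof(struct image) },
				{ REGION_BUFFER_SIZE, 4, sizeof(struct image) },
			},
		},
		.payload = 0x1000,
	};

	parse((const uint8_t *)&f);
	return 0;
}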
diff --git a/include/gk20a/gr_ctx_gk20a.h b/include/gk20a/gr_ctx_gk20a.h
deleted file mode 100644
index e75472c..0000000
--- a/include/gk20a/gr_ctx_gk20a.h
+++ /dev/null
@@ -1,206 +0,0 @@
1/*
2 * GK20A Graphics Context
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef NVGPU_GK20A_GR_CTX_GK20A_H
25#define NVGPU_GK20A_GR_CTX_GK20A_H
26
27#include <nvgpu/kmem.h>
28
29struct gr_gk20a;
30
31/* emulation netlists, match majorV with HW */
32#define GK20A_NETLIST_IMAGE_A "NETA_img.bin"
33#define GK20A_NETLIST_IMAGE_B "NETB_img.bin"
34#define GK20A_NETLIST_IMAGE_C "NETC_img.bin"
35#define GK20A_NETLIST_IMAGE_D "NETD_img.bin"
36
37/*
38 * Multiple ARCHs must be supported within the same GPU family, so a
39 * path of the form ARCH/NETIMAGE is needed to point to the correct
40 * netimage within the family.
41 * For example, gm20x can be gm204 or gm206, so the path for the
42 * netimage is gm204/NETC_img.bin; the '/' char is inserted at the
43 * null terminator of "GAxxx" to build the complete path, e.g.
44 * gm204/NETC_img.bin.
45 */
46#define GPU_ARCH "GAxxx"
47
48union __max_name {
49#ifdef GK20A_NETLIST_IMAGE_A
50 char __name_a[sizeof(GK20A_NETLIST_IMAGE_A)];
51#endif
52#ifdef GK20A_NETLIST_IMAGE_B
53 char __name_b[sizeof(GK20A_NETLIST_IMAGE_B)];
54#endif
55#ifdef GK20A_NETLIST_IMAGE_C
56 char __name_c[sizeof(GK20A_NETLIST_IMAGE_C)];
57#endif
58#ifdef GK20A_NETLIST_IMAGE_D
59 char __name_d[sizeof(GK20A_NETLIST_IMAGE_D)];
60#endif
61};
62
63#define MAX_NETLIST_NAME (sizeof(GPU_ARCH) + sizeof(union __max_name))
64
65/* index for emulation netlists */
66#define NETLIST_FINAL -1
67#define NETLIST_SLOT_A 0
68#define NETLIST_SLOT_B 1
69#define NETLIST_SLOT_C 2
70#define NETLIST_SLOT_D 3
71#define MAX_NETLIST 4
72
73/* netlist regions */
74#define NETLIST_REGIONID_FECS_UCODE_DATA 0
75#define NETLIST_REGIONID_FECS_UCODE_INST 1
76#define NETLIST_REGIONID_GPCCS_UCODE_DATA 2
77#define NETLIST_REGIONID_GPCCS_UCODE_INST 3
78#define NETLIST_REGIONID_SW_BUNDLE_INIT 4
79#define NETLIST_REGIONID_SW_CTX_LOAD 5
80#define NETLIST_REGIONID_SW_NON_CTX_LOAD 6
81#define NETLIST_REGIONID_SW_METHOD_INIT 7
82#define NETLIST_REGIONID_CTXREG_SYS 8
83#define NETLIST_REGIONID_CTXREG_GPC 9
84#define NETLIST_REGIONID_CTXREG_TPC 10
85#define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11
86#define NETLIST_REGIONID_CTXREG_PM_SYS 12
87#define NETLIST_REGIONID_CTXREG_PM_GPC 13
88#define NETLIST_REGIONID_CTXREG_PM_TPC 14
89#define NETLIST_REGIONID_MAJORV 15
90#define NETLIST_REGIONID_BUFFER_SIZE 16
91#define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17
92#define NETLIST_REGIONID_NETLIST_NUM 18
93#define NETLIST_REGIONID_CTXREG_PPC 19
94#define NETLIST_REGIONID_CTXREG_PMPPC 20
95#define NETLIST_REGIONID_NVPERF_CTXREG_SYS 21
96#define NETLIST_REGIONID_NVPERF_FBP_CTXREGS 22
97#define NETLIST_REGIONID_NVPERF_CTXREG_GPC 23
98#define NETLIST_REGIONID_NVPERF_FBP_ROUTER 24
99#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25
100#define NETLIST_REGIONID_CTXREG_PMLTC 26
101#define NETLIST_REGIONID_CTXREG_PMFBPA 27
102#define NETLIST_REGIONID_SWVEIDBUNDLEINIT 28
103#define NETLIST_REGIONID_NVPERF_SYS_ROUTER 29
104#define NETLIST_REGIONID_NVPERF_PMA 30
105#define NETLIST_REGIONID_CTXREG_PMROP 31
106#define NETLIST_REGIONID_CTXREG_PMUCGPC 32
107#define NETLIST_REGIONID_CTXREG_ETPC 33
108#define NETLIST_REGIONID_SW_BUNDLE64_INIT 34
109#define NETLIST_REGIONID_NVPERF_PMCAU 35
110
111struct netlist_region {
112 u32 region_id;
113 u32 data_size;
114 u32 data_offset;
115};
116
117struct netlist_image_header {
118 u32 version;
119 u32 regions;
120};
121
122struct netlist_image {
123 struct netlist_image_header header;
124 struct netlist_region regions[1];
125};
126
127struct av_gk20a {
128 u32 addr;
129 u32 value;
130};
131struct av64_gk20a {
132 u32 addr;
133 u32 value_lo;
134 u32 value_hi;
135};
136struct aiv_gk20a {
137 u32 addr;
138 u32 index;
139 u32 value;
140};
141struct aiv_list_gk20a {
142 struct aiv_gk20a *l;
143 u32 count;
144};
145struct av_list_gk20a {
146 struct av_gk20a *l;
147 u32 count;
148};
149struct av64_list_gk20a {
150 struct av64_gk20a *l;
151 u32 count;
152};
153struct u32_list_gk20a {
154 u32 *l;
155 u32 count;
156};
157
158struct ctxsw_buf_offset_map_entry {
159 u32 addr; /* Register address */
160 u32 offset; /* Offset in ctxt switch buffer */
161};
162
163static inline
164struct av_gk20a *alloc_av_list_gk20a(struct gk20a *g, struct av_list_gk20a *avl)
165{
166 avl->l = nvgpu_kzalloc(g, avl->count * sizeof(*avl->l));
167 return avl->l;
168}
169
170static inline
171struct av64_gk20a *alloc_av64_list_gk20a(struct gk20a *g, struct av64_list_gk20a *avl)
172{
173 avl->l = nvgpu_kzalloc(g, avl->count * sizeof(*avl->l));
174 return avl->l;
175}
176
177static inline
178struct aiv_gk20a *alloc_aiv_list_gk20a(struct gk20a *g,
179 struct aiv_list_gk20a *aivl)
180{
181 aivl->l = nvgpu_kzalloc(g, aivl->count * sizeof(*aivl->l));
182 return aivl->l;
183}
184
185static inline
186u32 *alloc_u32_list_gk20a(struct gk20a *g, struct u32_list_gk20a *u32l)
187{
188 u32l->l = nvgpu_kzalloc(g, u32l->count * sizeof(*u32l->l));
189 return u32l->l;
190}
191
192struct gr_ucode_gk20a {
193 struct {
194 struct u32_list_gk20a inst;
195 struct u32_list_gk20a data;
196 } gpccs, fecs;
197};
198
199/* main entry for grctx loading */
200int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
201int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr);
202
203struct gpu_ops;
204void gk20a_init_gr_ctx(struct gpu_ops *gops);
205
206#endif /*NVGPU_GK20A_GR_CTX_GK20A_H*/
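
MAX_NETLIST_NAME above sizes the name buffer with a union: a union is as large as its largest member, so sizeof(union __max_name) is a compile-time maximum of the candidate image-name lengths. A small sketch of the same trick, with invented image and arch names:

#include <stdio.h>

#define IMG_A    "NETA_img.bin"
#define IMG_LONG "NET_some_longer_name_img.bin"
#define ARCH     "gm204"

/* sizeof(union max_name) == size of the longest candidate string,
 * NUL terminator included. */
union max_name {
	char a[sizeof(IMG_A)];
	char b[sizeof(IMG_LONG)];
};

/* No extra byte is needed for the '/' because sizeof(ARCH) already
 * counts the NUL that the '/' overwrites, as in the header's comment. */
#define MAX_NAME (sizeof(ARCH) + sizeof(union max_name))

int main(void)
{
	char path[MAX_NAME];

	snprintf(path, sizeof(path), "%s/%s", ARCH, IMG_LONG);
	printf("%s (buffer %zu bytes)\n", path, sizeof(path));
	return 0;
}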
diff --git a/include/gk20a/gr_ctx_gk20a_sim.c b/include/gk20a/gr_ctx_gk20a_sim.c
deleted file mode 100644
index ce65c77..0000000
--- a/include/gk20a/gr_ctx_gk20a_sim.c
+++ /dev/null
@@ -1,356 +0,0 @@
1/*
2 * GK20A Graphics Context for Simulation
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a.h"
26#include <nvgpu/sim.h>
27#include "gr_ctx_gk20a.h"
28
29#include <nvgpu/log.h>
30
31int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
32{
33 int err = -ENOMEM;
34 u32 i, temp;
35
36 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info,
37 "querying grctx info from chiplib");
38
39 g->gr.ctx_vars.dynamic = true;
40 g->gr.netlist = GR_NETLIST_DYNAMIC;
41
42 if (g->sim->esc_readl == NULL) {
43 nvgpu_err(g, "Invalid pointer to query function.");
44 err = -ENOENT;
45 goto fail;
46 }
47
48 /* query sizes and counts */
49 g->sim->esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
50 &g->gr.ctx_vars.ucode.fecs.inst.count);
51 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
52 &g->gr.ctx_vars.ucode.fecs.data.count);
53 g->sim->esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
54 &g->gr.ctx_vars.ucode.gpccs.inst.count);
55 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
56 &g->gr.ctx_vars.ucode.gpccs.data.count);
57 g->sim->esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
58 g->gr.ctx_vars.buffer_size = temp << 2;
59 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
60 &g->gr.ctx_vars.sw_bundle_init.count);
61 g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
62 &g->gr.ctx_vars.sw_method_init.count);
63 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
64 &g->gr.ctx_vars.sw_ctx_load.count);
65 g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT_SIZE", 0,
66 &g->gr.ctx_vars.sw_veid_bundle_init.count);
67 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT_SIZE", 0,
68 &g->gr.ctx_vars.sw_bundle64_init.count);
69
70 g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG_SIZE", 0,
71 &g->gr.ctx_vars.sw_non_ctx_load.count);
72 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
73 &g->gr.ctx_vars.ctxsw_regs.sys.count);
74 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
75 &g->gr.ctx_vars.ctxsw_regs.gpc.count);
76 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
77 &g->gr.ctx_vars.ctxsw_regs.tpc.count);
78 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
79 &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
80 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
81 &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
82 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
83 &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
84 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
85 &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);
86 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
87 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
88 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0,
89 &g->gr.ctx_vars.ctxsw_regs.etpc.count);
90 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
91 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
92
93 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.inst) == NULL) {
94 goto fail;
95 }
96 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.data) == NULL) {
97 goto fail;
98 }
99 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.gpccs.inst) == NULL) {
100 goto fail;
101 }
102 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.gpccs.data) == NULL) {
103 goto fail;
104 }
105 if (alloc_av_list_gk20a(g, &g->gr.ctx_vars.sw_bundle_init) == NULL) {
106 goto fail;
107 }
108 if (alloc_av64_list_gk20a(g,
109 &g->gr.ctx_vars.sw_bundle64_init) == NULL) {
110 goto fail;
111 }
112 if (alloc_av_list_gk20a(g, &g->gr.ctx_vars.sw_method_init) == NULL) {
113 goto fail;
114 }
115 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.sw_ctx_load) == NULL) {
116 goto fail;
117 }
118 if (alloc_av_list_gk20a(g, &g->gr.ctx_vars.sw_non_ctx_load) == NULL) {
119 goto fail;
120 }
121 if (alloc_av_list_gk20a(g,
122 &g->gr.ctx_vars.sw_veid_bundle_init) == NULL) {
123 goto fail;
124 }
125 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.sys) == NULL) {
126 goto fail;
127 }
128 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.gpc) == NULL) {
129 goto fail;
130 }
131 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.tpc) == NULL) {
132 goto fail;
133 }
134 if (alloc_aiv_list_gk20a(g,
135 &g->gr.ctx_vars.ctxsw_regs.zcull_gpc) == NULL) {
136 goto fail;
137 }
138 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.ppc) == NULL) {
139 goto fail;
140 }
141 if (alloc_aiv_list_gk20a(g,
142 &g->gr.ctx_vars.ctxsw_regs.pm_sys) == NULL) {
143 goto fail;
144 }
145 if (alloc_aiv_list_gk20a(g,
146 &g->gr.ctx_vars.ctxsw_regs.pm_gpc) == NULL) {
147 goto fail;
148 }
149 if (alloc_aiv_list_gk20a(g,
150 &g->gr.ctx_vars.ctxsw_regs.pm_tpc) == NULL) {
151 goto fail;
152 }
153 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.etpc) == NULL) {
154 goto fail;
155 }
156
157 for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++) {
158 g->sim->esc_readl(g, "GRCTX_UCODE_INST_FECS",
159 i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]);
160 }
161
162 for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++) {
163 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_FECS",
164 i, &g->gr.ctx_vars.ucode.fecs.data.l[i]);
165 }
166
167 for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++) {
168 g->sim->esc_readl(g, "GRCTX_UCODE_INST_GPCCS",
169 i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);
170 }
171
172 for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++) {
173 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_GPCCS",
174 i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]);
175 }
176
177 for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
178 struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;
179 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR",
180 i, &l[i].addr);
181 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE",
182 i, &l[i].value);
183 }
184
185 for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
186 struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;
187 g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR",
188 i, &l[i].addr);
189 g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE",
190 i, &l[i].value);
191 }
192
193 for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
194 struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;
195 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR",
196 i, &l[i].addr);
197 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX",
198 i, &l[i].index);
199 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE",
200 i, &l[i].value);
201 }
202
203 for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
204 struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;
205 g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG:REG",
206 i, &l[i].addr);
207 g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG:VALUE",
208 i, &l[i].value);
209 }
210
211 for (i = 0; i < g->gr.ctx_vars.sw_veid_bundle_init.count; i++) {
212 struct av_gk20a *l = g->gr.ctx_vars.sw_veid_bundle_init.l;
213
214 g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT:ADDR",
215 i, &l[i].addr);
216 g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT:VALUE",
217 i, &l[i].value);
218 }
219
220 for (i = 0; i < g->gr.ctx_vars.sw_bundle64_init.count; i++) {
221 struct av64_gk20a *l = g->gr.ctx_vars.sw_bundle64_init.l;
222
223 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:ADDR",
224 i, &l[i].addr);
225 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:VALUE_LO",
226 i, &l[i].value_lo);
227 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:VALUE_HI",
228 i, &l[i].value_hi);
229 }
230
231 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
232 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;
233 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR",
234 i, &l[i].addr);
235 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX",
236 i, &l[i].index);
237 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE",
238 i, &l[i].value);
239 }
240
241 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
242 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;
243 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR",
244 i, &l[i].addr);
245 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX",
246 i, &l[i].index);
247 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE",
248 i, &l[i].value);
249 }
250
251 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
252 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;
253 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR",
254 i, &l[i].addr);
255 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX",
256 i, &l[i].index);
257 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE",
258 i, &l[i].value);
259 }
260
261 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
262 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;
263 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR",
264 i, &l[i].addr);
265 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX",
266 i, &l[i].index);
267 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE",
268 i, &l[i].value);
269 }
270
271 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
272 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;
273 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR",
274 i, &l[i].addr);
275 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX",
276 i, &l[i].index);
277 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE",
278 i, &l[i].value);
279 }
280
281 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
282 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;
283 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR",
284 i, &l[i].addr);
285 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX",
286 i, &l[i].index);
287 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE",
288 i, &l[i].value);
289 }
290
291 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
292 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;
293 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR",
294 i, &l[i].addr);
295 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX",
296 i, &l[i].index);
297 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE",
298 i, &l[i].value);
299 }
300
301 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
302 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;
303 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR",
304 i, &l[i].addr);
305 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX",
306 i, &l[i].index);
307 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE",
308 i, &l[i].value);
309 }
310
311 nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_ETPC");
312 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
313 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.etpc.l;
314 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:ADDR",
315 i, &l[i].addr);
316 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:INDEX",
317 i, &l[i].index);
318 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:VALUE",
319 i, &l[i].value);
320 nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn,
321 "addr:0x%#08x index:0x%08x value:0x%08x",
322 l[i].addr, l[i].index, l[i].value);
323 }
324
325 g->gr.ctx_vars.valid = true;
326
327 g->sim->esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
328 &g->gr.ctx_vars.regs_base_index);
329
330 nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib");
331 return 0;
332fail:
333 nvgpu_err(g, "failed querying grctx info from chiplib");
334
335 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.inst.l);
336 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.data.l);
337 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.inst.l);
338 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.data.l);
339 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle_init.l);
340 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle64_init.l);
341 nvgpu_kfree(g, g->gr.ctx_vars.sw_method_init.l);
342 nvgpu_kfree(g, g->gr.ctx_vars.sw_ctx_load.l);
343 nvgpu_kfree(g, g->gr.ctx_vars.sw_non_ctx_load.l);
344 nvgpu_kfree(g, g->gr.ctx_vars.sw_veid_bundle_init.l);
345 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.sys.l);
346 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.gpc.l);
347 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.tpc.l);
348 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l);
349 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.ppc.l);
350 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
351 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
352 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
353 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.etpc.l);
354
355 return err;
356}
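
gr_gk20a_init_ctx_vars_sim() uses a two-phase escape-read protocol: first query the element count for each list, allocate the list, then query every element by index through the sim callback. A user-space sketch of that shape follows, with a fake callback standing in for g->sim->esc_readl() and made-up query names.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical escape-read callback: look a named value up by index. */
typedef void (*esc_readl_fn)(const char *name, uint32_t index, uint32_t *out);

static void fake_esc_readl(const char *name, uint32_t index, uint32_t *out)
{
	if (strcmp(name, "REG_LIST_COUNT") == 0)
		*out = 3;
	else /* "REG_LIST:VALUE" */
		*out = 0x100 + index;
}

/* Query the count first, size the array, then query every element --
 * the same two-phase shape as gr_gk20a_init_ctx_vars_sim(). */
static uint32_t *query_list(esc_readl_fn esc, uint32_t *count)
{
	uint32_t *vals, i;

	esc("REG_LIST_COUNT", 0, count);
	vals = calloc(*count, sizeof(*vals));
	if (vals == NULL)
		return NULL;

	for (i = 0; i < *count; i++)
		esc("REG_LIST:VALUE", i, &vals[i]);
	return vals;
}

int main(void)
{
	uint32_t count = 0, i;
	uint32_t *vals = query_list(fake_esc_readl, &count);

	for (i = 0; vals != NULL && i < count; i++)
		printf("reg[%u] = 0x%x\n", i, vals[i]);
	free(vals);
	return 0;
}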
diff --git a/include/gk20a/gr_gk20a.c b/include/gk20a/gr_gk20a.c
deleted file mode 100644
index 1eda853..0000000
--- a/include/gk20a/gr_gk20a.c
+++ /dev/null
@@ -1,9090 +0,0 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/dma.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/log.h>
31#include <nvgpu/bsearch.h>
32#include <nvgpu/sort.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/firmware.h>
35#include <nvgpu/enabled.h>
36#include <nvgpu/debug.h>
37#include <nvgpu/barrier.h>
38#include <nvgpu/mm.h>
39#include <nvgpu/ctxsw_trace.h>
40#include <nvgpu/error_notifier.h>
41#include <nvgpu/ecc.h>
42#include <nvgpu/io.h>
43#include <nvgpu/utils.h>
44#include <nvgpu/channel.h>
45#include <nvgpu/unit.h>
46#include <nvgpu/power_features/pg.h>
47#include <nvgpu/power_features/cg.h>
48
49#include "gk20a.h"
50#include "gr_gk20a.h"
51#include "gk20a/fecs_trace_gk20a.h"
52#include "gr_ctx_gk20a.h"
53#include "gr_pri_gk20a.h"
54#include "regops_gk20a.h"
55#include "dbg_gpu_gk20a.h"
56
57#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
58#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
59#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
60#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
61#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
62#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
63#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
64#include <nvgpu/hw/gk20a/hw_pri_ringmaster_gk20a.h>
65#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
66#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
67
68#define BLK_SIZE (256)
69#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
70#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
71#define NV_PCFG_BASE 0x00088000
72#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020
73#define FE_PWR_MODE_TIMEOUT_MAX 2000
74#define FE_PWR_MODE_TIMEOUT_DEFAULT 10
75#define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000
76#define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT 10
77#define FECS_ARB_CMD_TIMEOUT_MAX 40
78#define FECS_ARB_CMD_TIMEOUT_DEFAULT 2
79
80static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g);
81
82static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
83 struct vm_gk20a *vm,
84 struct nvgpu_gr_ctx *gr_ctx);
85
86/* channel patch ctx buffer */
87static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
88 struct channel_gk20a *c);
89static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
90 struct vm_gk20a *vm,
91 struct nvgpu_gr_ctx *gr_ctx);
92
93/* golden ctx image */
94static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
95 struct channel_gk20a *c);
96
97int gr_gk20a_get_ctx_id(struct gk20a *g,
98 struct channel_gk20a *c,
99 u32 *ctx_id)
100{
101 struct tsg_gk20a *tsg;
102 struct nvgpu_gr_ctx *gr_ctx = NULL;
103 struct nvgpu_mem *mem = NULL;
104
105 tsg = tsg_gk20a_from_ch(c);
106 if (tsg == NULL) {
107 return -EINVAL;
108 }
109
110 gr_ctx = &tsg->gr_ctx;
111 mem = &gr_ctx->mem;
112
113 /* Channel gr_ctx buffer is gpu cacheable.
114 Flush and invalidate before cpu update. */
115 g->ops.mm.l2_flush(g, true);
116
117 *ctx_id = nvgpu_mem_rd(g, mem,
118 ctxsw_prog_main_image_context_id_o());
119 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", *ctx_id);
120
121 return 0;
122}
123
124void gk20a_gpccs_dump_falcon_stats(struct gk20a *g)
125{
126 unsigned int i;
127
128 nvgpu_err(g, "gr_gpc0_gpccs_falcon_irqstat : %d",
129 gk20a_readl(g, gr_gpc0_gpccs_falcon_irqstat_r()));
130 nvgpu_err(g, "gr_gpc0_gpccs_falcon_irqmode : %d",
131 gk20a_readl(g, gr_gpc0_gpccs_falcon_irqmode_r()));
132 nvgpu_err(g, "gr_gpc0_gpccs_falcon_irqmask : %d",
133 gk20a_readl(g, gr_gpc0_gpccs_falcon_irqmask_r()));
134 nvgpu_err(g, "gr_gpc0_gpccs_falcon_irqdest : %d",
135 gk20a_readl(g, gr_gpc0_gpccs_falcon_irqdest_r()));
136 nvgpu_err(g, "gr_gpc0_gpccs_falcon_debug1 : %d",
137 gk20a_readl(g, gr_gpc0_gpccs_falcon_debug1_r()));
138 nvgpu_err(g, "gr_gpc0_gpccs_falcon_debuginfo : %d",
139 gk20a_readl(g, gr_gpc0_gpccs_falcon_debuginfo_r()));
140 nvgpu_err(g, "gr_gpc0_gpccs_falcon_engctl : %d",
141 gk20a_readl(g, gr_gpc0_gpccs_falcon_engctl_r()));
142 nvgpu_err(g, "gr_gpc0_gpccs_falcon_curctx : %d",
143 gk20a_readl(g, gr_gpc0_gpccs_falcon_curctx_r()));
144 nvgpu_err(g, "gr_gpc0_gpccs_falcon_nxtctx : %d",
145 gk20a_readl(g, gr_gpc0_gpccs_falcon_nxtctx_r()));
146 nvgpu_err(g, "gr_gpc0_gpccs_ctxsw_status_1 : %d",
147 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
148
149 for (i = 0; i < g->ops.gr.gpc0_gpccs_ctxsw_mailbox_size(); i++) {
150 nvgpu_err(g, "gr_gpc0_gpccs_ctxsw_mailbox_r(%d) : 0x%x",
151 i, gk20a_readl(g, gr_gpc0_gpccs_ctxsw_mailbox_r(i)));
152 }
153
154
155 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
156 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
157 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
158 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_IMB : 0x%x",
159 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
160
161 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
162 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
163 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
164 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_DMB : 0x%x",
165 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
166
167 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
168 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
169 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
170 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_CSW : 0x%x",
171 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
172
173 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
174 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
175 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
176 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_CTX : 0x%x",
177 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
178
179 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
180 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
181 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
182 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_EXCI : 0x%x",
183 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
184
185
186 for (i = 0; i < 4U; i++) {
187 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
188 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
189 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
190 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_PC : 0x%x",
191 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
192
193 gk20a_writel(g, gr_gpc0_gpccs_falcon_icd_cmd_r(),
194 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f() |
195 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
196 nvgpu_err(g, "GPC0_GPCCS_FALCON_REG_SP : 0x%x",
197 gk20a_readl(g, gr_gpc_gpccs_falcon_icd_rdata_r()));
198 }
199}
200
201void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
202{
203 unsigned int i;
204
205 nvgpu_err(g, "gr_fecs_os_r : %d",
206 gk20a_readl(g, gr_fecs_os_r()));
207 nvgpu_err(g, "gr_fecs_cpuctl_r : 0x%x",
208 gk20a_readl(g, gr_fecs_cpuctl_r()));
209 nvgpu_err(g, "gr_fecs_idlestate_r : 0x%x",
210 gk20a_readl(g, gr_fecs_idlestate_r()));
211 nvgpu_err(g, "gr_fecs_mailbox0_r : 0x%x",
212 gk20a_readl(g, gr_fecs_mailbox0_r()));
213 nvgpu_err(g, "gr_fecs_mailbox1_r : 0x%x",
214 gk20a_readl(g, gr_fecs_mailbox1_r()));
215 nvgpu_err(g, "gr_fecs_irqstat_r : 0x%x",
216 gk20a_readl(g, gr_fecs_irqstat_r()));
217 nvgpu_err(g, "gr_fecs_irqmode_r : 0x%x",
218 gk20a_readl(g, gr_fecs_irqmode_r()));
219 nvgpu_err(g, "gr_fecs_irqmask_r : 0x%x",
220 gk20a_readl(g, gr_fecs_irqmask_r()));
221 nvgpu_err(g, "gr_fecs_irqdest_r : 0x%x",
222 gk20a_readl(g, gr_fecs_irqdest_r()));
223 nvgpu_err(g, "gr_fecs_debug1_r : 0x%x",
224 gk20a_readl(g, gr_fecs_debug1_r()));
225 nvgpu_err(g, "gr_fecs_debuginfo_r : 0x%x",
226 gk20a_readl(g, gr_fecs_debuginfo_r()));
227 nvgpu_err(g, "gr_fecs_ctxsw_status_1_r : 0x%x",
228 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
229
230 for (i = 0; i < g->ops.gr.fecs_ctxsw_mailbox_size(); i++) {
231 nvgpu_err(g, "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x",
232 i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i)));
233 }
234
235 nvgpu_err(g, "gr_fecs_engctl_r : 0x%x",
236 gk20a_readl(g, gr_fecs_engctl_r()));
237 nvgpu_err(g, "gr_fecs_curctx_r : 0x%x",
238 gk20a_readl(g, gr_fecs_curctx_r()));
239 nvgpu_err(g, "gr_fecs_nxtctx_r : 0x%x",
240 gk20a_readl(g, gr_fecs_nxtctx_r()));
241
242 gk20a_writel(g, gr_fecs_icd_cmd_r(),
243 gr_fecs_icd_cmd_opc_rreg_f() |
244 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
245 nvgpu_err(g, "FECS_FALCON_REG_IMB : 0x%x",
246 gk20a_readl(g, gr_fecs_icd_rdata_r()));
247
248 gk20a_writel(g, gr_fecs_icd_cmd_r(),
249 gr_fecs_icd_cmd_opc_rreg_f() |
250 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
251 nvgpu_err(g, "FECS_FALCON_REG_DMB : 0x%x",
252 gk20a_readl(g, gr_fecs_icd_rdata_r()));
253
254 gk20a_writel(g, gr_fecs_icd_cmd_r(),
255 gr_fecs_icd_cmd_opc_rreg_f() |
256 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
257 nvgpu_err(g, "FECS_FALCON_REG_CSW : 0x%x",
258 gk20a_readl(g, gr_fecs_icd_rdata_r()));
259
260 gk20a_writel(g, gr_fecs_icd_cmd_r(),
261 gr_fecs_icd_cmd_opc_rreg_f() |
262 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
263 nvgpu_err(g, "FECS_FALCON_REG_CTX : 0x%x",
264 gk20a_readl(g, gr_fecs_icd_rdata_r()));
265
266 gk20a_writel(g, gr_fecs_icd_cmd_r(),
267 gr_fecs_icd_cmd_opc_rreg_f() |
268 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
269 nvgpu_err(g, "FECS_FALCON_REG_EXCI : 0x%x",
270 gk20a_readl(g, gr_fecs_icd_rdata_r()));
271
272 for (i = 0; i < 4; i++) {
273 gk20a_writel(g, gr_fecs_icd_cmd_r(),
274 gr_fecs_icd_cmd_opc_rreg_f() |
275 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC));
276 nvgpu_err(g, "FECS_FALCON_REG_PC : 0x%x",
277 gk20a_readl(g, gr_fecs_icd_rdata_r()));
278
279 gk20a_writel(g, gr_fecs_icd_cmd_r(),
280 gr_fecs_icd_cmd_opc_rreg_f() |
281 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP));
282 nvgpu_err(g, "FECS_FALCON_REG_SP : 0x%x",
283 gk20a_readl(g, gr_fecs_icd_rdata_r()));
284 }
285}
286
287static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
288{
289 u32 i, ucode_u32_size;
290 const u32 *ucode_u32_data;
291 u32 checksum;
292
293 nvgpu_log_fn(g, " ");
294
295 gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
296 gr_gpccs_dmemc_blk_f(0) |
297 gr_gpccs_dmemc_aincw_f(1)));
298
299 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
300 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
301
302 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
303 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
304 checksum += ucode_u32_data[i];
305 }
306
307 gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
308 gr_fecs_dmemc_blk_f(0) |
309 gr_fecs_dmemc_aincw_f(1)));
310
311 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
312 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
313
314 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
315 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
316 checksum += ucode_u32_data[i];
317 }
318 nvgpu_log_fn(g, "done");
319}
320
321static void gr_gk20a_load_falcon_imem(struct gk20a *g)
322{
323 u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
324 const u32 *ucode_u32_data;
325 u32 tag, i, pad_start, pad_end;
326 u32 checksum;
327
328 nvgpu_log_fn(g, " ");
329
330 cfg = gk20a_readl(g, gr_fecs_cfg_r());
331 fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
332
333 cfg = gk20a_readl(g, gr_gpc0_cfg_r());
334 gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
335
336 /* Use the broadcast address to access all of the GPCCS units. */
337 gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
338 gr_gpccs_imemc_blk_f(0) |
339 gr_gpccs_imemc_aincw_f(1)));
340
341 /* Setup the tags for the instruction memory. */
342 tag = 0;
343 gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
344
345 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
346 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
347
348 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
349 if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
350 tag++;
351 gk20a_writel(g, gr_gpccs_imemt_r(0),
352 gr_gpccs_imemt_tag_f(tag));
353 }
354 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
355 checksum += ucode_u32_data[i];
356 }
357
358 pad_start = i * 4U;
359 pad_end = pad_start + (256U - pad_start % 256U) + 256U;
360 for (i = pad_start;
361 (i < gpccs_imem_size * 256U) && (i < pad_end);
362 i += 4U) {
363 if ((i != 0U) && ((i % 256U) == 0U)) {
364 tag++;
365 gk20a_writel(g, gr_gpccs_imemt_r(0),
366 gr_gpccs_imemt_tag_f(tag));
367 }
368 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
369 }
370
371 gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
372 gr_fecs_imemc_blk_f(0) |
373 gr_fecs_imemc_aincw_f(1)));
374
375 /* Setup the tags for the instruction memory. */
376 tag = 0;
377 gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
378
379 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
380 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
381
382 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
383 if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
384 tag++;
385 gk20a_writel(g, gr_fecs_imemt_r(0),
386 gr_fecs_imemt_tag_f(tag));
387 }
388 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
389 checksum += ucode_u32_data[i];
390 }
391
392 pad_start = i * 4U;
393 pad_end = pad_start + (256U - pad_start % 256U) + 256U;
394 for (i = pad_start;
395 (i < fecs_imem_size * 256U) && i < pad_end;
396 i += 4U) {
397 if ((i != 0U) && ((i % 256U) == 0U)) {
398 tag++;
399 gk20a_writel(g, gr_fecs_imemt_r(0),
400 gr_fecs_imemt_tag_f(tag));
401 }
402 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
403 }
404}
405
406int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
407 u32 expect_delay)
408{
409 u32 delay = expect_delay;
410 bool ctxsw_active;
411 bool gr_busy;
412 u32 gr_engine_id;
413 u32 engine_status;
414 bool ctx_status_invalid;
415 struct nvgpu_timeout timeout;
416
417 nvgpu_log_fn(g, " ");
418
419 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
420
421 nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);
422
423 do {
424 /* fmodel: host gets fifo_engine_status(gr) from gr
425 only when gr_status is read */
426 (void) gk20a_readl(g, gr_status_r());
427
428 engine_status = gk20a_readl(g,
429 fifo_engine_status_r(gr_engine_id));
430
431 ctxsw_active = engine_status &
432 fifo_engine_status_ctxsw_in_progress_f();
433
434 ctx_status_invalid =
435 (fifo_engine_status_ctx_status_v(engine_status) ==
436 fifo_engine_status_ctx_status_invalid_v());
437
438 gr_busy = gk20a_readl(g, gr_engine_status_r()) &
439 gr_engine_status_value_busy_f();
440
441 if (ctx_status_invalid || (!gr_busy && !ctxsw_active)) {
442 nvgpu_log_fn(g, "done");
443 return 0;
444 }
445
446 nvgpu_usleep_range(delay, delay * 2);
447 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
448
449 } while (nvgpu_timeout_expired(&timeout) == 0);
450
451 nvgpu_err(g,
452 "timeout, ctxsw busy : %d, gr busy : %d",
453 ctxsw_active, gr_busy);
454
455 return -EAGAIN;
456}
457
458int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
459 u32 expect_delay)
460{
461 u32 val;
462 u32 delay = expect_delay;
463 struct nvgpu_timeout timeout;
464
465 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
466 return 0;
467 }
468
469 nvgpu_log_fn(g, " ");
470
471 nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);
472
473 do {
474 val = gk20a_readl(g, gr_status_r());
475
476 if (gr_status_fe_method_lower_v(val) == 0U) {
477 nvgpu_log_fn(g, "done");
478 return 0;
479 }
480
481 nvgpu_usleep_range(delay, delay * 2);
482 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
483 } while (nvgpu_timeout_expired(&timeout) == 0);
484
485 nvgpu_err(g,
486 "timeout, fe busy : %x", val);
487
488 return -EAGAIN;
489}
490
491int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
492 u32 *mailbox_ret, u32 opc_success,
493 u32 mailbox_ok, u32 opc_fail,
494 u32 mailbox_fail, bool sleepduringwait)
495{
496 struct nvgpu_timeout timeout;
497 u32 delay = GR_FECS_POLL_INTERVAL;
498 u32 check = WAIT_UCODE_LOOP;
499 u32 reg;
500
501 nvgpu_log_fn(g, " ");
502
503 if (sleepduringwait) {
504 delay = GR_IDLE_CHECK_DEFAULT;
505 }
506
507 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
508 NVGPU_TIMER_CPU_TIMER);
509
510 while (check == WAIT_UCODE_LOOP) {
511 if (nvgpu_timeout_expired(&timeout)) {
512 check = WAIT_UCODE_TIMEOUT;
513 }
514
515 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
516
517 if (mailbox_ret) {
518 *mailbox_ret = reg;
519 }
520
521 switch (opc_success) {
522 case GR_IS_UCODE_OP_EQUAL:
523 if (reg == mailbox_ok) {
524 check = WAIT_UCODE_OK;
525 }
526 break;
527 case GR_IS_UCODE_OP_NOT_EQUAL:
528 if (reg != mailbox_ok) {
529 check = WAIT_UCODE_OK;
530 }
531 break;
532 case GR_IS_UCODE_OP_AND:
533 if (reg & mailbox_ok) {
534 check = WAIT_UCODE_OK;
535 }
536 break;
537 case GR_IS_UCODE_OP_LESSER:
538 if (reg < mailbox_ok) {
539 check = WAIT_UCODE_OK;
540 }
541 break;
542 case GR_IS_UCODE_OP_LESSER_EQUAL:
543 if (reg <= mailbox_ok) {
544 check = WAIT_UCODE_OK;
545 }
546 break;
547 case GR_IS_UCODE_OP_SKIP:
548			/* no success check */
549 break;
550 default:
551 nvgpu_err(g,
552 "invalid success opcode 0x%x", opc_success);
553
554 check = WAIT_UCODE_ERROR;
555 break;
556 }
557
558 switch (opc_fail) {
559 case GR_IS_UCODE_OP_EQUAL:
560 if (reg == mailbox_fail) {
561 check = WAIT_UCODE_ERROR;
562 }
563 break;
564 case GR_IS_UCODE_OP_NOT_EQUAL:
565 if (reg != mailbox_fail) {
566 check = WAIT_UCODE_ERROR;
567 }
568 break;
569 case GR_IS_UCODE_OP_AND:
570 if (reg & mailbox_fail) {
571 check = WAIT_UCODE_ERROR;
572 }
573 break;
574 case GR_IS_UCODE_OP_LESSER:
575 if (reg < mailbox_fail) {
576 check = WAIT_UCODE_ERROR;
577 }
578 break;
579 case GR_IS_UCODE_OP_LESSER_EQUAL:
580 if (reg <= mailbox_fail) {
581 check = WAIT_UCODE_ERROR;
582 }
583 break;
584 case GR_IS_UCODE_OP_SKIP:
585			/* no check on fail */
586 break;
587 default:
588 nvgpu_err(g,
589 "invalid fail opcode 0x%x", opc_fail);
590 check = WAIT_UCODE_ERROR;
591 break;
592 }
593
594 if (sleepduringwait) {
595 nvgpu_usleep_range(delay, delay * 2);
596 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
597 } else {
598 nvgpu_udelay(delay);
599 }
600 }
601
602 if (check == WAIT_UCODE_TIMEOUT) {
603 nvgpu_err(g,
604 "timeout waiting on mailbox=%d value=0x%08x",
605 mailbox_id, reg);
606 gk20a_fecs_dump_falcon_stats(g);
607 gk20a_gpccs_dump_falcon_stats(g);
608 gk20a_gr_debug_dump(g);
609 return -1;
610 } else if (check == WAIT_UCODE_ERROR) {
611 nvgpu_err(g,
612 "ucode method failed on mailbox=%d value=0x%08x",
613 mailbox_id, reg);
614 gk20a_fecs_dump_falcon_stats(g);
615 gk20a_gpccs_dump_falcon_stats(g);
616 return -1;
617 }
618
619 nvgpu_log_fn(g, "done");
620 return 0;
621}
622
623int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
624 struct fecs_method_op_gk20a op,
625 bool sleepduringwait)
626{
627 int ret;
628
629 if (op.mailbox.id != 0) {
630 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
631 op.mailbox.data);
632 }
633
634 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
635 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
636
637 gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
638 gk20a_writel(g, gr_fecs_method_push_r(),
639 gr_fecs_method_push_adr_f(op.method.addr));
640
641	/* op.mailbox.id == 4 cases require waiting for completion
642	 * on mailbox 0 instead */
643 if (op.mailbox.id == 4) {
644 op.mailbox.id = 0;
645 }
646
647 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
648 op.cond.ok, op.mailbox.ok,
649 op.cond.fail, op.mailbox.fail,
650 sleepduringwait);
651 if (ret) {
652 nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x",
653 op.method.data, op.method.addr);
654 }
655
656 return ret;
657}
658
659/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
660 * We should replace most, if not all, fecs method calls to this instead. */
661int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
662 struct fecs_method_op_gk20a op,
663 bool sleepduringwait)
664{
665 struct gr_gk20a *gr = &g->gr;
666 int ret;
667
668 nvgpu_mutex_acquire(&gr->fecs_mutex);
669
670 ret = gr_gk20a_submit_fecs_method_op_locked(g, op, sleepduringwait);
671
672 nvgpu_mutex_release(&gr->fecs_mutex);
673
674 return ret;
675}
676
677/* Sideband mailbox writes are done a bit differently */
678int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
679 struct fecs_method_op_gk20a op)
680{
681 struct gr_gk20a *gr = &g->gr;
682 int ret;
683
684 nvgpu_mutex_acquire(&gr->fecs_mutex);
685
686 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id),
687 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
688
689 gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
690 gk20a_writel(g, gr_fecs_method_push_r(),
691 gr_fecs_method_push_adr_f(op.method.addr));
692
693 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
694 op.cond.ok, op.mailbox.ok,
695 op.cond.fail, op.mailbox.fail,
696 false);
697 if (ret) {
698 nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x",
699 op.method.data, op.method.addr);
700 }
701
702 nvgpu_mutex_release(&gr->fecs_mutex);
703
704 return ret;
705}
706
707static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
708{
709 return gr_gk20a_submit_fecs_method_op(g,
710 (struct fecs_method_op_gk20a) {
711 .method.addr = fecs_method,
712 .method.data = ~0,
713 .mailbox = { .id = 1, /*sideband?*/
714 .data = ~0, .clr = ~0, .ret = ret,
715 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
716 .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
717 .cond.ok = GR_IS_UCODE_OP_EQUAL,
718 .cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
719}
720
721/**
722 * Stop processing (stall) context switches at FECS.
723 * If FECS is sent the stop_ctxsw method, ELPG entry/exit cannot happen
724 * and may time out. This can manifest as different error signatures
725 * depending on when the stop_ctxsw FECS method is sent with respect to
726 * the PMU ELPG sequence: it may appear as a PMU halt, an abort, or
727 * possibly an external error.
728 */
729int gr_gk20a_disable_ctxsw(struct gk20a *g)
730{
731 int err = 0;
732
733 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
734
735 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
736 g->ctxsw_disable_count++;
737 if (g->ctxsw_disable_count == 1) {
738 err = nvgpu_pg_elpg_disable(g);
739 if (err != 0) {
740 nvgpu_err(g, "failed to disable elpg. not safe to "
741 "stop_ctxsw");
742 /* stop ctxsw command is not sent */
743 g->ctxsw_disable_count--;
744 } else {
745 err = gr_gk20a_ctrl_ctxsw(g,
746 gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
747 if (err != 0) {
748 nvgpu_err(g, "failed to stop fecs ctxsw");
749 /* stop ctxsw failed */
750 g->ctxsw_disable_count--;
751 }
752 }
753 } else {
754 nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d",
755 g->ctxsw_disable_count);
756 }
757 nvgpu_mutex_release(&g->ctxsw_disable_lock);
758
759 return err;
760}
761
762/* Start processing (continue) context switches at FECS */
763int gr_gk20a_enable_ctxsw(struct gk20a *g)
764{
765 int err = 0;
766
767 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
768
769 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
770
771 if (g->ctxsw_disable_count == 0) {
772 goto ctxsw_already_enabled;
773 }
774 g->ctxsw_disable_count--;
775 WARN_ON(g->ctxsw_disable_count < 0);
776 if (g->ctxsw_disable_count == 0) {
777 err = gr_gk20a_ctrl_ctxsw(g,
778 gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
779 if (err != 0) {
780 nvgpu_err(g, "failed to start fecs ctxsw");
781 } else {
782 if (nvgpu_pg_elpg_enable(g) != 0) {
783 nvgpu_err(g, "failed to enable elpg "
784 "after start_ctxsw");
785 }
786 }
787 } else {
788 nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet",
789 g->ctxsw_disable_count);
790 }
791ctxsw_already_enabled:
792 nvgpu_mutex_release(&g->ctxsw_disable_lock);
793
794 return err;
795}
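
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): disable/enable are refcounted through ctxsw_disable_count, so a
 * caller that needs FECS stalled is expected to pair the two calls:
 *
 *	err = gr_gk20a_disable_ctxsw(g);
 *	if (err == 0) {
 *		(operate on FECS-managed state)
 *		err = gr_gk20a_enable_ctxsw(g);
 *	}
 */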
796
797int gr_gk20a_halt_pipe(struct gk20a *g)
798{
799 return gr_gk20a_submit_fecs_method_op(g,
800 (struct fecs_method_op_gk20a) {
801 .method.addr =
802 gr_fecs_method_push_adr_halt_pipeline_v(),
803 .method.data = ~0,
804 .mailbox = { .id = 1, /*sideband?*/
805 .data = ~0, .clr = ~0, .ret = NULL,
806 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
807 .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
808 .cond.ok = GR_IS_UCODE_OP_EQUAL,
809 .cond.fail = GR_IS_UCODE_OP_EQUAL }, false);
810}
811
812
813int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
814{
815 u32 addr_lo;
816 u32 addr_hi;
817
818 nvgpu_log_fn(c->g, " ");
819
820 addr_lo = u64_lo32(gpu_va) >> 12;
821 addr_hi = u64_hi32(gpu_va);
822
823 nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(),
824 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
825 ram_in_gr_wfi_ptr_lo_f(addr_lo));
826
827 nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(),
828 ram_in_gr_wfi_ptr_hi_f(addr_hi));
829
830 return 0;
831}
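
/*
 * Worked example (editor's addition, not part of the original file):
 * for a hypothetical gpu_va = 0x0000000123456000,
 *	addr_lo = u64_lo32(gpu_va) >> 12 = 0x23456000 >> 12 = 0x00023456
 *	addr_hi = u64_hi32(gpu_va)       = 0x00000001
 * i.e. the 4KB-aligned low bits and the upper 32 bits of the context VA
 * are written into the instance block as separate fields.
 */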
832
833/*
834 * Context state can either be written directly or "patched" into the patch
835 * context buffer. So that the same code can be used in either situation, it
836 * is written as a series of _ctx_patch_write(..., patch) statements. Any map
837 * overhead should be minimized, so bundle a sequence of these writes together,
838 * opening with _ctx_patch_write_begin and closing with _ctx_patch_write_end.
839 */
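
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a typical patched update bundles its writes between begin/end using the
 * helpers defined below:
 *
 *	err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
 *	if (err == 0) {
 *		gr_gk20a_ctx_patch_write(g, gr_ctx, addr, data, true);
 *		gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
 *	}
 *
 * gr_gk20a_commit_global_ctx_buffers() below follows this pattern.
 */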
840
841int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
842 struct nvgpu_gr_ctx *gr_ctx,
843 bool update_patch_count)
844{
845 if (update_patch_count) {
846 /* reset patch count if ucode has already processed it */
847 gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
848 &gr_ctx->mem,
849 ctxsw_prog_main_image_patch_count_o());
850 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
851 gr_ctx->patch_ctx.data_count);
852 }
853 return 0;
854}
855
856void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
857 struct nvgpu_gr_ctx *gr_ctx,
858 bool update_patch_count)
859{
860 /* Write context count to context image if it is mapped */
861 if (update_patch_count) {
862 nvgpu_mem_wr(g, &gr_ctx->mem,
863 ctxsw_prog_main_image_patch_count_o(),
864 gr_ctx->patch_ctx.data_count);
865 nvgpu_log(g, gpu_dbg_info, "write patch count %d",
866 gr_ctx->patch_ctx.data_count);
867 }
868}
869
870void gr_gk20a_ctx_patch_write(struct gk20a *g,
871 struct nvgpu_gr_ctx *gr_ctx,
872 u32 addr, u32 data, bool patch)
873{
874 if (patch) {
875 u32 patch_slot = gr_ctx->patch_ctx.data_count *
876 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
877 if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE(
878 gr_ctx->patch_ctx.mem.size) -
879 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
880 nvgpu_err(g, "failed to access patch_slot %d",
881 patch_slot);
882 return;
883 }
884 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr);
885 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data);
886 gr_ctx->patch_ctx.data_count++;
887 nvgpu_log(g, gpu_dbg_info,
888 "patch addr = 0x%x data = 0x%x data_count %d",
889 addr, data, gr_ctx->patch_ctx.data_count);
890 } else {
891 gk20a_writel(g, addr, data);
892 }
893}
894
895static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
896{
897 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
898 ram_in_base_shift_v();
899 u32 aperture = nvgpu_aperture_mask(g, inst_block,
900 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
901 gr_fecs_current_ctx_target_sys_mem_coh_f(),
902 gr_fecs_current_ctx_target_vid_mem_f());
903
904 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
905 gr_fecs_current_ctx_valid_f(1);
906}
907
908int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
909 struct channel_gk20a *c)
910{
911 u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
912 >> ram_in_base_shift_v());
913 u32 data = fecs_current_ctx_data(g, &c->inst_block);
914 u32 ret;
915
916 nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x",
917 c->chid, inst_base_ptr);
918
919 ret = gr_gk20a_submit_fecs_method_op(g,
920 (struct fecs_method_op_gk20a) {
921 .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
922 .method.data = data,
923 .mailbox = { .id = 0, .data = 0,
924 .clr = 0x30,
925 .ret = NULL,
926 .ok = 0x10,
927 .fail = 0x20, },
928 .cond.ok = GR_IS_UCODE_OP_AND,
929 .cond.fail = GR_IS_UCODE_OP_AND}, true);
930 if (ret) {
931 nvgpu_err(g,
932 "bind channel instance failed");
933 }
934
935 return ret;
936}
937
938void gr_gk20a_write_zcull_ptr(struct gk20a *g,
939 struct nvgpu_mem *mem, u64 gpu_va)
940{
941 u32 va = u64_lo32(gpu_va >> 8);
942
943 nvgpu_mem_wr(g, mem,
944 ctxsw_prog_main_image_zcull_ptr_o(), va);
945}
946
947void gr_gk20a_write_pm_ptr(struct gk20a *g,
948 struct nvgpu_mem *mem, u64 gpu_va)
949{
950 u32 va = u64_lo32(gpu_va >> 8);
951
952 nvgpu_mem_wr(g, mem,
953 ctxsw_prog_main_image_pm_ptr_o(), va);
954}
955
956static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
957{
958 struct tsg_gk20a *tsg;
959 struct nvgpu_gr_ctx *gr_ctx = NULL;
960 struct nvgpu_mem *mem = NULL;
961 struct nvgpu_mem *ctxheader = &c->ctx_header;
962 int ret = 0;
963
964 nvgpu_log_fn(g, " ");
965
966 tsg = tsg_gk20a_from_ch(c);
967 if (tsg == NULL) {
968 return -EINVAL;
969 }
970
971 gr_ctx = &tsg->gr_ctx;
972 mem = &gr_ctx->mem;
973
974 if (gr_ctx->zcull_ctx.gpu_va == 0 &&
975 gr_ctx->zcull_ctx.ctx_sw_mode ==
976 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
977 return -EINVAL;
978 }
979
980 ret = gk20a_disable_channel_tsg(g, c);
981 if (ret) {
982 nvgpu_err(g, "failed to disable channel/TSG");
983 return ret;
984 }
985 ret = gk20a_fifo_preempt(g, c);
986 if (ret) {
987 gk20a_enable_channel_tsg(g, c);
988 nvgpu_err(g, "failed to preempt channel/TSG");
989 return ret;
990 }
991
992 nvgpu_mem_wr(g, mem,
993 ctxsw_prog_main_image_zcull_o(),
994 gr_ctx->zcull_ctx.ctx_sw_mode);
995
996 if (ctxheader->gpu_va) {
997 g->ops.gr.write_zcull_ptr(g, ctxheader,
998 gr_ctx->zcull_ctx.gpu_va);
999 } else {
1000 g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
1001 }
1002
1003 gk20a_enable_channel_tsg(g, c);
1004
1005 return ret;
1006}
1007
1008u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc)
1009{
1010 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1011 u32 gpc_offset = gpc_stride * gpc;
1012
1013 return gpc_offset;
1014}
1015
1016u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc)
1017{
1018 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1019 GPU_LIT_TPC_IN_GPC_STRIDE);
1020 u32 tpc_offset = tpc_in_gpc_stride * tpc;
1021
1022 return tpc_offset;
1023}
1024
1025int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
1026 struct channel_gk20a *c, bool patch)
1027{
1028 struct gr_gk20a *gr = &g->gr;
1029 struct tsg_gk20a *tsg;
1030 struct nvgpu_gr_ctx *gr_ctx = NULL;
1031 u64 addr;
1032 u32 size;
1033
1034 nvgpu_log_fn(g, " ");
1035
1036 tsg = tsg_gk20a_from_ch(c);
1037 if (tsg == NULL) {
1038 return -EINVAL;
1039 }
1040
1041 gr_ctx = &tsg->gr_ctx;
1042 if (patch) {
1043 int err;
1044 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
1045 if (err != 0) {
1046 return err;
1047 }
1048 }
1049
1050 /* global pagepool buffer */
1051 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
1052 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
1053 (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
1054 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
1055
1056 size = gr->global_ctx_buffer[PAGEPOOL].mem.size /
1057 gr_scc_pagepool_total_pages_byte_granularity_v();
1058
1059 if (size == g->ops.gr.pagepool_default_size(g)) {
1060 size = gr_scc_pagepool_total_pages_hwmax_v();
1061 }
1062
1063 nvgpu_log_info(g, "pagepool buffer addr : 0x%016llx, size : %d",
1064 addr, size);
1065
1066 g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
1067
1068 /* global bundle cb */
1069 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
1070 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
1071 (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
1072 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
1073
1074 size = gr->bundle_cb_default_size;
1075
1076 nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %d",
1077 addr, size);
1078
1079 g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
1080
1081 /* global attrib cb */
1082 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
1083 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
1084 (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
1085 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1086
1087 nvgpu_log_info(g, "attrib cb addr : 0x%016llx", addr);
1088 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch);
1089 g->ops.gr.commit_global_cb_manager(g, c, patch);
1090
1091 if (patch) {
1092 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
1093 }
1094
1095 return 0;
1096}
1097
1098int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
1099{
1100 struct gr_gk20a *gr = &g->gr;
1101 struct nvgpu_gr_ctx *gr_ctx = NULL;
1102 u32 gpm_pd_cfg;
1103 u32 pd_ab_dist_cfg0;
1104 u32 ds_debug;
1105 u32 mpc_vtg_debug;
1106 u32 pe_vaf;
1107 u32 pe_vsc_vpc;
1108
1109 nvgpu_log_fn(g, " ");
1110
1111 gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
1112 pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
1113 ds_debug = gk20a_readl(g, gr_ds_debug_r());
1114 mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
1115
1116 if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
1117 pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
1118 pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
1119
1120 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
1121 pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
1122 pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
1123 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
1124 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
1125 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
1126
1127 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
1128 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
1129 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
1130 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
1131 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
1132 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
1133 } else {
1134 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
1135 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
1136 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
1137 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
1138
1139 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
1140 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
1141 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
1142 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
1143 }
1144
1145 return 0;
1146}
1147
1148/*
1149 * Return the map tile count for the given index.
1150 * Return 0 if the index is out of bounds.
1151 */
1152static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index)
1153{
1154 if (index >= gr->map_tile_count) {
1155 return 0;
1156 }
1157
1158 return gr->map_tiles[index];
1159}
1160
1161int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1162{
1163 u32 norm_entries, norm_shift;
1164 u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
1165 u32 map0, map1, map2, map3, map4, map5;
1166
1167 if (gr->map_tiles == NULL) {
1168 return -1;
1169 }
1170
1171 nvgpu_log_fn(g, " ");
1172
1173 gk20a_writel(g, gr_crstr_map_table_cfg_r(),
1174 gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
1175 gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
1176
1177 map0 = gr_crstr_gpc_map0_tile0_f(gr_gk20a_get_map_tile_count(gr, 0)) |
1178 gr_crstr_gpc_map0_tile1_f(gr_gk20a_get_map_tile_count(gr, 1)) |
1179 gr_crstr_gpc_map0_tile2_f(gr_gk20a_get_map_tile_count(gr, 2)) |
1180 gr_crstr_gpc_map0_tile3_f(gr_gk20a_get_map_tile_count(gr, 3)) |
1181 gr_crstr_gpc_map0_tile4_f(gr_gk20a_get_map_tile_count(gr, 4)) |
1182 gr_crstr_gpc_map0_tile5_f(gr_gk20a_get_map_tile_count(gr, 5));
1183
1184 map1 = gr_crstr_gpc_map1_tile6_f(gr_gk20a_get_map_tile_count(gr, 6)) |
1185 gr_crstr_gpc_map1_tile7_f(gr_gk20a_get_map_tile_count(gr, 7)) |
1186 gr_crstr_gpc_map1_tile8_f(gr_gk20a_get_map_tile_count(gr, 8)) |
1187 gr_crstr_gpc_map1_tile9_f(gr_gk20a_get_map_tile_count(gr, 9)) |
1188 gr_crstr_gpc_map1_tile10_f(gr_gk20a_get_map_tile_count(gr, 10)) |
1189 gr_crstr_gpc_map1_tile11_f(gr_gk20a_get_map_tile_count(gr, 11));
1190
1191 map2 = gr_crstr_gpc_map2_tile12_f(gr_gk20a_get_map_tile_count(gr, 12)) |
1192 gr_crstr_gpc_map2_tile13_f(gr_gk20a_get_map_tile_count(gr, 13)) |
1193 gr_crstr_gpc_map2_tile14_f(gr_gk20a_get_map_tile_count(gr, 14)) |
1194 gr_crstr_gpc_map2_tile15_f(gr_gk20a_get_map_tile_count(gr, 15)) |
1195 gr_crstr_gpc_map2_tile16_f(gr_gk20a_get_map_tile_count(gr, 16)) |
1196 gr_crstr_gpc_map2_tile17_f(gr_gk20a_get_map_tile_count(gr, 17));
1197
1198 map3 = gr_crstr_gpc_map3_tile18_f(gr_gk20a_get_map_tile_count(gr, 18)) |
1199 gr_crstr_gpc_map3_tile19_f(gr_gk20a_get_map_tile_count(gr, 19)) |
1200 gr_crstr_gpc_map3_tile20_f(gr_gk20a_get_map_tile_count(gr, 20)) |
1201 gr_crstr_gpc_map3_tile21_f(gr_gk20a_get_map_tile_count(gr, 21)) |
1202 gr_crstr_gpc_map3_tile22_f(gr_gk20a_get_map_tile_count(gr, 22)) |
1203 gr_crstr_gpc_map3_tile23_f(gr_gk20a_get_map_tile_count(gr, 23));
1204
1205 map4 = gr_crstr_gpc_map4_tile24_f(gr_gk20a_get_map_tile_count(gr, 24)) |
1206 gr_crstr_gpc_map4_tile25_f(gr_gk20a_get_map_tile_count(gr, 25)) |
1207 gr_crstr_gpc_map4_tile26_f(gr_gk20a_get_map_tile_count(gr, 26)) |
1208 gr_crstr_gpc_map4_tile27_f(gr_gk20a_get_map_tile_count(gr, 27)) |
1209 gr_crstr_gpc_map4_tile28_f(gr_gk20a_get_map_tile_count(gr, 28)) |
1210 gr_crstr_gpc_map4_tile29_f(gr_gk20a_get_map_tile_count(gr, 29));
1211
1212 map5 = gr_crstr_gpc_map5_tile30_f(gr_gk20a_get_map_tile_count(gr, 30)) |
1213 gr_crstr_gpc_map5_tile31_f(gr_gk20a_get_map_tile_count(gr, 31)) |
1214 gr_crstr_gpc_map5_tile32_f(0) |
1215 gr_crstr_gpc_map5_tile33_f(0) |
1216 gr_crstr_gpc_map5_tile34_f(0) |
1217 gr_crstr_gpc_map5_tile35_f(0);
1218
1219 gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
1220 gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
1221 gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
1222 gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
1223 gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
1224 gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
1225
1226 switch (gr->tpc_count) {
1227 case 1:
1228 norm_shift = 4;
1229 break;
1230 case 2:
1231 case 3:
1232 norm_shift = 3;
1233 break;
1234 case 4:
1235 case 5:
1236 case 6:
1237 case 7:
1238 norm_shift = 2;
1239 break;
1240 case 8:
1241 case 9:
1242 case 10:
1243 case 11:
1244 case 12:
1245 case 13:
1246 case 14:
1247 case 15:
1248 norm_shift = 1;
1249 break;
1250 default:
1251 norm_shift = 0;
1252 break;
1253 }
1254
1255 norm_entries = gr->tpc_count << norm_shift;
1256 coeff5_mod = (1 << 5) % norm_entries;
1257 coeff6_mod = (1 << 6) % norm_entries;
1258 coeff7_mod = (1 << 7) % norm_entries;
1259 coeff8_mod = (1 << 8) % norm_entries;
1260 coeff9_mod = (1 << 9) % norm_entries;
1261 coeff10_mod = (1 << 10) % norm_entries;
1262 coeff11_mod = (1 << 11) % norm_entries;
1263
1264 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
1265 gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
1266 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
1267 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
1268 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
1269 gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
1270
1271 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
1272 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
1273 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
1274 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
1275 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
1276 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
1277 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
1278
1279 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
1280 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
1281 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
1282 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
1283 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
1284 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
1285
1286 gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
1287 gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
1288 gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
1289
1290 gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
1291 gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
1292 gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
1293 gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
1294 gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
1295 gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
1296
1297 return 0;
1298}
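
/*
 * Worked example (editor's addition, not part of the original file):
 * for a hypothetical tpc_count = 3, the switch in gr_gk20a_setup_rop_mapping()
 * selects norm_shift = 3, so norm_entries = 3 << 3 = 24 and, for instance,
 *	coeff5_mod = (1 << 5) % 24 = 8,  coeff6_mod = (1 << 6) % 24 = 16,
 *	coeff7_mod = (1 << 7) % 24 = 8,  coeff8_mod = (1 << 8) % 24 = 16.
 */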
1299
1300static inline u32 count_bits(u32 mask)
1301{
1302 u32 temp = mask;
1303 u32 count;
1304 for (count = 0; temp != 0; count++) {
1305 temp &= temp - 1;
1306 }
1307
1308 return count;
1309}
1310
1311int gr_gk20a_init_sm_id_table(struct gk20a *g)
1312{
1313 u32 gpc, tpc;
1314 u32 sm_id = 0;
1315
1316 for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) {
1317 for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
1318
1319 if (tpc < g->gr.gpc_tpc_count[gpc]) {
1320 g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
1321 g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
1322 g->gr.sm_to_cluster[sm_id].sm_index = 0;
1323 g->gr.sm_to_cluster[sm_id].global_tpc_index =
1324 sm_id;
1325 sm_id++;
1326 }
1327 }
1328 }
1329 g->gr.no_of_sm = sm_id;
1330 return 0;
1331}
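
/*
 * Worked example (editor's addition, not part of the original file):
 * with hypothetical values gpc_count = 2, max_tpc_per_gpc_count = 2 and
 * gpc_tpc_count = {2, 1}, the loops above (tpc outer, gpc inner) assign:
 *	sm 0 -> (gpc 0, tpc 0), sm 1 -> (gpc 1, tpc 0), sm 2 -> (gpc 0, tpc 1)
 * leaving no_of_sm = 3.
 */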
1332
1333/*
1334 * Return the number of TPCs in a GPC.
1335 * Return 0 if the GPC index is invalid, i.e. the GPC is disabled.
1336 */
1337u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index)
1338{
1339 if (gpc_index >= gr->gpc_count) {
1340 return 0;
1341 }
1342
1343 return gr->gpc_tpc_count[gpc_index];
1344}
1345
1346int gr_gk20a_init_fs_state(struct gk20a *g)
1347{
1348 struct gr_gk20a *gr = &g->gr;
1349 u32 tpc_index, gpc_index;
1350 u32 sm_id = 0, gpc_id = 0;
1351 u32 tpc_per_gpc;
1352 u32 fuse_tpc_mask;
1353 u32 reg_index;
1354 int err;
1355
1356 nvgpu_log_fn(g, " ");
1357
1358 if (g->ops.gr.init_sm_id_table) {
1359 err = g->ops.gr.init_sm_id_table(g);
1360 if (err != 0) {
1361 return err;
1362 }
1363
1364		/* Is the table empty? */
1365 if (g->gr.no_of_sm == 0) {
1366 return -EINVAL;
1367 }
1368 }
1369
1370 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
1371 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
1372 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
1373
1374 g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
1375
1376 if (g->ops.gr.program_active_tpc_counts) {
1377 g->ops.gr.program_active_tpc_counts(g, gpc_index);
1378 }
1379 }
1380
1381 for (reg_index = 0, gpc_id = 0;
1382 reg_index < gr_pd_num_tpc_per_gpc__size_1_v();
1383 reg_index++, gpc_id += 8) {
1384
1385 tpc_per_gpc =
1386 gr_pd_num_tpc_per_gpc_count0_f(gr_gk20a_get_tpc_count(gr, gpc_id + 0)) |
1387 gr_pd_num_tpc_per_gpc_count1_f(gr_gk20a_get_tpc_count(gr, gpc_id + 1)) |
1388 gr_pd_num_tpc_per_gpc_count2_f(gr_gk20a_get_tpc_count(gr, gpc_id + 2)) |
1389 gr_pd_num_tpc_per_gpc_count3_f(gr_gk20a_get_tpc_count(gr, gpc_id + 3)) |
1390 gr_pd_num_tpc_per_gpc_count4_f(gr_gk20a_get_tpc_count(gr, gpc_id + 4)) |
1391 gr_pd_num_tpc_per_gpc_count5_f(gr_gk20a_get_tpc_count(gr, gpc_id + 5)) |
1392 gr_pd_num_tpc_per_gpc_count6_f(gr_gk20a_get_tpc_count(gr, gpc_id + 6)) |
1393 gr_pd_num_tpc_per_gpc_count7_f(gr_gk20a_get_tpc_count(gr, gpc_id + 7));
1394
1395 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(reg_index), tpc_per_gpc);
1396 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(reg_index), tpc_per_gpc);
1397 }
1398
1399 /* gr__setup_pd_mapping stubbed for gk20a */
1400 g->ops.gr.setup_rop_mapping(g, gr);
1401 if (g->ops.gr.setup_alpha_beta_tables) {
1402 g->ops.gr.setup_alpha_beta_tables(g, gr);
1403 }
1404
1405 for (gpc_index = 0;
1406 gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
1407 gpc_index += 4) {
1408
1409 gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
1410 (gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) != 0U) ||
1411 (gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) != 0U) ||
1412 (gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) != 0U) ||
1413 (gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]) != 0U));
1414 }
1415
1416 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
1417 if ((g->tpc_fs_mask_user != 0U) &&
1418 (fuse_tpc_mask == BIT32(gr->max_tpc_count) - 1U)) {
1419 u32 val = g->tpc_fs_mask_user;
1420 val &= (0x1U << gr->max_tpc_count) - 1U;
1421 gk20a_writel(g, gr_cwd_fs_r(),
1422 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1423 gr_cwd_fs_num_tpcs_f(hweight32(val)));
1424 } else {
1425 gk20a_writel(g, gr_cwd_fs_r(),
1426 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1427 gr_cwd_fs_num_tpcs_f(gr->tpc_count));
1428 }
1429
1430 gk20a_writel(g, gr_bes_zrop_settings_r(),
1431 gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
1432 gk20a_writel(g, gr_bes_crop_settings_r(),
1433 gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
1434
1435 return 0;
1436}
1437
1438int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
1439{
1440 struct gk20a *g = c->g;
1441 int ret;
1442
1443 nvgpu_log_fn(g, " ");
1444
1445 ret = gr_gk20a_submit_fecs_method_op(g,
1446 (struct fecs_method_op_gk20a) {
1447 .method.addr = save_type,
1448 .method.data = fecs_current_ctx_data(g, &c->inst_block),
1449 .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
1450 .ok = 1, .fail = 2,
1451 },
1452 .cond.ok = GR_IS_UCODE_OP_AND,
1453 .cond.fail = GR_IS_UCODE_OP_AND,
1454 }, true);
1455
1456 if (ret) {
1457 nvgpu_err(g, "save context image failed");
1458 }
1459
1460 return ret;
1461}
1462
1463u32 gk20a_init_sw_bundle(struct gk20a *g)
1464{
1465 struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
1466 u32 last_bundle_data = 0;
1467 u32 err = 0;
1468 unsigned int i;
1469
1470 /* disable fe_go_idle */
1471 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1472 gr_fe_go_idle_timeout_count_disabled_f());
1473 /* enable pipe mode override */
1474 gk20a_writel(g, gr_pipe_bundle_config_r(),
1475 gr_pipe_bundle_config_override_pipe_mode_enabled_f());
1476
1477 /* load bundle init */
1478 for (i = 0; i < sw_bundle_init->count; i++) {
1479 if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
1480 gk20a_writel(g, gr_pipe_bundle_data_r(),
1481 sw_bundle_init->l[i].value);
1482 last_bundle_data = sw_bundle_init->l[i].value;
1483 }
1484
1485 gk20a_writel(g, gr_pipe_bundle_address_r(),
1486 sw_bundle_init->l[i].addr);
1487
1488 if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
1489 GR_GO_IDLE_BUNDLE) {
1490 err = gr_gk20a_wait_idle(g,
1491 gk20a_get_gr_idle_timeout(g),
1492 GR_IDLE_CHECK_DEFAULT);
1493 if (err != 0U) {
1494 goto error;
1495 }
1496 }
1497
1498 err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g),
1499 GR_IDLE_CHECK_DEFAULT);
1500 if (err != 0U) {
1501 goto error;
1502 }
1503 }
1504
1505 if ((err == 0U) && (g->ops.gr.init_sw_veid_bundle != NULL)) {
1506 err = g->ops.gr.init_sw_veid_bundle(g);
1507 if (err != 0U) {
1508 goto error;
1509 }
1510 }
1511
1512 if (g->ops.gr.init_sw_bundle64) {
1513 err = g->ops.gr.init_sw_bundle64(g);
1514 if (err != 0U) {
1515 goto error;
1516 }
1517 }
1518
1519 /* disable pipe mode override */
1520 gk20a_writel(g, gr_pipe_bundle_config_r(),
1521 gr_pipe_bundle_config_override_pipe_mode_disabled_f());
1522
1523 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1524 GR_IDLE_CHECK_DEFAULT);
1525
1526 /* restore fe_go_idle */
1527 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1528 gr_fe_go_idle_timeout_count_prod_f());
1529
1530 return err;
1531
1532error:
1533 /* in case of error skip waiting for GR idle - just restore state */
1534 gk20a_writel(g, gr_pipe_bundle_config_r(),
1535 gr_pipe_bundle_config_override_pipe_mode_disabled_f());
1536
1537 /* restore fe_go_idle */
1538 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1539 gr_fe_go_idle_timeout_count_prod_f());
1540
1541 return err;
1542}
1543
1544/* Init the global golden image from a fresh gr_ctx in the channel ctx.
1545   Save a copy in local_golden_image in ctx_vars. */
1546static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1547 struct channel_gk20a *c)
1548{
1549 struct gr_gk20a *gr = &g->gr;
1550 struct tsg_gk20a *tsg;
1551 struct nvgpu_gr_ctx *gr_ctx = NULL;
1552 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1553 u32 ctx_header_words;
1554 u32 i;
1555 u32 data;
1556 struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1557 struct nvgpu_mem *gr_mem;
1558 u32 err = 0;
1559 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
1560 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
1561 u32 last_method_data = 0;
1562
1563 nvgpu_log_fn(g, " ");
1564
1565 tsg = tsg_gk20a_from_ch(c);
1566 if (tsg == NULL) {
1567 return -EINVAL;
1568 }
1569
1570 gr_ctx = &tsg->gr_ctx;
1571 gr_mem = &gr_ctx->mem;
1572
1573	/* The golden ctx is global to all channels. Although only the first
1574	   channel initializes the golden image, the driver needs to prevent
1575	   multiple channels from initializing the golden ctx at the same time. */
1576 nvgpu_mutex_acquire(&gr->ctx_mutex);
1577
1578 if (gr->ctx_vars.golden_image_initialized) {
1579 goto clean_up;
1580 }
1581 if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
1582 struct nvgpu_timeout timeout;
1583
1584 nvgpu_timeout_init(g, &timeout,
1585 FE_PWR_MODE_TIMEOUT_MAX /
1586 FE_PWR_MODE_TIMEOUT_DEFAULT,
1587 NVGPU_TIMER_RETRY_TIMER);
1588 gk20a_writel(g, gr_fe_pwr_mode_r(),
1589 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f());
1590 do {
1591 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1592 if (req == gr_fe_pwr_mode_req_done_v()) {
1593 break;
1594 }
1595 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT);
1596 } while (nvgpu_timeout_expired_msg(&timeout,
1597 "timeout forcing FE on") == 0);
1598 }
1599
1600
1601 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
1602 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
1603 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
1604 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
1605 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
1606 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
1607 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
1608 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
1609 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
1610 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
1611 (void) gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
1612 nvgpu_udelay(10);
1613
1614 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
1615 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
1616 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
1617 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
1618 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
1619 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
1620 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
1621 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
1622 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
1623 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
1624 (void) gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
1625 nvgpu_udelay(10);
1626
1627 if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
1628 struct nvgpu_timeout timeout;
1629
1630 nvgpu_timeout_init(g, &timeout,
1631 FE_PWR_MODE_TIMEOUT_MAX /
1632 FE_PWR_MODE_TIMEOUT_DEFAULT,
1633 NVGPU_TIMER_RETRY_TIMER);
1634 gk20a_writel(g, gr_fe_pwr_mode_r(),
1635 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_auto_f());
1636
1637 do {
1638 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1639 if (req == gr_fe_pwr_mode_req_done_v()) {
1640 break;
1641 }
1642 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT);
1643 } while (nvgpu_timeout_expired_msg(&timeout,
1644 "timeout setting FE power to auto") == 0);
1645 }
1646
1647 /* clear scc ram */
1648 gk20a_writel(g, gr_scc_init_r(),
1649 gr_scc_init_ram_trigger_f());
1650
1651 err = gr_gk20a_fecs_ctx_bind_channel(g, c);
1652 if (err != 0U) {
1653 goto clean_up;
1654 }
1655
1656 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1657 GR_IDLE_CHECK_DEFAULT);
1658
1659 /* load ctx init */
1660 for (i = 0; i < sw_ctx_load->count; i++) {
1661 gk20a_writel(g, sw_ctx_load->l[i].addr,
1662 sw_ctx_load->l[i].value);
1663 }
1664
1665 if (g->ops.gr.disable_rd_coalesce) {
1666 g->ops.gr.disable_rd_coalesce(g);
1667 }
1668
1669 if (g->ops.gr.init_preemption_state) {
1670 g->ops.gr.init_preemption_state(g);
1671 }
1672
1673 if (g->ops.clock_gating.blcg_gr_load_gating_prod) {
1674 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
1675 }
1676
1677 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1678 GR_IDLE_CHECK_DEFAULT);
1679 if (err != 0U) {
1680 goto clean_up;
1681 }
1682
1683 /* disable fe_go_idle */
1684 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1685 gr_fe_go_idle_timeout_count_disabled_f());
1686
1687 err = g->ops.gr.commit_global_ctx_buffers(g, c, false);
1688 if (err != 0U) {
1689 goto clean_up;
1690 }
1691
1692 /* override a few ctx state registers */
1693 g->ops.gr.commit_global_timeslice(g, c);
1694
1695 /* floorsweep anything left */
1696 err = g->ops.gr.init_fs_state(g);
1697 if (err != 0U) {
1698 goto clean_up;
1699 }
1700
1701 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1702 GR_IDLE_CHECK_DEFAULT);
1703 if (err != 0U) {
1704 goto restore_fe_go_idle;
1705 }
1706
1707 err = gk20a_init_sw_bundle(g);
1708 if (err != 0U) {
1709 goto clean_up;
1710 }
1711
1712restore_fe_go_idle:
1713 /* restore fe_go_idle */
1714 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1715 gr_fe_go_idle_timeout_count_prod_f());
1716
1717 if ((err != 0U) || (gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1718 GR_IDLE_CHECK_DEFAULT) != 0)) {
1719 goto clean_up;
1720 }
1721
1722 /* load method init */
1723 if (sw_method_init->count) {
1724 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
1725 sw_method_init->l[0].value);
1726 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
1727 gr_pri_mme_shadow_raw_index_write_trigger_f() |
1728 sw_method_init->l[0].addr);
1729 last_method_data = sw_method_init->l[0].value;
1730 }
1731 for (i = 1; i < sw_method_init->count; i++) {
1732 if (sw_method_init->l[i].value != last_method_data) {
1733 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
1734 sw_method_init->l[i].value);
1735 last_method_data = sw_method_init->l[i].value;
1736 }
1737 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
1738 gr_pri_mme_shadow_raw_index_write_trigger_f() |
1739 sw_method_init->l[i].addr);
1740 }
1741
1742 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1743 GR_IDLE_CHECK_DEFAULT);
1744 if (err != 0U) {
1745 goto clean_up;
1746 }
1747
1748 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
1749 ctx_header_words >>= 2;
1750
1751 g->ops.mm.l2_flush(g, true);
1752
1753 for (i = 0; i < ctx_header_words; i++) {
1754 data = nvgpu_mem_rd32(g, gr_mem, i);
1755 nvgpu_mem_wr32(g, gold_mem, i, data);
1756 }
1757 nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
1758 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1759
1760 g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
1761
1762 err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1763 if (err != 0U) {
1764 goto clean_up;
1765 }
1766
1767 gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
1768
1769
1770
1771 if (gr->ctx_vars.local_golden_image == NULL) {
1772
1773 gr->ctx_vars.local_golden_image =
1774 nvgpu_vzalloc(g, gr->ctx_vars.golden_image_size);
1775
1776 if (gr->ctx_vars.local_golden_image == NULL) {
1777 err = -ENOMEM;
1778 goto clean_up;
1779 }
1780 nvgpu_mem_rd_n(g, gold_mem, 0,
1781 gr->ctx_vars.local_golden_image,
1782 gr->ctx_vars.golden_image_size);
1783
1784 }
1785
1786 err = g->ops.gr.commit_inst(c, gr_mem->gpu_va);
1787 if (err != 0U) {
1788 goto clean_up;
1789 }
1790
1791 gr->ctx_vars.golden_image_initialized = true;
1792
1793 gk20a_writel(g, gr_fecs_current_ctx_r(),
1794 gr_fecs_current_ctx_valid_false_f());
1795
1796clean_up:
1797 if (err != 0U) {
1798 nvgpu_err(g, "fail");
1799 } else {
1800 nvgpu_log_fn(g, "done");
1801 }
1802
1803 nvgpu_mutex_release(&gr->ctx_mutex);
1804 return err;
1805}
1806
1807int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1808 struct channel_gk20a *c,
1809 bool enable_smpc_ctxsw)
1810{
1811 struct tsg_gk20a *tsg;
1812 struct nvgpu_gr_ctx *gr_ctx = NULL;
1813 struct nvgpu_mem *mem = NULL;
1814 u32 data;
1815 int ret;
1816
1817 nvgpu_log_fn(g, " ");
1818
1819 tsg = tsg_gk20a_from_ch(c);
1820 if (tsg == NULL) {
1821 return -EINVAL;
1822 }
1823
1824 gr_ctx = &tsg->gr_ctx;
1825 mem = &gr_ctx->mem;
1826 if (!nvgpu_mem_is_valid(mem)) {
1827 nvgpu_err(g, "no graphics context allocated");
1828 return -EFAULT;
1829 }
1830
1831 ret = gk20a_disable_channel_tsg(g, c);
1832 if (ret) {
1833 nvgpu_err(g, "failed to disable channel/TSG");
1834 goto out;
1835 }
1836 ret = gk20a_fifo_preempt(g, c);
1837 if (ret) {
1838 gk20a_enable_channel_tsg(g, c);
1839 nvgpu_err(g, "failed to preempt channel/TSG");
1840 goto out;
1841 }
1842
1843 /* Channel gr_ctx buffer is gpu cacheable.
1844 Flush and invalidate before cpu update. */
1845 g->ops.mm.l2_flush(g, true);
1846
1847 data = nvgpu_mem_rd(g, mem,
1848 ctxsw_prog_main_image_pm_o());
1849
1850 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
1851 data |= enable_smpc_ctxsw ?
1852 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
1853 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
1854
1855 nvgpu_mem_wr(g, mem,
1856 ctxsw_prog_main_image_pm_o(), data);
1857
1858out:
1859 gk20a_enable_channel_tsg(g, c);
1860 return ret;
1861}
1862
1863int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1864 struct channel_gk20a *c,
1865 u64 gpu_va,
1866 u32 mode)
1867{
1868 struct tsg_gk20a *tsg;
1869 struct nvgpu_mem *gr_mem = NULL;
1870 struct nvgpu_gr_ctx *gr_ctx;
1871 struct pm_ctx_desc *pm_ctx;
1872 u32 data;
1873 u64 virt_addr = 0;
1874 struct nvgpu_mem *ctxheader = &c->ctx_header;
1875 int ret;
1876
1877 nvgpu_log_fn(g, " ");
1878
1879 tsg = tsg_gk20a_from_ch(c);
1880 if (tsg == NULL) {
1881 return -EINVAL;
1882 }
1883
1884 gr_ctx = &tsg->gr_ctx;
1885 pm_ctx = &gr_ctx->pm_ctx;
1886 gr_mem = &gr_ctx->mem;
1887 if (!nvgpu_mem_is_valid(gr_mem)) {
1888 nvgpu_err(g, "no graphics context allocated");
1889 return -EFAULT;
1890 }
1891
1892 if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
1893 (g->ops.gr.get_hw_accessor_stream_out_mode == NULL)) {
1894 nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
1895 return -EINVAL;
1896 }
1897
1898 switch (mode) {
1899 case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
1900 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1901 return 0;
1902 }
1903 break;
1904 case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
1905 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
1906 return 0;
1907 }
1908 break;
1909 case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1910 if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
1911 return 0;
1912 }
1913 break;
1914 default:
1915 nvgpu_err(g, "invalid hwpm context switch mode");
1916 return -EINVAL;
1917 }
1918
1919 ret = gk20a_disable_channel_tsg(g, c);
1920 if (ret) {
1921 nvgpu_err(g, "failed to disable channel/TSG");
1922 return ret;
1923 }
1924
1925 ret = gk20a_fifo_preempt(g, c);
1926 if (ret) {
1927 gk20a_enable_channel_tsg(g, c);
1928 nvgpu_err(g, "failed to preempt channel/TSG");
1929 return ret;
1930 }
1931
1932 /* Channel gr_ctx buffer is gpu cacheable.
1933 Flush and invalidate before cpu update. */
1934 g->ops.mm.l2_flush(g, true);
1935
1936 if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
1937 /* Allocate buffer if necessary */
1938 if (pm_ctx->mem.gpu_va == 0) {
1939 ret = nvgpu_dma_alloc_sys(g,
1940 g->gr.ctx_vars.pm_ctxsw_image_size,
1941 &pm_ctx->mem);
1942 if (ret) {
1943 c->g->ops.fifo.enable_channel(c);
1944 nvgpu_err(g,
1945 "failed to allocate pm ctxt buffer");
1946 return ret;
1947 }
1948
1949 pm_ctx->mem.gpu_va = nvgpu_gmmu_map_fixed(c->vm,
1950 &pm_ctx->mem,
1951 gpu_va,
1952 pm_ctx->mem.size,
1953 NVGPU_VM_MAP_CACHEABLE,
1954 gk20a_mem_flag_none, true,
1955 pm_ctx->mem.aperture);
1956 if (pm_ctx->mem.gpu_va == 0ULL) {
1957 nvgpu_err(g,
1958 "failed to map pm ctxt buffer");
1959 nvgpu_dma_free(g, &pm_ctx->mem);
1960 c->g->ops.fifo.enable_channel(c);
1961 return -ENOMEM;
1962 }
1963 }
1964
1965 if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
1966 (g->ops.gr.init_hwpm_pmm_register != NULL)) {
1967 g->ops.gr.init_hwpm_pmm_register(g);
1968 }
1969 }
1970
1971 data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
1972 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1973
1974 switch (mode) {
1975 case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
1976 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
1977 virt_addr = pm_ctx->mem.gpu_va;
1978 break;
1979 case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1980 pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
1981 virt_addr = pm_ctx->mem.gpu_va;
1982 break;
1983 case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
1984 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1985 virt_addr = 0;
1986 }
1987
1988 data |= pm_ctx->pm_mode;
1989
1990 nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
1991
1992 if (ctxheader->gpu_va) {
1993 struct channel_gk20a *ch;
1994
1995 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1996 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1997 g->ops.gr.write_pm_ptr(g, &ch->ctx_header, virt_addr);
1998 }
1999 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2000 } else {
2001 g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
2002 }
2003
2004 /* enable channel */
2005 gk20a_enable_channel_tsg(g, c);
2006
2007 return 0;
2008}
2009
2010void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
2011 struct nvgpu_mem *mem)
2012{
2013 nvgpu_mem_wr(g, mem,
2014 ctxsw_prog_main_image_num_save_ops_o(), 0);
2015 nvgpu_mem_wr(g, mem,
2016 ctxsw_prog_main_image_num_restore_ops_o(), 0);
2017}
2018
2019 /* Load a saved, fresh copy of the golden image into the channel gr_ctx. */
2020int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
2021 struct channel_gk20a *c)
2022{
2023 struct gr_gk20a *gr = &g->gr;
2024 struct tsg_gk20a *tsg;
2025 struct nvgpu_gr_ctx *gr_ctx;
2026 u32 virt_addr_lo;
2027 u32 virt_addr_hi;
2028 u64 virt_addr = 0;
2029 u32 v, data;
2030 int ret = 0;
2031 struct nvgpu_mem *mem;
2032
2033 nvgpu_log_fn(g, " ");
2034
2035 tsg = tsg_gk20a_from_ch(c);
2036 if (tsg == NULL) {
2037 return -EINVAL;
2038 }
2039
2040 gr_ctx = &tsg->gr_ctx;
2041 mem = &gr_ctx->mem;
2042 if (gr->ctx_vars.local_golden_image == NULL) {
2043 return -EINVAL;
2044 }
2045
2046 /* The channel gr_ctx buffer is GPU-cacheable; flush and
2047    invalidate it before the CPU update. */
2048 g->ops.mm.l2_flush(g, true);
2049
2050 nvgpu_mem_wr_n(g, mem, 0,
2051 gr->ctx_vars.local_golden_image,
2052 gr->ctx_vars.golden_image_size);
2053
2054 if (g->ops.gr.init_ctxsw_hdr_data) {
2055 g->ops.gr.init_ctxsw_hdr_data(g, mem);
2056 }
2057
2058 if ((g->ops.gr.enable_cde_in_fecs != NULL) && c->cde) {
2059 g->ops.gr.enable_cde_in_fecs(g, mem);
2060 }
2061
2062 /* set priv access map */
2063 virt_addr_lo =
2064 u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
2065 virt_addr_hi =
2066 u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
2067
2068 if (g->allow_all) {
2069 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
2070 } else {
2071 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
2072 }
2073
2074 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
2075 data);
2076
2077 nvgpu_mem_wr(g, mem,
2078 ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
2079 virt_addr_lo);
2080 nvgpu_mem_wr(g, mem,
2081 ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
2082 virt_addr_hi);
2083
2084 /* disable verif features */
2085 v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
2086 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
2087 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
2088 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
2089
2090 if (g->ops.gr.update_ctxsw_preemption_mode) {
2091 g->ops.gr.update_ctxsw_preemption_mode(g, c, mem);
2092 }
2093
2094 if (g->ops.gr.update_boosted_ctx) {
2095 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
2096 }
2097
2098 virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
2099 virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
2100
2101 nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
2102 gr_ctx->patch_ctx.data_count);
2103 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
2104 gr_ctx->patch_ctx.data_count);
2105
2106 nvgpu_mem_wr(g, mem,
2107 ctxsw_prog_main_image_patch_adr_lo_o(),
2108 virt_addr_lo);
2109 nvgpu_mem_wr(g, mem,
2110 ctxsw_prog_main_image_patch_adr_hi_o(),
2111 virt_addr_hi);
2112
2113 /* Update main header region of the context buffer with the info needed
2114 * for PM context switching, including mode and possibly a pointer to
2115 * the PM backing store.
2116 */
2117 if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
2118 if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
2119 nvgpu_err(g,
2120 "context switched pm with no pm buffer!");
2121 return -EFAULT;
2122 }
2123
2124 virt_addr = gr_ctx->pm_ctx.mem.gpu_va;
2125 } else {
2126 virt_addr = 0;
2127 }
2128
2129 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
2130 data = data & ~ctxsw_prog_main_image_pm_mode_m();
2131 data |= gr_ctx->pm_ctx.pm_mode;
2132
2133 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
2134
2135 g->ops.gr.write_pm_ptr(g, mem, virt_addr);
2136
2137 return ret;
2138}
2139
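/* Clear the FECS ctxsw mailbox and start the FECS and GPCCS falcon CPUs. */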
2140static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
2141{
2142 nvgpu_log_fn(g, " ");
2143
2144 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
2145 gr_fecs_ctxsw_mailbox_clear_value_f(~0));
2146
2147 gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
2148 gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
2149
2150 gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
2151 gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
2152
2153 nvgpu_log_fn(g, "done");
2154}
2155
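/*
 * Allocate an instance block for the ctxsw ucode, bind it to the PMU VM and
 * map the ucode surface read-only through the GMMU.
 */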
2156static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
2157{
2158 struct mm_gk20a *mm = &g->mm;
2159 struct vm_gk20a *vm = mm->pmu.vm;
2160 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2161 int err;
2162
2163 err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
2164 if (err != 0) {
2165 return err;
2166 }
2167
2168 g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
2169
2170 /* Map ucode surface to GMMU */
2171 ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
2172 &ucode_info->surface_desc,
2173 ucode_info->surface_desc.size,
2174 0, /* flags */
2175 gk20a_mem_flag_read_only,
2176 false,
2177 ucode_info->surface_desc.aperture);
2178 if (ucode_info->surface_desc.gpu_va == 0ULL) {
2179 nvgpu_err(g, "failed to update gmmu ptes");
2180 return -ENOMEM;
2181 }
2182
2183 return 0;
2184}
2185
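/*
 * Lay out the boot, code and data segments of a ctxsw ucode image back to
 * back in the surface; each segment start is aligned to BLK_SIZE.
 */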
2186static void gr_gk20a_init_ctxsw_ucode_segment(
2187 struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
2188{
2189 p_seg->offset = *offset;
2190 p_seg->size = size;
2191 *offset = ALIGN(*offset + size, BLK_SIZE);
2192}
2193
2194static void gr_gk20a_init_ctxsw_ucode_segments(
2195 struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
2196 struct gk20a_ctxsw_bootloader_desc *bootdesc,
2197 u32 code_size, u32 data_size)
2198{
2199 u32 boot_size = ALIGN(bootdesc->size, sizeof(u32));
2200 segments->boot_entry = bootdesc->entry_point;
2201 segments->boot_imem_offset = bootdesc->imem_offset;
2202 gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_size);
2203 gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size);
2204 gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size);
2205}
2206
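/*
 * Copy the boot, code and data images into the ucode surface and compute a
 * boot "signature" (a simple sum of the boot image words) that is later used
 * to select the matching DMEM header layout.
 */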
2207static int gr_gk20a_copy_ctxsw_ucode_segments(
2208 struct gk20a *g,
2209 struct nvgpu_mem *dst,
2210 struct gk20a_ctxsw_ucode_segments *segments,
2211 u32 *bootimage,
2212 u32 *code, u32 *data)
2213{
2214 unsigned int i;
2215
2216 nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage,
2217 segments->boot.size);
2218 nvgpu_mem_wr_n(g, dst, segments->code.offset, code,
2219 segments->code.size);
2220 nvgpu_mem_wr_n(g, dst, segments->data.offset, data,
2221 segments->data.size);
2222
2223 /* compute a "checksum" for the boot binary to detect its version */
2224 segments->boot_signature = 0;
2225 for (i = 0; i < segments->boot.size / sizeof(u32); i++) {
2226 segments->boot_signature += bootimage[i];
2227 }
2228
2229 return 0;
2230}
2231
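/*
 * gr_gk20a_init_ctxsw_ucode():
 * Load the FECS and GPCCS firmware blobs, lay out their segments in a single
 * DMA-able surface, copy the images in, and set up the ucode instance block
 * and GMMU mapping.
 */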
2232int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2233{
2234 struct mm_gk20a *mm = &g->mm;
2235 struct vm_gk20a *vm = mm->pmu.vm;
2236 struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
2237 struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
2238 struct nvgpu_firmware *fecs_fw;
2239 struct nvgpu_firmware *gpccs_fw;
2240 u32 *fecs_boot_image;
2241 u32 *gpccs_boot_image;
2242 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2243 u32 ucode_size;
2244 int err = 0;
2245
2246 fecs_fw = nvgpu_request_firmware(g, GK20A_FECS_UCODE_IMAGE, 0);
2247 if (fecs_fw == NULL) {
2248 nvgpu_err(g, "failed to load fecs ucode!!");
2249 return -ENOENT;
2250 }
2251
2252 fecs_boot_desc = (void *)fecs_fw->data;
2253 fecs_boot_image = (void *)(fecs_fw->data +
2254 sizeof(struct gk20a_ctxsw_bootloader_desc));
2255
2256 gpccs_fw = nvgpu_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE, 0);
2257 if (gpccs_fw == NULL) {
2258 nvgpu_release_firmware(g, fecs_fw);
2259 nvgpu_err(g, "failed to load gpccs ucode!!");
2260 return -ENOENT;
2261 }
2262
2263 gpccs_boot_desc = (void *)gpccs_fw->data;
2264 gpccs_boot_image = (void *)(gpccs_fw->data +
2265 sizeof(struct gk20a_ctxsw_bootloader_desc));
2266
2267 ucode_size = 0;
2268 gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size,
2269 fecs_boot_desc,
2270 g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32),
2271 g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32));
2272 gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size,
2273 gpccs_boot_desc,
2274 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
2275 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
2276
2277 err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc);
2278 if (err != 0) {
2279 goto clean_up;
2280 }
2281
2282 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2283 &ucode_info->fecs,
2284 fecs_boot_image,
2285 g->gr.ctx_vars.ucode.fecs.inst.l,
2286 g->gr.ctx_vars.ucode.fecs.data.l);
2287
2288 nvgpu_release_firmware(g, fecs_fw);
2289 fecs_fw = NULL;
2290
2291 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2292 &ucode_info->gpccs,
2293 gpccs_boot_image,
2294 g->gr.ctx_vars.ucode.gpccs.inst.l,
2295 g->gr.ctx_vars.ucode.gpccs.data.l);
2296
2297 nvgpu_release_firmware(g, gpccs_fw);
2298 gpccs_fw = NULL;
2299
2300 err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
2301 if (err != 0) {
2302 goto clean_up;
2303 }
2304
2305 return 0;
2306
2307clean_up:
2308 if (ucode_info->surface_desc.gpu_va) {
2309 nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc,
2310 ucode_info->surface_desc.gpu_va);
2311 }
2312 nvgpu_dma_free(g, &ucode_info->surface_desc);
2313
2314 nvgpu_release_firmware(g, gpccs_fw);
2315 gpccs_fw = NULL;
2316 nvgpu_release_firmware(g, fecs_fw);
2317 fecs_fw = NULL;
2318
2319 return err;
2320}
2321
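/* Poll until the FECS arbiter command completes and the arbiter goes idle. */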
2322static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g)
2323{
2324 int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2325 u32 val;
2326
2327 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2328 while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) {
2329 nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
2330 retries--;
2331 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2332 }
2333
2334 if (retries == 0) {
2335 nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
2336 gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
2337 }
2338
2339 retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2340 while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
2341 gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
2342 (retries != 0)) {
2343 nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
2344 retries--;
2345 }
2346 if (retries == 0) {
2347 nvgpu_err(g,
2348 "arbiter idle timeout, fecs ctxsw status: 0x%08x",
2349 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
2350 }
2351}
2352
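/*
 * Bind the ctxsw ucode instance block to the FECS arbiter: program the new
 * context pointer and target aperture, issue the bind and flush commands,
 * and wait for the arbiter to go idle after each step.
 */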
2353void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2354{
2355 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2356 int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2357 u64 inst_ptr;
2358
2359 while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
2360 gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
2361 (retries != 0)) {
2362 nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
2363 retries--;
2364 }
2365 if (retries == 0) {
2366 nvgpu_err(g,
2367 "arbiter idle timeout, status: %08x",
2368 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
2369 }
2370
2371 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
2372
2373 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2374 gk20a_writel(g, gr_fecs_new_ctx_r(),
2375 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2376 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2377 gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
2378 gr_fecs_new_ctx_target_sys_mem_coh_f(),
2379 gr_fecs_new_ctx_target_vid_mem_f()) |
2380 gr_fecs_new_ctx_valid_m());
2381
2382 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2383 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2384 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2385 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
2386 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
2387 gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
2388
2389 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
2390
2391 /* Wait for arbiter command to complete */
2392 gr_gk20a_wait_for_fecs_arb_idle(g);
2393
2394 gk20a_writel(g, gr_fecs_current_ctx_r(),
2395 gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) |
2396 gr_fecs_current_ctx_target_m() |
2397 gr_fecs_current_ctx_valid_m());
2398 /* Send command to arbiter to flush */
2399 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
2400
2401 gr_gk20a_wait_for_fecs_arb_idle(g);
2402
2403}
2404
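/*
 * Write the falcon bootloader header into DMEM through the auto-incrementing
 * port; the exact header layout depends on the boot signature detected for
 * the ucode image.
 */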
2405void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
2406 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2407{
2408 u32 addr_code32;
2409 u32 addr_data32;
2410
2411 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
2412 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
2413
2414 /*
2415 * Copy falcon bootloader header into dmem at offset 0.
2416 * Configure dmem port 0 for auto-incrementing writes starting at dmem
2417 * offset 0.
2418 */
2419 gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
2420 gr_fecs_dmemc_offs_f(0) |
2421 gr_fecs_dmemc_blk_f(0) |
2422 gr_fecs_dmemc_aincw_f(1));
2423
2424 /* Write out the actual data */
2425 switch (segments->boot_signature) {
2426 case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED:
2427 case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE:
2428 case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
2429 case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
2430 case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
2431 case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
2432 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2433 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2434 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2435 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2436 /* fallthrough */
2437 case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
2438 case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
2439 case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
2440 case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
2441 case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
2442 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2443 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2444 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2445 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2446 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
2447 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2448 addr_code32);
2449 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2450 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2451 segments->code.size);
2452 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2453 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2454 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2455 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2456 addr_data32);
2457 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2458 segments->data.size);
2459 break;
2460 case FALCON_UCODE_SIG_T12X_FECS_OLDER:
2461 case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
2462 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2463 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2464 addr_code32);
2465 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2466 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2467 segments->code.size);
2468 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2469 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2470 addr_data32);
2471 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2472 segments->data.size);
2473 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2474 addr_code32);
2475 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2476 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2477 break;
2478 default:
2479 nvgpu_err(g,
2480 "unknown falcon ucode boot signature 0x%08x"
2481 " with reg_offset 0x%08x",
2482 segments->boot_signature, reg_offset);
2483 BUG();
2484 }
2485}
2486
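/*
 * DMA the boot image into falcon IMEM in 256-byte blocks and program the
 * boot vector with the entry point from the bootloader descriptor.
 */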
2487void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
2488 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2489{
2490 u32 addr_load32;
2491 u32 blocks;
2492 u32 b;
2493 u32 dst;
2494
2495 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
2496 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
2497
2498 /*
2499 * Set the base FB address for the DMA transfer. Subtract off the 256
2500 * byte IMEM block offset such that the relative FB and IMEM offsets
2501 * match, allowing the IMEM tags to be properly created.
2502 */
2503
2504 dst = segments->boot_imem_offset;
2505 gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
2506 (addr_load32 - (dst >> 8)));
2507
2508 for (b = 0; b < blocks; b++) {
2509 /* Setup destination IMEM offset */
2510 gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
2511 dst + (b << 8));
2512
2513 /* Setup source offset (relative to BASE) */
2514 gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
2515 dst + (b << 8));
2516
2517 gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
2518 gr_fecs_dmatrfcmd_imem_f(0x01) |
2519 gr_fecs_dmatrfcmd_write_f(0x00) |
2520 gr_fecs_dmatrfcmd_size_f(0x06) |
2521 gr_fecs_dmatrfcmd_ctxdma_f(0));
2522 }
2523
2524 /* Specify the falcon boot vector */
2525 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
2526 gr_fecs_bootvec_vec_f(segments->boot_entry));
2527}
2528
2529static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
2530{
2531 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2532 u64 addr_base = ucode_info->surface_desc.gpu_va;
2533
2534 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
2535
2536 gr_gk20a_load_falcon_bind_instblk(g);
2537
2538 g->ops.gr.falcon_load_ucode(g, addr_base,
2539 &g->ctxsw_ucode_info.fecs, 0);
2540
2541 g->ops.gr.falcon_load_ucode(g, addr_base,
2542 &g->ctxsw_ucode_info.gpccs,
2543 gr_gpcs_gpccs_falcon_hwcfg_r() -
2544 gr_fecs_falcon_hwcfg_r());
2545}
2546
2547int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
2548{
2549 int err;
2550
2551 nvgpu_log_fn(g, " ");
2552
2553 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
2554 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
2555 gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
2556 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
2557 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
2558 }
2559
2560 /*
2561  * If the DMA bootstrap path is not supported, fall back to the old way
2562  * of loading the gr ucode, without the faster bootstrap routine.
2563 */
2564 if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
2565 gr_gk20a_load_falcon_dmem(g);
2566 gr_gk20a_load_falcon_imem(g);
2567 gr_gk20a_start_falcon_ucode(g);
2568 } else {
2569 if (!g->gr.skip_ucode_init) {
2570 err = gr_gk20a_init_ctxsw_ucode(g);
2571
2572 if (err != 0) {
2573 return err;
2574 }
2575 }
2576 gr_gk20a_load_falcon_with_bootloader(g);
2577 g->gr.skip_ucode_init = true;
2578 }
2579 nvgpu_log_fn(g, "done");
2580 return 0;
2581}
2582
2583int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g)
2584{
2585 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
2586 gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
2587 gk20a_writel(g, gr_fecs_method_push_r(),
2588 gr_fecs_method_push_adr_set_watchdog_timeout_f());
2589
2590 return 0;
2591}
2592
2593static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
2594{
2595 int ret;
2596
2597 nvgpu_log_fn(g, " ");
2598
2599 ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL,
2600 GR_IS_UCODE_OP_EQUAL,
2601 eUcodeHandshakeInitComplete,
2602 GR_IS_UCODE_OP_SKIP, 0, false);
2603 if (ret) {
2604 nvgpu_err(g, "falcon ucode init timeout");
2605 return ret;
2606 }
2607
2608 if (nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP) ||
2609 nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
2610 gk20a_writel(g, gr_fecs_current_ctx_r(),
2611 gr_fecs_current_ctx_valid_false_f());
2612 }
2613
2614 ret = g->ops.gr.set_fecs_watchdog_timeout(g);
2615 if (ret) {
2616 nvgpu_err(g, "fail to set watchdog timeout");
2617 return ret;
2618 }
2619
2620 nvgpu_log_fn(g, "done");
2621 return 0;
2622}
2623
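/*
 * Query the golden, zcull and PM ctxsw image sizes from FECS via method ops
 * the first time around, before the golden context image is created.
 */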
2624int gr_gk20a_init_ctx_state(struct gk20a *g)
2625{
2626 int ret;
2627 struct fecs_method_op_gk20a op = {
2628 .mailbox = { .id = 0, .data = 0,
2629 .clr = ~0, .ok = 0, .fail = 0},
2630 .method.data = 0,
2631 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
2632 .cond.fail = GR_IS_UCODE_OP_SKIP,
2633 };
2634
2635 nvgpu_log_fn(g, " ");
2636 /* Query the ctxsw image sizes if the golden context has not been created yet. */
2637 if (!g->gr.ctx_vars.golden_image_initialized) {
2638 op.method.addr =
2639 gr_fecs_method_push_adr_discover_image_size_v();
2640 op.mailbox.ret = &g->gr.ctx_vars.golden_image_size;
2641 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2642 if (ret) {
2643 nvgpu_err(g,
2644 "query golden image size failed");
2645 return ret;
2646 }
2647 op.method.addr =
2648 gr_fecs_method_push_adr_discover_zcull_image_size_v();
2649 op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size;
2650 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2651 if (ret) {
2652 nvgpu_err(g,
2653 "query zcull ctx image size failed");
2654 return ret;
2655 }
2656 op.method.addr =
2657 gr_fecs_method_push_adr_discover_pm_image_size_v();
2658 op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size;
2659 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2660 if (ret) {
2661 nvgpu_err(g,
2662 "query pm ctx image size failed");
2663 return ret;
2664 }
2665 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
2666#ifdef CONFIG_GK20A_CTXSW_TRACE
2667 g->gr.ctx_vars.fecs_trace_buffer_size =
2668 gk20a_fecs_trace_buffer_size(g);
2669#endif
2670 }
2671
2672 nvgpu_log_fn(g, "done");
2673 return 0;
2674}
2675
2676void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
2677 struct gr_ctx_buffer_desc *desc)
2678{
2679 if (desc == NULL) {
2680 return;
2681 }
2682 nvgpu_dma_free(g, &desc->mem);
2683 desc->destroy = NULL;
2684}
2685
2686int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
2687 struct gr_ctx_buffer_desc *desc,
2688 size_t size)
2689{
2690 int err = 0;
2691
2692 nvgpu_log_fn(g, " ");
2693
2694 if (nvgpu_mem_is_valid(&desc->mem)) {
2695 return 0;
2696 }
2697
2698 err = nvgpu_dma_alloc_sys(g, size, &desc->mem);
2699 if (err != 0) {
2700 return err;
2701 }
2702
2703 desc->destroy = gk20a_gr_destroy_ctx_buffer;
2704
2705 return err;
2706}
2707
2708static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
2709{
2710 struct gr_gk20a *gr = &g->gr;
2711 u32 i;
2712
2713 for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
2714 /* destroy exists iff buffer is allocated */
2715 if (gr->global_ctx_buffer[i].destroy) {
2716 gr->global_ctx_buffer[i].destroy(g,
2717 &gr->global_ctx_buffer[i]);
2718 }
2719 }
2720
2721 nvgpu_log_fn(g, "done");
2722}
2723
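/*
 * Allocate the global context buffers: circular buffer, pagepool and
 * attribute buffer (plus VPR copies when secure_alloc is available), golden
 * image, priv access map and, when enabled, the FECS trace buffer.
 */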
2724int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2725{
2726 struct gr_gk20a *gr = &g->gr;
2727 int attr_buffer_size, err;
2728
2729 u32 cb_buffer_size = gr->bundle_cb_default_size *
2730 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
2731
2732 u32 pagepool_buffer_size = g->ops.gr.pagepool_default_size(g) *
2733 gr_scc_pagepool_total_pages_byte_granularity_v();
2734
2735 nvgpu_log_fn(g, " ");
2736
2737 attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
2738
2739 nvgpu_log_info(g, "cb_buffer_size : %d", cb_buffer_size);
2740
2741 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[CIRCULAR],
2742 cb_buffer_size);
2743 if (err != 0) {
2744 goto clean_up;
2745 }
2746
2747 if (g->ops.secure_alloc) {
2748 err = g->ops.secure_alloc(g,
2749 &gr->global_ctx_buffer[CIRCULAR_VPR],
2750 cb_buffer_size);
2751 if (err != 0) {
2752 goto clean_up;
2753 }
2754 }
2755
2756 nvgpu_log_info(g, "pagepool_buffer_size : %d", pagepool_buffer_size);
2757
2758 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[PAGEPOOL],
2759 pagepool_buffer_size);
2760 if (err != 0) {
2761 goto clean_up;
2762 }
2763
2764 if (g->ops.secure_alloc) {
2765 err = g->ops.secure_alloc(g,
2766 &gr->global_ctx_buffer[PAGEPOOL_VPR],
2767 pagepool_buffer_size);
2768 if (err != 0) {
2769 goto clean_up;
2770 }
2771 }
2772
2773 nvgpu_log_info(g, "attr_buffer_size : %d", attr_buffer_size);
2774
2775 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[ATTRIBUTE],
2776 attr_buffer_size);
2777 if (err != 0) {
2778 goto clean_up;
2779 }
2780
2781 if (g->ops.secure_alloc) {
2782 err = g->ops.secure_alloc(g,
2783 &gr->global_ctx_buffer[ATTRIBUTE_VPR],
2784 attr_buffer_size);
2785 if (err != 0) {
2786 goto clean_up;
2787 }
2788 }
2789
2790 nvgpu_log_info(g, "golden_image_size : %d",
2791 gr->ctx_vars.golden_image_size);
2792
2793 err = gk20a_gr_alloc_ctx_buffer(g,
2794 &gr->global_ctx_buffer[GOLDEN_CTX],
2795 gr->ctx_vars.golden_image_size);
2796 if (err != 0) {
2797 goto clean_up;
2798 }
2799
2800 nvgpu_log_info(g, "priv_access_map_size : %d",
2801 gr->ctx_vars.priv_access_map_size);
2802
2803 err = gk20a_gr_alloc_ctx_buffer(g,
2804 &gr->global_ctx_buffer[PRIV_ACCESS_MAP],
2805 gr->ctx_vars.priv_access_map_size);
2806
2807 if (err != 0) {
2808 goto clean_up;
2809 }
2810
2811#ifdef CONFIG_GK20A_CTXSW_TRACE
2812 nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
2813 gr->ctx_vars.fecs_trace_buffer_size);
2814
2815 err = nvgpu_dma_alloc_sys(g,
2816 gr->ctx_vars.fecs_trace_buffer_size,
2817 &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
2818 if (err != 0) {
2819 goto clean_up;
2820 }
2821
2822 gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
2823 gk20a_gr_destroy_ctx_buffer;
2824#endif
2825
2826 nvgpu_log_fn(g, "done");
2827 return 0;
2828
2829 clean_up:
2830 nvgpu_err(g, "fail");
2831 gr_gk20a_free_global_ctx_buffers(g);
2832 return -ENOMEM;
2833}
2834
2835static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
2836 struct vm_gk20a *vm,
2837 struct nvgpu_gr_ctx *gr_ctx)
2838{
2839 u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
2840 u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size;
2841 int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
2842 u32 i;
2843
2844 nvgpu_log_fn(g, " ");
2845
2846 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
2847 if (g_bfr_index[i]) {
2848 struct nvgpu_mem *mem;
2849
2850 /*
2851 * Translate from VA index to buffer index to determine
2852 * the correct struct nvgpu_mem to use. Handles the VPR
2853 * vs non-VPR difference in context images.
2854 */
2855 mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem;
2856
2857 nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]);
2858 }
2859 }
2860
2861 memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
2862 memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size));
2863 memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
2864
2865 gr_ctx->global_ctx_buffer_mapped = false;
2866}
2867
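/*
 * Map the global context buffers (circular, attribute, pagepool, golden
 * image, priv access map and, when enabled, the FECS trace buffer) into the
 * channel's VM, preferring the VPR copies for VPR channels.
 */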
2868int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2869 struct channel_gk20a *c)
2870{
2871 struct tsg_gk20a *tsg;
2872 struct vm_gk20a *ch_vm = c->vm;
2873 u64 *g_bfr_va;
2874 u64 *g_bfr_size;
2875 int *g_bfr_index;
2876 struct gr_gk20a *gr = &g->gr;
2877 struct nvgpu_mem *mem;
2878 u64 gpu_va;
2879
2880 nvgpu_log_fn(g, " ");
2881
2882 tsg = tsg_gk20a_from_ch(c);
2883 if (tsg == NULL) {
2884 return -EINVAL;
2885 }
2886
2887 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
2888 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
2889 g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index;
2890
2891 /* Circular Buffer */
2892 if (c->vpr &&
2893 nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) {
2894 mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem;
2895 g_bfr_index[CIRCULAR_VA] = CIRCULAR_VPR;
2896 } else {
2897 mem = &gr->global_ctx_buffer[CIRCULAR].mem;
2898 g_bfr_index[CIRCULAR_VA] = CIRCULAR;
2899 }
2900
2901 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2902 NVGPU_VM_MAP_CACHEABLE,
2903 gk20a_mem_flag_none, true, mem->aperture);
2904 if (gpu_va == 0ULL) {
2905 goto clean_up;
2906 }
2907 g_bfr_va[CIRCULAR_VA] = gpu_va;
2908 g_bfr_size[CIRCULAR_VA] = mem->size;
2909
2910 /* Attribute Buffer */
2911 if (c->vpr &&
2912 nvgpu_mem_is_valid(&gr->global_ctx_buffer[ATTRIBUTE_VPR].mem)) {
2913 mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem;
2914 g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE_VPR;
2915 } else {
2916 mem = &gr->global_ctx_buffer[ATTRIBUTE].mem;
2917 g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE;
2918 }
2919
2920 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2921 NVGPU_VM_MAP_CACHEABLE,
2922 gk20a_mem_flag_none, false, mem->aperture);
2923 if (gpu_va == 0ULL) {
2924 goto clean_up;
2925 }
2926 g_bfr_va[ATTRIBUTE_VA] = gpu_va;
2927 g_bfr_size[ATTRIBUTE_VA] = mem->size;
2928
2929 /* Page Pool */
2930 if (c->vpr &&
2931 nvgpu_mem_is_valid(&gr->global_ctx_buffer[PAGEPOOL_VPR].mem)) {
2932 mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem;
2933 g_bfr_index[PAGEPOOL_VA] = PAGEPOOL_VPR;
2934 } else {
2935 mem = &gr->global_ctx_buffer[PAGEPOOL].mem;
2936 g_bfr_index[PAGEPOOL_VA] = PAGEPOOL;
2937 }
2938
2939 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2940 NVGPU_VM_MAP_CACHEABLE,
2941 gk20a_mem_flag_none, true, mem->aperture);
2942 if (gpu_va == 0ULL) {
2943 goto clean_up;
2944 }
2945 g_bfr_va[PAGEPOOL_VA] = gpu_va;
2946 g_bfr_size[PAGEPOOL_VA] = mem->size;
2947
2948 /* Golden Image */
2949 mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
2950 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2951 gk20a_mem_flag_none, true, mem->aperture);
2952 if (gpu_va == 0ULL) {
2953 goto clean_up;
2954 }
2955 g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
2956 g_bfr_size[GOLDEN_CTX_VA] = mem->size;
2957 g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX;
2958
2959 /* Priv register Access Map */
2960 mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
2961 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2962 gk20a_mem_flag_none, true, mem->aperture);
2963 if (gpu_va == 0ULL) {
2964 goto clean_up;
2965 }
2966 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
2967 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
2968 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
2969
2970 tsg->gr_ctx.global_ctx_buffer_mapped = true;
2971
2972#ifdef CONFIG_GK20A_CTXSW_TRACE
2973 /* FECS trace buffer */
2974 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
2975 mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
2976 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2977 gk20a_mem_flag_none, true, mem->aperture);
2978 if (!gpu_va)
2979 goto clean_up;
2980 g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
2981 g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
2982 g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
2983 }
2984#endif
2985
2986 return 0;
2987
2988clean_up:
2989 gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx);
2990
2991 return -ENOMEM;
2992}
2993
2994int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2995 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
2996 u32 class,
2997 u32 padding)
2998{
2999 struct gr_gk20a *gr = &g->gr;
3000 int err = 0;
3001
3002 nvgpu_log_fn(g, " ");
3003
3004 if (gr->ctx_vars.buffer_size == 0) {
3005 return 0;
3006 }
3007
3008 /* alloc channel gr ctx buffer */
3009 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
3010 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
3011
3012 err = nvgpu_dma_alloc(g, gr->ctx_vars.buffer_total_size, &gr_ctx->mem);
3013 if (err != 0) {
3014 return err;
3015 }
3016
3017 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
3018 &gr_ctx->mem,
3019 gr_ctx->mem.size,
3020 0, /* not GPU-cacheable */
3021 gk20a_mem_flag_none, true,
3022 gr_ctx->mem.aperture);
3023 if (gr_ctx->mem.gpu_va == 0ULL) {
3024 goto err_free_mem;
3025 }
3026
3027 return 0;
3028
3029 err_free_mem:
3030 nvgpu_dma_free(g, &gr_ctx->mem);
3031
3032 return err;
3033}
3034
3035static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
3036 struct tsg_gk20a *tsg, u32 class, u32 padding)
3037{
3038 struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
3039 int err;
3040
3041 if (tsg->vm == NULL) {
3042 nvgpu_err(tsg->g, "No address space bound");
3043 return -ENOMEM;
3044 }
3045
3046 err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding);
3047 if (err != 0) {
3048 return err;
3049 }
3050
3051 gr_ctx->tsgid = tsg->tsgid;
3052
3053 return 0;
3054}
3055
3056void gr_gk20a_free_gr_ctx(struct gk20a *g,
3057 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
3058{
3059 nvgpu_log_fn(g, " ");
3060
3061 if (gr_ctx->mem.gpu_va) {
3062 gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
3063 gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx);
3064 gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx);
3065
3066 if ((g->ops.gr.dump_ctxsw_stats != NULL) &&
3067 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) {
3068 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx);
3069 }
3070
3071 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
3072 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
3073 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
3074 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
3075 nvgpu_dma_unmap_free(vm, &gr_ctx->mem);
3076
3077 memset(gr_ctx, 0, sizeof(*gr_ctx));
3078 }
3079}
3080
3081void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
3082{
3083 struct gk20a *g = tsg->g;
3084
3085 if (tsg->vm == NULL) {
3086 nvgpu_err(g, "No address space bound");
3087 return;
3088 }
3089 tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx);
3090}
3091
3092u32 gr_gk20a_get_patch_slots(struct gk20a *g)
3093{
3094 return PATCH_CTX_SLOTS_PER_PAGE;
3095}
3096
3097static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
3098 struct channel_gk20a *c)
3099{
3100 struct tsg_gk20a *tsg;
3101 struct patch_desc *patch_ctx;
3102 struct vm_gk20a *ch_vm = c->vm;
3103 u32 alloc_size;
3104 int err = 0;
3105
3106 nvgpu_log_fn(g, " ");
3107
3108 tsg = tsg_gk20a_from_ch(c);
3109 if (tsg == NULL) {
3110 return -EINVAL;
3111 }
3112
3113 patch_ctx = &tsg->gr_ctx.patch_ctx;
3114 alloc_size = g->ops.gr.get_patch_slots(g) *
3115 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
3116
3117 nvgpu_log(g, gpu_dbg_info, "patch buffer size in entries: %d",
3118 alloc_size);
3119
3120 err = nvgpu_dma_alloc_map_sys(ch_vm,
3121 alloc_size * sizeof(u32), &patch_ctx->mem);
3122 if (err != 0) {
3123 return err;
3124 }
3125
3126 nvgpu_log_fn(g, "done");
3127 return 0;
3128}
3129
3130static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
3131 struct vm_gk20a *vm,
3132 struct nvgpu_gr_ctx *gr_ctx)
3133{
3134 struct patch_desc *patch_ctx = &gr_ctx->patch_ctx;
3135
3136 nvgpu_log_fn(g, " ");
3137
3138 if (patch_ctx->mem.gpu_va) {
3139 nvgpu_gmmu_unmap(vm, &patch_ctx->mem,
3140 patch_ctx->mem.gpu_va);
3141 }
3142
3143 nvgpu_dma_free(g, &patch_ctx->mem);
3144 patch_ctx->data_count = 0;
3145}
3146
3147static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
3148 struct vm_gk20a *vm,
3149 struct nvgpu_gr_ctx *gr_ctx)
3150{
3151 struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx;
3152
3153 nvgpu_log_fn(g, " ");
3154
3155 if (pm_ctx->mem.gpu_va) {
3156 nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
3157
3158 nvgpu_dma_free(g, &pm_ctx->mem);
3159 }
3160}
3161
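/*
 * gk20a_alloc_obj_ctx():
 * The first object allocation on a TSG allocates and maps the graphics
 * context, patch buffer and global context buffers, commits them to the
 * instance block and initializes/loads the golden context image. Subsequent
 * calls only re-commit the existing gr_ctx buffer.
 */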
3162int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3163{
3164 struct gk20a *g = c->g;
3165 struct nvgpu_gr_ctx *gr_ctx;
3166 struct tsg_gk20a *tsg = NULL;
3167 int err = 0;
3168
3169 nvgpu_log_fn(g, " ");
3170
3171 /* An address space needs to have been bound at this point. */
3172 if (!gk20a_channel_as_bound(c) && (c->vm == NULL)) {
3173 nvgpu_err(g,
3174 "not bound to address space at time"
3175 " of grctx allocation");
3176 return -EINVAL;
3177 }
3178
3179 if (!g->ops.gr.is_valid_class(g, class_num)) {
3180 nvgpu_err(g,
3181 "invalid obj class 0x%x", class_num);
3182 err = -EINVAL;
3183 goto out;
3184 }
3185 c->obj_class = class_num;
3186
3187 tsg = tsg_gk20a_from_ch(c);
3188 if (tsg == NULL) {
3189 return -EINVAL;
3190 }
3191
3192 gr_ctx = &tsg->gr_ctx;
3193
3194 if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
3195 tsg->vm = c->vm;
3196 nvgpu_vm_get(tsg->vm);
3197 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
3198 class_num,
3199 flags);
3200 if (err != 0) {
3201 nvgpu_err(g,
3202 "fail to allocate TSG gr ctx buffer");
3203 nvgpu_vm_put(tsg->vm);
3204 tsg->vm = NULL;
3205 goto out;
3206 }
3207
3208 /* allocate patch buffer */
3209 if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
3210 gr_ctx->patch_ctx.data_count = 0;
3211 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
3212 if (err != 0) {
3213 nvgpu_err(g,
3214 "fail to allocate patch buffer");
3215 goto out;
3216 }
3217 }
3218
3219 /* map global buffer to channel gpu_va and commit */
3220 err = g->ops.gr.map_global_ctx_buffers(g, c);
3221 if (err != 0) {
3222 nvgpu_err(g,
3223 "fail to map global ctx buffer");
3224 goto out;
3225 }
3226 g->ops.gr.commit_global_ctx_buffers(g, c, true);
3227
3228 /* commit gr ctx buffer */
3229 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
3230 if (err != 0) {
3231 nvgpu_err(g,
3232 "fail to commit gr ctx buffer");
3233 goto out;
3234 }
3235
3236 /* init golden image */
3237 err = gr_gk20a_init_golden_ctx_image(g, c);
3238 if (err != 0) {
3239 nvgpu_err(g,
3240 "fail to init golden ctx image");
3241 goto out;
3242 }
3243
3244 /* Re-enable ELPG now that golden image has been initialized.
3245 * The PMU PG init code may already have tried to enable elpg, but
3246 * would not have been able to complete this action since the golden
3247 * image hadn't been initialized yet, so do this now.
3248 */
3249 err = nvgpu_pmu_reenable_elpg(g);
3250 if (err != 0) {
3251 nvgpu_err(g, "fail to re-enable elpg");
3252 goto out;
3253 }
3254
3255 /* load golden image */
3256 err = gr_gk20a_load_golden_ctx_image(g, c);
3257 if (err != 0) {
3258 nvgpu_err(g,
3259 "fail to load golden ctx image");
3260 goto out;
3261 }
3262#ifdef CONFIG_GK20A_CTXSW_TRACE
3263 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3264 err = g->ops.fecs_trace.bind_channel(g, c);
3265 if (err != 0) {
3266 nvgpu_warn(g,
3267 "fail to bind channel for ctxsw trace");
3268 }
3269 }
3270#endif
3271
3272 if (g->ops.gr.set_czf_bypass) {
3273 g->ops.gr.set_czf_bypass(g, c);
3274 }
3275
3276 /* PM ctxt switch is off by default */
3277 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
3278 } else {
3279 /* commit gr ctx buffer */
3280 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
3281 if (err != 0) {
3282 nvgpu_err(g,
3283 "fail to commit gr ctx buffer");
3284 goto out;
3285 }
3286#ifdef CONFIG_GK20A_CTXSW_TRACE
3287 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3288 err = g->ops.fecs_trace.bind_channel(g, c);
3289 if (err != 0) {
3290 nvgpu_warn(g,
3291 "fail to bind channel for ctxsw trace");
3292 }
3293 }
3294#endif
3295 }
3296
3297 nvgpu_log_fn(g, "done");
3298 return 0;
3299out:
3300 /* 1. The gr_ctx, patch_ctx and global ctx buffer mappings
3301    can be reused, so there is no need to release them.
3302    2. Golden image init and load happen only once; if they
3303    succeeded, there is nothing to undo. */
3304 nvgpu_err(g, "fail");
3305 return err;
3306}
3307
3308static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3309{
3310 struct gk20a *g = gr->g;
3311
3312 nvgpu_log_fn(g, " ");
3313
3314 gr_gk20a_free_cyclestats_snapshot_data(g);
3315
3316 gr_gk20a_free_global_ctx_buffers(g);
3317
3318 nvgpu_dma_free(g, &gr->compbit_store.mem);
3319
3320 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
3321
3322 nvgpu_kfree(g, gr->gpc_tpc_count);
3323 nvgpu_kfree(g, gr->gpc_zcb_count);
3324 nvgpu_kfree(g, gr->gpc_ppc_count);
3325 nvgpu_kfree(g, gr->pes_tpc_count[0]);
3326 nvgpu_kfree(g, gr->pes_tpc_count[1]);
3327 nvgpu_kfree(g, gr->pes_tpc_mask[0]);
3328 nvgpu_kfree(g, gr->pes_tpc_mask[1]);
3329 nvgpu_kfree(g, gr->sm_to_cluster);
3330 nvgpu_kfree(g, gr->gpc_skip_mask);
3331 nvgpu_kfree(g, gr->map_tiles);
3332 nvgpu_kfree(g, gr->fbp_rop_l2_en_mask);
3333 gr->gpc_tpc_count = NULL;
3334 gr->gpc_zcb_count = NULL;
3335 gr->gpc_ppc_count = NULL;
3336 gr->pes_tpc_count[0] = NULL;
3337 gr->pes_tpc_count[1] = NULL;
3338 gr->pes_tpc_mask[0] = NULL;
3339 gr->pes_tpc_mask[1] = NULL;
3340 gr->gpc_skip_mask = NULL;
3341 gr->map_tiles = NULL;
3342 gr->fbp_rop_l2_en_mask = NULL;
3343
3344 gr->ctx_vars.valid = false;
3345 nvgpu_kfree(g, gr->ctx_vars.ucode.fecs.inst.l);
3346 nvgpu_kfree(g, gr->ctx_vars.ucode.fecs.data.l);
3347 nvgpu_kfree(g, gr->ctx_vars.ucode.gpccs.inst.l);
3348 nvgpu_kfree(g, gr->ctx_vars.ucode.gpccs.data.l);
3349 nvgpu_kfree(g, gr->ctx_vars.sw_bundle_init.l);
3350 nvgpu_kfree(g, gr->ctx_vars.sw_veid_bundle_init.l);
3351 nvgpu_kfree(g, gr->ctx_vars.sw_method_init.l);
3352 nvgpu_kfree(g, gr->ctx_vars.sw_ctx_load.l);
3353 nvgpu_kfree(g, gr->ctx_vars.sw_non_ctx_load.l);
3354 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.sys.l);
3355 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.gpc.l);
3356 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.tpc.l);
3357 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
3358 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.ppc.l);
3359 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_sys.l);
3360 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_gpc.l);
3361 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_tpc.l);
3362 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_ppc.l);
3363 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.perf_sys.l);
3364 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.fbp.l);
3365 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.perf_gpc.l);
3366 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.fbp_router.l);
3367 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.gpc_router.l);
3368 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_ltc.l);
3369 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_fbpa.l);
3370 nvgpu_kfree(g, gr->ctx_vars.sw_bundle64_init.l);
3371 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_cau.l);
3372
3373 nvgpu_vfree(g, gr->ctx_vars.local_golden_image);
3374 gr->ctx_vars.local_golden_image = NULL;
3375
3376 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) {
3377 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3378 }
3379 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3380
3381 gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
3382
3383 nvgpu_ecc_remove_support(g);
3384}
3385
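/*
 * Read the floorswept GPC/TPC/PES/zcull configuration from hardware and
 * populate the per-GPC count, mask and skip-mask tables in struct gr_gk20a.
 */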
3386static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3387{
3388 u32 gpc_index, pes_index;
3389 u32 pes_tpc_mask;
3390 u32 pes_tpc_count;
3391 u32 pes_heavy_index;
3392 u32 gpc_new_skip_mask;
3393 u32 tmp;
3394 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3395 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
3396
3397 tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
3398 gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
3399
3400 tmp = gk20a_readl(g, top_num_gpcs_r());
3401 gr->max_gpc_count = top_num_gpcs_value_v(tmp);
3402
3403 tmp = gk20a_readl(g, top_num_fbps_r());
3404 gr->max_fbps_count = top_num_fbps_value_v(tmp);
3405
3406 gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
3407
3408 if (gr->fbp_rop_l2_en_mask == NULL) {
3409 gr->fbp_rop_l2_en_mask =
3410 nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
3411 if (gr->fbp_rop_l2_en_mask == NULL) {
3412 goto clean_up;
3413 }
3414 } else {
3415 memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count *
3416 sizeof(u32));
3417 }
3418
3419 tmp = gk20a_readl(g, top_tpc_per_gpc_r());
3420 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
3421
3422 gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
3423
3424 tmp = gk20a_readl(g, top_num_fbps_r());
3425 gr->sys_count = top_num_fbps_value_v(tmp);
3426
3427 tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
3428 gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
3429
3430 gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
3431 if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
3432 "too many pes per gpc\n")) {
3433 goto clean_up;
3434 }
3435
3436 gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS);
3437
3438 if (gr->gpc_count == 0U) {
3439 nvgpu_err(g, "gpc_count==0!");
3440 goto clean_up;
3441 }
3442
3443 if (gr->gpc_tpc_count == NULL) {
3444 gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count *
3445 sizeof(u32));
3446 } else {
3447 memset(gr->gpc_tpc_count, 0, gr->gpc_count *
3448 sizeof(u32));
3449 }
3450
3451 if (gr->gpc_tpc_mask == NULL) {
3452 gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count *
3453 sizeof(u32));
3454 } else {
3455 memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count *
3456 sizeof(u32));
3457 }
3458
3459 if (gr->gpc_zcb_count == NULL) {
3460 gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count *
3461 sizeof(u32));
3462 } else {
3463 memset(gr->gpc_zcb_count, 0, gr->gpc_count *
3464 sizeof(u32));
3465 }
3466
3467 if (gr->gpc_ppc_count == NULL) {
3468 gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count *
3469 sizeof(u32));
3470 } else {
3471 memset(gr->gpc_ppc_count, 0, gr->gpc_count *
3472 sizeof(u32));
3473 }
3474
3475 if (gr->gpc_skip_mask == NULL) {
3476 gr->gpc_skip_mask =
3477 nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() *
3478 4 * sizeof(u32));
3479 } else {
3480 memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() *
3481 4 * sizeof(u32));
3482 }
3483
3484 if ((gr->gpc_tpc_count == NULL) || (gr->gpc_tpc_mask == NULL) ||
3485 (gr->gpc_zcb_count == NULL) || (gr->gpc_ppc_count == NULL) ||
3486 (gr->gpc_skip_mask == NULL)) {
3487 goto clean_up;
3488 }
3489
3490 for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) {
3491 if (g->ops.gr.get_gpc_tpc_mask) {
3492 gr->gpc_tpc_mask[gpc_index] =
3493 g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
3494 }
3495 }
3496
3497 gr->ppc_count = 0;
3498 gr->tpc_count = 0;
3499 gr->zcb_count = 0;
3500 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3501 tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r() +
3502 gpc_stride * gpc_index);
3503
3504 gr->gpc_tpc_count[gpc_index] =
3505 gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
3506 gr->tpc_count += gr->gpc_tpc_count[gpc_index];
3507
3508 gr->gpc_zcb_count[gpc_index] =
3509 gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
3510 gr->zcb_count += gr->gpc_zcb_count[gpc_index];
3511
3512 for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
3513 if (gr->pes_tpc_count[pes_index] == NULL) {
3514 gr->pes_tpc_count[pes_index] =
3515 nvgpu_kzalloc(g, gr->gpc_count *
3516 sizeof(u32));
3517 gr->pes_tpc_mask[pes_index] =
3518 nvgpu_kzalloc(g, gr->gpc_count *
3519 sizeof(u32));
3520 if ((gr->pes_tpc_count[pes_index] == NULL) ||
3521 (gr->pes_tpc_mask[pes_index] == NULL)) {
3522 goto clean_up;
3523 }
3524 }
3525
3526 tmp = gk20a_readl(g,
3527 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
3528 gpc_index * gpc_stride);
3529
3530 pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
3531 pes_tpc_count = count_bits(pes_tpc_mask);
3532
3533 /* detect PES presence by seeing if there are
3534 * TPCs connected to it.
3535 */
3536 if (pes_tpc_count != 0) {
3537 gr->gpc_ppc_count[gpc_index]++;
3538 }
3539
3540 gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
3541 gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
3542 }
3543
3544 gr->ppc_count += gr->gpc_ppc_count[gpc_index];
3545
3546 gpc_new_skip_mask = 0;
3547 if (gr->pe_count_per_gpc > 1 &&
3548 gr->pes_tpc_count[0][gpc_index] +
3549 gr->pes_tpc_count[1][gpc_index] == 5) {
3550 pes_heavy_index =
3551 gr->pes_tpc_count[0][gpc_index] >
3552 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
3553
3554 gpc_new_skip_mask =
3555 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
3556 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
3557 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
3558
3559 } else if (gr->pe_count_per_gpc > 1 &&
3560 (gr->pes_tpc_count[0][gpc_index] +
3561 gr->pes_tpc_count[1][gpc_index] == 4) &&
3562 (gr->pes_tpc_count[0][gpc_index] !=
3563 gr->pes_tpc_count[1][gpc_index])) {
3564 pes_heavy_index =
3565 gr->pes_tpc_count[0][gpc_index] >
3566 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
3567
3568 gpc_new_skip_mask =
3569 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
3570 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
3571 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
3572 }
3573 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
3574 }
3575
3576 /* allocate for max tpc per gpc */
3577 if (gr->sm_to_cluster == NULL) {
3578 gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count *
3579 gr->max_tpc_per_gpc_count *
3580 sm_per_tpc * sizeof(struct sm_info));
3581 if (!gr->sm_to_cluster)
3582 goto clean_up;
3583 } else {
3584 memset(gr->sm_to_cluster, 0, gr->gpc_count *
3585 gr->max_tpc_per_gpc_count *
3586 sm_per_tpc * sizeof(struct sm_info));
3587 }
3588 gr->no_of_sm = 0;
3589
3590 nvgpu_log_info(g, "fbps: %d", gr->num_fbps);
3591 nvgpu_log_info(g, "max_gpc_count: %d", gr->max_gpc_count);
3592 nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count);
3593 nvgpu_log_info(g, "max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
3594 nvgpu_log_info(g, "max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
3595 nvgpu_log_info(g, "max_tpc_count: %d", gr->max_tpc_count);
3596 nvgpu_log_info(g, "sys_count: %d", gr->sys_count);
3597 nvgpu_log_info(g, "gpc_count: %d", gr->gpc_count);
3598 nvgpu_log_info(g, "pe_count_per_gpc: %d", gr->pe_count_per_gpc);
3599 nvgpu_log_info(g, "tpc_count: %d", gr->tpc_count);
3600 nvgpu_log_info(g, "ppc_count: %d", gr->ppc_count);
3601
3602 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3603 nvgpu_log_info(g, "gpc_tpc_count[%d] : %d",
3604 gpc_index, gr->gpc_tpc_count[gpc_index]);
3605 }
3606 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3607 nvgpu_log_info(g, "gpc_zcb_count[%d] : %d",
3608 gpc_index, gr->gpc_zcb_count[gpc_index]);
3609 }
3610 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3611 nvgpu_log_info(g, "gpc_ppc_count[%d] : %d",
3612 gpc_index, gr->gpc_ppc_count[gpc_index]);
3613 }
3614 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3615 nvgpu_log_info(g, "gpc_skip_mask[%d] : %d",
3616 gpc_index, gr->gpc_skip_mask[gpc_index]);
3617 }
3618 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3619 for (pes_index = 0;
3620 pes_index < gr->pe_count_per_gpc;
3621 pes_index++) {
3622 nvgpu_log_info(g, "pes_tpc_count[%d][%d] : %d",
3623 pes_index, gpc_index,
3624 gr->pes_tpc_count[pes_index][gpc_index]);
3625 }
3626 }
3627
3628 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3629 for (pes_index = 0;
3630 pes_index < gr->pe_count_per_gpc;
3631 pes_index++) {
3632 nvgpu_log_info(g, "pes_tpc_mask[%d][%d] : %d",
3633 pes_index, gpc_index,
3634 gr->pes_tpc_mask[pes_index][gpc_index]);
3635 }
3636 }
3637
3638 g->ops.gr.bundle_cb_defaults(g);
3639 g->ops.gr.cb_size_default(g);
3640 g->ops.gr.calc_global_ctx_buffer_size(g);
3641 gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
3642
3643 nvgpu_log_info(g, "bundle_cb_default_size: %d",
3644 gr->bundle_cb_default_size);
3645 nvgpu_log_info(g, "min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
3646 nvgpu_log_info(g, "bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
3647 nvgpu_log_info(g, "attrib_cb_default_size: %d",
3648 gr->attrib_cb_default_size);
3649 nvgpu_log_info(g, "attrib_cb_size: %d", gr->attrib_cb_size);
3650 nvgpu_log_info(g, "alpha_cb_default_size: %d", gr->alpha_cb_default_size);
3651 nvgpu_log_info(g, "alpha_cb_size: %d", gr->alpha_cb_size);
3652 nvgpu_log_info(g, "timeslice_mode: %d", gr->timeslice_mode);
3653
3654 return 0;
3655
3656clean_up:
3657 return -ENOMEM;
3658}
3659
3660static u32 prime_set[18] = {
3661 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
3662
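/*
 * Build the screen tile map, distributing TPCs across GPCs with an
 * error-diffusion scheme; map_row_offset is picked as a small prime that
 * does not divide the TPC count, with hand-tuned overrides for a few
 * specific counts.
 */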
3663static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3664{
3665 s32 comm_denom;
3666 s32 mul_factor;
3667 s32 *init_frac = NULL;
3668 s32 *init_err = NULL;
3669 s32 *run_err = NULL;
3670 s32 *sorted_num_tpcs = NULL;
3671 s32 *sorted_to_unsorted_gpc_map = NULL;
3672 u32 gpc_index;
3673 u32 gpc_mark = 0;
3674 u32 num_tpc;
3675 u32 max_tpc_count = 0;
3676 u32 swap;
3677 u32 tile_count;
3678 u32 index;
3679 bool delete_map = false;
3680 bool gpc_sorted;
3681 int ret = 0;
3682 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
3683 int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
3684 int map_tile_count = num_gpcs * num_tpc_per_gpc;
3685
3686 init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3687 init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3688 run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3689 sorted_num_tpcs =
3690 nvgpu_kzalloc(g, num_gpcs * num_tpc_per_gpc * sizeof(s32));
3691 sorted_to_unsorted_gpc_map =
3692 nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3693
3694 if (!((init_frac != NULL) &&
3695 (init_err != NULL) &&
3696 (run_err != NULL) &&
3697 (sorted_num_tpcs != NULL) &&
3698 (sorted_to_unsorted_gpc_map != NULL))) {
3699 ret = -ENOMEM;
3700 goto clean_up;
3701 }
3702
3703 gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
3704
3705 if (gr->tpc_count == 3) {
3706 gr->map_row_offset = 2;
3707 } else if (gr->tpc_count < 3) {
3708 gr->map_row_offset = 1;
3709 } else {
3710 gr->map_row_offset = 3;
3711
3712 for (index = 1; index < 18; index++) {
3713 u32 prime = prime_set[index];
3714 if ((gr->tpc_count % prime) != 0) {
3715 gr->map_row_offset = prime;
3716 break;
3717 }
3718 }
3719 }
3720
3721 switch (gr->tpc_count) {
3722 case 15:
3723 gr->map_row_offset = 6;
3724 break;
3725 case 14:
3726 gr->map_row_offset = 5;
3727 break;
3728 case 13:
3729 gr->map_row_offset = 2;
3730 break;
3731 case 11:
3732 gr->map_row_offset = 7;
3733 break;
3734 case 10:
3735 gr->map_row_offset = 6;
3736 break;
3737 case 7:
3738 case 5:
3739 gr->map_row_offset = 1;
3740 break;
3741 default:
3742 break;
3743 }
3744
3745 if (gr->map_tiles) {
3746 if (gr->map_tile_count != gr->tpc_count) {
3747 delete_map = true;
3748 }
3749
3750 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
3751 if (gr_gk20a_get_map_tile_count(gr, tile_count)
3752 >= gr->tpc_count) {
3753 delete_map = true;
3754 }
3755 }
3756
3757 if (delete_map) {
3758 nvgpu_kfree(g, gr->map_tiles);
3759 gr->map_tiles = NULL;
3760 gr->map_tile_count = 0;
3761 }
3762 }
3763
3764 if (gr->map_tiles == NULL) {
3765 gr->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
3766 if (gr->map_tiles == NULL) {
3767 ret = -ENOMEM;
3768 goto clean_up;
3769 }
3770 gr->map_tile_count = map_tile_count;
3771
3772 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3773 sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
3774 sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
3775 }
3776
3777 gpc_sorted = false;
3778 while (!gpc_sorted) {
3779 gpc_sorted = true;
3780 for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
3781 if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
3782 gpc_sorted = false;
3783 swap = sorted_num_tpcs[gpc_index];
3784 sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
3785 sorted_num_tpcs[gpc_index + 1] = swap;
3786 swap = sorted_to_unsorted_gpc_map[gpc_index];
3787 sorted_to_unsorted_gpc_map[gpc_index] =
3788 sorted_to_unsorted_gpc_map[gpc_index + 1];
3789 sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
3790 }
3791 }
3792 }
3793
3794 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3795 if (gr->gpc_tpc_count[gpc_index] > max_tpc_count) {
3796 max_tpc_count = gr->gpc_tpc_count[gpc_index];
3797 }
3798 }
3799
3800 mul_factor = gr->gpc_count * max_tpc_count;
3801 if (mul_factor & 0x1) {
3802 mul_factor = 2;
3803 } else {
3804 mul_factor = 1;
3805 }
3806
3807 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
3808
3809 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3810 num_tpc = sorted_num_tpcs[gpc_index];
3811
3812 init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
3813
3814 if (num_tpc != 0) {
3815 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
3816 } else {
3817 init_err[gpc_index] = 0;
3818 }
3819
3820 run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
3821 }
3822
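		/*
		 * The loop below is effectively an error-diffusion
		 * (Bresenham-style) scheduler: each GPC accumulates init_frac
		 * (proportional to its TPC count) per pass, and whenever its
		 * running error crosses comm_denom/2 it is granted the next
		 * map tile and comm_denom is subtracted. This interleaves GPC
		 * indices in map_tiles roughly in proportion to each GPC's
		 * TPC count.
		 */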
3823 while (gpc_mark < gr->tpc_count) {
3824 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3825 if ((run_err[gpc_index] * 2) >= comm_denom) {
3826 gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
3827 run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
3828 } else {
3829 run_err[gpc_index] += init_frac[gpc_index];
3830 }
3831 }
3832 }
3833 }
3834
3835clean_up:
3836 nvgpu_kfree(g, init_frac);
3837 nvgpu_kfree(g, init_err);
3838 nvgpu_kfree(g, run_err);
3839 nvgpu_kfree(g, sorted_num_tpcs);
3840 nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
3841
3842 if (ret) {
3843 nvgpu_err(g, "fail");
3844 } else {
3845 nvgpu_log_fn(g, "done");
3846 }
3847
3848 return ret;
3849}
3850
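/*
 * Illustrative numbers for the zcull sizing below (chosen for the example,
 * not taken from real hardware): with tpc_count = 2, gpc_count = 1,
 * gpc_tpc_count[0] = 2 and zcb_count = 2, an aliquot is 32x16 pixels
 * (512 pixels) and pixel_squares_by_aliquots = 2 * 16 * 16 * 2 / (1 * 2) = 512.
 */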
3851static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
3852{
3853 struct gr_zcull_gk20a *zcull = &gr->zcull;
3854
3855 zcull->aliquot_width = gr->tpc_count * 16;
3856 zcull->aliquot_height = 16;
3857
3858 zcull->width_align_pixels = gr->tpc_count * 16;
3859 zcull->height_align_pixels = 32;
3860
3861 zcull->aliquot_size =
3862 zcull->aliquot_width * zcull->aliquot_height;
3863
3864 /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
3865 zcull->pixel_squares_by_aliquots =
3866 gr->zcb_count * 16 * 16 * gr->tpc_count /
3867 (gr->gpc_count * gr->gpc_tpc_count[0]);
3868
3869 zcull->total_aliquots =
3870 gr_gpc0_zcull_total_ram_size_num_aliquots_f(
3871 gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
3872
3873 return 0;
3874}
3875
3876u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
3877{
3878 /* assuming gr has already been initialized */
3879 return gr->ctx_vars.zcull_ctxsw_image_size;
3880}
3881
3882int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
3883 struct channel_gk20a *c, u64 zcull_va, u32 mode)
3884{
3885 struct tsg_gk20a *tsg;
3886 struct zcull_ctx_desc *zcull_ctx;
3887
3888 tsg = tsg_gk20a_from_ch(c);
3889 if (tsg == NULL) {
3890 return -EINVAL;
3891 }
3892
3893 zcull_ctx = &tsg->gr_ctx.zcull_ctx;
3894 zcull_ctx->ctx_sw_mode = mode;
3895 zcull_ctx->gpu_va = zcull_va;
3896
3897 /* TBD: don't disable channel in sw method processing */
3898 return gr_gk20a_ctx_zcull_setup(g, c);
3899}
3900
3901int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
3902 struct gr_zcull_info *zcull_params)
3903{
3904 struct gr_zcull_gk20a *zcull = &gr->zcull;
3905
3906 zcull_params->width_align_pixels = zcull->width_align_pixels;
3907 zcull_params->height_align_pixels = zcull->height_align_pixels;
3908 zcull_params->pixel_squares_by_aliquots =
3909 zcull->pixel_squares_by_aliquots;
3910 zcull_params->aliquot_total = zcull->total_aliquots;
3911
3912 zcull_params->region_byte_multiplier =
3913 gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
3914 zcull_params->region_header_size =
3915 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
3916 gr_zcull_save_restore_header_bytes_per_gpc_v();
3917
3918 zcull_params->subregion_header_size =
3919 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
3920 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
3921
3922 zcull_params->subregion_width_align_pixels =
3923 gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
3924 zcull_params->subregion_height_align_pixels =
3925 gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
3926 zcull_params->subregion_count = gr_zcull_subregion_qty_v();
3927
3928 return 0;
3929}
3930
3931int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
3932 struct zbc_entry *color_val, u32 index)
3933{
3934 u32 i;
3935
3936 /* update l2 table */
3937 g->ops.ltc.set_zbc_color_entry(g, color_val, index);
3938
3939 /* update ds table */
3940 gk20a_writel(g, gr_ds_zbc_color_r_r(),
3941 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
3942 gk20a_writel(g, gr_ds_zbc_color_g_r(),
3943 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
3944 gk20a_writel(g, gr_ds_zbc_color_b_r(),
3945 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
3946 gk20a_writel(g, gr_ds_zbc_color_a_r(),
3947 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
3948
3949 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
3950 gr_ds_zbc_color_fmt_val_f(color_val->format));
3951
3952 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3953 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
3954
3955 /* trigger the write */
3956 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3957 gr_ds_zbc_tbl_ld_select_c_f() |
3958 gr_ds_zbc_tbl_ld_action_write_f() |
3959 gr_ds_zbc_tbl_ld_trigger_active_f());
3960
3961 /* update local copy */
3962 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3963 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
3964 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
3965 }
3966 gr->zbc_col_tbl[index].format = color_val->format;
3967 gr->zbc_col_tbl[index].ref_cnt++;
3968
3969 return 0;
3970}
3971
3972int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
3973 struct zbc_entry *depth_val, u32 index)
3974{
3975 /* update l2 table */
3976 g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
3977
3978 /* update ds table */
3979 gk20a_writel(g, gr_ds_zbc_z_r(),
3980 gr_ds_zbc_z_val_f(depth_val->depth));
3981
3982 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
3983 gr_ds_zbc_z_fmt_val_f(depth_val->format));
3984
3985 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3986 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
3987
3988 /* trigger the write */
3989 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3990 gr_ds_zbc_tbl_ld_select_z_f() |
3991 gr_ds_zbc_tbl_ld_action_write_f() |
3992 gr_ds_zbc_tbl_ld_trigger_active_f());
3993
3994 /* update local copy */
3995 gr->zbc_dep_tbl[index].depth = depth_val->depth;
3996 gr->zbc_dep_tbl[index].format = depth_val->format;
3997 gr->zbc_dep_tbl[index].ref_cnt++;
3998
3999 return 0;
4000}
4001
4002void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
4003{
4004 struct fifo_gk20a *f = &g->fifo;
4005 struct fifo_engine_info_gk20a *gr_info = NULL;
4006 u32 ret;
4007 u32 engine_id;
4008
4009 engine_id = gk20a_fifo_get_gr_engine_id(g);
4010 gr_info = (f->engine_info + engine_id);
4011
4012 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
4013 if (ret) {
4014 nvgpu_err(g,
4015 "failed to disable gr engine activity");
4016 return;
4017 }
4018
4019 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
4020 GR_IDLE_CHECK_DEFAULT);
4021 if (ret) {
4022 nvgpu_err(g,
4023 "failed to idle graphics");
4024 goto clean_up;
4025 }
4026
4027 /* update zbc */
4028 g->ops.gr.pmu_save_zbc(g, entries);
4029
4030clean_up:
4031 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
4032 if (ret) {
4033 nvgpu_err(g,
4034 "failed to enable gr engine activity");
4035 }
4036}
4037
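/*
 * ZBC entries are reference-counted and de-duplicated: a request that matches
 * an existing color/depth slot only bumps that slot's ref_cnt, while a
 * genuinely new value is appended at max_used_*_index. The PMU copy (used for
 * ELPG) is refreshed only in the latter case, i.e. when "added" stays false
 * and the hardware update succeeded.
 */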
4038int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
4039 struct zbc_entry *zbc_val)
4040{
4041 struct zbc_color_table *c_tbl;
4042 struct zbc_depth_table *d_tbl;
4043 u32 i;
4044 int ret = -ENOSPC;
4045 bool added = false;
4046 u32 entries;
4047
4048 /* no endian swap ? */
4049
4050 nvgpu_mutex_acquire(&gr->zbc_lock);
4051 nvgpu_speculation_barrier();
4052 switch (zbc_val->type) {
4053 case GK20A_ZBC_TYPE_COLOR:
4054 /* search existing tables */
4055 for (i = 0; i < gr->max_used_color_index; i++) {
4056
4057 c_tbl = &gr->zbc_col_tbl[i];
4058
4059 if ((c_tbl->ref_cnt != 0U) &&
4060 (c_tbl->format == zbc_val->format) &&
4061 (memcmp(c_tbl->color_ds, zbc_val->color_ds,
4062 sizeof(zbc_val->color_ds)) == 0) &&
4063 (memcmp(c_tbl->color_l2, zbc_val->color_l2,
4064 sizeof(zbc_val->color_l2)) == 0)) {
4065
4066 added = true;
4067 c_tbl->ref_cnt++;
4068 ret = 0;
4069 break;
4070 }
4071 }
4072 /* add new table */
4073 if (!added &&
4074 gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
4075
4076 c_tbl =
4077 &gr->zbc_col_tbl[gr->max_used_color_index];
4078 WARN_ON(c_tbl->ref_cnt != 0);
4079
4080 ret = g->ops.gr.add_zbc_color(g, gr,
4081 zbc_val, gr->max_used_color_index);
4082
4083 if (ret == 0) {
4084 gr->max_used_color_index++;
4085 }
4086 }
4087 break;
4088 case GK20A_ZBC_TYPE_DEPTH:
4089 /* search existing tables */
4090 for (i = 0; i < gr->max_used_depth_index; i++) {
4091
4092 d_tbl = &gr->zbc_dep_tbl[i];
4093
4094 if ((d_tbl->ref_cnt != 0U) &&
4095 (d_tbl->depth == zbc_val->depth) &&
4096 (d_tbl->format == zbc_val->format)) {
4097 added = true;
4098 d_tbl->ref_cnt++;
4099 ret = 0;
4100 break;
4101 }
4102 }
4103 /* add new table */
4104 if (!added &&
4105 gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
4106
4107 d_tbl =
4108 &gr->zbc_dep_tbl[gr->max_used_depth_index];
4109 WARN_ON(d_tbl->ref_cnt != 0);
4110
4111 ret = g->ops.gr.add_zbc_depth(g, gr,
4112 zbc_val, gr->max_used_depth_index);
4113
4114 if (ret == 0) {
4115 gr->max_used_depth_index++;
4116 }
4117 }
4118 break;
4119 case T19X_ZBC:
4120 if (g->ops.gr.add_zbc_type_s) {
4121 added = g->ops.gr.add_zbc_type_s(g, gr, zbc_val, &ret);
4122 } else {
4123 nvgpu_err(g,
4124 "invalid zbc table type %d", zbc_val->type);
4125 ret = -EINVAL;
4126 goto err_mutex;
4127 }
4128 break;
4129 default:
4130 nvgpu_err(g,
4131 "invalid zbc table type %d", zbc_val->type);
4132 ret = -EINVAL;
4133 goto err_mutex;
4134 }
4135
4136 if (!added && ret == 0) {
4137 /* update zbc for elpg only when new entry is added */
4138 entries = max(gr->max_used_color_index,
4139 gr->max_used_depth_index);
4140 g->ops.gr.pmu_save_zbc(g, entries);
4141 }
4142
4143err_mutex:
4144 nvgpu_mutex_release(&gr->zbc_lock);
4145 return ret;
4146}
4147
4148/* get a zbc table entry specified by index
4149 * return table size when type is invalid */
4150int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
4151 struct zbc_query_params *query_params)
4152{
4153 u32 index = query_params->index_size;
4154 u32 i;
4155
4156 nvgpu_speculation_barrier();
4157 switch (query_params->type) {
4158 case GK20A_ZBC_TYPE_INVALID:
4159 query_params->index_size = GK20A_ZBC_TABLE_SIZE;
4160 break;
4161 case GK20A_ZBC_TYPE_COLOR:
4162 if (index >= GK20A_ZBC_TABLE_SIZE) {
4163 nvgpu_err(g,
4164 "invalid zbc color table index");
4165 return -EINVAL;
4166 }
4167
4168 nvgpu_speculation_barrier();
4169 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4170 query_params->color_l2[i] =
4171 gr->zbc_col_tbl[index].color_l2[i];
4172 query_params->color_ds[i] =
4173 gr->zbc_col_tbl[index].color_ds[i];
4174 }
4175 query_params->format = gr->zbc_col_tbl[index].format;
4176 query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
4177 break;
4178 case GK20A_ZBC_TYPE_DEPTH:
4179 if (index >= GK20A_ZBC_TABLE_SIZE) {
4180 nvgpu_err(g,
4181 "invalid zbc depth table index");
4182 return -EINVAL;
4183 }
4184
4185 nvgpu_speculation_barrier();
4186 query_params->depth = gr->zbc_dep_tbl[index].depth;
4187 query_params->format = gr->zbc_dep_tbl[index].format;
4188 query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
4189 break;
4190 case T19X_ZBC:
4191 if (g->ops.gr.zbc_s_query_table) {
4192 return g->ops.gr.zbc_s_query_table(g, gr,
4193 query_params);
4194 } else {
4195 nvgpu_err(g,
4196 "invalid zbc table type");
4197 return -EINVAL;
4198 }
4199 break;
4200 default:
4201 nvgpu_err(g,
4202 "invalid zbc table type");
4203 return -EINVAL;
4204 }
4205
4206 return 0;
4207}
4208
4209static int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
4210{
4211 unsigned int i;
4212 int ret;
4213
4214 for (i = 0; i < gr->max_used_color_index; i++) {
4215 struct zbc_color_table *c_tbl = &gr->zbc_col_tbl[i];
4216 struct zbc_entry zbc_val;
4217
4218 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
4219 memcpy(zbc_val.color_ds,
4220 c_tbl->color_ds, sizeof(zbc_val.color_ds));
4221 memcpy(zbc_val.color_l2,
4222 c_tbl->color_l2, sizeof(zbc_val.color_l2));
4223 zbc_val.format = c_tbl->format;
4224
4225 ret = g->ops.gr.add_zbc_color(g, gr, &zbc_val, i);
4226
4227 if (ret) {
4228 return ret;
4229 }
4230 }
4231 for (i = 0; i < gr->max_used_depth_index; i++) {
4232 struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i];
4233 struct zbc_entry zbc_val;
4234
4235 zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
4236 zbc_val.depth = d_tbl->depth;
4237 zbc_val.format = d_tbl->format;
4238
4239 ret = g->ops.gr.add_zbc_depth(g, gr, &zbc_val, i);
4240 if (ret) {
4241 return ret;
4242 }
4243 }
4244
4245 if (g->ops.gr.load_zbc_s_tbl) {
4246 ret = g->ops.gr.load_zbc_s_tbl(g, gr);
4247 if (ret) {
4248 return ret;
4249 }
4250 }
4251
4252 return 0;
4253}
4254
4255int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
4256{
4257 struct zbc_entry zbc_val;
4258 u32 i = 0;
4259 int err = 0;
4260
4261 err = nvgpu_mutex_init(&gr->zbc_lock);
4262 if (err != 0) {
4263 nvgpu_err(g, "Error in zbc_lock mutex initialization");
4264 return err;
4265 }
4266
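	/*
	 * Note: 0x3f800000 is the IEEE-754 bit pattern of 1.0f. The color_ds
	 * and depth values below are floats stored as raw bits, while the
	 * color_l2 values are packed 8-bit channels (e.g. 0xff000000 is
	 * alpha-only for opaque black in A8B8G8R8).
	 */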
4267 /* load default color table */
4268 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
4269
4270 /* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */
4271 zbc_val.format = gr_ds_zbc_color_fmt_val_a8_b8_g8_r8_v();
4272 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4273 zbc_val.color_ds[i] = 0;
4274 zbc_val.color_l2[i] = 0;
4275 }
4276 zbc_val.color_l2[0] = 0xff000000;
4277 zbc_val.color_ds[3] = 0x3f800000;
4278 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4279 if (err != 0) {
4280 goto color_fail;
4281 }
4282
4283 /* Transparent black = (fmt 1 = zero) */
4284 zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
4285 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4286 zbc_val.color_ds[i] = 0;
4287 zbc_val.color_l2[i] = 0;
4288 }
4289 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4290 if (err != 0) {
4291 goto color_fail;
4292 }
4293
4294 /* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */
4295 zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
4296 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4297 zbc_val.color_ds[i] = 0x3f800000;
4298 zbc_val.color_l2[i] = 0xffffffff;
4299 }
4300 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4301 if (err != 0) {
4302 goto color_fail;
4303 }
4304
4305 gr->max_default_color_index = 3;
4306
4307 /* load default depth table */
4308 zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
4309
4310 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
4311 zbc_val.depth = 0x3f800000;
4312 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4313 if (err != 0) {
4314 goto depth_fail;
4315 }
4316
4317 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
4318 zbc_val.depth = 0;
4319 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4320 if (err != 0) {
4321 goto depth_fail;
4322 }
4323
4324 gr->max_default_depth_index = 2;
4325
4326 if (g->ops.gr.load_zbc_s_default_tbl) {
4327 err = g->ops.gr.load_zbc_s_default_tbl(g, gr);
4328 if (err != 0) {
4329 return err;
4330 }
4331 }
4332
4333 return 0;
4334
4335color_fail:
4336 nvgpu_err(g, "fail to load default zbc color table");
4337 return err;
4338depth_fail:
4339 nvgpu_err(g, "fail to load default zbc depth table");
4340 return err;
4341}
4342
4343int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
4344 struct zbc_entry *zbc_val)
4345{
4346 struct fifo_gk20a *f = &g->fifo;
4347 struct fifo_engine_info_gk20a *gr_info = NULL;
4348 int ret;
4349 u32 engine_id;
4350
4351 engine_id = gk20a_fifo_get_gr_engine_id(g);
4352 gr_info = (f->engine_info + engine_id);
4353
4354 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
4355 if (ret) {
4356 nvgpu_err(g,
4357 "failed to disable gr engine activity");
4358 return ret;
4359 }
4360
4361 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
4362 GR_IDLE_CHECK_DEFAULT);
4363 if (ret) {
4364 nvgpu_err(g,
4365 "failed to idle graphics");
4366 goto clean_up;
4367 }
4368
4369 ret = gr_gk20a_add_zbc(g, gr, zbc_val);
4370
4371clean_up:
4372 if (gk20a_fifo_enable_engine_activity(g, gr_info)) {
4373 nvgpu_err(g,
4374 "failed to enable gr engine activity");
4375 }
4376
4377 return ret;
4378}
4379
4380int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
4381 struct zbc_entry *zbc_val)
4382{
4383 nvgpu_log_fn(g, " ");
4384
4385 return gr_gk20a_elpg_protected_call(g,
4386 gr_gk20a_add_zbc(g, gr, zbc_val));
4387}
4388
4389void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
4390 u32 *zcull_map_tiles)
4391{
4392 u32 val;
4393
4394 nvgpu_log_fn(g, " ");
4395
4396 if (zcull_num_entries >= 8) {
4397 nvgpu_log_fn(g, "map0");
4398 val =
4399 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(
4400 zcull_map_tiles[0]) |
4401 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(
4402 zcull_map_tiles[1]) |
4403 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(
4404 zcull_map_tiles[2]) |
4405 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(
4406 zcull_map_tiles[3]) |
4407 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(
4408 zcull_map_tiles[4]) |
4409 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(
4410 zcull_map_tiles[5]) |
4411 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(
4412 zcull_map_tiles[6]) |
4413 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(
4414 zcull_map_tiles[7]);
4415
4416 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val);
4417 }
4418
4419 if (zcull_num_entries >= 16) {
4420 nvgpu_log_fn(g, "map1");
4421 val =
4422 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(
4423 zcull_map_tiles[8]) |
4424 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(
4425 zcull_map_tiles[9]) |
4426 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(
4427 zcull_map_tiles[10]) |
4428 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(
4429 zcull_map_tiles[11]) |
4430 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(
4431 zcull_map_tiles[12]) |
4432 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(
4433 zcull_map_tiles[13]) |
4434 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(
4435 zcull_map_tiles[14]) |
4436 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(
4437 zcull_map_tiles[15]);
4438
4439 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val);
4440 }
4441
4442 if (zcull_num_entries >= 24) {
4443 nvgpu_log_fn(g, "map2");
4444 val =
4445 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(
4446 zcull_map_tiles[16]) |
4447 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(
4448 zcull_map_tiles[17]) |
4449 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(
4450 zcull_map_tiles[18]) |
4451 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(
4452 zcull_map_tiles[19]) |
4453 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(
4454 zcull_map_tiles[20]) |
4455 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(
4456 zcull_map_tiles[21]) |
4457 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(
4458 zcull_map_tiles[22]) |
4459 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(
4460 zcull_map_tiles[23]);
4461
4462 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val);
4463 }
4464
4465 if (zcull_num_entries >= 32) {
4466 nvgpu_log_fn(g, "map3");
4467 val =
4468 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(
4469 zcull_map_tiles[24]) |
4470 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(
4471 zcull_map_tiles[25]) |
4472 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(
4473 zcull_map_tiles[26]) |
4474 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(
4475 zcull_map_tiles[27]) |
4476 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(
4477 zcull_map_tiles[28]) |
4478 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(
4479 zcull_map_tiles[29]) |
4480 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(
4481 zcull_map_tiles[30]) |
4482 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(
4483 zcull_map_tiles[31]);
4484
4485 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val);
4486 }
4487
4488}
4489
4490static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4491{
4492 u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
4493 u32 *zcull_map_tiles, *zcull_bank_counters;
4494 u32 map_counter;
4495 u32 rcp_conserv;
4496 u32 offset;
4497 bool floorsweep = false;
4498 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
4499 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
4500 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
4501 GPU_LIT_NUM_TPC_PER_GPC);
4502 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
4503 u32 map_tile_count;
4504
4505 if (gr->map_tiles == NULL) {
4506 return -1;
4507 }
4508
4509	if (zcull_alloc_num % 8 != 0) {
4510		/* Round up: total 8 fields per map reg i.e. tile_0 to tile_7 */
4511		zcull_alloc_num += 8 - (zcull_alloc_num % 8);
4512	}
4513 zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
4514
4515 if (zcull_map_tiles == NULL) {
4516 nvgpu_err(g,
4517			"failed to allocate zcull map tiles");
4518 return -ENOMEM;
4519 }
4520
4521 zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
4522
4523 if (zcull_bank_counters == NULL) {
4524 nvgpu_err(g,
4525 "failed to allocate zcull bank counters");
4526 nvgpu_kfree(g, zcull_map_tiles);
4527 return -ENOMEM;
4528 }
4529
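	/*
	 * gr->map_tiles[n] names the GPC that owns screen tile n, and
	 * zcull_bank_counters[] counts how many tiles each GPC has received
	 * so far. The loop below therefore turns the GPC-per-tile map into an
	 * SM-in-GPC number per tile, which is what the map0..map3 registers
	 * programmed by program_zcull_mapping() expect.
	 */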
4530 for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
4531 map_tile_count = gr_gk20a_get_map_tile_count(gr, map_counter);
4532 zcull_map_tiles[map_counter] =
4533 zcull_bank_counters[map_tile_count];
4534 zcull_bank_counters[map_tile_count]++;
4535 }
4536
4537 if (g->ops.gr.program_zcull_mapping != NULL) {
4538 g->ops.gr.program_zcull_mapping(g, zcull_alloc_num,
4539 zcull_map_tiles);
4540 }
4541
4542 nvgpu_kfree(g, zcull_map_tiles);
4543 nvgpu_kfree(g, zcull_bank_counters);
4544
4545 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
4546 gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
4547 gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
4548
4549 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
4550 gpc_zcull_count < gpc_tpc_count) {
4551 nvgpu_err(g,
4552 "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
4553 gpc_zcull_count, gpc_tpc_count, gpc_index);
4554 return -EINVAL;
4555 }
4556 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
4557 gpc_zcull_count != 0) {
4558 floorsweep = true;
4559 }
4560 }
4561
4562 /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */
4563 rcp_conserv = DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(),
4564 gr->gpc_tpc_count[0]);
4565
4566 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
4567 offset = gpc_index * gpc_stride;
4568
4569 if (floorsweep) {
4570 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
4571 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
4572 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
4573 gr->max_zcull_per_gpc_count));
4574 } else {
4575 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
4576 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
4577 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
4578 gr->gpc_tpc_count[gpc_index]));
4579 }
4580
4581 gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
4582 gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
4583 gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
4584
4585 gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
4586 gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
4587 }
4588
4589 gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
4590 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
4591
4592 return 0;
4593}
4594
4595void gk20a_gr_enable_exceptions(struct gk20a *g)
4596{
4597 gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
4598 gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
4599 gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
4600 gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
4601 gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
4602 gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
4603}
4604
4605void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
4606{
4607 struct gr_gk20a *gr = &g->gr;
4608 u32 tpc_mask;
4609
4610 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
4611 gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f() |
4612 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
4613
4614 tpc_mask =
4615 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1);
4616
4617 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask);
4618}
4619
4620
4621void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
4622{
4623 /* enable exceptions */
4624 gk20a_writel(g, gr_fe_hww_esr_r(),
4625 gr_fe_hww_esr_en_enable_f() |
4626 gr_fe_hww_esr_reset_active_f());
4627 gk20a_writel(g, gr_memfmt_hww_esr_r(),
4628 gr_memfmt_hww_esr_en_enable_f() |
4629 gr_memfmt_hww_esr_reset_active_f());
4630}
4631
4632void gr_gk20a_fecs_host_int_enable(struct gk20a *g)
4633{
4634 gk20a_writel(g, gr_fecs_host_int_enable_r(),
4635 gr_fecs_host_int_enable_ctxsw_intr1_enable_f() |
4636 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
4637 gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
4638 gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
4639 gr_fecs_host_int_enable_watchdog_enable_f());
4640}
4641
4642static int gk20a_init_gr_setup_hw(struct gk20a *g)
4643{
4644 struct gr_gk20a *gr = &g->gr;
4645 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
4646 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
4647 u32 data;
4648 u32 last_method_data = 0;
4649 u32 i, err;
4650
4651 nvgpu_log_fn(g, " ");
4652
4653 if (g->ops.gr.init_gpc_mmu) {
4654 g->ops.gr.init_gpc_mmu(g);
4655 }
4656
4657 /* load gr floorsweeping registers */
4658 data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
4659 data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
4660 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
4661 gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
4662
4663 gr_gk20a_zcull_init_hw(g, gr);
4664
4665 if (g->ops.priv_ring.set_ppriv_timeout_settings != NULL) {
4666 g->ops.priv_ring.set_ppriv_timeout_settings(g);
4667 }
4668
4669 /* enable fifo access */
4670 gk20a_writel(g, gr_gpfifo_ctl_r(),
4671 gr_gpfifo_ctl_access_enabled_f() |
4672 gr_gpfifo_ctl_semaphore_access_enabled_f());
4673
4674 /* TBD: reload gr ucode when needed */
4675
4676 /* enable interrupts */
4677 gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
4678 gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
4679
4680 /* enable fecs error interrupts */
4681 g->ops.gr.fecs_host_int_enable(g);
4682
4683 g->ops.gr.enable_hww_exceptions(g);
4684 g->ops.gr.set_hww_esr_report_mask(g);
4685
4686 /* enable TPC exceptions per GPC */
4687 if (g->ops.gr.enable_gpc_exceptions) {
4688 g->ops.gr.enable_gpc_exceptions(g);
4689 }
4690
4691 /* enable ECC for L1/SM */
4692 if (g->ops.gr.ecc_init_scrub_reg) {
4693 g->ops.gr.ecc_init_scrub_reg(g);
4694 }
4695
4696 /* TBD: enable per BE exceptions */
4697
4698 /* reset and enable exceptions */
4699 g->ops.gr.enable_exceptions(g);
4700
4701 gr_gk20a_load_zbc_table(g, gr);
4702
4703 if (g->ops.ltc.init_cbc) {
4704 g->ops.ltc.init_cbc(g, gr);
4705 }
4706
4707 if (g->ops.fb.init_cbc) {
4708 g->ops.fb.init_cbc(g, gr);
4709 }
4710
4711 /* load ctx init */
4712 for (i = 0; i < sw_ctx_load->count; i++) {
4713 gk20a_writel(g, sw_ctx_load->l[i].addr,
4714 sw_ctx_load->l[i].value);
4715 }
4716
4717 if (g->ops.gr.disable_rd_coalesce) {
4718 g->ops.gr.disable_rd_coalesce(g);
4719 }
4720
4721 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4722 GR_IDLE_CHECK_DEFAULT);
4723 if (err != 0U) {
4724 goto out;
4725 }
4726
4727 if (g->ops.gr.init_preemption_state) {
4728 err = g->ops.gr.init_preemption_state(g);
4729 if (err != 0U) {
4730 goto out;
4731 }
4732 }
4733
4734 /* disable fe_go_idle */
4735 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
4736 gr_fe_go_idle_timeout_count_disabled_f());
4737
4738 /* override a few ctx state registers */
4739 g->ops.gr.commit_global_timeslice(g, NULL);
4740
4741 /* floorsweep anything left */
4742 err = g->ops.gr.init_fs_state(g);
4743 if (err != 0U) {
4744 goto out;
4745 }
4746
4747 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4748 GR_IDLE_CHECK_DEFAULT);
4749 if (err != 0U) {
4750 goto restore_fe_go_idle;
4751 }
4752
4753restore_fe_go_idle:
4754 /* restore fe_go_idle */
4755 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
4756 gr_fe_go_idle_timeout_count_prod_f());
4757
4758 if ((err != 0U) || (gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4759 GR_IDLE_CHECK_DEFAULT) != 0)) {
4760 goto out;
4761 }
4762
4763 /* load method init */
4764 if (sw_method_init->count) {
4765 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
4766 sw_method_init->l[0].value);
4767 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
4768 gr_pri_mme_shadow_raw_index_write_trigger_f() |
4769 sw_method_init->l[0].addr);
4770 last_method_data = sw_method_init->l[0].value;
4771 }
4772 for (i = 1; i < sw_method_init->count; i++) {
4773 if (sw_method_init->l[i].value != last_method_data) {
4774 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
4775 sw_method_init->l[i].value);
4776 last_method_data = sw_method_init->l[i].value;
4777 }
4778 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
4779 gr_pri_mme_shadow_raw_index_write_trigger_f() |
4780 sw_method_init->l[i].addr);
4781 }
4782
4783 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4784 GR_IDLE_CHECK_DEFAULT);
4785out:
4786 nvgpu_log_fn(g, "done");
4787 return err;
4788}
4789
4790static int gk20a_init_gr_prepare(struct gk20a *g)
4791{
4792 u32 err = 0;
4793
4794 /* reset gr engine */
4795 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_GRAPH) |
4796 g->ops.mc.reset_mask(g, NVGPU_UNIT_BLG) |
4797 g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
4798
4799 nvgpu_cg_init_gr_load_gating_prod(g);
4800
4801 /* Disable elcg until it gets enabled later in the init*/
4802 nvgpu_cg_elcg_disable_no_wait(g);
4803
4804 /* enable fifo access */
4805 gk20a_writel(g, gr_gpfifo_ctl_r(),
4806 gr_gpfifo_ctl_access_enabled_f() |
4807 gr_gpfifo_ctl_semaphore_access_enabled_f());
4808
4809 if (!g->gr.ctx_vars.valid) {
4810 err = gr_gk20a_init_ctx_vars(g, &g->gr);
4811 if (err != 0U) {
4812 nvgpu_err(g,
4813 "fail to load gr init ctx");
4814 }
4815 }
4816 return err;
4817}
4818
4819static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g)
4820{
4821 struct nvgpu_timeout timeout;
4822 bool fecs_scrubbing;
4823 bool gpccs_scrubbing;
4824
4825 nvgpu_log_fn(g, " ");
4826
4827 nvgpu_timeout_init(g, &timeout,
4828 CTXSW_MEM_SCRUBBING_TIMEOUT_MAX /
4829 CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT,
4830 NVGPU_TIMER_RETRY_TIMER);
4831 do {
4832 fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) &
4833 (gr_fecs_dmactl_imem_scrubbing_m() |
4834 gr_fecs_dmactl_dmem_scrubbing_m());
4835
4836 gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) &
4837 (gr_gpccs_dmactl_imem_scrubbing_m() |
4838					 gr_gpccs_dmactl_dmem_scrubbing_m());
4839
4840 if (!fecs_scrubbing && !gpccs_scrubbing) {
4841 nvgpu_log_fn(g, "done");
4842 return 0;
4843 }
4844
4845 nvgpu_udelay(CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT);
4846 } while (nvgpu_timeout_expired(&timeout) == 0);
4847
4848 nvgpu_err(g, "Falcon mem scrubbing timeout");
4849 return -ETIMEDOUT;
4850}
4851
4852static int gr_gk20a_init_ctxsw(struct gk20a *g)
4853{
4854 u32 err = 0;
4855
4856 err = g->ops.gr.load_ctxsw_ucode(g);
4857 if (err != 0U) {
4858 goto out;
4859 }
4860
4861 err = gr_gk20a_wait_ctxsw_ready(g);
4862 if (err != 0U) {
4863 goto out;
4864 }
4865
4866out:
4867 if (err != 0U) {
4868 nvgpu_err(g, "fail");
4869 } else {
4870 nvgpu_log_fn(g, "done");
4871 }
4872
4873 return err;
4874}
4875
4876static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
4877{
4878 struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
4879 u32 i, err = 0;
4880
4881 nvgpu_log_fn(g, " ");
4882
4883 /* enable interrupts */
4884 gk20a_writel(g, gr_intr_r(), ~0);
4885 gk20a_writel(g, gr_intr_en_r(), ~0);
4886
4887 /* load non_ctx init */
4888 for (i = 0; i < sw_non_ctx_load->count; i++) {
4889 gk20a_writel(g, sw_non_ctx_load->l[i].addr,
4890 sw_non_ctx_load->l[i].value);
4891 }
4892
4893 err = gr_gk20a_wait_mem_scrubbing(g);
4894 if (err != 0U) {
4895 goto out;
4896 }
4897
4898 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4899 GR_IDLE_CHECK_DEFAULT);
4900 if (err != 0U) {
4901 goto out;
4902 }
4903
4904out:
4905 if (err != 0U) {
4906 nvgpu_err(g, "fail");
4907 } else {
4908 nvgpu_log_fn(g, "done");
4909 }
4910
4911	return err;
4912}
4913
4914static int gr_gk20a_init_access_map(struct gk20a *g)
4915{
4916 struct gr_gk20a *gr = &g->gr;
4917 struct nvgpu_mem *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
4918 u32 nr_pages =
4919 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
4920 PAGE_SIZE);
4921 u32 *whitelist = NULL;
4922 int w, num_entries = 0;
4923
4924 nvgpu_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
4925
4926 g->ops.gr.get_access_map(g, &whitelist, &num_entries);
4927
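	/*
	 * Each whitelisted register address is 4-byte aligned, so addr >> 2
	 * gives a bit index into the PRIV_ACCESS_MAP bitmap. map_byte / 4
	 * selects the 32-bit word, and the remaining byte/bit offsets select
	 * the bit to set within that word.
	 */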
4928 for (w = 0; w < num_entries; w++) {
4929 u32 map_bit, map_byte, map_shift, x;
4930 map_bit = whitelist[w] >> 2;
4931 map_byte = map_bit >> 3;
4932 map_shift = map_bit & 0x7; /* i.e. 0-7 */
4933 nvgpu_log_info(g, "access map addr:0x%x byte:0x%x bit:%d",
4934 whitelist[w], map_byte, map_shift);
4935 x = nvgpu_mem_rd32(g, mem, map_byte / sizeof(u32));
4936 x |= 1 << (
4937 (map_byte % sizeof(u32) * BITS_PER_BYTE)
4938 + map_shift);
4939 nvgpu_mem_wr32(g, mem, map_byte / sizeof(u32), x);
4940 }
4941
4942 return 0;
4943}
4944
4945static int gk20a_init_gr_setup_sw(struct gk20a *g)
4946{
4947 struct gr_gk20a *gr = &g->gr;
4948 int err = 0;
4949
4950 nvgpu_log_fn(g, " ");
4951
4952 if (gr->sw_ready) {
4953 nvgpu_log_fn(g, "skip init");
4954 return 0;
4955 }
4956
4957 gr->g = g;
4958
4959#if defined(CONFIG_GK20A_CYCLE_STATS)
4960 err = nvgpu_mutex_init(&g->gr.cs_lock);
4961 if (err != 0) {
4962 nvgpu_err(g, "Error in gr.cs_lock mutex initialization");
4963 return err;
4964 }
4965#endif
4966
4967 err = gr_gk20a_init_gr_config(g, gr);
4968 if (err != 0) {
4969 goto clean_up;
4970 }
4971
4972 err = gr_gk20a_init_map_tiles(g, gr);
4973 if (err != 0) {
4974 goto clean_up;
4975 }
4976
4977 if (g->ops.ltc.init_comptags) {
4978 err = g->ops.ltc.init_comptags(g, gr);
4979 if (err != 0) {
4980 goto clean_up;
4981 }
4982 }
4983
4984 err = gr_gk20a_init_zcull(g, gr);
4985 if (err != 0) {
4986 goto clean_up;
4987 }
4988
4989 err = g->ops.gr.alloc_global_ctx_buffers(g);
4990 if (err != 0) {
4991 goto clean_up;
4992 }
4993
4994 err = gr_gk20a_init_access_map(g);
4995 if (err != 0) {
4996 goto clean_up;
4997 }
4998
4999 gr_gk20a_load_zbc_default_table(g, gr);
5000
5001 if (g->ops.gr.init_czf_bypass) {
5002 g->ops.gr.init_czf_bypass(g);
5003 }
5004
5005 if (g->ops.gr.init_gfxp_wfi_timeout_count) {
5006 g->ops.gr.init_gfxp_wfi_timeout_count(g);
5007 }
5008
5009 err = nvgpu_mutex_init(&gr->ctx_mutex);
5010 if (err != 0) {
5011 nvgpu_err(g, "Error in gr.ctx_mutex initialization");
5012 goto clean_up;
5013 }
5014
5015 nvgpu_spinlock_init(&gr->ch_tlb_lock);
5016
5017 gr->remove_support = gk20a_remove_gr_support;
5018 gr->sw_ready = true;
5019
5020 err = nvgpu_ecc_init_support(g);
5021 if (err != 0) {
5022 goto clean_up;
5023 }
5024
5025 nvgpu_log_fn(g, "done");
5026 return 0;
5027
5028clean_up:
5029 nvgpu_err(g, "fail");
5030 gk20a_remove_gr_support(gr);
5031 return err;
5032}
5033
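/*
 * FECS/ELPG binding sequence: query the size of the FECS power-gating
 * register list, back it with sysmem mapped into the PMU's VM, then hand
 * FECS the PMU instance block and the buffer's GPU VA so that FECS can save
 * and restore that register list across ELPG power-gating cycles.
 */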
5034static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
5035{
5036 struct nvgpu_pmu *pmu = &g->pmu;
5037 struct mm_gk20a *mm = &g->mm;
5038 struct vm_gk20a *vm = mm->pmu.vm;
5039 int err = 0;
5040
5041 u32 size;
5042
5043 nvgpu_log_fn(g, " ");
5044
5045 size = 0;
5046
5047 err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
5048 if (err != 0) {
5049 nvgpu_err(g,
5050 "fail to query fecs pg buffer size");
5051 return err;
5052 }
5053
5054 if (pmu->pg_buf.cpu_va == NULL) {
5055 err = nvgpu_dma_alloc_map_sys(vm, size, &pmu->pg_buf);
5056 if (err != 0) {
5057 nvgpu_err(g, "failed to allocate memory");
5058 return -ENOMEM;
5059 }
5060 }
5061
5062
5063 err = gr_gk20a_fecs_set_reglist_bind_inst(g, &mm->pmu.inst_block);
5064 if (err != 0) {
5065 nvgpu_err(g,
5066 "fail to bind pmu inst to gr");
5067 return err;
5068 }
5069
5070 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.gpu_va);
5071 if (err != 0) {
5072 nvgpu_err(g,
5073 "fail to set pg buffer pmu va");
5074 return err;
5075 }
5076
5077 return err;
5078}
5079
5080int gk20a_init_gr_support(struct gk20a *g)
5081{
5082 int err = 0;
5083
5084 nvgpu_log_fn(g, " ");
5085
5086 g->gr.initialized = false;
5087
5088 /* this is required before gr_gk20a_init_ctx_state */
5089 err = nvgpu_mutex_init(&g->gr.fecs_mutex);
5090 if (err != 0) {
5091 nvgpu_err(g, "Error in gr.fecs_mutex initialization");
5092 return err;
5093 }
5094
5095 err = gr_gk20a_init_ctxsw(g);
5096 if (err != 0) {
5097 return err;
5098 }
5099
5100	/* this appears to query sw state, but fecs actually initializes the
5101	   ramchain, etc., so this is really hw init */
5102 err = g->ops.gr.init_ctx_state(g);
5103 if (err != 0) {
5104 return err;
5105 }
5106
5107 err = gk20a_init_gr_setup_sw(g);
5108 if (err != 0) {
5109 return err;
5110 }
5111
5112 err = gk20a_init_gr_setup_hw(g);
5113 if (err != 0) {
5114 return err;
5115 }
5116
5117 if (g->can_elpg) {
5118 err = gk20a_init_gr_bind_fecs_elpg(g);
5119 if (err != 0) {
5120 return err;
5121 }
5122 }
5123
5124	/* GR is initialized, signal possible waiters */
5125 g->gr.initialized = true;
5126 nvgpu_cond_signal(&g->gr.init_wq);
5127
5128 return 0;
5129}
5130
5131/* Wait until GR is initialized */
5132void gk20a_gr_wait_initialized(struct gk20a *g)
5133{
5134 NVGPU_COND_WAIT(&g->gr.init_wq, g->gr.initialized, 0);
5135}
5136
5137#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
5138#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
5139#define NVA297_SET_SHADER_EXCEPTIONS 0x1528
5140#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528
5141
5142#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
5143
5144void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data)
5145{
5146 nvgpu_log_fn(g, " ");
5147
5148 if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
5149 gk20a_writel(g,
5150 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 0);
5151 gk20a_writel(g,
5152 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), 0);
5153 } else {
5154 /* setup sm warp esr report masks */
5155 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
5156 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
5157 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
5158 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
5159 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
5160 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
5161 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
5162 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
5163 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
5164 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
5165 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
5166 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
5167 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
5168 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
5169 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
5170 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
5171 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
5172 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
5173 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
5174 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
5175 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
5176
5177 /* setup sm global esr report mask */
5178 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
5179 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
5180 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
5181 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
5182 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
5183 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
5184 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
5185 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
5186 }
5187}
5188
5189int gk20a_enable_gr_hw(struct gk20a *g)
5190{
5191 int err;
5192
5193 nvgpu_log_fn(g, " ");
5194
5195 err = gk20a_init_gr_prepare(g);
5196 if (err != 0) {
5197 return err;
5198 }
5199
5200 err = gk20a_init_gr_reset_enable_hw(g);
5201 if (err != 0) {
5202 return err;
5203 }
5204
5205 nvgpu_log_fn(g, "done");
5206
5207 return 0;
5208}
5209
5210int gk20a_gr_reset(struct gk20a *g)
5211{
5212 int err;
5213 u32 size;
5214
5215 g->gr.initialized = false;
5216
5217 nvgpu_mutex_acquire(&g->gr.fecs_mutex);
5218
5219 err = gk20a_enable_gr_hw(g);
5220 if (err != 0) {
5221 nvgpu_mutex_release(&g->gr.fecs_mutex);
5222 return err;
5223 }
5224
5225 err = gk20a_init_gr_setup_hw(g);
5226 if (err != 0) {
5227 nvgpu_mutex_release(&g->gr.fecs_mutex);
5228 return err;
5229 }
5230
5231 err = gr_gk20a_init_ctxsw(g);
5232 if (err != 0) {
5233 nvgpu_mutex_release(&g->gr.fecs_mutex);
5234 return err;
5235 }
5236
5237 nvgpu_mutex_release(&g->gr.fecs_mutex);
5238
5239	/* this appears to query sw state, but fecs actually initializes the
5240	   ramchain, etc., so this is really hw init */
5241 err = g->ops.gr.init_ctx_state(g);
5242 if (err != 0) {
5243 return err;
5244 }
5245
5246 size = 0;
5247 err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
5248 if (err != 0) {
5249 nvgpu_err(g,
5250 "fail to query fecs pg buffer size");
5251 return err;
5252 }
5253
5254 err = gr_gk20a_fecs_set_reglist_bind_inst(g, &g->mm.pmu.inst_block);
5255 if (err != 0) {
5256 nvgpu_err(g,
5257 "fail to bind pmu inst to gr");
5258 return err;
5259 }
5260
5261 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.gpu_va);
5262 if (err != 0) {
5263 nvgpu_err(g,
5264 "fail to set pg buffer pmu va");
5265 return err;
5266 }
5267
5268 nvgpu_cg_init_gr_load_gating_prod(g);
5269 nvgpu_cg_elcg_enable_no_wait(g);
5270
5271	/* GR is initialized, signal possible waiters */
5272 g->gr.initialized = true;
5273 nvgpu_cond_signal(&g->gr.init_wq);
5274
5275 return err;
5276}
5277
5278static void gk20a_gr_set_error_notifier(struct gk20a *g,
5279 struct gr_gk20a_isr_data *isr_data, u32 error_notifier)
5280{
5281 struct channel_gk20a *ch;
5282 struct tsg_gk20a *tsg;
5283 struct channel_gk20a *ch_tsg;
5284
5285 ch = isr_data->ch;
5286
5287 if (ch == NULL) {
5288 return;
5289 }
5290
5291 tsg = tsg_gk20a_from_ch(ch);
5292 if (tsg != NULL) {
5293 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
5294 nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
5295 channel_gk20a, ch_entry) {
5296 if (gk20a_channel_get(ch_tsg)) {
5297 g->ops.fifo.set_error_notifier(ch_tsg,
5298 error_notifier);
5299 gk20a_channel_put(ch_tsg);
5300 }
5301
5302 }
5303 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
5304 } else {
5305 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
5306 }
5307}
5308
5309static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
5310 struct gr_gk20a_isr_data *isr_data)
5311{
5312 nvgpu_log_fn(g, " ");
5313 gk20a_gr_set_error_notifier(g, isr_data,
5314 NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
5315 nvgpu_err(g,
5316 "gr semaphore timeout");
5317 return -EINVAL;
5318}
5319
5320static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
5321 struct gr_gk20a_isr_data *isr_data)
5322{
5323 nvgpu_log_fn(g, " ");
5324 gk20a_gr_set_error_notifier(g, isr_data,
5325 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
5326 /* This is an unrecoverable error, reset is needed */
5327 nvgpu_err(g,
5328		  "gr illegal notify pending");
5329 return -EINVAL;
5330}
5331
5332static int gk20a_gr_handle_illegal_method(struct gk20a *g,
5333 struct gr_gk20a_isr_data *isr_data)
5334{
5335 int ret = g->ops.gr.handle_sw_method(g, isr_data->addr,
5336 isr_data->class_num, isr_data->offset,
5337 isr_data->data_lo);
5338 if (ret) {
5339 gk20a_gr_set_error_notifier(g, isr_data,
5340 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
5341 nvgpu_err(g, "invalid method class 0x%08x"
5342 ", offset 0x%08x address 0x%08x",
5343 isr_data->class_num, isr_data->offset, isr_data->addr);
5344 }
5345 return ret;
5346}
5347
5348static int gk20a_gr_handle_illegal_class(struct gk20a *g,
5349 struct gr_gk20a_isr_data *isr_data)
5350{
5351 nvgpu_log_fn(g, " ");
5352 gk20a_gr_set_error_notifier(g, isr_data,
5353 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5354 nvgpu_err(g,
5355 "invalid class 0x%08x, offset 0x%08x",
5356 isr_data->class_num, isr_data->offset);
5357 return -EINVAL;
5358}
5359
5360int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
5361 struct gr_gk20a_isr_data *isr_data)
5362{
5363 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
5364 int ret = 0;
5365 u32 chid = isr_data->ch != NULL ?
5366 isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
5367
5368 if (gr_fecs_intr == 0U) {
5369 return 0;
5370 }
5371
5372 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
5373 gk20a_gr_set_error_notifier(g, isr_data,
5374 NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
5375 nvgpu_err(g,
5376 "firmware method error 0x%08x for offset 0x%04x",
5377 gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
5378 isr_data->data_lo);
5379 ret = -1;
5380 } else if ((gr_fecs_intr &
5381 gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
5382 /* currently, recovery is not initiated */
5383 nvgpu_err(g, "fecs watchdog triggered for channel %u", chid);
5384 gk20a_fecs_dump_falcon_stats(g);
5385 gk20a_gpccs_dump_falcon_stats(g);
5386 gk20a_gr_debug_dump(g);
5387 } else if ((gr_fecs_intr &
5388 gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
5389 u32 mailbox_value = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6));
5390
5391 if (mailbox_value == MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL) {
5392 nvgpu_info(g, "ctxsw intr0 set by ucode, "
5393 "timestamp buffer full");
5394#ifdef CONFIG_GK20A_CTXSW_TRACE
5395 gk20a_fecs_trace_reset_buffer(g);
5396#else
5397 ret = -1;
5398#endif
5399 } else {
5400 nvgpu_err(g,
5401 "ctxsw intr0 set by ucode, error_code: 0x%08x",
5402 mailbox_value);
5403 ret = -1;
5404 }
5405 } else {
5406 nvgpu_err(g,
5407 "unhandled fecs error interrupt 0x%08x for channel %u",
5408 gr_fecs_intr, chid);
5409 gk20a_fecs_dump_falcon_stats(g);
5410 gk20a_gpccs_dump_falcon_stats(g);
5411 }
5412
5413 gk20a_writel(g, gr_fecs_host_int_clear_r(), gr_fecs_intr);
5414 return ret;
5415}
5416
5417static int gk20a_gr_handle_class_error(struct gk20a *g,
5418 struct gr_gk20a_isr_data *isr_data)
5419{
5420 u32 gr_class_error;
5421 u32 chid = isr_data->ch != NULL ?
5422 isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
5423
5424 nvgpu_log_fn(g, " ");
5425
5426 gr_class_error =
5427 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
5428 gk20a_gr_set_error_notifier(g, isr_data,
5429 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5430	nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
5431		" sub channel 0x%08x mme generated %d,"
5432		" mme pc 0x%08x data high %d priv status %d"
5433		" unhandled intr 0x%08x for channel %u",
5434 isr_data->class_num, (isr_data->offset << 2),
5435 gr_trapped_addr_subch_v(isr_data->addr),
5436 gr_trapped_addr_mme_generated_v(isr_data->addr),
5437 gr_trapped_data_mme_pc_v(
5438 gk20a_readl(g, gr_trapped_data_mme_r())),
5439 gr_trapped_addr_datahigh_v(isr_data->addr),
5440 gr_trapped_addr_priv_v(isr_data->addr),
5441 gr_class_error, chid);
5442
5443 nvgpu_err(g, "trapped data low 0x%08x",
5444 gk20a_readl(g, gr_trapped_data_lo_r()));
5445 if (gr_trapped_addr_datahigh_v(isr_data->addr)) {
5446 nvgpu_err(g, "trapped data high 0x%08x",
5447 gk20a_readl(g, gr_trapped_data_hi_r()));
5448 }
5449
5450 return -EINVAL;
5451}
5452
5453static int gk20a_gr_handle_firmware_method(struct gk20a *g,
5454 struct gr_gk20a_isr_data *isr_data)
5455{
5456 u32 chid = isr_data->ch != NULL ?
5457 isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
5458
5459 nvgpu_log_fn(g, " ");
5460
5461 gk20a_gr_set_error_notifier(g, isr_data,
5462 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5463 nvgpu_err(g,
5464 "firmware method 0x%08x, offset 0x%08x for channel %u",
5465 isr_data->class_num, isr_data->offset,
5466 chid);
5467 return -EINVAL;
5468}
5469
5470int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
5471 struct gr_gk20a_isr_data *isr_data)
5472{
5473 struct channel_gk20a *ch = isr_data->ch;
5474 struct tsg_gk20a *tsg;
5475
5476 if (ch == NULL) {
5477 return 0;
5478 }
5479
5480 tsg = tsg_gk20a_from_ch(ch);
5481 if (tsg != NULL) {
5482 g->ops.fifo.post_event_id(tsg,
5483 NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN);
5484
5485 nvgpu_cond_broadcast(&ch->semaphore_wq);
5486 } else {
5487 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
5488 }
5489
5490 return 0;
5491}
5492
5493#if defined(CONFIG_GK20A_CYCLE_STATS)
5494static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
5495 u32 offset)
5496{
5497 /* support only 24-bit 4-byte aligned offsets */
5498 bool valid = !(offset & 0xFF000003);
5499
5500 if (g->allow_all)
5501 return true;
5502
5503 /* whitelist check */
5504 valid = valid &&
5505 is_bar0_global_offset_whitelisted_gk20a(g, offset);
5506 /* resource size check in case there was a problem
5507 * with allocating the assumed size of bar0 */
5508 valid = valid && gk20a_io_valid_reg(g, offset);
5509 return valid;
5510}
5511#endif
5512
5513int gk20a_gr_handle_notify_pending(struct gk20a *g,
5514 struct gr_gk20a_isr_data *isr_data)
5515{
5516 struct channel_gk20a *ch = isr_data->ch;
5517
5518#if defined(CONFIG_GK20A_CYCLE_STATS)
5519 void *virtual_address;
5520 u32 buffer_size;
5521 u32 offset;
5522 bool exit;
5523#endif
5524 if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) {
5525 return 0;
5526 }
5527
5528#if defined(CONFIG_GK20A_CYCLE_STATS)
5529 /* GL will never use payload 0 for cycle state */
5530 if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
5531 return 0;
5532
5533 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
5534
5535 virtual_address = ch->cyclestate.cyclestate_buffer;
5536 buffer_size = ch->cyclestate.cyclestate_buffer_size;
5537 offset = isr_data->data_lo;
5538 exit = false;
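	/*
	 * The cyclestats buffer is a sequence of variable-length records,
	 * each beginning with a share_buffer_head. BAR0_READ32/BAR0_WRITE32
	 * records perform a masked register access bounded by
	 * first_bit..last_bit, and OP_END terminates the walk; every offset
	 * and size is validated against the buffer bounds before use.
	 */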
5539 while (!exit) {
5540 struct share_buffer_head *sh_hdr;
5541 u32 min_element_size;
5542
5543 /* validate offset */
5544 if (offset + sizeof(struct share_buffer_head) > buffer_size ||
5545 offset + sizeof(struct share_buffer_head) < offset) {
5546 nvgpu_err(g,
5547 "cyclestats buffer overrun at offset 0x%x",
5548 offset);
5549 break;
5550 }
5551
5552 sh_hdr = (struct share_buffer_head *)
5553 ((char *)virtual_address + offset);
5554
5555 min_element_size =
5556 (sh_hdr->operation == OP_END ?
5557 sizeof(struct share_buffer_head) :
5558 sizeof(struct gk20a_cyclestate_buffer_elem));
5559
5560 /* validate sh_hdr->size */
5561 if (sh_hdr->size < min_element_size ||
5562 offset + sh_hdr->size > buffer_size ||
5563 offset + sh_hdr->size < offset) {
5564 nvgpu_err(g,
5565 "bad cyclestate buffer header size at offset 0x%x",
5566 offset);
5567 sh_hdr->failed = true;
5568 break;
5569 }
5570
5571 switch (sh_hdr->operation) {
5572 case OP_END:
5573 exit = true;
5574 break;
5575
5576 case BAR0_READ32:
5577 case BAR0_WRITE32:
5578 {
5579 struct gk20a_cyclestate_buffer_elem *op_elem =
5580 (struct gk20a_cyclestate_buffer_elem *)sh_hdr;
5581 bool valid = is_valid_cyclestats_bar0_offset_gk20a(
5582 g, op_elem->offset_bar0);
5583 u32 raw_reg;
5584 u64 mask_orig;
5585 u64 v;
5586
5587 if (!valid) {
5588 nvgpu_err(g,
5589				"invalid cyclestats op offset: 0x%x",
5590 op_elem->offset_bar0);
5591
5592 sh_hdr->failed = exit = true;
5593 break;
5594 }
5595
5596
5597 mask_orig =
5598 ((1ULL <<
5599 (op_elem->last_bit + 1))
5600 -1)&~((1ULL <<
5601 op_elem->first_bit)-1);
5602
5603 raw_reg =
5604 gk20a_readl(g,
5605 op_elem->offset_bar0);
5606
5607 switch (sh_hdr->operation) {
5608 case BAR0_READ32:
5609 op_elem->data =
5610 (raw_reg & mask_orig)
5611 >> op_elem->first_bit;
5612 break;
5613
5614 case BAR0_WRITE32:
5615 v = 0;
5616 if ((unsigned int)mask_orig !=
5617 (unsigned int)~0) {
5618 v = (unsigned int)
5619 (raw_reg & ~mask_orig);
5620 }
5621
5622 v |= ((op_elem->data
5623 << op_elem->first_bit)
5624 & mask_orig);
5625
5626 gk20a_writel(g,
5627 op_elem->offset_bar0,
5628 (unsigned int)v);
5629 break;
5630 default:
5631 /* nop ok?*/
5632 break;
5633 }
5634 }
5635 break;
5636
5637 default:
5638 /* no operation content case */
5639 exit = true;
5640 break;
5641 }
5642 sh_hdr->completed = true;
5643 offset += sh_hdr->size;
5644 }
5645 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
5646#endif
5647 nvgpu_log_fn(g, " ");
5648 nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
5649 return 0;
5650}
5651
5652/* Used by sw interrupt thread to translate current ctx to chid.
5653 * Also used by regops to translate current ctx to chid and tsgid.
5654 * For performance, we don't want to go through 128 channels every time.
5655 * curr_ctx should be the value read from gr_fecs_current_ctx_r().
5656 * A small tlb is used here to cache translation.
5657 *
5658 * Returned channel must be freed with gk20a_channel_put() */
5659static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5660 struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid)
5661{
5662 struct fifo_gk20a *f = &g->fifo;
5663 struct gr_gk20a *gr = &g->gr;
5664 u32 chid = -1;
5665 u32 tsgid = NVGPU_INVALID_TSG_ID;
5666 u32 i;
5667 struct channel_gk20a *ret = NULL;
5668
5669 /* when contexts are unloaded from GR, the valid bit is reset
5670 * but the instance pointer information remains intact.
5671 * This might be called from gr_isr where contexts might be
5672 * unloaded. No need to check ctx_valid bit
5673 */
5674
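	/*
	 * The TLB is a small direct-compare cache keyed on curr_ctx: hits
	 * return the cached chid/tsgid, misses fall back to scanning every
	 * channel's instance block pointer, and the result is stored either
	 * in a free slot or over the entry picked by the round-robin
	 * channel_tlb_flush_index.
	 */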
5675 nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
5676
5677 /* check cache first */
5678 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
5679 if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
5680 chid = gr->chid_tlb[i].chid;
5681 tsgid = gr->chid_tlb[i].tsgid;
5682 ret = gk20a_channel_from_id(g, chid);
5683 goto unlock;
5684 }
5685 }
5686
5687 /* slow path */
5688 for (chid = 0; chid < f->num_channels; chid++) {
5689 struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
5690
5691 if (ch == NULL) {
5692 continue;
5693 }
5694
5695 if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >>
5696 ram_in_base_shift_v()) ==
5697 gr_fecs_current_ctx_ptr_v(curr_ctx)) {
5698 tsgid = ch->tsgid;
5699 /* found it */
5700 ret = ch;
5701 break;
5702 }
5703 gk20a_channel_put(ch);
5704 }
5705
5706 if (ret == NULL) {
5707 goto unlock;
5708 }
5709
5710 /* add to free tlb entry */
5711 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
5712 if (gr->chid_tlb[i].curr_ctx == 0) {
5713 gr->chid_tlb[i].curr_ctx = curr_ctx;
5714 gr->chid_tlb[i].chid = chid;
5715 gr->chid_tlb[i].tsgid = tsgid;
5716 goto unlock;
5717 }
5718 }
5719
5720 /* no free entry, flush one */
5721 gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
5722 gr->chid_tlb[gr->channel_tlb_flush_index].chid = chid;
5723 gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid;
5724
5725 gr->channel_tlb_flush_index =
5726 (gr->channel_tlb_flush_index + 1) &
5727 (GR_CHANNEL_MAP_TLB_SIZE - 1);
5728
5729unlock:
5730 nvgpu_spinlock_release(&gr->ch_tlb_lock);
5731 if (curr_tsgid) {
5732 *curr_tsgid = tsgid;
5733 }
5734 return ret;
5735}
5736
5737int gk20a_gr_lock_down_sm(struct gk20a *g,
5738 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
5739 bool check_errors)
5740{
5741 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5742 u32 dbgr_control0;
5743
5744 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5745 "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
5746
5747 /* assert stop trigger */
5748 dbgr_control0 =
5749 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
5750 dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
5751 gk20a_writel(g,
5752 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
5753
5754 return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
5755 check_errors);
5756}
5757
5758bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
5759{
5760 u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
5761
5762 /* check if an sm debugger is attached.
5763 * assumption: all SMs will have debug mode enabled/disabled
5764 * uniformly. */
5765 if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
5766 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) {
5767 return true;
5768 }
5769
5770 return false;
5771}
5772
5773int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
5774 bool *post_event, struct channel_gk20a *fault_ch,
5775 u32 *hww_global_esr)
5776{
5777 int ret = 0;
5778 bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
5779 bool disable_sm_exceptions = true;
5780 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5781 bool sm_debugger_attached;
5782 u32 global_esr, warp_esr, global_mask;
5783
5784 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
5785
5786 sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
5787
5788 global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
5789 *hww_global_esr = global_esr;
5790 warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
5791 global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g);
5792
5793 if (!sm_debugger_attached) {
5794 nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
5795 global_esr, warp_esr);
5796 return -EFAULT;
5797 }
5798
5799 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5800 "sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);
5801
5802 gr_gk20a_elpg_protected_call(g,
5803 g->ops.gr.record_sm_error_state(g, gpc, tpc, sm, fault_ch));
5804
5805 if (g->ops.gr.pre_process_sm_exception) {
5806 ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
5807 global_esr, warp_esr,
5808 sm_debugger_attached,
5809 fault_ch,
5810 &early_exit,
5811 &ignore_debugger);
5812 if (ret) {
5813 nvgpu_err(g, "could not pre-process sm error!");
5814 return ret;
5815 }
5816 }
5817
5818 if (early_exit) {
5819 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5820 "returning early");
5821 return ret;
5822 }
5823
5824 /*
5825 * Disable forwarding of tpc exceptions,
5826 * the debugger will reenable exceptions after servicing them.
5827 *
5828 * Do not disable exceptions if the only SM exception is BPT_INT
5829 */
5830 if ((global_esr == gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
5831 && (warp_esr == 0)) {
5832 disable_sm_exceptions = false;
5833 }
5834
5835 if (!ignore_debugger && disable_sm_exceptions) {
5836 u32 tpc_exception_en = gk20a_readl(g,
5837 gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
5838 offset);
5839 tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
5840 gk20a_writel(g,
5841 gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + offset,
5842 tpc_exception_en);
5843 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM Exceptions disabled");
5844 }
5845
5846 /* if a debugger is present and an error has occurred, do a warp sync */
5847 if (!ignore_debugger &&
5848 ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
5849 nvgpu_log(g, gpu_dbg_intr, "warp sync needed");
5850 do_warp_sync = true;
5851 }
5852
5853 if (do_warp_sync) {
5854 ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
5855 global_mask, true);
5856 if (ret) {
5857 nvgpu_err(g, "sm did not lock down!");
5858 return ret;
5859 }
5860 }
5861
5862 if (ignore_debugger) {
5863 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5864 "ignore_debugger set, skipping event posting");
5865 } else {
5866 *post_event = true;
5867 }
5868
5869 return ret;
5870}
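/*
 * Illustrative sketch, not part of the original driver: the handler above
 * keeps TPC exception forwarding enabled only when the sole pending SM
 * condition is BPT_INT with no warp error. The hypothetical predicate
 * below restates that check.
 */
static inline bool demo_only_bpt_int_pending(u32 global_esr, u32 warp_esr)
{
	return (global_esr ==
			gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) &&
		(warp_esr == 0U);
}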
5871
5872int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
5873 bool *post_event)
5874{
5875 int ret = 0;
5876 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
5877 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
5878 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
5879 u32 esr;
5880
5881 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
5882
5883 esr = gk20a_readl(g,
5884 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
5885 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
5886
5887 gk20a_writel(g,
5888 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
5889 esr);
5890
5891 return ret;
5892}
5893
5894void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
5895 u32 *esr_sm_sel)
5896{
5897 *esr_sm_sel = 1;
5898}
5899
5900static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
5901 bool *post_event, struct channel_gk20a *fault_ch,
5902 u32 *hww_global_esr)
5903{
5904 int ret = 0;
5905 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5906 u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
5907 + offset);
5908 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
5909
5910 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5911 "GPC%d TPC%d: pending exception 0x%x",
5912 gpc, tpc, tpc_exception);
5913
5914	/* check if an sm exception is pending */
5915 if (gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(tpc_exception) ==
5916 gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v()) {
5917 u32 esr_sm_sel, sm;
5918
5919 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5920 "GPC%d TPC%d: SM exception pending", gpc, tpc);
5921
5922 if (g->ops.gr.handle_tpc_sm_ecc_exception) {
5923 g->ops.gr.handle_tpc_sm_ecc_exception(g, gpc, tpc,
5924 post_event, fault_ch, hww_global_esr);
5925 }
5926
5927 g->ops.gr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel);
5928
5929 for (sm = 0; sm < sm_per_tpc; sm++) {
5930
5931 if ((esr_sm_sel & BIT32(sm)) == 0U) {
5932 continue;
5933 }
5934
5935 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5936 "GPC%d TPC%d: SM%d exception pending",
5937 gpc, tpc, sm);
5938
5939 ret |= g->ops.gr.handle_sm_exception(g,
5940 gpc, tpc, sm, post_event, fault_ch,
5941 hww_global_esr);
5942 /* clear the hwws, also causes tpc and gpc
5943 * exceptions to be cleared. Should be cleared
5944 * only if SM is locked down or empty.
5945 */
5946 g->ops.gr.clear_sm_hww(g,
5947 gpc, tpc, sm, *hww_global_esr);
5948
5949 }
5950
5951 }
5952
5953	/* check if a tex exception is pending */
5954 if (gr_gpc0_tpc0_tpccs_tpc_exception_tex_v(tpc_exception) ==
5955 gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v()) {
5956 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5957 "GPC%d TPC%d: TEX exception pending", gpc, tpc);
5958 ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event);
5959 }
5960
5961 if (g->ops.gr.handle_tpc_mpc_exception) {
5962 ret |= g->ops.gr.handle_tpc_mpc_exception(g,
5963 gpc, tpc, post_event);
5964 }
5965
5966 return ret;
5967}
5968
5969static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
5970 struct channel_gk20a *fault_ch, u32 *hww_global_esr)
5971{
5972 int ret = 0;
5973 u32 gpc_offset, gpc, tpc;
5974 struct gr_gk20a *gr = &g->gr;
5975 u32 exception1 = gk20a_readl(g, gr_exception1_r());
5976 u32 gpc_exception;
5977
5978 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " ");
5979
5980 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
5981 if ((exception1 & (1 << gpc)) == 0) {
5982 continue;
5983 }
5984
5985 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5986 "GPC%d exception pending", gpc);
5987
5988 gpc_offset = gk20a_gr_gpc_offset(g, gpc);
5989
5990 gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r()
5991 + gpc_offset);
5992
5993 /* check if any tpc has an exception */
5994 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
5995 if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) &
5996 (1 << tpc)) == 0) {
5997 continue;
5998 }
5999
6000 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
6001 "GPC%d: TPC%d exception pending", gpc, tpc);
6002
6003 ret |= gk20a_gr_handle_tpc_exception(g, gpc, tpc,
6004 post_event, fault_ch, hww_global_esr);
6005
6006 }
6007
6008 /* Handle GCC exception */
6009 if ((gr_gpc0_gpccs_gpc_exception_gcc_v(gpc_exception) != 0U) &&
6010 (g->ops.gr.handle_gcc_exception != NULL)) {
6011 int gcc_ret = 0;
6012 gcc_ret = g->ops.gr.handle_gcc_exception(g, gpc, tpc,
6013 post_event, fault_ch, hww_global_esr);
6014 ret |= (ret != 0) ? ret : gcc_ret;
6015 }
6016
6017 /* Handle GPCCS exceptions */
6018 if (g->ops.gr.handle_gpc_gpccs_exception) {
6019 int ret_ecc = 0;
6020 ret_ecc = g->ops.gr.handle_gpc_gpccs_exception(g, gpc,
6021 gpc_exception);
6022 ret |= (ret != 0) ? ret : ret_ecc;
6023 }
6024
6025 /* Handle GPCMMU exceptions */
6026 if (g->ops.gr.handle_gpc_gpcmmu_exception) {
6027 int ret_mmu = 0;
6028
6029 ret_mmu = g->ops.gr.handle_gpc_gpcmmu_exception(g, gpc,
6030 gpc_exception);
6031 ret |= (ret != 0) ? ret : ret_mmu;
6032 }
6033
6034 }
6035
6036 return ret;
6037}
6038
6039static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg,
6040 u32 global_esr)
6041{
6042 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) {
6043 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT);
6044 }
6045
6046 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) {
6047 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE);
6048 }
6049
6050 return 0;
6051}
6052
6053int gk20a_gr_isr(struct gk20a *g)
6054{
6055 struct gr_gk20a_isr_data isr_data;
6056 u32 grfifo_ctl;
6057 u32 obj_table;
6058 bool need_reset = false;
6059 u32 gr_intr = gk20a_readl(g, gr_intr_r());
6060 struct channel_gk20a *ch = NULL;
6061 struct channel_gk20a *fault_ch = NULL;
6062 u32 tsgid = NVGPU_INVALID_TSG_ID;
6063 struct tsg_gk20a *tsg = NULL;
6064 u32 gr_engine_id;
6065 u32 global_esr = 0;
6066 u32 chid;
6067
6068 nvgpu_log_fn(g, " ");
6069 nvgpu_log(g, gpu_dbg_intr, "pgraph intr 0x%08x", gr_intr);
6070
6071 if (gr_intr == 0U) {
6072 return 0;
6073 }
6074
6075 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
6076 if (gr_engine_id != FIFO_INVAL_ENGINE_ID) {
6077 gr_engine_id = BIT(gr_engine_id);
6078 }
6079
6080 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
6081 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
6082 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
6083
6084 gk20a_writel(g, gr_gpfifo_ctl_r(),
6085 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
6086 gr_gpfifo_ctl_semaphore_access_f(0));
6087
6088 isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
6089 isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
6090 isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
6091 isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
6092 isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
6093 isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
6094 obj_table = (isr_data.sub_chan < 4) ? gk20a_readl(g,
6095 gr_fe_object_table_r(isr_data.sub_chan)) : 0;
6096 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
6097
6098 ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, &tsgid);
6099 isr_data.ch = ch;
6100 chid = ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID;
6101
6102 if (ch == NULL) {
6103 nvgpu_err(g, "pgraph intr: 0x%08x, chid: INVALID", gr_intr);
6104 } else {
6105 tsg = tsg_gk20a_from_ch(ch);
6106 if (tsg == NULL) {
6107 nvgpu_err(g, "pgraph intr: 0x%08x, chid: %d "
6108 "not bound to tsg", gr_intr, chid);
6109 }
6110 }
6111
6112 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
6113 "channel %d: addr 0x%08x, "
6114 "data 0x%08x 0x%08x,"
6115 "ctx 0x%08x, offset 0x%08x, "
6116 "subchannel 0x%08x, class 0x%08x",
6117 chid, isr_data.addr,
6118 isr_data.data_hi, isr_data.data_lo,
6119 isr_data.curr_ctx, isr_data.offset,
6120 isr_data.sub_chan, isr_data.class_num);
6121
6122 if (gr_intr & gr_intr_notify_pending_f()) {
6123 g->ops.gr.handle_notify_pending(g, &isr_data);
6124 gk20a_writel(g, gr_intr_r(),
6125 gr_intr_notify_reset_f());
6126 gr_intr &= ~gr_intr_notify_pending_f();
6127 }
6128
6129 if (gr_intr & gr_intr_semaphore_pending_f()) {
6130 g->ops.gr.handle_semaphore_pending(g, &isr_data);
6131 gk20a_writel(g, gr_intr_r(),
6132 gr_intr_semaphore_reset_f());
6133 gr_intr &= ~gr_intr_semaphore_pending_f();
6134 }
6135
6136 if (gr_intr & gr_intr_semaphore_timeout_pending_f()) {
6137 if (gk20a_gr_handle_semaphore_timeout_pending(g,
6138 &isr_data) != 0) {
6139 need_reset = true;
6140 }
6141 gk20a_writel(g, gr_intr_r(),
6142 gr_intr_semaphore_reset_f());
6143 gr_intr &= ~gr_intr_semaphore_pending_f();
6144 }
6145
6146 if (gr_intr & gr_intr_illegal_notify_pending_f()) {
6147 if (gk20a_gr_intr_illegal_notify_pending(g,
6148 &isr_data) != 0) {
6149 need_reset = true;
6150 }
6151 gk20a_writel(g, gr_intr_r(),
6152 gr_intr_illegal_notify_reset_f());
6153 gr_intr &= ~gr_intr_illegal_notify_pending_f();
6154 }
6155
6156 if (gr_intr & gr_intr_illegal_method_pending_f()) {
6157 if (gk20a_gr_handle_illegal_method(g, &isr_data) != 0) {
6158 need_reset = true;
6159 }
6160 gk20a_writel(g, gr_intr_r(),
6161 gr_intr_illegal_method_reset_f());
6162 gr_intr &= ~gr_intr_illegal_method_pending_f();
6163 }
6164
6165 if (gr_intr & gr_intr_illegal_class_pending_f()) {
6166 if (gk20a_gr_handle_illegal_class(g, &isr_data) != 0) {
6167 need_reset = true;
6168 }
6169 gk20a_writel(g, gr_intr_r(),
6170 gr_intr_illegal_class_reset_f());
6171 gr_intr &= ~gr_intr_illegal_class_pending_f();
6172 }
6173
6174 if (gr_intr & gr_intr_fecs_error_pending_f()) {
6175 if (g->ops.gr.handle_fecs_error(g, ch, &isr_data) != 0) {
6176 need_reset = true;
6177 }
6178 gk20a_writel(g, gr_intr_r(),
6179 gr_intr_fecs_error_reset_f());
6180 gr_intr &= ~gr_intr_fecs_error_pending_f();
6181 }
6182
6183 if (gr_intr & gr_intr_class_error_pending_f()) {
6184 if (gk20a_gr_handle_class_error(g, &isr_data) != 0) {
6185 need_reset = true;
6186 }
6187 gk20a_writel(g, gr_intr_r(),
6188 gr_intr_class_error_reset_f());
6189 gr_intr &= ~gr_intr_class_error_pending_f();
6190 }
6191
6192 /* this one happens if someone tries to hit a non-whitelisted
6193 * register using set_falcon[4] */
6194 if (gr_intr & gr_intr_firmware_method_pending_f()) {
6195 if (gk20a_gr_handle_firmware_method(g, &isr_data) != 0) {
6196 need_reset = true;
6197 }
6198 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n");
6199 gk20a_writel(g, gr_intr_r(),
6200 gr_intr_firmware_method_reset_f());
6201 gr_intr &= ~gr_intr_firmware_method_pending_f();
6202 }
6203
6204 if (gr_intr & gr_intr_exception_pending_f()) {
6205 u32 exception = gk20a_readl(g, gr_exception_r());
6206
6207 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
6208
6209 if (exception & gr_exception_fe_m()) {
6210 u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
6211 u32 info = gk20a_readl(g, gr_fe_hww_esr_info_r());
6212
6213 nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x",
6214 fe, info);
6215 gk20a_writel(g, gr_fe_hww_esr_r(),
6216 gr_fe_hww_esr_reset_active_f());
6217 need_reset = true;
6218 }
6219
6220 if (exception & gr_exception_memfmt_m()) {
6221 u32 memfmt = gk20a_readl(g, gr_memfmt_hww_esr_r());
6222
6223 nvgpu_err(g, "memfmt exception: esr %08x", memfmt);
6224 gk20a_writel(g, gr_memfmt_hww_esr_r(),
6225 gr_memfmt_hww_esr_reset_active_f());
6226 need_reset = true;
6227 }
6228
6229 if (exception & gr_exception_pd_m()) {
6230 u32 pd = gk20a_readl(g, gr_pd_hww_esr_r());
6231
6232 nvgpu_err(g, "pd exception: esr 0x%08x", pd);
6233 gk20a_writel(g, gr_pd_hww_esr_r(),
6234 gr_pd_hww_esr_reset_active_f());
6235 need_reset = true;
6236 }
6237
6238 if (exception & gr_exception_scc_m()) {
6239 u32 scc = gk20a_readl(g, gr_scc_hww_esr_r());
6240
6241 nvgpu_err(g, "scc exception: esr 0x%08x", scc);
6242 gk20a_writel(g, gr_scc_hww_esr_r(),
6243 gr_scc_hww_esr_reset_active_f());
6244 need_reset = true;
6245 }
6246
6247 if (exception & gr_exception_ds_m()) {
6248 u32 ds = gk20a_readl(g, gr_ds_hww_esr_r());
6249
6250 nvgpu_err(g, "ds exception: esr: 0x%08x", ds);
6251 gk20a_writel(g, gr_ds_hww_esr_r(),
6252 gr_ds_hww_esr_reset_task_f());
6253 need_reset = true;
6254 }
6255
6256 if (exception & gr_exception_ssync_m()) {
6257 if (g->ops.gr.handle_ssync_hww) {
6258 if (g->ops.gr.handle_ssync_hww(g) != 0) {
6259 need_reset = true;
6260 }
6261 } else {
6262 nvgpu_err(g, "unhandled ssync exception");
6263 }
6264 }
6265
6266 if (exception & gr_exception_mme_m()) {
6267 u32 mme = gk20a_readl(g, gr_mme_hww_esr_r());
6268 u32 info = gk20a_readl(g, gr_mme_hww_esr_info_r());
6269
6270 nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x",
6271 mme, info);
6272 gk20a_writel(g, gr_mme_hww_esr_r(),
6273 gr_mme_hww_esr_reset_active_f());
6274 need_reset = true;
6275 }
6276
6277 if (exception & gr_exception_sked_m()) {
6278 u32 sked = gk20a_readl(g, gr_sked_hww_esr_r());
6279
6280 nvgpu_err(g, "sked exception: esr 0x%08x", sked);
6281 gk20a_writel(g, gr_sked_hww_esr_r(),
6282 gr_sked_hww_esr_reset_active_f());
6283 need_reset = true;
6284 }
6285
6286 /* check if a gpc exception has occurred */
6287 if (((exception & gr_exception_gpc_m()) != 0U) &&
6288 !need_reset) {
6289 bool post_event = false;
6290
6291 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
6292 "GPC exception pending");
6293
6294 if (tsg != NULL) {
6295 fault_ch = isr_data.ch;
6296 }
6297
6298 /* fault_ch can be NULL */
6299 /* check if any gpc has an exception */
6300 if (gk20a_gr_handle_gpc_exception(g, &post_event,
6301 fault_ch, &global_esr) != 0) {
6302 need_reset = true;
6303 }
6304
6305 /* signal clients waiting on an event */
6306 if (g->ops.gr.sm_debugger_attached(g) &&
6307 post_event && (fault_ch != NULL)) {
6308 g->ops.debugger.post_events(fault_ch);
6309 }
6310 }
6311
6312 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
6313 gr_intr &= ~gr_intr_exception_pending_f();
6314
6315 if (need_reset) {
6316 nvgpu_err(g, "set gr exception notifier");
6317 gk20a_gr_set_error_notifier(g, &isr_data,
6318 NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
6319 }
6320 }
6321
6322 if (need_reset) {
6323 if (tsg != NULL) {
6324 gk20a_fifo_recover(g, gr_engine_id,
6325 tsgid, true, true, true,
6326 RC_TYPE_GR_FAULT);
6327 } else {
6328 if (ch != NULL) {
6329 nvgpu_err(g, "chid: %d referenceable but not "
6330 "bound to tsg", chid);
6331 }
6332 gk20a_fifo_recover(g, gr_engine_id,
6333 0, false, false, true,
6334 RC_TYPE_GR_FAULT);
6335 }
6336 }
6337
6338 if (gr_intr != 0U) {
6339 /* clear unhandled interrupts */
6340 if (ch == NULL) {
6341 /*
6342 * This is probably an interrupt during
6343 * gk20a_free_channel()
6344 */
6345 nvgpu_err(g, "unhandled gr intr 0x%08x for "
6346 "unreferenceable channel, clearing",
6347 gr_intr);
6348 } else {
6349 nvgpu_err(g, "unhandled gr intr 0x%08x for chid: %d",
6350 gr_intr, chid);
6351 }
6352 gk20a_writel(g, gr_intr_r(), gr_intr);
6353 }
6354
6355 gk20a_writel(g, gr_gpfifo_ctl_r(),
6356 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
6357 gr_gpfifo_ctl_semaphore_access_f(1));
6358
6359
6360 /* Posting of BPT events should be the last thing in this function */
6361 if ((global_esr != 0U) && (tsg != NULL)) {
6362 gk20a_gr_post_bpt_events(g, tsg, global_esr);
6363 }
6364
6365 if (ch) {
6366 gk20a_channel_put(ch);
6367 }
6368
6369 return 0;
6370}
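/*
 * Illustrative sketch, not part of the original driver: gk20a_gr_isr()
 * above services every pending condition with the same pattern: handle
 * the source, write its reset value to gr_intr_r(), then drop the bit
 * from the local copy so that any leftover bits can be reported and
 * cleared at the end. The hypothetical helper below captures that
 * pattern for a single interrupt bit.
 */
static inline u32 demo_ack_gr_intr_bit(struct gk20a *g, u32 gr_intr,
					u32 pending_bit, u32 reset_val)
{
	if ((gr_intr & pending_bit) != 0U) {
		/* acknowledge the condition in hardware ... */
		gk20a_writel(g, gr_intr_r(), reset_val);
		/* ... and mark it handled in the software copy */
		gr_intr &= ~pending_bit;
	}
	return gr_intr;
}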
6371
6372u32 gk20a_gr_nonstall_isr(struct gk20a *g)
6373{
6374 u32 ops = 0;
6375 u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r());
6376
6377 nvgpu_log(g, gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr);
6378
6379 if ((gr_intr & gr_intr_nonstall_trap_pending_f()) != 0U) {
6380 /* Clear the interrupt */
6381 gk20a_writel(g, gr_intr_nonstall_r(),
6382 gr_intr_nonstall_trap_pending_f());
6383 ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE |
6384 GK20A_NONSTALL_OPS_POST_EVENTS);
6385 }
6386 return ops;
6387}
6388
6389int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
6390{
6391 BUG_ON(size == NULL);
6392 return gr_gk20a_submit_fecs_method_op(g,
6393 (struct fecs_method_op_gk20a) {
6394 .mailbox.id = 0,
6395 .mailbox.data = 0,
6396 .mailbox.clr = ~0,
6397 .method.data = 1,
6398 .method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(),
6399 .mailbox.ret = size,
6400 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
6401 .mailbox.ok = 0,
6402 .cond.fail = GR_IS_UCODE_OP_SKIP,
6403 .mailbox.fail = 0}, false);
6404}
6405
6406int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
6407 struct nvgpu_mem *inst_block)
6408{
6409 u32 data = fecs_current_ctx_data(g, inst_block);
6410
6411 return gr_gk20a_submit_fecs_method_op(g,
6412 (struct fecs_method_op_gk20a){
6413 .mailbox.id = 4,
6414 .mailbox.data = data,
6415 .mailbox.clr = ~0,
6416 .method.data = 1,
6417 .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),
6418 .mailbox.ret = NULL,
6419 .cond.ok = GR_IS_UCODE_OP_EQUAL,
6420 .mailbox.ok = 1,
6421 .cond.fail = GR_IS_UCODE_OP_SKIP,
6422 .mailbox.fail = 0}, false);
6423}
6424
6425int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
6426{
6427 return gr_gk20a_submit_fecs_method_op(g,
6428 (struct fecs_method_op_gk20a) {
6429 .mailbox.id = 4,
6430 .mailbox.data = u64_lo32(pmu_va >> 8),
6431 .mailbox.clr = ~0,
6432 .method.data = 1,
6433 .method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(),
6434 .mailbox.ret = NULL,
6435 .cond.ok = GR_IS_UCODE_OP_EQUAL,
6436 .mailbox.ok = 1,
6437 .cond.fail = GR_IS_UCODE_OP_SKIP,
6438 .mailbox.fail = 0}, false);
6439}
6440
6441int gk20a_gr_suspend(struct gk20a *g)
6442{
6443 u32 ret = 0;
6444
6445 nvgpu_log_fn(g, " ");
6446
6447 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
6448 GR_IDLE_CHECK_DEFAULT);
6449 if (ret) {
6450 return ret;
6451 }
6452
6453 gk20a_writel(g, gr_gpfifo_ctl_r(),
6454 gr_gpfifo_ctl_access_disabled_f());
6455
6456 /* disable gr intr */
6457 gk20a_writel(g, gr_intr_r(), 0);
6458 gk20a_writel(g, gr_intr_en_r(), 0);
6459
6460 /* disable all exceptions */
6461 gk20a_writel(g, gr_exception_r(), 0);
6462 gk20a_writel(g, gr_exception_en_r(), 0);
6463 gk20a_writel(g, gr_exception1_r(), 0);
6464 gk20a_writel(g, gr_exception1_en_r(), 0);
6465 gk20a_writel(g, gr_exception2_r(), 0);
6466 gk20a_writel(g, gr_exception2_en_r(), 0);
6467
6468 gk20a_gr_flush_channel_tlb(&g->gr);
6469
6470 g->gr.initialized = false;
6471
6472 nvgpu_log_fn(g, "done");
6473 return ret;
6474}
6475
6476static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
6477 u32 addr,
6478 bool is_quad, u32 quad,
6479 u32 *context_buffer,
6480 u32 context_buffer_size,
6481 u32 *priv_offset);
6482
6483static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
6484 u32 addr,
6485 u32 *priv_offset);
6486
6487/* This function will decode a priv address and return the partition type and numbers. */
6488int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6489 enum ctxsw_addr_type *addr_type,
6490 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
6491 u32 *broadcast_flags)
6492{
6493 u32 gpc_addr;
6494
6495 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6496
6497 /* setup defaults */
6498 *addr_type = CTXSW_ADDR_TYPE_SYS;
6499 *broadcast_flags = PRI_BROADCAST_FLAGS_NONE;
6500 *gpc_num = 0;
6501 *tpc_num = 0;
6502 *ppc_num = 0;
6503 *be_num = 0;
6504
6505 if (pri_is_gpc_addr(g, addr)) {
6506 *addr_type = CTXSW_ADDR_TYPE_GPC;
6507 gpc_addr = pri_gpccs_addr_mask(addr);
6508 if (pri_is_gpc_addr_shared(g, addr)) {
6509 *addr_type = CTXSW_ADDR_TYPE_GPC;
6510 *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
6511 } else {
6512 *gpc_num = pri_get_gpc_num(g, addr);
6513 }
6514
6515 if (pri_is_ppc_addr(g, gpc_addr)) {
6516 *addr_type = CTXSW_ADDR_TYPE_PPC;
6517 if (pri_is_ppc_addr_shared(g, gpc_addr)) {
6518 *broadcast_flags |= PRI_BROADCAST_FLAGS_PPC;
6519 return 0;
6520 }
6521 }
6522 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
6523 *addr_type = CTXSW_ADDR_TYPE_TPC;
6524 if (pri_is_tpc_addr_shared(g, gpc_addr)) {
6525 *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC;
6526 return 0;
6527 }
6528 *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6529 }
6530 return 0;
6531 } else if (pri_is_be_addr(g, addr)) {
6532 *addr_type = CTXSW_ADDR_TYPE_BE;
6533 if (pri_is_be_addr_shared(g, addr)) {
6534 *broadcast_flags |= PRI_BROADCAST_FLAGS_BE;
6535 return 0;
6536 }
6537 *be_num = pri_get_be_num(g, addr);
6538 return 0;
6539 } else if (g->ops.ltc.pri_is_ltc_addr(g, addr)) {
6540 *addr_type = CTXSW_ADDR_TYPE_LTCS;
6541 if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) {
6542 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
6543 } else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) {
6544 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
6545 }
6546 return 0;
6547 } else if (pri_is_fbpa_addr(g, addr)) {
6548 *addr_type = CTXSW_ADDR_TYPE_FBPA;
6549 if (pri_is_fbpa_addr_shared(g, addr)) {
6550 *broadcast_flags |= PRI_BROADCAST_FLAGS_FBPA;
6551 return 0;
6552 }
6553 return 0;
6554 } else if ((g->ops.gr.is_egpc_addr != NULL) &&
6555 g->ops.gr.is_egpc_addr(g, addr)) {
6556 return g->ops.gr.decode_egpc_addr(g,
6557 addr, addr_type, gpc_num,
6558 tpc_num, broadcast_flags);
6559 } else {
6560 *addr_type = CTXSW_ADDR_TYPE_SYS;
6561 return 0;
6562 }
6563 /* PPC!?!?!?! */
6564
6565 /*NOTREACHED*/
6566 return -EINVAL;
6567}
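/*
 * Illustrative sketch, not part of the original driver: a typical caller
 * of the decode above inspects the broadcast flags before deciding
 * whether an address must be split into unicast entries. The helper name
 * below is hypothetical.
 */
static bool demo_addr_needs_tpc_split(struct gk20a *g, u32 addr)
{
	enum ctxsw_addr_type addr_type;
	u32 gpc, tpc, ppc, be, broadcast_flags;

	if (gr_gk20a_decode_priv_addr(g, addr, &addr_type,
			&gpc, &tpc, &ppc, &be, &broadcast_flags) != 0) {
		return false;
	}

	/* a TPC broadcast address must be expanded per GPC/TPC pair */
	return (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U;
}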
6568
6569void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
6570 u32 num_fbpas,
6571 u32 *priv_addr_table, u32 *t)
6572{
6573 u32 fbpa_id;
6574
6575 for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) {
6576 priv_addr_table[(*t)++] = pri_fbpa_addr(g,
6577 pri_fbpa_addr_mask(g, addr), fbpa_id);
6578 }
6579}
6580
6581int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
6582 u32 gpc_num,
6583 u32 *priv_addr_table, u32 *t)
6584{
6585 u32 ppc_num;
6586
6587 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6588
6589 for (ppc_num = 0; ppc_num < g->gr.gpc_ppc_count[gpc_num]; ppc_num++) {
6590 priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr),
6591 gpc_num, ppc_num);
6592 }
6593
6594 return 0;
6595}
6596
6597/*
6598 * The context buffer is indexed using BE broadcast addresses and GPC/TPC
6599 * unicast addresses. This function will convert a BE unicast address to a BE
6600 * broadcast address and split a GPC/TPC broadcast address into a table of
6601 * GPC/TPC addresses. The addresses generated by this function can be
6602 * successfully processed by gr_gk20a_find_priv_offset_in_buffer
6603 */
6604int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6605 u32 addr,
6606 u32 *priv_addr_table,
6607 u32 *num_registers)
6608{
6609 enum ctxsw_addr_type addr_type;
6610 u32 gpc_num, tpc_num, ppc_num, be_num;
6611 u32 priv_addr, gpc_addr;
6612 u32 broadcast_flags;
6613 u32 t;
6614 int err;
6615
6616 t = 0;
6617 *num_registers = 0;
6618
6619 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6620
6621 err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
6622 &gpc_num, &tpc_num, &ppc_num, &be_num,
6623 &broadcast_flags);
6624 nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
6625 if (err != 0) {
6626 return err;
6627 }
6628
6629 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
6630 (addr_type == CTXSW_ADDR_TYPE_BE)) {
6631 /* The BE broadcast registers are included in the compressed PRI
6632 * table. Convert a BE unicast address to a broadcast address
6633 * so that we can look up the offset. */
6634 if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
6635 ((broadcast_flags & PRI_BROADCAST_FLAGS_BE) == 0U)) {
6636 priv_addr_table[t++] = pri_be_shared_addr(g, addr);
6637 } else {
6638 priv_addr_table[t++] = addr;
6639 }
6640
6641 *num_registers = t;
6642 return 0;
6643 }
6644
6645 /* The GPC/TPC unicast registers are included in the compressed PRI
6646 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
6647 * that we can look up the offsets. */
6648 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
6649 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
6650
6651 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) {
6652 for (tpc_num = 0;
6653 tpc_num < g->gr.gpc_tpc_count[gpc_num];
6654 tpc_num++) {
6655 priv_addr_table[t++] =
6656 pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
6657 gpc_num, tpc_num);
6658 }
6659
6660 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
6661 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
6662 priv_addr_table, &t);
6663 if (err != 0) {
6664 return err;
6665 }
6666 } else {
6667 priv_addr = pri_gpc_addr(g,
6668 pri_gpccs_addr_mask(addr),
6669 gpc_num);
6670
6671 gpc_addr = pri_gpccs_addr_mask(priv_addr);
6672 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6673 if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) {
6674 continue;
6675 }
6676
6677 priv_addr_table[t++] = priv_addr;
6678 }
6679 }
6680 } else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
6681 (addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
6682 (g->ops.gr.egpc_etpc_priv_addr_table != NULL)) {
6683 nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
6684 g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num, tpc_num,
6685 broadcast_flags, priv_addr_table, &t);
6686 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
6687 g->ops.ltc.split_lts_broadcast_addr(g, addr,
6688 priv_addr_table, &t);
6689 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
6690 g->ops.ltc.split_ltc_broadcast_addr(g, addr,
6691 priv_addr_table, &t);
6692 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) {
6693 g->ops.gr.split_fbpa_broadcast_addr(g, addr,
6694 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS),
6695 priv_addr_table, &t);
6696 } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) {
6697 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) {
6698 for (tpc_num = 0;
6699 tpc_num < g->gr.gpc_tpc_count[gpc_num];
6700 tpc_num++) {
6701 priv_addr_table[t++] =
6702 pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
6703 gpc_num, tpc_num);
6704 }
6705 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
6706 err = gr_gk20a_split_ppc_broadcast_addr(g,
6707 addr, gpc_num, priv_addr_table, &t);
6708 } else {
6709 priv_addr_table[t++] = addr;
6710 }
6711 }
6712
6713 *num_registers = t;
6714 return 0;
6715}
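/*
 * Illustrative sketch, not part of the original driver: for a GPC+TPC
 * broadcast address, the table built above holds one unicast entry per
 * (GPC, TPC) pair, so its worst-case size is the sum of the per-GPC TPC
 * counts. The hypothetical helper below computes that bound.
 */
static u32 demo_max_tpc_unicast_entries(struct gk20a *g)
{
	u32 gpc_num;
	u32 entries = 0;

	for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
		entries += g->gr.gpc_tpc_count[gpc_num];
	}
	return entries;
}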
6716
6717int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6718 u32 addr,
6719 u32 max_offsets,
6720 u32 *offsets, u32 *offset_addrs,
6721 u32 *num_offsets,
6722 bool is_quad, u32 quad)
6723{
6724 u32 i;
6725 u32 priv_offset = 0;
6726 u32 *priv_registers;
6727 u32 num_registers = 0;
6728 int err = 0;
6729 struct gr_gk20a *gr = &g->gr;
6730 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
6731 u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count *
6732 sm_per_tpc;
6733
6734 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6735
6736 /* implementation is crossed-up if either of these happen */
6737 if (max_offsets > potential_offsets) {
6738 nvgpu_log_fn(g, "max_offsets > potential_offsets");
6739 return -EINVAL;
6740 }
6741
6742 if (!g->gr.ctx_vars.golden_image_initialized) {
6743 return -ENODEV;
6744 }
6745
6746 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
6747 if (priv_registers == NULL) {
6748 nvgpu_log_fn(g, "failed alloc for potential_offsets=%d", potential_offsets);
6749		err = -ENOMEM;
6750 goto cleanup;
6751 }
6752 memset(offsets, 0, sizeof(u32) * max_offsets);
6753 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6754 *num_offsets = 0;
6755
6756 g->ops.gr.create_priv_addr_table(g, addr, &priv_registers[0],
6757 &num_registers);
6758
6759 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6760 nvgpu_log_fn(g, "max_offsets = %d, num_registers = %d",
6761 max_offsets, num_registers);
6762 err = -EINVAL;
6763 goto cleanup;
6764 }
6765
6766 if ((max_offsets == 1) && (num_registers > 1)) {
6767 num_registers = 1;
6768 }
6769
6770 if (g->gr.ctx_vars.local_golden_image == NULL) {
6771 nvgpu_log_fn(g, "no context switch header info to work with");
6772 err = -EINVAL;
6773 goto cleanup;
6774 }
6775
6776 for (i = 0; i < num_registers; i++) {
6777 err = gr_gk20a_find_priv_offset_in_buffer(g,
6778 priv_registers[i],
6779 is_quad, quad,
6780 g->gr.ctx_vars.local_golden_image,
6781 g->gr.ctx_vars.golden_image_size,
6782 &priv_offset);
6783 if (err != 0) {
6784 nvgpu_log_fn(g, "Could not determine priv_offset for addr:0x%x",
6785 addr); /*, grPriRegStr(addr)));*/
6786 goto cleanup;
6787 }
6788
6789 offsets[i] = priv_offset;
6790 offset_addrs[i] = priv_registers[i];
6791 }
6792
6793 *num_offsets = num_registers;
6794cleanup:
6795 if (!IS_ERR_OR_NULL(priv_registers)) {
6796 nvgpu_kfree(g, priv_registers);
6797 }
6798
6799 return err;
6800}
6801
6802int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
6803 u32 addr,
6804 u32 max_offsets,
6805 u32 *offsets, u32 *offset_addrs,
6806 u32 *num_offsets)
6807{
6808 u32 i;
6809 u32 priv_offset = 0;
6810 u32 *priv_registers;
6811 u32 num_registers = 0;
6812 int err = 0;
6813 struct gr_gk20a *gr = &g->gr;
6814 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
6815 u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count *
6816 sm_per_tpc;
6817
6818 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6819
6820 /* implementation is crossed-up if either of these happen */
6821 if (max_offsets > potential_offsets) {
6822 return -EINVAL;
6823 }
6824
6825 if (!g->gr.ctx_vars.golden_image_initialized) {
6826 return -ENODEV;
6827 }
6828
6829 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
6830 if (priv_registers == NULL) {
6831 nvgpu_log_fn(g, "failed alloc for potential_offsets=%d", potential_offsets);
6832 return -ENOMEM;
6833 }
6834 memset(offsets, 0, sizeof(u32) * max_offsets);
6835 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6836 *num_offsets = 0;
6837
6838 g->ops.gr.create_priv_addr_table(g, addr, priv_registers,
6839 &num_registers);
6840
6841 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6842 err = -EINVAL;
6843 goto cleanup;
6844 }
6845
6846 if ((max_offsets == 1) && (num_registers > 1)) {
6847 num_registers = 1;
6848 }
6849
6850 if (g->gr.ctx_vars.local_golden_image == NULL) {
6851 nvgpu_log_fn(g, "no context switch header info to work with");
6852 err = -EINVAL;
6853 goto cleanup;
6854 }
6855
6856 for (i = 0; i < num_registers; i++) {
6857 err = gr_gk20a_find_priv_offset_in_pm_buffer(g,
6858 priv_registers[i],
6859 &priv_offset);
6860 if (err != 0) {
6861 nvgpu_log_fn(g, "Could not determine priv_offset for addr:0x%x",
6862 addr); /*, grPriRegStr(addr)));*/
6863 goto cleanup;
6864 }
6865
6866 offsets[i] = priv_offset;
6867 offset_addrs[i] = priv_registers[i];
6868 }
6869
6870 *num_offsets = num_registers;
6871cleanup:
6872 nvgpu_kfree(g, priv_registers);
6873
6874 return err;
6875}
6876
6877/* Set up some register tables. This looks hacky; our
6878 * register/offset functions are just that, functions.
6879 * So they can't be used as initializers... TBD: fix to
6880 * generate consts at least on an as-needed basis.
6881 */
6882static const u32 _num_ovr_perf_regs = 17;
6883static u32 _ovr_perf_regs[17] = { 0, };
6884/* Following are the blocks of registers that the ucode
6885 * stores in the extended region. */
6886
6887void gk20a_gr_init_ovr_sm_dsm_perf(void)
6888{
6889 if (_ovr_perf_regs[0] != 0) {
6890 return;
6891 }
6892
6893 _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
6894 _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
6895 _ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
6896 _ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
6897 _ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r();
6898 _ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r();
6899 _ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r();
6900 _ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r();
6901 _ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r();
6902 _ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r();
6903 _ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r();
6904 _ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r();
6905 _ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r();
6906 _ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r();
6907 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
6908 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
6909 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
6910
6911}
6912
6913/* TBD: would like to handle this elsewhere, at a higher level.
6914 * These are currently constructed in a "test-then-write" style
6915 * which makes it impossible to know externally whether a ctx
6916 * write will actually occur. So later we should put a lazy,
6917 * map-and-hold system in the patch write state. */
6918static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6919 struct channel_gk20a *ch,
6920 u32 addr, u32 data,
6921 struct nvgpu_mem *mem)
6922{
6923 u32 num_gpc = g->gr.gpc_count;
6924 u32 num_tpc;
6925 u32 tpc, gpc, reg;
6926 u32 chk_addr;
6927 u32 vaddr_lo;
6928 u32 vaddr_hi;
6929 u32 tmp;
6930 u32 num_ovr_perf_regs = 0;
6931 u32 *ovr_perf_regs = NULL;
6932 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
6933 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
6934 struct tsg_gk20a *tsg;
6935 struct nvgpu_gr_ctx *gr_ctx;
6936 struct nvgpu_mem *ctxheader = &ch->ctx_header;
6937
6938 tsg = tsg_gk20a_from_ch(ch);
6939 if (tsg == NULL) {
6940 return -EINVAL;
6941 }
6942
6943 gr_ctx = &tsg->gr_ctx;
6944 g->ops.gr.init_ovr_sm_dsm_perf();
6945 g->ops.gr.init_sm_dsm_reg_info();
6946 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs);
6947
6948 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6949
6950 for (reg = 0; reg < num_ovr_perf_regs; reg++) {
6951 for (gpc = 0; gpc < num_gpc; gpc++) {
6952 num_tpc = g->gr.gpc_tpc_count[gpc];
6953 for (tpc = 0; tpc < num_tpc; tpc++) {
6954 chk_addr = ((gpc_stride * gpc) +
6955 (tpc_in_gpc_stride * tpc) +
6956 ovr_perf_regs[reg]);
6957 if (chk_addr != addr) {
6958 continue;
6959 }
6960 /* reset the patch count from previous
6961					   runs, if ucode has already processed
6962 it */
6963 tmp = nvgpu_mem_rd(g, mem,
6964 ctxsw_prog_main_image_patch_count_o());
6965
6966 if (tmp == 0U) {
6967 gr_ctx->patch_ctx.data_count = 0;
6968 }
6969
6970 gr_gk20a_ctx_patch_write(g, gr_ctx,
6971 addr, data, true);
6972
6973 vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
6974 vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
6975
6976 nvgpu_mem_wr(g, mem,
6977 ctxsw_prog_main_image_patch_count_o(),
6978 gr_ctx->patch_ctx.data_count);
6979 if (ctxheader->gpu_va) {
6980 nvgpu_mem_wr(g, ctxheader,
6981 ctxsw_prog_main_image_patch_adr_lo_o(),
6982 vaddr_lo);
6983 nvgpu_mem_wr(g, ctxheader,
6984 ctxsw_prog_main_image_patch_adr_hi_o(),
6985 vaddr_hi);
6986 } else {
6987 nvgpu_mem_wr(g, mem,
6988 ctxsw_prog_main_image_patch_adr_lo_o(),
6989 vaddr_lo);
6990 nvgpu_mem_wr(g, mem,
6991 ctxsw_prog_main_image_patch_adr_hi_o(),
6992 vaddr_hi);
6993 }
6994
6995 /* we're not caching these on cpu side,
6996 but later watch for it */
6997 return 0;
6998 }
6999 }
7000 }
7001
7002 return 0;
7003}
7004
7005#define ILLEGAL_ID ((u32)~0)
7006
7007static inline bool check_main_image_header_magic(u8 *context)
7008{
7009 u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
7010 return magic == ctxsw_prog_main_image_magic_value_v_value_v();
7011}
7012static inline bool check_local_header_magic(u8 *context)
7013{
7014 u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
7015 return magic == ctxsw_prog_local_magic_value_v_value_v();
7016
7017}
7018
7019/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
7020static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
7021{
7022 return 256;
7023}
7024
7025void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
7026 u32 **ovr_perf_regs)
7027{
7028 *num_ovr_perf_regs = _num_ovr_perf_regs;
7029 *ovr_perf_regs = _ovr_perf_regs;
7030}
7031
7032static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
7033 u32 addr,
7034 bool is_quad, u32 quad,
7035 u32 *context_buffer,
7036 u32 context_buffer_size,
7037 u32 *priv_offset)
7038{
7039 u32 i, data32;
7040 u32 gpc_num, tpc_num;
7041 u32 num_gpcs, num_tpcs;
7042 u32 chk_addr;
7043 u32 ext_priv_offset, ext_priv_size;
7044 u8 *context;
7045 u32 offset_to_segment, offset_to_segment_end;
7046 u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
7047 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
7048 u32 num_ext_gpccs_ext_buffer_segments;
7049 u32 inter_seg_offset;
7050 u32 max_tpc_count;
7051 u32 *sm_dsm_perf_ctrl_regs = NULL;
7052 u32 num_sm_dsm_perf_ctrl_regs = 0;
7053 u32 *sm_dsm_perf_regs = NULL;
7054 u32 num_sm_dsm_perf_regs = 0;
7055 u32 buffer_segments_size = 0;
7056 u32 marker_size = 0;
7057 u32 control_register_stride = 0;
7058 u32 perf_register_stride = 0;
7059 struct gr_gk20a *gr = &g->gr;
7060 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
7061 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7062 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
7063 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7064 u32 tpc_gpc_mask = (tpc_in_gpc_stride - 1);
7065
7066 /* Only have TPC registers in extended region, so if not a TPC reg,
7067 then return error so caller can look elsewhere. */
7068 if (pri_is_gpc_addr(g, addr)) {
7069 u32 gpc_addr = 0;
7070 gpc_num = pri_get_gpc_num(g, addr);
7071 gpc_addr = pri_gpccs_addr_mask(addr);
7072 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
7073 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
7074 } else {
7075 return -EINVAL;
7076 }
7077
7078 nvgpu_log_info(g, " gpc = %d tpc = %d",
7079 gpc_num, tpc_num);
7080 } else if ((g->ops.gr.is_etpc_addr != NULL) &&
7081 g->ops.gr.is_etpc_addr(g, addr)) {
7082 g->ops.gr.get_egpc_etpc_num(g, addr, &gpc_num, &tpc_num);
7083 gpc_base = g->ops.gr.get_egpc_base(g);
7084 } else {
7085 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7086 "does not exist in extended region");
7087 return -EINVAL;
7088 }
7089
7090 buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
7091 /* note below is in words/num_registers */
7092 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
7093
7094 context = (u8 *)context_buffer;
7095 /* sanity check main header */
7096 if (!check_main_image_header_magic(context)) {
7097 nvgpu_err(g,
7098 "Invalid main header: magic value");
7099 return -EINVAL;
7100 }
7101 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7102 if (gpc_num >= num_gpcs) {
7103 nvgpu_err(g,
7104 "GPC 0x%08x is greater than total count 0x%08x!",
7105 gpc_num, num_gpcs);
7106 return -EINVAL;
7107 }
7108
7109 data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
7110 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
7111 if (0 == ext_priv_size) {
7112 nvgpu_log_info(g, " No extended memory in context buffer");
7113 return -EINVAL;
7114 }
7115 ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
7116
7117 offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
7118 offset_to_segment_end = offset_to_segment +
7119 (ext_priv_size * buffer_segments_size);
7120
7121 /* check local header magic */
7122 context += ctxsw_prog_ucode_header_size_in_bytes();
7123 if (!check_local_header_magic(context)) {
7124 nvgpu_err(g,
7125 "Invalid local header: magic value");
7126 return -EINVAL;
7127 }
7128
7129 /*
7130 * See if the incoming register address is in the first table of
7131 * registers. We check this by decoding only the TPC addr portion.
7132 * If we get a hit on the TPC bit, we then double check the address
7133 * by computing it from the base gpc/tpc strides. Then make sure
7134 * it is a real match.
7135 */
7136 g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs,
7137 &sm_dsm_perf_regs,
7138 &perf_register_stride);
7139
7140 g->ops.gr.init_sm_dsm_reg_info();
7141
7142 for (i = 0; i < num_sm_dsm_perf_regs; i++) {
7143 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
7144 sm_dsm_perf_reg_id = i;
7145
7146 nvgpu_log_info(g, "register match: 0x%08x",
7147 sm_dsm_perf_regs[i]);
7148
7149 chk_addr = (gpc_base + gpc_stride * gpc_num) +
7150 tpc_in_gpc_base +
7151 (tpc_in_gpc_stride * tpc_num) +
7152 (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask);
7153
7154 if (chk_addr != addr) {
7155 nvgpu_err(g,
7156					"addr mismatch: 0x%08x != 0x%08x",
7157 addr, chk_addr);
7158 return -EINVAL;
7159 }
7160 break;
7161 }
7162 }
7163
7164	/* Didn't find reg in supported group 1,
7165	 * so try the second group now. */
7166 g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs,
7167 &sm_dsm_perf_ctrl_regs,
7168 &control_register_stride);
7169
7170 if (ILLEGAL_ID == sm_dsm_perf_reg_id) {
7171 for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) {
7172 if ((addr & tpc_gpc_mask) ==
7173 (sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) {
7174 sm_dsm_perf_ctrl_reg_id = i;
7175
7176 nvgpu_log_info(g, "register match: 0x%08x",
7177 sm_dsm_perf_ctrl_regs[i]);
7178
7179 chk_addr = (gpc_base + gpc_stride * gpc_num) +
7180 tpc_in_gpc_base +
7181 tpc_in_gpc_stride * tpc_num +
7182 (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
7183 tpc_gpc_mask);
7184
7185 if (chk_addr != addr) {
7186 nvgpu_err(g,
7187						"addr mismatch: 0x%08x != 0x%08x",
7188 addr, chk_addr);
7189 return -EINVAL;
7190
7191 }
7192
7193 break;
7194 }
7195 }
7196 }
7197
7198 if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
7199 (ILLEGAL_ID == sm_dsm_perf_reg_id)) {
7200 return -EINVAL;
7201 }
7202
7203 /* Skip the FECS extended header, nothing there for us now. */
7204 offset_to_segment += buffer_segments_size;
7205
7206 /* skip through the GPCCS extended headers until we get to the data for
7207 * our GPC. The size of each gpc extended segment is enough to hold the
7208	 * max tpc count for the gpcs, in 256b chunks.
7209 */
7210
7211 max_tpc_count = gr->max_tpc_per_gpc_count;
7212
7213 num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2);
7214
7215 offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
7216 buffer_segments_size * gpc_num);
7217
7218 num_tpcs = g->gr.gpc_tpc_count[gpc_num];
7219
7220 /* skip the head marker to start with */
7221 inter_seg_offset = marker_size;
7222
7223 if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) {
7224		/* skip over control regs of TPCs before the one we want,
7225		 * then skip to the register in this tpc */
7226 inter_seg_offset = inter_seg_offset +
7227 (tpc_num * control_register_stride) +
7228 sm_dsm_perf_ctrl_reg_id;
7229 } else {
7230 /* skip all the control registers */
7231 inter_seg_offset = inter_seg_offset +
7232 (num_tpcs * control_register_stride);
7233
7234 /* skip the marker between control and counter segments */
7235 inter_seg_offset += marker_size;
7236
7237 /* skip over counter regs of TPCs before the one we want */
7238 inter_seg_offset = inter_seg_offset +
7239 (tpc_num * perf_register_stride) *
7240 ctxsw_prog_extended_num_smpc_quadrants_v();
7241
7242		/* skip over the register for the quadrants we do not want,
7243		 * then skip to the register in this tpc */
7244 inter_seg_offset = inter_seg_offset +
7245 (perf_register_stride * quad) +
7246 sm_dsm_perf_reg_id;
7247 }
7248
7249 /* set the offset to the segment offset plus the inter segment offset to
7250 * our register */
7251 offset_to_segment += (inter_seg_offset * 4);
7252
7253 /* last sanity check: did we somehow compute an offset outside the
7254 * extended buffer? */
7255 if (offset_to_segment > offset_to_segment_end) {
7256 nvgpu_err(g,
7257 "Overflow ctxsw buffer! 0x%08x > 0x%08x",
7258 offset_to_segment, offset_to_segment_end);
7259 return -EINVAL;
7260 }
7261
7262 *priv_offset = offset_to_segment;
7263
7264 return 0;
7265}
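/*
 * Illustrative sketch, not part of the original driver: on the perf
 * counter path above, the offset in 32-bit words inside one GPC segment
 * is built from the head marker, all control registers, a second marker,
 * and the counter registers of earlier TPCs and quadrants; the code then
 * converts it to bytes with a multiply by 4. The hypothetical helper
 * below redoes that word arithmetic as a pure function of the same
 * inputs.
 */
static u32 demo_counter_word_offset(u32 marker_size, u32 num_tpcs,
				    u32 control_register_stride,
				    u32 perf_register_stride,
				    u32 tpc_num, u32 quad, u32 reg_id,
				    u32 num_quadrants)
{
	u32 off = marker_size;				/* head marker */

	off += num_tpcs * control_register_stride;	/* all control regs */
	off += marker_size;				/* second marker */
	off += (tpc_num * perf_register_stride) * num_quadrants;
	off += (perf_register_stride * quad) + reg_id;	/* our register */
	return off;
}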
7266
7267
7268static int
7269gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
7270 enum ctxsw_addr_type addr_type,
7271 u32 pri_addr,
7272 u32 gpc_num, u32 num_tpcs,
7273 u32 num_ppcs, u32 ppc_mask,
7274 u32 *priv_offset)
7275{
7276 u32 i;
7277 u32 address, base_address;
7278 u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
7279 u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
7280 struct aiv_gk20a *reg;
7281 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
7282 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7283 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
7284 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
7285 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
7286 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7287
7288 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
7289
7290 if (!g->gr.ctx_vars.valid) {
7291 return -EINVAL;
7292 }
7293
7294 /* Process the SYS/BE segment. */
7295 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
7296 (addr_type == CTXSW_ADDR_TYPE_BE)) {
7297 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
7298 reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i];
7299 address = reg->addr;
7300 sys_offset = reg->index;
7301
7302 if (pri_addr == address) {
7303 *priv_offset = sys_offset;
7304 return 0;
7305 }
7306 }
7307 }
7308
7309 /* Process the TPC segment. */
7310 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
7311 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
7312 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
7313 reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
7314 address = reg->addr;
7315 tpc_addr = pri_tpccs_addr_mask(address);
7316 base_address = gpc_base +
7317 (gpc_num * gpc_stride) +
7318 tpc_in_gpc_base +
7319 (tpc_num * tpc_in_gpc_stride);
7320 address = base_address + tpc_addr;
7321 /*
7322 * The data for the TPCs is interleaved in the context buffer.
7323 * Example with num_tpcs = 2
7324 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
7325 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
7326 */
7327 tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
7328
7329 if (pri_addr == address) {
7330 *priv_offset = tpc_offset;
7331 return 0;
7332 }
7333 }
7334 }
7335 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
7336 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
7337 if (g->ops.gr.get_egpc_base == NULL) {
7338 return -EINVAL;
7339 }
7340
7341 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
7342 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
7343 reg = &g->gr.ctx_vars.ctxsw_regs.etpc.l[i];
7344 address = reg->addr;
7345 tpc_addr = pri_tpccs_addr_mask(address);
7346 base_address = g->ops.gr.get_egpc_base(g) +
7347 (gpc_num * gpc_stride) +
7348 tpc_in_gpc_base +
7349 (tpc_num * tpc_in_gpc_stride);
7350 address = base_address + tpc_addr;
7351 /*
7352 * The data for the TPCs is interleaved in the context buffer.
7353 * Example with num_tpcs = 2
7354 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
7355 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
7356 */
7357 tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
7358
7359 if (pri_addr == address) {
7360 *priv_offset = tpc_offset;
7361 nvgpu_log(g,
7362 gpu_dbg_fn | gpu_dbg_gpu_dbg,
7363 "egpc/etpc priv_offset=0x%#08x",
7364 *priv_offset);
7365 return 0;
7366 }
7367 }
7368 }
7369 }
7370
7371
7372 /* Process the PPC segment. */
7373 if (addr_type == CTXSW_ADDR_TYPE_PPC) {
7374 for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
7375 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
7376 reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
7377 address = reg->addr;
7378 ppc_addr = pri_ppccs_addr_mask(address);
7379 base_address = gpc_base +
7380 (gpc_num * gpc_stride) +
7381 ppc_in_gpc_base +
7382 (ppc_num * ppc_in_gpc_stride);
7383 address = base_address + ppc_addr;
7384 /*
7385 * The data for the PPCs is interleaved in the context buffer.
7386 * Example with numPpcs = 2
7387 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
7388 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
7389 */
7390 ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4);
7391
7392 if (pri_addr == address) {
7393 *priv_offset = ppc_offset;
7394 return 0;
7395 }
7396 }
7397 }
7398 }
7399
7400
7401 /* Process the GPC segment. */
7402 if (addr_type == CTXSW_ADDR_TYPE_GPC) {
7403 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
7404 reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i];
7405
7406 address = reg->addr;
7407 gpc_addr = pri_gpccs_addr_mask(address);
7408 gpc_offset = reg->index;
7409
7410 base_address = gpc_base + (gpc_num * gpc_stride);
7411 address = base_address + gpc_addr;
7412
7413 if (pri_addr == address) {
7414 *priv_offset = gpc_offset;
7415 return 0;
7416 }
7417 }
7418 }
7419 return -EINVAL;
7420}
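/*
 * Illustrative sketch, not part of the original driver: the TPC and PPC
 * segments above interleave per-unit data, so the offset of register
 * entry reg->index for unit u out of n units is reg->index * n + u * 4
 * (this assumes, as the surrounding code implies, that reg->index is a
 * byte offset with a 4-byte stride). The hypothetical helper below
 * restates that formula.
 */
static inline u32 demo_interleaved_offset(u32 reg_index, u32 num_units,
					  u32 unit_num)
{
	/* e.g. with num_units = 2: unit 0 lands at 0, 8, 16, ... and
	 * unit 1 at 4, 12, 20, ..., matching the comments above */
	return (reg_index * num_units) + (unit_num * 4U);
}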
7421
7422static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7423 u8 *context,
7424 u32 *num_ppcs, u32 *ppc_mask,
7425 u32 *reg_ppc_count)
7426{
7427 u32 data32;
7428 u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
7429
7430 /*
7431 * if there is only 1 PES_PER_GPC, then we put the PES registers
7432 * in the GPC reglist, so we can't error out if ppc.count == 0
7433 */
7434 if ((!g->gr.ctx_vars.valid) ||
7435 ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) &&
7436 (num_pes_per_gpc > 1))) {
7437 return -EINVAL;
7438 }
7439
7440 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
7441
7442 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
7443 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
7444
7445 *reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count;
7446
7447 return 0;
7448}
7449
7450int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
7451 enum ctxsw_addr_type addr_type,
7452 u32 num_tpcs,
7453 u32 num_ppcs,
7454 u32 reg_list_ppc_count,
7455 u32 *__offset_in_segment)
7456{
7457 u32 offset_in_segment = 0;
7458 struct gr_gk20a *gr = &g->gr;
7459
7460 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
7461 /*
7462 * reg = gr->ctx_vars.ctxsw_regs.tpc.l;
7463 * offset_in_segment = 0;
7464 */
7465 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
7466 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
7467 offset_in_segment =
7468 ((gr->ctx_vars.ctxsw_regs.tpc.count *
7469 num_tpcs) << 2);
7470
7471 nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
7472 "egpc etpc offset_in_segment 0x%#08x",
7473 offset_in_segment);
7474 } else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
7475 /*
7476 * The ucode stores TPC data before PPC data.
7477 * Advance offset past TPC data to PPC data.
7478 */
7479 offset_in_segment =
7480 (((gr->ctx_vars.ctxsw_regs.tpc.count +
7481 gr->ctx_vars.ctxsw_regs.etpc.count) *
7482 num_tpcs) << 2);
7483 } else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
7484 /*
7485 * The ucode stores TPC/PPC data before GPC data.
7486 * Advance offset past TPC/PPC data to GPC data.
7487 *
7488 * Note 1 PES_PER_GPC case
7489 */
7490 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
7491 GPU_LIT_NUM_PES_PER_GPC);
7492 if (num_pes_per_gpc > 1) {
7493 offset_in_segment =
7494 ((((gr->ctx_vars.ctxsw_regs.tpc.count +
7495 gr->ctx_vars.ctxsw_regs.etpc.count) *
7496 num_tpcs) << 2) +
7497 ((reg_list_ppc_count * num_ppcs) << 2));
7498 } else {
7499 offset_in_segment =
7500 (((gr->ctx_vars.ctxsw_regs.tpc.count +
7501 gr->ctx_vars.ctxsw_regs.etpc.count) *
7502 num_tpcs) << 2);
7503 }
7504 } else {
7505 nvgpu_log_fn(g, "Unknown address type.");
7506 return -EINVAL;
7507 }
7508
7509 *__offset_in_segment = offset_in_segment;
7510 return 0;
7511}
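/*
 * Illustrative sketch, not part of the original driver: for the GPC case
 * above with more than one PES per GPC, the offset skips the TPC and
 * ETPC register data of all TPCs and then the PPC register data of all
 * PPCs. The hypothetical helper below redoes that arithmetic; the counts
 * are register counts from the reglists, and "<< 2" converts words to
 * bytes.
 */
static inline u32 demo_gpc_segment_offset(u32 tpc_reg_count,
					  u32 etpc_reg_count, u32 num_tpcs,
					  u32 ppc_reg_count, u32 num_ppcs)
{
	/* TPC (+ ETPC) data first, then PPC data, then the GPC data */
	return (((tpc_reg_count + etpc_reg_count) * num_tpcs) << 2) +
		((ppc_reg_count * num_ppcs) << 2);
}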
7512
7513/*
7514 * This function will return the 32 bit offset for a priv register if it is
7515 * present in the context buffer. The context buffer is in CPU memory.
7516 */
7517static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7518 u32 addr,
7519 bool is_quad, u32 quad,
7520 u32 *context_buffer,
7521 u32 context_buffer_size,
7522 u32 *priv_offset)
7523{
7524 u32 i, data32;
7525 int err;
7526 enum ctxsw_addr_type addr_type;
7527 u32 broadcast_flags;
7528 u32 gpc_num, tpc_num, ppc_num, be_num;
7529 u32 num_gpcs, num_tpcs, num_ppcs;
7530 u32 offset;
7531 u32 sys_priv_offset, gpc_priv_offset;
7532 u32 ppc_mask, reg_list_ppc_count;
7533 u8 *context;
7534 u32 offset_to_segment, offset_in_segment = 0;
7535
7536 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
7537
7538 err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
7539 &gpc_num, &tpc_num, &ppc_num, &be_num,
7540 &broadcast_flags);
7541 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7542 "addr_type = %d, broadcast_flags: %08x",
7543 addr_type, broadcast_flags);
7544 if (err != 0) {
7545 return err;
7546 }
7547
7548 context = (u8 *)context_buffer;
7549 if (!check_main_image_header_magic(context)) {
7550 nvgpu_err(g,
7551 "Invalid main header: magic value");
7552 return -EINVAL;
7553 }
7554 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7555
7556 /* Parse the FECS local header. */
7557 context += ctxsw_prog_ucode_header_size_in_bytes();
7558 if (!check_local_header_magic(context)) {
7559 nvgpu_err(g,
7560 "Invalid FECS local header: magic value");
7561 return -EINVAL;
7562 }
7563 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7564 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7565 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
7566
7567 /* If found in Ext buffer, ok.
7568 * If it failed and we expected to find it there (quad offset)
7569 * then return the error. Otherwise continue on.
7570 */
7571 err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
7572 addr, is_quad, quad, context_buffer,
7573 context_buffer_size, priv_offset);
7574 if ((err == 0) || ((err != 0) && is_quad)) {
7575 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7576 "err = %d, is_quad = %s",
7577 err, is_quad ? "true" : "false");
7578 return err;
7579 }
7580
7581 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
7582 (addr_type == CTXSW_ADDR_TYPE_BE)) {
7583 /* Find the offset in the FECS segment. */
7584 offset_to_segment = sys_priv_offset *
7585 ctxsw_prog_ucode_header_size_in_bytes();
7586
7587 err = gr_gk20a_process_context_buffer_priv_segment(g,
7588 addr_type, addr,
7589 0, 0, 0, 0,
7590 &offset);
7591 if (err != 0) {
7592 return err;
7593 }
7594
7595 *priv_offset = (offset_to_segment + offset);
7596 return 0;
7597 }
7598
7599 if ((gpc_num + 1) > num_gpcs) {
7600 nvgpu_err(g,
7601 "GPC %d not in this context buffer.",
7602 gpc_num);
7603 return -EINVAL;
7604 }
7605
7606 /* Parse the GPCCS local header(s).*/
7607 for (i = 0; i < num_gpcs; i++) {
7608 context += ctxsw_prog_ucode_header_size_in_bytes();
7609 if (!check_local_header_magic(context)) {
7610 nvgpu_err(g,
7611 "Invalid GPCCS local header: magic value");
7612 return -EINVAL;
7613
7614 }
7615 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7616 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7617
7618 err = gr_gk20a_determine_ppc_configuration(g, context,
7619 &num_ppcs, &ppc_mask,
7620 &reg_list_ppc_count);
7621 if (err != 0) {
7622 nvgpu_err(g, "determine ppc configuration failed");
7623 return err;
7624 }
7625
7626
7627 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
7628
7629 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
7630 nvgpu_err(g,
7631 "GPC %d TPC %d not in this context buffer.",
7632 gpc_num, tpc_num);
7633 return -EINVAL;
7634 }
7635
7636 /* Find the offset in the GPCCS segment.*/
7637 if (i == gpc_num) {
7638 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7639 "gpc_priv_offset 0x%#08x",
7640 gpc_priv_offset);
7641 offset_to_segment = gpc_priv_offset *
7642 ctxsw_prog_ucode_header_size_in_bytes();
7643
7644 err = g->ops.gr.get_offset_in_gpccs_segment(g,
7645 addr_type,
7646 num_tpcs, num_ppcs, reg_list_ppc_count,
7647 &offset_in_segment);
7648 if (err != 0) {
7649 return -EINVAL;
7650 }
7651
7652 offset_to_segment += offset_in_segment;
7653 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7654 "offset_to_segment 0x%#08x",
7655 offset_to_segment);
7656
7657 err = gr_gk20a_process_context_buffer_priv_segment(g,
7658 addr_type, addr,
7659 i, num_tpcs,
7660 num_ppcs, ppc_mask,
7661 &offset);
7662 if (err != 0) {
7663 return -EINVAL;
7664 }
7665
7666 *priv_offset = offset_to_segment + offset;
7667 return 0;
7668 }
7669 }
7670
7671 return -EINVAL;
7672}
7673
7674static int map_cmp(const void *a, const void *b)
7675{
7676 struct ctxsw_buf_offset_map_entry *e1 =
7677 (struct ctxsw_buf_offset_map_entry *)a;
7678 struct ctxsw_buf_offset_map_entry *e2 =
7679 (struct ctxsw_buf_offset_map_entry *)b;
7680
7681 if (e1->addr < e2->addr) {
7682 return -1;
7683 }
7684
7685 if (e1->addr > e2->addr) {
7686 return 1;
7687 }
7688 return 0;
7689}
7690
7691static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry *map,
7692 struct aiv_list_gk20a *regs,
7693 u32 *count, u32 *offset,
7694 u32 max_cnt, u32 base, u32 mask)
7695{
7696 u32 idx;
7697 u32 cnt = *count;
7698 u32 off = *offset;
7699
7700 if ((cnt + regs->count) > max_cnt) {
7701 return -EINVAL;
7702 }
7703
7704 for (idx = 0; idx < regs->count; idx++) {
7705 if ((base + (regs->l[idx].addr & mask)) < 0xFFF) {
7706 map[cnt].addr = base + (regs->l[idx].addr & mask)
7707 + NV_PCFG_BASE;
7708 } else {
7709 map[cnt].addr = base + (regs->l[idx].addr & mask);
7710 }
7711 map[cnt++].offset = off;
7712 off += 4;
7713 }
7714 *count = cnt;
7715 *offset = off;
7716 return 0;
7717}
7718
7719static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
7720 struct ctxsw_buf_offset_map_entry *map,
7721 struct aiv_list_gk20a *regs,
7722 u32 *count, u32 *offset,
7723 u32 max_cnt, u32 base, u32 mask)
7724{
7725 u32 idx;
7726 u32 cnt = *count;
7727 u32 off = *offset;
7728
7729 if ((cnt + regs->count) > max_cnt) {
7730 return -EINVAL;
7731 }
7732
7733 /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1
7734 * To handle the case of PPC registers getting added into GPC, the below
7735 * code specifically checks for any PPC offsets and adds them using
7736 * proper mask
7737 */
7738 for (idx = 0; idx < regs->count; idx++) {
7739 /* Check if the address is PPC address */
7740 if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) {
7741 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
7742 GPU_LIT_PPC_IN_GPC_BASE);
7743 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
7744 GPU_LIT_PPC_IN_GPC_STRIDE);
7745 /* Use PPC mask instead of the GPC mask provided */
7746 u32 ppcmask = ppc_in_gpc_stride - 1;
7747
7748 map[cnt].addr = base + ppc_in_gpc_base
7749 + (regs->l[idx].addr & ppcmask);
7750 } else {
7751 map[cnt].addr = base + (regs->l[idx].addr & mask);
7752 }
7753 map[cnt++].offset = off;
7754 off += 4;
7755 }
7756 *count = cnt;
7757 *offset = off;
7758 return 0;
7759}
7760
7761static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
7762 struct aiv_list_gk20a *regs,
7763 u32 *count, u32 *offset,
7764 u32 max_cnt, u32 base, u32 mask)
7765{
7766 u32 idx;
7767 u32 cnt = *count;
7768 u32 off = *offset;
7769
7770 if ((cnt + regs->count) > max_cnt) {
7771 return -EINVAL;
7772 }
7773
7774 for (idx = 0; idx < regs->count; idx++) {
7775 map[cnt].addr = base + (regs->l[idx].addr & mask);
7776 map[cnt++].offset = off;
7777 off += 4;
7778 }
7779 *count = cnt;
7780 *offset = off;
7781 return 0;
7782}
7783
7784/* Helper function to add register entries to the register map for all
7785 * subunits
7786 */
7787static int add_ctxsw_buffer_map_entries_subunits(
7788 struct ctxsw_buf_offset_map_entry *map,
7789 struct aiv_list_gk20a *regs,
7790 u32 *count, u32 *offset,
7791 u32 max_cnt, u32 base,
7792 u32 num_units, u32 stride, u32 mask)
7793{
7794 u32 unit;
7795 u32 idx;
7796 u32 cnt = *count;
7797 u32 off = *offset;
7798
7799 if ((cnt + (regs->count * num_units)) > max_cnt) {
7800 return -EINVAL;
7801 }
7802
7803 /* Data is interleaved for units in ctxsw buffer */
7804 for (idx = 0; idx < regs->count; idx++) {
7805 for (unit = 0; unit < num_units; unit++) {
7806 map[cnt].addr = base + (regs->l[idx].addr & mask) +
7807 (unit * stride);
7808 map[cnt++].offset = off;
7809 off += 4;
7810 }
7811 }
7812 *count = cnt;
7813 *offset = off;
7814 return 0;
7815}
7816
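To make the interleaving noted in the comment above concrete (a worked illustration of the loop above, no new names assumed): for two registers R0 and R1 and three units, add_ctxsw_buffer_map_entries_subunits() emits map entries in buffer order R0@unit0, R0@unit1, R0@unit2, R1@unit0, R1@unit1, R1@unit2, each entry advancing the buffer offset by 4 bytes, while each entry's register address is displaced by (unit * stride) from the masked base address.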
7817int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
7818 struct ctxsw_buf_offset_map_entry *map,
7819 struct aiv_list_gk20a *regs,
7820 u32 *count, u32 *offset,
7821 u32 max_cnt, u32 base,
7822 u32 num_fbpas, u32 stride, u32 mask)
7823{
7824 return add_ctxsw_buffer_map_entries_subunits(map, regs, count, offset,
7825 max_cnt, base, num_fbpas, stride, mask);
7826}
7827
7828static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7829 struct ctxsw_buf_offset_map_entry *map,
7830 u32 *count, u32 *offset, u32 max_cnt)
7831{
7832 u32 num_gpcs = g->gr.gpc_count;
7833 u32 num_ppcs, num_tpcs, gpc_num, base;
7834 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
7835 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7836 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
7837 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
7838 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
7839 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7840
7841 for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
7842 num_tpcs = g->gr.gpc_tpc_count[gpc_num];
7843 base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base;
7844 if (add_ctxsw_buffer_map_entries_subunits(map,
7845 &g->gr.ctx_vars.ctxsw_regs.pm_tpc,
7846 count, offset, max_cnt, base, num_tpcs,
7847 tpc_in_gpc_stride,
7848 (tpc_in_gpc_stride - 1))) {
7849 return -EINVAL;
7850 }
7851
7852 num_ppcs = g->gr.gpc_ppc_count[gpc_num];
7853 base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
7854 if (add_ctxsw_buffer_map_entries_subunits(map,
7855 &g->gr.ctx_vars.ctxsw_regs.pm_ppc,
7856 count, offset, max_cnt, base, num_ppcs,
7857 ppc_in_gpc_stride,
7858 (ppc_in_gpc_stride - 1))) {
7859 return -EINVAL;
7860 }
7861
7862 base = gpc_base + (gpc_stride * gpc_num);
7863 if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
7864 &g->gr.ctx_vars.ctxsw_regs.pm_gpc,
7865 count, offset, max_cnt, base,
7866 (gpc_stride - 1))) {
7867 return -EINVAL;
7868 }
7869
7870 base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
7871 if (add_ctxsw_buffer_map_entries(map,
7872 &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc,
7873 count, offset, max_cnt, base, ~0)) {
7874 return -EINVAL;
7875 }
7876
7877 base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num);
7878 if (add_ctxsw_buffer_map_entries(map,
7879 &g->gr.ctx_vars.ctxsw_regs.perf_gpc,
7880 count, offset, max_cnt, base, ~0)) {
7881 return -EINVAL;
7882 }
7883
7884 base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
7885 if (add_ctxsw_buffer_map_entries(map,
7886 &g->gr.ctx_vars.ctxsw_regs.gpc_router,
7887 count, offset, max_cnt, base, ~0)) {
7888 return -EINVAL;
7889 }
7890
7891 /* Counter Aggregation Unit, if available */
7892 if (g->gr.ctx_vars.ctxsw_regs.pm_cau.count) {
7893 base = gpc_base + (gpc_stride * gpc_num)
7894 + tpc_in_gpc_base;
7895 if (add_ctxsw_buffer_map_entries_subunits(map,
7896 &g->gr.ctx_vars.ctxsw_regs.pm_cau,
7897 count, offset, max_cnt, base, num_tpcs,
7898 tpc_in_gpc_stride,
7899 (tpc_in_gpc_stride - 1))) {
7900 return -EINVAL;
7901 }
7902 }
7903
7904 *offset = ALIGN(*offset, 256);
7905 }
7906 return 0;
7907}
7908
7909int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
7910 struct aiv_list_gk20a *regs,
7911 u32 *count, u32 *offset,
7912 u32 max_cnt, u32 base, u32 mask)
7913{
7914 return add_ctxsw_buffer_map_entries(map, regs,
7915 count, offset, max_cnt, base, mask);
7916}
7917
7918/*
7919 * PM CTXSW BUFFER LAYOUT :
7920 *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE
7921 *| |
7922 *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
7923 *|---------------------------------------------|
7924 *| |
7925 *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
7926 *|---------------------------------------------|
7927 *| |
7928 *| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words
7929 *|---------------------------------------------|
7930 *| |
7931 *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words
7932 *|---------------------------------------------|
7933 *| PADDING for 256 byte alignment |
7934 *|---------------------------------------------|<----256 byte aligned
7935 *| LIST_compressed_nv_perf_fbp_ctx_regs |
7936 *| |Space allocated: numRegs * n words (for n FB units)
7937 *|---------------------------------------------|
7938 *| LIST_compressed_nv_perf_fbprouter_ctx_regs |
7939 *| |Space allocated: numRegs * n words (for n FB units)
7940 *|---------------------------------------------|
7941 *| LIST_compressed_pm_fbpa_ctx_regs |
7942 *| |Space allocated: numRegs * n words (for n FB units)
7943 *|---------------------------------------------|
7944 *| LIST_compressed_pm_rop_ctx_regs |
7945 *|---------------------------------------------|
7946 *| LIST_compressed_pm_ltc_ctx_regs |
7947 *| LTC0 LTS0 |
7948 *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
7949 *| LTCn LTS0 |
7950 *| LTC0 LTS1 |
7951 *| LTC1 LTS1 |
7952 *| LTCn LTS1 |
7953 *| LTC0 LTSn |
7954 *| LTC1 LTSn |
7955 *| LTCn LTSn |
7956 *|---------------------------------------------|
7957 *| PADDING for 256 byte alignment |
7958 *|---------------------------------------------|<----256 byte aligned
7959 *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
7960 *| REG0 TPC1 | all the GPC/TPC register lists
7961 *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
7962 *| LIST_pm_ctx_reg_TPC REG1 TPC0 |
7963 *| * numTpcs REG1 TPC1 |
7964 *| LIST_pm_ctx_reg_PPC REG1 TPCn |
7965 *| * numPpcs REGn TPC0 |
7966 *| LIST_pm_ctx_reg_GPC REGn TPC1 |
7967 *| List_pm_ctx_reg_uc_GPC REGn TPCn |
7968 *| LIST_nv_perf_ctx_reg_GPC |
7969 *| LIST_nv_perf_gpcrouter_ctx_reg |
7970 *| LIST_nv_perf_ctx_reg_CAU |
7971 *| ---- |--
7972 *| GPC1 . |
7973 *| . |<----
7974 *|---------------------------------------------|
7975 *= =
7976 *| GPCn |
7977 *= =
7978 *|---------------------------------------------|
7979 */
7980
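A short worked example of the 256-byte alignment rule in the layout above (the offsets are made up for illustration): if the compressed PMA list ends at buffer offset 0x143C, the FBP section that follows starts at ALIGN(0x143C, 256) = 0x1500, i.e. (0x143C + 0xFF) & ~0xFFU, and the padding bytes in between are unused. The per-GPC regions follow the same rule, which is why the builder below re-aligns the running offset to 256 bytes after the PMA/LTC lists and again at the end of each GPC.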
7981static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7982{
7983 u32 hwpm_ctxsw_buffer_size = g->gr.ctx_vars.pm_ctxsw_image_size;
7984 u32 hwpm_ctxsw_reg_count_max;
7985 u32 map_size;
7986 u32 i, count = 0;
7987 u32 offset = 0;
7988 struct ctxsw_buf_offset_map_entry *map;
7989 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
7990 u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
7991 u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
7992 u32 num_ltc = g->ops.gr.get_max_ltc_per_fbp(g) * g->gr.num_fbps;
7993
7994 if (hwpm_ctxsw_buffer_size == 0) {
7995 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7996 "no PM Ctxsw buffer memory in context buffer");
7997 return -EINVAL;
7998 }
7999
8000 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
8001 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
8002
8003 map = nvgpu_big_zalloc(g, map_size);
8004 if (map == NULL) {
8005 return -ENOMEM;
8006 }
8007
8008 /* Add entries from _LIST_pm_ctx_reg_SYS */
8009 if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
8010 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
8011 goto cleanup;
8012 }
8013
8014 /* Add entries from _LIST_nv_perf_ctx_reg_SYS */
8015 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys,
8016 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
8017 goto cleanup;
8018 }
8019
8020 /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
8021 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router,
8022 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
8023 goto cleanup;
8024 }
8025
8026 /* Add entries from _LIST_nv_perf_pma_ctx_reg*/
8027 if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma,
8028 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
8029 goto cleanup;
8030 }
8031
8032 offset = ALIGN(offset, 256);
8033
8034 /* Add entries from _LIST_nv_perf_fbp_ctx_regs */
8035 if (add_ctxsw_buffer_map_entries_subunits(map,
8036 &g->gr.ctx_vars.ctxsw_regs.fbp,
8037 &count, &offset,
8038 hwpm_ctxsw_reg_count_max, 0,
8039 g->gr.num_fbps,
8040 g->ops.gr.get_pmm_per_chiplet_offset(),
8041 ~0)) {
8042 goto cleanup;
8043 }
8044
8045 /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
8046 if (add_ctxsw_buffer_map_entries_subunits(map,
8047 &g->gr.ctx_vars.ctxsw_regs.fbp_router,
8048 &count, &offset,
8049 hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps,
8050 NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) {
8051 goto cleanup;
8052 }
8053
8054 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
8055 if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map,
8056 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
8057 &count, &offset,
8058 hwpm_ctxsw_reg_count_max, 0,
8059 num_fbpas, fbpa_stride, ~0)) {
8060 goto cleanup;
8061 }
8062
8063 /* Add entries from _LIST_nv_pm_rop_ctx_regs */
8064 if (add_ctxsw_buffer_map_entries(map,
8065 &g->gr.ctx_vars.ctxsw_regs.pm_rop,
8066 &count, &offset,
8067 hwpm_ctxsw_reg_count_max, 0, ~0)) {
8068 goto cleanup;
8069 }
8070
8071 /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
8072 if (add_ctxsw_buffer_map_entries_subunits(map,
8073 &g->gr.ctx_vars.ctxsw_regs.pm_ltc,
8074 &count, &offset,
8075 hwpm_ctxsw_reg_count_max, 0,
8076 num_ltc, ltc_stride, ~0)) {
8077 goto cleanup;
8078 }
8079
8080 offset = ALIGN(offset, 256);
8081
8082 /* Add GPC entries */
8083 if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
8084 hwpm_ctxsw_reg_count_max)) {
8085 goto cleanup;
8086 }
8087
8088 if (offset > hwpm_ctxsw_buffer_size) {
8089 nvgpu_err(g, "offset > buffer size");
8090 goto cleanup;
8091 }
8092
8093 sort(map, count, sizeof(*map), map_cmp, NULL);
8094
8095 g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map = map;
8096 g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map_count = count;
8097
8098 nvgpu_log_info(g, "Reg Addr => HWPM Ctxt switch buffer offset");
8099
8100 for (i = 0; i < count; i++) {
8101 nvgpu_log_info(g, "%08x => %08x", map[i].addr, map[i].offset);
8102 }
8103
8104 return 0;
8105cleanup:
8106 nvgpu_err(g, "Failed to create HWPM buffer offset map");
8107 nvgpu_big_free(g, map);
8108 return -EINVAL;
8109}
8110
8111/*
8112 * This function will return the 32 bit offset for a priv register if it is
8113 * present in the PM context buffer.
8114 */
8115static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
8116 u32 addr,
8117 u32 *priv_offset)
8118{
8119 struct gr_gk20a *gr = &g->gr;
8120 int err = 0;
8121 u32 count;
8122 struct ctxsw_buf_offset_map_entry *map, *result, map_key;
8123
8124 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
8125
8126 /* Create map of pri address and pm offset if necessary */
8127 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) {
8128 err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g);
8129 if (err != 0) {
8130 return err;
8131 }
8132 }
8133
8134 *priv_offset = 0;
8135
8136 map = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map;
8137 count = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map_count;
8138
8139 map_key.addr = addr;
8140 result = bsearch(&map_key, map, count, sizeof(*map), map_cmp);
8141
8142 if (result) {
8143 *priv_offset = result->offset;
8144 } else {
8145 nvgpu_err(g, "Lookup failed for address 0x%x", addr);
8146 err = -EINVAL;
8147 }
8148 return err;
8149}
8150
8151bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
8152{
8153 int curr_gr_ctx;
8154 u32 curr_gr_tsgid;
8155 struct gk20a *g = ch->g;
8156 struct channel_gk20a *curr_ch;
8157 bool ret = false;
8158 struct tsg_gk20a *tsg;
8159
8160 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
8161
8162 /* when contexts are unloaded from GR, the valid bit is reset
8163 * but the instance pointer information remains intact. So the
8164 * valid bit must be checked to be absolutely certain that a
8165 * valid context is currently resident.
8166 */
8167 if (gr_fecs_current_ctx_valid_v(curr_gr_ctx) == 0U) {
8168 return false;
8169 }
8170
8171 curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
8172 &curr_gr_tsgid);
8173
8174 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
8175 "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
8176 " ch->chid=%d",
8177 (curr_ch != NULL) ? curr_ch->chid : U32_MAX,
8178 curr_gr_tsgid,
8179 ch->tsgid,
8180 ch->chid);
8181
8182 if (curr_ch == NULL) {
8183 return false;
8184 }
8185
8186 if (ch->chid == curr_ch->chid) {
8187 ret = true;
8188 }
8189
8190 tsg = tsg_gk20a_from_ch(ch);
8191 if ((tsg != NULL) && (tsg->tsgid == curr_gr_tsgid)) {
8192 ret = true;
8193 }
8194
8195 gk20a_channel_put(curr_ch);
8196 return ret;
8197}
8198
8199int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8200 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
8201 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
8202 bool ch_is_curr_ctx)
8203{
8204 struct gk20a *g = ch->g;
8205 struct tsg_gk20a *tsg;
8206 struct nvgpu_gr_ctx *gr_ctx;
8207 bool gr_ctx_ready = false;
8208 bool pm_ctx_ready = false;
8209 struct nvgpu_mem *current_mem = NULL;
8210 u32 i, j, offset, v;
8211 struct gr_gk20a *gr = &g->gr;
8212 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8213 u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count *
8214 sm_per_tpc;
8215 u32 *offsets = NULL;
8216 u32 *offset_addrs = NULL;
8217 u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
8218 int err = 0, pass;
8219
8220 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
8221 num_ctx_wr_ops, num_ctx_rd_ops);
8222
8223 tsg = tsg_gk20a_from_ch(ch);
8224 if (tsg == NULL) {
8225 return -EINVAL;
8226 }
8227
8228 gr_ctx = &tsg->gr_ctx;
8229
8230 if (ch_is_curr_ctx) {
8231 for (pass = 0; pass < 2; pass++) {
8232 ctx_op_nr = 0;
8233 for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
8234 /* only do ctx ops and only on the right pass */
8235 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
8236 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
8237 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) {
8238 continue;
8239 }
8240
8241 /* if this is a quad access, setup for special access*/
8242 if ((ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD))
8243 && (g->ops.gr.access_smpc_reg != NULL)) {
8244 g->ops.gr.access_smpc_reg(g,
8245 ctx_ops[i].quad,
8246 ctx_ops[i].offset);
8247 }
8248 offset = ctx_ops[i].offset;
8249
8250 if (pass == 0) { /* write pass */
8251 v = gk20a_readl(g, offset);
8252 v &= ~ctx_ops[i].and_n_mask_lo;
8253 v |= ctx_ops[i].value_lo;
8254 gk20a_writel(g, offset, v);
8255
8256 nvgpu_log(g, gpu_dbg_gpu_dbg,
8257 "direct wr: offset=0x%x v=0x%x",
8258 offset, v);
8259
8260 if (ctx_ops[i].op == REGOP(WRITE_64)) {
8261 v = gk20a_readl(g, offset + 4);
8262 v &= ~ctx_ops[i].and_n_mask_hi;
8263 v |= ctx_ops[i].value_hi;
8264 gk20a_writel(g, offset + 4, v);
8265
8266 nvgpu_log(g, gpu_dbg_gpu_dbg,
8267 "direct wr: offset=0x%x v=0x%x",
8268 offset + 4, v);
8269 }
8270
8271 } else { /* read pass */
8272 ctx_ops[i].value_lo =
8273 gk20a_readl(g, offset);
8274
8275 nvgpu_log(g, gpu_dbg_gpu_dbg,
8276 "direct rd: offset=0x%x v=0x%x",
8277 offset, ctx_ops[i].value_lo);
8278
8279 if (ctx_ops[i].op == REGOP(READ_64)) {
8280 ctx_ops[i].value_hi =
8281 gk20a_readl(g, offset + 4);
8282
8283 nvgpu_log(g, gpu_dbg_gpu_dbg,
8284 "direct rd: offset=0x%x v=0x%x",
8285 offset + 4, ctx_ops[i].value_hi);
8286 } else {
8287 ctx_ops[i].value_hi = 0;
8288 }
8289 }
8290 ctx_op_nr++;
8291 }
8292 }
8293 goto cleanup;
8294 }
8295
8296 /* they're the same size, so just use one alloc for both */
8297 offsets = nvgpu_kzalloc(g, 2 * sizeof(u32) * max_offsets);
8298 if (offsets == NULL) {
8299 err = -ENOMEM;
8300 goto cleanup;
8301 }
8302 offset_addrs = offsets + max_offsets;
8303
8304 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
8305 if (err != 0) {
8306 goto cleanup;
8307 }
8308
8309 g->ops.mm.l2_flush(g, true);
8310
8311 /* write to appropriate place in context image,
8312 * first have to figure out where that really is */
8313
8314 /* first pass is writes, second reads */
8315 for (pass = 0; pass < 2; pass++) {
8316 ctx_op_nr = 0;
8317 for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
8318 u32 num_offsets;
8319
8320 /* only do ctx ops and only on the right pass */
8321 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
8322 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
8323 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) {
8324 continue;
8325 }
8326
8327 err = gr_gk20a_get_ctx_buffer_offsets(g,
8328 ctx_ops[i].offset,
8329 max_offsets,
8330 offsets, offset_addrs,
8331 &num_offsets,
8332 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
8333 ctx_ops[i].quad);
8334 if (err == 0) {
8335 if (!gr_ctx_ready) {
8336 gr_ctx_ready = true;
8337 }
8338 current_mem = &gr_ctx->mem;
8339 } else {
8340 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
8341 ctx_ops[i].offset,
8342 max_offsets,
8343 offsets, offset_addrs,
8344 &num_offsets);
8345 if (err != 0) {
8346 nvgpu_log(g, gpu_dbg_gpu_dbg,
8347 "ctx op invalid offset: offset=0x%x",
8348 ctx_ops[i].offset);
8349 ctx_ops[i].status =
8350 REGOP(STATUS_INVALID_OFFSET);
8351 continue;
8352 }
8353 if (!pm_ctx_ready) {
8354 /* Make sure ctx buffer was initialized */
8355 if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) {
8356 nvgpu_err(g,
8357 "Invalid ctx buffer");
8358 err = -EINVAL;
8359 goto cleanup;
8360 }
8361 pm_ctx_ready = true;
8362 }
8363 current_mem = &gr_ctx->pm_ctx.mem;
8364 }
8365
8366 /* if this is a quad access, setup for special access*/
8367 if ((ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD)) &&
8368 (g->ops.gr.access_smpc_reg != NULL)) {
8369 g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad,
8370 ctx_ops[i].offset);
8371 }
8372
8373 for (j = 0; j < num_offsets; j++) {
8374 /* sanity check gr ctxt offsets,
8375 * don't write outside, worst case
8376 */
8377 if ((current_mem == &gr_ctx->mem) &&
8378 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) {
8379 continue;
8380 }
8381 if (pass == 0) { /* write pass */
8382 v = nvgpu_mem_rd(g, current_mem, offsets[j]);
8383 v &= ~ctx_ops[i].and_n_mask_lo;
8384 v |= ctx_ops[i].value_lo;
8385 nvgpu_mem_wr(g, current_mem, offsets[j], v);
8386
8387 nvgpu_log(g, gpu_dbg_gpu_dbg,
8388 "context wr: offset=0x%x v=0x%x",
8389 offsets[j], v);
8390
8391 if (ctx_ops[i].op == REGOP(WRITE_64)) {
8392 v = nvgpu_mem_rd(g, current_mem, offsets[j] + 4);
8393 v &= ~ctx_ops[i].and_n_mask_hi;
8394 v |= ctx_ops[i].value_hi;
8395 nvgpu_mem_wr(g, current_mem, offsets[j] + 4, v);
8396
8397 nvgpu_log(g, gpu_dbg_gpu_dbg,
8398 "context wr: offset=0x%x v=0x%x",
8399 offsets[j] + 4, v);
8400 }
8401
8402 /* check to see if we need to add a special WAR
8403  * for some of the SMPC perf regs */
8404 gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j],
8405 v, current_mem);
8406
8407 } else { /* read pass */
8408 ctx_ops[i].value_lo =
8409 nvgpu_mem_rd(g, current_mem, offsets[0]);
8410
8411 nvgpu_log(g, gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
8412 offsets[0], ctx_ops[i].value_lo);
8413
8414 if (ctx_ops[i].op == REGOP(READ_64)) {
8415 ctx_ops[i].value_hi =
8416 nvgpu_mem_rd(g, current_mem, offsets[0] + 4);
8417
8418 nvgpu_log(g, gpu_dbg_gpu_dbg,
8419 "context rd: offset=0x%x v=0x%x",
8420 offsets[0] + 4, ctx_ops[i].value_hi);
8421 } else {
8422 ctx_ops[i].value_hi = 0;
8423 }
8424 }
8425 }
8426 ctx_op_nr++;
8427 }
8428 }
8429
8430 cleanup:
8431 if (offsets) {
8432 nvgpu_kfree(g, offsets);
8433 }
8434
8435 if (gr_ctx->patch_ctx.mem.cpu_va) {
8436 gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
8437 }
8438
8439 return err;
8440}
8441
8442int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8443 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
8444 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
8445 bool *is_curr_ctx)
8446{
8447 struct gk20a *g = ch->g;
8448 int err, tmp_err;
8449 bool ch_is_curr_ctx;
8450
8451 /* disable channel switching.
8452 * at that point the hardware state can be inspected to
8453 * determine if the context we're interested in is current.
8454 */
8455 err = gr_gk20a_disable_ctxsw(g);
8456 if (err != 0) {
8457 nvgpu_err(g, "unable to stop gr ctxsw");
8458 /* this should probably be ctx-fatal... */
8459 return err;
8460 }
8461
8462 ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
8463 if (is_curr_ctx != NULL) {
8464 *is_curr_ctx = ch_is_curr_ctx;
8465 }
8466 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
8467 ch_is_curr_ctx);
8468
8469 err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
8470 num_ctx_rd_ops, ch_is_curr_ctx);
8471
8472 tmp_err = gr_gk20a_enable_ctxsw(g);
8473 if (tmp_err) {
8474 nvgpu_err(g, "unable to restart ctxsw!");
8475 err = tmp_err;
8476 }
8477
8478 return err;
8479}
8480
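As a usage sketch (an illustration, not code from the original file): a debug client could read one 32-bit context register through gr_gk20a_exec_ctx_ops() roughly as below. The nvgpu_dbg_reg_op fields match their use in __gr_gk20a_exec_ctx_ops() above; REGOP(READ_32) and REGOP(STATUS_SUCCESS) are assumed to be defined alongside the REGOP() values already used in this file.

static int example_read_one_ctx_reg(struct channel_gk20a *ch,
				    u32 reg_offset, u32 *value)
{
	struct nvgpu_dbg_reg_op op = {
		.op     = REGOP(READ_32),     /* read op: handled on pass 1 (read pass) */
		.type   = REGOP(TYPE_GR_CTX), /* context register, not a global one */
		.offset = reg_offset,
	};
	bool is_curr_ctx;
	int err;

	/* one op total: zero context writes, one context read */
	err = gr_gk20a_exec_ctx_ops(ch, &op, 1, 0, 1, &is_curr_ctx);
	if (err != 0) {
		return err;
	}
	if (op.status != REGOP(STATUS_SUCCESS)) {
		return -EINVAL; /* e.g. offset not present in the ctx image */
	}
	*value = op.value_lo;
	return 0;
}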
8481void gr_gk20a_commit_global_pagepool(struct gk20a *g,
8482 struct nvgpu_gr_ctx *gr_ctx,
8483 u64 addr, u32 size, bool patch)
8484{
8485 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
8486 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
8487
8488 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
8489 gr_scc_pagepool_total_pages_f(size) |
8490 gr_scc_pagepool_valid_true_f(), patch);
8491
8492 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
8493 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
8494
8495 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
8496 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
8497
8498 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(),
8499 gr_pd_pagepool_total_pages_f(size) |
8500 gr_pd_pagepool_valid_true_f(), patch);
8501}
8502
8503void gk20a_init_gr(struct gk20a *g)
8504{
8505 nvgpu_cond_init(&g->gr.init_wq);
8506}
8507
8508int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
8509 u32 global_esr_mask, bool check_errors)
8510{
8511 bool locked_down;
8512 bool no_error_pending;
8513 u32 delay = GR_IDLE_CHECK_DEFAULT;
8514 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
8515 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8516 u32 dbgr_status0 = 0, dbgr_control0 = 0;
8517 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
8518 struct nvgpu_timeout timeout;
8519 u32 warp_esr;
8520
8521 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
8522 "GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
8523
8524 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
8525 NVGPU_TIMER_CPU_TIMER);
8526
8527 /* wait for the sm to lock down */
8528 do {
8529 u32 global_esr = g->ops.gr.get_sm_hww_global_esr(g,
8530 gpc, tpc, sm);
8531 dbgr_status0 = gk20a_readl(g,
8532 gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
8533
8534 warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
8535
8536 locked_down =
8537 (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
8538 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
8539 no_error_pending =
8540 check_errors &&
8541 (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
8542 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
8543 ((global_esr & ~global_esr_mask) == 0);
8544
8545 if (locked_down || no_error_pending) {
8546 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
8547 "GPC%d TPC%d SM%d: locked down SM",
8548 gpc, tpc, sm);
8549 return 0;
8550 }
8551
8552 /* if an mmu fault is pending and mmu debug mode is not
8553 * enabled, the sm will never lock down. */
8554 if (!mmu_debug_mode_enabled &&
8555 (g->ops.mm.mmu_fault_pending(g))) {
8556 nvgpu_err(g,
8557 "GPC%d TPC%d: mmu fault pending,"
8558 " SM%d will never lock down!", gpc, tpc, sm);
8559 return -EFAULT;
8560 }
8561
8562 nvgpu_usleep_range(delay, delay * 2);
8563 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
8564 } while (nvgpu_timeout_expired(&timeout) == 0);
8565
8566 dbgr_control0 = gk20a_readl(g,
8567 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
8568
8569 /* 64 bit read */
8570 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_1_r() + offset) << 32;
8571 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + offset);
8572
8573 /* 64 bit read */
8574 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r() + offset) << 32;
8575 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + offset);
8576
8577 /* 64 bit read */
8578 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r() + offset) << 32;
8579 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + offset);
8580
8581 nvgpu_err(g,
8582 "GPC%d TPC%d: timed out while trying to lock down SM", gpc, tpc);
8583 nvgpu_err(g,
8584 "STATUS0(0x%x)=0x%x CONTROL0=0x%x VALID_MASK=0x%llx PAUSE_MASK=0x%llx TRAP_MASK=0x%llx",
8585 gr_gpc0_tpc0_sm_dbgr_status0_r() + offset, dbgr_status0, dbgr_control0,
8586 warps_valid, warps_paused, warps_trapped);
8587
8588 return -ETIMEDOUT;
8589}
8590
8591void gk20a_gr_suspend_single_sm(struct gk20a *g,
8592 u32 gpc, u32 tpc, u32 sm,
8593 u32 global_esr_mask, bool check_errors)
8594{
8595 int err;
8596 u32 dbgr_control0;
8597 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8598
8599 /* if an SM debugger isn't attached, skip suspend */
8600 if (!g->ops.gr.sm_debugger_attached(g)) {
8601 nvgpu_err(g,
8602 "SM debugger not attached, skipping suspend!");
8603 return;
8604 }
8605
8606 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
8607 "suspending gpc:%d, tpc:%d, sm%d", gpc, tpc, sm);
8608
8609 /* assert stop trigger. */
8610 dbgr_control0 = gk20a_readl(g,
8611 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
8612 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8613 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
8614 dbgr_control0);
8615
8616 err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
8617 global_esr_mask, check_errors);
8618 if (err != 0) {
8619 nvgpu_err(g,
8620 "SuspendSm failed");
8621 return;
8622 }
8623}
8624
8625void gk20a_gr_suspend_all_sms(struct gk20a *g,
8626 u32 global_esr_mask, bool check_errors)
8627{
8628 struct gr_gk20a *gr = &g->gr;
8629 u32 gpc, tpc, sm;
8630 int err;
8631 u32 dbgr_control0;
8632 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8633
8634 /* if an SM debugger isn't attached, skip suspend */
8635 if (!g->ops.gr.sm_debugger_attached(g)) {
8636 nvgpu_err(g,
8637 "SM debugger not attached, skipping suspend!");
8638 return;
8639 }
8640
8641 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "suspending all sms");
8642 /* assert stop trigger. uniformity assumption: all SMs will have
8643 * the same state in dbg_control0.
8644 */
8645 dbgr_control0 =
8646 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
8647 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8648
8649 /* broadcast write */
8650 gk20a_writel(g,
8651 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8652
8653 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
8654 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
8655 for (sm = 0; sm < sm_per_tpc; sm++) {
8656 err = g->ops.gr.wait_for_sm_lock_down(g,
8657 gpc, tpc, sm,
8658 global_esr_mask, check_errors);
8659 if (err != 0) {
8660 nvgpu_err(g, "SuspendAllSms failed");
8661 return;
8662 }
8663 }
8664 }
8665 }
8666}
8667
8668void gk20a_gr_resume_single_sm(struct gk20a *g,
8669 u32 gpc, u32 tpc, u32 sm)
8670{
8671 u32 dbgr_control0;
8672 u32 offset;
8673 /*
8674 * The following requires some clarification. Despite the fact that both
8675 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
8676 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
8677 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
8678 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
8679 * (_DISABLE) as well.
8680 *
8681 * Advice from the arch group: Disable the stop trigger first, as a
8682 * separate operation, in order to ensure that the trigger has taken
8683 * effect, before enabling the run trigger.
8684 */
8685
8686 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8687
8688 /*De-assert stop trigger */
8689 dbgr_control0 =
8690 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
8691 dbgr_control0 = set_field(dbgr_control0,
8692 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(),
8693 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f());
8694 gk20a_writel(g,
8695 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
8696
8697 /* Run trigger */
8698 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f();
8699 gk20a_writel(g,
8700 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
8701}
8702
8703void gk20a_gr_resume_all_sms(struct gk20a *g)
8704{
8705 u32 dbgr_control0;
8706 /*
8707 * The following requires some clarification. Despite the fact that both
8708 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
8709 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
8710 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
8711 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
8712 * (_DISABLE) as well.
8713 *
8714 * Advice from the arch group: Disable the stop trigger first, as a
8715 * separate operation, in order to ensure that the trigger has taken
8716 * effect, before enabling the run trigger.
8717 */
8718
8719 /*De-assert stop trigger */
8720 dbgr_control0 =
8721 gk20a_readl(g, gr_gpcs_tpcs_sm_dbgr_control0_r());
8722 dbgr_control0 &= ~gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8723 gk20a_writel(g,
8724 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8725
8726 /* Run trigger */
8727 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f();
8728 gk20a_writel(g,
8729 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8730}
8731
8732int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
8733 struct channel_gk20a *ch, u64 sms, bool enable)
8734{
8735 struct nvgpu_dbg_reg_op *ops;
8736 unsigned int i = 0, sm_id;
8737 int err;
8738 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
8739 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
8740
8741 ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops));
8742 if (ops == NULL) {
8743 return -ENOMEM;
8744 }
8745 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
8746 int gpc, tpc;
8747 u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val;
8748
8749 if ((sms & BIT64(sm_id)) == 0ULL) {
8750 continue;
8751 }
8752
8753 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8754 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8755
8756 tpc_offset = tpc_in_gpc_stride * tpc;
8757 gpc_offset = gpc_stride * gpc;
8758 reg_offset = tpc_offset + gpc_offset;
8759
8760 ops[i].op = REGOP(WRITE_32);
8761 ops[i].type = REGOP(TYPE_GR_CTX);
8762 ops[i].offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
8763
8764 reg_mask = 0;
8765 reg_val = 0;
8766 if (enable) {
8767 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m();
8768 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_f();
8769 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m();
8770 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f();
8771 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m();
8772 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f();
8773 } else {
8774 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m();
8775 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_f();
8776 }
8777
8778 ops[i].and_n_mask_lo = reg_mask;
8779 ops[i].value_lo = reg_val;
8780 i++;
8781 }
8782
8783 err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0, NULL);
8784 if (err != 0) {
8785 nvgpu_err(g, "Failed to access register");
8786 }
8787 nvgpu_kfree(g, ops);
8788 return err;
8789}
8790
8791/*
8792 * gr_gk20a_suspend_context()
8793 * This API should be called with the dbg_session lock held
8794 * and ctxsw disabled.
8795 * Returns a bool indicating whether the context was
8796 * resident or not.
8797 */
8798bool gr_gk20a_suspend_context(struct channel_gk20a *ch)
8799{
8800 struct gk20a *g = ch->g;
8801 bool ctx_resident = false;
8802
8803 if (gk20a_is_channel_ctx_resident(ch)) {
8804 g->ops.gr.suspend_all_sms(g, 0, false);
8805 ctx_resident = true;
8806 } else {
8807 gk20a_disable_channel_tsg(g, ch);
8808 }
8809
8810 return ctx_resident;
8811}
8812
8813bool gr_gk20a_resume_context(struct channel_gk20a *ch)
8814{
8815 struct gk20a *g = ch->g;
8816 bool ctx_resident = false;
8817
8818 if (gk20a_is_channel_ctx_resident(ch)) {
8819 g->ops.gr.resume_all_sms(g);
8820 ctx_resident = true;
8821 } else {
8822 gk20a_enable_channel_tsg(g, ch);
8823 }
8824
8825 return ctx_resident;
8826}
8827
8828int gr_gk20a_suspend_contexts(struct gk20a *g,
8829 struct dbg_session_gk20a *dbg_s,
8830 int *ctx_resident_ch_fd)
8831{
8832 int local_ctx_resident_ch_fd = -1;
8833 bool ctx_resident;
8834 struct channel_gk20a *ch;
8835 struct dbg_session_channel_data *ch_data;
8836 int err = 0;
8837
8838 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
8839
8840 err = gr_gk20a_disable_ctxsw(g);
8841 if (err != 0) {
8842 nvgpu_err(g, "unable to stop gr ctxsw");
8843 goto clean_up;
8844 }
8845
8846 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
8847
8848 nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
8849 dbg_session_channel_data, ch_entry) {
8850 ch = g->fifo.channel + ch_data->chid;
8851
8852 ctx_resident = gr_gk20a_suspend_context(ch);
8853 if (ctx_resident) {
8854 local_ctx_resident_ch_fd = ch_data->channel_fd;
8855 }
8856 }
8857
8858 nvgpu_mutex_release(&dbg_s->ch_list_lock);
8859
8860 err = gr_gk20a_enable_ctxsw(g);
8861 if (err != 0) {
8862 nvgpu_err(g, "unable to restart ctxsw!");
8863 }
8864
8865 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
8866
8867clean_up:
8868 nvgpu_mutex_release(&g->dbg_sessions_lock);
8869
8870 return err;
8871}
8872
8873int gr_gk20a_resume_contexts(struct gk20a *g,
8874 struct dbg_session_gk20a *dbg_s,
8875 int *ctx_resident_ch_fd)
8876{
8877 int local_ctx_resident_ch_fd = -1;
8878 bool ctx_resident;
8879 struct channel_gk20a *ch;
8880 int err = 0;
8881 struct dbg_session_channel_data *ch_data;
8882
8883 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
8884
8885 err = gr_gk20a_disable_ctxsw(g);
8886 if (err != 0) {
8887 nvgpu_err(g, "unable to stop gr ctxsw");
8888 goto clean_up;
8889 }
8890
8891 nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
8892 dbg_session_channel_data, ch_entry) {
8893 ch = g->fifo.channel + ch_data->chid;
8894
8895 ctx_resident = gr_gk20a_resume_context(ch);
8896 if (ctx_resident) {
8897 local_ctx_resident_ch_fd = ch_data->channel_fd;
8898 }
8899 }
8900
8901 err = gr_gk20a_enable_ctxsw(g);
8902 if (err != 0) {
8903 nvgpu_err(g, "unable to restart ctxsw!");
8904 }
8905
8906 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
8907
8908clean_up:
8909 nvgpu_mutex_release(&g->dbg_sessions_lock);
8910
8911 return err;
8912}
8913
8914int gr_gk20a_trigger_suspend(struct gk20a *g)
8915{
8916 int err = 0;
8917 u32 dbgr_control0;
8918
8919 /* assert stop trigger. uniformity assumption: all SMs will have
8920 * the same state in dbg_control0. */
8921 dbgr_control0 =
8922 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
8923 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8924
8925 /* broadcast write */
8926 gk20a_writel(g,
8927 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8928
8929 return err;
8930}
8931
8932int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state)
8933{
8934 int err = 0;
8935 struct gr_gk20a *gr = &g->gr;
8936 u32 gpc, tpc, sm, sm_id;
8937 u32 global_mask;
8938
8939 if (!g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask ||
8940 !g->ops.gr.lock_down_sm || !g->ops.gr.bpt_reg_info)
8941 return -EINVAL;
8942
8943 /* Wait for the SMs to reach full stop. This condition is:
8944 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
8945 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
8946 * masks.
8947 */
8948 global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g);
8949
8950 /* Lock down all SMs */
8951 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
8952
8953 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8954 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8955 sm = g->gr.sm_to_cluster[sm_id].sm_index;
8956
8957 err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
8958 global_mask, false);
8959 if (err != 0) {
8960 nvgpu_err(g, "sm did not lock down!");
8961 return err;
8962 }
8963 }
8964
8965 /* Read the warp status */
8966 g->ops.gr.bpt_reg_info(g, w_state);
8967
8968 return 0;
8969}
8970
8971int gr_gk20a_resume_from_pause(struct gk20a *g)
8972{
8973 int err = 0;
8974 u32 reg_val;
8975
8976 /* Clear the pause mask to tell the GPU we want to resume everyone */
8977 gk20a_writel(g,
8978 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
8979
8980 /* explicitly re-enable forwarding of SM interrupts upon any resume */
8981 reg_val = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
8982 reg_val |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
8983 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), reg_val);
8984
8985 /* Now resume all sms, write a 0 to the stop trigger
8986 * then a 1 to the run trigger */
8987 g->ops.gr.resume_all_sms(g);
8988
8989 return err;
8990}
8991
8992int gr_gk20a_clear_sm_errors(struct gk20a *g)
8993{
8994 int ret = 0;
8995 u32 gpc, tpc, sm;
8996 struct gr_gk20a *gr = &g->gr;
8997 u32 global_esr;
8998 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8999
9000 if (!g->ops.gr.get_sm_hww_global_esr || !g->ops.gr.clear_sm_hww)
9001 return -EINVAL;
9002
9003 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
9004
9005 /* check if any tpc has an exception */
9006 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
9007
9008 for (sm = 0; sm < sm_per_tpc; sm++) {
9009 global_esr = g->ops.gr.get_sm_hww_global_esr(g,
9010 gpc, tpc, sm);
9011
9012 /* clearing hwws, also causes tpc and gpc
9013 * exceptions to be cleared
9014 */
9015 g->ops.gr.clear_sm_hww(g,
9016 gpc, tpc, sm, global_esr);
9017 }
9018 }
9019 }
9020
9021 return ret;
9022}
9023
9024u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
9025{
9026 struct gr_gk20a *gr = &g->gr;
9027 u32 sm_id, tpc_exception_en = 0;
9028 u32 offset, regval, tpc_offset, gpc_offset;
9029 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
9030 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
9031
9032 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
9033
9034 tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
9035 gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
9036 offset = tpc_offset + gpc_offset;
9037
9038 regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
9039 offset);
9040 /* Each bit represents the corresponding enablement state; bit 0 corresponds to SM0 */
9041 tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
9042 }
9043
9044 return tpc_exception_en;
9045}
9046
9047u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
9048{
9049 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
9050 u32 hww_warp_esr = gk20a_readl(g,
9051 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
9052 return hww_warp_esr;
9053}
9054
9055u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
9056{
9057 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
9058
9059 u32 hww_global_esr = gk20a_readl(g,
9060 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
9061
9062 return hww_global_esr;
9063}
9064
9065u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g)
9066{
9067 /*
9068 * These three interrupts don't require locking down the SM. They can
9069 * be handled by usermode clients as they aren't fatal. Additionally,
9070 * usermode clients may wish to allow some warps to execute while others
9071 * are at breakpoints, as opposed to fatal errors where all warps should
9072 * halt.
9073 */
9074 u32 global_esr_mask =
9075 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
9076 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
9077 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
9078
9079 return global_esr_mask;
9080}
9081
9082/* invalidate channel lookup tlb */
9083void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
9084{
9085 nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
9086 memset(gr->chid_tlb, 0,
9087 sizeof(struct gr_channel_map_tlb_entry) *
9088 GR_CHANNEL_MAP_TLB_SIZE);
9089 nvgpu_spinlock_release(&gr->ch_tlb_lock);
9090}
diff --git a/include/gk20a/gr_gk20a.h b/include/gk20a/gr_gk20a.h
deleted file mode 100644
index 2cd6a4f..0000000
--- a/include/gk20a/gr_gk20a.h
+++ /dev/null
@@ -1,852 +0,0 @@
1/*
2 * GK20A Graphics Engine
3 *
4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef GR_GK20A_H
25#define GR_GK20A_H
26
27#include <nvgpu/types.h>
28
29#include "gr_ctx_gk20a.h"
30#include "mm_gk20a.h"
31#include <nvgpu/power_features/pg.h>
32
33#include <nvgpu/comptags.h>
34#include <nvgpu/cond.h>
35
36#define GR_IDLE_CHECK_DEFAULT 10 /* usec */
37#define GR_IDLE_CHECK_MAX 200 /* usec */
38#define GR_FECS_POLL_INTERVAL 5 /* usec */
39
40#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
41#define INVALID_MAX_WAYS 0xFFFFFFFF
42
43#define GK20A_FECS_UCODE_IMAGE "fecs.bin"
44#define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin"
45
46#define GK20A_GR_MAX_PES_PER_GPC 3
47
48#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */
49
50/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
51#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
52#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
53
54/*
55 * Allocate a minimum of 1 page (4 KB) worth of patch space; this is 512
56 * entries of address and data pairs.
57 */
58#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2
59#define PATCH_CTX_SLOTS_PER_PAGE \
60 (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32)))
61#define PATCH_CTX_ENTRIES_FROM_SIZE(size) (size/sizeof(u32))
62
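Working the macros above through the comment's numbers (assuming a 4 KiB PAGE_SIZE): each patch entry takes PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32) = 2 * 4 = 8 bytes, so PATCH_CTX_SLOTS_PER_PAGE = 4096 / 8 = 512 address/data pairs per page, matching the "512 entries" figure; PATCH_CTX_ENTRIES_FROM_SIZE(4096) = 4096 / 4 = 1024 individual u32 slots, i.e. the same 512 pairs.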
63#define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI (1 << 0)
64#define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP (1 << 1)
65
66#define NVGPU_PREEMPTION_MODE_COMPUTE_WFI (1 << 0)
67#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1)
68#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2)
69
70#define CTXSW_INTR0 BIT32(0)
71#define CTXSW_INTR1 BIT32(1)
72
73#define MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL 0x26
74
75struct tsg_gk20a;
76struct channel_gk20a;
77struct nvgpu_warpstate;
78
79enum ctxsw_addr_type;
80
81enum /* global_ctx_buffer */ {
82 CIRCULAR = 0,
83 PAGEPOOL = 1,
84 ATTRIBUTE = 2,
85 CIRCULAR_VPR = 3,
86 PAGEPOOL_VPR = 4,
87 ATTRIBUTE_VPR = 5,
88 GOLDEN_CTX = 6,
89 PRIV_ACCESS_MAP = 7,
90 /* #8 is reserved */
91 FECS_TRACE_BUFFER = 9,
92 NR_GLOBAL_CTX_BUF = 10
93};
94
95/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
96enum /*global_ctx_buffer_va */ {
97 CIRCULAR_VA = 0,
98 PAGEPOOL_VA = 1,
99 ATTRIBUTE_VA = 2,
100 GOLDEN_CTX_VA = 3,
101 PRIV_ACCESS_MAP_VA = 4,
102 /* #5 is reserved */
103 FECS_TRACE_BUFFER_VA = 6,
104 NR_GLOBAL_CTX_BUF_VA = 7
105};
106
107enum {
108 WAIT_UCODE_LOOP,
109 WAIT_UCODE_TIMEOUT,
110 WAIT_UCODE_ERROR,
111 WAIT_UCODE_OK
112};
113
114enum {
115 GR_IS_UCODE_OP_EQUAL,
116 GR_IS_UCODE_OP_NOT_EQUAL,
117 GR_IS_UCODE_OP_AND,
118 GR_IS_UCODE_OP_LESSER,
119 GR_IS_UCODE_OP_LESSER_EQUAL,
120 GR_IS_UCODE_OP_SKIP
121};
122
123enum {
124 eUcodeHandshakeInitComplete = 1,
125 eUcodeHandshakeMethodFinished
126};
127
128enum {
129 ELCG_MODE = (1 << 0),
130 BLCG_MODE = (1 << 1),
131 INVALID_MODE = (1 << 2)
132};
133
134enum {
135 NVGPU_EVENT_ID_BPT_INT = 0,
136 NVGPU_EVENT_ID_BPT_PAUSE,
137 NVGPU_EVENT_ID_BLOCKING_SYNC,
138 NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED,
139 NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE,
140 NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN,
141 NVGPU_EVENT_ID_MAX,
142};
143
144#ifndef GR_GO_IDLE_BUNDLE
145#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */
146#endif
147
148struct gr_channel_map_tlb_entry {
149 u32 curr_ctx;
150 u32 chid;
151 u32 tsgid;
152};
153
154struct gr_zcull_gk20a {
155 u32 aliquot_width;
156 u32 aliquot_height;
157 u32 aliquot_size;
158 u32 total_aliquots;
159
160 u32 width_align_pixels;
161 u32 height_align_pixels;
162 u32 pixel_squares_by_aliquots;
163};
164
165struct gr_zcull_info {
166 u32 width_align_pixels;
167 u32 height_align_pixels;
168 u32 pixel_squares_by_aliquots;
169 u32 aliquot_total;
170 u32 region_byte_multiplier;
171 u32 region_header_size;
172 u32 subregion_header_size;
173 u32 subregion_width_align_pixels;
174 u32 subregion_height_align_pixels;
175 u32 subregion_count;
176};
177
178#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */
179
180#define GK20A_STARTOF_ZBC_TABLE 1U /* index zero reserved to indicate "not ZBCd" */
181#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
182#define GK20A_ZBC_TABLE_SIZE (16 - 1)
183
184#define GK20A_ZBC_TYPE_INVALID 0
185#define GK20A_ZBC_TYPE_COLOR 1
186#define GK20A_ZBC_TYPE_DEPTH 2
187#define T19X_ZBC 3
188
189struct zbc_color_table {
190 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
191 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
192 u32 format;
193 u32 ref_cnt;
194};
195
196struct zbc_depth_table {
197 u32 depth;
198 u32 format;
199 u32 ref_cnt;
200};
201
202struct zbc_s_table {
203 u32 stencil;
204 u32 format;
205 u32 ref_cnt;
206};
207
208struct zbc_entry {
209 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
210 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
211 u32 depth;
212 u32 type; /* color or depth */
213 u32 format;
214};
215
216struct zbc_query_params {
217 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
218 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
219 u32 depth;
220 u32 ref_cnt;
221 u32 format;
222 u32 type; /* color or depth */
223 u32 index_size; /* [out] size, [in] index */
224};
225
226struct sm_info {
227 u32 gpc_index;
228 u32 tpc_index;
229 u32 sm_index;
230 u32 global_tpc_index;
231};
232
233#if defined(CONFIG_GK20A_CYCLE_STATS)
234struct gk20a_cs_snapshot_client;
235struct gk20a_cs_snapshot;
236#endif
237
238struct gr_gk20a_isr_data {
239 u32 addr;
240 u32 data_lo;
241 u32 data_hi;
242 u32 curr_ctx;
243 struct channel_gk20a *ch;
244 u32 offset;
245 u32 sub_chan;
246 u32 class_num;
247};
248
249struct gr_ctx_buffer_desc {
250 void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
251 struct nvgpu_mem mem;
252 void *priv;
253};
254
255struct nvgpu_preemption_modes_rec {
256 u32 graphics_preemption_mode_flags; /* supported preemption modes */
257 u32 compute_preemption_mode_flags; /* supported preemption modes */
258
259 u32 default_graphics_preempt_mode; /* default mode */
260 u32 default_compute_preempt_mode; /* default mode */
261};
262
263struct gr_gk20a {
264 struct gk20a *g;
265 struct {
266 bool dynamic;
267
268 u32 buffer_size;
269 u32 buffer_total_size;
270
271 bool golden_image_initialized;
272 u32 golden_image_size;
273 u32 *local_golden_image;
274
275 u32 hwpm_ctxsw_buffer_offset_map_count;
276 struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;
277
278 u32 zcull_ctxsw_image_size;
279
280 u32 pm_ctxsw_image_size;
281
282 u32 buffer_header_size;
283
284 u32 priv_access_map_size;
285
286 u32 fecs_trace_buffer_size;
287
288 struct gr_ucode_gk20a ucode;
289
290 struct av_list_gk20a sw_bundle_init;
291 struct av_list_gk20a sw_method_init;
292 struct aiv_list_gk20a sw_ctx_load;
293 struct av_list_gk20a sw_non_ctx_load;
294 struct av_list_gk20a sw_veid_bundle_init;
295 struct av64_list_gk20a sw_bundle64_init;
296 struct {
297 struct aiv_list_gk20a sys;
298 struct aiv_list_gk20a gpc;
299 struct aiv_list_gk20a tpc;
300 struct aiv_list_gk20a zcull_gpc;
301 struct aiv_list_gk20a ppc;
302 struct aiv_list_gk20a pm_sys;
303 struct aiv_list_gk20a pm_gpc;
304 struct aiv_list_gk20a pm_tpc;
305 struct aiv_list_gk20a pm_ppc;
306 struct aiv_list_gk20a perf_sys;
307 struct aiv_list_gk20a perf_gpc;
308 struct aiv_list_gk20a fbp;
309 struct aiv_list_gk20a fbp_router;
310 struct aiv_list_gk20a gpc_router;
311 struct aiv_list_gk20a pm_ltc;
312 struct aiv_list_gk20a pm_fbpa;
313 struct aiv_list_gk20a perf_sys_router;
314 struct aiv_list_gk20a perf_pma;
315 struct aiv_list_gk20a pm_rop;
316 struct aiv_list_gk20a pm_ucgpc;
317 struct aiv_list_gk20a etpc;
318 struct aiv_list_gk20a pm_cau;
319 } ctxsw_regs;
320 u32 regs_base_index;
321 bool valid;
322
323 u32 preempt_image_size;
324 bool force_preemption_gfxp;
325 bool force_preemption_cilp;
326 bool dump_ctxsw_stats_on_channel_close;
327 } ctx_vars;
328
329 struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
330 struct nvgpu_mutex fecs_mutex; /* protect fecs method */
331
332#define GR_NETLIST_DYNAMIC -1
333#define GR_NETLIST_STATIC_A 'A'
334 int netlist;
335
336 struct nvgpu_cond init_wq;
337 int initialized;
338
339 u32 num_fbps;
340
341 u32 max_comptag_lines;
342 u32 compbit_backing_size;
343 u32 comptags_per_cacheline;
344 u32 slices_per_ltc;
345 u32 cacheline_size;
346 u32 gobs_per_comptagline_per_slice;
347
348 u32 max_gpc_count;
349 u32 max_fbps_count;
350 u32 max_tpc_per_gpc_count;
351 u32 max_zcull_per_gpc_count;
352 u32 max_tpc_count;
353
354 u32 sys_count;
355 u32 gpc_count;
356 u32 pe_count_per_gpc;
357 u32 ppc_count;
358 u32 *gpc_ppc_count;
359 u32 tpc_count;
360 u32 *gpc_tpc_count;
361 u32 *gpc_tpc_mask;
362 u32 zcb_count;
363 u32 *gpc_zcb_count;
364 u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
365 u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
366 u32 *gpc_skip_mask;
367
368 u32 bundle_cb_default_size;
369 u32 min_gpm_fifo_depth;
370 u32 bundle_cb_token_limit;
371 u32 attrib_cb_default_size;
372 u32 attrib_cb_size;
373 u32 attrib_cb_gfxp_default_size;
374 u32 attrib_cb_gfxp_size;
375 u32 alpha_cb_default_size;
376 u32 alpha_cb_size;
377 u32 timeslice_mode;
378 u32 czf_bypass;
379 u32 pd_max_batches;
380 u32 gfxp_wfi_timeout_count;
381 u32 gfxp_wfi_timeout_unit;
382
383 /*
384 * The memory size (in MBytes) to deduct from max_comptag_mem.
385 * Usually close to the memory size that the running system is using.
386 */
387 u32 comptag_mem_deduct;
388
389 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
390
391 u8 *map_tiles;
392 u32 map_tile_count;
393 u32 map_row_offset;
394
395 u32 max_comptag_mem; /* max memory size (MB) for comptag */
396 struct compbit_store_desc compbit_store;
397 struct gk20a_comptag_allocator comp_tags;
398
399 struct gr_zcull_gk20a zcull;
400
401 struct nvgpu_mutex zbc_lock;
402 struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
403 struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
404 struct zbc_s_table zbc_s_tbl[GK20A_ZBC_TABLE_SIZE];
405 s32 max_default_color_index;
406 s32 max_default_depth_index;
407 s32 max_default_s_index;
408
409 u32 max_used_color_index;
410 u32 max_used_depth_index;
411 u32 max_used_s_index;
412
413#define GR_CHANNEL_MAP_TLB_SIZE		2 /* must be a power of 2 */
414 struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
415 u32 channel_tlb_flush_index;
416 struct nvgpu_spinlock ch_tlb_lock;
417
418 void (*remove_support)(struct gr_gk20a *gr);
419 bool sw_ready;
420 bool skip_ucode_init;
421
422 struct nvgpu_preemption_modes_rec preemption_mode_rec;
423
424 u32 fecs_feature_override_ecc_val;
425
426 int cilp_preempt_pending_chid;
427
428 u32 fbp_en_mask;
429 u32 *fbp_rop_l2_en_mask;
430 u32 no_of_sm;
431 struct sm_info *sm_to_cluster;
432
433#if defined(CONFIG_GK20A_CYCLE_STATS)
434 struct nvgpu_mutex cs_lock;
435 struct gk20a_cs_snapshot *cs_data;
436#endif
437 u32 max_css_buffer_size;
438};
439
440void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
441void gk20a_gpccs_dump_falcon_stats(struct gk20a *g);
442
443/* contexts associated with a TSG */
444struct nvgpu_gr_ctx {
445 struct nvgpu_mem mem;
446
447 u32 graphics_preempt_mode;
448 u32 compute_preempt_mode;
449
450 struct nvgpu_mem preempt_ctxsw_buffer;
451 struct nvgpu_mem spill_ctxsw_buffer;
452 struct nvgpu_mem betacb_ctxsw_buffer;
453 struct nvgpu_mem pagepool_ctxsw_buffer;
454 u32 ctx_id;
455 bool ctx_id_valid;
456 bool cilp_preempt_pending;
457 bool boosted_ctx;
458 bool golden_img_loaded;
459
460#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
461 u64 virt_ctx;
462#endif
463
464 struct patch_desc patch_ctx;
465 struct zcull_ctx_desc zcull_ctx;
466 struct pm_ctx_desc pm_ctx;
467 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
468 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
469 int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
470 bool global_ctx_buffer_mapped;
471
472 u32 tsgid;
473};
474
475struct gk20a_ctxsw_ucode_segment {
476 u32 offset;
477 u32 size;
478};
479
480struct gk20a_ctxsw_ucode_segments {
481 u32 boot_entry;
482 u32 boot_imem_offset;
483 u32 boot_signature;
484 struct gk20a_ctxsw_ucode_segment boot;
485 struct gk20a_ctxsw_ucode_segment code;
486 struct gk20a_ctxsw_ucode_segment data;
487};
488
489/* Checksums over the ucode files, treated as sequences of u32, which
490 * correspond to the boot_signature field in the structure above */
491
492/* The T18X FECS ucode remains the same as T21X, so
493 * FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED is also
494 * used for T18X */
495#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED 0x68edab34
496#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE 0x9121ab5c
497#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5c
498#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78
499#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b
500#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f
501
502#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2
503#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5
504#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3
505#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877
506
507#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d
508#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10
509
510#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da
511
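A hedged sketch of the checksum described in the comment above: summing the ucode image as a sequence of u32 words with wrap-around arithmetic. The helper name is hypothetical; the actual signature values in the defines come from the released ucode files.

/* Sketch only: sum a ucode blob as u32 words. compute_boot_signature() is a
 * hypothetical helper for illustration, not part of this header. */
static u32 compute_boot_signature(const u32 *words, u32 n_words)
{
	u32 sum = 0U;
	u32 i;

	for (i = 0U; i < n_words; i++) {
		sum += words[i];	/* unsigned wrap-around is well defined */
	}
	return sum;
}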
512struct gk20a_ctxsw_ucode_info {
513 u64 *p_va;
514 struct nvgpu_mem inst_blk_desc;
515 struct nvgpu_mem surface_desc;
516 struct gk20a_ctxsw_ucode_segments fecs;
517 struct gk20a_ctxsw_ucode_segments gpccs;
518};
519
520struct gk20a_ctxsw_bootloader_desc {
521 u32 start_offset;
522 u32 size;
523 u32 imem_offset;
524 u32 entry_point;
525};
526
527struct fecs_method_op_gk20a {
528 struct {
529 u32 addr;
530 u32 data;
531 } method;
532
533 struct {
534 u32 id;
535 u32 data;
536 u32 clr;
537 u32 *ret;
538 u32 ok;
539 u32 fail;
540 } mailbox;
541
542 struct {
543 u32 ok;
544 u32 fail;
545 } cond;
546
547};
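For context, callers typically fill this struct with designated initializers and hand it to gr_gk20a_submit_fecs_method_op(), declared further down in this header. The sketch below uses placeholder method, mailbox, and condition values; real method addresses and pass/fail codes are hardware specific and are assumptions here.

/* Hedged sketch: submit one FECS method and poll mailbox 0. All numeric
 * values are placeholders, not real method addresses or mailbox codes. */
struct fecs_method_op_gk20a op = {
	.method  = { .addr = 0x3U /* placeholder */, .data = ~0U },
	.mailbox = { .id = 0U, .data = ~0U, .clr = ~0U, .ret = NULL,
		     .ok = 0x1U /* placeholder pass value */, .fail = 0x2U },
	.cond    = { .ok = 0U /* placeholder opcode */, .fail = 0U },
};
int err = gr_gk20a_submit_fecs_method_op(g, op, false /* busy-wait */);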
548
549struct nvgpu_warpstate {
550 u64 valid_warps[2];
551 u64 trapped_warps[2];
552 u64 paused_warps[2];
553};
554
555struct gpu_ops;
556int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
557 struct channel_gk20a *c);
558void gk20a_init_gr(struct gk20a *g);
559int gk20a_init_gr_support(struct gk20a *g);
560int gk20a_enable_gr_hw(struct gk20a *g);
561int gk20a_gr_reset(struct gk20a *g);
562void gk20a_gr_wait_initialized(struct gk20a *g);
563
564int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
565
566int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
567
568int gk20a_gr_isr(struct gk20a *g);
569u32 gk20a_gr_nonstall_isr(struct gk20a *g);
570
571/* zcull */
572u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
573int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
574 struct channel_gk20a *c, u64 zcull_va, u32 mode);
575int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
576 struct gr_zcull_info *zcull_params);
577void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
578 u32 *zcull_map_tiles);
579/* zbc */
580int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
581 struct zbc_entry *zbc_val);
582int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
583 struct zbc_query_params *query_params);
584int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
585 struct zbc_entry *zbc_val);
586int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
587
588/* pmu */
589int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
590int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
591 struct nvgpu_mem *inst_block);
592int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);
593
594void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);
595
596/* sm */
597bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
598u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g);
599
600#define gr_gk20a_elpg_protected_call(g, func) \
601 ({ \
602 int err = 0; \
603 if (g->support_pmu) {\
604 err = nvgpu_pg_elpg_disable(g);\
605 if (err != 0) {\
606 (void)nvgpu_pg_elpg_enable(g); \
607 } \
608 } \
609 if (err == 0) { \
610 err = func; \
611 if (g->support_pmu) {\
612 (void)nvgpu_pg_elpg_enable(g); \
613 } \
614 } \
615 err; \
616 })
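A brief usage sketch for the macro above: the wrapped expression runs with ELPG disabled, and ELPG is re-enabled afterwards. gr_gk20a_init_ctx_state() is used only as an example of an int-returning call; any such expression can be substituted.

	/* Illustrative only: assumes a struct gk20a *g in scope. */
	int ret;

	ret = gr_gk20a_elpg_protected_call(g,
			gr_gk20a_init_ctx_state(g));
	if (ret != 0) {
		nvgpu_err(g, "protected call failed: %d", ret);
	}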
617
618int gk20a_gr_suspend(struct gk20a *g);
619
620struct nvgpu_dbg_reg_op;
621int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
622 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
623 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
624 bool *is_curr_ctx);
625int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
626 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
627 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
628 bool ch_is_curr_ctx);
629int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
630 u32 addr,
631 u32 max_offsets,
632 u32 *offsets, u32 *offset_addrs,
633 u32 *num_offsets,
634 bool is_quad, u32 quad);
635int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
636 u32 addr,
637 u32 max_offsets,
638 u32 *offsets, u32 *offset_addrs,
639 u32 *num_offsets);
640int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
641 struct channel_gk20a *c,
642 bool enable_smpc_ctxsw);
643int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
644 struct channel_gk20a *c,
645 u64 gpu_va,
646 u32 mode);
647
648struct nvgpu_gr_ctx;
649void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
650 u32 addr, u32 data, bool patch);
651int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
652 struct nvgpu_gr_ctx *ch_ctx,
653 bool update_patch_count);
654void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
655 struct nvgpu_gr_ctx *ch_ctx,
656 bool update_patch_count);
657void gr_gk20a_commit_global_pagepool(struct gk20a *g,
658 struct nvgpu_gr_ctx *ch_ctx,
659 u64 addr, u32 size, bool patch);
660void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
661void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
662int gr_gk20a_init_fs_state(struct gk20a *g);
663int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
664int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
665int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
666void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
667void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
668 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
669void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
670 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
671
672
673void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
674int gr_gk20a_disable_ctxsw(struct gk20a *g);
675int gr_gk20a_enable_ctxsw(struct gk20a *g);
676void gk20a_gr_resume_single_sm(struct gk20a *g,
677 u32 gpc, u32 tpc, u32 sm);
678void gk20a_gr_resume_all_sms(struct gk20a *g);
679void gk20a_gr_suspend_single_sm(struct gk20a *g,
680 u32 gpc, u32 tpc, u32 sm,
681 u32 global_esr_mask, bool check_errors);
682void gk20a_gr_suspend_all_sms(struct gk20a *g,
683 u32 global_esr_mask, bool check_errors);
684u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
685int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
686 struct channel_gk20a *ch, u64 sms, bool enable);
687bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
688int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
689 struct zbc_entry *color_val, u32 index);
690int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
691 struct zbc_entry *depth_val, u32 index);
692int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
693 struct zbc_entry *zbc_val);
694void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
695int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
696 u32 expect_delay);
697int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
698 bool *post_event, struct channel_gk20a *fault_ch,
699 u32 *hww_global_esr);
700int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
701 bool *post_event);
702int gr_gk20a_init_ctx_state(struct gk20a *g);
703int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
704 struct fecs_method_op_gk20a op,
705 bool sleepduringwait);
706int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
707 struct fecs_method_op_gk20a op,
708 bool sleepduringwait);
709int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
710 struct fecs_method_op_gk20a op);
711int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
712 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
713 u32 class, u32 padding);
714void gr_gk20a_free_gr_ctx(struct gk20a *g,
715 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
716int gr_gk20a_halt_pipe(struct gk20a *g);
717
718#if defined(CONFIG_GK20A_CYCLE_STATS)
719int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
720 u32 perfmon_id_count, /* in - number of perfmons*/
721 u32 *perfmon_id_start, /* out- index of first pm */
722 /* in/out - pointer to client data used in later */
723 struct gk20a_cs_snapshot_client *css_client);
724
725int gr_gk20a_css_detach(struct channel_gk20a *ch,
726 struct gk20a_cs_snapshot_client *css_client);
727int gr_gk20a_css_flush(struct channel_gk20a *ch,
728 struct gk20a_cs_snapshot_client *css_client);
729
730void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);
731
732#else
733/* empty stub cleanup function when cyclestats snapshots are not enabled */
734static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
735{
736 (void)g;
737}
738#endif
739
740void gr_gk20a_fecs_host_int_enable(struct gk20a *g);
741int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
742 struct gr_gk20a_isr_data *isr_data);
743int gk20a_gr_lock_down_sm(struct gk20a *g,
744 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
745 bool check_errors);
746int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
747 u32 global_esr_mask, bool check_errors);
748int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
749 u32 *mailbox_ret, u32 opc_success,
750 u32 mailbox_ok, u32 opc_fail,
751 u32 mailbox_fail, bool sleepduringwait);
752
753int gr_gk20a_get_ctx_id(struct gk20a *g,
754 struct channel_gk20a *c,
755 u32 *ctx_id);
756
757u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
758u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
759
760int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
761 u32 expect_delay);
762
763struct dbg_session_gk20a;
764
765bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
766bool gr_gk20a_resume_context(struct channel_gk20a *ch);
767int gr_gk20a_suspend_contexts(struct gk20a *g,
768 struct dbg_session_gk20a *dbg_s,
769 int *ctx_resident_ch_fd);
770int gr_gk20a_resume_contexts(struct gk20a *g,
771 struct dbg_session_gk20a *dbg_s,
772 int *ctx_resident_ch_fd);
773void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
774void gk20a_gr_enable_exceptions(struct gk20a *g);
775int gr_gk20a_trigger_suspend(struct gk20a *g);
776int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state);
777int gr_gk20a_resume_from_pause(struct gk20a *g);
778int gr_gk20a_clear_sm_errors(struct gk20a *g);
779u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
780
781int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
782
783int gr_gk20a_init_sm_id_table(struct gk20a *g);
784
785int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
786
787void gr_gk20a_write_zcull_ptr(struct gk20a *g,
788 struct nvgpu_mem *mem, u64 gpu_va);
789
790void gr_gk20a_write_pm_ptr(struct gk20a *g,
791 struct nvgpu_mem *mem, u64 gpu_va);
792
793u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
794u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
795void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
796 u32 *esr_sm_sel);
797void gk20a_gr_init_ovr_sm_dsm_perf(void);
798void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
799 u32 **ovr_perf_regs);
800void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
801 struct nvgpu_mem *mem);
802u32 gr_gk20a_get_patch_slots(struct gk20a *g);
803int gk20a_gr_handle_notify_pending(struct gk20a *g,
804 struct gr_gk20a_isr_data *isr_data);
805
806int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
807int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
808 struct channel_gk20a *c);
809int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
810 struct channel_gk20a *c, bool patch);
811
812int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
813 struct channel_gk20a *c);
814u32 gk20a_init_sw_bundle(struct gk20a *g);
815int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
816int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
817 struct gr_gk20a_isr_data *isr_data);
818int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
819 struct ctxsw_buf_offset_map_entry *map,
820 struct aiv_list_gk20a *regs,
821 u32 *count, u32 *offset,
822 u32 max_cnt, u32 base,
823 u32 num_fbpas, u32 stride, u32 mask);
824int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
825 struct aiv_list_gk20a *regs,
826 u32 *count, u32 *offset,
827 u32 max_cnt, u32 base, u32 mask);
828int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
829 enum ctxsw_addr_type *addr_type,
830 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
831 u32 *broadcast_flags);
832int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
833 u32 gpc_num,
834 u32 *priv_addr_table, u32 *t);
835int gr_gk20a_create_priv_addr_table(struct gk20a *g,
836 u32 addr,
837 u32 *priv_addr_table,
838 u32 *num_registers);
839void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
840 u32 num_fbpas,
841 u32 *priv_addr_table, u32 *t);
842int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
843 enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs,
844 u32 reg_list_ppc_count, u32 *__offset_in_segment);
845
846void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
847 struct gr_ctx_buffer_desc *desc);
848int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
849 struct gr_ctx_buffer_desc *desc, size_t size);
850void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr);
851int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g);
852#endif /*__GR_GK20A_H__*/
diff --git a/include/gk20a/gr_pri_gk20a.h b/include/gk20a/gr_pri_gk20a.h
deleted file mode 100644
index d832d90..0000000
--- a/include/gk20a/gr_pri_gk20a.h
+++ /dev/null
@@ -1,261 +0,0 @@
1/*
2 * GK20A Graphics Context Pri Register Addressing
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef GR_PRI_GK20A_H
25#define GR_PRI_GK20A_H
26
27/*
28 * These convenience macros are generally for use in the management/modification
29 * of the context state store for gr/compute contexts.
30 */
31
32/*
33 * GPC pri addressing
34 */
35static inline u32 pri_gpccs_addr_width(void)
36{
37 return 15; /*from where?*/
38}
39static inline u32 pri_gpccs_addr_mask(u32 addr)
40{
41 return addr & ((1 << pri_gpccs_addr_width()) - 1);
42}
43static inline u32 pri_gpc_addr(struct gk20a *g, u32 addr, u32 gpc)
44{
45 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
46 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
47 return gpc_base + (gpc * gpc_stride) + addr;
48}
49static inline bool pri_is_gpc_addr_shared(struct gk20a *g, u32 addr)
50{
51 u32 gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_SHARED_BASE);
52 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
53 return (addr >= gpc_shared_base) &&
54 (addr < gpc_shared_base + gpc_stride);
55}
56static inline bool pri_is_gpc_addr(struct gk20a *g, u32 addr)
57{
58 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
59 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
60 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
61 return ((addr >= gpc_base) &&
62 (addr < gpc_base + num_gpcs * gpc_stride)) ||
63 pri_is_gpc_addr_shared(g, addr);
64}
65static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr)
66{
67 u32 i, start;
68 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
69 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
70 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
71 for (i = 0; i < num_gpcs; i++) {
72 start = gpc_base + (i * gpc_stride);
73 if ((addr >= start) && (addr < (start + gpc_stride)))
74 return i;
75 }
76 return 0;
77}
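To make the arithmetic above concrete, a small hedged illustration follows; the litter values are placeholders (real values come from nvgpu_get_litter_value() per chip), but the composition and lookup pattern matches the helpers above.

/*
 * Example with assumed litter values: gpc_base = 0x500000, gpc_stride = 0x8000.
 *
 *   pri_gpc_addr(g, 0x123, 2)    = 0x500000 + 2 * 0x8000 + 0x123 = 0x510123
 *   pri_get_gpc_num(g, 0x510123) walks the per-GPC windows and returns 2,
 *   since 0x510000 <= 0x510123 < 0x518000.
 */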
78
79/*
80 * PPC pri addressing
81 */
82static inline bool pri_is_ppc_addr_shared(struct gk20a *g, u32 addr)
83{
84 u32 ppc_in_gpc_shared_base = nvgpu_get_litter_value(g,
85 GPU_LIT_PPC_IN_GPC_SHARED_BASE);
86 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
87 GPU_LIT_PPC_IN_GPC_STRIDE);
88
89 return ((addr >= ppc_in_gpc_shared_base) &&
90 (addr < (ppc_in_gpc_shared_base + ppc_in_gpc_stride)));
91}
92
93static inline bool pri_is_ppc_addr(struct gk20a *g, u32 addr)
94{
95 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
96 GPU_LIT_PPC_IN_GPC_BASE);
97 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
98 GPU_LIT_NUM_PES_PER_GPC);
99 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
100 GPU_LIT_PPC_IN_GPC_STRIDE);
101
102 return ((addr >= ppc_in_gpc_base) &&
103 (addr < ppc_in_gpc_base + num_pes_per_gpc * ppc_in_gpc_stride))
104 || pri_is_ppc_addr_shared(g, addr);
105}
106
107/*
108 * TPC pri addressing
109 */
110static inline u32 pri_tpccs_addr_width(void)
111{
112 return 11; /* from where? */
113}
114static inline u32 pri_tpccs_addr_mask(u32 addr)
115{
116 return addr & ((1 << pri_tpccs_addr_width()) - 1);
117}
118static inline u32 pri_fbpa_addr_mask(struct gk20a *g, u32 addr)
119{
120 return addr & (nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE) - 1);
121}
122static inline u32 pri_tpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc)
123{
124 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
125 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
126 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
127 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
128 return gpc_base + (gpc * gpc_stride) +
129 tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) +
130 addr;
131}
132static inline bool pri_is_tpc_addr_shared(struct gk20a *g, u32 addr)
133{
134 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
135 u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
136 return (addr >= tpc_in_gpc_shared_base) &&
137 (addr < (tpc_in_gpc_shared_base +
138 tpc_in_gpc_stride));
139}
140static inline u32 pri_fbpa_addr(struct gk20a *g, u32 addr, u32 fbpa)
141{
142 return (nvgpu_get_litter_value(g, GPU_LIT_FBPA_BASE) + addr +
143 (fbpa * nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE)));
144}
145static inline bool pri_is_fbpa_addr_shared(struct gk20a *g, u32 addr)
146{
147 u32 fbpa_shared_base = nvgpu_get_litter_value(g, GPU_LIT_FBPA_SHARED_BASE);
148 u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
149 return ((addr >= fbpa_shared_base) &&
150 (addr < (fbpa_shared_base + fbpa_stride)));
151}
152static inline bool pri_is_fbpa_addr(struct gk20a *g, u32 addr)
153{
154 u32 fbpa_base = nvgpu_get_litter_value(g, GPU_LIT_FBPA_BASE);
155 u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
156 u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
157 return (((addr >= fbpa_base) &&
158 (addr < (fbpa_base + num_fbpas * fbpa_stride)))
159 || pri_is_fbpa_addr_shared(g, addr));
160}
161/*
162 * BE pri addressing
163 */
164static inline u32 pri_becs_addr_width(void)
165{
166 return 10;/* from where? */
167}
168static inline u32 pri_becs_addr_mask(u32 addr)
169{
170 return addr & ((1 << pri_becs_addr_width()) - 1);
171}
172static inline bool pri_is_be_addr_shared(struct gk20a *g, u32 addr)
173{
174 u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE);
175 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
176 return (addr >= rop_shared_base) &&
177 (addr < rop_shared_base + rop_stride);
178}
179static inline u32 pri_be_shared_addr(struct gk20a *g, u32 addr)
180{
181 u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE);
182 return rop_shared_base + pri_becs_addr_mask(addr);
183}
184static inline bool pri_is_be_addr(struct gk20a *g, u32 addr)
185{
186 u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE);
187 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
188 return ((addr >= rop_base) &&
189 (addr < rop_base + g->ltc_count * rop_stride)) ||
190 pri_is_be_addr_shared(g, addr);
191}
192
193static inline u32 pri_get_be_num(struct gk20a *g, u32 addr)
194{
195 u32 i, start;
196 u32 num_fbps = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS);
197 u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE);
198 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
199 for (i = 0; i < num_fbps; i++) {
200 start = rop_base + (i * rop_stride);
201 if ((addr >= start) && (addr < (start + rop_stride)))
202 return i;
203 }
204 return 0;
205}
206
207/*
208 * PPC pri addressing
209 */
210static inline u32 pri_ppccs_addr_width(void)
211{
212 return 9; /* from where? */
213}
214static inline u32 pri_ppccs_addr_mask(u32 addr)
215{
216 return addr & ((1 << pri_ppccs_addr_width()) - 1);
217}
218static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc)
219{
220 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
221 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
222 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
223 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
224 return gpc_base + (gpc * gpc_stride) +
225 ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr;
226}
227
228enum ctxsw_addr_type {
229 CTXSW_ADDR_TYPE_SYS = 0,
230 CTXSW_ADDR_TYPE_GPC = 1,
231 CTXSW_ADDR_TYPE_TPC = 2,
232 CTXSW_ADDR_TYPE_BE = 3,
233 CTXSW_ADDR_TYPE_PPC = 4,
234 CTXSW_ADDR_TYPE_LTCS = 5,
235 CTXSW_ADDR_TYPE_FBPA = 6,
236 CTXSW_ADDR_TYPE_EGPC = 7,
237 CTXSW_ADDR_TYPE_ETPC = 8,
238 CTXSW_ADDR_TYPE_ROP = 9,
239 CTXSW_ADDR_TYPE_FBP = 10,
240};
241
242#define PRI_BROADCAST_FLAGS_NONE 0U
243#define PRI_BROADCAST_FLAGS_GPC BIT32(0)
244#define PRI_BROADCAST_FLAGS_TPC BIT32(1)
245#define PRI_BROADCAST_FLAGS_BE BIT32(2)
246#define PRI_BROADCAST_FLAGS_PPC BIT32(3)
247#define PRI_BROADCAST_FLAGS_LTCS BIT32(4)
248#define PRI_BROADCAST_FLAGS_LTSS BIT32(5)
249#define PRI_BROADCAST_FLAGS_FBPA BIT32(6)
250#define PRI_BROADCAST_FLAGS_EGPC BIT32(7)
251#define PRI_BROADCAST_FLAGS_ETPC BIT32(8)
252#define PRI_BROADCAST_FLAGS_PMMGPC BIT32(9)
253#define PRI_BROADCAST_FLAGS_PMM_GPCS BIT32(10)
254#define PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCA BIT32(11)
255#define PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCB BIT32(12)
256#define PRI_BROADCAST_FLAGS_PMMFBP BIT32(13)
257#define PRI_BROADCAST_FLAGS_PMM_FBPS BIT32(14)
258#define PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC BIT32(15)
259#define PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP BIT32(16)
260
261#endif /* GR_PRI_GK20A_H */
diff --git a/include/gk20a/mm_gk20a.c b/include/gk20a/mm_gk20a.c
deleted file mode 100644
index 10ca84d..0000000
--- a/include/gk20a/mm_gk20a.c
+++ /dev/null
@@ -1,654 +0,0 @@
1/*
2 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <trace/events/gk20a.h>
24
25#include <nvgpu/mm.h>
26#include <nvgpu/vm.h>
27#include <nvgpu/vm_area.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/timers.h>
31#include <nvgpu/pramin.h>
32#include <nvgpu/list.h>
33#include <nvgpu/nvgpu_mem.h>
34#include <nvgpu/allocator.h>
35#include <nvgpu/semaphore.h>
36#include <nvgpu/page_allocator.h>
37#include <nvgpu/log.h>
38#include <nvgpu/bug.h>
39#include <nvgpu/log2.h>
40#include <nvgpu/enabled.h>
41#include <nvgpu/vidmem.h>
42#include <nvgpu/sizes.h>
43#include <nvgpu/io.h>
44#include <nvgpu/utils.h>
45#include <nvgpu/channel.h>
46
47#include "gk20a.h"
48#include "mm_gk20a.h"
49#include "fence_gk20a.h"
50
51#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
52#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
53#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
54#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
55
56/*
57 * GPU mapping life cycle
58 * ======================
59 *
60 * Kernel mappings
61 * ---------------
62 *
63 * Kernel mappings are created through vm.map(..., false):
64 *
65 * - Mappings to the same allocations are reused and refcounted.
66 * - This path does not support deferred unmapping (i.e. kernel must wait for
67 * all hw operations on the buffer to complete before unmapping).
68 * - References to dmabuf are owned and managed by the (kernel) clients of
69 * the gk20a_vm layer.
70 *
71 *
72 * User space mappings
73 * -------------------
74 *
75 * User space mappings are created through as.map_buffer -> vm.map(..., true):
76 *
77 * - Mappings to the same allocations are reused and refcounted.
78 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
79 * until all hw operations have completed).
80 * - References to dmabuf are owned and managed by the vm_gk20a
81 * layer itself. vm.map acquires these refs, and sets
82 * mapped_buffer->own_mem_ref to record that we must release the refs when we
83 * actually unmap.
84 *
85 */
86
87/* make sure gk20a_init_mm_support has been called before this */
88int gk20a_init_mm_setup_hw(struct gk20a *g)
89{
90 struct mm_gk20a *mm = &g->mm;
91 int err;
92
93 nvgpu_log_fn(g, " ");
94
95 if (g->ops.fb.set_mmu_page_size) {
96 g->ops.fb.set_mmu_page_size(g);
97 }
98
99 if (g->ops.fb.set_use_full_comp_tag_line) {
100 mm->use_full_comp_tag_line =
101 g->ops.fb.set_use_full_comp_tag_line(g);
102 }
103
104 g->ops.fb.init_hw(g);
105
106 if (g->ops.bus.bar1_bind) {
107 g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);
108 }
109
110 if (g->ops.bus.bar2_bind) {
111 err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block);
112 if (err) {
113 return err;
114 }
115 }
116
117 if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g)) {
118 return -EBUSY;
119 }
120
121 nvgpu_log_fn(g, "done");
122 return 0;
123}
124
125/* for gk20a the "video memory" apertures here are misnomers. */
126static inline u32 big_valid_pde0_bits(struct gk20a *g,
127 struct nvgpu_gmmu_pd *pd, u64 addr)
128{
129 u32 pde0_bits =
130 nvgpu_aperture_mask(g, pd->mem,
131 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
132 gmmu_pde_aperture_big_sys_mem_coh_f(),
133 gmmu_pde_aperture_big_video_memory_f()) |
134 gmmu_pde_address_big_sys_f(
135 (u32)(addr >> gmmu_pde_address_shift_v()));
136
137 return pde0_bits;
138}
139
140static inline u32 small_valid_pde1_bits(struct gk20a *g,
141 struct nvgpu_gmmu_pd *pd, u64 addr)
142{
143 u32 pde1_bits =
144 nvgpu_aperture_mask(g, pd->mem,
145 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
146 gmmu_pde_aperture_small_sys_mem_coh_f(),
147 gmmu_pde_aperture_small_video_memory_f()) |
148 gmmu_pde_vol_small_true_f() | /* tbd: why? */
149 gmmu_pde_address_small_sys_f(
150 (u32)(addr >> gmmu_pde_address_shift_v()));
151
152 return pde1_bits;
153}
154
155static void update_gmmu_pde_locked(struct vm_gk20a *vm,
156 const struct gk20a_mmu_level *l,
157 struct nvgpu_gmmu_pd *pd,
158 u32 pd_idx,
159 u64 virt_addr,
160 u64 phys_addr,
161 struct nvgpu_gmmu_attrs *attrs)
162{
163 struct gk20a *g = gk20a_from_vm(vm);
164 bool small_valid, big_valid;
165 u32 pd_offset = pd_offset_from_index(l, pd_idx);
166 u32 pde_v[2] = {0, 0};
167
168 small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
169 big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG;
170
171 pde_v[0] = gmmu_pde_size_full_f();
172 pde_v[0] |= big_valid ?
173 big_valid_pde0_bits(g, pd, phys_addr) :
174 gmmu_pde_aperture_big_invalid_f();
175
176 pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
177 (gmmu_pde_aperture_small_invalid_f() |
178 gmmu_pde_vol_small_false_f()))
179 |
180 (big_valid ? (gmmu_pde_vol_big_true_f()) :
181 gmmu_pde_vol_big_false_f());
182
183 pte_dbg(g, attrs,
184 "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
185 "GPU %#-12llx phys %#-12llx "
186 "[0x%08x, 0x%08x]",
187 pd_idx, l->entry_size, pd_offset,
188 small_valid ? 'S' : '-',
189 big_valid ? 'B' : '-',
190 virt_addr, phys_addr,
191 pde_v[1], pde_v[0]);
192
193 pd_write(g, &vm->pdb, pd_offset + 0, pde_v[0]);
194 pd_write(g, &vm->pdb, pd_offset + 1, pde_v[1]);
195}
196
197static void __update_pte_sparse(u32 *pte_w)
198{
199 pte_w[0] = gmmu_pte_valid_false_f();
200 pte_w[1] |= gmmu_pte_vol_true_f();
201}
202
203static void __update_pte(struct vm_gk20a *vm,
204 u32 *pte_w,
205 u64 phys_addr,
206 struct nvgpu_gmmu_attrs *attrs)
207{
208 struct gk20a *g = gk20a_from_vm(vm);
209 u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
210 u32 pte_valid = attrs->valid ?
211 gmmu_pte_valid_true_f() :
212 gmmu_pte_valid_false_f();
213 u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
214 u32 addr = attrs->aperture == APERTURE_SYSMEM ?
215 gmmu_pte_address_sys_f(phys_shifted) :
216 gmmu_pte_address_vid_f(phys_shifted);
217 int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
218
219 pte_w[0] = pte_valid | addr;
220
221 if (attrs->priv) {
222 pte_w[0] |= gmmu_pte_privilege_true_f();
223 }
224
225 pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
226 gmmu_pte_aperture_sys_mem_ncoh_f(),
227 gmmu_pte_aperture_sys_mem_coh_f(),
228 gmmu_pte_aperture_video_memory_f()) |
229 gmmu_pte_kind_f(attrs->kind_v) |
230 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
231
232 if (attrs->ctag && vm->mm->use_full_comp_tag_line &&
233 phys_addr & 0x10000) {
234 pte_w[1] |= gmmu_pte_comptagline_f(
235 1 << (gmmu_pte_comptagline_s() - 1));
236 }
237
238 if (attrs->rw_flag == gk20a_mem_flag_read_only) {
239 pte_w[0] |= gmmu_pte_read_only_true_f();
240 pte_w[1] |= gmmu_pte_write_disable_true_f();
241 } else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
242 pte_w[1] |= gmmu_pte_read_disable_true_f();
243 }
244
245 if (!attrs->cacheable) {
246 pte_w[1] |= gmmu_pte_vol_true_f();
247 }
248
249 if (attrs->ctag) {
250 attrs->ctag += page_size;
251 }
252}
253
254static void update_gmmu_pte_locked(struct vm_gk20a *vm,
255 const struct gk20a_mmu_level *l,
256 struct nvgpu_gmmu_pd *pd,
257 u32 pd_idx,
258 u64 virt_addr,
259 u64 phys_addr,
260 struct nvgpu_gmmu_attrs *attrs)
261{
262 struct gk20a *g = gk20a_from_vm(vm);
263 u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
264 u32 pd_offset = pd_offset_from_index(l, pd_idx);
265 u32 pte_w[2] = {0, 0};
266 int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
267
268 if (phys_addr) {
269 __update_pte(vm, pte_w, phys_addr, attrs);
270 } else if (attrs->sparse) {
271 __update_pte_sparse(pte_w);
272 }
273
274 pte_dbg(g, attrs,
275 "PTE: i=%-4u size=%-2u offs=%-4u | "
276 "GPU %#-12llx phys %#-12llx "
277 "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
278 "ctag=0x%08x "
279 "[0x%08x, 0x%08x]",
280 pd_idx, l->entry_size, pd_offset,
281 virt_addr, phys_addr,
282 page_size >> 10,
283 nvgpu_gmmu_perm_str(attrs->rw_flag),
284 attrs->kind_v,
285 nvgpu_aperture_str(g, attrs->aperture),
286 attrs->cacheable ? 'C' : '-',
287 attrs->sparse ? 'S' : '-',
288 attrs->priv ? 'P' : '-',
289 attrs->valid ? 'V' : '-',
290 (u32)attrs->ctag >> ctag_shift,
291 pte_w[1], pte_w[0]);
292
293 pd_write(g, pd, pd_offset + 0, pte_w[0]);
294 pd_write(g, pd, pd_offset + 1, pte_w[1]);
295}
296
297u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
298 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
299{
300 /*
301 * big and small page sizes are the same
302 */
303 return GMMU_PAGE_SIZE_SMALL;
304}
305
306u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
307 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
308{
309 /*
310 * return invalid
311 */
312 return GMMU_NR_PAGE_SIZES;
313}
314
315const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
316 {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
317 .lo_bit = {26, 26},
318 .update_entry = update_gmmu_pde_locked,
319 .entry_size = 8,
320 .get_pgsz = gk20a_get_pde_pgsz},
321 {.hi_bit = {25, 25},
322 .lo_bit = {12, 16},
323 .update_entry = update_gmmu_pte_locked,
324 .entry_size = 8,
325 .get_pgsz = gk20a_get_pte_pgsz},
326 {.update_entry = NULL}
327};
328
329const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
330 {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
331 .lo_bit = {27, 27},
332 .update_entry = update_gmmu_pde_locked,
333 .entry_size = 8,
334 .get_pgsz = gk20a_get_pde_pgsz},
335 {.hi_bit = {26, 26},
336 .lo_bit = {12, 17},
337 .update_entry = update_gmmu_pte_locked,
338 .entry_size = 8,
339 .get_pgsz = gk20a_get_pte_pgsz},
340 {.update_entry = NULL}
341};
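As a hedged illustration of the two tables above: for the 64K big-page layout, a GPU VA contributes bits [NV_GMMU_VA_RANGE-1:26] to the PDE index and bits [25:12] (small pages) or [25:16] (big pages) to the PTE index. The helpers below are a sketch that mirrors gk20a_mm_levels_64k, not driver API.

/* Sketch only: index math implied by the hi_bit/lo_bit fields above. */
static u32 example_pde_index_64k(u64 gpu_va)
{
	return (u32)(gpu_va >> 26);		/* level 0: lo_bit 26 */
}

static u32 example_pte_index_64k(u64 gpu_va, bool big_page)
{
	u32 lo = big_page ? 16U : 12U;		/* level 1: lo_bit {12, 16} */

	return (u32)((gpu_va >> lo) & ((1U << (26U - lo)) - 1U));
}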
342
343int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
344{
345 int err = 0;
346
347 nvgpu_log_fn(ch->g, " ");
348
349 nvgpu_vm_get(vm);
350 ch->vm = vm;
351 err = channel_gk20a_commit_va(ch);
352 if (err) {
353 ch->vm = NULL;
354 }
355
356 nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s",
357 ch->chid, vm->name);
358
359 return err;
360}
361
362void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
363 struct vm_gk20a *vm)
364{
365 u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
366 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
367 u32 pdb_addr_hi = u64_hi32(pdb_addr);
368
369 nvgpu_log_info(g, "pde pa=0x%llx", pdb_addr);
370
371 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
372 nvgpu_aperture_mask(g, vm->pdb.mem,
373 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
374 ram_in_page_dir_base_target_sys_mem_coh_f(),
375 ram_in_page_dir_base_target_vid_mem_f()) |
376 ram_in_page_dir_base_vol_true_f() |
377 ram_in_page_dir_base_lo_f(pdb_addr_lo));
378
379 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
380 ram_in_page_dir_base_hi_f(pdb_addr_hi));
381}
382
383void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
384 u32 big_page_size)
385{
386 struct gk20a *g = gk20a_from_vm(vm);
387
388 nvgpu_log_info(g, "inst block phys = 0x%llx, kv = 0x%p",
389 nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
390
391 g->ops.mm.init_pdb(g, inst_block, vm);
392
393 nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
394 u64_lo32(vm->va_limit - 1) & ~0xfff);
395
396 nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
397 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));
398
399 if (big_page_size && g->ops.mm.set_big_page_size) {
400 g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
401 }
402}
403
404int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
405{
406 int err;
407
408 nvgpu_log_fn(g, " ");
409
410 err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
411 if (err) {
412 nvgpu_err(g, "%s: memory allocation failed", __func__);
413 return err;
414 }
415
416 nvgpu_log_fn(g, "done");
417 return 0;
418}
419
420int gk20a_mm_fb_flush(struct gk20a *g)
421{
422 struct mm_gk20a *mm = &g->mm;
423 u32 data;
424 int ret = 0;
425 struct nvgpu_timeout timeout;
426 u32 retries;
427
428 nvgpu_log_fn(g, " ");
429
430 gk20a_busy_noresume(g);
431 if (!g->power_on) {
432 gk20a_idle_nosuspend(g);
433 return 0;
434 }
435
436 retries = 100;
437
438 if (g->ops.mm.get_flush_retries) {
439 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB);
440 }
441
442 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
443
444 nvgpu_mutex_acquire(&mm->l2_op_lock);
445
446	/* Make sure all previous writes are committed to the L2. There is no
447	   guarantee that the writes have reached DRAM yet. This acts as a
448	   sysmembar internal to the L2. */
449
450 trace_gk20a_mm_fb_flush(g->name);
451
452 gk20a_writel(g, flush_fb_flush_r(),
453 flush_fb_flush_pending_busy_f());
454
455 do {
456 data = gk20a_readl(g, flush_fb_flush_r());
457
458 if (flush_fb_flush_outstanding_v(data) ==
459 flush_fb_flush_outstanding_true_v() ||
460 flush_fb_flush_pending_v(data) ==
461 flush_fb_flush_pending_busy_v()) {
462 nvgpu_log_info(g, "fb_flush 0x%x", data);
463 nvgpu_udelay(5);
464 } else {
465 break;
466 }
467 } while (!nvgpu_timeout_expired(&timeout));
468
469 if (nvgpu_timeout_peek_expired(&timeout)) {
470 if (g->ops.fb.dump_vpr_info) {
471 g->ops.fb.dump_vpr_info(g);
472 }
473 if (g->ops.fb.dump_wpr_info) {
474 g->ops.fb.dump_wpr_info(g);
475 }
476 ret = -EBUSY;
477 }
478
479 trace_gk20a_mm_fb_flush_done(g->name);
480
481 nvgpu_mutex_release(&mm->l2_op_lock);
482
483 gk20a_idle_nosuspend(g);
484
485 return ret;
486}
487
488static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
489{
490 u32 data;
491 struct nvgpu_timeout timeout;
492 u32 retries = 200;
493
494 trace_gk20a_mm_l2_invalidate(g->name);
495
496 if (g->ops.mm.get_flush_retries) {
497 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV);
498 }
499
500 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
501
502 /* Invalidate any clean lines from the L2 so subsequent reads go to
503 DRAM. Dirty lines are not affected by this operation. */
504 gk20a_writel(g, flush_l2_system_invalidate_r(),
505 flush_l2_system_invalidate_pending_busy_f());
506
507 do {
508 data = gk20a_readl(g, flush_l2_system_invalidate_r());
509
510 if (flush_l2_system_invalidate_outstanding_v(data) ==
511 flush_l2_system_invalidate_outstanding_true_v() ||
512 flush_l2_system_invalidate_pending_v(data) ==
513 flush_l2_system_invalidate_pending_busy_v()) {
514 nvgpu_log_info(g, "l2_system_invalidate 0x%x",
515 data);
516 nvgpu_udelay(5);
517 } else {
518 break;
519 }
520 } while (!nvgpu_timeout_expired(&timeout));
521
522 if (nvgpu_timeout_peek_expired(&timeout)) {
523 nvgpu_warn(g, "l2_system_invalidate too many retries");
524 }
525
526 trace_gk20a_mm_l2_invalidate_done(g->name);
527}
528
529void gk20a_mm_l2_invalidate(struct gk20a *g)
530{
531 struct mm_gk20a *mm = &g->mm;
532 gk20a_busy_noresume(g);
533 if (g->power_on) {
534 nvgpu_mutex_acquire(&mm->l2_op_lock);
535 gk20a_mm_l2_invalidate_locked(g);
536 nvgpu_mutex_release(&mm->l2_op_lock);
537 }
538 gk20a_idle_nosuspend(g);
539}
540
541void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
542{
543 struct mm_gk20a *mm = &g->mm;
544 u32 data;
545 struct nvgpu_timeout timeout;
546 u32 retries = 2000;
547
548 nvgpu_log_fn(g, " ");
549
550 gk20a_busy_noresume(g);
551 if (!g->power_on) {
552 goto hw_was_off;
553 }
554
555 if (g->ops.mm.get_flush_retries) {
556 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH);
557 }
558
559 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
560
561 nvgpu_mutex_acquire(&mm->l2_op_lock);
562
563 trace_gk20a_mm_l2_flush(g->name);
564
565 /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
566 as clean, so subsequent reads might hit in the L2. */
567 gk20a_writel(g, flush_l2_flush_dirty_r(),
568 flush_l2_flush_dirty_pending_busy_f());
569
570 do {
571 data = gk20a_readl(g, flush_l2_flush_dirty_r());
572
573 if (flush_l2_flush_dirty_outstanding_v(data) ==
574 flush_l2_flush_dirty_outstanding_true_v() ||
575 flush_l2_flush_dirty_pending_v(data) ==
576 flush_l2_flush_dirty_pending_busy_v()) {
577 nvgpu_log_info(g, "l2_flush_dirty 0x%x", data);
578 nvgpu_udelay(5);
579 } else {
580 break;
581 }
582 } while (!nvgpu_timeout_expired_msg(&timeout,
583 "l2_flush_dirty too many retries"));
584
585 trace_gk20a_mm_l2_flush_done(g->name);
586
587 if (invalidate) {
588 gk20a_mm_l2_invalidate_locked(g);
589 }
590
591 nvgpu_mutex_release(&mm->l2_op_lock);
592
593hw_was_off:
594 gk20a_idle_nosuspend(g);
595}
596
597void gk20a_mm_cbc_clean(struct gk20a *g)
598{
599 struct mm_gk20a *mm = &g->mm;
600 u32 data;
601 struct nvgpu_timeout timeout;
602 u32 retries = 200;
603
604 nvgpu_log_fn(g, " ");
605
606 gk20a_busy_noresume(g);
607 if (!g->power_on) {
608 goto hw_was_off;
609 }
610
611 if (g->ops.mm.get_flush_retries) {
612 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN);
613 }
614
615 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
616
617 nvgpu_mutex_acquire(&mm->l2_op_lock);
618
619 /* Flush all dirty lines from the CBC to L2 */
620 gk20a_writel(g, flush_l2_clean_comptags_r(),
621 flush_l2_clean_comptags_pending_busy_f());
622
623 do {
624 data = gk20a_readl(g, flush_l2_clean_comptags_r());
625
626 if (flush_l2_clean_comptags_outstanding_v(data) ==
627 flush_l2_clean_comptags_outstanding_true_v() ||
628 flush_l2_clean_comptags_pending_v(data) ==
629 flush_l2_clean_comptags_pending_busy_v()) {
630 nvgpu_log_info(g, "l2_clean_comptags 0x%x", data);
631 nvgpu_udelay(5);
632 } else {
633 break;
634 }
635 } while (!nvgpu_timeout_expired_msg(&timeout,
636 "l2_clean_comptags too many retries"));
637
638 nvgpu_mutex_release(&mm->l2_op_lock);
639
640hw_was_off:
641 gk20a_idle_nosuspend(g);
642}
643
644u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
645{
646 return 34;
647}
648
649const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
650 u32 big_page_size)
651{
652 return (big_page_size == SZ_64K) ?
653 gk20a_mm_levels_64k : gk20a_mm_levels_128k;
654}
diff --git a/include/gk20a/mm_gk20a.h b/include/gk20a/mm_gk20a.h
deleted file mode 100644
index 76a1621..0000000
--- a/include/gk20a/mm_gk20a.h
+++ /dev/null
@@ -1,155 +0,0 @@
1/*
2 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef MM_GK20A_H
24#define MM_GK20A_H
25
26#include <nvgpu/nvgpu_mem.h>
27#include <nvgpu/allocator.h>
28#include <nvgpu/vm.h>
29#include <nvgpu/list.h>
30#include <nvgpu/rbtree.h>
31#include <nvgpu/kref.h>
32
33enum gk20a_mem_rw_flag;
34
35struct patch_desc {
36 struct nvgpu_mem mem;
37 u32 data_count;
38};
39
40struct zcull_ctx_desc {
41 u64 gpu_va;
42 u32 ctx_attr;
43 u32 ctx_sw_mode;
44};
45
46struct pm_ctx_desc {
47 struct nvgpu_mem mem;
48 u32 pm_mode;
49};
50
51struct compbit_store_desc {
52 struct nvgpu_mem mem;
53
54 /* The value that is written to the hardware. This depends on
55	 * the number of ltcs and is not an address. */
56 u64 base_hw;
57};
58
59struct gk20a_buffer_state {
60 struct nvgpu_list_node list;
61
62 /* The valid compbits and the fence must be changed atomically. */
63 struct nvgpu_mutex lock;
64
65 /* Offset of the surface within the dma-buf whose state is
66 * described by this struct (one dma-buf can contain multiple
67 * surfaces with different states). */
68 size_t offset;
69
70 /* A bitmask of valid sets of compbits (0 = uncompressed). */
71 u32 valid_compbits;
72
73 /* The ZBC color used on this buffer. */
74 u32 zbc_color;
75
76 /* This struct reflects the state of the buffer when this
77 * fence signals. */
78 struct gk20a_fence *fence;
79};
80
81static inline struct gk20a_buffer_state *
82gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
83{
84 return (struct gk20a_buffer_state *)
85 ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list));
86};
87
88struct gk20a;
89struct channel_gk20a;
90
91int gk20a_mm_fb_flush(struct gk20a *g);
92void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
93void gk20a_mm_cbc_clean(struct gk20a *g);
94void gk20a_mm_l2_invalidate(struct gk20a *g);
95
96#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
97
98void gk20a_mm_ltc_isr(struct gk20a *g);
99
100bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
101
102int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
103void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
104 u32 big_page_size);
105int gk20a_init_mm_setup_hw(struct gk20a *g);
106
107u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
108 u64 map_offset,
109 struct nvgpu_sgt *sgt,
110 u64 buffer_offset,
111 u64 size,
112 u32 pgsz_idx,
113 u8 kind_v,
114 u32 ctag_offset,
115 u32 flags,
116 enum gk20a_mem_rw_flag rw_flag,
117 bool clear_ctags,
118 bool sparse,
119 bool priv,
120 struct vm_gk20a_mapping_batch *batch,
121 enum nvgpu_aperture aperture);
122
123void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
124 u64 vaddr,
125 u64 size,
126 u32 pgsz_idx,
127 bool va_allocated,
128 enum gk20a_mem_rw_flag rw_flag,
129 bool sparse,
130 struct vm_gk20a_mapping_batch *batch);
131
132/* vm-as interface */
133struct nvgpu_as_alloc_space_args;
134struct nvgpu_as_free_space_args;
135int gk20a_vm_release_share(struct gk20a_as_share *as_share);
136int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);
137
138void pde_range_from_vaddr_range(struct vm_gk20a *vm,
139 u64 addr_lo, u64 addr_hi,
140 u32 *pde_lo, u32 *pde_hi);
141u32 gk20a_mm_get_iommu_bit(struct gk20a *g);
142
143const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
144 u32 big_page_size);
145void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
146 struct vm_gk20a *vm);
147
148extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
149extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
150
151u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
152 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
153u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
154 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
155#endif /* MM_GK20A_H */
diff --git a/include/gk20a/pmu_gk20a.c b/include/gk20a/pmu_gk20a.c
deleted file mode 100644
index 63a32f0..0000000
--- a/include/gk20a/pmu_gk20a.c
+++ /dev/null
@@ -1,879 +0,0 @@
1/*
2 * GK20A PMU (aka. gPMU outside gk20a context)
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/timers.h>
27#include <nvgpu/kmem.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/log.h>
30#include <nvgpu/bug.h>
31#include <nvgpu/firmware.h>
32#include <nvgpu/falcon.h>
33#include <nvgpu/mm.h>
34#include <nvgpu/io.h>
35#include <nvgpu/clk_arb.h>
36#include <nvgpu/utils.h>
37#include <nvgpu/unit.h>
38
39#include "gk20a.h"
40#include "gr_gk20a.h"
41#include "pmu_gk20a.h"
42
43#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_pwr_gk20a.h>
45#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
46
47#define gk20a_dbg_pmu(g, fmt, arg...) \
48 nvgpu_log(g, gpu_dbg_pmu, fmt, ##arg)
49
50bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos)
51{
52 u32 i = 0, j = strlen(strings);
53
54 for (; i < j; i++) {
55 if (strings[i] == '%') {
56 if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
57 *hex_pos = i;
58 return true;
59 }
60 }
61 }
62 *hex_pos = -1;
63 return false;
64}
65
66static void print_pmu_trace(struct nvgpu_pmu *pmu)
67{
68 struct gk20a *g = pmu->g;
69 u32 i = 0, j = 0, k, l, m, count;
70 char part_str[40], buf[0x40];
71 void *tracebuffer;
72 char *trace;
73 u32 *trace1;
74
75 /* allocate system memory to copy pmu trace buffer */
76 tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
77 if (tracebuffer == NULL) {
78 return;
79 }
80
81 /* read pmu traces into system memory buffer */
82 nvgpu_mem_rd_n(g, &pmu->trace_buf, 0, tracebuffer,
83 GK20A_PMU_TRACE_BUFSIZE);
84
85 trace = (char *)tracebuffer;
86 trace1 = (u32 *)tracebuffer;
87
88 nvgpu_err(g, "dump PMU trace buffer");
89 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
90 for (j = 0; j < 0x40; j++) {
91 if (trace1[(i / 4) + j]) {
92 break;
93 }
94 }
95 if (j == 0x40) {
96 break;
97 }
98 count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]);
99 l = 0;
100 m = 0;
101 while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
102 if (k >= 40) {
103 break;
104 }
105 strncpy(part_str, (trace+i+20+m), k);
106 part_str[k] = '\0';
107 count += scnprintf((buf + count), 0x40, "%s0x%x",
108 part_str, trace1[(i / 4) + 1 + l]);
109 l++;
110 m += k + 2;
111 }
112
113 scnprintf((buf + count), 0x40, "%s", (trace+i+20+m));
114 nvgpu_err(g, "%s", buf);
115 }
116
117 nvgpu_kfree(g, tracebuffer);
118}
119
120u32 gk20a_pmu_get_irqdest(struct gk20a *g)
121{
122 u32 intr_dest;
123
124 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
125 intr_dest = pwr_falcon_irqdest_host_gptmr_f(0) |
126 pwr_falcon_irqdest_host_wdtmr_f(1) |
127 pwr_falcon_irqdest_host_mthd_f(0) |
128 pwr_falcon_irqdest_host_ctxsw_f(0) |
129 pwr_falcon_irqdest_host_halt_f(1) |
130 pwr_falcon_irqdest_host_exterr_f(0) |
131 pwr_falcon_irqdest_host_swgen0_f(1) |
132 pwr_falcon_irqdest_host_swgen1_f(0) |
133 pwr_falcon_irqdest_host_ext_f(0xff) |
134 pwr_falcon_irqdest_target_gptmr_f(1) |
135 pwr_falcon_irqdest_target_wdtmr_f(0) |
136 pwr_falcon_irqdest_target_mthd_f(0) |
137 pwr_falcon_irqdest_target_ctxsw_f(0) |
138 pwr_falcon_irqdest_target_halt_f(0) |
139 pwr_falcon_irqdest_target_exterr_f(0) |
140 pwr_falcon_irqdest_target_swgen0_f(0) |
141 pwr_falcon_irqdest_target_swgen1_f(0) |
142 pwr_falcon_irqdest_target_ext_f(0xff);
143
144 return intr_dest;
145}
146
147void gk20a_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable)
148{
149 struct gk20a *g = gk20a_from_pmu(pmu);
150 u32 intr_mask;
151 u32 intr_dest;
152
153 nvgpu_log_fn(g, " ");
154
155 g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_DISABLE, true,
156 mc_intr_mask_0_pmu_enabled_f());
157 g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_DISABLE, false,
158 mc_intr_mask_1_pmu_enabled_f());
159
160 nvgpu_flcn_set_irq(pmu->flcn, false, 0x0, 0x0);
161
162 if (enable) {
163 intr_dest = g->ops.pmu.get_irqdest(g);
164 /* 0=disable, 1=enable */
165 intr_mask = pwr_falcon_irqmset_gptmr_f(1) |
166 pwr_falcon_irqmset_wdtmr_f(1) |
167 pwr_falcon_irqmset_mthd_f(0) |
168 pwr_falcon_irqmset_ctxsw_f(0) |
169 pwr_falcon_irqmset_halt_f(1) |
170 pwr_falcon_irqmset_exterr_f(1) |
171 pwr_falcon_irqmset_swgen0_f(1) |
172 pwr_falcon_irqmset_swgen1_f(1);
173
174 nvgpu_flcn_set_irq(pmu->flcn, true, intr_mask, intr_dest);
175
176 g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_ENABLE, true,
177 mc_intr_mask_0_pmu_enabled_f());
178 }
179
180 nvgpu_log_fn(g, "done");
181}
182
183
184
185int pmu_bootstrap(struct nvgpu_pmu *pmu)
186{
187 struct gk20a *g = gk20a_from_pmu(pmu);
188 struct mm_gk20a *mm = &g->mm;
189 struct pmu_ucode_desc *desc = pmu->desc;
190 u64 addr_code, addr_data, addr_load;
191 u32 i, blocks, addr_args;
192
193 nvgpu_log_fn(g, " ");
194
195 gk20a_writel(g, pwr_falcon_itfen_r(),
196 gk20a_readl(g, pwr_falcon_itfen_r()) |
197 pwr_falcon_itfen_ctxen_enable_f());
198 gk20a_writel(g, pwr_pmu_new_instblk_r(),
199 pwr_pmu_new_instblk_ptr_f(
200 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
201 pwr_pmu_new_instblk_valid_f(1) |
202 pwr_pmu_new_instblk_target_sys_coh_f());
203
204 /* TBD: load all other surfaces */
205 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
206 pmu, GK20A_PMU_TRACE_BUFSIZE);
207 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu);
208 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx(
209 pmu, GK20A_PMU_DMAIDX_VIRT);
210
211 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
212 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK));
213
214 addr_args = (pwr_falcon_hwcfg_dmem_size_v(
215 gk20a_readl(g, pwr_falcon_hwcfg_r()))
216 << GK20A_PMU_DMEM_BLKSIZE2) -
217 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
218
219 nvgpu_flcn_copy_to_dmem(pmu->flcn, addr_args,
220 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
221 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
222
223 gk20a_writel(g, pwr_falcon_dmemc_r(0),
224 pwr_falcon_dmemc_offs_f(0) |
225 pwr_falcon_dmemc_blk_f(0) |
226 pwr_falcon_dmemc_aincw_f(1));
227
228 addr_code = u64_lo32((pmu->ucode.gpu_va +
229 desc->app_start_offset +
230			desc->app_resident_code_offset) >> 8);
231 addr_data = u64_lo32((pmu->ucode.gpu_va +
232 desc->app_start_offset +
233 desc->app_resident_data_offset) >> 8);
234 addr_load = u64_lo32((pmu->ucode.gpu_va +
235 desc->bootloader_start_offset) >> 8);
236
237 gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
238 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
239 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
240 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
241 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
242 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
243 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
244 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
245 gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
246 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
247
248 g->ops.pmu.write_dmatrfbase(g,
249 addr_load - (desc->bootloader_imem_offset >> 8));
250
251 blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
252
253 for (i = 0; i < blocks; i++) {
254 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
255 desc->bootloader_imem_offset + (i << 8));
256 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
257 desc->bootloader_imem_offset + (i << 8));
258 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
259 pwr_falcon_dmatrfcmd_imem_f(1) |
260 pwr_falcon_dmatrfcmd_write_f(0) |
261 pwr_falcon_dmatrfcmd_size_f(6) |
262 pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
263 }
264
265 nvgpu_flcn_bootstrap(g->pmu.flcn, desc->bootloader_entry_point);
266
267 gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
268
269 return 0;
270}
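
The copy loop above stages the PMU bootloader into IMEM in 256-byte blocks: blocks rounds bootloader_size up to the next multiple of 0x100, and each iteration advances both the IMEM and FB offsets by i << 8 before kicking a transfer through pwr_falcon_dmatrfcmd_r(). A standalone sketch of that block arithmetic, outside the driver (the size value is made up):

/* Standalone sketch of the 256-byte block math used by pmu_bootstrap(). */
#include <stdio.h>
#include <stdint.h>

static uint32_t imem_blocks(uint32_t bootloader_size)
{
	/* round up to a multiple of 256 bytes, then count 256-byte blocks */
	return ((bootloader_size + 0xFFu) & ~0xFFu) >> 8;
}

int main(void)
{
	uint32_t size = 1000;	/* hypothetical bootloader size in bytes */
	uint32_t i, blocks = imem_blocks(size);

	for (i = 0; i < blocks; i++)
		printf("block %u: offset delta 0x%x\n", i, i << 8);
	return 0;
}
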
271
272void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id)
273{
274 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(pg_engine_id),
275 PMU_PG_IDLE_THRESHOLD);
276 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(pg_engine_id),
277 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
278}
279
280int gk20a_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token)
281{
282 struct gk20a *g = gk20a_from_pmu(pmu);
283 struct pmu_mutex *mutex;
284 u32 data, owner, max_retry;
285
286 if (!pmu->initialized) {
287 return -EINVAL;
288 }
289
290 BUG_ON(!token);
291 BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
292 BUG_ON(id > pmu->mutex_cnt);
293
294 mutex = &pmu->mutex[id];
295
296 owner = pwr_pmu_mutex_value_v(
297 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
298
299 if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
300 BUG_ON(mutex->ref_cnt == 0);
301 gk20a_dbg_pmu(g, "already acquired by owner : 0x%08x", *token);
302 mutex->ref_cnt++;
303 return 0;
304 }
305
306 max_retry = 40;
307 do {
308 data = pwr_pmu_mutex_id_value_v(
309 gk20a_readl(g, pwr_pmu_mutex_id_r()));
310 if (data == pwr_pmu_mutex_id_value_init_v() ||
311 data == pwr_pmu_mutex_id_value_not_avail_v()) {
312			nvgpu_warn(g,
313				"failed to generate mutex token: val 0x%08x",
314				data);
315 nvgpu_usleep_range(20, 40);
316 continue;
317 }
318
319 owner = data;
320 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
321 pwr_pmu_mutex_value_f(owner));
322
323 data = pwr_pmu_mutex_value_v(
324 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
325
326 if (owner == data) {
327			mutex->ref_cnt = 1;
328			*token = owner;
329			gk20a_dbg_pmu(g, "mutex acquired: id=%d, token=0x%x",
330				mutex->index, *token);
331 return 0;
332 } else {
333 nvgpu_log_info(g, "fail to acquire mutex idx=0x%08x",
334 mutex->index);
335
336 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
337 data = set_field(data,
338 pwr_pmu_mutex_id_release_value_m(),
339 pwr_pmu_mutex_id_release_value_f(owner));
340 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
341
342 nvgpu_usleep_range(20, 40);
343 continue;
344 }
345 } while (max_retry-- > 0);
346
347 return -EBUSY;
348}
349
350int gk20a_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token)
351{
352 struct gk20a *g = gk20a_from_pmu(pmu);
353 struct pmu_mutex *mutex;
354 u32 owner, data;
355
356 if (!pmu->initialized) {
357 return -EINVAL;
358 }
359
360 BUG_ON(!token);
361 BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
362 BUG_ON(id > pmu->mutex_cnt);
363
364 mutex = &pmu->mutex[id];
365
366 owner = pwr_pmu_mutex_value_v(
367 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
368
369 if (*token != owner) {
370		nvgpu_err(g, "requester 0x%08x does NOT match owner 0x%08x",
371 *token, owner);
372 return -EINVAL;
373 }
374
375 if (--mutex->ref_cnt > 0) {
376 return -EBUSY;
377 }
378
379 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
380 pwr_pmu_mutex_value_initial_lock_f());
381
382 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
383 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
384 pwr_pmu_mutex_id_release_value_f(owner));
385 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
386
387 gk20a_dbg_pmu(g, "mutex released: id=%d, token=0x%x",
388 mutex->index, *token);
389
390 return 0;
391}
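
The two routines above form the host side of the PMU hardware-mutex protocol: acquire generates a token from pwr_pmu_mutex_id_r(), writes it into the mutex register and confirms ownership with a read-back (retrying up to 40 times), while release clears the register and hands the token id back. A hedged usage sketch, assuming driver context, an initialized PMU, and a mutex id that satisfies PMU_MUTEX_ID_IS_VALID(); the wrapper name is hypothetical:

/* Hypothetical wrapper: serialize a host-side access against PMU firmware. */
static int pmu_do_protected_work(struct nvgpu_pmu *pmu, u32 mutex_id)
{
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	int err;

	err = gk20a_pmu_mutex_acquire(pmu, mutex_id, &token);
	if (err != 0)
		return err;	/* -EINVAL if PMU not ready, -EBUSY on retry timeout */

	/* ... touch the resource shared with PMU firmware here ... */

	return gk20a_pmu_mutex_release(pmu, mutex_id, &token);
}
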
392
393int gk20a_pmu_queue_head(struct gk20a *g, struct nvgpu_falcon_queue *queue,
394 u32 *head, bool set)
395{
396 u32 queue_head_size = 0;
397
398 if (g->ops.pmu.pmu_get_queue_head_size) {
399 queue_head_size = g->ops.pmu.pmu_get_queue_head_size();
400 }
401
402 BUG_ON(!head || !queue_head_size);
403
404 if (PMU_IS_COMMAND_QUEUE(queue->id)) {
405
406 if (queue->index >= queue_head_size) {
407 return -EINVAL;
408 }
409
410 if (!set) {
411 *head = pwr_pmu_queue_head_address_v(
412 gk20a_readl(g,
413 g->ops.pmu.pmu_get_queue_head(queue->index)));
414 } else {
415 gk20a_writel(g,
416 g->ops.pmu.pmu_get_queue_head(queue->index),
417 pwr_pmu_queue_head_address_f(*head));
418 }
419 } else {
420 if (!set) {
421 *head = pwr_pmu_msgq_head_val_v(
422 gk20a_readl(g, pwr_pmu_msgq_head_r()));
423 } else {
424 gk20a_writel(g,
425 pwr_pmu_msgq_head_r(),
426 pwr_pmu_msgq_head_val_f(*head));
427 }
428 }
429
430 return 0;
431}
432
433int gk20a_pmu_queue_tail(struct gk20a *g, struct nvgpu_falcon_queue *queue,
434 u32 *tail, bool set)
435{
436 u32 queue_tail_size = 0;
437
438 if (g->ops.pmu.pmu_get_queue_tail_size) {
439 queue_tail_size = g->ops.pmu.pmu_get_queue_tail_size();
440 }
441
442 BUG_ON(!tail || !queue_tail_size);
443
444 if (PMU_IS_COMMAND_QUEUE(queue->id)) {
445
446 if (queue->index >= queue_tail_size) {
447 return -EINVAL;
448 }
449
450 if (!set) {
451 *tail = pwr_pmu_queue_tail_address_v(gk20a_readl(g,
452 g->ops.pmu.pmu_get_queue_tail(queue->index)));
453 } else {
454 gk20a_writel(g,
455 g->ops.pmu.pmu_get_queue_tail(queue->index),
456 pwr_pmu_queue_tail_address_f(*tail));
457 }
458
459 } else {
460 if (!set) {
461 *tail = pwr_pmu_msgq_tail_val_v(
462 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
463 } else {
464 gk20a_writel(g,
465 pwr_pmu_msgq_tail_r(),
466 pwr_pmu_msgq_tail_val_f(*tail));
467 }
468 }
469
470 return 0;
471}
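
Both queue accessors above use the same convention: the bool selects direction (false reads the register into *head/*tail, true writes it), and command queues go through per-queue registers while the message queue falls back to the single pwr_pmu_msgq_* pair. A minimal hedged sketch of a read-only caller; the helper name is hypothetical:

/* Hypothetical helper: peek the current tail of a PMU queue without writing. */
static u32 pmu_peek_queue_tail(struct gk20a *g, struct nvgpu_falcon_queue *queue)
{
	u32 tail = 0;

	(void) gk20a_pmu_queue_tail(g, queue, &tail, false);	/* false = get */
	return tail;
}
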
472
473void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set)
474{
475 struct gk20a *g = gk20a_from_pmu(pmu);
476 u32 queue_tail_size = 0;
477
478 if (g->ops.pmu.pmu_get_queue_tail_size) {
479 queue_tail_size = g->ops.pmu.pmu_get_queue_tail_size();
480 }
481
482 BUG_ON(!tail || !queue_tail_size);
483
484 if (!set) {
485 *tail = pwr_pmu_msgq_tail_val_v(
486 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
487 } else {
488 gk20a_writel(g,
489 pwr_pmu_msgq_tail_r(),
490 pwr_pmu_msgq_tail_val_f(*tail));
491 }
492}
493
494void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr)
495{
496 gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr);
497}
498
499bool gk20a_pmu_is_engine_in_reset(struct gk20a *g)
500{
501 bool status = false;
502
503 status = g->ops.mc.is_enabled(g, NVGPU_UNIT_PWR);
504
505 return status;
506}
507
508int gk20a_pmu_engine_reset(struct gk20a *g, bool do_reset)
509{
510 u32 reset_mask = g->ops.mc.reset_mask(g, NVGPU_UNIT_PWR);
511
512 if (do_reset) {
513 g->ops.mc.enable(g, reset_mask);
514 } else {
515 g->ops.mc.disable(g, reset_mask);
516 }
517
518 return 0;
519}
520
521bool gk20a_is_pmu_supported(struct gk20a *g)
522{
523 return true;
524}
525
526u32 gk20a_pmu_pg_engines_list(struct gk20a *g)
527{
528 return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS);
529}
530
531u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id)
532{
533 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) {
534 return NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING;
535 }
536
537 return 0;
538}
539
540static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
541 void *param, u32 handle, u32 status)
542{
543 struct nvgpu_pmu *pmu = param;
544 gk20a_dbg_pmu(g, "reply ZBC_TABLE_UPDATE");
545 pmu->zbc_save_done = 1;
546}
547
548void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
549{
550 struct nvgpu_pmu *pmu = &g->pmu;
551 struct pmu_cmd cmd;
552 u32 seq;
553
554 if (!pmu->pmu_ready || !entries || !pmu->zbc_ready) {
555 return;
556 }
557
558 memset(&cmd, 0, sizeof(struct pmu_cmd));
559 cmd.hdr.unit_id = PMU_UNIT_PG;
560 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
561 cmd.cmd.zbc.cmd_type = g->pmu_ver_cmd_id_zbc_table_update;
562 cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
563
564 pmu->zbc_save_done = 0;
565
566 gk20a_dbg_pmu(g, "cmd post ZBC_TABLE_UPDATE");
567 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
568 pmu_handle_zbc_msg, pmu, &seq, ~0);
569 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
570 &pmu->zbc_save_done, 1);
571 if (!pmu->zbc_save_done) {
572 nvgpu_err(g, "ZBC save timeout");
573 }
574}
575
576int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
577 struct nv_pmu_therm_msg *msg)
578{
579 struct gk20a *g = gk20a_from_pmu(pmu);
580
581 nvgpu_log_fn(g, " ");
582
583 switch (msg->msg_type) {
584 case NV_PMU_THERM_MSG_ID_EVENT_HW_SLOWDOWN_NOTIFICATION:
585 if (msg->hw_slct_msg.mask == BIT(NV_PMU_THERM_EVENT_THERMAL_1)) {
586 nvgpu_clk_arb_send_thermal_alarm(pmu->g);
587 } else {
588 gk20a_dbg_pmu(g, "Unwanted/Unregistered thermal event received %d",
589 msg->hw_slct_msg.mask);
590 }
591 break;
592 default:
593		gk20a_dbg_pmu(g, "unknown therm event received %d", msg->msg_type);
594 break;
595 }
596
597 return 0;
598}
599
600void gk20a_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu)
601{
602 struct gk20a *g = gk20a_from_pmu(pmu);
603
604 gk20a_dbg_pmu(g, "pwr_pmu_idle_mask_supp_r(3): 0x%08x",
605 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
606 gk20a_dbg_pmu(g, "pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
607 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
608 gk20a_dbg_pmu(g, "pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
609 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
610 gk20a_dbg_pmu(g, "pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
611 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
612 gk20a_dbg_pmu(g, "pwr_pmu_pg_intren_r(0): 0x%08x",
613 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
614
615 gk20a_dbg_pmu(g, "pwr_pmu_idle_count_r(3): 0x%08x",
616 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
617 gk20a_dbg_pmu(g, "pwr_pmu_idle_count_r(4): 0x%08x",
618 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
619 gk20a_dbg_pmu(g, "pwr_pmu_idle_count_r(7): 0x%08x",
620 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
621}
622
623void gk20a_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu)
624{
625 struct gk20a *g = gk20a_from_pmu(pmu);
626 unsigned int i;
627
628 for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++) {
629 nvgpu_err(g, "pwr_pmu_mailbox_r(%d) : 0x%x",
630 i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
631 }
632
633 for (i = 0; i < pwr_pmu_debug__size_1_v(); i++) {
634 nvgpu_err(g, "pwr_pmu_debug_r(%d) : 0x%x",
635 i, gk20a_readl(g, pwr_pmu_debug_r(i)));
636 }
637
638 i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
639 nvgpu_err(g, "pwr_pmu_bar0_error_status_r : 0x%x", i);
640 if (i != 0) {
641 nvgpu_err(g, "pwr_pmu_bar0_addr_r : 0x%x",
642 gk20a_readl(g, pwr_pmu_bar0_addr_r()));
643 nvgpu_err(g, "pwr_pmu_bar0_data_r : 0x%x",
644 gk20a_readl(g, pwr_pmu_bar0_data_r()));
645 nvgpu_err(g, "pwr_pmu_bar0_timeout_r : 0x%x",
646 gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
647 nvgpu_err(g, "pwr_pmu_bar0_ctl_r : 0x%x",
648 gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
649 }
650
651 i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
652 nvgpu_err(g, "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
653
654 i = gk20a_readl(g, pwr_falcon_exterrstat_r());
655 nvgpu_err(g, "pwr_falcon_exterrstat_r : 0x%x", i);
656 if (pwr_falcon_exterrstat_valid_v(i) ==
657 pwr_falcon_exterrstat_valid_true_v()) {
658 nvgpu_err(g, "pwr_falcon_exterraddr_r : 0x%x",
659 gk20a_readl(g, pwr_falcon_exterraddr_r()));
660 }
661
662 /* Print PMU F/W debug prints */
663 print_pmu_trace(pmu);
664}
665
666bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu)
667{
668 struct gk20a *g = gk20a_from_pmu(pmu);
669 u32 servicedpmuint;
670
671 servicedpmuint = pwr_falcon_irqstat_halt_true_f() |
672 pwr_falcon_irqstat_exterr_true_f() |
673 pwr_falcon_irqstat_swgen0_true_f();
674
675 if (gk20a_readl(g, pwr_falcon_irqstat_r()) & servicedpmuint) {
676 return true;
677 }
678
679 return false;
680}
681
682void gk20a_pmu_isr(struct gk20a *g)
683{
684 struct nvgpu_pmu *pmu = &g->pmu;
685 struct nvgpu_falcon_queue *queue;
686 u32 intr, mask;
687 bool recheck = false;
688
689 nvgpu_log_fn(g, " ");
690
691 nvgpu_mutex_acquire(&pmu->isr_mutex);
692 if (!pmu->isr_enabled) {
693 nvgpu_mutex_release(&pmu->isr_mutex);
694 return;
695 }
696
697 mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
698 gk20a_readl(g, pwr_falcon_irqdest_r());
699
700 intr = gk20a_readl(g, pwr_falcon_irqstat_r());
701
702 gk20a_dbg_pmu(g, "received falcon interrupt: 0x%08x", intr);
703
704 intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
705 if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
706 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
707 nvgpu_mutex_release(&pmu->isr_mutex);
708 return;
709 }
710
711 if (intr & pwr_falcon_irqstat_halt_true_f()) {
712 nvgpu_err(g, "pmu halt intr not implemented");
713 nvgpu_pmu_dump_falcon_stats(pmu);
714 if (gk20a_readl(g, pwr_pmu_mailbox_r
715 (PMU_MODE_MISMATCH_STATUS_MAILBOX_R)) ==
716 PMU_MODE_MISMATCH_STATUS_VAL) {
717 if (g->ops.pmu.dump_secure_fuses) {
718 g->ops.pmu.dump_secure_fuses(g);
719 }
720 }
721 }
722 if (intr & pwr_falcon_irqstat_exterr_true_f()) {
723 nvgpu_err(g,
724 "pmu exterr intr not implemented. Clearing interrupt.");
725 nvgpu_pmu_dump_falcon_stats(pmu);
726
727 gk20a_writel(g, pwr_falcon_exterrstat_r(),
728 gk20a_readl(g, pwr_falcon_exterrstat_r()) &
729 ~pwr_falcon_exterrstat_valid_m());
730 }
731
732 if (g->ops.pmu.handle_ext_irq) {
733 g->ops.pmu.handle_ext_irq(g, intr);
734 }
735
736 if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
737 nvgpu_pmu_process_message(pmu);
738 recheck = true;
739 }
740
741 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
742
743 if (recheck) {
744 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
745 if (!nvgpu_flcn_queue_is_empty(pmu->flcn, queue)) {
746 gk20a_writel(g, pwr_falcon_irqsset_r(),
747 pwr_falcon_irqsset_swgen0_set_f());
748 }
749 }
750
751 nvgpu_mutex_release(&pmu->isr_mutex);
752}
753
754void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
755{
756 u32 data;
757
758 /* use counter #3 for GR && CE2 busy cycles */
759 gk20a_writel(g, pwr_pmu_idle_mask_r(3),
760 pwr_pmu_idle_mask_gr_enabled_f() |
761 pwr_pmu_idle_mask_ce_2_enabled_f());
762
763 /* assign same mask setting from GR ELPG to counter #3 */
764 data = gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(0));
765 gk20a_writel(g, pwr_pmu_idle_mask_1_r(3), data);
766
767 /* disable idle filtering for counters 3 and 6 */
768 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
769 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
770 pwr_pmu_idle_ctrl_filter_m(),
771 pwr_pmu_idle_ctrl_value_busy_f() |
772 pwr_pmu_idle_ctrl_filter_disabled_f());
773 gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
774
775 /* use counter #6 for total cycles */
776 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
777 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
778 pwr_pmu_idle_ctrl_filter_m(),
779 pwr_pmu_idle_ctrl_value_always_f() |
780 pwr_pmu_idle_ctrl_filter_disabled_f());
781 gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
782
783 /*
784	 * We don't want to disturb counters #3 and #6, which are used by
785	 * perfmon, so we also wire up counters #1 and #2 to expose raw
786	 * counter readings.
787 */
788 gk20a_writel(g, pwr_pmu_idle_mask_r(1),
789 pwr_pmu_idle_mask_gr_enabled_f() |
790 pwr_pmu_idle_mask_ce_2_enabled_f());
791
792 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
793 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
794 pwr_pmu_idle_ctrl_filter_m(),
795 pwr_pmu_idle_ctrl_value_busy_f() |
796 pwr_pmu_idle_ctrl_filter_disabled_f());
797 gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
798
799 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
800 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
801 pwr_pmu_idle_ctrl_filter_m(),
802 pwr_pmu_idle_ctrl_value_always_f() |
803 pwr_pmu_idle_ctrl_filter_disabled_f());
804 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
805
806 /*
807 * use counters 4 and 0 for perfmon to log busy cycles and total cycles
808 * counter #0 overflow sets pmu idle intr status bit
809 */
810 gk20a_writel(g, pwr_pmu_idle_intr_r(),
811 pwr_pmu_idle_intr_en_f(0));
812
813 gk20a_writel(g, pwr_pmu_idle_threshold_r(0),
814 pwr_pmu_idle_threshold_value_f(0x7FFFFFFF));
815
816 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0));
817 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
818 pwr_pmu_idle_ctrl_filter_m(),
819 pwr_pmu_idle_ctrl_value_always_f() |
820 pwr_pmu_idle_ctrl_filter_disabled_f());
821 gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data);
822
823 gk20a_writel(g, pwr_pmu_idle_mask_r(4),
824 pwr_pmu_idle_mask_gr_enabled_f() |
825 pwr_pmu_idle_mask_ce_2_enabled_f());
826
827 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4));
828 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
829 pwr_pmu_idle_ctrl_filter_m(),
830 pwr_pmu_idle_ctrl_value_busy_f() |
831 pwr_pmu_idle_ctrl_filter_disabled_f());
832 gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data);
833
834 gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1));
835 gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1));
836 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
837 pwr_pmu_idle_intr_status_intr_f(1));
838}
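
With the wiring above, counter #1 accumulates GR/CE2-busy cycles and counter #2 accumulates total cycles, mirroring the #3/#6 pair reserved for perfmon. A hedged sketch of turning that raw pair into a utilization figure via the accessors defined below; the helper name is hypothetical and the 64-bit arithmetic is only for overflow safety in the illustration:

/* Hypothetical helper: sample GR/CE2 load from idle counters #1 and #2. */
static u32 pmu_sample_load_pct(struct gk20a *g)
{
	u32 busy = gk20a_pmu_read_idle_counter(g, 1);
	u32 total = gk20a_pmu_read_idle_counter(g, 2);
	u32 load = 0U;

	if (total != 0U)
		load = (u32)(((u64)busy * 100ULL) / total);

	/* restart the sampling window */
	gk20a_pmu_reset_idle_counter(g, 1);
	gk20a_pmu_reset_idle_counter(g, 2);

	return load;
}
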
839
840u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
841{
842 return pwr_pmu_idle_count_value_v(
843 gk20a_readl(g, pwr_pmu_idle_count_r(counter_id)));
844}
845
846void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
847{
848 gk20a_writel(g, pwr_pmu_idle_count_r(counter_id),
849 pwr_pmu_idle_count_reset_f(1));
850}
851
852u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g)
853{
854 return pwr_pmu_idle_intr_status_intr_v(
855 gk20a_readl(g, pwr_pmu_idle_intr_status_r()));
856}
857
858void gk20a_pmu_clear_idle_intr_status(struct gk20a *g)
859{
860 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
861 pwr_pmu_idle_intr_status_intr_f(1));
862}
863
864void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
865 struct pmu_pg_stats_data *pg_stat_data)
866{
867 struct nvgpu_pmu *pmu = &g->pmu;
868 struct pmu_pg_stats stats;
869
870 nvgpu_flcn_copy_from_dmem(pmu->flcn,
871 pmu->stat_dmem_offset[pg_engine_id],
872 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
873
874 pg_stat_data->ingating_time = stats.pg_ingating_time_us;
875 pg_stat_data->ungating_time = stats.pg_ungating_time_us;
876 pg_stat_data->gating_cnt = stats.pg_gating_cnt;
877 pg_stat_data->avg_entry_latency_us = stats.pg_avg_entry_time_us;
878 pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us;
879}
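
gk20a_pmu_elpg_statistics() copies the firmware's power-gating counters out of DMEM into a pmu_pg_stats_data record. A hedged sketch of deriving ELPG residency from the ingating/ungating times it reports, assuming driver context; the helper name is hypothetical:

/* Hypothetical helper: percentage of time spent power-gated for an engine. */
static u32 pmu_elpg_residency_pct(struct gk20a *g, u32 pg_engine_id)
{
	struct pmu_pg_stats_data stats = {0};
	u64 total;

	gk20a_pmu_elpg_statistics(g, pg_engine_id, &stats);

	total = (u64)stats.ingating_time + (u64)stats.ungating_time;
	if (total == 0ULL)
		return 0U;

	return (u32)(((u64)stats.ingating_time * 100ULL) / total);
}
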
diff --git a/include/gk20a/pmu_gk20a.h b/include/gk20a/pmu_gk20a.h
deleted file mode 100644
index 65ffd63..0000000
--- a/include/gk20a/pmu_gk20a.h
+++ /dev/null
@@ -1,80 +0,0 @@
1/*
2 * drivers/video/tegra/host/gk20a/pmu_gk20a.h
3 *
4 * GK20A PMU (aka. gPMU outside gk20a context)
5 *
6 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26#ifndef NVGPU_GK20A_PMU_GK20A_H
27#define NVGPU_GK20A_PMU_GK20A_H
28
29#include <nvgpu/flcnif_cmn.h>
30#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
31#include <nvgpu/pmu.h>
32
33struct nvgpu_firmware;
34
35#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe)
36
37bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu);
38void gk20a_pmu_isr(struct gk20a *g);
39
40u32 gk20a_pmu_pg_engines_list(struct gk20a *g);
41u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id);
42
43void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
44
45void gk20a_pmu_init_perfmon_counter(struct gk20a *g);
46
47void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id);
48
49int gk20a_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token);
50int gk20a_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token);
51
52int gk20a_pmu_queue_head(struct gk20a *g, struct nvgpu_falcon_queue *queue,
53 u32 *head, bool set);
54int gk20a_pmu_queue_tail(struct gk20a *g, struct nvgpu_falcon_queue *queue,
55 u32 *tail, bool set);
56void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
57
58u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
59void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
60
61u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g);
62void gk20a_pmu_clear_idle_intr_status(struct gk20a *g);
63
64void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr);
65bool gk20a_is_pmu_supported(struct gk20a *g);
66
67int pmu_bootstrap(struct nvgpu_pmu *pmu);
68
69void gk20a_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu);
70void gk20a_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu);
71
72void gk20a_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable);
73void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
74 void *param, u32 handle, u32 status);
75void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
76 struct pmu_pg_stats_data *pg_stat_data);
77bool gk20a_pmu_is_engine_in_reset(struct gk20a *g);
78int gk20a_pmu_engine_reset(struct gk20a *g, bool do_reset);
79u32 gk20a_pmu_get_irqdest(struct gk20a *g);
80#endif /*NVGPU_GK20A_PMU_GK20A_H*/
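
ZBC_MASK(i) above builds the entry mask that gk20a_pmu_save_zbc() sends to the PMU: bits 1 through i are set and bit 0 is always cleared, so ZBC entry 0 is never included in the update request. A standalone sketch that simply prints the expansion:

/* Standalone sketch: what ZBC_MASK() from pmu_gk20a.h expands to. */
#include <stdio.h>

#define ZBC_MASK(i)	(~(~(0) << ((i)+1)) & 0xfffe)

int main(void)
{
	int i;

	for (i = 1; i <= 4; i++)
		printf("ZBC_MASK(%d) = 0x%04x\n", i, ZBC_MASK(i));
	/* prints 0x0002, 0x0006, 0x000e, 0x001e: bits 1..i set, bit 0 clear */
	return 0;
}
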
diff --git a/include/gk20a/regops_gk20a.c b/include/gk20a/regops_gk20a.c
deleted file mode 100644
index 0aec4f8..0000000
--- a/include/gk20a/regops_gk20a.c
+++ /dev/null
@@ -1,472 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger Driver Register Ops
3 *
4 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a.h"
26#include "gr_gk20a.h"
27#include "dbg_gpu_gk20a.h"
28#include "regops_gk20a.h"
29
30#include <nvgpu/log.h>
31#include <nvgpu/bsearch.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/io.h>
34
35static int regop_bsearch_range_cmp(const void *pkey, const void *pelem)
36{
37 u32 key = *(u32 *)pkey;
38 struct regop_offset_range *prange = (struct regop_offset_range *)pelem;
39 if (key < prange->base) {
40 return -1;
41 } else if (prange->base <= key && key < (prange->base +
42 (prange->count * 4U))) {
43 return 0;
44 }
45 return 1;
46}
47
48static inline bool linear_search(u32 offset, const u32 *list, int size)
49{
50 int i;
51 for (i = 0; i < size; i++) {
52 if (list[i] == offset) {
53 return true;
54 }
55 }
56 return false;
57}
58
59/*
60 * In order to perform a context relative op the context has
61 * to be created already... which would imply that the
62 * context switch mechanism has already been put in place.
63 * So by the time we perform such an operation it should always
64 * be possible to query for the appropriate context offsets, etc.
65 *
66 * But note: while the dbg_gpu bind requires a channel fd,
67 * it doesn't require an allocated gr/compute obj at that point...
68 */
69static bool gr_context_info_available(struct gr_gk20a *gr)
70{
71 int err;
72
73 nvgpu_mutex_acquire(&gr->ctx_mutex);
74 err = !gr->ctx_vars.golden_image_initialized;
75 nvgpu_mutex_release(&gr->ctx_mutex);
76 if (err) {
77 return false;
78 }
79
80 return true;
81
82}
83
84static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
85 u32 *ctx_rd_count, u32 *ctx_wr_count,
86 struct nvgpu_dbg_reg_op *ops,
87 u32 op_count);
88
89
90int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
91 struct nvgpu_dbg_reg_op *ops,
92 u64 num_ops,
93 bool *is_current_ctx)
94{
95 int err = 0;
96 unsigned int i;
97 struct channel_gk20a *ch = NULL;
98 struct gk20a *g = dbg_s->g;
99 /*struct gr_gk20a *gr = &g->gr;*/
100 u32 data32_lo = 0, data32_hi = 0;
101 u32 ctx_rd_count = 0, ctx_wr_count = 0;
102 bool skip_read_lo, skip_read_hi;
103 bool ok;
104
105 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
106
107 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
108
109 /* For vgpu, the regops routines need to be handled in the
110 * context of the server and support for that does not exist.
111 *
112 * The two users of the regops interface are the compute driver
113 * and tools. The compute driver will work without a functional
114 * regops implementation, so we return -ENOSYS. This will allow
115 * compute apps to run with vgpu. Tools will not work in this
116 * configuration and are not required to work at this time. */
117 if (g->is_virtual) {
118 return -ENOSYS;
119 }
120
121 ok = validate_reg_ops(dbg_s,
122 &ctx_rd_count, &ctx_wr_count,
123 ops, num_ops);
124 if (!ok) {
125 nvgpu_err(g, "invalid op(s)");
126 err = -EINVAL;
127 /* each op has its own err/status */
128 goto clean_up;
129 }
130
131 /* be sure that ctx info is in place if there are ctx ops */
132 if (ctx_wr_count | ctx_rd_count) {
133 if (!gr_context_info_available(&g->gr)) {
134 nvgpu_err(g, "gr context data not available");
135 return -ENODEV;
136 }
137 }
138
139 for (i = 0; i < num_ops; i++) {
140 /* if it isn't global then it is done in the ctx ops... */
141 if (ops[i].type != REGOP(TYPE_GLOBAL)) {
142 continue;
143 }
144
145 switch (ops[i].op) {
146
147 case REGOP(READ_32):
148 ops[i].value_hi = 0;
149 ops[i].value_lo = gk20a_readl(g, ops[i].offset);
150 nvgpu_log(g, gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
151 ops[i].value_lo, ops[i].offset);
152
153 break;
154
155 case REGOP(READ_64):
156 ops[i].value_lo = gk20a_readl(g, ops[i].offset);
157 ops[i].value_hi =
158 gk20a_readl(g, ops[i].offset + 4);
159
160 nvgpu_log(g, gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
161 ops[i].value_hi, ops[i].value_lo,
162 ops[i].offset);
163 break;
164
165 case REGOP(WRITE_32):
166 case REGOP(WRITE_64):
167			/* some of this appears wonky/unnecessary, but
168			   we've kept it for compatibility with existing
169			   debugger code, just in case... */
170 skip_read_lo = skip_read_hi = false;
171 if (ops[i].and_n_mask_lo == ~(u32)0) {
172 data32_lo = ops[i].value_lo;
173 skip_read_lo = true;
174 }
175
176 if ((ops[i].op == REGOP(WRITE_64)) &&
177 (ops[i].and_n_mask_hi == ~(u32)0)) {
178 data32_hi = ops[i].value_hi;
179 skip_read_hi = true;
180 }
181
182 /* read first 32bits */
183 if (skip_read_lo == false) {
184 data32_lo = gk20a_readl(g, ops[i].offset);
185 data32_lo &= ~ops[i].and_n_mask_lo;
186 data32_lo |= ops[i].value_lo;
187 }
188
189 /* if desired, read second 32bits */
190 if ((ops[i].op == REGOP(WRITE_64)) &&
191 !skip_read_hi) {
192 data32_hi = gk20a_readl(g, ops[i].offset + 4);
193 data32_hi &= ~ops[i].and_n_mask_hi;
194 data32_hi |= ops[i].value_hi;
195 }
196
197 /* now update first 32bits */
198 gk20a_writel(g, ops[i].offset, data32_lo);
199 nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
200 data32_lo, ops[i].offset);
201 /* if desired, update second 32bits */
202 if (ops[i].op == REGOP(WRITE_64)) {
203 gk20a_writel(g, ops[i].offset + 4, data32_hi);
204 nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
205 data32_hi, ops[i].offset + 4);
206
207 }
208
209
210 break;
211
212 /* shouldn't happen as we've already screened */
213 default:
214 BUG();
215 err = -EINVAL;
216 goto clean_up;
217 break;
218 }
219 }
220
221 if (ctx_wr_count | ctx_rd_count) {
222 err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
223 ctx_wr_count, ctx_rd_count,
224 is_current_ctx);
225 if (err) {
226 nvgpu_warn(g, "failed to perform ctx ops\n");
227 goto clean_up;
228 }
229 }
230
231 clean_up:
232 nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err);
233 return err;
234
235}
236
237
238static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s,
239 struct nvgpu_dbg_reg_op *op)
240{
241 int err = 0;
242
243 op->status = REGOP(STATUS_SUCCESS);
244
245 switch (op->op) {
246 case REGOP(READ_32):
247 case REGOP(READ_64):
248 case REGOP(WRITE_32):
249 case REGOP(WRITE_64):
250 break;
251 default:
252 op->status |= REGOP(STATUS_UNSUPPORTED_OP);
253 err = -EINVAL;
254 break;
255 }
256
257 switch (op->type) {
258 case REGOP(TYPE_GLOBAL):
259 case REGOP(TYPE_GR_CTX):
260 case REGOP(TYPE_GR_CTX_TPC):
261 case REGOP(TYPE_GR_CTX_SM):
262 case REGOP(TYPE_GR_CTX_CROP):
263 case REGOP(TYPE_GR_CTX_ZROP):
264 case REGOP(TYPE_GR_CTX_QUAD):
265 break;
266 /*
267 case NVGPU_DBG_GPU_REG_OP_TYPE_FB:
268 */
269 default:
270 op->status |= REGOP(STATUS_INVALID_TYPE);
271 err = -EINVAL;
272 break;
273 }
274
275 return err;
276}
277
278static bool check_whitelists(struct dbg_session_gk20a *dbg_s,
279 struct nvgpu_dbg_reg_op *op, u32 offset)
280{
281 struct gk20a *g = dbg_s->g;
282 bool valid = false;
283 struct channel_gk20a *ch;
284
285 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
286
287 if (op->type == REGOP(TYPE_GLOBAL)) {
288 /* search global list */
289 valid = g->ops.regops.get_global_whitelist_ranges &&
290 !!bsearch(&offset,
291 g->ops.regops.get_global_whitelist_ranges(),
292 g->ops.regops.get_global_whitelist_ranges_count(),
293 sizeof(*g->ops.regops.get_global_whitelist_ranges()),
294 regop_bsearch_range_cmp);
295
296 /* if debug session and channel is bound search context list */
297 if ((!valid) && (!dbg_s->is_profiler && ch)) {
298 /* binary search context list */
299 valid = g->ops.regops.get_context_whitelist_ranges &&
300 !!bsearch(&offset,
301 g->ops.regops.get_context_whitelist_ranges(),
302 g->ops.regops.get_context_whitelist_ranges_count(),
303 sizeof(*g->ops.regops.get_context_whitelist_ranges()),
304 regop_bsearch_range_cmp);
305 }
306
307 /* if debug session and channel is bound search runcontrol list */
308 if ((!valid) && (!dbg_s->is_profiler && ch)) {
309 valid = g->ops.regops.get_runcontrol_whitelist &&
310 linear_search(offset,
311 g->ops.regops.get_runcontrol_whitelist(),
312 g->ops.regops.get_runcontrol_whitelist_count());
313 }
314 } else if (op->type == REGOP(TYPE_GR_CTX)) {
315 /* it's a context-relative op */
316 if (!ch) {
317 nvgpu_err(dbg_s->g, "can't perform ctx regop unless bound");
318 op->status = REGOP(STATUS_UNSUPPORTED_OP);
319 return valid;
320 }
321
322 /* binary search context list */
323 valid = g->ops.regops.get_context_whitelist_ranges &&
324 !!bsearch(&offset,
325 g->ops.regops.get_context_whitelist_ranges(),
326 g->ops.regops.get_context_whitelist_ranges_count(),
327 sizeof(*g->ops.regops.get_context_whitelist_ranges()),
328 regop_bsearch_range_cmp);
329
330 /* if debug session and channel is bound search runcontrol list */
331 if ((!valid) && (!dbg_s->is_profiler && ch)) {
332 valid = g->ops.regops.get_runcontrol_whitelist &&
333 linear_search(offset,
334 g->ops.regops.get_runcontrol_whitelist(),
335 g->ops.regops.get_runcontrol_whitelist_count());
336 }
337
338 } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) {
339 valid = g->ops.regops.get_qctl_whitelist &&
340 linear_search(offset,
341 g->ops.regops.get_qctl_whitelist(),
342 g->ops.regops.get_qctl_whitelist_count());
343 }
344
345 return valid;
346}
347
348/* note: the op here has already been through validate_reg_op_info */
349static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
350 struct nvgpu_dbg_reg_op *op)
351{
352 int err;
353 u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
354 bool valid = false;
355
356 op->status = 0;
357 offset = op->offset;
358
359 /* support only 24-bit 4-byte aligned offsets */
360 if (offset & 0xFF000003) {
361 nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
362 op->status |= REGOP(STATUS_INVALID_OFFSET);
363 return -EINVAL;
364 }
365
366 valid = check_whitelists(dbg_s, op, offset);
367 if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
368 valid = check_whitelists(dbg_s, op, offset + 4);
369 }
370
371 if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
372 err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
373 op->offset,
374 1,
375 &buf_offset_lo,
376 &buf_offset_addr,
377 &num_offsets,
378 op->type == REGOP(TYPE_GR_CTX_QUAD),
379 op->quad);
380 if (err) {
381 err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g,
382 op->offset,
383 1,
384 &buf_offset_lo,
385 &buf_offset_addr,
386 &num_offsets);
387
388 if (err) {
389 op->status |= REGOP(STATUS_INVALID_OFFSET);
390 return -EINVAL;
391 }
392 }
393 if (!num_offsets) {
394 op->status |= REGOP(STATUS_INVALID_OFFSET);
395 return -EINVAL;
396 }
397 }
398
399 if (!valid) {
400 nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
401 op->status |= REGOP(STATUS_INVALID_OFFSET);
402 return -EINVAL;
403 }
404
405 return 0;
406}
407
408static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
409 u32 *ctx_rd_count, u32 *ctx_wr_count,
410 struct nvgpu_dbg_reg_op *ops,
411 u32 op_count)
412{
413 u32 i;
414 bool ok = true;
415 struct gk20a *g = dbg_s->g;
416
417 /* keep going until the end so every op can get
418 * a separate error code if needed */
419 for (i = 0; i < op_count; i++) {
420
421 if (validate_reg_op_info(dbg_s, &ops[i]) != 0) {
422 ok = false;
423 }
424
425 if (reg_op_is_gr_ctx(ops[i].type)) {
426 if (reg_op_is_read(ops[i].op)) {
427 (*ctx_rd_count)++;
428 } else {
429 (*ctx_wr_count)++;
430 }
431 }
432
433		/* if the "allow_all" flag is enabled, don't validate the offset */
434 if (!g->allow_all) {
435 if (validate_reg_op_offset(dbg_s, &ops[i]) != 0) {
436 ok = false;
437 }
438 }
439 }
440
441 nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
442 *ctx_wr_count, *ctx_rd_count);
443
444 return ok;
445}
446
447/* exported for tools like cyclestats, etc */
448bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset)
449{
450 bool valid = !!bsearch(&offset,
451 g->ops.regops.get_global_whitelist_ranges(),
452 g->ops.regops.get_global_whitelist_ranges_count(),
453 sizeof(*g->ops.regops.get_global_whitelist_ranges()),
454 regop_bsearch_range_cmp);
455 return valid;
456}
457
458bool reg_op_is_gr_ctx(u8 type)
459{
460 return type == REGOP(TYPE_GR_CTX) ||
461 type == REGOP(TYPE_GR_CTX_TPC) ||
462 type == REGOP(TYPE_GR_CTX_SM) ||
463 type == REGOP(TYPE_GR_CTX_CROP) ||
464 type == REGOP(TYPE_GR_CTX_ZROP) ||
465 type == REGOP(TYPE_GR_CTX_QUAD);
466}
467
468bool reg_op_is_read(u8 op)
469{
470 return op == REGOP(READ_32) ||
471 op == REGOP(READ_64);
472}
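
The whitelist checks in the validation path above combine a binary search over sorted {base, count} ranges (regop_bsearch_range_cmp, with count expressed in 32-bit registers, hence the count * 4 span) and a linear scan of flat offset lists. A standalone sketch of the range lookup; the whitelist table here is invented for illustration:

/* Standalone sketch of the whitelist range lookup used by the regops code. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct range { uint32_t base; uint32_t count; };	/* mirrors regop_offset_range */

static int range_cmp(const void *pkey, const void *pelem)
{
	uint32_t key = *(const uint32_t *)pkey;
	const struct range *r = pelem;

	if (key < r->base)
		return -1;
	if (key < r->base + r->count * 4u)
		return 0;
	return 1;
}

int main(void)
{
	/* hypothetical whitelist: must be sorted by base for bsearch() */
	static const struct range wl[] = { { 0x100, 4 }, { 0x400, 2 } };
	uint32_t offsets[] = { 0x104, 0x110, 0x404 };
	size_t i;

	for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
		printf("0x%03x -> %s\n", offsets[i],
		       bsearch(&offsets[i], wl, 2, sizeof(wl[0]), range_cmp) ?
		       "whitelisted" : "rejected");
	return 0;
}
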
diff --git a/include/gk20a/regops_gk20a.h b/include/gk20a/regops_gk20a.h
deleted file mode 100644
index 9670587..0000000
--- a/include/gk20a/regops_gk20a.h
+++ /dev/null
@@ -1,90 +0,0 @@
1/*
2 * Tegra GK20A GPU Debugger Driver Register Ops
3 *
4 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef REGOPS_GK20A_H
25#define REGOPS_GK20A_H
26
27/*
28 * Register operations
29 * All operations are targeted towards the first channel
30 * attached to the debug session
31 */
32/* valid op values */
33#define NVGPU_DBG_REG_OP_READ_32 (0x00000000)
34#define NVGPU_DBG_REG_OP_WRITE_32 (0x00000001)
35#define NVGPU_DBG_REG_OP_READ_64 (0x00000002)
36#define NVGPU_DBG_REG_OP_WRITE_64 (0x00000003)
37/* note: 8b ops are unsupported */
38#define NVGPU_DBG_REG_OP_READ_08 (0x00000004)
39#define NVGPU_DBG_REG_OP_WRITE_08 (0x00000005)
40
41/* valid type values */
42#define NVGPU_DBG_REG_OP_TYPE_GLOBAL (0x00000000)
43#define NVGPU_DBG_REG_OP_TYPE_GR_CTX (0x00000001)
44#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_TPC (0x00000002)
45#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_SM (0x00000004)
46#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_CROP (0x00000008)
47#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_ZROP (0x00000010)
48/*#define NVGPU_DBG_REG_OP_TYPE_FB (0x00000020)*/
49#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_QUAD (0x00000040)
50
51/* valid status values */
52#define NVGPU_DBG_REG_OP_STATUS_SUCCESS (0x00000000)
53#define NVGPU_DBG_REG_OP_STATUS_INVALID_OP (0x00000001)
54#define NVGPU_DBG_REG_OP_STATUS_INVALID_TYPE (0x00000002)
55#define NVGPU_DBG_REG_OP_STATUS_INVALID_OFFSET (0x00000004)
56#define NVGPU_DBG_REG_OP_STATUS_UNSUPPORTED_OP (0x00000008)
57#define NVGPU_DBG_REG_OP_STATUS_INVALID_MASK (0x00000010)
58
59struct nvgpu_dbg_reg_op {
60 u8 op;
61 u8 type;
62 u8 status;
63 u8 quad;
64 u32 group_mask;
65 u32 sub_group_mask;
66 u32 offset;
67 u32 value_lo;
68 u32 value_hi;
69 u32 and_n_mask_lo;
70 u32 and_n_mask_hi;
71};
72
73struct regop_offset_range {
74 u32 base:24;
75 u32 count:8;
76};
77
78int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
79 struct nvgpu_dbg_reg_op *ops,
80 u64 num_ops,
81 bool *is_current_ctx);
82
83/* turn seriously unwieldy names -> something shorter */
84#define REGOP(x) NVGPU_DBG_REG_OP_##x
85
86bool reg_op_is_gr_ctx(u8 type);
87bool reg_op_is_read(u8 op);
88bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset);
89
90#endif /* REGOPS_GK20A_H */
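
For completeness, a hedged sketch of how a debugger-side caller might drive exec_regops_gk20a() using the structure and REGOP() shorthands declared in this header; the wrapper name and offset handling are illustrative, and the dbg session is assumed to already be bound to a channel where required:

/* Hypothetical wrapper: perform one global 32-bit register read via regops. */
static int read_one_global_reg(struct dbg_session_gk20a *dbg_s, u32 offset,
			       u32 *value)
{
	struct nvgpu_dbg_reg_op op = {
		.op     = REGOP(READ_32),
		.type   = REGOP(TYPE_GLOBAL),
		.offset = offset,	/* must be 4-byte aligned and whitelisted */
	};
	bool is_current_ctx = false;
	int err;

	err = exec_regops_gk20a(dbg_s, &op, 1, &is_current_ctx);
	if (err == 0 && op.status == REGOP(STATUS_SUCCESS))
		*value = op.value_lo;
	return err;
}
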