author: Joshua Bakita <bakitajoshua@gmail.com>  2023-06-28 18:24:25 -0400
committer: Joshua Bakita <bakitajoshua@gmail.com>  2023-06-28 18:24:25 -0400
commit: 01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch)
tree: 4ef34501728a087be24f4ba0af90f91486bf780b /include/gk20a/fifo_gk20a.c
parent: 306a03d18b305e4e573be3b2931978fa10679eb9 (diff)
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time being. Only a couple structs are required, so it should be fairly easy to remove this dependency at some point in the future.
Diffstat (limited to 'include/gk20a/fifo_gk20a.c')
-rw-r--r--  include/gk20a/fifo_gk20a.c  4649
1 file changed, 4649 insertions, 0 deletions
diff --git a/include/gk20a/fifo_gk20a.c b/include/gk20a/fifo_gk20a.c
new file mode 100644
index 0000000..4477f7c
--- /dev/null
+++ b/include/gk20a/fifo_gk20a.c
@@ -0,0 +1,4649 @@
1/*
2 * GK20A Graphics FIFO (gr host)
3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/mm.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/timers.h>
30#include <nvgpu/semaphore.h>
31#include <nvgpu/enabled.h>
32#include <nvgpu/kmem.h>
33#include <nvgpu/log.h>
34#include <nvgpu/soc.h>
35#include <nvgpu/atomic.h>
36#include <nvgpu/bug.h>
37#include <nvgpu/log2.h>
38#include <nvgpu/debug.h>
39#include <nvgpu/nvhost.h>
40#include <nvgpu/barrier.h>
41#include <nvgpu/ctxsw_trace.h>
42#include <nvgpu/error_notifier.h>
43#include <nvgpu/ptimer.h>
44#include <nvgpu/io.h>
45#include <nvgpu/utils.h>
46#include <nvgpu/channel.h>
47#include <nvgpu/unit.h>
48#include <nvgpu/power_features/power_features.h>
49#include <nvgpu/power_features/cg.h>
50
51#include "gk20a.h"
52#include "mm_gk20a.h"
53
54#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
55#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
56#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
57#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
58#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
59#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
60
61#include <os/linux/os_linux.h>
62
63#define FECS_METHOD_WFI_RESTORE 0x80000
64#define FECS_MAILBOX_0_ACK_RESTORE 0x4
65
66
67static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
68
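/*
 * Human-readable descriptions of PBDMA interrupt fault causes, indexed by
 * the fault code the PBDMA unit reports; presumably consumed by the PBDMA
 * interrupt logging further down in this file.
 */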
69static const char *const pbdma_intr_fault_type_desc[] = {
70 "MEMREQ timeout", "MEMACK_TIMEOUT", "MEMACK_EXTRA acks",
71 "MEMDAT_TIMEOUT", "MEMDAT_EXTRA acks", "MEMFLUSH noack",
72 "MEMOP noack", "LBCONNECT noack", "NONE - was LBREQ",
73 "LBACK_TIMEOUT", "LBACK_EXTRA acks", "LBDAT_TIMEOUT",
74 "LBDAT_EXTRA acks", "GPFIFO won't fit", "GPPTR invalid",
75 "GPENTRY invalid", "GPCRC mismatch", "PBPTR get>put",
76 "PBENTRY invld", "PBCRC mismatch", "NONE - was XBARC",
77 "METHOD invld", "METHODCRC mismat", "DEVICE sw method",
78 "[ENGINE]", "SEMAPHORE invlid", "ACQUIRE timeout",
79 "PRI forbidden", "ILLEGAL SYNCPT", "[NO_CTXSW_SEG]",
80 "PBSEG badsplit", "SIGNATURE bad"
81};
82
83u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
84 u32 engine_id[], u32 engine_id_sz,
85 u32 engine_enum)
86{
87 struct fifo_gk20a *f = NULL;
88 u32 instance_cnt = 0;
89 u32 engine_id_idx;
90 u32 active_engine_id = 0;
91 struct fifo_engine_info_gk20a *info = NULL;
92
93 if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) {
94 f = &g->fifo;
95 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
96 active_engine_id = f->active_engines_list[engine_id_idx];
97 info = &f->engine_info[active_engine_id];
98
99 if (info->engine_enum == engine_enum) {
100 if (instance_cnt < engine_id_sz) {
101 engine_id[instance_cnt] = active_engine_id;
102 ++instance_cnt;
103 } else {
104 nvgpu_log_info(g, "warning engine_id table sz is small %d",
105 engine_id_sz);
106 }
107 }
108 }
109 }
110 return instance_cnt;
111}
112
113struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id)
114{
115 struct fifo_gk20a *f = NULL;
116 u32 engine_id_idx;
117 struct fifo_engine_info_gk20a *info = NULL;
118
119 if (!g) {
120 return info;
121 }
122
123 f = &g->fifo;
124
125 if (engine_id < f->max_engines) {
126 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
127 if (engine_id == f->active_engines_list[engine_id_idx]) {
128 info = &f->engine_info[engine_id];
129 break;
130 }
131 }
132 }
133
134 if (!info) {
135 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
136 }
137
138 return info;
139}
140
141bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id)
142{
143 struct fifo_gk20a *f = NULL;
144 u32 engine_id_idx;
145 bool valid = false;
146
147 if (!g) {
148 return valid;
149 }
150
151 f = &g->fifo;
152
153 if (engine_id < f->max_engines) {
154 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
155 if (engine_id == f->active_engines_list[engine_id_idx]) {
156 valid = true;
157 break;
158 }
159 }
160 }
161
162 if (!valid) {
163 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
164 }
165
166 return valid;
167}
168
169u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g)
170{
171 u32 gr_engine_cnt = 0;
172 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
173
174 /* Consider 1st available GR engine */
175 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
176 1, ENGINE_GR_GK20A);
177
178 if (!gr_engine_cnt) {
179 nvgpu_err(g, "No GR engine available on this device!");
180 }
181
182 return gr_engine_id;
183}
184
185u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
186{
187 u32 reset_mask = 0;
188 u32 engine_enum = ENGINE_INVAL_GK20A;
189 struct fifo_gk20a *f = NULL;
190 u32 engine_id_idx;
191 struct fifo_engine_info_gk20a *engine_info;
192 u32 active_engine_id = 0;
193
194 if (!g) {
195 return reset_mask;
196 }
197
198 f = &g->fifo;
199
200 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
201 active_engine_id = f->active_engines_list[engine_id_idx];
202 engine_info = &f->engine_info[active_engine_id];
203 engine_enum = engine_info->engine_enum;
204
205 if ((engine_enum == ENGINE_GRCE_GK20A) ||
206 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
207 reset_mask |= engine_info->reset_mask;
208 }
209 }
210
211 return reset_mask;
212}
213
214u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g)
215{
216 u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g);
217 u32 engine_enum = ENGINE_INVAL_GK20A;
218 struct fifo_gk20a *f = NULL;
219 u32 engine_id_idx;
220 struct fifo_engine_info_gk20a *engine_info;
221 u32 active_engine_id = 0;
222
223 if (!g) {
224 return ce_runlist_id;
225 }
226
227 f = &g->fifo;
228
229 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
230 active_engine_id = f->active_engines_list[engine_id_idx];
231 engine_info = &f->engine_info[active_engine_id];
232 engine_enum = engine_info->engine_enum;
233
234 /* select the last available ASYNC_CE, if any */
235 if (engine_enum == ENGINE_ASYNC_CE_GK20A) {
236 ce_runlist_id = engine_info->runlist_id;
237 }
238 }
239
240 return ce_runlist_id;
241}
242
243u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
244{
245 u32 gr_engine_cnt = 0;
246 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
247 struct fifo_engine_info_gk20a *engine_info;
248 u32 gr_runlist_id = ~0;
249
250 /* Consider 1st available GR engine */
251 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
252 1, ENGINE_GR_GK20A);
253
254 if (!gr_engine_cnt) {
255 nvgpu_err(g,
256 "No GR engine available on this device!");
257 goto end;
258 }
259
260 engine_info = gk20a_fifo_get_engine_info(g, gr_engine_id);
261
262 if (engine_info) {
263 gr_runlist_id = engine_info->runlist_id;
264 } else {
265 nvgpu_err(g,
266 "gr_engine_id is not in active list/invalid %d", gr_engine_id);
267 }
268
269end:
270 return gr_runlist_id;
271}
272
273bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
274{
275 struct fifo_gk20a *f = NULL;
276 u32 engine_id_idx;
277 u32 active_engine_id;
278 struct fifo_engine_info_gk20a *engine_info;
279
280 if (!g) {
281 return false;
282 }
283
284 f = &g->fifo;
285
286 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
287 active_engine_id = f->active_engines_list[engine_id_idx];
288 engine_info = gk20a_fifo_get_engine_info(g, active_engine_id);
289 if (engine_info && (engine_info->runlist_id == runlist_id)) {
290 return true;
291 }
292 }
293
294 return false;
295}
296
297/*
298 * Link engine IDs to MMU IDs and vice versa.
299 */
300
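/*
 * The "MMU ID" is the engine's fault_id: the bit position the engine
 * occupies in fifo_intr_mmu_fault_id_r(). Those bits are not 1:1 with the
 * host engine IDs used to index engine_info[], hence these two helpers.
 */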
301static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
302{
303 u32 fault_id = FIFO_INVAL_ENGINE_ID;
304 struct fifo_engine_info_gk20a *engine_info;
305
306 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
307
308 if (engine_info) {
309 fault_id = engine_info->fault_id;
310 } else {
311 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
312 }
313 return fault_id;
314}
315
316static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
317{
318 u32 engine_id;
319 u32 active_engine_id;
320 struct fifo_engine_info_gk20a *engine_info;
321 struct fifo_gk20a *f = &g->fifo;
322
323 for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
324 active_engine_id = f->active_engines_list[engine_id];
325 engine_info = &g->fifo.engine_info[active_engine_id];
326
327 if (engine_info->fault_id == fault_id) {
328 break;
329 }
330 active_engine_id = FIFO_INVAL_ENGINE_ID;
331 }
332 return active_engine_id;
333}
334
335int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
336 u32 *inst_id)
337{
338 int ret = ENGINE_INVAL_GK20A;
339
340 nvgpu_log_info(g, "engine type %d", engine_type);
341 if (engine_type == top_device_info_type_enum_graphics_v()) {
342 ret = ENGINE_GR_GK20A;
343 } else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
344 (engine_type <= top_device_info_type_enum_copy2_v())) {
345 /* Assume all CE engines have separate runlists at this point.
346 * An ENGINE_GRCE_GK20A type CE is identified by comparing its
347 * runlist_id with the GR runlist_id in init_engine_info(). */
348 ret = ENGINE_ASYNC_CE_GK20A;
349 /* inst_id ranges from CE0 to CE2 */
350 if (inst_id) {
351 *inst_id = (engine_type - top_device_info_type_enum_copy0_v());
352 }
353 }
354
355 return ret;
356}
357
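/*
 * Builds engine_info[] by walking the top_device_info_r() table. ENUM rows
 * carry the engine/runlist/intr/reset indices, ENGINE_TYPE rows give the
 * engine class (mapped through g->ops.fifo.engine_enum_from_type()), and
 * DATA rows, when a per-chip device_info_data_parse hook exists, give
 * inst_id/pri_base/fault_id. A record is committed to engine_info[] and
 * active_engines_list once a row without the chain bit ends the entry.
 */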
358int gk20a_fifo_init_engine_info(struct fifo_gk20a *f)
359{
360 struct gk20a *g = f->g;
361 u32 i;
362 u32 max_info_entries = top_device_info__size_1_v();
363 u32 engine_enum = ENGINE_INVAL_GK20A;
364 u32 engine_id = FIFO_INVAL_ENGINE_ID;
365 u32 runlist_id = ~0;
366 u32 pbdma_id = ~0;
367 u32 intr_id = ~0;
368 u32 reset_id = ~0;
369 u32 inst_id = 0;
370 u32 pri_base = 0;
371 u32 fault_id = 0;
372 u32 gr_runlist_id = ~0;
373 bool found_pbdma_for_runlist = false;
374
375 nvgpu_log_fn(g, " ");
376
377 f->num_engines = 0;
378
379 for (i = 0; i < max_info_entries; i++) {
380 u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
381 u32 entry = top_device_info_entry_v(table_entry);
382 u32 runlist_bit;
383
384 if (entry == top_device_info_entry_enum_v()) {
385 if (top_device_info_engine_v(table_entry)) {
386 engine_id =
387 top_device_info_engine_enum_v(table_entry);
388 nvgpu_log_info(g, "info: engine_id %d",
389 top_device_info_engine_enum_v(table_entry));
390 }
391
392
393 if (top_device_info_runlist_v(table_entry)) {
394 runlist_id =
395 top_device_info_runlist_enum_v(table_entry);
396 nvgpu_log_info(g, "gr info: runlist_id %d", runlist_id);
397
398 runlist_bit = BIT(runlist_id);
399
400 found_pbdma_for_runlist = false;
401 for (pbdma_id = 0; pbdma_id < f->num_pbdma;
402 pbdma_id++) {
403 if (f->pbdma_map[pbdma_id] &
404 runlist_bit) {
405 nvgpu_log_info(g,
406 "gr info: pbdma_map[%d]=%d",
407 pbdma_id,
408 f->pbdma_map[pbdma_id]);
409 found_pbdma_for_runlist = true;
410 break;
411 }
412 }
413
414 if (!found_pbdma_for_runlist) {
415 nvgpu_err(g, "busted pbdma map");
416 return -EINVAL;
417 }
418 }
419
420 if (top_device_info_intr_v(table_entry)) {
421 intr_id =
422 top_device_info_intr_enum_v(table_entry);
423 nvgpu_log_info(g, "gr info: intr_id %d", intr_id);
424 }
425
426 if (top_device_info_reset_v(table_entry)) {
427 reset_id =
428 top_device_info_reset_enum_v(table_entry);
429 nvgpu_log_info(g, "gr info: reset_id %d",
430 reset_id);
431 }
432 } else if (entry == top_device_info_entry_engine_type_v()) {
433 u32 engine_type =
434 top_device_info_type_enum_v(table_entry);
435 engine_enum =
436 g->ops.fifo.engine_enum_from_type(g,
437 engine_type, &inst_id);
438 } else if (entry == top_device_info_entry_data_v()) {
439 /* gk20a doesn't support device_info_data packet parsing */
440 if (g->ops.fifo.device_info_data_parse) {
441 g->ops.fifo.device_info_data_parse(g,
442 table_entry, &inst_id, &pri_base,
443 &fault_id);
444 }
445 }
446
447 if (!top_device_info_chain_v(table_entry)) {
448 if (engine_enum < ENGINE_INVAL_GK20A) {
449 struct fifo_engine_info_gk20a *info =
450 &g->fifo.engine_info[engine_id];
451
452 info->intr_mask |= BIT(intr_id);
453 info->reset_mask |= BIT(reset_id);
454 info->runlist_id = runlist_id;
455 info->pbdma_id = pbdma_id;
456 info->inst_id = inst_id;
457 info->pri_base = pri_base;
458
459 if (engine_enum == ENGINE_GR_GK20A) {
460 gr_runlist_id = runlist_id;
461 }
462
463 /* GR and GR_COPY share the same runlist_id */
464 if ((engine_enum == ENGINE_ASYNC_CE_GK20A) &&
465 (gr_runlist_id == runlist_id)) {
466 engine_enum = ENGINE_GRCE_GK20A;
467 }
468
469 info->engine_enum = engine_enum;
470
471 if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A)) {
472 fault_id = 0x1b;
473 }
474 info->fault_id = fault_id;
475
476 /* engine_id ranges from 0 to NV_HOST_NUM_ENGINES */
477 f->active_engines_list[f->num_engines] = engine_id;
478
479 ++f->num_engines;
480
481 engine_enum = ENGINE_INVAL_GK20A;
482 }
483 }
484 }
485
486 return 0;
487}
488
489u32 gk20a_fifo_act_eng_interrupt_mask(struct gk20a *g, u32 act_eng_id)
490{
491 struct fifo_engine_info_gk20a *engine_info = NULL;
492
493 engine_info = gk20a_fifo_get_engine_info(g, act_eng_id);
494 if (engine_info) {
495 return engine_info->intr_mask;
496 }
497
498 return 0;
499}
500
501u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
502{
503 u32 eng_intr_mask = 0;
504 unsigned int i;
505 u32 active_engine_id = 0;
506 u32 engine_enum = ENGINE_INVAL_GK20A;
507
508 for (i = 0; i < g->fifo.num_engines; i++) {
509 u32 intr_mask;
510 active_engine_id = g->fifo.active_engines_list[i];
511 intr_mask = g->fifo.engine_info[active_engine_id].intr_mask;
512 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
513 if (((engine_enum == ENGINE_GRCE_GK20A) ||
514 (engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
515 (!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall)) {
516 continue;
517 }
518
519 eng_intr_mask |= intr_mask;
520 }
521
522 return eng_intr_mask;
523}
524
525void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
526{
527 u32 i;
528 u32 runlist_id;
529 struct fifo_runlist_info_gk20a *runlist;
530 struct gk20a *g = NULL;
531
532 if (!f || !f->runlist_info) {
533 return;
534 }
535
536 g = f->g;
537
538 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
539 runlist = &f->runlist_info[runlist_id];
540 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
541 nvgpu_dma_free(g, &runlist->mem[i]);
542 }
543
544 nvgpu_kfree(g, runlist->active_channels);
545 runlist->active_channels = NULL;
546
547 nvgpu_kfree(g, runlist->active_tsgs);
548 runlist->active_tsgs = NULL;
549
550 nvgpu_mutex_destroy(&runlist->runlist_lock);
551
552 }
553 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
554 f->max_runlists));
555
556 nvgpu_kfree(g, f->runlist_info);
557 f->runlist_info = NULL;
558 f->max_runlists = 0;
559}
560
561static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
562{
563 struct gk20a *g = f->g;
564 unsigned int i = 0;
565
566 nvgpu_log_fn(g, " ");
567
568 nvgpu_channel_worker_deinit(g);
569 /*
570 * Make sure all channels are closed before deleting them.
571 */
572 for (; i < f->num_channels; i++) {
573 struct channel_gk20a *c = f->channel + i;
574 struct tsg_gk20a *tsg = f->tsg + i;
575
576 /*
577 * Could race, but the worst that happens is we get an error message
578 * from gk20a_free_channel() complaining about multiple closes.
579 */
580 if (c->referenceable) {
581 __gk20a_channel_kill(c);
582 }
583
584 nvgpu_mutex_destroy(&tsg->event_id_list_lock);
585
586 nvgpu_mutex_destroy(&c->ioctl_lock);
587 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
588 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
589 nvgpu_mutex_destroy(&c->sync_lock);
590#if defined(CONFIG_GK20A_CYCLE_STATS)
591 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
592 nvgpu_mutex_destroy(&c->cs_client_mutex);
593#endif
594 nvgpu_mutex_destroy(&c->dbg_s_lock);
595
596 }
597
598 nvgpu_vfree(g, f->channel);
599 nvgpu_vfree(g, f->tsg);
600 if (g->ops.mm.is_bar1_supported(g)) {
601 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
602 } else {
603 nvgpu_dma_free(g, &f->userd);
604 }
605
606 gk20a_fifo_delete_runlist(f);
607
608 nvgpu_kfree(g, f->pbdma_map);
609 f->pbdma_map = NULL;
610 nvgpu_kfree(g, f->engine_info);
611 f->engine_info = NULL;
612 nvgpu_kfree(g, f->active_engines_list);
613 f->active_engines_list = NULL;
614}
615
616/* reads info from hardware and fills in pbdma exception info record */
617static inline void get_exception_pbdma_info(
618 struct gk20a *g,
619 struct fifo_engine_info_gk20a *eng_info)
620{
621 struct fifo_pbdma_exception_info_gk20a *e =
622 &eng_info->pbdma_exception_info;
623
624 u32 pbdma_status_r = e->status_r = gk20a_readl(g,
625 fifo_pbdma_status_r(eng_info->pbdma_id));
626 e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
627 e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
628 fifo_pbdma_status_id_type_chid_v();
629 e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
630 e->next_id_is_chid =
631 fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
632 fifo_pbdma_status_next_id_type_chid_v();
633 e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
634 e->chsw_in_progress =
635 fifo_pbdma_status_chsw_v(pbdma_status_r) ==
636 fifo_pbdma_status_chsw_in_progress_v();
637}
638
639static void fifo_pbdma_exception_status(struct gk20a *g,
640 struct fifo_engine_info_gk20a *eng_info)
641{
642 struct fifo_pbdma_exception_info_gk20a *e;
643 get_exception_pbdma_info(g, eng_info);
644 e = &eng_info->pbdma_exception_info;
645
646 nvgpu_log_fn(g, "pbdma_id %d, "
647 "id_type %s, id %d, chan_status %d, "
648 "next_id_type %s, next_id %d, "
649 "chsw_in_progress %d",
650 eng_info->pbdma_id,
651 e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
652 e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
653 e->chsw_in_progress);
654}
655
656/* reads info from hardware and fills in engine exception info record */
657static inline void get_exception_engine_info(
658 struct gk20a *g,
659 struct fifo_engine_info_gk20a *eng_info)
660{
661 struct fifo_engine_exception_info_gk20a *e =
662 &eng_info->engine_exception_info;
663 u32 engine_status_r = e->status_r =
664 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
665 e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
666 e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
667 fifo_engine_status_id_type_chid_v();
668 e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
669 e->faulted =
670 fifo_engine_status_faulted_v(engine_status_r) ==
671 fifo_engine_status_faulted_true_v();
672 e->idle =
673 fifo_engine_status_engine_v(engine_status_r) ==
674 fifo_engine_status_engine_idle_v();
675 e->ctxsw_in_progress =
676 fifo_engine_status_ctxsw_v(engine_status_r) ==
677 fifo_engine_status_ctxsw_in_progress_v();
678}
679
680static void fifo_engine_exception_status(struct gk20a *g,
681 struct fifo_engine_info_gk20a *eng_info)
682{
683 struct fifo_engine_exception_info_gk20a *e;
684 get_exception_engine_info(g, eng_info);
685 e = &eng_info->engine_exception_info;
686
687 nvgpu_log_fn(g, "engine_id %d, id_type %s, id %d, ctx_status %d, "
688 "faulted %d, idle %d, ctxsw_in_progress %d, ",
689 eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
690 e->id, e->ctx_status_v,
691 e->faulted, e->idle, e->ctxsw_in_progress);
692}
693
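/*
 * Allocates one fifo_runlist_info_gk20a per runlist: active channel/TSG
 * bitmaps and MAX_RUNLIST_BUFFERS DMA buffers for the runlist itself
 * (cur_buffer tracks which one, if any, is currently pinned), plus the
 * PBDMA and engine bitmasks derived from pbdma_map[] and engine_info[].
 */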
694static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
695{
696 struct fifo_runlist_info_gk20a *runlist;
697 struct fifo_engine_info_gk20a *engine_info;
698 unsigned int runlist_id;
699 u32 i;
700 size_t runlist_size;
701 u32 active_engine_id, pbdma_id, engine_id;
702 int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
703 NVGPU_DMA_FORCE_CONTIGUOUS : 0;
704 int err = 0;
705
706 nvgpu_log_fn(g, " ");
707
708 f->max_runlists = g->ops.fifo.eng_runlist_base_size();
709 f->runlist_info = nvgpu_kzalloc(g,
710 sizeof(struct fifo_runlist_info_gk20a) *
711 f->max_runlists);
712 if (!f->runlist_info) {
713 goto clean_up_runlist;
714 }
715
716 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
717 f->max_runlists));
718
719 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
720 runlist = &f->runlist_info[runlist_id];
721
722 runlist->active_channels =
723 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
724 BITS_PER_BYTE));
725 if (!runlist->active_channels) {
726 goto clean_up_runlist;
727 }
728
729 runlist->active_tsgs =
730 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
731 BITS_PER_BYTE));
732 if (!runlist->active_tsgs) {
733 goto clean_up_runlist;
734 }
735
736 runlist_size = f->runlist_entry_size * f->num_runlist_entries;
737 nvgpu_log(g, gpu_dbg_info,
738 "runlist_entries %d runlist size %zu",
739 f->num_runlist_entries, runlist_size);
740
741 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
742 err = nvgpu_dma_alloc_flags_sys(g, flags,
743 runlist_size,
744 &runlist->mem[i]);
745 if (err) {
746 nvgpu_err(g, "memory allocation failed");
747 goto clean_up_runlist;
748 }
749 }
750
751 err = nvgpu_mutex_init(&runlist->runlist_lock);
752 if (err != 0) {
753 nvgpu_err(g,
754 "Error in runlist_lock mutex initialization");
755 goto clean_up_runlist;
756 }
757
758 /* None of the buffers is pinned if this value doesn't change.
759 * Otherwise, one of them (cur_buffer) must have been pinned. */
760 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
761
762 for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
763 if (f->pbdma_map[pbdma_id] & BIT(runlist_id)) {
764 runlist->pbdma_bitmask |= BIT(pbdma_id);
765 }
766 }
767 nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x",
768 runlist_id, runlist->pbdma_bitmask);
769
770 for (engine_id = 0; engine_id < f->num_engines; ++engine_id) {
771 active_engine_id = f->active_engines_list[engine_id];
772 engine_info = &f->engine_info[active_engine_id];
773
774 if (engine_info && engine_info->runlist_id == runlist_id) {
775 runlist->eng_bitmask |= BIT(active_engine_id);
776 }
777 }
778 nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x",
779 runlist_id, runlist->eng_bitmask);
780 }
781
782 nvgpu_log_fn(g, "done");
783 return 0;
784
785clean_up_runlist:
786 gk20a_fifo_delete_runlist(f);
787 nvgpu_log_fn(g, "fail");
788 return err;
789}
790
791u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g)
792{
793 u32 intr_0_error_mask =
794 fifo_intr_0_bind_error_pending_f() |
795 fifo_intr_0_sched_error_pending_f() |
796 fifo_intr_0_chsw_error_pending_f() |
797 fifo_intr_0_fb_flush_timeout_pending_f() |
798 fifo_intr_0_dropped_mmu_fault_pending_f() |
799 fifo_intr_0_mmu_fault_pending_f() |
800 fifo_intr_0_lb_error_pending_f() |
801 fifo_intr_0_pio_error_pending_f();
802
803 return intr_0_error_mask;
804}
805
806static u32 gk20a_fifo_intr_0_en_mask(struct gk20a *g)
807{
808 u32 intr_0_en_mask;
809
810 intr_0_en_mask = g->ops.fifo.intr_0_error_mask(g);
811
812 intr_0_en_mask |= fifo_intr_0_runlist_event_pending_f() |
813 fifo_intr_0_pbdma_intr_pending_f();
814
815 return intr_0_en_mask;
816}
817
818int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
819{
820 u32 intr_stall;
821 u32 mask;
822 u32 timeout;
823 unsigned int i;
824 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
825
826 nvgpu_log_fn(g, " ");
827
828 /* enable pmc pfifo */
829 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_FIFO));
830
831 nvgpu_cg_slcg_fifo_load_enable(g);
832
833 nvgpu_cg_blcg_fifo_load_enable(g);
834
835 timeout = gk20a_readl(g, fifo_fb_timeout_r());
836 timeout = set_field(timeout, fifo_fb_timeout_period_m(),
837 fifo_fb_timeout_period_max_f());
838 nvgpu_log_info(g, "fifo_fb_timeout reg val = 0x%08x", timeout);
839 gk20a_writel(g, fifo_fb_timeout_r(), timeout);
840
841 /* write pbdma timeout value */
842 for (i = 0; i < host_num_pbdma; i++) {
843 timeout = gk20a_readl(g, pbdma_timeout_r(i));
844 timeout = set_field(timeout, pbdma_timeout_period_m(),
845 pbdma_timeout_period_max_f());
846 nvgpu_log_info(g, "pbdma_timeout reg val = 0x%08x", timeout);
847 gk20a_writel(g, pbdma_timeout_r(i), timeout);
848 }
849 if (g->ops.fifo.apply_pb_timeout) {
850 g->ops.fifo.apply_pb_timeout(g);
851 }
852
853 if (g->ops.fifo.apply_ctxsw_timeout_intr) {
854 g->ops.fifo.apply_ctxsw_timeout_intr(g);
855 } else {
856 timeout = g->fifo_eng_timeout_us;
857 timeout = scale_ptimer(timeout,
858 ptimer_scalingfactor10x(g->ptimer_src_freq));
859 timeout |= fifo_eng_timeout_detection_enabled_f();
860 gk20a_writel(g, fifo_eng_timeout_r(), timeout);
861 }
862
863 /* clear and enable pbdma interrupt */
864 for (i = 0; i < host_num_pbdma; i++) {
865 gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
866 gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
867
868 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
869 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
870 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
871 nvgpu_log_info(g, "pbdma id:%u, intr_en_0 0x%08x", i, intr_stall);
872 gk20a_writel(g, pbdma_intr_en_0_r(i), intr_stall);
873 intr_stall = gk20a_readl(g, pbdma_intr_stall_1_r(i));
874 /*
875 * For bug 2082123
876 * Mask the unused HCE_RE_ILLEGAL_OP bit from the interrupt.
877 */
878 intr_stall &= ~pbdma_intr_stall_1_hce_illegal_op_enabled_f();
879 nvgpu_log_info(g, "pbdma id:%u, intr_en_1 0x%08x", i, intr_stall);
880 gk20a_writel(g, pbdma_intr_en_1_r(i), intr_stall);
881 }
882
883 /* reset runlist interrupts */
884 gk20a_writel(g, fifo_intr_runlist_r(), ~0);
885
886 /* clear and enable pfifo interrupt */
887 gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
888 mask = gk20a_fifo_intr_0_en_mask(g);
889 nvgpu_log_info(g, "fifo_intr_en_0 0x%08x", mask);
890 gk20a_writel(g, fifo_intr_en_0_r(), mask);
891 nvgpu_log_info(g, "fifo_intr_en_1 = 0x80000000");
892 gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);
893
894 nvgpu_log_fn(g, "done");
895
896 return 0;
897}
898
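/*
 * Common SW state setup: sizes the channel/TSG/PBDMA/engine tables,
 * snapshots the PBDMA->runlist map from fifo_pbdma_map_r(), builds
 * engine_info and the runlists, and initializes per-channel/TSG support
 * plus the various FIFO mutexes.
 */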
899int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
900{
901 struct fifo_gk20a *f = &g->fifo;
902 unsigned int chid, i;
903 int err = 0;
904
905 nvgpu_log_fn(g, " ");
906
907 f->g = g;
908
909 err = nvgpu_mutex_init(&f->intr.isr.mutex);
910 if (err) {
911 nvgpu_err(g, "failed to init isr.mutex");
912 return err;
913 }
914
915 err = nvgpu_mutex_init(&f->engines_reset_mutex);
916 if (err) {
917 nvgpu_err(g, "failed to init engines_reset_mutex");
918 return err;
919 }
920
921 g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
922
923 f->num_channels = g->ops.fifo.get_num_fifos(g);
924 f->runlist_entry_size = g->ops.fifo.runlist_entry_size();
925 f->num_runlist_entries = fifo_eng_runlist_length_max_v();
926 f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
927 f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
928
929 f->userd_entry_size = 1 << ram_userd_base_shift_v();
930
931 f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
932 f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg));
933 f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map));
934 f->engine_info = nvgpu_kzalloc(g, f->max_engines *
935 sizeof(*f->engine_info));
936 f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32));
937
938 if (!(f->channel && f->tsg && f->pbdma_map && f->engine_info &&
939 f->active_engines_list)) {
940 err = -ENOMEM;
941 goto clean_up;
942 }
943 memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32)));
944
945 /* pbdma map needs to be in place before calling engine info init */
946 for (i = 0; i < f->num_pbdma; ++i) {
947 f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
948 }
949
950 g->ops.fifo.init_engine_info(f);
951
952 err = init_runlist(g, f);
953 if (err) {
954 nvgpu_err(g, "failed to init runlist");
955 goto clean_up;
956 }
957
958 nvgpu_init_list_node(&f->free_chs);
959
960 err = nvgpu_mutex_init(&f->free_chs_mutex);
961 if (err) {
962 nvgpu_err(g, "failed to init free_chs_mutex");
963 goto clean_up;
964 }
965
966 for (chid = 0; chid < f->num_channels; chid++) {
967 gk20a_init_channel_support(g, chid);
968 gk20a_init_tsg_support(g, chid);
969 }
970
971 err = nvgpu_mutex_init(&f->tsg_inuse_mutex);
972 if (err) {
973 nvgpu_err(g, "failed to init tsg_inuse_mutex");
974 goto clean_up;
975 }
976
977 f->remove_support = gk20a_remove_fifo_support;
978
979 f->deferred_reset_pending = false;
980
981 err = nvgpu_mutex_init(&f->deferred_reset_mutex);
982 if (err) {
983 nvgpu_err(g, "failed to init deferred_reset_mutex");
984 goto clean_up;
985 }
986
987 nvgpu_log_fn(g, "done");
988 return 0;
989
990clean_up:
991 nvgpu_err(g, "fail");
992
993 nvgpu_vfree(g, f->channel);
994 f->channel = NULL;
995 nvgpu_vfree(g, f->tsg);
996 f->tsg = NULL;
997 nvgpu_kfree(g, f->pbdma_map);
998 f->pbdma_map = NULL;
999 nvgpu_kfree(g, f->engine_info);
1000 f->engine_info = NULL;
1001 nvgpu_kfree(g, f->active_engines_list);
1002 f->active_engines_list = NULL;
1003
1004 return err;
1005}
1006
1007int gk20a_init_fifo_setup_sw(struct gk20a *g)
1008{
1009 struct fifo_gk20a *f = &g->fifo;
1010 unsigned int chid;
1011 u64 userd_base;
1012 int err = 0;
1013
1014 nvgpu_log_fn(g, " ");
1015
1016 if (f->sw_ready) {
1017 nvgpu_log_fn(g, "skip init");
1018 return 0;
1019 }
1020
1021 err = gk20a_init_fifo_setup_sw_common(g);
1022 if (err) {
1023 nvgpu_err(g, "fail: err: %d", err);
1024 return err;
1025 }
1026
1027 if (g->ops.mm.is_bar1_supported(g)) {
1028 err = nvgpu_dma_alloc_map_sys(g->mm.bar1.vm,
1029 f->userd_entry_size * f->num_channels,
1030 &f->userd);
1031 } else {
1032 err = nvgpu_dma_alloc_sys(g, f->userd_entry_size *
1033 f->num_channels, &f->userd);
1034 }
1035 if (err) {
1036 nvgpu_err(g, "userd memory allocation failed");
1037 goto clean_up;
1038 }
1039 nvgpu_log(g, gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
1040
1041 userd_base = nvgpu_mem_get_addr(g, &f->userd);
1042 for (chid = 0; chid < f->num_channels; chid++) {
1043 f->channel[chid].userd_iova = userd_base +
1044 chid * f->userd_entry_size;
1045 f->channel[chid].userd_gpu_va =
1046 f->userd.gpu_va + chid * f->userd_entry_size;
1047 }
1048
1049 err = nvgpu_channel_worker_init(g);
1050 if (err) {
1051 goto clean_up;
1052 }
1053
1054 f->sw_ready = true;
1055
1056 nvgpu_log_fn(g, "done");
1057 return 0;
1058
1059clean_up:
1060 nvgpu_log_fn(g, "fail");
1061 if (nvgpu_mem_is_valid(&f->userd)) {
1062 if (g->ops.mm.is_bar1_supported(g)) {
1063 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
1064 } else {
1065 nvgpu_dma_free(g, &f->userd);
1066 }
1067 }
1068
1069 return err;
1070}
1071
1072void gk20a_fifo_handle_runlist_event(struct gk20a *g)
1073{
1074 u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
1075
1076 nvgpu_log(g, gpu_dbg_intr, "runlist event %08x",
1077 runlist_event);
1078
1079 gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
1080}
1081
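/*
 * HW side of FIFO init: sanity-check that the USERD region is coherent when
 * accessed through BAR1 from both CPU and GPU, then program the USERD GPU VA
 * (BAR1 address) into fifo_bar1_base_r().
 */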
1082int gk20a_init_fifo_setup_hw(struct gk20a *g)
1083{
1084 struct fifo_gk20a *f = &g->fifo;
1085
1086 nvgpu_log_fn(g, " ");
1087
1088 /* test write, read through bar1 @ userd region before
1089 * turning on the snooping */
1090 {
1091 struct fifo_gk20a *f = &g->fifo;
1092 u32 v, v1 = 0x33, v2 = 0x55;
1093
1094 u32 bar1_vaddr = f->userd.gpu_va;
1095 volatile u32 *cpu_vaddr = f->userd.cpu_va;
1096
1097 nvgpu_log_info(g, "test bar1 @ vaddr 0x%x",
1098 bar1_vaddr);
1099
1100 v = gk20a_bar1_readl(g, bar1_vaddr);
1101
1102 *cpu_vaddr = v1;
1103 nvgpu_mb();
1104
1105 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
1106 nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \
1107 GPU read 0x%x", *cpu_vaddr, gk20a_bar1_readl(g, bar1_vaddr));
1108 return -EINVAL;
1109 }
1110
1111 gk20a_bar1_writel(g, bar1_vaddr, v2);
1112
1113 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
1114 nvgpu_err(g, "bar1 broken @ gk20a: GPU wrote 0x%x, \
1115 CPU read 0x%x", gk20a_bar1_readl(g, bar1_vaddr), *cpu_vaddr);
1116 return -EINVAL;
1117 }
1118
1119 /* is it visible to the cpu? */
1120 if (*cpu_vaddr != v2) {
1121 nvgpu_err(g,
1122 "cpu didn't see bar1 write @ %p!",
1123 cpu_vaddr);
1124 }
1125
1126 /* put it back */
1127 gk20a_bar1_writel(g, bar1_vaddr, v);
1128 }
1129
1130 /*XXX all manner of flushes and caching worries, etc */
1131
1132 /* set the base for the userd region now */
1133 gk20a_writel(g, fifo_bar1_base_r(),
1134 fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
1135 fifo_bar1_base_valid_true_f());
1136
1137 nvgpu_log_fn(g, "done");
1138
1139 return 0;
1140}
1141
1142int gk20a_init_fifo_support(struct gk20a *g)
1143{
1144 u32 err;
1145
1146 err = g->ops.fifo.setup_sw(g);
1147 if (err) {
1148 return err;
1149 }
1150
1151 if (g->ops.fifo.init_fifo_setup_hw) {
1152 err = g->ops.fifo.init_fifo_setup_hw(g);
1153 }
1154 if (err) {
1155 return err;
1156 }
1157
1158 return err;
1159}
1160
1161/* return with a reference to the channel, caller must put it back */
1162struct channel_gk20a *
1163gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
1164{
1165 struct fifo_gk20a *f = &g->fifo;
1166 unsigned int ci;
1167 if (unlikely(!f->channel)) {
1168 return NULL;
1169 }
1170 for (ci = 0; ci < f->num_channels; ci++) {
1171 struct channel_gk20a *ch;
1172 u64 ch_inst_ptr;
1173
1174 ch = gk20a_channel_from_id(g, ci);
1175 /* only alive channels are searched */
1176 if (!ch) {
1177 continue;
1178 }
1179
1180 ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
1181 if (inst_ptr == ch_inst_ptr) {
1182 return ch;
1183 }
1184
1185 gk20a_channel_put(ch);
1186 }
1187 return NULL;
1188}
1189
1190/* fault info/descriptions.
1191 * tbd: move to setup
1192 * */
1193static const char * const gk20a_fault_type_descs[] = {
1194 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
1195 "pde size",
1196 "pte",
1197 "va limit viol",
1198 "unbound inst",
1199 "priv viol",
1200 "ro viol",
1201 "wo viol",
1202 "pitch mask",
1203 "work creation",
1204 "bad aperture",
1205 "compression failure",
1206 "bad kind",
1207 "region viol",
1208 "dual ptes",
1209 "poisoned",
1210};
1211/* engine descriptions */
1212static const char * const engine_subid_descs[] = {
1213 "gpc",
1214 "hub",
1215};
1216
1217static const char * const gk20a_hub_client_descs[] = {
1218 "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
1219 "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
1220 "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
1221 "scc nb", "sec", "ssync", "gr copy", "xv", "mmu nb",
1222 "msenc", "d falcon", "sked", "a falcon", "n/a",
1223};
1224
1225static const char * const gk20a_gpc_client_descs[] = {
1226 "l1 0", "t1 0", "pe 0",
1227 "l1 1", "t1 1", "pe 1",
1228 "l1 2", "t1 2", "pe 2",
1229 "l1 3", "t1 3", "pe 3",
1230 "rast", "gcc", "gpccs",
1231 "prop 0", "prop 1", "prop 2", "prop 3",
1232 "l1 4", "t1 4", "pe 4",
1233 "l1 5", "t1 5", "pe 5",
1234 "l1 6", "t1 6", "pe 6",
1235 "l1 7", "t1 7", "pe 7",
1236};
1237
1238static const char * const does_not_exist[] = {
1239 "does not exist"
1240};
1241
1242/* fill in mmu fault desc */
1243void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault)
1244{
1245 if (mmfault->fault_type >= ARRAY_SIZE(gk20a_fault_type_descs)) {
1246 WARN_ON(mmfault->fault_type >=
1247 ARRAY_SIZE(gk20a_fault_type_descs));
1248 } else {
1249 mmfault->fault_type_desc =
1250 gk20a_fault_type_descs[mmfault->fault_type];
1251 }
1252}
1253
1254/* fill in mmu fault client description */
1255void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault)
1256{
1257 if (mmfault->client_id >= ARRAY_SIZE(gk20a_hub_client_descs)) {
1258 WARN_ON(mmfault->client_id >=
1259 ARRAY_SIZE(gk20a_hub_client_descs));
1260 } else {
1261 mmfault->client_id_desc =
1262 gk20a_hub_client_descs[mmfault->client_id];
1263 }
1264}
1265
1266/* fill in mmu fault gpc description */
1267void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault)
1268{
1269 if (mmfault->client_id >= ARRAY_SIZE(gk20a_gpc_client_descs)) {
1270 WARN_ON(mmfault->client_id >=
1271 ARRAY_SIZE(gk20a_gpc_client_descs));
1272 } else {
1273 mmfault->client_id_desc =
1274 gk20a_gpc_client_descs[mmfault->client_id];
1275 }
1276}
1277
1278static void get_exception_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1279 struct mmu_fault_info *mmfault)
1280{
1281 g->ops.fifo.get_mmu_fault_info(g, mmu_fault_id, mmfault);
1282
1283 /* parse info */
1284 mmfault->fault_type_desc = does_not_exist[0];
1285 if (g->ops.fifo.get_mmu_fault_desc) {
1286 g->ops.fifo.get_mmu_fault_desc(mmfault);
1287 }
1288
1289 if (mmfault->client_type >= ARRAY_SIZE(engine_subid_descs)) {
1290 WARN_ON(mmfault->client_type >= ARRAY_SIZE(engine_subid_descs));
1291 mmfault->client_type_desc = does_not_exist[0];
1292 } else {
1293 mmfault->client_type_desc =
1294 engine_subid_descs[mmfault->client_type];
1295 }
1296
1297 mmfault->client_id_desc = does_not_exist[0];
1298 if ((mmfault->client_type ==
1299 fifo_intr_mmu_fault_info_engine_subid_hub_v())
1300 && g->ops.fifo.get_mmu_fault_client_desc) {
1301 g->ops.fifo.get_mmu_fault_client_desc(mmfault);
1302 } else if ((mmfault->client_type ==
1303 fifo_intr_mmu_fault_info_engine_subid_gpc_v())
1304 && g->ops.fifo.get_mmu_fault_gpc_desc) {
1305 g->ops.fifo.get_mmu_fault_gpc_desc(mmfault);
1306 }
1307}
1308
1309/* reads info from hardware and fills in mmu fault info record */
1310void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1311 struct mmu_fault_info *mmfault)
1312{
1313 u32 fault_info;
1314 u32 addr_lo, addr_hi;
1315
1316 nvgpu_log_fn(g, "mmu_fault_id %d", mmu_fault_id);
1317
1318 memset(mmfault, 0, sizeof(*mmfault));
1319
1320 fault_info = gk20a_readl(g,
1321 fifo_intr_mmu_fault_info_r(mmu_fault_id));
1322 mmfault->fault_type =
1323 fifo_intr_mmu_fault_info_type_v(fault_info);
1324 mmfault->access_type =
1325 fifo_intr_mmu_fault_info_write_v(fault_info);
1326 mmfault->client_type =
1327 fifo_intr_mmu_fault_info_engine_subid_v(fault_info);
1328 mmfault->client_id =
1329 fifo_intr_mmu_fault_info_client_v(fault_info);
1330
1331 addr_lo = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(mmu_fault_id));
1332 addr_hi = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(mmu_fault_id));
1333 mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
1334 /* note: ignoring aperture on gk20a... */
1335 mmfault->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
1336 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(mmu_fault_id)));
1337 /* note: inst_ptr is a 40b phys addr. */
1338 mmfault->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
1339}
1340
1341void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
1342{
1343 u32 engine_enum = ENGINE_INVAL_GK20A;
1344 struct fifo_engine_info_gk20a *engine_info;
1345
1346 nvgpu_log_fn(g, " ");
1347
1348 if (!g) {
1349 return;
1350 }
1351
1352 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1353
1354 if (engine_info) {
1355 engine_enum = engine_info->engine_enum;
1356 }
1357
1358 if (engine_enum == ENGINE_INVAL_GK20A) {
1359 nvgpu_err(g, "unsupported engine_id %d", engine_id);
1360 }
1361
1362 if (engine_enum == ENGINE_GR_GK20A) {
1363 if (g->support_pmu) {
1364 if (nvgpu_pg_elpg_disable(g) != 0 ) {
1365 nvgpu_err(g, "failed to set disable elpg");
1366 }
1367 }
1368
1369#ifdef CONFIG_GK20A_CTXSW_TRACE
1370 /*
1371 * Resetting engine will alter read/write index. Need to flush
1372 * circular buffer before re-enabling FECS.
1373 */
1374 if (g->ops.fecs_trace.reset)
1375 g->ops.fecs_trace.reset(g);
1376#endif
1377 if (!nvgpu_platform_is_simulation(g)) {
1378 /*HALT_PIPELINE method, halt GR engine*/
1379 if (gr_gk20a_halt_pipe(g)) {
1380 nvgpu_err(g, "failed to HALT gr pipe");
1381 }
1382 /*
1383 * resetting engine using mc_enable_r() is not
1384 * enough, we do full init sequence
1385 */
1386 nvgpu_log(g, gpu_dbg_info, "resetting gr engine");
1387 gk20a_gr_reset(g);
1388 } else {
1389 nvgpu_log(g, gpu_dbg_info,
1390 "HALT gr pipe not supported and "
1391 "gr cannot be reset without halting gr pipe");
1392 }
1393 if (g->support_pmu) {
1394 if (nvgpu_pg_elpg_enable(g) != 0 ) {
1395 nvgpu_err(g, "failed to set enable elpg");
1396 }
1397 }
1398 }
1399 if ((engine_enum == ENGINE_GRCE_GK20A) ||
1400 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
1401 g->ops.mc.reset(g, engine_info->reset_mask);
1402 }
1403}
1404
1405static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
1406{
1407 u32 intr;
1408
1409 intr = gk20a_readl(g, fifo_intr_chsw_error_r());
1410 nvgpu_err(g, "chsw: %08x", intr);
1411 gk20a_fecs_dump_falcon_stats(g);
1412 gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
1413}
1414
1415static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
1416{
1417 u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1418 nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id);
1419}
1420
1421bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid)
1422{
1423 return (engine_subid == fifo_intr_mmu_fault_info_engine_subid_gpc_v());
1424}
1425
1426bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
1427 u32 engine_subid, bool fake_fault)
1428{
1429 u32 engine_enum = ENGINE_INVAL_GK20A;
1430 struct fifo_engine_info_gk20a *engine_info;
1431
1432 if (!g) {
1433 return false;
1434 }
1435
1436 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1437
1438 if (engine_info) {
1439 engine_enum = engine_info->engine_enum;
1440 }
1441
1442 if (engine_enum == ENGINE_INVAL_GK20A) {
1443 return false;
1444 }
1445
1446 /* channel recovery is only deferred if an sm debugger
1447 is attached and MMU debug mode is enabled */
1448 if (!g->ops.gr.sm_debugger_attached(g) ||
1449 !g->ops.fb.is_debug_mode_enabled(g)) {
1450 return false;
1451 }
1452
1453 /* if this fault is fake (due to RC recovery), don't defer recovery */
1454 if (fake_fault) {
1455 return false;
1456 }
1457
1458 if (engine_enum != ENGINE_GR_GK20A) {
1459 return false;
1460 }
1461
1462 return g->ops.fifo.is_fault_engine_subid_gpc(g, engine_subid);
1463}
1464
1465/* caller must hold a channel reference */
1466static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
1467 struct channel_gk20a *refch)
1468{
1469 bool verbose = false;
1470 if (!refch) {
1471 return verbose;
1472 }
1473
1474 if (nvgpu_is_error_notifier_set(refch,
1475 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
1476 verbose = refch->timeout_debug_dump;
1477 }
1478
1479 return verbose;
1480}
1481
1482/* caller must hold a channel reference */
1483static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
1484 struct channel_gk20a *refch)
1485{
1486 if (refch) {
1487 /* mark channel as faulted */
1488 gk20a_channel_set_timedout(refch);
1489
1490 /* unblock pending waits */
1491 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
1492 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
1493 }
1494}
1495
1496/* caller must hold a channel reference */
1497bool gk20a_fifo_error_ch(struct gk20a *g,
1498 struct channel_gk20a *refch)
1499{
1500 bool verbose;
1501
1502 verbose = gk20a_fifo_ch_timeout_debug_dump_state(g, refch);
1503 gk20a_fifo_set_has_timedout_and_wake_up_wqs(g, refch);
1504
1505 return verbose;
1506}
1507
1508bool gk20a_fifo_error_tsg(struct gk20a *g,
1509 struct tsg_gk20a *tsg)
1510{
1511 struct channel_gk20a *ch = NULL;
1512 bool verbose = false;
1513
1514 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1515 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1516 if (gk20a_channel_get(ch)) {
1517 if (gk20a_fifo_error_ch(g, ch)) {
1518 verbose = true;
1519 }
1520 gk20a_channel_put(ch);
1521 }
1522 }
1523 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1524
1525 return verbose;
1526
1527}
1528/* caller must hold a channel reference */
1529void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
1530 struct channel_gk20a *refch)
1531{
1532 nvgpu_err(g,
1533 "channel %d generated a mmu fault", refch->chid);
1534 g->ops.fifo.set_error_notifier(refch,
1535 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
1536}
1537
1538void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
1539 struct tsg_gk20a *tsg)
1540{
1541 struct channel_gk20a *ch = NULL;
1542
1543 nvgpu_err(g,
1544 "TSG %d generated a mmu fault", tsg->tsgid);
1545
1546 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1547 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1548 if (gk20a_channel_get(ch)) {
1549 gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1550 gk20a_channel_put(ch);
1551 }
1552 }
1553 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1554
1555}
1556
1557void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
1558{
1559 struct channel_gk20a *ch = NULL;
1560
1561 nvgpu_log_fn(g, " ");
1562
1563 g->ops.fifo.disable_tsg(tsg);
1564
1565 if (preempt) {
1566 g->ops.fifo.preempt_tsg(g, tsg);
1567 }
1568
1569 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1570 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1571 if (gk20a_channel_get(ch)) {
1572 gk20a_channel_set_timedout(ch);
1573 if (ch->g->ops.fifo.ch_abort_clean_up) {
1574 ch->g->ops.fifo.ch_abort_clean_up(ch);
1575 }
1576 gk20a_channel_put(ch);
1577 }
1578 }
1579 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1580}
1581
1582int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
1583{
1584 unsigned long engine_id, engines = 0U;
1585 struct tsg_gk20a *tsg;
1586 bool deferred_reset_pending;
1587 struct fifo_gk20a *f = &g->fifo;
1588
1589 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1590
1591 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1592 deferred_reset_pending = g->fifo.deferred_reset_pending;
1593 nvgpu_mutex_release(&f->deferred_reset_mutex);
1594
1595 if (!deferred_reset_pending) {
1596 nvgpu_mutex_release(&g->dbg_sessions_lock);
1597 return 0;
1598 }
1599
1600 gr_gk20a_disable_ctxsw(g);
1601
1602 tsg = tsg_gk20a_from_ch(ch);
1603 if (tsg != NULL) {
1604 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
1605 } else {
1606 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
1607 }
1608
1609 if (engines == 0U) {
1610 goto clean_up;
1611 }
1612
1613 /*
1614 * If deferred reset is set for an engine, and channel is running
1615 * on that engine, reset it
1616 */
1617 for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32) {
1618 if (BIT(engine_id) & engines) {
1619 gk20a_fifo_reset_engine(g, engine_id);
1620 }
1621 }
1622
1623 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1624 g->fifo.deferred_fault_engines = 0;
1625 g->fifo.deferred_reset_pending = false;
1626 nvgpu_mutex_release(&f->deferred_reset_mutex);
1627
1628clean_up:
1629 gr_gk20a_enable_ctxsw(g);
1630 nvgpu_mutex_release(&g->dbg_sessions_lock);
1631
1632 return 0;
1633}
1634
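/*
 * Core MMU-fault recovery path; the _locked suffix means the caller holds
 * engines_reset_mutex and all runlist locks, as gk20a_fifo_handle_mmu_fault()
 * below does. Rough sequence: disable power management and FIFO access, walk
 * the faulted engines (the passed-in mask for a "fake" fault, otherwise
 * fifo_intr_mmu_fault_id_r()), identify the owning channel/TSG from the
 * engine status or the faulting instance pointer, reset the engine now or
 * defer it to channel free, abort the TSG, then clear the fault, kick the
 * scheduler, and re-enable FIFO access and power management.
 */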
1635static bool gk20a_fifo_handle_mmu_fault_locked(
1636 struct gk20a *g,
1637 u32 mmu_fault_engines, /* queried from HW if 0 */
1638 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1639 bool id_is_tsg)
1640{
1641 bool fake_fault;
1642 unsigned long fault_id;
1643 unsigned long engine_mmu_fault_id;
1644 bool verbose = true;
1645 u32 grfifo_ctl;
1646
1647 bool deferred_reset_pending = false;
1648 struct fifo_gk20a *f = &g->fifo;
1649
1650 nvgpu_log_fn(g, " ");
1651
1652 /* Disable power management */
1653 if (g->support_pmu) {
1654 if (nvgpu_cg_pg_disable(g) != 0) {
1655 nvgpu_warn(g, "fail to disable power mgmt");
1656 }
1657 }
1658
1659 /* Disable fifo access */
1660 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
1661 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
1662 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
1663
1664 gk20a_writel(g, gr_gpfifo_ctl_r(),
1665 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
1666 gr_gpfifo_ctl_semaphore_access_f(0));
1667
1668 if (mmu_fault_engines) {
1669 fault_id = mmu_fault_engines;
1670 fake_fault = true;
1671 } else {
1672 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1673 fake_fault = false;
1674 gk20a_debug_dump(g);
1675 }
1676
1677 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1678 g->fifo.deferred_reset_pending = false;
1679 nvgpu_mutex_release(&f->deferred_reset_mutex);
1680
1681 /* go through all faulted engines */
1682 for_each_set_bit(engine_mmu_fault_id, &fault_id, 32) {
1683 /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
1684 * engines. Convert engine_mmu_id to engine_id */
1685 u32 engine_id = gk20a_mmu_id_to_engine_id(g,
1686 engine_mmu_fault_id);
1687 struct mmu_fault_info mmfault_info;
1688 struct channel_gk20a *ch = NULL;
1689 struct tsg_gk20a *tsg = NULL;
1690 struct channel_gk20a *refch = NULL;
1691 /* read and parse engine status */
1692 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1693 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1694 bool ctxsw = (ctx_status ==
1695 fifo_engine_status_ctx_status_ctxsw_switch_v()
1696 || ctx_status ==
1697 fifo_engine_status_ctx_status_ctxsw_save_v()
1698 || ctx_status ==
1699 fifo_engine_status_ctx_status_ctxsw_load_v());
1700
1701 get_exception_mmu_fault_info(g, engine_mmu_fault_id,
1702 &mmfault_info);
1703 trace_gk20a_mmu_fault(mmfault_info.fault_addr,
1704 mmfault_info.fault_type,
1705 mmfault_info.access_type,
1706 mmfault_info.inst_ptr,
1707 engine_id,
1708 mmfault_info.client_type_desc,
1709 mmfault_info.client_id_desc,
1710 mmfault_info.fault_type_desc);
1711 nvgpu_err(g, "%s mmu fault on engine %d, "
1712 "engine subid %d (%s), client %d (%s), "
1713 "addr 0x%llx, type %d (%s), access_type 0x%08x,"
1714 "inst_ptr 0x%llx",
1715 fake_fault ? "fake" : "",
1716 engine_id,
1717 mmfault_info.client_type,
1718 mmfault_info.client_type_desc,
1719 mmfault_info.client_id, mmfault_info.client_id_desc,
1720 mmfault_info.fault_addr,
1721 mmfault_info.fault_type,
1722 mmfault_info.fault_type_desc,
1723 mmfault_info.access_type, mmfault_info.inst_ptr);
1724
1725 if (ctxsw) {
1726 gk20a_fecs_dump_falcon_stats(g);
1727 nvgpu_err(g, "gr_status_r : 0x%x",
1728 gk20a_readl(g, gr_status_r()));
1729 }
1730
1731 /* get the channel/TSG */
1732 if (fake_fault) {
1733 /* use next_id if context load is failing */
1734 u32 id, type;
1735
1736 if (hw_id == ~(u32)0) {
1737 id = (ctx_status ==
1738 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1739 fifo_engine_status_next_id_v(status) :
1740 fifo_engine_status_id_v(status);
1741 type = (ctx_status ==
1742 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1743 fifo_engine_status_next_id_type_v(status) :
1744 fifo_engine_status_id_type_v(status);
1745 } else {
1746 id = hw_id;
1747 type = id_is_tsg ?
1748 fifo_engine_status_id_type_tsgid_v() :
1749 fifo_engine_status_id_type_chid_v();
1750 }
1751
1752 if (type == fifo_engine_status_id_type_tsgid_v()) {
1753 tsg = &g->fifo.tsg[id];
1754 } else if (type == fifo_engine_status_id_type_chid_v()) {
1755 ch = &g->fifo.channel[id];
1756 refch = gk20a_channel_get(ch);
1757 if (refch != NULL) {
1758 tsg = tsg_gk20a_from_ch(refch);
1759 }
1760 }
1761 } else {
1762 /* read channel based on instruction pointer */
1763 ch = gk20a_refch_from_inst_ptr(g,
1764 mmfault_info.inst_ptr);
1765 refch = ch;
1766 if (refch != NULL) {
1767 tsg = tsg_gk20a_from_ch(refch);
1768 }
1769 }
1770
1771 /* check if engine reset should be deferred */
1772 if (engine_id != FIFO_INVAL_ENGINE_ID) {
1773 bool defer = gk20a_fifo_should_defer_engine_reset(g,
1774 engine_id, mmfault_info.client_type,
1775 fake_fault);
1776 if ((ch || tsg) && defer) {
1777 g->fifo.deferred_fault_engines |= BIT(engine_id);
1778
1779 /* handled during channel free */
1780 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1781 g->fifo.deferred_reset_pending = true;
1782 nvgpu_mutex_release(&f->deferred_reset_mutex);
1783
1784 deferred_reset_pending = true;
1785
1786 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
1787 "sm debugger attached,"
1788 " deferring channel recovery to channel free");
1789 } else {
1790 gk20a_fifo_reset_engine(g, engine_id);
1791 }
1792 }
1793
1794#ifdef CONFIG_GK20A_CTXSW_TRACE
1795 if (tsg) {
1796 gk20a_ctxsw_trace_tsg_reset(g, tsg);
1797 }
1798#endif
1799 /*
1800 * Disable the channel/TSG from hw and increment syncpoints.
1801 */
1802 if (tsg) {
1803 if (deferred_reset_pending) {
1804 gk20a_disable_tsg(tsg);
1805 } else {
1806 if (!fake_fault) {
1807 gk20a_fifo_set_ctx_mmu_error_tsg(g,
1808 tsg);
1809 }
1810 verbose = gk20a_fifo_error_tsg(g, tsg);
1811 gk20a_fifo_abort_tsg(g, tsg, false);
1812 }
1813
1814 /* put back the ref taken early above */
1815 if (refch) {
1816 gk20a_channel_put(ch);
1817 }
1818 } else if (refch != NULL) {
1819 nvgpu_err(g, "mmu error in unbound channel %d",
1820 ch->chid);
1821 gk20a_channel_put(ch);
1822 } else if (mmfault_info.inst_ptr ==
1823 nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
1824 nvgpu_err(g, "mmu fault from bar1");
1825 } else if (mmfault_info.inst_ptr ==
1826 nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
1827 nvgpu_err(g, "mmu fault from pmu");
1828 } else {
1829 nvgpu_err(g, "couldn't locate channel for mmu fault");
1830 }
1831 }
1832
1833 /* clear interrupt */
1834 gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
1835
1836 /* resume scheduler */
1837 gk20a_writel(g, fifo_error_sched_disable_r(),
1838 gk20a_readl(g, fifo_error_sched_disable_r()));
1839
1840 /* Re-enable fifo access */
1841 gk20a_writel(g, gr_gpfifo_ctl_r(),
1842 gr_gpfifo_ctl_access_enabled_f() |
1843 gr_gpfifo_ctl_semaphore_access_enabled_f());
1844
1845 /* It is safe to enable ELPG again. */
1846 if (g->support_pmu) {
1847 if (nvgpu_cg_pg_enable(g) != 0) {
1848 nvgpu_warn(g, "fail to enable power mgmt");
1849 }
1850 }
1851
1852 return verbose;
1853}
1854
1855static bool gk20a_fifo_handle_mmu_fault(
1856 struct gk20a *g,
1857 u32 mmu_fault_engines, /* queried from HW if 0 */
1858 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1859 bool id_is_tsg)
1860{
1861 u32 rlid;
1862 bool verbose;
1863
1864 nvgpu_log_fn(g, " ");
1865
1866 nvgpu_log_info(g, "acquire engines_reset_mutex");
1867 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1868
1869 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
1870 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1871 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
1872 }
1873
1874 verbose = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
1875 hw_id, id_is_tsg);
1876
1877 nvgpu_log_info(g, "release runlist_lock for all runlists");
1878 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1879 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
1880 }
1881
1882 nvgpu_log_info(g, "release engines_reset_mutex");
1883 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
1884
1885 return verbose;
1886}
1887
1888static void gk20a_fifo_get_faulty_id_type(struct gk20a *g, int engine_id,
1889 u32 *id, u32 *type)
1890{
1891 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1892 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1893
1894 /* use next_id if context load is failing */
1895 *id = (ctx_status ==
1896 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1897 fifo_engine_status_next_id_v(status) :
1898 fifo_engine_status_id_v(status);
1899
1900 *type = (ctx_status ==
1901 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1902 fifo_engine_status_next_id_type_v(status) :
1903 fifo_engine_status_id_type_v(status);
1904}
1905
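/*
 * Returns a bitmask of active engine IDs that are busy with the given
 * channel ID (or TSG ID when is_tsg is set); next_id is used when a
 * context load is in flight.
 */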
1906static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1907{
1908 unsigned int i;
1909 u32 engines = 0;
1910
1911 for (i = 0; i < g->fifo.num_engines; i++) {
1912 u32 active_engine_id = g->fifo.active_engines_list[i];
1913 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
1914 u32 ctx_status =
1915 fifo_engine_status_ctx_status_v(status);
1916 u32 ctx_id = (ctx_status ==
1917 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1918 fifo_engine_status_next_id_v(status) :
1919 fifo_engine_status_id_v(status);
1920 u32 type = (ctx_status ==
1921 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1922 fifo_engine_status_next_id_type_v(status) :
1923 fifo_engine_status_id_type_v(status);
1924 bool busy = fifo_engine_status_engine_v(status) ==
1925 fifo_engine_status_engine_busy_v();
1926 if (busy && ctx_id == id) {
1927 if ((is_tsg && type ==
1928 fifo_engine_status_id_type_tsgid_v()) ||
1929 (!is_tsg && type ==
1930 fifo_engine_status_id_type_chid_v())) {
1931 engines |= BIT(active_engine_id);
1932 }
1933 }
1934 }
1935
1936 return engines;
1937}
1938
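/*
 * Channel-level recovery entry point. Context switching is stopped under
 * dbg_sessions_lock so engine assignments cannot change while checking
 * whether the channel is resident on any engine: if it is, full engine
 * recovery runs; otherwise the channel is simply aborted (with an optional
 * debug dump).
 */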
1939void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
1940 bool verbose, u32 rc_type)
1941{
1942 u32 engines;
1943
1944 /* stop context switching to prevent engine assignments from
1945 changing until channel is recovered */
1946 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1947 gr_gk20a_disable_ctxsw(g);
1948
1949 engines = gk20a_fifo_engines_on_id(g, ch->chid, false);
1950
1951 if (engines) {
1952 gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
1953 rc_type);
1954 } else {
1955 gk20a_channel_abort(ch, false);
1956
1957 if (gk20a_fifo_error_ch(g, ch)) {
1958 gk20a_debug_dump(g);
1959 }
1960 }
1961
1962 gr_gk20a_enable_ctxsw(g);
1963 nvgpu_mutex_release(&g->dbg_sessions_lock);
1964}
1965
1966void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1967 bool verbose, u32 rc_type)
1968{
1969 u32 engines = 0U;
1970 int err;
1971
1972 /* stop context switching to prevent engine assignments from
1973 changing until TSG is recovered */
1974 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1975
1976 /* disable tsg so that it does not get scheduled again */
1977 g->ops.fifo.disable_tsg(tsg);
1978
1979 /*
1980 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
1981 * fifo_engine_status register. Also while the engine is held in reset
1982 * h/w passes busy/idle straight through. fifo_engine_status registers
1983 * are correct in that there is no context switch outstanding
1984 * as the CTXSW is aborted when reset is asserted.
1985 */
1986 nvgpu_log_info(g, "acquire engines_reset_mutex");
1987 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1988
1989 /*
1990 * stop context switching to prevent engine assignments from
1991 * changing until engine status is checked to make sure tsg
1992 * being recovered is not loaded on the engines
1993 */
1994 err = gr_gk20a_disable_ctxsw(g);
1995
1996 if (err != 0) {
1997 /* if failed to disable ctxsw, just abort tsg */
1998 nvgpu_err(g, "failed to disable ctxsw");
1999 } else {
2000 /* recover engines if tsg is loaded on the engines */
2001 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
2002
2003 /*
2004 * It is ok to enable ctxsw before the tsg is recovered. If engines
2005 * is 0, no engine recovery is needed; if it is non-zero,
2006 * gk20a_fifo_recover will call get_engines_mask_on_id again.
2007 * If the tsg is no longer on the engine by then, the engine need
2008 * not be reset.
2009 */
2010 err = gr_gk20a_enable_ctxsw(g);
2011 if (err != 0) {
2012 nvgpu_err(g, "failed to enable ctxsw");
2013 }
2014 }
2015
2016 nvgpu_log_info(g, "release engines_reset_mutex");
2017 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2018
2019 if (engines) {
2020 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
2021 rc_type);
2022 } else {
2023 if (gk20a_fifo_error_tsg(g, tsg) && verbose) {
2024 gk20a_debug_dump(g);
2025 }
2026
2027 gk20a_fifo_abort_tsg(g, tsg, false);
2028 }
2029
2030 nvgpu_mutex_release(&g->dbg_sessions_lock);
2031}
2032
2033void gk20a_fifo_teardown_mask_intr(struct gk20a *g)
2034{
2035 u32 val;
2036
2037 val = gk20a_readl(g, fifo_intr_en_0_r());
2038 val &= ~(fifo_intr_en_0_sched_error_m() |
2039 fifo_intr_en_0_mmu_fault_m());
2040 gk20a_writel(g, fifo_intr_en_0_r(), val);
2041 gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());
2042}
2043
2044void gk20a_fifo_teardown_unmask_intr(struct gk20a *g)
2045{
2046 u32 val;
2047
2048 val = gk20a_readl(g, fifo_intr_en_0_r());
2049 val |= fifo_intr_en_0_mmu_fault_f(1) | fifo_intr_en_0_sched_error_f(1);
2050 gk20a_writel(g, fifo_intr_en_0_r(), val);
2051
2052}
2053
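/*
 * Core teardown path shared by recovery and fault handling. With
 * engines_reset_mutex and all runlist locks held, the affected engines are
 * derived either from the known channel/TSG id or from the faulted engines'
 * status registers; an MMU fault is then triggered on those engines with the
 * sched_error/mmu_fault interrupts masked, handled synchronously via
 * gk20a_fifo_handle_mmu_fault_locked(), and the interrupts are unmasked
 * again.
 */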
2054void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
2055 u32 hw_id, unsigned int id_type, unsigned int rc_type,
2056 struct mmu_fault_info *mmfault)
2057{
2058 unsigned long engine_id, i;
2059 unsigned long _engine_ids = __engine_ids;
2060 unsigned long engine_ids = 0;
2061 u32 mmu_fault_engines = 0;
2062 u32 ref_type;
2063 u32 ref_id;
2064 u32 ref_id_is_tsg = false;
2065 bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false;
2066 bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false;
2067 u32 rlid;
2068
2069 nvgpu_log_info(g, "acquire engines_reset_mutex");
2070 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
2071
2072 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
2073 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2074 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
2075 }
2076
2077 if (id_is_known) {
2078 engine_ids = gk20a_fifo_engines_on_id(g, hw_id, id_is_tsg);
2079 ref_id = hw_id;
2080 ref_type = id_is_tsg ?
2081 fifo_engine_status_id_type_tsgid_v() :
2082 fifo_engine_status_id_type_chid_v();
2083 ref_id_is_tsg = id_is_tsg;
2084 /* at least one engine will get passed during sched err */
2085 engine_ids |= __engine_ids;
2086 for_each_set_bit(engine_id, &engine_ids, 32) {
2087 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id);
2088
2089 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2090 mmu_fault_engines |= BIT(mmu_id);
2091 }
2092 }
2093 } else {
2094 /* store faulted engines in advance */
2095 for_each_set_bit(engine_id, &_engine_ids, 32) {
2096 gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id,
2097 &ref_type);
2098 if (ref_type == fifo_engine_status_id_type_tsgid_v()) {
2099 ref_id_is_tsg = true;
2100 } else {
2101 ref_id_is_tsg = false;
2102 }
2103 /* Reset *all* engines that use the
2104 * same channel as the faulty engine */
2105 for (i = 0; i < g->fifo.num_engines; i++) {
2106 u32 active_engine_id = g->fifo.active_engines_list[i];
2107 u32 type;
2108 u32 id;
2109
2110 gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type);
2111 if (ref_type == type && ref_id == id) {
2112 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, active_engine_id);
2113
2114 engine_ids |= BIT(active_engine_id);
2115 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2116 mmu_fault_engines |= BIT(mmu_id);
2117 }
2118 }
2119 }
2120 }
2121 }
2122
2123 if (mmu_fault_engines) {
2124 g->ops.fifo.teardown_mask_intr(g);
2125 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
2126 gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id,
2127 ref_id_is_tsg);
2128
2129 g->ops.fifo.teardown_unmask_intr(g);
2130 }
2131
2132 nvgpu_log_info(g, "release runlist_lock for all runlists");
2133 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2134 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
2135 }
2136
2137 nvgpu_log_info(g, "release engines_reset_mutex");
2138 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2139}
2140
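/*
 * Generic recovery entry: optionally dump debug state, flush the L2 cache
 * if an LTC flush hook is present, then hand off to the per-chip
 * teardown_ch_tsg hook with the id type resolved from the
 * id_is_known/id_is_tsg flags.
 */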
2141void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
2142 u32 hw_id, bool id_is_tsg,
2143 bool id_is_known, bool verbose, int rc_type)
2144{
2145 unsigned int id_type;
2146
2147 if (verbose) {
2148 gk20a_debug_dump(g);
2149 }
2150
2151 if (g->ops.ltc.flush) {
2152 g->ops.ltc.flush(g);
2153 }
2154
2155 if (id_is_known) {
2156 id_type = id_is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
2157 } else {
2158 id_type = ID_TYPE_UNKNOWN;
2159 }
2160
2161 g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type,
2162 rc_type, NULL);
2163}
2164
2165/* force reset channel and tsg */
2166int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
2167 u32 err_code, bool verbose)
2168{
2169 struct channel_gk20a *ch_tsg = NULL;
2170 struct gk20a *g = ch->g;
2171
2172 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2173
2174 if (tsg != NULL) {
2175 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2176
2177 nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
2178 channel_gk20a, ch_entry) {
2179 if (gk20a_channel_get(ch_tsg)) {
2180 g->ops.fifo.set_error_notifier(ch_tsg,
2181 err_code);
2182 gk20a_channel_put(ch_tsg);
2183 }
2184 }
2185
2186 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2187 gk20a_fifo_recover_tsg(g, tsg, verbose,
2188 RC_TYPE_FORCE_RESET);
2189 } else {
2190 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2191 }
2192
2193 return 0;
2194}
2195
2196int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
2197{
2198 struct gk20a *g = ch->g;
2199
2200 if (gk20a_fifo_channel_status_is_next(g, ch->chid)) {
2201 nvgpu_err(g, "Channel %d to be removed from TSG %d has NEXT set!",
2202 ch->chid, ch->tsgid);
2203 return -EINVAL;
2204 }
2205
2206 if (g->ops.fifo.tsg_verify_status_ctx_reload) {
2207 g->ops.fifo.tsg_verify_status_ctx_reload(ch);
2208 }
2209
2210 if (g->ops.fifo.tsg_verify_status_faulted) {
2211 g->ops.fifo.tsg_verify_status_faulted(ch);
2212 }
2213
2214 return 0;
2215}
2216
2217static bool gk20a_fifo_tsg_is_multi_channel(struct tsg_gk20a *tsg)
2218{
2219 bool ret = false;
2220
2221 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2222 if (nvgpu_list_first_entry(&tsg->ch_list, channel_gk20a,
2223 ch_entry) !=
2224 nvgpu_list_last_entry(&tsg->ch_list, channel_gk20a,
2225 ch_entry)) {
2226 ret = true;
2227 }
2228 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2229
2230 return ret;
2231}
2232
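/*
 * Unbind sequence: disable and preempt the TSG, validate the channel state
 * when the TSG holds more than one channel, remove the channel from the
 * runlist and from the TSG's channel list, then re-enable the remaining
 * channels unless the TSG had already timed out.
 */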
2233int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
2234{
2235 struct gk20a *g = ch->g;
2236 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2237 int err;
2238 bool tsg_timedout = false;
2239
2240 if (tsg == NULL) {
2241 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2242 return 0;
2243 }
2244
2245 /* If one channel in TSG times out, we disable all channels */
2246 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2247 tsg_timedout = gk20a_channel_check_timedout(ch);
2248 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2249
2250 /* Disable TSG and examine status before unbinding channel */
2251 g->ops.fifo.disable_tsg(tsg);
2252
2253 err = g->ops.fifo.preempt_tsg(g, tsg);
2254 if (err != 0) {
2255 goto fail_enable_tsg;
2256 }
2257
2258 /*
2259 * State validation is only necessary if there are multiple channels in
2260 * the TSG.
2261 */
2262 if (gk20a_fifo_tsg_is_multi_channel(tsg) &&
2263 g->ops.fifo.tsg_verify_channel_status && !tsg_timedout) {
2264 err = g->ops.fifo.tsg_verify_channel_status(ch);
2265 if (err) {
2266 goto fail_enable_tsg;
2267 }
2268 }
2269
2270 /* Channel should be seen as TSG channel while updating runlist */
2271 err = channel_gk20a_update_runlist(ch, false);
2272 if (err) {
2273 goto fail_enable_tsg;
2274 }
2275
2276 while (ch->mmu_debug_mode_refcnt > 0U) {
2277 err = nvgpu_tsg_set_mmu_debug_mode(ch, false);
2278 if (err != 0) {
2279 nvgpu_err(g, "disable mmu debug mode failed ch:%u",
2280 ch->chid);
2281 break;
2282 }
2283 }
2284
2285 /* Remove channel from TSG and re-enable rest of the channels */
2286 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2287 nvgpu_list_del(&ch->ch_entry);
2288 ch->tsgid = NVGPU_INVALID_TSG_ID;
2289
2290 /* another thread could have re-enabled the channel because it was
2291 * still on the list at that time, so make sure it's truly disabled
2292 */
2293 g->ops.fifo.disable_channel(ch);
2294 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2295
2296 /*
2297 * Don't re-enable all channels if TSG has timed out already
2298 *
2299 * Note that we can skip disabling and preempting TSG too in case of
2300 * time out, but we keep that to ensure TSG is kicked out
2301 */
2302 if (!tsg_timedout) {
2303 g->ops.fifo.enable_tsg(tsg);
2304 }
2305
2306 if (ch->g->ops.fifo.ch_abort_clean_up) {
2307 ch->g->ops.fifo.ch_abort_clean_up(ch);
2308 }
2309
2310 return 0;
2311
2312fail_enable_tsg:
2313 if (!tsg_timedout) {
2314 g->ops.fifo.enable_tsg(tsg);
2315 }
2316 return err;
2317}
2318
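/*
 * Scans the active engines for one that is busy in a context switch and
 * reports the channel/TSG id involved. While a switch is in flight, FECS
 * mailbox 2 is consulted: if the FECS_METHOD_WFI_RESTORE bit is set the
 * incoming (next) context id is reported, otherwise the outgoing one.
 */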
2319u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
2320 int *__id, bool *__is_tsg)
2321{
2322 u32 engine_id;
2323 int id = -1;
2324 bool is_tsg = false;
2325 u32 mailbox2;
2326 u32 active_engine_id = FIFO_INVAL_ENGINE_ID;
2327
2328 for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) {
2329 u32 status;
2330 u32 ctx_status;
2331 bool failing_engine;
2332
2333 active_engine_id = g->fifo.active_engines_list[engine_id];
2334 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
2335 ctx_status = fifo_engine_status_ctx_status_v(status);
2336
2337 /* we are interested in busy engines */
2338 failing_engine = fifo_engine_status_engine_v(status) ==
2339 fifo_engine_status_engine_busy_v();
2340
2341 /* ..that are doing context switch */
2342 failing_engine = failing_engine &&
2343 (ctx_status ==
2344 fifo_engine_status_ctx_status_ctxsw_switch_v()
2345 || ctx_status ==
2346 fifo_engine_status_ctx_status_ctxsw_save_v()
2347 || ctx_status ==
2348 fifo_engine_status_ctx_status_ctxsw_load_v());
2349
2350 if (!failing_engine) {
2351 active_engine_id = FIFO_INVAL_ENGINE_ID;
2352 continue;
2353 }
2354
2355 if (ctx_status ==
2356 fifo_engine_status_ctx_status_ctxsw_load_v()) {
2357 id = fifo_engine_status_next_id_v(status);
2358 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2359 fifo_engine_status_next_id_type_chid_v();
2360 } else if (ctx_status ==
2361 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
2362 mailbox2 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(2));
2363 if (mailbox2 & FECS_METHOD_WFI_RESTORE) {
2364 id = fifo_engine_status_next_id_v(status);
2365 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2366 fifo_engine_status_next_id_type_chid_v();
2367 } else {
2368 id = fifo_engine_status_id_v(status);
2369 is_tsg = fifo_engine_status_id_type_v(status) !=
2370 fifo_engine_status_id_type_chid_v();
2371 }
2372 } else {
2373 id = fifo_engine_status_id_v(status);
2374 is_tsg = fifo_engine_status_id_type_v(status) !=
2375 fifo_engine_status_id_type_chid_v();
2376 }
2377 break;
2378 }
2379
2380 *__id = id;
2381 *__is_tsg = is_tsg;
2382
2383 return active_engine_id;
2384}
2385
2386bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
2387 bool *verbose, u32 *ms)
2388{
2389 bool recover = false;
2390 bool progress = false;
2391 struct gk20a *g = ch->g;
2392
2393 if (gk20a_channel_get(ch)) {
2394 recover = gk20a_channel_update_and_check_timeout(ch,
2395 g->fifo_eng_timeout_us / 1000,
2396 &progress);
2397 *verbose = ch->timeout_debug_dump;
2398 *ms = ch->timeout_accumulated_ms;
2399 if (recover) {
2400 g->ops.fifo.set_error_notifier(ch,
2401 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2402 }
2403
2404 gk20a_channel_put(ch);
2405 }
2406 return recover;
2407}
2408
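/*
 * TSG-wide ctxsw timeout check: walks the TSG's channels and requests
 * recovery only if at least one channel reached its accumulated timeout
 * without gpfifo progress. If any channel did make progress, the
 * accumulated timeout of every channel in the TSG is reset instead.
 */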
2409bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
2410 bool *verbose, u32 *ms)
2411{
2412 struct channel_gk20a *ch;
2413 bool recover = false;
2414 bool progress = false;
2415 struct gk20a *g = tsg->g;
2416
2417 *verbose = false;
2418 *ms = g->fifo_eng_timeout_us / 1000;
2419
2420 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2421
2422 /* check if there was some progress on any of the TSG channels.
2423 * fifo recovery is needed if at least one channel reached the
2424 * maximum timeout without progress (update in gpfifo pointers).
2425 */
2426 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
2427 if (gk20a_channel_get(ch)) {
2428 recover = gk20a_channel_update_and_check_timeout(ch,
2429 *ms, &progress);
2430 if (progress || recover) {
2431 break;
2432 }
2433 gk20a_channel_put(ch);
2434 }
2435 }
2436
2437 if (recover) {
2438 /*
2439 * if one channel is presumed dead (no progress for too long),
2440 * then fifo recovery is needed. we can't really figure out
2441 * which channel caused the problem, so set timeout error
2442 * notifier for all channels.
2443 */
2444 nvgpu_log_info(g, "timeout on tsg=%d ch=%d",
2445 tsg->tsgid, ch->chid);
2446 *ms = ch->timeout_accumulated_ms;
2447 gk20a_channel_put(ch);
2448 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2449 channel_gk20a, ch_entry) {
2450 if (gk20a_channel_get(ch)) {
2451 ch->g->ops.fifo.set_error_notifier(ch,
2452 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2453 if (ch->timeout_debug_dump) {
2454 *verbose = true;
2455 }
2456 gk20a_channel_put(ch);
2457 }
2458 }
2459 } else if (progress) {
2460 /*
2461 * if at least one channel in the TSG made some progress, reset
2462 * accumulated timeout for all channels in the TSG. In
2463 * particular, this resets timeout for channels that already
2464 * completed their work
2465 */
2466 nvgpu_log_info(g, "progress on tsg=%d ch=%d",
2467 tsg->tsgid, ch->chid);
2468 gk20a_channel_put(ch);
2469 *ms = g->fifo_eng_timeout_us / 1000;
2470 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2471 channel_gk20a, ch_entry) {
2472 if (gk20a_channel_get(ch)) {
2473 ch->timeout_accumulated_ms = *ms;
2474 gk20a_channel_put(ch);
2475 }
2476 }
2477 }
2478
2479 /* if we could not detect progress on any of the channels, but none
2480 * of them has reached the timeout, there is nothing more to do:
2481 * timeout_accumulated_ms has been updated for all of them.
2482 */
2483 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2484 return recover;
2485}
2486
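/*
 * Scheduler error handler: locates the engine that is busy doing a context
 * switch, then, for ctxsw timeout errors, checks whether the channel/TSG
 * actually stalled (no gpfifo progress) before restarting the watchdogs and
 * triggering recovery. Other scheduler errors are only logged.
 */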
2487bool gk20a_fifo_handle_sched_error(struct gk20a *g)
2488{
2489 u32 sched_error;
2490 u32 engine_id;
2491 int id = -1;
2492 bool is_tsg = false;
2493 bool ret = false;
2494
2495 /* read the scheduler error register */
2496 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
2497
2498 engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);
2499 /*
2500 * Could not find the engine
2501 * Possible Causes:
2502 * a)
2503 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
2504 * fifo_engine_status register. Also while the engine is held in reset
2505 * h/w passes busy/idle straight through. fifo_engine_status registers
2506 * are correct in that there is no context switch outstanding
2507 * as the CTXSW is aborted when reset is asserted.
2508 * This is just a side effect of how gv100 and earlier versions of
2509 * ctxsw_timeout behave.
2510 * With gv11b and later, h/w snaps the context at the point of error
2511 * so that s/w can see the tsg_id which caused the HW timeout.
2512 * b)
2513 * If engines are not busy and ctxsw state is valid then intr occurred
2514 * in the past and if the ctxsw state has moved on to VALID from LOAD
2515 * or SAVE, it means that whatever timed out eventually finished
2516 * anyways. The problem with this is that s/w cannot conclude which
2517 * context caused the problem as maybe more switches occurred before
2518 * intr is handled.
2519 */
2520 if (engine_id == FIFO_INVAL_ENGINE_ID) {
2521 nvgpu_info(g, "fifo sched error: 0x%08x, failed to find engine "
2522 "that is busy doing ctxsw. "
2523 "Maybe ctxsw already happened", sched_error);
2524 ret = false;
2525 goto err;
2526 }
2527
2528 /* engine id is not a valid active engine - should never happen */
2529 if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
2530 nvgpu_err(g, "fifo sched error : 0x%08x, failed to find engine",
2531 sched_error);
2532 ret = false;
2533 goto err;
2534 }
2535
2536 if (fifo_intr_sched_error_code_f(sched_error) ==
2537 fifo_intr_sched_error_code_ctxsw_timeout_v()) {
2538 struct fifo_gk20a *f = &g->fifo;
2539 u32 ms = 0;
2540 bool verbose = false;
2541
2542 if (is_tsg) {
2543 ret = g->ops.fifo.check_tsg_ctxsw_timeout(
2544 &f->tsg[id], &verbose, &ms);
2545 } else {
2546 ret = g->ops.fifo.check_ch_ctxsw_timeout(
2547 &f->channel[id], &verbose, &ms);
2548 }
2549
2550 if (ret) {
2551 nvgpu_err(g,
2552 "fifo sched ctxsw timeout error: "
2553 "engine=%u, %s=%d, ms=%u",
2554 engine_id, is_tsg ? "tsg" : "ch", id, ms);
2555 /*
2556 * Cancel all channels' timeout since SCHED error might
2557 * trigger multiple watchdogs at a time
2558 */
2559 gk20a_channel_timeout_restart_all_channels(g);
2560 gk20a_fifo_recover(g, BIT(engine_id), id,
2561 is_tsg, true, verbose,
2562 RC_TYPE_CTXSW_TIMEOUT);
2563 } else {
2564 nvgpu_log_info(g,
2565 "fifo is waiting for ctx switch for %d ms, "
2566 "%s=%d", ms, is_tsg ? "tsg" : "ch", id);
2567 }
2568 } else {
2569 nvgpu_err(g,
2570 "fifo sched error : 0x%08x, engine=%u, %s=%d",
2571 sched_error, engine_id, is_tsg ? "tsg" : "ch", id);
2572 }
2573
2574err:
2575 return ret;
2576}
2577
2578static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
2579{
2580 bool print_channel_reset_log = false;
2581 u32 handled = 0;
2582
2583 nvgpu_log_fn(g, "fifo_intr=0x%08x", fifo_intr);
2584
2585 if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
2586 /* pio mode is unused. this shouldn't happen, ever. */
2587 /* should we clear it or just leave it pending? */
2588 nvgpu_err(g, "fifo pio error!");
2589 BUG_ON(1);
2590 }
2591
2592 if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
2593 u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
2594 nvgpu_err(g, "fifo bind error: 0x%08x", bind_error);
2595 print_channel_reset_log = true;
2596 handled |= fifo_intr_0_bind_error_pending_f();
2597 }
2598
2599 if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
2600 print_channel_reset_log = g->ops.fifo.handle_sched_error(g);
2601 handled |= fifo_intr_0_sched_error_pending_f();
2602 }
2603
2604 if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
2605 gk20a_fifo_handle_chsw_fault(g);
2606 handled |= fifo_intr_0_chsw_error_pending_f();
2607 }
2608
2609 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
2610 if (gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false)) {
2611 print_channel_reset_log = true;
2612 }
2613 handled |= fifo_intr_0_mmu_fault_pending_f();
2614 }
2615
2616 if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
2617 gk20a_fifo_handle_dropped_mmu_fault(g);
2618 handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
2619 }
2620
2621 print_channel_reset_log = !g->fifo.deferred_reset_pending
2622 && print_channel_reset_log;
2623
2624 if (print_channel_reset_log) {
2625 unsigned int engine_id;
2626 nvgpu_err(g,
2627 "channel reset initiated from %s; intr=0x%08x",
2628 __func__, fifo_intr);
2629 for (engine_id = 0;
2630 engine_id < g->fifo.num_engines;
2631 engine_id++) {
2632 u32 active_engine_id = g->fifo.active_engines_list[engine_id];
2633 u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
2634 nvgpu_log_fn(g, "enum:%d -> engine_id:%d", engine_enum,
2635 active_engine_id);
2636 fifo_pbdma_exception_status(g,
2637 &g->fifo.engine_info[active_engine_id]);
2638 fifo_engine_exception_status(g,
2639 &g->fifo.engine_info[active_engine_id]);
2640 }
2641 }
2642
2643 return handled;
2644}
2645
2646static inline void gk20a_fifo_reset_pbdma_header(struct gk20a *g, int pbdma_id)
2647{
2648 gk20a_writel(g, pbdma_pb_header_r(pbdma_id),
2649 pbdma_pb_header_first_true_f() |
2650 pbdma_pb_header_type_non_inc_f());
2651}
2652
2653void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
2654 int pbdma_method_index)
2655{
2656 u32 pbdma_method_stride;
2657 u32 pbdma_method_reg;
2658
2659 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2660 pbdma_method0_r(pbdma_id);
2661
2662 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2663 (pbdma_method_index * pbdma_method_stride);
2664
2665 gk20a_writel(g, pbdma_method_reg,
2666 pbdma_method0_valid_true_f() |
2667 pbdma_method0_first_true_f() |
2668 pbdma_method0_addr_f(
2669 pbdma_udma_nop_r() >> 2));
2670}
2671
2672static bool gk20a_fifo_is_sw_method_subch(struct gk20a *g, int pbdma_id,
2673 int pbdma_method_index)
2674{
2675 u32 pbdma_method_stride;
2676 u32 pbdma_method_reg, pbdma_method_subch;
2677
2678 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2679 pbdma_method0_r(pbdma_id);
2680
2681 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2682 (pbdma_method_index * pbdma_method_stride);
2683
2684 pbdma_method_subch = pbdma_method0_subch_v(
2685 gk20a_readl(g, pbdma_method_reg));
2686
2687 if (pbdma_method_subch == 5 ||
2688 pbdma_method_subch == 6 ||
2689 pbdma_method_subch == 7) {
2690 return true;
2691 }
2692
2693 return false;
2694}
2695
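/*
 * Decodes the per-PBDMA intr_0 status: fatal/restartable error bits are
 * logged together with the PB header, shadow and method registers; an
 * acquire timeout disables the acquire timer and, when timeouts are
 * enabled, flags a semaphore timeout notifier; pbentry/method/pbcrc/device
 * errors reset the PB header and/or the offending method slots. Paths that
 * need recovery return RC_TYPE_PBDMA_FAULT to the caller.
 */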
2696unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
2697 u32 pbdma_intr_0, u32 *handled, u32 *error_notifier)
2698{
2699 struct fifo_gk20a *f = &g->fifo;
2700 unsigned int rc_type = RC_TYPE_NO_RC;
2701 int i;
2702 unsigned long pbdma_intr_err;
2703 u32 bit;
2704
2705 if ((f->intr.pbdma.device_fatal_0 |
2706 f->intr.pbdma.channel_fatal_0 |
2707 f->intr.pbdma.restartable_0) & pbdma_intr_0) {
2708
2709 pbdma_intr_err = (unsigned long)pbdma_intr_0;
2710 for_each_set_bit(bit, &pbdma_intr_err, 32) {
2711 nvgpu_err(g, "PBDMA intr %s Error",
2712 pbdma_intr_fault_type_desc[bit]);
2713 }
2714
2715 nvgpu_err(g,
2716 "pbdma_intr_0(%d):0x%08x PBH: %08x "
2717 "SHADOW: %08x gp shadow0: %08x gp shadow1: %08x "
2718 "M0: %08x %08x %08x %08x ",
2719 pbdma_id, pbdma_intr_0,
2720 gk20a_readl(g, pbdma_pb_header_r(pbdma_id)),
2721 gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id)),
2722 gk20a_readl(g, pbdma_gp_shadow_0_r(pbdma_id)),
2723 gk20a_readl(g, pbdma_gp_shadow_1_r(pbdma_id)),
2724 gk20a_readl(g, pbdma_method0_r(pbdma_id)),
2725 gk20a_readl(g, pbdma_method1_r(pbdma_id)),
2726 gk20a_readl(g, pbdma_method2_r(pbdma_id)),
2727 gk20a_readl(g, pbdma_method3_r(pbdma_id))
2728 );
2729
2730 rc_type = RC_TYPE_PBDMA_FAULT;
2731 *handled |= ((f->intr.pbdma.device_fatal_0 |
2732 f->intr.pbdma.channel_fatal_0 |
2733 f->intr.pbdma.restartable_0) &
2734 pbdma_intr_0);
2735 }
2736
2737 if (pbdma_intr_0 & pbdma_intr_0_acquire_pending_f()) {
2738 u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id));
2739
2740 val &= ~pbdma_acquire_timeout_en_enable_f();
2741 gk20a_writel(g, pbdma_acquire_r(pbdma_id), val);
2742 if (nvgpu_is_timeouts_enabled(g)) {
2743 rc_type = RC_TYPE_PBDMA_FAULT;
2744 nvgpu_err(g,
2745 "semaphore acquire timeout!");
2746 *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
2747 }
2748 *handled |= pbdma_intr_0_acquire_pending_f();
2749 }
2750
2751 if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) {
2752 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2753 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2754 rc_type = RC_TYPE_PBDMA_FAULT;
2755 }
2756
2757 if (pbdma_intr_0 & pbdma_intr_0_method_pending_f()) {
2758 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2759 rc_type = RC_TYPE_PBDMA_FAULT;
2760 }
2761
2762 if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
2763 *error_notifier =
2764 NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
2765 rc_type = RC_TYPE_PBDMA_FAULT;
2766 }
2767
2768 if (pbdma_intr_0 & pbdma_intr_0_device_pending_f()) {
2769 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2770
2771 for (i = 0; i < 4; i++) {
2772 if (gk20a_fifo_is_sw_method_subch(g,
2773 pbdma_id, i)) {
2774 gk20a_fifo_reset_pbdma_method(g,
2775 pbdma_id, i);
2776 }
2777 }
2778 rc_type = RC_TYPE_PBDMA_FAULT;
2779 }
2780
2781 return rc_type;
2782}
2783
2784unsigned int gk20a_fifo_handle_pbdma_intr_1(struct gk20a *g,
2785 u32 pbdma_id, u32 pbdma_intr_1,
2786 u32 *handled, u32 *error_notifier)
2787{
2788 unsigned int rc_type = RC_TYPE_PBDMA_FAULT;
2789
2790 /*
2791 * all of the interrupts in _intr_1 are "host copy engine"
2792 * related, which is not supported. For now just make them
2793 * channel fatal.
2794 */
2795 nvgpu_err(g, "hce err: pbdma_intr_1(%d):0x%08x",
2796 pbdma_id, pbdma_intr_1);
2797 *handled |= pbdma_intr_1;
2798
2799 return rc_type;
2800}
2801
2802static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2803 struct fifo_gk20a *f, u32 pbdma_id,
2804 u32 error_notifier, u32 status)
2805{
2806 u32 id;
2807
2808 nvgpu_log(g, gpu_dbg_info, "pbdma id %d error notifier %d",
2809 pbdma_id, error_notifier);
2810 /* Remove channel from runlist */
2811 id = fifo_pbdma_status_id_v(status);
2812 if (fifo_pbdma_status_id_type_v(status)
2813 == fifo_pbdma_status_id_type_chid_v()) {
2814 struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
2815
2816 if (ch != NULL) {
2817 g->ops.fifo.set_error_notifier(ch, error_notifier);
2818 gk20a_fifo_recover_ch(g, ch, true, RC_TYPE_PBDMA_FAULT);
2819 gk20a_channel_put(ch);
2820 }
2821 } else if (fifo_pbdma_status_id_type_v(status)
2822 == fifo_pbdma_status_id_type_tsgid_v()) {
2823 struct tsg_gk20a *tsg = &f->tsg[id];
2824 struct channel_gk20a *ch = NULL;
2825
2826 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2827 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2828 channel_gk20a, ch_entry) {
2829 if (gk20a_channel_get(ch)) {
2830 g->ops.fifo.set_error_notifier(ch,
2831 error_notifier);
2832 gk20a_channel_put(ch);
2833 }
2834 }
2835 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2836 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PBDMA_FAULT);
2837 }
2838}
2839
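/*
 * Top-level per-PBDMA interrupt handler: reads and clears intr_0/intr_1,
 * lets the chip-specific decode hooks classify the error, and, when
 * recovery is allowed (rc == RC_YES), tears down the channel or TSG named
 * in the PBDMA status register.
 */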
2840u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2841 u32 pbdma_id, unsigned int rc)
2842{
2843 u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
2844 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
2845
2846 u32 handled = 0;
2847 u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
2848 unsigned int rc_type = RC_TYPE_NO_RC;
2849 u32 pbdma_status_info = 0;
2850
2851 if (pbdma_intr_0) {
2852 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2853 "pbdma id %d intr_0 0x%08x pending",
2854 pbdma_id, pbdma_intr_0);
2855
2856 if (g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, pbdma_intr_0,
2857 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2858 rc_type = RC_TYPE_PBDMA_FAULT;
2859
2860 pbdma_status_info = gk20a_readl(g,
2861 fifo_pbdma_status_r(pbdma_id));
2862 }
2863 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
2864 }
2865
2866 if (pbdma_intr_1) {
2867 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2868 "pbdma id %d intr_1 0x%08x pending",
2869 pbdma_id, pbdma_intr_1);
2870
2871 if (g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, pbdma_intr_1,
2872 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2873 rc_type = RC_TYPE_PBDMA_FAULT;
2874
2875 pbdma_status_info = gk20a_readl(g,
2876 fifo_pbdma_status_r(pbdma_id));
2877 }
2878 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
2879 }
2880
2881 if (rc == RC_YES && rc_type == RC_TYPE_PBDMA_FAULT) {
2882 gk20a_fifo_pbdma_fault_rc(g, f, pbdma_id, error_notifier,
2883 pbdma_status_info);
2884 }
2885
2886 return handled;
2887}
2888
2889static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
2890{
2891 struct fifo_gk20a *f = &g->fifo;
2892 u32 clear_intr = 0, i;
2893 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
2894 u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());
2895
2896 for (i = 0; i < host_num_pbdma; i++) {
2897 if (fifo_intr_pbdma_id_status_v(pbdma_pending, i)) {
2898 nvgpu_log(g, gpu_dbg_intr, "pbdma id %d intr pending", i);
2899 clear_intr |=
2900 gk20a_fifo_handle_pbdma_intr(g, f, i, RC_YES);
2901 }
2902 }
2903 return fifo_intr_0_pbdma_intr_pending_f();
2904}
2905
2906void gk20a_fifo_isr(struct gk20a *g)
2907{
2908 u32 error_intr_mask;
2909 u32 clear_intr = 0;
2910 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2911
2912 error_intr_mask = g->ops.fifo.intr_0_error_mask(g);
2913
2914 if (g->fifo.sw_ready) {
2915 /* note we're not actually in an "isr", but rather
2916 * in a threaded interrupt context... */
2917 nvgpu_mutex_acquire(&g->fifo.intr.isr.mutex);
2918
2919 nvgpu_log(g, gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
2920
2921 /* handle runlist update */
2922 if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
2923 gk20a_fifo_handle_runlist_event(g);
2924 clear_intr |= fifo_intr_0_runlist_event_pending_f();
2925 }
2926 if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f()) {
2927 clear_intr |= fifo_pbdma_isr(g, fifo_intr);
2928 }
2929
2930 if (g->ops.fifo.handle_ctxsw_timeout) {
2931 g->ops.fifo.handle_ctxsw_timeout(g, fifo_intr);
2932 }
2933
2934 if (unlikely((fifo_intr & error_intr_mask) != 0U)) {
2935 clear_intr |= fifo_error_isr(g, fifo_intr);
2936 }
2937
2938 nvgpu_mutex_release(&g->fifo.intr.isr.mutex);
2939 }
2940 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2941
2942 return;
2943}
2944
2945u32 gk20a_fifo_nonstall_isr(struct gk20a *g)
2946{
2947 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2948 u32 clear_intr = 0;
2949
2950 nvgpu_log(g, gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);
2951
2952 if (fifo_intr & fifo_intr_0_channel_intr_pending_f()) {
2953 clear_intr = fifo_intr_0_channel_intr_pending_f();
2954 }
2955
2956 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2957
2958 return GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE;
2959}
2960
2961void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
2962{
2963 if (is_tsg) {
2964 gk20a_writel(g, fifo_preempt_r(),
2965 fifo_preempt_id_f(id) |
2966 fifo_preempt_type_tsg_f());
2967 } else {
2968 gk20a_writel(g, fifo_preempt_r(),
2969 fifo_preempt_chid_f(id) |
2970 fifo_preempt_type_channel_f());
2971 }
2972}
2973
2974static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g)
2975{
2976 /* Use fifo_eng_timeout converted to ms for preempt
2977 * polling. gr_idle_timeout, i.e. 3000 ms, is not appropriate
2978 * for polling preempt completion, as the context switch timeout
2979 * gets triggered every 100 ms and context switch recovery
2980 * happens every 3000 ms */
2981
2982 return g->fifo_eng_timeout_us / 1000;
2983}
2984
2985int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
2986 unsigned int id_type)
2987{
2988 struct nvgpu_timeout timeout;
2989 u32 delay = GR_IDLE_CHECK_DEFAULT;
2990 int ret = -EBUSY;
2991
2992 nvgpu_timeout_init(g, &timeout, gk20a_fifo_get_preempt_timeout(g),
2993 NVGPU_TIMER_CPU_TIMER);
2994 do {
2995 if (!(gk20a_readl(g, fifo_preempt_r()) &
2996 fifo_preempt_pending_true_f())) {
2997 ret = 0;
2998 break;
2999 }
3000
3001 nvgpu_usleep_range(delay, delay * 2);
3002 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3003 } while (!nvgpu_timeout_expired(&timeout));
3004
3005 if (ret) {
3006 nvgpu_err(g, "preempt timeout: id: %u id_type: %d ",
3007 id, id_type);
3008 }
3009 return ret;
3010}
3011
3012void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3013{
3014 struct channel_gk20a *ch = NULL;
3015
3016 nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
3017
3018 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3019 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3020 channel_gk20a, ch_entry) {
3021 if (!gk20a_channel_get(ch)) {
3022 continue;
3023 }
3024 g->ops.fifo.set_error_notifier(ch,
3025 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3026 gk20a_channel_put(ch);
3027 }
3028 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3029 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
3030}
3031
3032void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
3033{
3034 nvgpu_err(g, "preempt channel %d timeout", ch->chid);
3035
3036 g->ops.fifo.set_error_notifier(ch,
3037 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3038 gk20a_fifo_recover_ch(g, ch, true,
3039 RC_TYPE_PREEMPT_TIMEOUT);
3040}
3041
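/*
 * Issues a preempt request for a channel or TSG and polls fifo_preempt_r()
 * until the pending bit clears or the poll times out. Callers are expected
 * to hold the relevant runlist locks (and, where possible, the PMU FIFO
 * mutex), as gk20a_fifo_preempt_channel()/_tsg() below do.
 */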
3042int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
3043{
3044 int ret;
3045 unsigned int id_type;
3046
3047 nvgpu_log_fn(g, "id: %d is_tsg: %d", id, is_tsg);
3048
3049 /* issue preempt */
3050 gk20a_fifo_issue_preempt(g, id, is_tsg);
3051
3052 id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
3053
3054 /* wait for preempt */
3055 ret = g->ops.fifo.is_preempt_pending(g, id, id_type);
3056
3057 return ret;
3058}
3059
3060int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
3061{
3062 struct fifo_gk20a *f = &g->fifo;
3063 u32 ret = 0;
3064 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3065 u32 mutex_ret = 0;
3066 u32 i;
3067
3068 nvgpu_log_fn(g, "chid: %d", ch->chid);
3069
3070 /* we have no idea which runlist we are using. lock all */
3071 for (i = 0; i < g->fifo.max_runlists; i++) {
3072 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3073 }
3074
3075 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3076
3077 ret = __locked_fifo_preempt(g, ch->chid, false);
3078
3079 if (!mutex_ret) {
3080 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3081 }
3082
3083 for (i = 0; i < g->fifo.max_runlists; i++) {
3084 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3085 }
3086
3087 if (ret) {
3088 if (nvgpu_platform_is_silicon(g)) {
3089 nvgpu_err(g, "preempt timed out for chid: %u, "
3090 "ctxsw timeout will trigger recovery if needed",
3091 ch->chid);
3092 } else {
3093 gk20a_fifo_preempt_timeout_rc(g, ch);
3094 }
3095 }
3096
3097 return ret;
3098}
3099
3100int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3101{
3102 struct fifo_gk20a *f = &g->fifo;
3103 u32 ret = 0;
3104 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3105 u32 mutex_ret = 0;
3106 u32 i;
3107
3108 nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
3109
3110 /* we have no idea which runlist we are using. lock all */
3111 for (i = 0; i < g->fifo.max_runlists; i++) {
3112 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3113 }
3114
3115 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3116
3117 ret = __locked_fifo_preempt(g, tsg->tsgid, true);
3118
3119 if (!mutex_ret) {
3120 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3121 }
3122
3123 for (i = 0; i < g->fifo.max_runlists; i++) {
3124 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3125 }
3126
3127 if (ret) {
3128 if (nvgpu_platform_is_silicon(g)) {
3129 nvgpu_err(g, "preempt timed out for tsgid: %u, "
3130 "ctxsw timeout will trigger recovery if needed",
3131 tsg->tsgid);
3132 } else {
3133 gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
3134 }
3135 }
3136
3137 return ret;
3138}
3139
3140int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch)
3141{
3142 int err;
3143 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
3144
3145 if (tsg != NULL) {
3146 err = g->ops.fifo.preempt_tsg(ch->g, tsg);
3147 } else {
3148 err = g->ops.fifo.preempt_channel(ch->g, ch);
3149 }
3150
3151 return err;
3152}
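/*
 * Illustrative caller sketch (hypothetical, not taken from this file): a
 * path that needs a channel idled would typically disable it first and then
 * preempt through the TSG-aware wrapper above, e.g.
 *
 *	g->ops.fifo.disable_channel(ch);
 *	err = gk20a_fifo_preempt(g, ch);
 *	if (err != 0)
 *		nvgpu_err(g, "preempt failed: %d", err);
 */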
3153
3154static void gk20a_fifo_sched_disable_rw(struct gk20a *g, u32 runlists_mask,
3155 u32 runlist_state)
3156{
3157 u32 reg_val;
3158
3159 reg_val = gk20a_readl(g, fifo_sched_disable_r());
3160
3161 if (runlist_state == RUNLIST_DISABLED) {
3162 reg_val |= runlists_mask;
3163 } else {
3164 reg_val &= (~runlists_mask);
3165 }
3166
3167 gk20a_writel(g, fifo_sched_disable_r(), reg_val);
3168
3169}
3170
3171void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
3172 u32 runlist_state)
3173{
3174 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3175 u32 mutex_ret;
3176
3177 nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
3178 runlists_mask, runlist_state);
3179
3180 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3181
3182 gk20a_fifo_sched_disable_rw(g, runlists_mask, runlist_state);
3183
3184 if (!mutex_ret) {
3185 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3186 }
3187}
3188
3189void gk20a_fifo_enable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3190{
3191 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3192 tsg->runlist_id), RUNLIST_ENABLED);
3193
3194}
3195
3196void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3197{
3198 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3199 tsg->runlist_id), RUNLIST_DISABLED);
3200}
3201
3202int gk20a_fifo_enable_engine_activity(struct gk20a *g,
3203 struct fifo_engine_info_gk20a *eng_info)
3204{
3205 nvgpu_log(g, gpu_dbg_info, "start");
3206
3207 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3208 eng_info->runlist_id), RUNLIST_ENABLED);
3209 return 0;
3210}
3211
3212int gk20a_fifo_enable_all_engine_activity(struct gk20a *g)
3213{
3214 unsigned int i;
3215 int err = 0, ret = 0;
3216
3217 for (i = 0; i < g->fifo.num_engines; i++) {
3218 u32 active_engine_id = g->fifo.active_engines_list[i];
3219 err = gk20a_fifo_enable_engine_activity(g,
3220 &g->fifo.engine_info[active_engine_id]);
3221 if (err) {
3222 nvgpu_err(g,
3223 "failed to enable engine %d activity", active_engine_id);
3224 ret = err;
3225 }
3226 }
3227
3228 return ret;
3229}
3230
3231int gk20a_fifo_disable_engine_activity(struct gk20a *g,
3232 struct fifo_engine_info_gk20a *eng_info,
3233 bool wait_for_idle)
3234{
3235 u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
3236 u32 pbdma_chid = FIFO_INVAL_CHANNEL_ID;
3237 u32 engine_chid = FIFO_INVAL_CHANNEL_ID;
3238 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3239 int mutex_ret;
3240 struct channel_gk20a *ch = NULL;
3241 int err = 0;
3242
3243 nvgpu_log_fn(g, " ");
3244
3245 gr_stat =
3246 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3247 if (fifo_engine_status_engine_v(gr_stat) ==
3248 fifo_engine_status_engine_busy_v() && !wait_for_idle) {
3249 return -EBUSY;
3250 }
3251
3252 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3253
3254 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3255 eng_info->runlist_id), RUNLIST_DISABLED);
3256
3257 /* chid from pbdma status */
3258 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
3259 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
3260 if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
3261 chan_stat == fifo_pbdma_status_chan_status_chsw_save_v()) {
3262 pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
3263 } else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
3264 chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v()) {
3265 pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
3266 }
3267
3268 if (pbdma_chid != FIFO_INVAL_CHANNEL_ID) {
3269 ch = gk20a_channel_from_id(g, pbdma_chid);
3270 if (ch != NULL) {
3271 err = g->ops.fifo.preempt_channel(g, ch);
3272 gk20a_channel_put(ch);
3273 }
3274 if (err != 0) {
3275 goto clean_up;
3276 }
3277 }
3278
3279 /* chid from engine status */
3280 eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3281 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
3282 if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
3283 ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v()) {
3284 engine_chid = fifo_engine_status_id_v(eng_stat);
3285 } else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
3286 ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3287 engine_chid = fifo_engine_status_next_id_v(eng_stat);
3288 }
3289
3290 if (engine_chid != FIFO_INVAL_ENGINE_ID && engine_chid != pbdma_chid) {
3291 ch = gk20a_channel_from_id(g, engine_chid);
3292 if (ch != NULL) {
3293 err = g->ops.fifo.preempt_channel(g, ch);
3294 gk20a_channel_put(ch);
3295 }
3296 if (err != 0) {
3297 goto clean_up;
3298 }
3299 }
3300
3301clean_up:
3302 if (!mutex_ret) {
3303 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3304 }
3305
3306 if (err) {
3307 nvgpu_log_fn(g, "failed");
3308 if (gk20a_fifo_enable_engine_activity(g, eng_info)) {
3309 nvgpu_err(g,
3310 "failed to enable gr engine activity");
3311 }
3312 } else {
3313 nvgpu_log_fn(g, "done");
3314 }
3315 return err;
3316}
3317
3318int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
3319 bool wait_for_idle)
3320{
3321 unsigned int i;
3322 int err = 0, ret = 0;
3323 u32 active_engine_id;
3324
3325 for (i = 0; i < g->fifo.num_engines; i++) {
3326 active_engine_id = g->fifo.active_engines_list[i];
3327 err = gk20a_fifo_disable_engine_activity(g,
3328 &g->fifo.engine_info[active_engine_id],
3329 wait_for_idle);
3330 if (err) {
3331 nvgpu_err(g, "failed to disable engine %d activity",
3332 active_engine_id);
3333 ret = err;
3334 break;
3335 }
3336 }
3337
3338 if (err) {
3339 while (i-- != 0) {
3340 active_engine_id = g->fifo.active_engines_list[i];
3341 err = gk20a_fifo_enable_engine_activity(g,
3342 &g->fifo.engine_info[active_engine_id]);
3343 if (err) {
3344 nvgpu_err(g,
3345 "failed to re-enable engine %d activity",
3346 active_engine_id);
3347 }
3348 }
3349 }
3350
3351 return ret;
3352}
3353
3354static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
3355{
3356 struct fifo_gk20a *f = &g->fifo;
3357 u32 engines = 0;
3358 unsigned int i;
3359
3360 for (i = 0; i < f->num_engines; i++) {
3361 u32 active_engine_id = g->fifo.active_engines_list[i];
3362 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
3363 bool engine_busy = fifo_engine_status_engine_v(status) ==
3364 fifo_engine_status_engine_busy_v();
3365
3366 if (engine_busy &&
3367 (f->engine_info[active_engine_id].runlist_id == runlist_id)) {
3368 engines |= BIT(active_engine_id);
3369 }
3370 }
3371
3372 if (engines) {
3373 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true,
3374 RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
3375 }
3376}
3377
3378int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
3379{
3380 struct nvgpu_timeout timeout;
3381 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3382 int ret = -ETIMEDOUT;
3383
3384 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3385 NVGPU_TIMER_CPU_TIMER);
3386
3387 do {
3388 if ((gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
3389 fifo_eng_runlist_pending_true_f()) == 0) {
3390 ret = 0;
3391 break;
3392 }
3393
3394 nvgpu_usleep_range(delay, delay * 2);
3395 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3396 } while (!nvgpu_timeout_expired(&timeout));
3397
3398 if (ret) {
3399 nvgpu_err(g, "runlist wait timeout: runlist id: %u",
3400 runlist_id);
3401 }
3402
3403 return ret;
3404}
3405
3406void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist)
3407{
3408
3409 u32 runlist_entry_0 = ram_rl_entry_id_f(tsg->tsgid) |
3410 ram_rl_entry_type_tsg_f() |
3411 ram_rl_entry_tsg_length_f(tsg->num_active_channels);
3412
3413 if (tsg->timeslice_timeout) {
3414 runlist_entry_0 |=
3415 ram_rl_entry_timeslice_scale_f(tsg->timeslice_scale) |
3416 ram_rl_entry_timeslice_timeout_f(tsg->timeslice_timeout);
3417 } else {
3418 runlist_entry_0 |=
3419 ram_rl_entry_timeslice_scale_f(
3420 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) |
3421 ram_rl_entry_timeslice_timeout_f(
3422 NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT);
3423 }
3424
3425 runlist[0] = runlist_entry_0;
3426 runlist[1] = 0;
3427
3428}
3429
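/*
 * Default timeslice in microseconds, scaled from PTIMER units:
 *   timeslice_us = (TIMEOUT << SCALE) * ptimer_src_freq / PTIMER_REF_FREQ_HZ
 * Illustrative arithmetic (values assumed for the example only): with
 * TIMEOUT = 128, SCALE = 3 and ptimer_src_freq equal to PTIMER_REF_FREQ_HZ,
 * this yields 128 << 3 = 1024 us.
 */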
3430u32 gk20a_fifo_default_timeslice_us(struct gk20a *g)
3431{
3432 return (((u64)(NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT <<
3433 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) *
3434 (u64)g->ptimer_src_freq) /
3435 (u64)PTIMER_REF_FREQ_HZ);
3436}
3437
3438void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist)
3439{
3440 runlist[0] = ram_rl_entry_chid_f(ch->chid);
3441 runlist[1] = 0;
3442}
3443
3444/* recursively construct a runlist with interleaved bare channels and TSGs */
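/*
 * Interleaving detail: for every TSG at the current level, entries from all
 * higher priority levels are emitted first (once per TSG when interleaving
 * is enabled, once in total when it is not), followed by the TSG entry and
 * its runnable channels. NULL is returned when the runlist buffer runs out
 * of entries.
 */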
3445u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
3446 struct fifo_runlist_info_gk20a *runlist,
3447 u32 cur_level,
3448 u32 *runlist_entry,
3449 bool interleave_enabled,
3450 bool prev_empty,
3451 u32 *entries_left)
3452{
3453 bool last_level = cur_level == NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
3454 struct channel_gk20a *ch;
3455 bool skip_next = false;
3456 u32 tsgid, count = 0;
3457 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3458 struct gk20a *g = f->g;
3459
3460 nvgpu_log_fn(g, " ");
3461
3462 /* for each TSG, T, on this level, insert all higher-level channels
3463 and TSGs before inserting T. */
3464 for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
3465 struct tsg_gk20a *tsg = &f->tsg[tsgid];
3466
3467 if (tsg->interleave_level != cur_level) {
3468 continue;
3469 }
3470
3471 if (!last_level && !skip_next) {
3472 runlist_entry = gk20a_runlist_construct_locked(f,
3473 runlist,
3474 cur_level + 1,
3475 runlist_entry,
3476 interleave_enabled,
3477 false,
3478 entries_left);
3479 if (!interleave_enabled) {
3480 skip_next = true;
3481 }
3482 }
3483
3484 if (*entries_left == 0U) {
3485 return NULL;
3486 }
3487
3488 /* add TSG entry */
3489 nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
3490 f->g->ops.fifo.get_tsg_runlist_entry(tsg, runlist_entry);
3491 nvgpu_log_info(g, "tsg runlist count %d runlist [0] %x [1] %x\n",
3492 count, runlist_entry[0], runlist_entry[1]);
3493 runlist_entry += runlist_entry_words;
3494 count++;
3495 (*entries_left)--;
3496
3497 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3498 /* add runnable channels bound to this TSG */
3499 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3500 channel_gk20a, ch_entry) {
3501 if (!test_bit((int)ch->chid,
3502 runlist->active_channels)) {
3503 continue;
3504 }
3505
3506 if (*entries_left == 0U) {
3507 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3508 return NULL;
3509 }
3510
3511 nvgpu_log_info(g, "add channel %d to runlist",
3512 ch->chid);
3513 f->g->ops.fifo.get_ch_runlist_entry(ch, runlist_entry);
3514 nvgpu_log_info(g,
3515 "run list count %d runlist [0] %x [1] %x\n",
3516 count, runlist_entry[0], runlist_entry[1]);
3517 count++;
3518 runlist_entry += runlist_entry_words;
3519 (*entries_left)--;
3520 }
3521 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3522 }
3523
3524 /* append entries from higher level if this level is empty */
3525 if (!count && !last_level) {
3526 runlist_entry = gk20a_runlist_construct_locked(f,
3527 runlist,
3528 cur_level + 1,
3529 runlist_entry,
3530 interleave_enabled,
3531 true,
3532 entries_left);
3533 }
3534
3535 /*
3536 * if previous and this level have entries, append
3537 * entries from higher level.
3538 *
3539 * ex. dropping from MEDIUM to LOW, need to insert HIGH
3540 */
3541 if (interleave_enabled && count && !prev_empty && !last_level) {
3542 runlist_entry = gk20a_runlist_construct_locked(f,
3543 runlist,
3544 cur_level + 1,
3545 runlist_entry,
3546 interleave_enabled,
3547 false,
3548 entries_left);
3549 }
3550 return runlist_entry;
3551}
3552
3553int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
3554 u32 id,
3555 u32 runlist_id,
3556 u32 new_level)
3557{
3558 nvgpu_log_fn(g, " ");
3559
3560 g->fifo.tsg[id].interleave_level = new_level;
3561
3562 return 0;
3563}
3564
3565int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
3566{
3567 struct gk20a *g = tsg->g;
3568
3569 if (timeslice < g->min_timeslice_us ||
3570 timeslice > g->max_timeslice_us) {
3571 return -EINVAL;
3572 }
3573
3574 gk20a_channel_get_timescale_from_timeslice(g, timeslice,
3575 &tsg->timeslice_timeout, &tsg->timeslice_scale);
3576
3577 tsg->timeslice_us = timeslice;
3578
3579 return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
3580}
3581
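/*
 * Programs fifo_runlist_base_r()/fifo_runlist_r() to submit "count" entries
 * from the selected runlist buffer. The printk() dump below appears to be
 * bring-up instrumentation carried in this tree; the actual submit is the
 * two gk20a_writel() calls.
 */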
3582void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
3583 u32 count, u32 buffer_index)
3584{
3585 struct fifo_runlist_info_gk20a *runlist = NULL;
3586 u64 runlist_iova;
3587 u32 val_wrote;
3588 struct nvgpu_os_linux *l;
3589
3590 runlist = &g->fifo.runlist_info[runlist_id];
3591 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]);
3592
3593
3594 if (count != 0) {
3595 printk(KERN_INFO "Runlist base register: %0x\n", fifo_runlist_base_r());
3596 printk(KERN_INFO "Runlist KVA: %px\n", (void*)(runlist->mem[buffer_index].cpu_va));
3597 printk(KERN_INFO "Runlist PA: %px\n", (void*)virt_to_phys((runlist->mem[buffer_index].cpu_va)));
3598 printk(KERN_INFO "Runlist dma_address: %px\n", (void*)(runlist->mem[buffer_index].priv.sgt->sgl->dma_address));
3599 printk(KERN_INFO "Runlist pages KVA: %px\n", (void*)(runlist->mem[buffer_index].priv.pages));
3600 printk(KERN_INFO "Runlist pages PA: %px\n", (void*)virt_to_phys(runlist->mem[buffer_index].priv.pages));
3601 printk(KERN_INFO "Runlist dma_address: %px\n", (void*)(runlist->mem[buffer_index].priv.sgt->sgl->dma_address));
3602 printk(KERN_INFO "Runlist page_to_phys %px + offset %px\n", (void*)(page_to_phys(sg_page(runlist->mem[buffer_index].priv.sgt->sgl))), (void*)(runlist->mem[buffer_index].priv.sgt->sgl->offset));
3603 printk(KERN_INFO "Runlist IOVA: %px\n", (void*)runlist_iova);
3604 printk(KERN_INFO "Using struct gk20* %px\n", g);
3605 printk(KERN_INFO "g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n", g->name, g->power_on, g->sw_ready, g->is_virtual);
3606 printk(KERN_INFO "COHERENT_SYSMEM? %d, iommuable? %d\n", nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM), nvgpu_iommuable(g));
3607 l = container_of(g, struct nvgpu_os_linux, g);
3608 printk(KERN_INFO "l->regs %px\n", l->regs);
3609 gk20a_writel(g, fifo_runlist_base_r(),
3610 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3611 nvgpu_aperture_mask(g, &runlist->mem[buffer_index],
3612 fifo_runlist_base_target_sys_mem_ncoh_f(),
3613 fifo_runlist_base_target_sys_mem_coh_f(),
3614 fifo_runlist_base_target_vid_mem_f()));
3615 val_wrote = nvgpu_readl(g, 0x2270);
3616 printk(KERN_INFO "Wrote runlist base as %0llx\n", (u64)(val_wrote & 0x0fffffff) << 12);
3617 }
3618
3619 gk20a_writel(g, fifo_runlist_r(),
3620 fifo_runlist_engine_f(runlist_id) |
3621 fifo_eng_runlist_length_f(count));
3622}
3623
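/*
 * Rebuilds the runlist into the inactive buffer (double-buffered via
 * runlist->cur_buffer), submits it to hardware and optionally waits for the
 * pending bit to clear. A chid of FIFO_INVAL_CHANNEL_ID with add=false
 * removes all channels (suspend); with add=true it restores them (resume).
 */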
3624int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3625 u32 chid, bool add,
3626 bool wait_for_finish)
3627{
3628 int ret = 0;
3629 struct fifo_gk20a *f = &g->fifo;
3630 struct fifo_runlist_info_gk20a *runlist = NULL;
3631 u32 *runlist_entry_base = NULL;
3632 u64 runlist_iova;
3633 u32 new_buf;
3634 struct channel_gk20a *ch = NULL;
3635 struct tsg_gk20a *tsg = NULL;
3636 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3637
3638 runlist = &f->runlist_info[runlist_id];
3639
3640 /* valid channel, add/remove it from active list.
3641 Otherwise, keep active list untouched for suspend/resume. */
3642 if (chid != FIFO_INVAL_CHANNEL_ID) {
3643 ch = &f->channel[chid];
3644 tsg = tsg_gk20a_from_ch(ch);
3645
3646 if (add) {
3647 if (test_and_set_bit(chid,
3648 runlist->active_channels) == 1) {
3649 return 0;
3650 }
3651 if (tsg && ++tsg->num_active_channels) {
3652 set_bit((int)f->channel[chid].tsgid,
3653 runlist->active_tsgs);
3654 }
3655 } else {
3656 if (test_and_clear_bit(chid,
3657 runlist->active_channels) == 0) {
3658 return 0;
3659 }
3660 if (tsg && --tsg->num_active_channels == 0) {
3661 clear_bit((int)f->channel[chid].tsgid,
3662 runlist->active_tsgs);
3663 }
3664 }
3665 }
3666
3667 new_buf = !runlist->cur_buffer;
3668
3669 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[new_buf]);
3670
3671 nvgpu_log_info(g, "runlist_id : %d, switch to new buffer 0x%16llx",
3672 runlist_id, (u64)runlist_iova);
3673
3674 if (!runlist_iova) {
3675 ret = -EINVAL;
3676 goto clean_up;
3677 }
3678
3679 runlist_entry_base = runlist->mem[new_buf].cpu_va;
3680 if (!runlist_entry_base) {
3681 ret = -ENOMEM;
3682 goto clean_up;
3683 }
3684
3685 if (chid != FIFO_INVAL_CHANNEL_ID || /* add/remove a valid channel */
3686 add /* resume to add all channels back */) {
3687 u32 max_entries = f->num_runlist_entries;
3688 u32 *runlist_end;
3689
3690 runlist_end = gk20a_runlist_construct_locked(f,
3691 runlist,
3692 0,
3693 runlist_entry_base,
3694 g->runlist_interleave,
3695 true,
3696 &max_entries);
3697 if (!runlist_end) {
3698 ret = -E2BIG;
3699 goto clean_up;
3700 }
3701 runlist->count = (runlist_end - runlist_entry_base) /
3702 runlist_entry_words;
3703 WARN_ON(runlist->count > f->num_runlist_entries);
3704 } else {
3705 /* suspend to remove all channels */
3706 runlist->count = 0;
3707 }
3708
3709 g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
3710
3711 if (wait_for_finish) {
3712 ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
3713
3714 if (ret == -ETIMEDOUT) {
3715 nvgpu_err(g, "runlist %d update timeout", runlist_id);
3716 /* trigger runlist update timeout recovery */
3717 return ret;
3718
3719 } else if (ret == -EINTR) {
3720 nvgpu_err(g, "runlist update interrupted");
3721 }
3722 }
3723
3724 runlist->cur_buffer = new_buf;
3725
3726clean_up:
3727 return ret;
3728}
3729
3730int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 chid,
3731 bool add, bool wait_for_finish)
3732{
3733 u32 ret = -EINVAL;
3734 u32 runlist_id = 0;
3735 u32 errcode;
3736 unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
3737
3738 if (!g) {
3739 goto end;
3740 }
3741
3742 ret = 0;
3743 for_each_set_bit(runlist_id, &ulong_runlist_ids, 32) {
3744 /* Capture the last failure error code */
3745 errcode = g->ops.fifo.update_runlist(g, runlist_id, chid, add, wait_for_finish);
3746 if (errcode) {
3747 nvgpu_err(g,
3748 "failed to update_runlist %d %d", runlist_id, errcode);
3749 ret = errcode;
3750 }
3751 }
3752end:
3753 return ret;
3754}
3755
3756/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
3757static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
3758 bool wait_preempt)
3759{
3760 struct gk20a *g = ch->g;
3761 struct fifo_runlist_info_gk20a *runlist =
3762 &g->fifo.runlist_info[ch->runlist_id];
3763 int ret = 0;
3764 u32 gr_eng_id = 0;
3765 u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
3766 u32 preempt_id;
3767 u32 preempt_type = 0;
3768
3769 if (1 != gk20a_fifo_get_engine_ids(
3770 g, &gr_eng_id, 1, ENGINE_GR_GK20A)) {
3771 return ret;
3772 }
3773 if (!(runlist->eng_bitmask & (1 << gr_eng_id))) {
3774 return ret;
3775 }
3776
3777 if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
3778 fifo_preempt_pending_true_f()) {
3779 return ret;
3780 }
3781
3782 fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3783 engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
3784 ctxstat = fifo_engine_status_ctx_status_v(engstat);
3785 if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3786 /* host switching to next context, preempt that if needed */
3787 preempt_id = fifo_engine_status_next_id_v(engstat);
3788 preempt_type = fifo_engine_status_next_id_type_v(engstat);
3789 } else {
3790 return ret;
3791 }
3792 if (preempt_id == ch->tsgid && preempt_type) {
3793 return ret;
3794 }
3795 fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3796 if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
3797 fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
3798 /* preempt useless if FECS acked save and started restore */
3799 return ret;
3800 }
3801
3802 gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
3803#ifdef TRACEPOINTS_ENABLED
3804 trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
3805 fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
3806 gk20a_readl(g, fifo_preempt_r()));
3807#endif
3808 if (wait_preempt) {
3809 g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type);
3810 }
3811#ifdef TRACEPOINTS_ENABLED
3812 trace_gk20a_reschedule_preempted_next(ch->chid);
3813#endif
3814 return ret;
3815}
3816
3817int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
3818{
3819 return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
3820}
3821
3822/* trigger host to expire current timeslice and reschedule runlist from front */
3823int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
3824 bool wait_preempt)
3825{
3826 struct gk20a *g = ch->g;
3827 struct fifo_runlist_info_gk20a *runlist;
3828 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3829 u32 mutex_ret;
3830 int ret = 0;
3831
3832 runlist = &g->fifo.runlist_info[ch->runlist_id];
3833 if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
3834 return -EBUSY;
3835 }
3836
3837 mutex_ret = nvgpu_pmu_mutex_acquire(
3838 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3839
3840 g->ops.fifo.runlist_hw_submit(
3841 g, ch->runlist_id, runlist->count, runlist->cur_buffer);
3842
3843 if (preempt_next) {
3844 __locked_fifo_reschedule_preempt_next(ch, wait_preempt);
3845 }
3846
3847 gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
3848
3849 if (!mutex_ret) {
3850 nvgpu_pmu_mutex_release(
3851 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3852 }
3853 nvgpu_mutex_release(&runlist->runlist_lock);
3854
3855 return ret;
3856}
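
/*
 * Illustrative sketch (not part of the original driver): a caller that wants
 * channel ch considered from the front of its runlist as soon as possible
 * could use the entry point above. The wrapper name is hypothetical;
 * gk20a_fifo_reschedule_runlist() is the real exported path.
 */
static int __maybe_unused example_boost_channel(struct channel_gk20a *ch)
{
	/*
	 * preempt_next = true also preempts whatever context host is about to
	 * load if it is not ours; see
	 * __locked_fifo_reschedule_preempt_next() above.
	 */
	return gk20a_fifo_reschedule_runlist(ch, true);
}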
3857
3858 /* Add or remove a channel from a runlist. Special cases below, in which
3859  * runlist->active_channels is NOT changed:
3860  * (chid == ~0 && !add) means remove all active channels from the runlist.
3861  * (chid == ~0 && add) means restore all active channels on the runlist. */
3862int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 chid,
3863 bool add, bool wait_for_finish)
3864{
3865 struct fifo_runlist_info_gk20a *runlist = NULL;
3866 struct fifo_gk20a *f = &g->fifo;
3867 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3868 u32 mutex_ret;
3869 int ret = 0;
3870
3871 nvgpu_log_fn(g, " ");
3872
3873 runlist = &f->runlist_info[runlist_id];
3874
3875 nvgpu_mutex_acquire(&runlist->runlist_lock);
3876
3877 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3878
3879 ret = gk20a_fifo_update_runlist_locked(g, runlist_id, chid, add,
3880 wait_for_finish);
3881
3882 if (!mutex_ret) {
3883 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3884 }
3885
3886 nvgpu_mutex_release(&runlist->runlist_lock);
3887
3888 if (ret == -ETIMEDOUT) {
3889 gk20a_fifo_runlist_reset_engines(g, runlist_id);
3890 }
3891
3892 return ret;
3893}
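
/*
 * Illustrative sketch (not part of the original driver): the special cases
 * described above, written out as a hypothetical suspend/resume helper.
 * Passing FIFO_INVAL_CHANNEL_ID leaves runlist->active_channels untouched,
 * so the same set of channels comes back on resume.
 */
static int __maybe_unused example_suspend_resume_runlist(struct gk20a *g,
					u32 runlist_id, bool resume)
{
	/* add = false empties the HW runlist; add = true rebuilds it. */
	return gk20a_fifo_update_runlist(g, runlist_id,
					FIFO_INVAL_CHANNEL_ID,
					resume, true);
}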
3894
3895int gk20a_fifo_suspend(struct gk20a *g)
3896{
3897 nvgpu_log_fn(g, " ");
3898
3899 /* stop bar1 snooping */
3900 if (g->ops.mm.is_bar1_supported(g)) {
3901 gk20a_writel(g, fifo_bar1_base_r(),
3902 fifo_bar1_base_valid_false_f());
3903 }
3904
3905 /* disable fifo intr */
3906 gk20a_writel(g, fifo_intr_en_0_r(), 0);
3907 gk20a_writel(g, fifo_intr_en_1_r(), 0);
3908
3909 nvgpu_log_fn(g, "done");
3910 return 0;
3911}
3912
3913bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
3914{
3915 if (gk20a_readl(g, fifo_intr_0_r()) &
3916 fifo_intr_0_mmu_fault_pending_f()) {
3917 return true;
3918 } else {
3919 return false;
3920 }
3921}
3922
3923bool gk20a_fifo_is_engine_busy(struct gk20a *g)
3924{
3925 u32 i, host_num_engines;
3926
3927 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3928
3929 for (i = 0; i < host_num_engines; i++) {
3930 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3931 if (fifo_engine_status_engine_v(status) ==
3932 fifo_engine_status_engine_busy_v()) {
3933 return true;
3934 }
3935 }
3936 return false;
3937}
3938
3939int gk20a_fifo_wait_engine_idle(struct gk20a *g)
3940{
3941 struct nvgpu_timeout timeout;
3942 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3943 int ret = -ETIMEDOUT;
3944 u32 i, host_num_engines;
3945
3946 nvgpu_log_fn(g, " ");
3947
3948 host_num_engines =
3949 nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3950
3951 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3952 NVGPU_TIMER_CPU_TIMER);
3953
3954 	for (i = 0; i < host_num_engines; i++) {
		/* reset per engine so an earlier idle engine cannot mask a
		 * later timeout */
		ret = -ETIMEDOUT;
3955 		do {
3956 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3957 if (!fifo_engine_status_engine_v(status)) {
3958 ret = 0;
3959 break;
3960 }
3961
3962 nvgpu_usleep_range(delay, delay * 2);
3963 delay = min_t(unsigned long,
3964 delay << 1, GR_IDLE_CHECK_MAX);
3965 } while (!nvgpu_timeout_expired(&timeout));
3966
3967 if (ret) {
3968 nvgpu_log_info(g, "cannot idle engine %u", i);
3969 break;
3970 }
3971 }
3972
3973 nvgpu_log_fn(g, "done");
3974
3975 return ret;
3976}
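
/*
 * Illustrative sketch (not part of the original driver): a quiesce helper a
 * teardown path might build from the two engine-status queries above. The
 * function name is hypothetical.
 */
static int __maybe_unused example_quiesce_engines(struct gk20a *g)
{
	/* Fast path: nothing to wait for if no engine reports busy. */
	if (!gk20a_fifo_is_engine_busy(g)) {
		return 0;
	}
	/* Otherwise poll with backoff up to the GR idle timeout. */
	return gk20a_fifo_wait_engine_idle(g);
}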
3977
3978u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
3979{
3980 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
3981}
3982
3983static const char * const ccsr_chan_status_str[] = {
3984 "idle",
3985 "pending",
3986 "pending_ctx_reload",
3987 "pending_acquire",
3988 "pending_acq_ctx_reload",
3989 "on_pbdma",
3990 "on_pbdma_and_eng",
3991 "on_eng",
3992 "on_eng_pending_acquire",
3993 "on_eng_pending",
3994 "on_pbdma_ctx_reload",
3995 "on_pbdma_and_eng_ctx_reload",
3996 "on_eng_ctx_reload",
3997 "on_eng_pending_ctx_reload",
3998 "on_eng_pending_acq_ctx_reload",
3999};
4000
4001static const char * const pbdma_chan_eng_ctx_status_str[] = {
4002 "invalid",
4003 "valid",
4004 "NA",
4005 "NA",
4006 "NA",
4007 "load",
4008 "save",
4009 "switch",
4010};
4011
4012static const char * const not_found_str[] = {
4013 "NOT FOUND"
4014};
4015
4016const char *gk20a_decode_ccsr_chan_status(u32 index)
4017{
4018 if (index >= ARRAY_SIZE(ccsr_chan_status_str)) {
4019 return not_found_str[0];
4020 } else {
4021 return ccsr_chan_status_str[index];
4022 }
4023}
4024
4025const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index)
4026{
4027 if (index >= ARRAY_SIZE(pbdma_chan_eng_ctx_status_str)) {
4028 return not_found_str[0];
4029 } else {
4030 return pbdma_chan_eng_ctx_status_str[index];
4031 }
4032}
4033
4034bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid)
4035{
4036 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4037
4038 return ccsr_channel_next_v(channel) == ccsr_channel_next_true_v();
4039}
4040
4041bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid)
4042{
4043 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4044 u32 status = ccsr_channel_status_v(channel);
4045
4046 return (status == ccsr_channel_status_pending_ctx_reload_v() ||
4047 status == ccsr_channel_status_pending_acq_ctx_reload_v() ||
4048 status == ccsr_channel_status_on_pbdma_ctx_reload_v() ||
4049 status == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() ||
4050 status == ccsr_channel_status_on_eng_ctx_reload_v() ||
4051 status == ccsr_channel_status_on_eng_pending_ctx_reload_v() ||
4052 status == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v());
4053}
4054
4055void gk20a_dump_channel_status_ramfc(struct gk20a *g,
4056 struct gk20a_debug_output *o,
4057 u32 chid,
4058 struct ch_state *ch_state)
4059{
4060 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4061 u32 status = ccsr_channel_status_v(channel);
4062 u32 syncpointa, syncpointb;
4063 u32 *inst_mem;
4064 struct channel_gk20a *c = g->fifo.channel + chid;
4065 struct nvgpu_semaphore_int *hw_sema = NULL;
4066
4067 if (c->hw_sema) {
4068 hw_sema = c->hw_sema;
4069 }
4070
4071 if (!ch_state) {
4072 return;
4073 }
4074
4075 inst_mem = &ch_state->inst_block[0];
4076
4077 syncpointa = inst_mem[ram_fc_syncpointa_w()];
4078 syncpointb = inst_mem[ram_fc_syncpointb_w()];
4079
4080 gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", chid,
4081 g->name,
4082 ch_state->pid,
4083 ch_state->refs,
4084 ch_state->deterministic ? ", deterministic" : "");
4085 gk20a_debug_output(o, "channel status: %s in use %s %s\n",
4086 ccsr_channel_enable_v(channel) ? "" : "not",
4087 gk20a_decode_ccsr_chan_status(status),
4088 ccsr_channel_busy_v(channel) ? "busy" : "not busy");
4089 gk20a_debug_output(o, "RAMFC : TOP: %016llx PUT: %016llx GET: %016llx "
4090 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
4091 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
4092 (u64)inst_mem[ram_fc_pb_top_level_get_w()] +
4093 ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
4094 (u64)inst_mem[ram_fc_pb_put_w()] +
4095 ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
4096 (u64)inst_mem[ram_fc_pb_get_w()] +
4097 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
4098 (u64)inst_mem[ram_fc_pb_fetch_w()] +
4099 ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
4100 inst_mem[ram_fc_pb_header_w()],
4101 inst_mem[ram_fc_pb_count_w()],
4102 syncpointa,
4103 syncpointb,
4104 inst_mem[ram_fc_semaphorea_w()],
4105 inst_mem[ram_fc_semaphoreb_w()],
4106 inst_mem[ram_fc_semaphorec_w()],
4107 inst_mem[ram_fc_semaphored_w()]);
4108 if (hw_sema) {
4109 gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
4110 "next_val: 0x%08x addr: 0x%010llx\n",
4111 __nvgpu_semaphore_read(hw_sema),
4112 nvgpu_atomic_read(&hw_sema->next_value),
4113 nvgpu_hw_sema_addr(hw_sema));
4114 }
4115
4116#ifdef CONFIG_TEGRA_GK20A_NVHOST
4117 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
4118 && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
4119 pbdma_syncpointb_wait_switch_en_v()))
		/* status 3: pending_acquire, 8: on_eng_pending_acquire */
4120 		gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n",
4121 			(status == 3 || status == 8) ? "Waiting" : "Waited",
4122 pbdma_syncpointb_syncpt_index_v(syncpointb),
4123 nvgpu_nvhost_syncpt_get_name(g->nvhost_dev,
4124 pbdma_syncpointb_syncpt_index_v(syncpointb)),
4125 pbdma_syncpointa_payload_v(syncpointa));
4126#endif
4127
4128 gk20a_debug_output(o, "\n");
4129}
4130
4131void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
4132 struct gk20a_debug_output *o)
4133{
4134 struct fifo_gk20a *f = &g->fifo;
4135 u32 chid;
4136 struct ch_state **ch_state;
4137
4138 ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
4139 if (!ch_state) {
4140 gk20a_debug_output(o, "cannot alloc memory for channels\n");
4141 return;
4142 }
4143
4144 for (chid = 0; chid < f->num_channels; chid++) {
4145 struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
4146 if (ch != NULL) {
4147 ch_state[chid] =
4148 nvgpu_kmalloc(g, sizeof(struct ch_state) +
4149 ram_in_alloc_size_v());
4150 			/* Keep the channel ref when the alloc succeeds; it is
4151 			 * dropped in the copy loop below. */
4152 if (!ch_state[chid]) {
4153 gk20a_channel_put(ch);
4154 }
4155 }
4156 }
4157
4158 for (chid = 0; chid < f->num_channels; chid++) {
4159 struct channel_gk20a *ch = &f->channel[chid];
4160 if (!ch_state[chid]) {
4161 continue;
4162 }
4163
4164 ch_state[chid]->pid = ch->pid;
4165 ch_state[chid]->refs = nvgpu_atomic_read(&ch->ref_count);
4166 ch_state[chid]->deterministic = ch->deterministic;
4167 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
4168 &ch_state[chid]->inst_block[0],
4169 ram_in_alloc_size_v());
4170 gk20a_channel_put(ch);
4171 }
4172 for (chid = 0; chid < f->num_channels; chid++) {
4173 if (ch_state[chid]) {
4174 g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
4175 ch_state[chid]);
4176 nvgpu_kfree(g, ch_state[chid]);
4177 }
4178 }
4179 nvgpu_kfree(g, ch_state);
4180}
4181
4182void gk20a_dump_pbdma_status(struct gk20a *g,
4183 struct gk20a_debug_output *o)
4184{
4185 u32 i, host_num_pbdma;
4186
4187 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
4188
4189 for (i = 0; i < host_num_pbdma; i++) {
4190 u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
4191 u32 chan_status = fifo_pbdma_status_chan_status_v(status);
4192
4193 gk20a_debug_output(o, "%s pbdma %d: ", g->name, i);
4194 gk20a_debug_output(o,
4195 "id: %d (%s), next_id: %d (%s) chan status: %s\n",
4196 fifo_pbdma_status_id_v(status),
4197 fifo_pbdma_status_id_type_v(status) ?
4198 "tsg" : "channel",
4199 fifo_pbdma_status_next_id_v(status),
4200 fifo_pbdma_status_next_id_type_v(status) ?
4201 "tsg" : "channel",
4202 gk20a_decode_pbdma_chan_eng_ctx_status(chan_status));
4203 gk20a_debug_output(o, "PBDMA_PUT: %016llx PBDMA_GET: %016llx "
4204 "GP_PUT: %08x GP_GET: %08x "
4205 "FETCH: %08x HEADER: %08x\n"
4206 "HDR: %08x SHADOW0: %08x SHADOW1: %08x",
4207 (u64)gk20a_readl(g, pbdma_put_r(i)) +
4208 ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
4209 (u64)gk20a_readl(g, pbdma_get_r(i)) +
4210 ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
4211 gk20a_readl(g, pbdma_gp_put_r(i)),
4212 gk20a_readl(g, pbdma_gp_get_r(i)),
4213 gk20a_readl(g, pbdma_gp_fetch_r(i)),
4214 gk20a_readl(g, pbdma_pb_header_r(i)),
4215 gk20a_readl(g, pbdma_hdr_shadow_r(i)),
4216 gk20a_readl(g, pbdma_gp_shadow_0_r(i)),
4217 gk20a_readl(g, pbdma_gp_shadow_1_r(i)));
4218 }
4219 gk20a_debug_output(o, "\n");
4220}
4221
4222void gk20a_dump_eng_status(struct gk20a *g,
4223 struct gk20a_debug_output *o)
4224{
4225 u32 i, host_num_engines;
4226
4227 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
4228
4229 for (i = 0; i < host_num_engines; i++) {
4230 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
4231 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
4232
4233 gk20a_debug_output(o, "%s eng %d: ", g->name, i);
4234 gk20a_debug_output(o,
4235 "id: %d (%s), next_id: %d (%s), ctx status: %s ",
4236 fifo_engine_status_id_v(status),
4237 fifo_engine_status_id_type_v(status) ?
4238 "tsg" : "channel",
4239 fifo_engine_status_next_id_v(status),
4240 fifo_engine_status_next_id_type_v(status) ?
4241 "tsg" : "channel",
4242 gk20a_decode_pbdma_chan_eng_ctx_status(ctx_status));
4243
4244 if (fifo_engine_status_faulted_v(status)) {
4245 gk20a_debug_output(o, "faulted ");
4246 }
4247 if (fifo_engine_status_engine_v(status)) {
4248 gk20a_debug_output(o, "busy ");
4249 }
4250 gk20a_debug_output(o, "\n");
4251 }
4252 gk20a_debug_output(o, "\n");
4253}
4254
4255void gk20a_fifo_enable_channel(struct channel_gk20a *ch)
4256{
4257 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4258 gk20a_readl(ch->g, ccsr_channel_r(ch->chid)) |
4259 ccsr_channel_enable_set_true_f());
4260}
4261
4262void gk20a_fifo_disable_channel(struct channel_gk20a *ch)
4263{
4264 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4265 gk20a_readl(ch->g,
4266 ccsr_channel_r(ch->chid)) |
4267 ccsr_channel_enable_clr_true_f());
4268}
4269
4270void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
4271{
4272 struct gk20a *g = ch_gk20a->g;
4273
4274 nvgpu_log_fn(g, " ");
4275
4276 if (nvgpu_atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
4277 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->chid),
4278 ccsr_channel_inst_ptr_f(0) |
4279 ccsr_channel_inst_bind_false_f());
4280 }
4281}
4282
4283static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
4284{
4285 u32 addr_lo;
4286 u32 addr_hi;
4287 struct gk20a *g = c->g;
4288
4289 nvgpu_log_fn(g, " ");
4290
4291 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
4292 addr_hi = u64_hi32(c->userd_iova);
4293
4294 	nvgpu_log_info(g, "channel %d : set ramfc userd 0x%016llx",
4295 c->chid, (u64)c->userd_iova);
4296
4297 nvgpu_mem_wr32(g, &c->inst_block,
4298 ram_in_ramfc_w() + ram_fc_userd_w(),
4299 nvgpu_aperture_mask(g, &g->fifo.userd,
4300 pbdma_userd_target_sys_mem_ncoh_f(),
4301 pbdma_userd_target_sys_mem_coh_f(),
4302 pbdma_userd_target_vid_mem_f()) |
4303 pbdma_userd_addr_f(addr_lo));
4304
4305 nvgpu_mem_wr32(g, &c->inst_block,
4306 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
4307 pbdma_userd_hi_addr_f(addr_hi));
4308
4309 return 0;
4310}
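
/*
 * Worked example (illustrative numbers, an assumption rather than a quote
 * from the hw headers): with ram_userd_base_shift_v() == 9 (512-byte USERD
 * alignment) and userd_iova == 0x1_2345_6200, the commit above writes
 * addr_lo = 0x91a2b1 into the low word and addr_hi = 0x1 (the unshifted
 * high 32 bits) into the high word.
 */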
4311
4312int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
4313 u64 gpfifo_base, u32 gpfifo_entries,
4314 unsigned long timeout,
4315 u32 flags)
4316{
4317 struct gk20a *g = c->g;
4318 struct nvgpu_mem *mem = &c->inst_block;
4319
4320 nvgpu_log_fn(g, " ");
4321
4322 nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v());
4323
4324 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
4325 pbdma_gp_base_offset_f(
4326 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
4327
4328 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
4329 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
4330 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
4331
4332 nvgpu_mem_wr32(g, mem, ram_fc_signature_w(),
4333 c->g->ops.fifo.get_pbdma_signature(c->g));
4334
4335 nvgpu_mem_wr32(g, mem, ram_fc_formats_w(),
4336 pbdma_formats_gp_fermi0_f() |
4337 pbdma_formats_pb_fermi1_f() |
4338 pbdma_formats_mp_fermi0_f());
4339
4340 nvgpu_mem_wr32(g, mem, ram_fc_pb_header_w(),
4341 pbdma_pb_header_priv_user_f() |
4342 pbdma_pb_header_method_zero_f() |
4343 pbdma_pb_header_subchannel_zero_f() |
4344 pbdma_pb_header_level_main_f() |
4345 pbdma_pb_header_first_true_f() |
4346 pbdma_pb_header_type_inc_f());
4347
4348 nvgpu_mem_wr32(g, mem, ram_fc_subdevice_w(),
4349 pbdma_subdevice_id_f(1) |
4350 pbdma_subdevice_status_active_f() |
4351 pbdma_subdevice_channel_dma_enable_f());
4352
4353 nvgpu_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
4354
4355 nvgpu_mem_wr32(g, mem, ram_fc_acquire_w(),
4356 g->ops.fifo.pbdma_acquire_val(timeout));
4357
4358 nvgpu_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
4359 fifo_runlist_timeslice_timeout_128_f() |
4360 fifo_runlist_timeslice_timescale_3_f() |
4361 fifo_runlist_timeslice_enable_true_f());
4362
4363 nvgpu_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
4364 fifo_pb_timeslice_timeout_16_f() |
4365 fifo_pb_timeslice_timescale_0_f() |
4366 fifo_pb_timeslice_enable_true_f());
4367
4368 nvgpu_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->chid));
4369
4370 if (c->is_privileged_channel) {
4371 gk20a_fifo_setup_ramfc_for_privileged_channel(c);
4372 }
4373
4374 return gk20a_fifo_commit_userd(c);
4375}
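
/*
 * Illustrative sketch (not part of the original driver): only
 * ilog2(gpfifo_entries) is stored in GP_BASE_HI above, so callers are
 * expected to pass a power-of-two entry count. The values below are
 * examples, not defaults taken from the driver.
 */
static int __maybe_unused example_setup_channel_fifo(struct channel_gk20a *c,
						u64 gpfifo_base)
{
	/* 1024 GPFIFO entries, 3000 ms acquire/wdt timeout, no flags. */
	return gk20a_fifo_setup_ramfc(c, gpfifo_base, 1024, 3000, 0);
}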
4376
4377void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
4378{
4379 struct gk20a *g = c->g;
4380 struct nvgpu_mem *mem = &c->inst_block;
4381
4382 nvgpu_log_info(g, "channel %d : set ramfc privileged_channel", c->chid);
4383
4384 /* Enable HCE priv mode for phys mode transfer */
4385 nvgpu_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
4386 pbdma_hce_ctrl_hce_priv_mode_yes_f());
4387}
4388
4389int gk20a_fifo_setup_userd(struct channel_gk20a *c)
4390{
4391 struct gk20a *g = c->g;
4392 struct nvgpu_mem *mem;
4393 u32 offset;
4394
4395 nvgpu_log_fn(g, " ");
4396
4397 if (nvgpu_mem_is_valid(&c->usermode_userd)) {
4398 mem = &c->usermode_userd;
4399 offset = 0;
4400 } else {
4401 mem = &g->fifo.userd;
4402 offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
4403 }
4404
4405 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
4406 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
4407 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
4408 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
4409 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
4410 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
4411 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
4412 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
4413 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
4414 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
4415
4416 return 0;
4417}
4418
4419int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
4420{
4421 int err;
4422
4423 nvgpu_log_fn(g, " ");
4424
4425 err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
4426 if (err) {
4427 return err;
4428 }
4429
4430 	nvgpu_log_info(g, "channel %d inst block physical addr: 0x%016llx",
4431 ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
4432
4433 nvgpu_log_fn(g, "done");
4434 return 0;
4435}
4436
4437void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
4438{
4439 nvgpu_free_inst_block(g, &ch->inst_block);
4440}
4441
4442u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
4443{
4444 return gk20a_bar1_readl(g,
4445 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
4446}
4447
4448u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c)
4449{
4450 u32 lo = gk20a_bar1_readl(g,
4451 c->userd_gpu_va + sizeof(u32) * ram_userd_get_w());
4452 u32 hi = gk20a_bar1_readl(g,
4453 c->userd_gpu_va + sizeof(u32) * ram_userd_get_hi_w());
4454
4455 return ((u64)hi << 32) | lo;
4456}
4457
4458void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
4459{
4460 gk20a_bar1_writel(g,
4461 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
4462 c->gpfifo.put);
4463}
4464
4465u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
4466{
4467 u32 val, exp, man;
4468 unsigned int val_len;
4469
4470 val = pbdma_acquire_retry_man_2_f() |
4471 pbdma_acquire_retry_exp_2_f();
4472
4473 if (!timeout) {
4474 return val;
4475 }
4476
4477 timeout *= 80UL;
4478 do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
4479 timeout *= 1000000UL; /* ms -> ns */
4480 do_div(timeout, 1024); /* in unit of 1024ns */
4481 val_len = fls(timeout >> 32) + 32;
4482 if (val_len == 32) {
4483 val_len = fls(timeout);
4484 }
4485 if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
4486 exp = pbdma_acquire_timeout_exp_max_v();
4487 man = pbdma_acquire_timeout_man_max_v();
4488 } else if (val_len > 16) {
4489 exp = val_len - 16;
4490 man = timeout >> exp;
4491 } else {
4492 exp = 0;
4493 man = timeout;
4494 }
4495
4496 val |= pbdma_acquire_timeout_exp_f(exp) |
4497 pbdma_acquire_timeout_man_f(man) |
4498 pbdma_acquire_timeout_en_enable_f();
4499
4500 return val;
4501}
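
/*
 * Worked example (illustrative numbers) for the encoding above: with a
 * 3000 ms channel watchdog, the acquire timeout becomes 80% of that,
 * i.e. 2400 ms = 2,400,000,000 ns = 2,343,750 units of 1024 ns. That value
 * needs 22 bits, so exp = 22 - 16 = 6 and man = 2,343,750 >> 6 = 36,621;
 * the HW then times out after roughly man << exp units, i.e. about 2.4 s.
 */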
4502
4503const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
4504{
4505 switch (interleave_level) {
4506 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
4507 return "LOW";
4508
4509 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
4510 return "MEDIUM";
4511
4512 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
4513 return "HIGH";
4514
4515 default:
4516 return "?";
4517 }
4518}
4519
4520u32 gk20a_fifo_get_sema_wait_cmd_size(void)
4521{
4522 return 8;
4523}
4524
4525u32 gk20a_fifo_get_sema_incr_cmd_size(void)
4526{
4527 return 10;
4528}
4529
4530void gk20a_fifo_add_sema_cmd(struct gk20a *g,
4531 struct nvgpu_semaphore *s, u64 sema_va,
4532 struct priv_cmd_entry *cmd,
4533 u32 off, bool acquire, bool wfi)
4534{
4535 nvgpu_log_fn(g, " ");
4536
4537 /* semaphore_a */
4538 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
4539 /* offset_upper */
4540 nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff);
4541 /* semaphore_b */
4542 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
4543 /* offset */
4544 nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff);
4545
4546 if (acquire) {
4547 /* semaphore_c */
4548 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4549 /* payload */
4550 nvgpu_mem_wr32(g, cmd->mem, off++,
4551 nvgpu_semaphore_get_value(s));
4552 /* semaphore_d */
4553 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4554 /* operation: acq_geq, switch_en */
4555 nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
4556 } else {
4557 /* semaphore_c */
4558 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4559 /* payload */
4560 nvgpu_mem_wr32(g, cmd->mem, off++,
4561 nvgpu_semaphore_get_value(s));
4562 /* semaphore_d */
4563 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4564 /* operation: release, wfi */
4565 nvgpu_mem_wr32(g, cmd->mem, off++,
4566 0x2 | ((wfi ? 0x0 : 0x1) << 20));
4567 /* non_stall_int */
4568 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
4569 /* ignored */
4570 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4571 }
4572}
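
/*
 * Worked example (illustrative values) of the acquire stream emitted above
 * for sema_va = 0x1_2345_6780 and a semaphore payload of 42:
 *
 *   0x20010004 0x00000001   semaphore_a: VA bits 39:32
 *   0x20010005 0x23456780   semaphore_b: VA bits 31:0
 *   0x20010006 0x0000002a   semaphore_c: payload
 *   0x20010007 0x00001004   semaphore_d: acq_geq | switch_en
 *
 * which matches the 8 words reported by gk20a_fifo_get_sema_wait_cmd_size();
 * the release path appends the non_stall_int pair for a total of 10 words.
 */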
4573
4574#ifdef CONFIG_TEGRA_GK20A_NVHOST
4575void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
4576 struct priv_cmd_entry *cmd, u32 off,
4577 u32 id, u32 thresh, u64 gpu_va)
4578{
4579 nvgpu_log_fn(g, " ");
4580
4581 off = cmd->off + off;
4582 /* syncpoint_a */
4583 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4584 /* payload */
4585 nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
4586 /* syncpoint_b */
4587 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4588 /* syncpt_id, switch_en, wait */
4589 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
4590}
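
/*
 * Worked example (illustrative values) of the wait emitted above for
 * syncpoint id 5 at threshold 100:
 *
 *   0x2001001c 0x00000064   syncpoint_a: threshold
 *   0x2001001d 0x00000510   syncpoint_b: (5 << 8) | switch_en | wait
 *
 * i.e. the 4 words reported by gk20a_fifo_get_syncpt_wait_cmd_size().
 */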
4591
4592u32 gk20a_fifo_get_syncpt_wait_cmd_size(void)
4593{
4594 return 4;
4595}
4596
4597u32 gk20a_fifo_get_syncpt_incr_per_release(void)
4598{
4599 return 2;
4600}
4601
4602void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
4603 bool wfi_cmd, struct priv_cmd_entry *cmd,
4604 u32 id, u64 gpu_va)
4605{
4606 u32 off = cmd->off;
4607
4608 nvgpu_log_fn(g, " ");
4609 if (wfi_cmd) {
4610 /* wfi */
4611 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001E);
4612 /* handle, ignored */
4613 nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000);
4614 }
4615 /* syncpoint_a */
4616 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4617 /* payload, ignored */
4618 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4619 /* syncpoint_b */
4620 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4621 /* syncpt_id, incr */
4622 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4623 /* syncpoint_b */
4624 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4625 /* syncpt_id, incr */
4626 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4627
4628}
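
/*
 * Worked example (illustrative values) of the increment emitted above for
 * syncpoint id 5 with wfi_cmd = true:
 *
 *   0x2001001e 0x00000000   wfi
 *   0x2001001c 0x00000000   syncpoint_a: payload ignored
 *   0x2001001d 0x00000501   syncpoint_b: (5 << 8) | incr
 *   0x2001001d 0x00000501   syncpoint_b: second incr
 *
 * i.e. the 8 words reported by gk20a_fifo_get_syncpt_incr_cmd_size(true)
 * below (6 without the wfi pair).
 */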
4629
4630u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd)
4631{
4632 	if (wfi_cmd) {
4633 		return 8;
4634 	}
4635 	return 6;
4636}
4637
4638void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
4639 struct nvgpu_mem *syncpt_buf)
4640{
4641 	/* Nothing to free: gk20a does not allocate a syncpt buffer. */
4642}
4643
4644int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
4645 u32 syncpt_id, struct nvgpu_mem *syncpt_buf)
4646{
4647 return 0;
4648}
4649#endif