path: root/include/gk20a/fifo_gk20a.c
author Joshua Bakita <bakitajoshua@gmail.com> 2024-09-25 16:09:09 -0400
committer Joshua Bakita <bakitajoshua@gmail.com> 2024-09-25 16:09:09 -0400
commit f347fde22f1297e4f022600d201780d5ead78114 (patch)
tree 76be305d6187003a1e0486ff6e91efb1062ae118 /include/gk20a/fifo_gk20a.c
parent 8340d234d78a7d0f46c11a584de538148b78b7cb (diff)
Delete no-longer-needed nvgpu headers (HEAD, master, jbakita-wip)
The dependency on these was removed in commit 8340d234.
Diffstat (limited to 'include/gk20a/fifo_gk20a.c')
-rw-r--r-- include/gk20a/fifo_gk20a.c 4641
1 file changed, 0 insertions, 4641 deletions
diff --git a/include/gk20a/fifo_gk20a.c b/include/gk20a/fifo_gk20a.c
deleted file mode 100644
index 77babc7..0000000
--- a/include/gk20a/fifo_gk20a.c
+++ /dev/null
@@ -1,4641 +0,0 @@
1/*
2 * GK20A Graphics FIFO (gr host)
3 *
4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/mm.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/timers.h>
30#include <nvgpu/semaphore.h>
31#include <nvgpu/enabled.h>
32#include <nvgpu/kmem.h>
33#include <nvgpu/log.h>
34#include <nvgpu/soc.h>
35#include <nvgpu/atomic.h>
36#include <nvgpu/bug.h>
37#include <nvgpu/log2.h>
38#include <nvgpu/debug.h>
39#include <nvgpu/nvhost.h>
40#include <nvgpu/barrier.h>
41#include <nvgpu/ctxsw_trace.h>
42#include <nvgpu/error_notifier.h>
43#include <nvgpu/ptimer.h>
44#include <nvgpu/io.h>
45#include <nvgpu/utils.h>
46#include <nvgpu/channel.h>
47#include <nvgpu/unit.h>
48#include <nvgpu/power_features/power_features.h>
49#include <nvgpu/power_features/cg.h>
50
51#include "gk20a.h"
52#include "mm_gk20a.h"
53
54#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
55#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
56#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
57#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
58#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
59#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
60
61#define FECS_METHOD_WFI_RESTORE 0x80000
62#define FECS_MAILBOX_0_ACK_RESTORE 0x4
63
64
65static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
66
67static const char *const pbdma_intr_fault_type_desc[] = {
68 "MEMREQ timeout", "MEMACK_TIMEOUT", "MEMACK_EXTRA acks",
69 "MEMDAT_TIMEOUT", "MEMDAT_EXTRA acks", "MEMFLUSH noack",
70 "MEMOP noack", "LBCONNECT noack", "NONE - was LBREQ",
71 "LBACK_TIMEOUT", "LBACK_EXTRA acks", "LBDAT_TIMEOUT",
72 "LBDAT_EXTRA acks", "GPFIFO won't fit", "GPPTR invalid",
73 "GPENTRY invalid", "GPCRC mismatch", "PBPTR get>put",
74 "PBENTRY invld", "PBCRC mismatch", "NONE - was XBARC",
75 "METHOD invld", "METHODCRC mismat", "DEVICE sw method",
76 "[ENGINE]", "SEMAPHORE invlid", "ACQUIRE timeout",
77 "PRI forbidden", "ILLEGAL SYNCPT", "[NO_CTXSW_SEG]",
78 "PBSEG badsplit", "SIGNATURE bad"
79};
80
81u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
82 u32 engine_id[], u32 engine_id_sz,
83 u32 engine_enum)
84{
85 struct fifo_gk20a *f = NULL;
86 u32 instance_cnt = 0;
87 u32 engine_id_idx;
88 u32 active_engine_id = 0;
89 struct fifo_engine_info_gk20a *info = NULL;
90
91 if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) {
92 f = &g->fifo;
93 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
94 active_engine_id = f->active_engines_list[engine_id_idx];
95 info = &f->engine_info[active_engine_id];
96
97 if (info->engine_enum == engine_enum) {
98 if (instance_cnt < engine_id_sz) {
99 engine_id[instance_cnt] = active_engine_id;
100 ++instance_cnt;
101 } else {
102 nvgpu_log_info(g, "warning engine_id table sz is small %d",
103 engine_id_sz);
104 }
105 }
106 }
107 }
108 return instance_cnt;
109}
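
/*
 * A minimal usage sketch (hypothetical caller, for illustration only):
 * gather up to 8 async-CE engine IDs and log them. Kept under #if 0 so it
 * is not compiled.
 */
#if 0
static void example_list_async_ce(struct gk20a *g)
{
	u32 ce_ids[8];
	u32 count = gk20a_fifo_get_engine_ids(g, ce_ids, 8,
					      ENGINE_ASYNC_CE_GK20A);
	u32 i;

	for (i = 0; i < count; i++) {
		nvgpu_log_info(g, "async CE engine id %u", ce_ids[i]);
	}
}
#endif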
110
111struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id)
112{
113 struct fifo_gk20a *f = NULL;
114 u32 engine_id_idx;
115 struct fifo_engine_info_gk20a *info = NULL;
116
117 if (!g) {
118 return info;
119 }
120
121 f = &g->fifo;
122
123 if (engine_id < f->max_engines) {
124 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
125 if (engine_id == f->active_engines_list[engine_id_idx]) {
126 info = &f->engine_info[engine_id];
127 break;
128 }
129 }
130 }
131
132 if (!info) {
133 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
134 }
135
136 return info;
137}
138
139bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id)
140{
141 struct fifo_gk20a *f = NULL;
142 u32 engine_id_idx;
143 bool valid = false;
144
145 if (!g) {
146 return valid;
147 }
148
149 f = &g->fifo;
150
151 if (engine_id < f->max_engines) {
152 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
153 if (engine_id == f->active_engines_list[engine_id_idx]) {
154 valid = true;
155 break;
156 }
157 }
158 }
159
160 if (!valid) {
161 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
162 }
163
164 return valid;
165}
166
167u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g)
168{
169 u32 gr_engine_cnt = 0;
170 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
171
172 /* Consider 1st available GR engine */
173 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
174 1, ENGINE_GR_GK20A);
175
176 if (!gr_engine_cnt) {
177 nvgpu_err(g, "No GR engine available on this device!");
178 }
179
180 return gr_engine_id;
181}
182
183u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
184{
185 u32 reset_mask = 0;
186 u32 engine_enum = ENGINE_INVAL_GK20A;
187 struct fifo_gk20a *f = NULL;
188 u32 engine_id_idx;
189 struct fifo_engine_info_gk20a *engine_info;
190 u32 active_engine_id = 0;
191
192 if (!g) {
193 return reset_mask;
194 }
195
196 f = &g->fifo;
197
198 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
199 active_engine_id = f->active_engines_list[engine_id_idx];
200 engine_info = &f->engine_info[active_engine_id];
201 engine_enum = engine_info->engine_enum;
202
203 if ((engine_enum == ENGINE_GRCE_GK20A) ||
204 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
205 reset_mask |= engine_info->reset_mask;
206 }
207 }
208
209 return reset_mask;
210}
211
212u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g)
213{
214 u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g);
215 u32 engine_enum = ENGINE_INVAL_GK20A;
216 struct fifo_gk20a *f = NULL;
217 u32 engine_id_idx;
218 struct fifo_engine_info_gk20a *engine_info;
219 u32 active_engine_id = 0;
220
221 if (!g) {
222 return ce_runlist_id;
223 }
224
225 f = &g->fifo;
226
227 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
228 active_engine_id = f->active_engines_list[engine_id_idx];
229 engine_info = &f->engine_info[active_engine_id];
230 engine_enum = engine_info->engine_enum;
231
232 /* select the last available ASYNC_CE, if any */
233 if (engine_enum == ENGINE_ASYNC_CE_GK20A) {
234 ce_runlist_id = engine_info->runlist_id;
235 }
236 }
237
238 return ce_runlist_id;
239}
240
241u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
242{
243 u32 gr_engine_cnt = 0;
244 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
245 struct fifo_engine_info_gk20a *engine_info;
246 u32 gr_runlist_id = ~0;
247
248 /* Consider 1st available GR engine */
249 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
250 1, ENGINE_GR_GK20A);
251
252 if (!gr_engine_cnt) {
253 nvgpu_err(g,
254 "No GR engine available on this device!");
255 goto end;
256 }
257
258 engine_info = gk20a_fifo_get_engine_info(g, gr_engine_id);
259
260 if (engine_info) {
261 gr_runlist_id = engine_info->runlist_id;
262 } else {
263 nvgpu_err(g,
264 "gr_engine_id is not in active list/invalid %d", gr_engine_id);
265 }
266
267end:
268 return gr_runlist_id;
269}
270
271bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
272{
273 struct fifo_gk20a *f = NULL;
274 u32 engine_id_idx;
275 u32 active_engine_id;
276 struct fifo_engine_info_gk20a *engine_info;
277
278 if (!g) {
279 return false;
280 }
281
282 f = &g->fifo;
283
284 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
285 active_engine_id = f->active_engines_list[engine_id_idx];
286 engine_info = gk20a_fifo_get_engine_info(g, active_engine_id);
287 if (engine_info && (engine_info->runlist_id == runlist_id)) {
288 return true;
289 }
290 }
291
292 return false;
293}
294
295/*
296 * Link engine IDs to MMU IDs and vice versa.
297 */
298
299static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
300{
301 u32 fault_id = FIFO_INVAL_ENGINE_ID;
302 struct fifo_engine_info_gk20a *engine_info;
303
304 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
305
306 if (engine_info) {
307 fault_id = engine_info->fault_id;
308 } else {
309 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
310 }
311 return fault_id;
312}
313
314static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
315{
316 u32 engine_id;
317 u32 active_engine_id;
318 struct fifo_engine_info_gk20a *engine_info;
319 struct fifo_gk20a *f = &g->fifo;
320
321 for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
322 active_engine_id = f->active_engines_list[engine_id];
323 engine_info = &g->fifo.engine_info[active_engine_id];
324
325 if (engine_info->fault_id == fault_id) {
326 break;
327 }
328 active_engine_id = FIFO_INVAL_ENGINE_ID;
329 }
330 return active_engine_id;
331}
332
333int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
334 u32 *inst_id)
335{
336 int ret = ENGINE_INVAL_GK20A;
337
338 nvgpu_log_info(g, "engine type %d", engine_type);
339 if (engine_type == top_device_info_type_enum_graphics_v()) {
340 ret = ENGINE_GR_GK20A;
341 } else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
342 (engine_type <= top_device_info_type_enum_copy2_v())) {
343 /* Let's assume all CE engines have separate runlists at this point.
344 * We can identify an ENGINE_GRCE_GK20A type CE using runlist_id
345 * comparison logic against the GR runlist_id in init_engine_info() */
346 ret = ENGINE_ASYNC_CE_GK20A;
347 /* inst_id starts from CE0 to CE2 */
348 if (inst_id) {
349 *inst_id = (engine_type - top_device_info_type_enum_copy0_v());
350 }
351 }
352
353 return ret;
354}
355
356int gk20a_fifo_init_engine_info(struct fifo_gk20a *f)
357{
358 struct gk20a *g = f->g;
359 u32 i;
360 u32 max_info_entries = top_device_info__size_1_v();
361 u32 engine_enum = ENGINE_INVAL_GK20A;
362 u32 engine_id = FIFO_INVAL_ENGINE_ID;
363 u32 runlist_id = ~0;
364 u32 pbdma_id = ~0;
365 u32 intr_id = ~0;
366 u32 reset_id = ~0;
367 u32 inst_id = 0;
368 u32 pri_base = 0;
369 u32 fault_id = 0;
370 u32 gr_runlist_id = ~0;
371 bool found_pbdma_for_runlist = false;
372
373 nvgpu_log_fn(g, " ");
374
375 f->num_engines = 0;
376
377 for (i = 0; i < max_info_entries; i++) {
378 u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
379 u32 entry = top_device_info_entry_v(table_entry);
380 u32 runlist_bit;
381
382 if (entry == top_device_info_entry_enum_v()) {
383 if (top_device_info_engine_v(table_entry)) {
384 engine_id =
385 top_device_info_engine_enum_v(table_entry);
386 nvgpu_log_info(g, "info: engine_id %d",
387 top_device_info_engine_enum_v(table_entry));
388 }
389
390
391 if (top_device_info_runlist_v(table_entry)) {
392 runlist_id =
393 top_device_info_runlist_enum_v(table_entry);
394 nvgpu_log_info(g, "gr info: runlist_id %d", runlist_id);
395
396 runlist_bit = BIT(runlist_id);
397
398 found_pbdma_for_runlist = false;
399 for (pbdma_id = 0; pbdma_id < f->num_pbdma;
400 pbdma_id++) {
401 if (f->pbdma_map[pbdma_id] &
402 runlist_bit) {
403 nvgpu_log_info(g,
404 "gr info: pbdma_map[%d]=%d",
405 pbdma_id,
406 f->pbdma_map[pbdma_id]);
407 found_pbdma_for_runlist = true;
408 break;
409 }
410 }
411
412 if (!found_pbdma_for_runlist) {
413 nvgpu_err(g, "busted pbdma map");
414 return -EINVAL;
415 }
416 }
417
418 if (top_device_info_intr_v(table_entry)) {
419 intr_id =
420 top_device_info_intr_enum_v(table_entry);
421 nvgpu_log_info(g, "gr info: intr_id %d", intr_id);
422 }
423
424 if (top_device_info_reset_v(table_entry)) {
425 reset_id =
426 top_device_info_reset_enum_v(table_entry);
427 nvgpu_log_info(g, "gr info: reset_id %d",
428 reset_id);
429 }
430 } else if (entry == top_device_info_entry_engine_type_v()) {
431 u32 engine_type =
432 top_device_info_type_enum_v(table_entry);
433 engine_enum =
434 g->ops.fifo.engine_enum_from_type(g,
435 engine_type, &inst_id);
436 } else if (entry == top_device_info_entry_data_v()) {
437 /* gk20a doesn't support device_info_data packet parsing */
438 if (g->ops.fifo.device_info_data_parse) {
439 g->ops.fifo.device_info_data_parse(g,
440 table_entry, &inst_id, &pri_base,
441 &fault_id);
442 }
443 }
444
445 if (!top_device_info_chain_v(table_entry)) {
446 if (engine_enum < ENGINE_INVAL_GK20A) {
447 struct fifo_engine_info_gk20a *info =
448 &g->fifo.engine_info[engine_id];
449
450 info->intr_mask |= BIT(intr_id);
451 info->reset_mask |= BIT(reset_id);
452 info->runlist_id = runlist_id;
453 info->pbdma_id = pbdma_id;
454 info->inst_id = inst_id;
455 info->pri_base = pri_base;
456
457 if (engine_enum == ENGINE_GR_GK20A) {
458 gr_runlist_id = runlist_id;
459 }
460
461 /* GR and GR_COPY share the same runlist_id */
462 if ((engine_enum == ENGINE_ASYNC_CE_GK20A) &&
463 (gr_runlist_id == runlist_id)) {
464 engine_enum = ENGINE_GRCE_GK20A;
465 }
466
467 info->engine_enum = engine_enum;
468
469 if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A)) {
470 fault_id = 0x1b;
471 }
472 info->fault_id = fault_id;
473
474 /* engine_id ranges from 0 to NV_HOST_NUM_ENGINES */
475 f->active_engines_list[f->num_engines] = engine_id;
476
477 ++f->num_engines;
478
479 engine_enum = ENGINE_INVAL_GK20A;
480 }
481 }
482 }
483
484 return 0;
485}
486
487u32 gk20a_fifo_act_eng_interrupt_mask(struct gk20a *g, u32 act_eng_id)
488{
489 struct fifo_engine_info_gk20a *engine_info = NULL;
490
491 engine_info = gk20a_fifo_get_engine_info(g, act_eng_id);
492 if (engine_info) {
493 return engine_info->intr_mask;
494 }
495
496 return 0;
497}
498
499u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
500{
501 u32 eng_intr_mask = 0;
502 unsigned int i;
503 u32 active_engine_id = 0;
504 u32 engine_enum = ENGINE_INVAL_GK20A;
505
506 for (i = 0; i < g->fifo.num_engines; i++) {
507 u32 intr_mask;
508 active_engine_id = g->fifo.active_engines_list[i];
509 intr_mask = g->fifo.engine_info[active_engine_id].intr_mask;
510 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
511 if (((engine_enum == ENGINE_GRCE_GK20A) ||
512 (engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
513 (!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall)) {
514 continue;
515 }
516
517 eng_intr_mask |= intr_mask;
518 }
519
520 return eng_intr_mask;
521}
522
523void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
524{
525 u32 i;
526 u32 runlist_id;
527 struct fifo_runlist_info_gk20a *runlist;
528 struct gk20a *g = NULL;
529
530 if (!f || !f->runlist_info) {
531 return;
532 }
533
534 g = f->g;
535
536 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
537 runlist = &f->runlist_info[runlist_id];
538 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
539 nvgpu_dma_free(g, &runlist->mem[i]);
540 }
541
542 nvgpu_kfree(g, runlist->active_channels);
543 runlist->active_channels = NULL;
544
545 nvgpu_kfree(g, runlist->active_tsgs);
546 runlist->active_tsgs = NULL;
547
548 nvgpu_mutex_destroy(&runlist->runlist_lock);
549
550 }
551 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
552 f->max_runlists));
553
554 nvgpu_kfree(g, f->runlist_info);
555 f->runlist_info = NULL;
556 f->max_runlists = 0;
557}
558
559static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
560{
561 struct gk20a *g = f->g;
562 unsigned int i = 0;
563
564 nvgpu_log_fn(g, " ");
565
566 nvgpu_channel_worker_deinit(g);
567 /*
568 * Make sure all channels are closed before deleting them.
569 */
570 for (; i < f->num_channels; i++) {
571 struct channel_gk20a *c = f->channel + i;
572 struct tsg_gk20a *tsg = f->tsg + i;
573
574 /*
575 * Could race but worst that happens is we get an error message
576 * from gk20a_free_channel() complaining about multiple closes.
577 */
578 if (c->referenceable) {
579 __gk20a_channel_kill(c);
580 }
581
582 nvgpu_mutex_destroy(&tsg->event_id_list_lock);
583
584 nvgpu_mutex_destroy(&c->ioctl_lock);
585 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
586 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
587 nvgpu_mutex_destroy(&c->sync_lock);
588#if defined(CONFIG_GK20A_CYCLE_STATS)
589 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
590 nvgpu_mutex_destroy(&c->cs_client_mutex);
591#endif
592 nvgpu_mutex_destroy(&c->dbg_s_lock);
593
594 }
595
596 nvgpu_vfree(g, f->channel);
597 nvgpu_vfree(g, f->tsg);
598 if (g->ops.mm.is_bar1_supported(g)) {
599 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
600 } else {
601 nvgpu_dma_free(g, &f->userd);
602 }
603
604 gk20a_fifo_delete_runlist(f);
605
606 nvgpu_kfree(g, f->pbdma_map);
607 f->pbdma_map = NULL;
608 nvgpu_kfree(g, f->engine_info);
609 f->engine_info = NULL;
610 nvgpu_kfree(g, f->active_engines_list);
611 f->active_engines_list = NULL;
612}
613
614/* reads info from hardware and fills in the pbdma exception info record */
615static inline void get_exception_pbdma_info(
616 struct gk20a *g,
617 struct fifo_engine_info_gk20a *eng_info)
618{
619 struct fifo_pbdma_exception_info_gk20a *e =
620 &eng_info->pbdma_exception_info;
621
622 u32 pbdma_status_r = e->status_r = gk20a_readl(g,
623 fifo_pbdma_status_r(eng_info->pbdma_id));
624 e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
625 e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
626 fifo_pbdma_status_id_type_chid_v();
627 e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
628 e->next_id_is_chid =
629 fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
630 fifo_pbdma_status_next_id_type_chid_v();
631 e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
632 e->chsw_in_progress =
633 fifo_pbdma_status_chsw_v(pbdma_status_r) ==
634 fifo_pbdma_status_chsw_in_progress_v();
635}
636
637static void fifo_pbdma_exception_status(struct gk20a *g,
638 struct fifo_engine_info_gk20a *eng_info)
639{
640 struct fifo_pbdma_exception_info_gk20a *e;
641 get_exception_pbdma_info(g, eng_info);
642 e = &eng_info->pbdma_exception_info;
643
644 nvgpu_log_fn(g, "pbdma_id %d, "
645 "id_type %s, id %d, chan_status %d, "
646 "next_id_type %s, next_id %d, "
647 "chsw_in_progress %d",
648 eng_info->pbdma_id,
649 e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
650 e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
651 e->chsw_in_progress);
652}
653
654/* reads info from hardware and fills in the engine exception info record */
655static inline void get_exception_engine_info(
656 struct gk20a *g,
657 struct fifo_engine_info_gk20a *eng_info)
658{
659 struct fifo_engine_exception_info_gk20a *e =
660 &eng_info->engine_exception_info;
661 u32 engine_status_r = e->status_r =
662 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
663 e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
664 e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
665 fifo_engine_status_id_type_chid_v();
666 e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
667 e->faulted =
668 fifo_engine_status_faulted_v(engine_status_r) ==
669 fifo_engine_status_faulted_true_v();
670 e->idle =
671 fifo_engine_status_engine_v(engine_status_r) ==
672 fifo_engine_status_engine_idle_v();
673 e->ctxsw_in_progress =
674 fifo_engine_status_ctxsw_v(engine_status_r) ==
675 fifo_engine_status_ctxsw_in_progress_v();
676}
677
678static void fifo_engine_exception_status(struct gk20a *g,
679 struct fifo_engine_info_gk20a *eng_info)
680{
681 struct fifo_engine_exception_info_gk20a *e;
682 get_exception_engine_info(g, eng_info);
683 e = &eng_info->engine_exception_info;
684
685 nvgpu_log_fn(g, "engine_id %d, id_type %s, id %d, ctx_status %d, "
686 "faulted %d, idle %d, ctxsw_in_progress %d, ",
687 eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
688 e->id, e->ctx_status_v,
689 e->faulted, e->idle, e->ctxsw_in_progress);
690}
691
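/*
 * Note on the bitmask bookkeeping below: f->pbdma_map[i], read from
 * fifo_pbdma_map_r(i) in gk20a_init_fifo_setup_sw_common(), is a bitmask
 * of runlist IDs served by PBDMA i. init_runlist() inverts that view into
 * runlist->pbdma_bitmask (the PBDMAs serving a given runlist) and builds
 * runlist->eng_bitmask from the runlist_id recorded in each active
 * engine's engine_info entry.
 */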
692static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
693{
694 struct fifo_runlist_info_gk20a *runlist;
695 struct fifo_engine_info_gk20a *engine_info;
696 unsigned int runlist_id;
697 u32 i;
698 size_t runlist_size;
699 u32 active_engine_id, pbdma_id, engine_id;
700 int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
701 NVGPU_DMA_FORCE_CONTIGUOUS : 0;
702 int err = 0;
703
704 nvgpu_log_fn(g, " ");
705
706 f->max_runlists = g->ops.fifo.eng_runlist_base_size();
707 f->runlist_info = nvgpu_kzalloc(g,
708 sizeof(struct fifo_runlist_info_gk20a) *
709 f->max_runlists);
710 if (!f->runlist_info) {
711 goto clean_up_runlist;
712 }
713
714 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
715 f->max_runlists));
716
717 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
718 runlist = &f->runlist_info[runlist_id];
719
720 runlist->active_channels =
721 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
722 BITS_PER_BYTE));
723 if (!runlist->active_channels) {
724 goto clean_up_runlist;
725 }
726
727 runlist->active_tsgs =
728 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
729 BITS_PER_BYTE));
730 if (!runlist->active_tsgs) {
731 goto clean_up_runlist;
732 }
733
734 runlist_size = f->runlist_entry_size * f->num_runlist_entries;
735 nvgpu_log(g, gpu_dbg_info,
736 "runlist_entries %d runlist size %zu",
737 f->num_runlist_entries, runlist_size);
738
739 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
740 err = nvgpu_dma_alloc_flags_sys(g, flags,
741 runlist_size,
742 &runlist->mem[i]);
743 if (err) {
744 nvgpu_err(g, "memory allocation failed");
745 goto clean_up_runlist;
746 }
747 }
748
749 err = nvgpu_mutex_init(&runlist->runlist_lock);
750 if (err != 0) {
751 nvgpu_err(g,
752 "Error in runlist_lock mutex initialization");
753 goto clean_up_runlist;
754 }
755
756 /* None of the buffers is pinned if this value doesn't change.
757 Otherwise, one of them (cur_buffer) must have been pinned. */
758 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
759
760 for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
761 if (f->pbdma_map[pbdma_id] & BIT(runlist_id)) {
762 runlist->pbdma_bitmask |= BIT(pbdma_id);
763 }
764 }
765 nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x",
766 runlist_id, runlist->pbdma_bitmask);
767
768 for (engine_id = 0; engine_id < f->num_engines; ++engine_id) {
769 active_engine_id = f->active_engines_list[engine_id];
770 engine_info = &f->engine_info[active_engine_id];
771
772 if (engine_info && engine_info->runlist_id == runlist_id) {
773 runlist->eng_bitmask |= BIT(active_engine_id);
774 }
775 }
776 nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x",
777 runlist_id, runlist->eng_bitmask);
778 }
779
780 nvgpu_log_fn(g, "done");
781 return 0;
782
783clean_up_runlist:
784 gk20a_fifo_delete_runlist(f);
785 nvgpu_log_fn(g, "fail");
786 return err;
787}
788
789u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g)
790{
791 u32 intr_0_error_mask =
792 fifo_intr_0_bind_error_pending_f() |
793 fifo_intr_0_sched_error_pending_f() |
794 fifo_intr_0_chsw_error_pending_f() |
795 fifo_intr_0_fb_flush_timeout_pending_f() |
796 fifo_intr_0_dropped_mmu_fault_pending_f() |
797 fifo_intr_0_mmu_fault_pending_f() |
798 fifo_intr_0_lb_error_pending_f() |
799 fifo_intr_0_pio_error_pending_f();
800
801 return intr_0_error_mask;
802}
803
804static u32 gk20a_fifo_intr_0_en_mask(struct gk20a *g)
805{
806 u32 intr_0_en_mask;
807
808 intr_0_en_mask = g->ops.fifo.intr_0_error_mask(g);
809
810 intr_0_en_mask |= fifo_intr_0_runlist_event_pending_f() |
811 fifo_intr_0_pbdma_intr_pending_f();
812
813 return intr_0_en_mask;
814}
815
816int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
817{
818 u32 intr_stall;
819 u32 mask;
820 u32 timeout;
821 unsigned int i;
822 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
823
824 nvgpu_log_fn(g, " ");
825
826 /* enable pmc pfifo */
827 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_FIFO));
828
829 nvgpu_cg_slcg_fifo_load_enable(g);
830
831 nvgpu_cg_blcg_fifo_load_enable(g);
832
833 timeout = gk20a_readl(g, fifo_fb_timeout_r());
834 timeout = set_field(timeout, fifo_fb_timeout_period_m(),
835 fifo_fb_timeout_period_max_f());
836 nvgpu_log_info(g, "fifo_fb_timeout reg val = 0x%08x", timeout);
837 gk20a_writel(g, fifo_fb_timeout_r(), timeout);
838
839 /* write pbdma timeout value */
840 for (i = 0; i < host_num_pbdma; i++) {
841 timeout = gk20a_readl(g, pbdma_timeout_r(i));
842 timeout = set_field(timeout, pbdma_timeout_period_m(),
843 pbdma_timeout_period_max_f());
844 nvgpu_log_info(g, "pbdma_timeout reg val = 0x%08x", timeout);
845 gk20a_writel(g, pbdma_timeout_r(i), timeout);
846 }
847 if (g->ops.fifo.apply_pb_timeout) {
848 g->ops.fifo.apply_pb_timeout(g);
849 }
850
851 if (g->ops.fifo.apply_ctxsw_timeout_intr) {
852 g->ops.fifo.apply_ctxsw_timeout_intr(g);
853 } else {
854 timeout = g->fifo_eng_timeout_us;
855 timeout = scale_ptimer(timeout,
856 ptimer_scalingfactor10x(g->ptimer_src_freq));
857 timeout |= fifo_eng_timeout_detection_enabled_f();
858 gk20a_writel(g, fifo_eng_timeout_r(), timeout);
859 }
860
861 /* clear and enable pbdma interrupt */
862 for (i = 0; i < host_num_pbdma; i++) {
863 gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
864 gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
865
866 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
867 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
868 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
869 nvgpu_log_info(g, "pbdma id:%u, intr_en_0 0x%08x", i, intr_stall);
870 gk20a_writel(g, pbdma_intr_en_0_r(i), intr_stall);
871 intr_stall = gk20a_readl(g, pbdma_intr_stall_1_r(i));
872 /*
873 * For bug 2082123
874 * Mask the unused HCE_RE_ILLEGAL_OP bit from the interrupt.
875 */
876 intr_stall &= ~pbdma_intr_stall_1_hce_illegal_op_enabled_f();
877 nvgpu_log_info(g, "pbdma id:%u, intr_en_1 0x%08x", i, intr_stall);
878 gk20a_writel(g, pbdma_intr_en_1_r(i), intr_stall);
879 }
880
881 /* reset runlist interrupts */
882 gk20a_writel(g, fifo_intr_runlist_r(), ~0);
883
884 /* clear and enable pfifo interrupt */
885 gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
886 mask = gk20a_fifo_intr_0_en_mask(g);
887 nvgpu_log_info(g, "fifo_intr_en_0 0x%08x", mask);
888 gk20a_writel(g, fifo_intr_en_0_r(), mask);
889 nvgpu_log_info(g, "fifo_intr_en_1 = 0x80000000");
890 gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);
891
892 nvgpu_log_fn(g, "done");
893
894 return 0;
895}
896
897int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
898{
899 struct fifo_gk20a *f = &g->fifo;
900 unsigned int chid, i;
901 int err = 0;
902
903 nvgpu_log_fn(g, " ");
904
905 f->g = g;
906
907 err = nvgpu_mutex_init(&f->intr.isr.mutex);
908 if (err) {
909 nvgpu_err(g, "failed to init isr.mutex");
910 return err;
911 }
912
913 err = nvgpu_mutex_init(&f->engines_reset_mutex);
914 if (err) {
915 nvgpu_err(g, "failed to init engines_reset_mutex");
916 return err;
917 }
918
919 g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
920
921 f->num_channels = g->ops.fifo.get_num_fifos(g);
922 f->runlist_entry_size = g->ops.fifo.runlist_entry_size();
923 f->num_runlist_entries = fifo_eng_runlist_length_max_v();
924 f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
925 f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
926
927 f->userd_entry_size = 1 << ram_userd_base_shift_v();
928
929 f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
930 f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg));
931 f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map));
932 f->engine_info = nvgpu_kzalloc(g, f->max_engines *
933 sizeof(*f->engine_info));
934 f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32));
935
936 if (!(f->channel && f->tsg && f->pbdma_map && f->engine_info &&
937 f->active_engines_list)) {
938 err = -ENOMEM;
939 goto clean_up;
940 }
941 memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32)));
942
943 /* pbdma map needs to be in place before calling engine info init */
944 for (i = 0; i < f->num_pbdma; ++i) {
945 f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
946 }
947
948 g->ops.fifo.init_engine_info(f);
949
950 err = init_runlist(g, f);
951 if (err) {
952 nvgpu_err(g, "failed to init runlist");
953 goto clean_up;
954 }
955
956 nvgpu_init_list_node(&f->free_chs);
957
958 err = nvgpu_mutex_init(&f->free_chs_mutex);
959 if (err) {
960 nvgpu_err(g, "failed to init free_chs_mutex");
961 goto clean_up;
962 }
963
964 for (chid = 0; chid < f->num_channels; chid++) {
965 gk20a_init_channel_support(g, chid);
966 gk20a_init_tsg_support(g, chid);
967 }
968
969 err = nvgpu_mutex_init(&f->tsg_inuse_mutex);
970 if (err) {
971 nvgpu_err(g, "failed to init tsg_inuse_mutex");
972 goto clean_up;
973 }
974
975 f->remove_support = gk20a_remove_fifo_support;
976
977 f->deferred_reset_pending = false;
978
979 err = nvgpu_mutex_init(&f->deferred_reset_mutex);
980 if (err) {
981 nvgpu_err(g, "failed to init deferred_reset_mutex");
982 goto clean_up;
983 }
984
985 nvgpu_log_fn(g, "done");
986 return 0;
987
988clean_up:
989 nvgpu_err(g, "fail");
990
991 nvgpu_vfree(g, f->channel);
992 f->channel = NULL;
993 nvgpu_vfree(g, f->tsg);
994 f->tsg = NULL;
995 nvgpu_kfree(g, f->pbdma_map);
996 f->pbdma_map = NULL;
997 nvgpu_kfree(g, f->engine_info);
998 f->engine_info = NULL;
999 nvgpu_kfree(g, f->active_engines_list);
1000 f->active_engines_list = NULL;
1001
1002 return err;
1003}
1004
1005int gk20a_init_fifo_setup_sw(struct gk20a *g)
1006{
1007 struct fifo_gk20a *f = &g->fifo;
1008 unsigned int chid;
1009 u64 userd_base;
1010 int err = 0;
1011
1012 nvgpu_log_fn(g, " ");
1013
1014 if (f->sw_ready) {
1015 nvgpu_log_fn(g, "skip init");
1016 return 0;
1017 }
1018
1019 err = gk20a_init_fifo_setup_sw_common(g);
1020 if (err) {
1021 nvgpu_err(g, "fail: err: %d", err);
1022 return err;
1023 }
1024
1025 if (g->ops.mm.is_bar1_supported(g)) {
1026 err = nvgpu_dma_alloc_map_sys(g->mm.bar1.vm,
1027 f->userd_entry_size * f->num_channels,
1028 &f->userd);
1029 } else {
1030 err = nvgpu_dma_alloc_sys(g, f->userd_entry_size *
1031 f->num_channels, &f->userd);
1032 }
1033 if (err) {
1034 nvgpu_err(g, "userd memory allocation failed");
1035 goto clean_up;
1036 }
1037 nvgpu_log(g, gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
1038
1039 userd_base = nvgpu_mem_get_addr(g, &f->userd);
1040 for (chid = 0; chid < f->num_channels; chid++) {
1041 f->channel[chid].userd_iova = userd_base +
1042 chid * f->userd_entry_size;
1043 f->channel[chid].userd_gpu_va =
1044 f->userd.gpu_va + chid * f->userd_entry_size;
1045 }
1046
1047 err = nvgpu_channel_worker_init(g);
1048 if (err) {
1049 goto clean_up;
1050 }
1051
1052 f->sw_ready = true;
1053
1054 nvgpu_log_fn(g, "done");
1055 return 0;
1056
1057clean_up:
1058 nvgpu_log_fn(g, "fail");
1059 if (nvgpu_mem_is_valid(&f->userd)) {
1060 if (g->ops.mm.is_bar1_supported(g)) {
1061 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
1062 } else {
1063 nvgpu_dma_free(g, &f->userd);
1064 }
1065 }
1066
1067 return err;
1068}
1069
1070void gk20a_fifo_handle_runlist_event(struct gk20a *g)
1071{
1072 u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
1073
1074 nvgpu_log(g, gpu_dbg_intr, "runlist event %08x",
1075 runlist_event);
1076
1077 gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
1078}
1079
1080int gk20a_init_fifo_setup_hw(struct gk20a *g)
1081{
1082 struct fifo_gk20a *f = &g->fifo;
1083
1084 nvgpu_log_fn(g, " ");
1085
1086 /* test write, read through bar1 @ userd region before
1087 * turning on the snooping */
1088 {
1089 struct fifo_gk20a *f = &g->fifo;
1090 u32 v, v1 = 0x33, v2 = 0x55;
1091
1092 u32 bar1_vaddr = f->userd.gpu_va;
1093 volatile u32 *cpu_vaddr = f->userd.cpu_va;
1094
1095 nvgpu_log_info(g, "test bar1 @ vaddr 0x%x",
1096 bar1_vaddr);
1097
1098 v = gk20a_bar1_readl(g, bar1_vaddr);
1099
1100 *cpu_vaddr = v1;
1101 nvgpu_mb();
1102
1103 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
1104 nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \
1105 GPU read 0x%x", *cpu_vaddr, gk20a_bar1_readl(g, bar1_vaddr));
1106 return -EINVAL;
1107 }
1108
1109 gk20a_bar1_writel(g, bar1_vaddr, v2);
1110
1111 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
1112 nvgpu_err(g, "bar1 broken @ gk20a: GPU wrote 0x%x, \
1113 CPU read 0x%x", gk20a_bar1_readl(g, bar1_vaddr), *cpu_vaddr);
1114 return -EINVAL;
1115 }
1116
1117 /* is it visible to the cpu? */
1118 if (*cpu_vaddr != v2) {
1119 nvgpu_err(g,
1120 "cpu didn't see bar1 write @ %p!",
1121 cpu_vaddr);
1122 }
1123
1124 /* put it back */
1125 gk20a_bar1_writel(g, bar1_vaddr, v);
1126 }
1127
1128 /*XXX all manner of flushes and caching worries, etc */
1129
1130 /* set the base for the userd region now */
1131 gk20a_writel(g, fifo_bar1_base_r(),
1132 fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
1133 fifo_bar1_base_valid_true_f());
1134
1135 nvgpu_log_fn(g, "done");
1136
1137 return 0;
1138}
1139
1140int gk20a_init_fifo_support(struct gk20a *g)
1141{
1142 u32 err;
1143
1144 err = g->ops.fifo.setup_sw(g);
1145 if (err) {
1146 return err;
1147 }
1148
1149 if (g->ops.fifo.init_fifo_setup_hw) {
1150 err = g->ops.fifo.init_fifo_setup_hw(g);
1151 }
1152 if (err) {
1153 return err;
1154 }
1155
1156 return err;
1157}
1158
1159/* return with a reference to the channel, caller must put it back */
1160struct channel_gk20a *
1161gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
1162{
1163 struct fifo_gk20a *f = &g->fifo;
1164 unsigned int ci;
1165 if (unlikely(!f->channel)) {
1166 return NULL;
1167 }
1168 for (ci = 0; ci < f->num_channels; ci++) {
1169 struct channel_gk20a *ch;
1170 u64 ch_inst_ptr;
1171
1172 ch = gk20a_channel_from_id(g, ci);
1173 /* only alive channels are searched */
1174 if (!ch) {
1175 continue;
1176 }
1177
1178 ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
1179 if (inst_ptr == ch_inst_ptr) {
1180 return ch;
1181 }
1182
1183 gk20a_channel_put(ch);
1184 }
1185 return NULL;
1186}
1187
1188/* fault info/descriptions.
1189 * tbd: move to setup
1190 */
1191static const char * const gk20a_fault_type_descs[] = {
1192 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
1193 "pde size",
1194 "pte",
1195 "va limit viol",
1196 "unbound inst",
1197 "priv viol",
1198 "ro viol",
1199 "wo viol",
1200 "pitch mask",
1201 "work creation",
1202 "bad aperture",
1203 "compression failure",
1204 "bad kind",
1205 "region viol",
1206 "dual ptes",
1207 "poisoned",
1208};
1209/* engine descriptions */
1210static const char * const engine_subid_descs[] = {
1211 "gpc",
1212 "hub",
1213};
1214
1215static const char * const gk20a_hub_client_descs[] = {
1216 "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
1217 "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
1218 "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
1219 "scc nb", "sec", "ssync", "gr copy", "xv", "mmu nb",
1220 "msenc", "d falcon", "sked", "a falcon", "n/a",
1221};
1222
1223static const char * const gk20a_gpc_client_descs[] = {
1224 "l1 0", "t1 0", "pe 0",
1225 "l1 1", "t1 1", "pe 1",
1226 "l1 2", "t1 2", "pe 2",
1227 "l1 3", "t1 3", "pe 3",
1228 "rast", "gcc", "gpccs",
1229 "prop 0", "prop 1", "prop 2", "prop 3",
1230 "l1 4", "t1 4", "pe 4",
1231 "l1 5", "t1 5", "pe 5",
1232 "l1 6", "t1 6", "pe 6",
1233 "l1 7", "t1 7", "pe 7",
1234};
1235
1236static const char * const does_not_exist[] = {
1237 "does not exist"
1238};
1239
1240/* fill in mmu fault desc */
1241void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault)
1242{
1243 if (mmfault->fault_type >= ARRAY_SIZE(gk20a_fault_type_descs)) {
1244 WARN_ON(mmfault->fault_type >=
1245 ARRAY_SIZE(gk20a_fault_type_descs));
1246 } else {
1247 mmfault->fault_type_desc =
1248 gk20a_fault_type_descs[mmfault->fault_type];
1249 }
1250}
1251
1252/* fill in mmu fault client description */
1253void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault)
1254{
1255 if (mmfault->client_id >= ARRAY_SIZE(gk20a_hub_client_descs)) {
1256 WARN_ON(mmfault->client_id >=
1257 ARRAY_SIZE(gk20a_hub_client_descs));
1258 } else {
1259 mmfault->client_id_desc =
1260 gk20a_hub_client_descs[mmfault->client_id];
1261 }
1262}
1263
1264/* fill in mmu fault gpc description */
1265void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault)
1266{
1267 if (mmfault->client_id >= ARRAY_SIZE(gk20a_gpc_client_descs)) {
1268 WARN_ON(mmfault->client_id >=
1269 ARRAY_SIZE(gk20a_gpc_client_descs));
1270 } else {
1271 mmfault->client_id_desc =
1272 gk20a_gpc_client_descs[mmfault->client_id];
1273 }
1274}
1275
1276static void get_exception_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1277 struct mmu_fault_info *mmfault)
1278{
1279 g->ops.fifo.get_mmu_fault_info(g, mmu_fault_id, mmfault);
1280
1281 /* parse info */
1282 mmfault->fault_type_desc = does_not_exist[0];
1283 if (g->ops.fifo.get_mmu_fault_desc) {
1284 g->ops.fifo.get_mmu_fault_desc(mmfault);
1285 }
1286
1287 if (mmfault->client_type >= ARRAY_SIZE(engine_subid_descs)) {
1288 WARN_ON(mmfault->client_type >= ARRAY_SIZE(engine_subid_descs));
1289 mmfault->client_type_desc = does_not_exist[0];
1290 } else {
1291 mmfault->client_type_desc =
1292 engine_subid_descs[mmfault->client_type];
1293 }
1294
1295 mmfault->client_id_desc = does_not_exist[0];
1296 if ((mmfault->client_type ==
1297 fifo_intr_mmu_fault_info_engine_subid_hub_v())
1298 && g->ops.fifo.get_mmu_fault_client_desc) {
1299 g->ops.fifo.get_mmu_fault_client_desc(mmfault);
1300 } else if ((mmfault->client_type ==
1301 fifo_intr_mmu_fault_info_engine_subid_gpc_v())
1302 && g->ops.fifo.get_mmu_fault_gpc_desc) {
1303 g->ops.fifo.get_mmu_fault_gpc_desc(mmfault);
1304 }
1305}
1306
1307/* reads info from hardware and fills in mmu fault info record */
1308void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1309 struct mmu_fault_info *mmfault)
1310{
1311 u32 fault_info;
1312 u32 addr_lo, addr_hi;
1313
1314 nvgpu_log_fn(g, "mmu_fault_id %d", mmu_fault_id);
1315
1316 memset(mmfault, 0, sizeof(*mmfault));
1317
1318 fault_info = gk20a_readl(g,
1319 fifo_intr_mmu_fault_info_r(mmu_fault_id));
1320 mmfault->fault_type =
1321 fifo_intr_mmu_fault_info_type_v(fault_info);
1322 mmfault->access_type =
1323 fifo_intr_mmu_fault_info_write_v(fault_info);
1324 mmfault->client_type =
1325 fifo_intr_mmu_fault_info_engine_subid_v(fault_info);
1326 mmfault->client_id =
1327 fifo_intr_mmu_fault_info_client_v(fault_info);
1328
1329 addr_lo = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(mmu_fault_id));
1330 addr_hi = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(mmu_fault_id));
1331 mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
1332 /* note: ignoring aperture on gk20a... */
1333 mmfault->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
1334 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(mmu_fault_id)));
1335 /* note: inst_ptr is a 40b phys addr. */
1336 mmfault->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
1337}
1338
1339void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
1340{
1341 u32 engine_enum = ENGINE_INVAL_GK20A;
1342 struct fifo_engine_info_gk20a *engine_info;
1343
1344 nvgpu_log_fn(g, " ");
1345
1346 if (!g) {
1347 return;
1348 }
1349
1350 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1351
1352 if (engine_info) {
1353 engine_enum = engine_info->engine_enum;
1354 }
1355
1356 if (engine_enum == ENGINE_INVAL_GK20A) {
1357 nvgpu_err(g, "unsupported engine_id %d", engine_id);
1358 }
1359
1360 if (engine_enum == ENGINE_GR_GK20A) {
1361 if (g->support_pmu) {
1362 if (nvgpu_pg_elpg_disable(g) != 0 ) {
1363 nvgpu_err(g, "failed to set disable elpg");
1364 }
1365 }
1366
1367#ifdef CONFIG_GK20A_CTXSW_TRACE
1368 /*
1369 * Resetting engine will alter read/write index. Need to flush
1370 * circular buffer before re-enabling FECS.
1371 */
1372 if (g->ops.fecs_trace.reset)
1373 g->ops.fecs_trace.reset(g);
1374#endif
1375 if (!nvgpu_platform_is_simulation(g)) {
1376 /* HALT_PIPELINE method, halt GR engine */
1377 if (gr_gk20a_halt_pipe(g)) {
1378 nvgpu_err(g, "failed to HALT gr pipe");
1379 }
1380 /*
1381 * resetting engine using mc_enable_r() is not
1382 * enough, we do full init sequence
1383 */
1384 nvgpu_log(g, gpu_dbg_info, "resetting gr engine");
1385 gk20a_gr_reset(g);
1386 } else {
1387 nvgpu_log(g, gpu_dbg_info,
1388 "HALT gr pipe not supported and "
1389 "gr cannot be reset without halting gr pipe");
1390 }
1391 if (g->support_pmu) {
1392 if (nvgpu_pg_elpg_enable(g) != 0 ) {
1393 nvgpu_err(g, "failed to set enable elpg");
1394 }
1395 }
1396 }
1397 if ((engine_enum == ENGINE_GRCE_GK20A) ||
1398 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
1399 g->ops.mc.reset(g, engine_info->reset_mask);
1400 }
1401}
1402
1403static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
1404{
1405 u32 intr;
1406
1407 intr = gk20a_readl(g, fifo_intr_chsw_error_r());
1408 nvgpu_err(g, "chsw: %08x", intr);
1409 gk20a_fecs_dump_falcon_stats(g);
1410 gk20a_gpccs_dump_falcon_stats(g);
1411 gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
1412}
1413
1414static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
1415{
1416 u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1417 nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id);
1418}
1419
1420bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid)
1421{
1422 return (engine_subid == fifo_intr_mmu_fault_info_engine_subid_gpc_v());
1423}
1424
1425bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
1426 u32 engine_subid, bool fake_fault)
1427{
1428 u32 engine_enum = ENGINE_INVAL_GK20A;
1429 struct fifo_engine_info_gk20a *engine_info;
1430
1431 if (!g) {
1432 return false;
1433 }
1434
1435 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1436
1437 if (engine_info) {
1438 engine_enum = engine_info->engine_enum;
1439 }
1440
1441 if (engine_enum == ENGINE_INVAL_GK20A) {
1442 return false;
1443 }
1444
1445 /* channel recovery is only deferred if an SM debugger
1446 is attached and MMU debug mode is enabled */
1447 if (!g->ops.gr.sm_debugger_attached(g) ||
1448 !g->ops.fb.is_debug_mode_enabled(g)) {
1449 return false;
1450 }
1451
1452 /* if this fault is fake (due to RC recovery), don't defer recovery */
1453 if (fake_fault) {
1454 return false;
1455 }
1456
1457 if (engine_enum != ENGINE_GR_GK20A) {
1458 return false;
1459 }
1460
1461 return g->ops.fifo.is_fault_engine_subid_gpc(g, engine_subid);
1462}
1463
1464/* caller must hold a channel reference */
1465static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
1466 struct channel_gk20a *refch)
1467{
1468 bool verbose = false;
1469 if (!refch) {
1470 return verbose;
1471 }
1472
1473 if (nvgpu_is_error_notifier_set(refch,
1474 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
1475 verbose = refch->timeout_debug_dump;
1476 }
1477
1478 return verbose;
1479}
1480
1481/* caller must hold a channel reference */
1482static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
1483 struct channel_gk20a *refch)
1484{
1485 if (refch) {
1486 /* mark channel as faulted */
1487 gk20a_channel_set_timedout(refch);
1488
1489 /* unblock pending waits */
1490 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
1491 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
1492 }
1493}
1494
1495/* caller must hold a channel reference */
1496bool gk20a_fifo_error_ch(struct gk20a *g,
1497 struct channel_gk20a *refch)
1498{
1499 bool verbose;
1500
1501 verbose = gk20a_fifo_ch_timeout_debug_dump_state(g, refch);
1502 gk20a_fifo_set_has_timedout_and_wake_up_wqs(g, refch);
1503
1504 return verbose;
1505}
1506
1507bool gk20a_fifo_error_tsg(struct gk20a *g,
1508 struct tsg_gk20a *tsg)
1509{
1510 struct channel_gk20a *ch = NULL;
1511 bool verbose = false;
1512
1513 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1514 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1515 if (gk20a_channel_get(ch)) {
1516 if (gk20a_fifo_error_ch(g, ch)) {
1517 verbose = true;
1518 }
1519 gk20a_channel_put(ch);
1520 }
1521 }
1522 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1523
1524 return verbose;
1525
1526}
1527/* caller must hold a channel reference */
1528void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
1529 struct channel_gk20a *refch)
1530{
1531 nvgpu_err(g,
1532 "channel %d generated a mmu fault", refch->chid);
1533 g->ops.fifo.set_error_notifier(refch,
1534 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
1535}
1536
1537void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
1538 struct tsg_gk20a *tsg)
1539{
1540 struct channel_gk20a *ch = NULL;
1541
1542 nvgpu_err(g,
1543 "TSG %d generated a mmu fault", tsg->tsgid);
1544
1545 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1546 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1547 if (gk20a_channel_get(ch)) {
1548 gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1549 gk20a_channel_put(ch);
1550 }
1551 }
1552 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1553
1554}
1555
1556void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
1557{
1558 struct channel_gk20a *ch = NULL;
1559
1560 nvgpu_log_fn(g, " ");
1561
1562 g->ops.fifo.disable_tsg(tsg);
1563
1564 if (preempt) {
1565 g->ops.fifo.preempt_tsg(g, tsg);
1566 }
1567
1568 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1569 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1570 if (gk20a_channel_get(ch)) {
1571 gk20a_channel_set_timedout(ch);
1572 if (ch->g->ops.fifo.ch_abort_clean_up) {
1573 ch->g->ops.fifo.ch_abort_clean_up(ch);
1574 }
1575 gk20a_channel_put(ch);
1576 }
1577 }
1578 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1579}
1580
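/*
 * Deferred engine reset path: when an MMU fault is handled while an SM
 * debugger is attached (see gk20a_fifo_should_defer_engine_reset()), the
 * fault handler only records the engine in deferred_fault_engines and
 * sets deferred_reset_pending rather than resetting it immediately. This
 * function, run when the faulting channel is freed, performs the
 * postponed reset for any recorded engine the channel/TSG was running on,
 * with context switching disabled around the reset.
 */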
1581int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
1582{
1583 unsigned long engine_id, engines = 0U;
1584 struct tsg_gk20a *tsg;
1585 bool deferred_reset_pending;
1586 struct fifo_gk20a *f = &g->fifo;
1587
1588 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1589
1590 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1591 deferred_reset_pending = g->fifo.deferred_reset_pending;
1592 nvgpu_mutex_release(&f->deferred_reset_mutex);
1593
1594 if (!deferred_reset_pending) {
1595 nvgpu_mutex_release(&g->dbg_sessions_lock);
1596 return 0;
1597 }
1598
1599 gr_gk20a_disable_ctxsw(g);
1600
1601 tsg = tsg_gk20a_from_ch(ch);
1602 if (tsg != NULL) {
1603 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
1604 } else {
1605 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
1606 engines = g->fifo.deferred_fault_engines;
1607 }
1608
1609 if (engines == 0U) {
1610 goto clean_up;
1611 }
1612
1613 /*
1614 * If deferred reset is set for an engine, and channel is running
1615 * on that engine, reset it
1616 */
1617 for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32) {
1618 if (BIT(engine_id) & engines) {
1619 gk20a_fifo_reset_engine(g, engine_id);
1620 }
1621 }
1622
1623 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1624 g->fifo.deferred_fault_engines = 0;
1625 g->fifo.deferred_reset_pending = false;
1626 nvgpu_mutex_release(&f->deferred_reset_mutex);
1627
1628clean_up:
1629 gr_gk20a_enable_ctxsw(g);
1630 nvgpu_mutex_release(&g->dbg_sessions_lock);
1631
1632 return 0;
1633}
1634
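/*
 * Overall flow of the fault handler below: power management and FIFO
 * access are disabled first. The faulted-engine bitmask is either passed
 * in (fake fault, i.e. RC recovery) or read from fifo_intr_mmu_fault_id_r().
 * For each faulted engine the MMU fault info is decoded and the owning
 * channel/TSG is located (from the engine status for fake faults,
 * otherwise by matching the faulting instance pointer). The engine is
 * then reset, or the reset is deferred to channel free time if an SM
 * debugger is attached; the TSG is aborted unless recovery is deferred.
 * Finally the fault interrupt is cleared, the scheduler and FIFO access
 * are re-enabled, and power management is restored.
 */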
1635static bool gk20a_fifo_handle_mmu_fault_locked(
1636 struct gk20a *g,
1637 u32 mmu_fault_engines, /* queried from HW if 0 */
1638 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1639 bool id_is_tsg)
1640{
1641 bool fake_fault;
1642 unsigned long fault_id;
1643 unsigned long engine_mmu_fault_id;
1644 bool verbose = true;
1645 u32 grfifo_ctl;
1646
1647 bool deferred_reset_pending = false;
1648 struct fifo_gk20a *f = &g->fifo;
1649
1650 nvgpu_log_fn(g, " ");
1651
1652 /* Disable power management */
1653 if (g->support_pmu) {
1654 if (nvgpu_cg_pg_disable(g) != 0) {
1655 nvgpu_warn(g, "fail to disable power mgmt");
1656 }
1657 }
1658
1659 /* Disable fifo access */
1660 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
1661 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
1662 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
1663
1664 gk20a_writel(g, gr_gpfifo_ctl_r(),
1665 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
1666 gr_gpfifo_ctl_semaphore_access_f(0));
1667
1668 if (mmu_fault_engines) {
1669 fault_id = mmu_fault_engines;
1670 fake_fault = true;
1671 } else {
1672 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1673 fake_fault = false;
1674 gk20a_debug_dump(g);
1675 }
1676
1677 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1678 g->fifo.deferred_reset_pending = false;
1679 nvgpu_mutex_release(&f->deferred_reset_mutex);
1680
1681 /* go through all faulted engines */
1682 for_each_set_bit(engine_mmu_fault_id, &fault_id, 32) {
1683 /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
1684 * engines. Convert engine_mmu_id to engine_id */
1685 u32 engine_id = gk20a_mmu_id_to_engine_id(g,
1686 engine_mmu_fault_id);
1687 struct mmu_fault_info mmfault_info;
1688 struct channel_gk20a *ch = NULL;
1689 struct tsg_gk20a *tsg = NULL;
1690 struct channel_gk20a *refch = NULL;
1691 /* read and parse engine status */
1692 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1693 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1694 bool ctxsw = (ctx_status ==
1695 fifo_engine_status_ctx_status_ctxsw_switch_v()
1696 || ctx_status ==
1697 fifo_engine_status_ctx_status_ctxsw_save_v()
1698 || ctx_status ==
1699 fifo_engine_status_ctx_status_ctxsw_load_v());
1700
1701 get_exception_mmu_fault_info(g, engine_mmu_fault_id,
1702 &mmfault_info);
1703 trace_gk20a_mmu_fault(mmfault_info.fault_addr,
1704 mmfault_info.fault_type,
1705 mmfault_info.access_type,
1706 mmfault_info.inst_ptr,
1707 engine_id,
1708 mmfault_info.client_type_desc,
1709 mmfault_info.client_id_desc,
1710 mmfault_info.fault_type_desc);
1711 nvgpu_err(g, "%s mmu fault on engine %d, "
1712 "engine subid %d (%s), client %d (%s), "
1713 "addr 0x%llx, type %d (%s), access_type 0x%08x,"
1714 "inst_ptr 0x%llx",
1715 fake_fault ? "fake" : "",
1716 engine_id,
1717 mmfault_info.client_type,
1718 mmfault_info.client_type_desc,
1719 mmfault_info.client_id, mmfault_info.client_id_desc,
1720 mmfault_info.fault_addr,
1721 mmfault_info.fault_type,
1722 mmfault_info.fault_type_desc,
1723 mmfault_info.access_type, mmfault_info.inst_ptr);
1724
1725 if (ctxsw) {
1726 gk20a_fecs_dump_falcon_stats(g);
1727 gk20a_gpccs_dump_falcon_stats(g);
1728 nvgpu_err(g, "gr_status_r : 0x%x",
1729 gk20a_readl(g, gr_status_r()));
1730 }
1731
1732 /* get the channel/TSG */
1733 if (fake_fault) {
1734 /* use next_id if context load is failing */
1735 u32 id, type;
1736
1737 if (hw_id == ~(u32)0) {
1738 id = (ctx_status ==
1739 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1740 fifo_engine_status_next_id_v(status) :
1741 fifo_engine_status_id_v(status);
1742 type = (ctx_status ==
1743 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1744 fifo_engine_status_next_id_type_v(status) :
1745 fifo_engine_status_id_type_v(status);
1746 } else {
1747 id = hw_id;
1748 type = id_is_tsg ?
1749 fifo_engine_status_id_type_tsgid_v() :
1750 fifo_engine_status_id_type_chid_v();
1751 }
1752
1753 if (type == fifo_engine_status_id_type_tsgid_v()) {
1754 tsg = &g->fifo.tsg[id];
1755 } else if (type == fifo_engine_status_id_type_chid_v()) {
1756 ch = &g->fifo.channel[id];
1757 refch = gk20a_channel_get(ch);
1758 if (refch != NULL) {
1759 tsg = tsg_gk20a_from_ch(refch);
1760 }
1761 }
1762 } else {
1763 /* read channel based on instruction pointer */
1764 ch = gk20a_refch_from_inst_ptr(g,
1765 mmfault_info.inst_ptr);
1766 refch = ch;
1767 if (refch != NULL) {
1768 tsg = tsg_gk20a_from_ch(refch);
1769 }
1770 }
1771
1772 /* check if engine reset should be deferred */
1773 if (engine_id != FIFO_INVAL_ENGINE_ID) {
1774 bool defer = gk20a_fifo_should_defer_engine_reset(g,
1775 engine_id, mmfault_info.client_type,
1776 fake_fault);
1777 if ((ch || tsg) && defer) {
1778 g->fifo.deferred_fault_engines |= BIT(engine_id);
1779
1780 /* handled during channel free */
1781 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1782 g->fifo.deferred_reset_pending = true;
1783 nvgpu_mutex_release(&f->deferred_reset_mutex);
1784
1785 deferred_reset_pending = true;
1786
1787 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
1788 "sm debugger attached,"
1789 " deferring channel recovery to channel free");
1790 } else {
1791 gk20a_fifo_reset_engine(g, engine_id);
1792 }
1793 }
1794
1795#ifdef CONFIG_GK20A_CTXSW_TRACE
1796 if (tsg) {
1797 gk20a_ctxsw_trace_tsg_reset(g, tsg);
1798 }
1799#endif
1800 /*
1801 * Disable the channel/TSG from hw and increment syncpoints.
1802 */
1803 if (tsg) {
1804 if (deferred_reset_pending) {
1805 gk20a_disable_tsg(tsg);
1806 } else {
1807 if (!fake_fault) {
1808 gk20a_fifo_set_ctx_mmu_error_tsg(g,
1809 tsg);
1810 }
1811 verbose = gk20a_fifo_error_tsg(g, tsg);
1812 gk20a_fifo_abort_tsg(g, tsg, false);
1813 }
1814
1815 /* put back the ref taken early above */
1816 if (refch) {
1817 gk20a_channel_put(ch);
1818 }
1819 } else if (refch != NULL) {
1820 nvgpu_err(g, "mmu error in unbound channel %d",
1821 ch->chid);
1822 gk20a_channel_put(ch);
1823 } else if (mmfault_info.inst_ptr ==
1824 nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
1825 nvgpu_err(g, "mmu fault from bar1");
1826 } else if (mmfault_info.inst_ptr ==
1827 nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
1828 nvgpu_err(g, "mmu fault from pmu");
1829 } else {
1830 nvgpu_err(g, "couldn't locate channel for mmu fault");
1831 }
1832 }
1833
1834 /* clear interrupt */
1835 gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
1836
1837 /* resume scheduler */
1838 gk20a_writel(g, fifo_error_sched_disable_r(),
1839 gk20a_readl(g, fifo_error_sched_disable_r()));
1840
1841 /* Re-enable fifo access */
1842 gk20a_writel(g, gr_gpfifo_ctl_r(),
1843 gr_gpfifo_ctl_access_enabled_f() |
1844 gr_gpfifo_ctl_semaphore_access_enabled_f());
1845
1846 /* It is safe to enable ELPG again. */
1847 if (g->support_pmu) {
1848 if (nvgpu_cg_pg_enable(g) != 0) {
1849 nvgpu_warn(g, "fail to enable power mgmt");
1850 }
1851 }
1852
1853 return verbose;
1854}
1855
1856static bool gk20a_fifo_handle_mmu_fault(
1857 struct gk20a *g,
1858 u32 mmu_fault_engines, /* queried from HW if 0 */
1859 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1860 bool id_is_tsg)
1861{
1862 u32 rlid;
1863 bool verbose;
1864
1865 nvgpu_log_fn(g, " ");
1866
1867 nvgpu_log_info(g, "acquire engines_reset_mutex");
1868 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1869
1870 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
1871 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1872 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
1873 }
1874
1875 verbose = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
1876 hw_id, id_is_tsg);
1877
1878 nvgpu_log_info(g, "release runlist_lock for all runlists");
1879 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1880 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
1881 }
1882
1883 nvgpu_log_info(g, "release engines_reset_mutex");
1884 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
1885
1886 return verbose;
1887}
1888
1889static void gk20a_fifo_get_faulty_id_type(struct gk20a *g, int engine_id,
1890 u32 *id, u32 *type)
1891{
1892 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1893 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1894
1895 /* use next_id if context load is failing */
1896 *id = (ctx_status ==
1897 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1898 fifo_engine_status_next_id_v(status) :
1899 fifo_engine_status_id_v(status);
1900
1901 *type = (ctx_status ==
1902 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1903 fifo_engine_status_next_id_type_v(status) :
1904 fifo_engine_status_id_type_v(status);
1905}
1906
1907static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1908{
1909 unsigned int i;
1910 u32 engines = 0;
1911
1912 for (i = 0; i < g->fifo.num_engines; i++) {
1913 u32 active_engine_id = g->fifo.active_engines_list[i];
1914 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
1915 u32 ctx_status =
1916 fifo_engine_status_ctx_status_v(status);
1917 u32 ctx_id = (ctx_status ==
1918 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1919 fifo_engine_status_next_id_v(status) :
1920 fifo_engine_status_id_v(status);
1921 u32 type = (ctx_status ==
1922 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1923 fifo_engine_status_next_id_type_v(status) :
1924 fifo_engine_status_id_type_v(status);
1925 bool busy = fifo_engine_status_engine_v(status) ==
1926 fifo_engine_status_engine_busy_v();
1927 if (busy && ctx_id == id) {
1928 if ((is_tsg && type ==
1929 fifo_engine_status_id_type_tsgid_v()) ||
1930 (!is_tsg && type ==
1931 fifo_engine_status_id_type_chid_v())) {
1932 engines |= BIT(active_engine_id);
1933 }
1934 }
1935 }
1936
1937 return engines;
1938}
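/*
 * Example (hypothetical ids): if the engines with active ids 0 and 2 are
 * both busy and currently loaded with the requested chid/tsgid, the mask
 * returned above is BIT(0) | BIT(2) = 0x5. Note these are active engine
 * ids, not MMU fault ids; gk20a_fifo_teardown_ch_tsg() converts each bit
 * with gk20a_engine_id_to_mmu_id() before triggering recovery.
 */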
1939
1940void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
1941 bool verbose, u32 rc_type)
1942{
1943 u32 engines;
1944
1945 /* stop context switching to prevent engine assignments from
1946 changing until channel is recovered */
1947 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1948 gr_gk20a_disable_ctxsw(g);
1949
1950 engines = gk20a_fifo_engines_on_id(g, ch->chid, false);
1951
1952 if (engines) {
1953 gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
1954 rc_type);
1955 } else {
1956 gk20a_channel_abort(ch, false);
1957
1958 if (gk20a_fifo_error_ch(g, ch)) {
1959 gk20a_debug_dump(g);
1960 }
1961 }
1962
1963 gr_gk20a_enable_ctxsw(g);
1964 nvgpu_mutex_release(&g->dbg_sessions_lock);
1965}
1966
1967void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1968 bool verbose, u32 rc_type)
1969{
1970 u32 engines = 0U;
1971 int err;
1972
1973 /* stop context switching to prevent engine assignments from
1974 changing until TSG is recovered */
1975 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1976
1977 /* disable tsg so that it does not get scheduled again */
1978 g->ops.fifo.disable_tsg(tsg);
1979
1980 /*
1981 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
1982 * fifo_engine_status register. Also while the engine is held in reset
1983 * h/w passes busy/idle straight through. fifo_engine_status registers
1984 * are correct in that there is no context switch outstanding
1985 * as the CTXSW is aborted when reset is asserted.
1986 */
1987 nvgpu_log_info(g, "acquire engines_reset_mutex");
1988 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1989
1990 /*
1991 * stop context switching to prevent engine assignments from
1992 * changing until engine status is checked to make sure tsg
1993 * being recovered is not loaded on the engines
1994 */
1995 err = gr_gk20a_disable_ctxsw(g);
1996
1997 if (err != 0) {
1998 /* if failed to disable ctxsw, just abort tsg */
1999 nvgpu_err(g, "failed to disable ctxsw");
2000 } else {
2001 /* recover engines if tsg is loaded on the engines */
2002 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
2003
2004 /*
2005		 * It is ok to enable ctxsw before the tsg is recovered. If engines
2006		 * is 0, no engine recovery is needed, and if it is non-zero,
2007		 * gk20a_fifo_recover will check which engines are running this id
2008		 * again (via gk20a_fifo_engines_on_id). By that time, if the tsg is
2009		 * no longer on the engine, the engine need not be reset.
2010 */
2011 err = gr_gk20a_enable_ctxsw(g);
2012 if (err != 0) {
2013 nvgpu_err(g, "failed to enable ctxsw");
2014 }
2015 }
2016
2017 nvgpu_log_info(g, "release engines_reset_mutex");
2018 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2019
2020 if (engines) {
2021 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
2022 rc_type);
2023 } else {
2024 if (gk20a_fifo_error_tsg(g, tsg) && verbose) {
2025 gk20a_debug_dump(g);
2026 }
2027
2028 gk20a_fifo_abort_tsg(g, tsg, false);
2029 }
2030
2031 nvgpu_mutex_release(&g->dbg_sessions_lock);
2032}
2033
2034void gk20a_fifo_teardown_mask_intr(struct gk20a *g)
2035{
2036 u32 val;
2037
2038 val = gk20a_readl(g, fifo_intr_en_0_r());
2039 val &= ~(fifo_intr_en_0_sched_error_m() |
2040 fifo_intr_en_0_mmu_fault_m());
2041 gk20a_writel(g, fifo_intr_en_0_r(), val);
2042 gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());
2043}
2044
2045void gk20a_fifo_teardown_unmask_intr(struct gk20a *g)
2046{
2047 u32 val;
2048
2049 val = gk20a_readl(g, fifo_intr_en_0_r());
2050 val |= fifo_intr_en_0_mmu_fault_f(1) | fifo_intr_en_0_sched_error_f(1);
2051 gk20a_writel(g, fifo_intr_en_0_r(), val);
2052
2053}
2054
2055void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
2056 u32 hw_id, unsigned int id_type, unsigned int rc_type,
2057 struct mmu_fault_info *mmfault)
2058{
2059 unsigned long engine_id, i;
2060 unsigned long _engine_ids = __engine_ids;
2061 unsigned long engine_ids = 0;
2062 u32 mmu_fault_engines = 0;
2063 u32 ref_type;
2064 u32 ref_id;
2065 u32 ref_id_is_tsg = false;
2066 bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false;
2067 bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false;
2068 u32 rlid;
2069
2070 nvgpu_log_info(g, "acquire engines_reset_mutex");
2071 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
2072
2073 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
2074 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2075 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
2076 }
2077
2078 if (id_is_known) {
2079 engine_ids = gk20a_fifo_engines_on_id(g, hw_id, id_is_tsg);
2080 ref_id = hw_id;
2081 ref_type = id_is_tsg ?
2082 fifo_engine_status_id_type_tsgid_v() :
2083 fifo_engine_status_id_type_chid_v();
2084 ref_id_is_tsg = id_is_tsg;
2085		/* at least one engine will get passed during sched err */
2086 engine_ids |= __engine_ids;
2087 for_each_set_bit(engine_id, &engine_ids, 32) {
2088 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id);
2089
2090 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2091 mmu_fault_engines |= BIT(mmu_id);
2092 }
2093 }
2094 } else {
2095 /* store faulted engines in advance */
2096 for_each_set_bit(engine_id, &_engine_ids, 32) {
2097 gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id,
2098 &ref_type);
2099 if (ref_type == fifo_engine_status_id_type_tsgid_v()) {
2100 ref_id_is_tsg = true;
2101 } else {
2102 ref_id_is_tsg = false;
2103 }
2104 /* Reset *all* engines that use the
2105 * same channel as faulty engine */
2106 for (i = 0; i < g->fifo.num_engines; i++) {
2107 u32 active_engine_id = g->fifo.active_engines_list[i];
2108 u32 type;
2109 u32 id;
2110
2111 gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type);
2112 if (ref_type == type && ref_id == id) {
2113 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, active_engine_id);
2114
2115 engine_ids |= BIT(active_engine_id);
2116 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2117 mmu_fault_engines |= BIT(mmu_id);
2118 }
2119 }
2120 }
2121 }
2122 }
2123
2124 if (mmu_fault_engines) {
2125 g->ops.fifo.teardown_mask_intr(g);
2126 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
2127 gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id,
2128 ref_id_is_tsg);
2129
2130 g->ops.fifo.teardown_unmask_intr(g);
2131 }
2132
2133 nvgpu_log_info(g, "release runlist_lock for all runlists");
2134 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2135 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
2136 }
2137
2138 nvgpu_log_info(g, "release engines_reset_mutex");
2139 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2140}
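/*
 * Recovery sequence sketch (summarizing the function above): with the
 * engines_reset_mutex and every runlist_lock held, the code
 *   1. resolves the affected engines and their MMU fault ids, then,
 *      only if at least one MMU fault id was found,
 *   2. teardown_mask_intr()  - masks sched_error/mmu_fault interrupts,
 *   3. trigger_mmu_fault()   - forces a fault on the affected engines,
 *   4. gk20a_fifo_handle_mmu_fault_locked() - aborts the channel/TSG and
 *      resets the engines,
 *   5. teardown_unmask_intr() - restores the masked interrupts.
 */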
2141
2142void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
2143 u32 hw_id, bool id_is_tsg,
2144 bool id_is_known, bool verbose, int rc_type)
2145{
2146 unsigned int id_type;
2147
2148 if (verbose) {
2149 gk20a_debug_dump(g);
2150 }
2151
2152 if (g->ops.ltc.flush) {
2153 g->ops.ltc.flush(g);
2154 }
2155
2156 if (id_is_known) {
2157 id_type = id_is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
2158 } else {
2159 id_type = ID_TYPE_UNKNOWN;
2160 }
2161
2162 g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type,
2163 rc_type, NULL);
2164}
2165
2166/* force reset channel and tsg */
2167int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
2168 u32 err_code, bool verbose)
2169{
2170 struct channel_gk20a *ch_tsg = NULL;
2171 struct gk20a *g = ch->g;
2172
2173 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2174
2175 if (tsg != NULL) {
2176 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2177
2178 nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
2179 channel_gk20a, ch_entry) {
2180 if (gk20a_channel_get(ch_tsg)) {
2181 g->ops.fifo.set_error_notifier(ch_tsg,
2182 err_code);
2183 gk20a_channel_put(ch_tsg);
2184 }
2185 }
2186
2187 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2188 gk20a_fifo_recover_tsg(g, tsg, verbose,
2189 RC_TYPE_FORCE_RESET);
2190 } else {
2191 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2192 }
2193
2194 return 0;
2195}
2196
2197int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
2198{
2199 struct gk20a *g = ch->g;
2200
2201 if (gk20a_fifo_channel_status_is_next(g, ch->chid)) {
2202 nvgpu_log_info(g, "Channel %d to be removed from TSG %d has NEXT set!",
2203 ch->chid, ch->tsgid);
2204 return -EAGAIN;
2205 }
2206
2207 if (g->ops.fifo.tsg_verify_status_ctx_reload) {
2208 g->ops.fifo.tsg_verify_status_ctx_reload(ch);
2209 }
2210
2211 if (g->ops.fifo.tsg_verify_status_faulted) {
2212 g->ops.fifo.tsg_verify_status_faulted(ch);
2213 }
2214
2215 return 0;
2216}
2217
2218static bool gk20a_fifo_tsg_is_multi_channel(struct tsg_gk20a *tsg)
2219{
2220 bool ret = false;
2221
2222 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2223 if (nvgpu_list_first_entry(&tsg->ch_list, channel_gk20a,
2224 ch_entry) !=
2225 nvgpu_list_last_entry(&tsg->ch_list, channel_gk20a,
2226 ch_entry)) {
2227 ret = true;
2228 }
2229 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2230
2231 return ret;
2232}
2233
2234int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
2235{
2236 struct gk20a *g = ch->g;
2237 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2238 int err;
2239 bool tsg_timedout = false;
2240
2241 if (tsg == NULL) {
2242 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2243 return 0;
2244 }
2245
2246 /* If one channel in TSG times out, we disable all channels */
2247 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2248 tsg_timedout = gk20a_channel_check_timedout(ch);
2249 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2250
2251 /* Disable TSG and examine status before unbinding channel */
2252 g->ops.fifo.disable_tsg(tsg);
2253
2254 err = g->ops.fifo.preempt_tsg(g, tsg);
2255 if (err != 0) {
2256 goto fail_enable_tsg;
2257 }
2258
2259 /*
2260 * State validation is only necessary if there are multiple channels in
2261 * the TSG.
2262 */
2263 if (gk20a_fifo_tsg_is_multi_channel(tsg) &&
2264 g->ops.fifo.tsg_verify_channel_status && !tsg_timedout) {
2265 err = g->ops.fifo.tsg_verify_channel_status(ch);
2266 if (err) {
2267 goto fail_enable_tsg;
2268 }
2269 }
2270
2271 /* Channel should be seen as TSG channel while updating runlist */
2272 err = channel_gk20a_update_runlist(ch, false);
2273 if (err) {
2274 goto fail_enable_tsg;
2275 }
2276
2277 while (ch->mmu_debug_mode_refcnt > 0U) {
2278 err = nvgpu_tsg_set_mmu_debug_mode(ch, false);
2279 if (err != 0) {
2280 nvgpu_err(g, "disable mmu debug mode failed ch:%u",
2281 ch->chid);
2282 break;
2283 }
2284 }
2285
2286 /* Remove channel from TSG and re-enable rest of the channels */
2287 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2288 nvgpu_list_del(&ch->ch_entry);
2289 ch->tsgid = NVGPU_INVALID_TSG_ID;
2290
2291 /* another thread could have re-enabled the channel because it was
2292 * still on the list at that time, so make sure it's truly disabled
2293 */
2294 g->ops.fifo.disable_channel(ch);
2295 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2296
2297 /*
2298 * Don't re-enable all channels if TSG has timed out already
2299 *
2300 * Note that we can skip disabling and preempting TSG too in case of
2301 * time out, but we keep that to ensure TSG is kicked out
2302 */
2303 if (!tsg_timedout) {
2304 g->ops.fifo.enable_tsg(tsg);
2305 }
2306
2307 if (ch->g->ops.fifo.ch_abort_clean_up) {
2308 ch->g->ops.fifo.ch_abort_clean_up(ch);
2309 }
2310
2311 return 0;
2312
2313fail_enable_tsg:
2314 if (!tsg_timedout) {
2315 g->ops.fifo.enable_tsg(tsg);
2316 }
2317 return err;
2318}
2319
2320u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
2321 int *__id, bool *__is_tsg)
2322{
2323 u32 engine_id;
2324 int id = -1;
2325 bool is_tsg = false;
2326 u32 mailbox2;
2327 u32 active_engine_id = FIFO_INVAL_ENGINE_ID;
2328
2329 for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) {
2330 u32 status;
2331 u32 ctx_status;
2332 bool failing_engine;
2333
2334 active_engine_id = g->fifo.active_engines_list[engine_id];
2335 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
2336 ctx_status = fifo_engine_status_ctx_status_v(status);
2337
2338 /* we are interested in busy engines */
2339 failing_engine = fifo_engine_status_engine_v(status) ==
2340 fifo_engine_status_engine_busy_v();
2341
2342		/* ..that are doing a context switch */
2343 failing_engine = failing_engine &&
2344 (ctx_status ==
2345 fifo_engine_status_ctx_status_ctxsw_switch_v()
2346 || ctx_status ==
2347 fifo_engine_status_ctx_status_ctxsw_save_v()
2348 || ctx_status ==
2349 fifo_engine_status_ctx_status_ctxsw_load_v());
2350
2351 if (!failing_engine) {
2352 active_engine_id = FIFO_INVAL_ENGINE_ID;
2353 continue;
2354 }
2355
2356 if (ctx_status ==
2357 fifo_engine_status_ctx_status_ctxsw_load_v()) {
2358 id = fifo_engine_status_next_id_v(status);
2359 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2360 fifo_engine_status_next_id_type_chid_v();
2361 } else if (ctx_status ==
2362 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
2363 mailbox2 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(2));
2364 if (mailbox2 & FECS_METHOD_WFI_RESTORE) {
2365 id = fifo_engine_status_next_id_v(status);
2366 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2367 fifo_engine_status_next_id_type_chid_v();
2368 } else {
2369 id = fifo_engine_status_id_v(status);
2370 is_tsg = fifo_engine_status_id_type_v(status) !=
2371 fifo_engine_status_id_type_chid_v();
2372 }
2373 } else {
2374 id = fifo_engine_status_id_v(status);
2375 is_tsg = fifo_engine_status_id_type_v(status) !=
2376 fifo_engine_status_id_type_chid_v();
2377 }
2378 break;
2379 }
2380
2381 *__id = id;
2382 *__is_tsg = is_tsg;
2383
2384 return active_engine_id;
2385}
2386
2387bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
2388 bool *verbose, u32 *ms)
2389{
2390 bool recover = false;
2391 bool progress = false;
2392 struct gk20a *g = ch->g;
2393
2394 if (gk20a_channel_get(ch)) {
2395 recover = gk20a_channel_update_and_check_timeout(ch,
2396 g->fifo_eng_timeout_us / 1000,
2397 &progress);
2398 *verbose = ch->timeout_debug_dump;
2399 *ms = ch->timeout_accumulated_ms;
2400 if (recover) {
2401 g->ops.fifo.set_error_notifier(ch,
2402 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2403 }
2404
2405 gk20a_channel_put(ch);
2406 }
2407 return recover;
2408}
2409
2410bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
2411 bool *verbose, u32 *ms)
2412{
2413 struct channel_gk20a *ch;
2414 bool recover = false;
2415 bool progress = false;
2416 struct gk20a *g = tsg->g;
2417
2418 *verbose = false;
2419 *ms = g->fifo_eng_timeout_us / 1000;
2420
2421 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2422
2423 /* check if there was some progress on any of the TSG channels.
2424 * fifo recovery is needed if at least one channel reached the
2425 * maximum timeout without progress (update in gpfifo pointers).
2426 */
2427 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
2428 if (gk20a_channel_get(ch)) {
2429 recover = gk20a_channel_update_and_check_timeout(ch,
2430 *ms, &progress);
2431 if (progress || recover) {
2432 break;
2433 }
2434 gk20a_channel_put(ch);
2435 }
2436 }
2437
2438 if (recover) {
2439 /*
2440 * if one channel is presumed dead (no progress for too long),
2441 * then fifo recovery is needed. we can't really figure out
2442 * which channel caused the problem, so set timeout error
2443 * notifier for all channels.
2444 */
2445 nvgpu_log_info(g, "timeout on tsg=%d ch=%d",
2446 tsg->tsgid, ch->chid);
2447 *ms = ch->timeout_accumulated_ms;
2448 gk20a_channel_put(ch);
2449 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2450 channel_gk20a, ch_entry) {
2451 if (gk20a_channel_get(ch)) {
2452 ch->g->ops.fifo.set_error_notifier(ch,
2453 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2454 if (ch->timeout_debug_dump) {
2455 *verbose = true;
2456 }
2457 gk20a_channel_put(ch);
2458 }
2459 }
2460 } else if (progress) {
2461 /*
2462 * if at least one channel in the TSG made some progress, reset
2463 * accumulated timeout for all channels in the TSG. In
2464 * particular, this resets timeout for channels that already
2465 * completed their work
2466 */
2467 nvgpu_log_info(g, "progress on tsg=%d ch=%d",
2468 tsg->tsgid, ch->chid);
2469 gk20a_channel_put(ch);
2470 *ms = g->fifo_eng_timeout_us / 1000;
2471 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2472 channel_gk20a, ch_entry) {
2473 if (gk20a_channel_get(ch)) {
2474 ch->timeout_accumulated_ms = *ms;
2475 gk20a_channel_put(ch);
2476 }
2477 }
2478 }
2479
2480	/* if we could not detect progress on any of the channels, but none
2481 * of them has reached the timeout, there is nothing more to do:
2482 * timeout_accumulated_ms has been updated for all of them.
2483 */
2484 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2485 return recover;
2486}
2487
2488bool gk20a_fifo_handle_sched_error(struct gk20a *g)
2489{
2490 u32 sched_error;
2491 u32 engine_id;
2492 int id = -1;
2493 bool is_tsg = false;
2494 bool ret = false;
2495
2496 /* read the scheduler error register */
2497 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
2498
2499 engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);
2500 /*
2501 * Could not find the engine
2502 * Possible Causes:
2503 * a)
2504 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
2505 * fifo_engine_status register. Also while the engine is held in reset
2506 * h/w passes busy/idle straight through. fifo_engine_status registers
2507 * are correct in that there is no context switch outstanding
2508 * as the CTXSW is aborted when reset is asserted.
2509 * This is just a side effect of how gv100 and earlier versions of
2510 * ctxsw_timeout behave.
2511 * With gv11b and later, h/w snaps the context at the point of error
2512 * so that s/w can see the tsg_id which caused the HW timeout.
2513 * b)
2514 * If engines are not busy and ctxsw state is valid then intr occurred
2515 * in the past and if the ctxsw state has moved on to VALID from LOAD
2516 * or SAVE, it means that whatever timed out eventually finished
2517 * anyways. The problem with this is that s/w cannot conclude which
2518 * context caused the problem as maybe more switches occurred before
2519 * intr is handled.
2520 */
2521 if (engine_id == FIFO_INVAL_ENGINE_ID) {
2522 nvgpu_info(g, "fifo sched error: 0x%08x, failed to find engine "
2523 "that is busy doing ctxsw. "
2524			"Maybe ctxsw already happened", sched_error);
2525 ret = false;
2526 goto err;
2527 }
2528
2529 /* could not find the engine - should never happen */
2530 if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
2531 nvgpu_err(g, "fifo sched error : 0x%08x, failed to find engine",
2532 sched_error);
2533 ret = false;
2534 goto err;
2535 }
2536
2537 if (fifo_intr_sched_error_code_f(sched_error) ==
2538 fifo_intr_sched_error_code_ctxsw_timeout_v()) {
2539 struct fifo_gk20a *f = &g->fifo;
2540 u32 ms = 0;
2541 bool verbose = false;
2542
2543 if (is_tsg) {
2544 ret = g->ops.fifo.check_tsg_ctxsw_timeout(
2545 &f->tsg[id], &verbose, &ms);
2546 } else {
2547 ret = g->ops.fifo.check_ch_ctxsw_timeout(
2548 &f->channel[id], &verbose, &ms);
2549 }
2550
2551 if (ret) {
2552 nvgpu_err(g,
2553 "fifo sched ctxsw timeout error: "
2554 "engine=%u, %s=%d, ms=%u",
2555 engine_id, is_tsg ? "tsg" : "ch", id, ms);
2556 /*
2557 * Cancel all channels' timeout since SCHED error might
2558 * trigger multiple watchdogs at a time
2559 */
2560 gk20a_channel_timeout_restart_all_channels(g);
2561 gk20a_fifo_recover(g, BIT(engine_id), id,
2562 is_tsg, true, verbose,
2563 RC_TYPE_CTXSW_TIMEOUT);
2564 } else {
2565 nvgpu_log_info(g,
2566 "fifo is waiting for ctx switch for %d ms, "
2567 "%s=%d", ms, is_tsg ? "tsg" : "ch", id);
2568 }
2569 } else {
2570 nvgpu_err(g,
2571 "fifo sched error : 0x%08x, engine=%u, %s=%d",
2572 sched_error, engine_id, is_tsg ? "tsg" : "ch", id);
2573 }
2574
2575err:
2576 return ret;
2577}
2578
2579static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
2580{
2581 bool print_channel_reset_log = false;
2582 u32 handled = 0;
2583
2584 nvgpu_log_fn(g, "fifo_intr=0x%08x", fifo_intr);
2585
2586 if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
2587 /* pio mode is unused. this shouldn't happen, ever. */
2588 /* should we clear it or just leave it pending? */
2589 nvgpu_err(g, "fifo pio error!");
2590 BUG_ON(1);
2591 }
2592
2593 if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
2594 u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
2595 nvgpu_err(g, "fifo bind error: 0x%08x", bind_error);
2596 print_channel_reset_log = true;
2597 handled |= fifo_intr_0_bind_error_pending_f();
2598 }
2599
2600 if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
2601 print_channel_reset_log = g->ops.fifo.handle_sched_error(g);
2602 handled |= fifo_intr_0_sched_error_pending_f();
2603 }
2604
2605 if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
2606 gk20a_fifo_handle_chsw_fault(g);
2607 handled |= fifo_intr_0_chsw_error_pending_f();
2608 }
2609
2610 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
2611 if (gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false)) {
2612 print_channel_reset_log = true;
2613 }
2614 handled |= fifo_intr_0_mmu_fault_pending_f();
2615 }
2616
2617 if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
2618 gk20a_fifo_handle_dropped_mmu_fault(g);
2619 handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
2620 }
2621
2622 print_channel_reset_log = !g->fifo.deferred_reset_pending
2623 && print_channel_reset_log;
2624
2625 if (print_channel_reset_log) {
2626 unsigned int engine_id;
2627 nvgpu_err(g,
2628 "channel reset initiated from %s; intr=0x%08x",
2629 __func__, fifo_intr);
2630 for (engine_id = 0;
2631 engine_id < g->fifo.num_engines;
2632 engine_id++) {
2633 u32 active_engine_id = g->fifo.active_engines_list[engine_id];
2634 u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
2635 nvgpu_log_fn(g, "enum:%d -> engine_id:%d", engine_enum,
2636 active_engine_id);
2637 fifo_pbdma_exception_status(g,
2638 &g->fifo.engine_info[active_engine_id]);
2639 fifo_engine_exception_status(g,
2640 &g->fifo.engine_info[active_engine_id]);
2641 }
2642 }
2643
2644 return handled;
2645}
2646
2647static inline void gk20a_fifo_reset_pbdma_header(struct gk20a *g, int pbdma_id)
2648{
2649 gk20a_writel(g, pbdma_pb_header_r(pbdma_id),
2650 pbdma_pb_header_first_true_f() |
2651 pbdma_pb_header_type_non_inc_f());
2652}
2653
2654void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
2655 int pbdma_method_index)
2656{
2657 u32 pbdma_method_stride;
2658 u32 pbdma_method_reg;
2659
2660 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2661 pbdma_method0_r(pbdma_id);
2662
2663 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2664 (pbdma_method_index * pbdma_method_stride);
2665
2666 gk20a_writel(g, pbdma_method_reg,
2667 pbdma_method0_valid_true_f() |
2668 pbdma_method0_first_true_f() |
2669 pbdma_method0_addr_f(
2670 pbdma_udma_nop_r() >> 2));
2671}
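/*
 * Example: the method registers sit at a fixed stride, so for
 * pbdma_method_index == 2 the write above lands in
 * pbdma_method2_r(pbdma_id). The value programmed is a valid NOP method
 * (pbdma_udma_nop_r() >> 2), which replaces whatever method the PBDMA
 * had latched when it faulted.
 */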
2672
2673static bool gk20a_fifo_is_sw_method_subch(struct gk20a *g, int pbdma_id,
2674 int pbdma_method_index)
2675{
2676 u32 pbdma_method_stride;
2677 u32 pbdma_method_reg, pbdma_method_subch;
2678
2679 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2680 pbdma_method0_r(pbdma_id);
2681
2682 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2683 (pbdma_method_index * pbdma_method_stride);
2684
2685 pbdma_method_subch = pbdma_method0_subch_v(
2686 gk20a_readl(g, pbdma_method_reg));
2687
2688 if (pbdma_method_subch == 5 ||
2689 pbdma_method_subch == 6 ||
2690 pbdma_method_subch == 7) {
2691 return true;
2692 }
2693
2694 return false;
2695}
2696
2697unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
2698 u32 pbdma_intr_0, u32 *handled, u32 *error_notifier)
2699{
2700 struct fifo_gk20a *f = &g->fifo;
2701 unsigned int rc_type = RC_TYPE_NO_RC;
2702 int i;
2703 unsigned long pbdma_intr_err;
2704 u32 bit;
2705
2706 if ((f->intr.pbdma.device_fatal_0 |
2707 f->intr.pbdma.channel_fatal_0 |
2708 f->intr.pbdma.restartable_0) & pbdma_intr_0) {
2709
2710 pbdma_intr_err = (unsigned long)pbdma_intr_0;
2711 for_each_set_bit(bit, &pbdma_intr_err, 32) {
2712 nvgpu_err(g, "PBDMA intr %s Error",
2713 pbdma_intr_fault_type_desc[bit]);
2714 }
2715
2716 nvgpu_err(g,
2717 "pbdma_intr_0(%d):0x%08x PBH: %08x "
2718			"SHADOW: %08x gp shadow0: %08x gp shadow1: %08x "
2719 "M0: %08x %08x %08x %08x ",
2720 pbdma_id, pbdma_intr_0,
2721 gk20a_readl(g, pbdma_pb_header_r(pbdma_id)),
2722 gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id)),
2723 gk20a_readl(g, pbdma_gp_shadow_0_r(pbdma_id)),
2724 gk20a_readl(g, pbdma_gp_shadow_1_r(pbdma_id)),
2725 gk20a_readl(g, pbdma_method0_r(pbdma_id)),
2726 gk20a_readl(g, pbdma_method1_r(pbdma_id)),
2727 gk20a_readl(g, pbdma_method2_r(pbdma_id)),
2728 gk20a_readl(g, pbdma_method3_r(pbdma_id))
2729 );
2730
2731 rc_type = RC_TYPE_PBDMA_FAULT;
2732 *handled |= ((f->intr.pbdma.device_fatal_0 |
2733 f->intr.pbdma.channel_fatal_0 |
2734 f->intr.pbdma.restartable_0) &
2735 pbdma_intr_0);
2736 }
2737
2738 if (pbdma_intr_0 & pbdma_intr_0_acquire_pending_f()) {
2739 u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id));
2740
2741 val &= ~pbdma_acquire_timeout_en_enable_f();
2742 gk20a_writel(g, pbdma_acquire_r(pbdma_id), val);
2743 if (nvgpu_is_timeouts_enabled(g)) {
2744 rc_type = RC_TYPE_PBDMA_FAULT;
2745 nvgpu_err(g,
2746 "semaphore acquire timeout!");
2747 *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
2748 }
2749 *handled |= pbdma_intr_0_acquire_pending_f();
2750 }
2751
2752 if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) {
2753 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2754 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2755 rc_type = RC_TYPE_PBDMA_FAULT;
2756 }
2757
2758 if (pbdma_intr_0 & pbdma_intr_0_method_pending_f()) {
2759 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2760 rc_type = RC_TYPE_PBDMA_FAULT;
2761 }
2762
2763 if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
2764 *error_notifier =
2765 NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
2766 rc_type = RC_TYPE_PBDMA_FAULT;
2767 }
2768
2769 if (pbdma_intr_0 & pbdma_intr_0_device_pending_f()) {
2770 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2771
2772 for (i = 0; i < 4; i++) {
2773 if (gk20a_fifo_is_sw_method_subch(g,
2774 pbdma_id, i)) {
2775 gk20a_fifo_reset_pbdma_method(g,
2776 pbdma_id, i);
2777 }
2778 }
2779 rc_type = RC_TYPE_PBDMA_FAULT;
2780 }
2781
2782 return rc_type;
2783}
2784
2785unsigned int gk20a_fifo_handle_pbdma_intr_1(struct gk20a *g,
2786 u32 pbdma_id, u32 pbdma_intr_1,
2787 u32 *handled, u32 *error_notifier)
2788{
2789 unsigned int rc_type = RC_TYPE_PBDMA_FAULT;
2790
2791 /*
2792 * all of the interrupts in _intr_1 are "host copy engine"
2793 * related, which is not supported. For now just make them
2794 * channel fatal.
2795 */
2796 nvgpu_err(g, "hce err: pbdma_intr_1(%d):0x%08x",
2797 pbdma_id, pbdma_intr_1);
2798 *handled |= pbdma_intr_1;
2799
2800 return rc_type;
2801}
2802
2803static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2804 struct fifo_gk20a *f, u32 pbdma_id,
2805 u32 error_notifier, u32 status)
2806{
2807 u32 id;
2808
2809 nvgpu_log(g, gpu_dbg_info, "pbdma id %d error notifier %d",
2810 pbdma_id, error_notifier);
2811 /* Remove channel from runlist */
2812 id = fifo_pbdma_status_id_v(status);
2813 if (fifo_pbdma_status_id_type_v(status)
2814 == fifo_pbdma_status_id_type_chid_v()) {
2815 struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
2816
2817 if (ch != NULL) {
2818 g->ops.fifo.set_error_notifier(ch, error_notifier);
2819 gk20a_fifo_recover_ch(g, ch, true, RC_TYPE_PBDMA_FAULT);
2820 gk20a_channel_put(ch);
2821 }
2822 } else if (fifo_pbdma_status_id_type_v(status)
2823 == fifo_pbdma_status_id_type_tsgid_v()) {
2824 struct tsg_gk20a *tsg = &f->tsg[id];
2825 struct channel_gk20a *ch = NULL;
2826
2827 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2828 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2829 channel_gk20a, ch_entry) {
2830 if (gk20a_channel_get(ch)) {
2831 g->ops.fifo.set_error_notifier(ch,
2832 error_notifier);
2833 gk20a_channel_put(ch);
2834 }
2835 }
2836 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2837 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PBDMA_FAULT);
2838 }
2839}
2840
2841u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2842 u32 pbdma_id, unsigned int rc)
2843{
2844 u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
2845 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
2846
2847 u32 handled = 0;
2848 u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
2849 unsigned int rc_type = RC_TYPE_NO_RC;
2850 u32 pbdma_status_info = 0;
2851
2852 if (pbdma_intr_0) {
2853 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2854 "pbdma id %d intr_0 0x%08x pending",
2855 pbdma_id, pbdma_intr_0);
2856
2857 if (g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, pbdma_intr_0,
2858 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2859 rc_type = RC_TYPE_PBDMA_FAULT;
2860
2861 pbdma_status_info = gk20a_readl(g,
2862 fifo_pbdma_status_r(pbdma_id));
2863 }
2864 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
2865 }
2866
2867 if (pbdma_intr_1) {
2868 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2869 "pbdma id %d intr_1 0x%08x pending",
2870 pbdma_id, pbdma_intr_1);
2871
2872 if (g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, pbdma_intr_1,
2873 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2874 rc_type = RC_TYPE_PBDMA_FAULT;
2875
2876 pbdma_status_info = gk20a_readl(g,
2877 fifo_pbdma_status_r(pbdma_id));
2878 }
2879 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
2880 }
2881
2882 if (rc == RC_YES && rc_type == RC_TYPE_PBDMA_FAULT) {
2883 gk20a_fifo_pbdma_fault_rc(g, f, pbdma_id, error_notifier,
2884 pbdma_status_info);
2885 }
2886
2887 return handled;
2888}
2889
2890static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
2891{
2892 struct fifo_gk20a *f = &g->fifo;
2893 u32 clear_intr = 0, i;
2894 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
2895 u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());
2896
2897 for (i = 0; i < host_num_pbdma; i++) {
2898 if (fifo_intr_pbdma_id_status_v(pbdma_pending, i)) {
2899 nvgpu_log(g, gpu_dbg_intr, "pbdma id %d intr pending", i);
2900 clear_intr |=
2901 gk20a_fifo_handle_pbdma_intr(g, f, i, RC_YES);
2902 }
2903 }
2904 return fifo_intr_0_pbdma_intr_pending_f();
2905}
2906
2907void gk20a_fifo_isr(struct gk20a *g)
2908{
2909 u32 error_intr_mask;
2910 u32 clear_intr = 0;
2911 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2912
2913 error_intr_mask = g->ops.fifo.intr_0_error_mask(g);
2914
2915 if (g->fifo.sw_ready) {
2916 /* note we're not actually in an "isr", but rather
2917 * in a threaded interrupt context... */
2918 nvgpu_mutex_acquire(&g->fifo.intr.isr.mutex);
2919
2920 nvgpu_log(g, gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
2921
2922 /* handle runlist update */
2923 if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
2924 gk20a_fifo_handle_runlist_event(g);
2925 clear_intr |= fifo_intr_0_runlist_event_pending_f();
2926 }
2927 if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f()) {
2928 clear_intr |= fifo_pbdma_isr(g, fifo_intr);
2929 }
2930
2931 if (g->ops.fifo.handle_ctxsw_timeout) {
2932 g->ops.fifo.handle_ctxsw_timeout(g, fifo_intr);
2933 }
2934
2935 if (unlikely((fifo_intr & error_intr_mask) != 0U)) {
2936 clear_intr |= fifo_error_isr(g, fifo_intr);
2937 }
2938
2939 nvgpu_mutex_release(&g->fifo.intr.isr.mutex);
2940 }
2941 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2942
2943 return;
2944}
2945
2946u32 gk20a_fifo_nonstall_isr(struct gk20a *g)
2947{
2948 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2949 u32 clear_intr = 0;
2950
2951 nvgpu_log(g, gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);
2952
2953 if (fifo_intr & fifo_intr_0_channel_intr_pending_f()) {
2954 clear_intr = fifo_intr_0_channel_intr_pending_f();
2955 }
2956
2957 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2958
2959 return GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE;
2960}
2961
2962void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
2963{
2964 if (is_tsg) {
2965 gk20a_writel(g, fifo_preempt_r(),
2966 fifo_preempt_id_f(id) |
2967 fifo_preempt_type_tsg_f());
2968 } else {
2969 gk20a_writel(g, fifo_preempt_r(),
2970 fifo_preempt_chid_f(id) |
2971 fifo_preempt_type_channel_f());
2972 }
2973}
2974
2975static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g)
2976{
2977 /* Use fifo_eng_timeout converted to ms for preempt
2978	 * polling. gr_idle_timeout, i.e. 3000 ms, is not appropriate
2979	 * for polling preempt done, as context switch timeout gets
2980 * triggered every 100 ms and context switch recovery
2981 * happens every 3000 ms */
2982
2983 return g->fifo_eng_timeout_us / 1000;
2984}
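/*
 * Worked example (assuming the 100 ms ctxsw-timeout check period noted
 * above, i.e. fifo_eng_timeout_us == 100000): the preempt poll window
 * becomes 100000 / 1000 = 100 ms, instead of the 3000 ms gr_idle_timeout.
 */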
2985
2986int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
2987 unsigned int id_type, bool preempt_retries_left)
2988{
2989 struct nvgpu_timeout timeout;
2990 u32 delay = GR_IDLE_CHECK_DEFAULT;
2991 int ret = -EBUSY;
2992
2993 nvgpu_timeout_init(g, &timeout, gk20a_fifo_get_preempt_timeout(g),
2994 NVGPU_TIMER_CPU_TIMER);
2995 do {
2996 if (!(gk20a_readl(g, fifo_preempt_r()) &
2997 fifo_preempt_pending_true_f())) {
2998 ret = 0;
2999 break;
3000 }
3001
3002 nvgpu_usleep_range(delay, delay * 2);
3003 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3004 } while (!nvgpu_timeout_expired(&timeout));
3005
3006 if (ret) {
3007 nvgpu_err(g, "preempt timeout: id: %u id_type: %d ",
3008 id, id_type);
3009 }
3010 return ret;
3011}
3012
3013void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3014{
3015 struct channel_gk20a *ch = NULL;
3016
3017 nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
3018
3019 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3020 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3021 channel_gk20a, ch_entry) {
3022 if (!gk20a_channel_get(ch)) {
3023 continue;
3024 }
3025 g->ops.fifo.set_error_notifier(ch,
3026 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3027 gk20a_channel_put(ch);
3028 }
3029 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3030 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
3031}
3032
3033void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
3034{
3035 nvgpu_err(g, "preempt channel %d timeout", ch->chid);
3036
3037 g->ops.fifo.set_error_notifier(ch,
3038 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3039 gk20a_fifo_recover_ch(g, ch, true,
3040 RC_TYPE_PREEMPT_TIMEOUT);
3041}
3042
3043int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg,
3044 bool preempt_retries_left)
3045{
3046 int ret;
3047 unsigned int id_type;
3048
3049 nvgpu_log_fn(g, "id: %d is_tsg: %d", id, is_tsg);
3050
3051 /* issue preempt */
3052 gk20a_fifo_issue_preempt(g, id, is_tsg);
3053
3054 id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
3055
3056 /*
3057 * Poll for preempt done. if stalling interrupts are pending
3058 * while preempt is in progress we poll for stalling interrupts
3059 * to finish based on return value from this function and
3060 * retry preempt again.
3061 * If HW is hung, on the last retry instance we try to identify
3062 * the engines hung and set the runlist reset_eng_bitmask
3063 * and mark preemption completion.
3064 */
3065 ret = g->ops.fifo.is_preempt_pending(g, id, id_type,
3066 preempt_retries_left);
3067
3068 return ret;
3069}
3070
3071int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
3072{
3073 struct fifo_gk20a *f = &g->fifo;
3074 u32 ret = 0;
3075 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3076 u32 mutex_ret = 0;
3077 u32 i;
3078
3079 nvgpu_log_fn(g, "chid: %d", ch->chid);
3080
3081 /* we have no idea which runlist we are using. lock all */
3082 for (i = 0; i < g->fifo.max_runlists; i++) {
3083 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3084 }
3085
3086 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3087
3088 ret = __locked_fifo_preempt(g, ch->chid, false, false);
3089
3090 if (!mutex_ret) {
3091 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3092 }
3093
3094 for (i = 0; i < g->fifo.max_runlists; i++) {
3095 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3096 }
3097
3098 if (ret) {
3099 if (nvgpu_platform_is_silicon(g)) {
3100 nvgpu_err(g, "preempt timed out for chid: %u, "
3101 "ctxsw timeout will trigger recovery if needed",
3102 ch->chid);
3103 } else {
3104 gk20a_fifo_preempt_timeout_rc(g, ch);
3105 }
3106 }
3107
3108 return ret;
3109}
3110
3111int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3112{
3113 struct fifo_gk20a *f = &g->fifo;
3114 u32 ret = 0;
3115 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3116 u32 mutex_ret = 0;
3117 u32 i;
3118
3119 nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
3120
3121 /* we have no idea which runlist we are using. lock all */
3122 for (i = 0; i < g->fifo.max_runlists; i++) {
3123 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3124 }
3125
3126 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3127
3128 ret = __locked_fifo_preempt(g, tsg->tsgid, true, false);
3129
3130 if (!mutex_ret) {
3131 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3132 }
3133
3134 for (i = 0; i < g->fifo.max_runlists; i++) {
3135 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3136 }
3137
3138 if (ret) {
3139 if (nvgpu_platform_is_silicon(g)) {
3140 nvgpu_err(g, "preempt timed out for tsgid: %u, "
3141 "ctxsw timeout will trigger recovery if needed",
3142 tsg->tsgid);
3143 } else {
3144 gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
3145 }
3146 }
3147
3148 return ret;
3149}
3150
3151int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch)
3152{
3153 int err;
3154 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
3155
3156 if (tsg != NULL) {
3157 err = g->ops.fifo.preempt_tsg(ch->g, tsg);
3158 } else {
3159 err = g->ops.fifo.preempt_channel(ch->g, ch);
3160 }
3161
3162 return err;
3163}
3164
3165static void gk20a_fifo_sched_disable_rw(struct gk20a *g, u32 runlists_mask,
3166 u32 runlist_state)
3167{
3168 u32 reg_val;
3169
3170 reg_val = gk20a_readl(g, fifo_sched_disable_r());
3171
3172 if (runlist_state == RUNLIST_DISABLED) {
3173 reg_val |= runlists_mask;
3174 } else {
3175 reg_val &= (~runlists_mask);
3176 }
3177
3178 gk20a_writel(g, fifo_sched_disable_r(), reg_val);
3179
3180}
3181
3182void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
3183 u32 runlist_state)
3184{
3185 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3186 u32 mutex_ret;
3187
3188 nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
3189 runlists_mask, runlist_state);
3190
3191 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3192
3193 gk20a_fifo_sched_disable_rw(g, runlists_mask, runlist_state);
3194
3195 if (!mutex_ret) {
3196 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3197 }
3198}
3199
3200void gk20a_fifo_enable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3201{
3202 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3203 tsg->runlist_id), RUNLIST_ENABLED);
3204
3205}
3206
3207void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3208{
3209 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3210 tsg->runlist_id), RUNLIST_DISABLED);
3211}
3212
3213int gk20a_fifo_enable_engine_activity(struct gk20a *g,
3214 struct fifo_engine_info_gk20a *eng_info)
3215{
3216 nvgpu_log(g, gpu_dbg_info, "start");
3217
3218 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3219 eng_info->runlist_id), RUNLIST_ENABLED);
3220 return 0;
3221}
3222
3223int gk20a_fifo_enable_all_engine_activity(struct gk20a *g)
3224{
3225 unsigned int i;
3226 int err = 0, ret = 0;
3227
3228 for (i = 0; i < g->fifo.num_engines; i++) {
3229 u32 active_engine_id = g->fifo.active_engines_list[i];
3230 err = gk20a_fifo_enable_engine_activity(g,
3231 &g->fifo.engine_info[active_engine_id]);
3232 if (err) {
3233 nvgpu_err(g,
3234 "failed to enable engine %d activity", active_engine_id);
3235 ret = err;
3236 }
3237 }
3238
3239 return ret;
3240}
3241
3242int gk20a_fifo_disable_engine_activity(struct gk20a *g,
3243 struct fifo_engine_info_gk20a *eng_info,
3244 bool wait_for_idle)
3245{
3246 u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
3247 u32 pbdma_chid = FIFO_INVAL_CHANNEL_ID;
3248 u32 engine_chid = FIFO_INVAL_CHANNEL_ID;
3249 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3250 int mutex_ret;
3251 struct channel_gk20a *ch = NULL;
3252 int err = 0;
3253
3254 nvgpu_log_fn(g, " ");
3255
3256 gr_stat =
3257 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3258 if (fifo_engine_status_engine_v(gr_stat) ==
3259 fifo_engine_status_engine_busy_v() && !wait_for_idle) {
3260 return -EBUSY;
3261 }
3262
3263 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3264
3265 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3266 eng_info->runlist_id), RUNLIST_DISABLED);
3267
3268 /* chid from pbdma status */
3269 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
3270 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
3271 if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
3272 chan_stat == fifo_pbdma_status_chan_status_chsw_save_v()) {
3273 pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
3274 } else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
3275 chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v()) {
3276 pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
3277 }
3278
3279 if (pbdma_chid != FIFO_INVAL_CHANNEL_ID) {
3280 ch = gk20a_channel_from_id(g, pbdma_chid);
3281 if (ch != NULL) {
3282 err = g->ops.fifo.preempt_channel(g, ch);
3283 gk20a_channel_put(ch);
3284 }
3285 if (err != 0) {
3286 goto clean_up;
3287 }
3288 }
3289
3290 /* chid from engine status */
3291 eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3292 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
3293 if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
3294 ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v()) {
3295 engine_chid = fifo_engine_status_id_v(eng_stat);
3296 } else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
3297 ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3298 engine_chid = fifo_engine_status_next_id_v(eng_stat);
3299 }
3300
3301	if (engine_chid != FIFO_INVAL_CHANNEL_ID && engine_chid != pbdma_chid) {
3302 ch = gk20a_channel_from_id(g, engine_chid);
3303 if (ch != NULL) {
3304 err = g->ops.fifo.preempt_channel(g, ch);
3305 gk20a_channel_put(ch);
3306 }
3307 if (err != 0) {
3308 goto clean_up;
3309 }
3310 }
3311
3312clean_up:
3313 if (!mutex_ret) {
3314 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3315 }
3316
3317 if (err) {
3318 nvgpu_log_fn(g, "failed");
3319 if (gk20a_fifo_enable_engine_activity(g, eng_info)) {
3320 nvgpu_err(g,
3321 "failed to enable gr engine activity");
3322 }
3323 } else {
3324 nvgpu_log_fn(g, "done");
3325 }
3326 return err;
3327}
3328
3329int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
3330 bool wait_for_idle)
3331{
3332 unsigned int i;
3333 int err = 0, ret = 0;
3334 u32 active_engine_id;
3335
3336 for (i = 0; i < g->fifo.num_engines; i++) {
3337 active_engine_id = g->fifo.active_engines_list[i];
3338 err = gk20a_fifo_disable_engine_activity(g,
3339 &g->fifo.engine_info[active_engine_id],
3340 wait_for_idle);
3341 if (err) {
3342 nvgpu_err(g, "failed to disable engine %d activity",
3343 active_engine_id);
3344 ret = err;
3345 break;
3346 }
3347 }
3348
3349 if (err) {
3350 while (i-- != 0) {
3351 active_engine_id = g->fifo.active_engines_list[i];
3352 err = gk20a_fifo_enable_engine_activity(g,
3353 &g->fifo.engine_info[active_engine_id]);
3354 if (err) {
3355 nvgpu_err(g,
3356 "failed to re-enable engine %d activity",
3357 active_engine_id);
3358 }
3359 }
3360 }
3361
3362 return ret;
3363}
3364
3365static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
3366{
3367 struct fifo_gk20a *f = &g->fifo;
3368 u32 engines = 0;
3369 unsigned int i;
3370
3371 for (i = 0; i < f->num_engines; i++) {
3372 u32 active_engine_id = g->fifo.active_engines_list[i];
3373 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
3374 bool engine_busy = fifo_engine_status_engine_v(status) ==
3375 fifo_engine_status_engine_busy_v();
3376
3377 if (engine_busy &&
3378 (f->engine_info[active_engine_id].runlist_id == runlist_id)) {
3379 engines |= BIT(active_engine_id);
3380 }
3381 }
3382
3383 if (engines) {
3384 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true,
3385 RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
3386 }
3387}
3388
3389int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
3390{
3391 struct nvgpu_timeout timeout;
3392 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3393 int ret = -ETIMEDOUT;
3394
3395 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3396 NVGPU_TIMER_CPU_TIMER);
3397
3398 do {
3399 if ((gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
3400 fifo_eng_runlist_pending_true_f()) == 0) {
3401 ret = 0;
3402 break;
3403 }
3404
3405 nvgpu_usleep_range(delay, delay * 2);
3406 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3407 } while (!nvgpu_timeout_expired(&timeout));
3408
3409 if (ret) {
3410 nvgpu_err(g, "runlist wait timeout: runlist id: %u",
3411 runlist_id);
3412 }
3413
3414 return ret;
3415}
3416
3417void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist)
3418{
3419
3420 u32 runlist_entry_0 = ram_rl_entry_id_f(tsg->tsgid) |
3421 ram_rl_entry_type_tsg_f() |
3422 ram_rl_entry_tsg_length_f(tsg->num_active_channels);
3423
3424 if (tsg->timeslice_timeout) {
3425 runlist_entry_0 |=
3426 ram_rl_entry_timeslice_scale_f(tsg->timeslice_scale) |
3427 ram_rl_entry_timeslice_timeout_f(tsg->timeslice_timeout);
3428 } else {
3429 runlist_entry_0 |=
3430 ram_rl_entry_timeslice_scale_f(
3431 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) |
3432 ram_rl_entry_timeslice_timeout_f(
3433 NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT);
3434 }
3435
3436 runlist[0] = runlist_entry_0;
3437 runlist[1] = 0;
3438
3439}
3440
3441u32 gk20a_fifo_default_timeslice_us(struct gk20a *g)
3442{
3443 return (((u64)(NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT <<
3444 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) *
3445 (u64)g->ptimer_src_freq) /
3446 (u64)PTIMER_REF_FREQ_HZ);
3447}
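/*
 * The conversion above scales the default timeslice by the ratio of the
 * actual ptimer source frequency to the reference frequency:
 *
 *   timeslice_us = (TIMEOUT << SCALE) * ptimer_src_freq / PTIMER_REF_FREQ_HZ
 *
 * so when ptimer_src_freq == PTIMER_REF_FREQ_HZ this reduces to
 * NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT << NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE
 * microseconds.
 */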
3448
3449void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist)
3450{
3451 runlist[0] = ram_rl_entry_chid_f(ch->chid);
3452 runlist[1] = 0;
3453}
3454
3455/* recursively construct a runlist with interleaved bare channels and TSGs */
3456u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
3457 struct fifo_runlist_info_gk20a *runlist,
3458 u32 cur_level,
3459 u32 *runlist_entry,
3460 bool interleave_enabled,
3461 bool prev_empty,
3462 u32 *entries_left)
3463{
3464 bool last_level = cur_level == NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
3465 struct channel_gk20a *ch;
3466 bool skip_next = false;
3467 u32 tsgid, count = 0;
3468 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3469 struct gk20a *g = f->g;
3470
3471 nvgpu_log_fn(g, " ");
3472
3473 /* for each TSG, T, on this level, insert all higher-level channels
3474 and TSGs before inserting T. */
3475 for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
3476 struct tsg_gk20a *tsg = &f->tsg[tsgid];
3477
3478 if (tsg->interleave_level != cur_level) {
3479 continue;
3480 }
3481
3482 if (!last_level && !skip_next) {
3483 runlist_entry = gk20a_runlist_construct_locked(f,
3484 runlist,
3485 cur_level + 1,
3486 runlist_entry,
3487 interleave_enabled,
3488 false,
3489 entries_left);
3490 if (!interleave_enabled) {
3491 skip_next = true;
3492 }
3493 }
3494
3495 if (*entries_left == 0U) {
3496 return NULL;
3497 }
3498
3499 /* add TSG entry */
3500 nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
3501 f->g->ops.fifo.get_tsg_runlist_entry(tsg, runlist_entry);
3502 nvgpu_log_info(g, "tsg runlist count %d runlist [0] %x [1] %x\n",
3503 count, runlist_entry[0], runlist_entry[1]);
3504 runlist_entry += runlist_entry_words;
3505 count++;
3506 (*entries_left)--;
3507
3508 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3509 /* add runnable channels bound to this TSG */
3510 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3511 channel_gk20a, ch_entry) {
3512 if (!test_bit((int)ch->chid,
3513 runlist->active_channels)) {
3514 continue;
3515 }
3516
3517 if (*entries_left == 0U) {
3518 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3519 return NULL;
3520 }
3521
3522 nvgpu_log_info(g, "add channel %d to runlist",
3523 ch->chid);
3524 f->g->ops.fifo.get_ch_runlist_entry(ch, runlist_entry);
3525 nvgpu_log_info(g,
3526 "run list count %d runlist [0] %x [1] %x\n",
3527 count, runlist_entry[0], runlist_entry[1]);
3528 count++;
3529 runlist_entry += runlist_entry_words;
3530 (*entries_left)--;
3531 }
3532 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3533 }
3534
3535 /* append entries from higher level if this level is empty */
3536 if (!count && !last_level) {
3537 runlist_entry = gk20a_runlist_construct_locked(f,
3538 runlist,
3539 cur_level + 1,
3540 runlist_entry,
3541 interleave_enabled,
3542 true,
3543 entries_left);
3544 }
3545
3546 /*
3547 * if previous and this level have entries, append
3548 * entries from higher level.
3549 *
3550 * ex. dropping from MEDIUM to LOW, need to insert HIGH
3551 */
3552 if (interleave_enabled && count && !prev_empty && !last_level) {
3553 runlist_entry = gk20a_runlist_construct_locked(f,
3554 runlist,
3555 cur_level + 1,
3556 runlist_entry,
3557 interleave_enabled,
3558 false,
3559 entries_left);
3560 }
3561 return runlist_entry;
3562}
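/*
 * Illustrative ordering (hypothetical TSG names): with interleaving
 * enabled and one TSG per level - H at HIGH, M at MEDIUM, L at LOW -
 * the recursion above emits entries in the order
 *
 *   H, M, H, L
 *
 * i.e. all higher-level TSGs are re-inserted before each lower-level
 * TSG, so higher-priority work appears more often in the runlist. With
 * interleaving disabled each level is emitted only once: H, M, L.
 */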
3563
3564int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
3565 u32 id,
3566 u32 runlist_id,
3567 u32 new_level)
3568{
3569 nvgpu_log_fn(g, " ");
3570
3571 g->fifo.tsg[id].interleave_level = new_level;
3572
3573 return 0;
3574}
3575
3576int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
3577{
3578 struct gk20a *g = tsg->g;
3579
3580 if (timeslice < g->min_timeslice_us ||
3581 timeslice > g->max_timeslice_us) {
3582 return -EINVAL;
3583 }
3584
3585 gk20a_channel_get_timescale_from_timeslice(g, timeslice,
3586 &tsg->timeslice_timeout, &tsg->timeslice_scale);
3587
3588 tsg->timeslice_us = timeslice;
3589
3590 return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
3591}
3592
3593void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
3594 u32 count, u32 buffer_index)
3595{
3596 struct fifo_runlist_info_gk20a *runlist = NULL;
3597 u64 runlist_iova;
3598
3599 runlist = &g->fifo.runlist_info[runlist_id];
3600 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]);
3601
3602 if (count != 0) {
3603 gk20a_writel(g, fifo_runlist_base_r(),
3604 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3605 nvgpu_aperture_mask(g, &runlist->mem[buffer_index],
3606 fifo_runlist_base_target_sys_mem_ncoh_f(),
3607 fifo_runlist_base_target_sys_mem_coh_f(),
3608 fifo_runlist_base_target_vid_mem_f()));
3609 }
3610
3611 gk20a_writel(g, fifo_runlist_r(),
3612 fifo_runlist_engine_f(runlist_id) |
3613 fifo_eng_runlist_length_f(count));
3614}
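/*
 * Note on the submit above: fifo_runlist_base_r() takes the runlist
 * buffer address shifted right by 12, so the buffer must be 4 KiB
 * aligned, together with an aperture target. Writing fifo_runlist_r()
 * with the runlist id and entry count starts the fetch; a count of 0
 * skips the base update and submits an empty runlist, which drops all
 * work from that runlist.
 */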
3615
3616int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3617 u32 chid, bool add,
3618 bool wait_for_finish)
3619{
3620 int ret = 0;
3621 struct fifo_gk20a *f = &g->fifo;
3622 struct fifo_runlist_info_gk20a *runlist = NULL;
3623 u32 *runlist_entry_base = NULL;
3624 u64 runlist_iova;
3625 u32 new_buf;
3626 struct channel_gk20a *ch = NULL;
3627 struct tsg_gk20a *tsg = NULL;
3628 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3629
3630 runlist = &f->runlist_info[runlist_id];
3631
3632 /* valid channel, add/remove it from active list.
3633 Otherwise, keep active list untouched for suspend/resume. */
3634 if (chid != FIFO_INVAL_CHANNEL_ID) {
3635 ch = &f->channel[chid];
3636 tsg = tsg_gk20a_from_ch(ch);
3637
3638 if (add) {
3639 if (test_and_set_bit(chid,
3640 runlist->active_channels) == 1) {
3641 return 0;
3642 }
3643 if (tsg && ++tsg->num_active_channels) {
3644 set_bit((int)f->channel[chid].tsgid,
3645 runlist->active_tsgs);
3646 }
3647 } else {
3648 if (test_and_clear_bit(chid,
3649 runlist->active_channels) == 0) {
3650 return 0;
3651 }
3652 if (tsg && --tsg->num_active_channels == 0) {
3653 clear_bit((int)f->channel[chid].tsgid,
3654 runlist->active_tsgs);
3655 }
3656 }
3657 }
3658
3659 new_buf = !runlist->cur_buffer;
3660
3661 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[new_buf]);
3662
3663 nvgpu_log_info(g, "runlist_id : %d, switch to new buffer 0x%16llx",
3664 runlist_id, (u64)runlist_iova);
3665
3666 if (!runlist_iova) {
3667 ret = -EINVAL;
3668 goto clean_up;
3669 }
3670
3671 runlist_entry_base = runlist->mem[new_buf].cpu_va;
3672 if (!runlist_entry_base) {
3673 ret = -ENOMEM;
3674 goto clean_up;
3675 }
3676
3677 if (chid != FIFO_INVAL_CHANNEL_ID || /* add/remove a valid channel */
3678 add /* resume to add all channels back */) {
3679 u32 max_entries = f->num_runlist_entries;
3680 u32 *runlist_end;
3681
3682 runlist_end = gk20a_runlist_construct_locked(f,
3683 runlist,
3684 0,
3685 runlist_entry_base,
3686 g->runlist_interleave,
3687 true,
3688 &max_entries);
3689 if (!runlist_end) {
3690 ret = -E2BIG;
3691 goto clean_up;
3692 }
3693 runlist->count = (runlist_end - runlist_entry_base) /
3694 runlist_entry_words;
3695 WARN_ON(runlist->count > f->num_runlist_entries);
3696 } else {
3697 /* suspend to remove all channels */
3698 runlist->count = 0;
3699 }
3700
3701 g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
3702
3703 if (wait_for_finish) {
3704 ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
3705
3706 if (ret == -ETIMEDOUT) {
3707 nvgpu_err(g, "runlist %d update timeout", runlist_id);
3708 /* trigger runlist update timeout recovery */
3709 return ret;
3710
3711 } else if (ret == -EINTR) {
3712 nvgpu_err(g, "runlist update interrupted");
3713 }
3714 }
3715
3716 runlist->cur_buffer = new_buf;
3717
3718clean_up:
3719 return ret;
3720}
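/*
 * Double-buffering sketch: each runlist owns two DMA buffers,
 * runlist->mem[0] and runlist->mem[1]. The update above always builds
 * the new runlist in the buffer that is not currently in use
 * (new_buf = !runlist->cur_buffer), submits it, and flips cur_buffer
 * only after the submit (and optional wait) has been issued, so a
 * failed construction never disturbs the runlist the scheduler is
 * already using.
 */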
3721
3722int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 chid,
3723 bool add, bool wait_for_finish)
3724{
3725 u32 ret = -EINVAL;
3726 u32 runlist_id = 0;
3727 u32 errcode;
3728 unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
3729
3730 if (!g) {
3731 goto end;
3732 }
3733
3734 ret = 0;
3735 for_each_set_bit(runlist_id, &ulong_runlist_ids, 32) {
3736 /* Capture the last failure error code */
3737 errcode = g->ops.fifo.update_runlist(g, runlist_id, chid, add, wait_for_finish);
3738 if (errcode) {
3739 nvgpu_err(g,
3740 "failed to update_runlist %d %d", runlist_id, errcode);
3741 ret = errcode;
3742 }
3743 }
3744end:
3745 return ret;
3746}
3747
3748/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
3749static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
3750 bool wait_preempt)
3751{
3752 struct gk20a *g = ch->g;
3753 struct fifo_runlist_info_gk20a *runlist =
3754 &g->fifo.runlist_info[ch->runlist_id];
3755 int ret = 0;
3756 u32 gr_eng_id = 0;
3757 u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
3758 u32 preempt_id;
3759 u32 preempt_type = 0;
3760
3761 if (1 != gk20a_fifo_get_engine_ids(
3762 g, &gr_eng_id, 1, ENGINE_GR_GK20A)) {
3763 return ret;
3764 }
3765 if (!(runlist->eng_bitmask & (1 << gr_eng_id))) {
3766 return ret;
3767 }
3768
3769 if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
3770 fifo_preempt_pending_true_f()) {
3771 return ret;
3772 }
3773
3774 fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3775 engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
3776 ctxstat = fifo_engine_status_ctx_status_v(engstat);
3777 if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3778 /* host switching to next context, preempt that if needed */
3779 preempt_id = fifo_engine_status_next_id_v(engstat);
3780 preempt_type = fifo_engine_status_next_id_type_v(engstat);
3781 } else {
3782 return ret;
3783 }
3784 if (preempt_id == ch->tsgid && preempt_type) {
3785 return ret;
3786 }
3787 fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3788 if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
3789 fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
3790 /* preempt useless if FECS acked save and started restore */
3791 return ret;
3792 }
3793
3794 gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
3795#ifdef TRACEPOINTS_ENABLED
3796 trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
3797 fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
3798 gk20a_readl(g, fifo_preempt_r()));
3799#endif
3800 if (wait_preempt) {
3801 g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type, false);
3802 }
3803#ifdef TRACEPOINTS_ENABLED
3804 trace_gk20a_reschedule_preempted_next(ch->chid);
3805#endif
3806 return ret;
3807}
3808
3809int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
3810{
3811 return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
3812}
3813
3814/* trigger host to expire current timeslice and reschedule runlist from front */
3815int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
3816 bool wait_preempt)
3817{
3818 struct gk20a *g = ch->g;
3819 struct fifo_runlist_info_gk20a *runlist;
3820 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3821 u32 mutex_ret;
3822 int ret = 0;
3823
3824 runlist = &g->fifo.runlist_info[ch->runlist_id];
3825 if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
3826 return -EBUSY;
3827 }
3828
3829 mutex_ret = nvgpu_pmu_mutex_acquire(
3830 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3831
3832 g->ops.fifo.runlist_hw_submit(
3833 g, ch->runlist_id, runlist->count, runlist->cur_buffer);
3834
3835 if (preempt_next) {
3836 __locked_fifo_reschedule_preempt_next(ch, wait_preempt);
3837 }
3838
3839 gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
3840
3841 if (!mutex_ret) {
3842 nvgpu_pmu_mutex_release(
3843 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3844 }
3845 nvgpu_mutex_release(&runlist->runlist_lock);
3846
3847 return ret;
3848}
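
/*
 * Illustrative caller sketch (not from the original driver): because
 * nvgpu_fifo_reschedule_runlist() only *tries* the runlist lock and returns
 * -EBUSY on contention, a caller that must reschedule can simply retry a few
 * times. The helper name and retry count are made up for illustration.
 */
static int example_reschedule_with_retry(struct channel_gk20a *ch)
{
	int err = -EBUSY;
	int tries = 3;

	while (err == -EBUSY && tries-- > 0) {
		err = nvgpu_fifo_reschedule_runlist(ch, true, true);
	}

	return err;
}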
3849
3850/* Add or remove a channel from a runlist. Special cases below; in both,
3851 * runlist->active_channels is NOT changed:
3852 * (chid == ~0 && !add) removes all active channels from the runlist.
3853 * (chid == ~0 && add) restores all active channels on the runlist. */
3854int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 chid,
3855 bool add, bool wait_for_finish)
3856{
3857 struct fifo_runlist_info_gk20a *runlist = NULL;
3858 struct fifo_gk20a *f = &g->fifo;
3859 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3860 u32 mutex_ret;
3861 int ret = 0;
3862
3863 nvgpu_log_fn(g, " ");
3864
3865 runlist = &f->runlist_info[runlist_id];
3866
3867 nvgpu_mutex_acquire(&runlist->runlist_lock);
3868
3869 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3870
3871 ret = gk20a_fifo_update_runlist_locked(g, runlist_id, chid, add,
3872 wait_for_finish);
3873
3874 if (!mutex_ret) {
3875 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3876 }
3877
3878 nvgpu_mutex_release(&runlist->runlist_lock);
3879
3880 if (ret == -ETIMEDOUT) {
3881 gk20a_fifo_runlist_reset_engines(g, runlist_id);
3882 }
3883
3884 return ret;
3885}
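
/*
 * Illustrative sketch (not from the original driver) of the two special
 * cases documented above: passing ~0 (FIFO_INVAL_CHANNEL_ID) removes or
 * restores all active channels without touching runlist->active_channels.
 * The helper name is made up for illustration.
 */
static int example_quiesce_then_restore_runlist(struct gk20a *g,
		u32 runlist_id)
{
	int err;

	/* suspend: submit an empty runlist so nothing gets scheduled */
	err = gk20a_fifo_update_runlist(g, runlist_id, FIFO_INVAL_CHANNEL_ID,
			false, true);
	if (err != 0) {
		return err;
	}

	/* resume: rebuild the runlist from the active channel/TSG state */
	return gk20a_fifo_update_runlist(g, runlist_id, FIFO_INVAL_CHANNEL_ID,
			true, true);
}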
3886
3887int gk20a_fifo_suspend(struct gk20a *g)
3888{
3889 nvgpu_log_fn(g, " ");
3890
3891 /* stop bar1 snooping */
3892 if (g->ops.mm.is_bar1_supported(g)) {
3893 gk20a_writel(g, fifo_bar1_base_r(),
3894 fifo_bar1_base_valid_false_f());
3895 }
3896
3897 /* disable fifo intr */
3898 gk20a_writel(g, fifo_intr_en_0_r(), 0);
3899 gk20a_writel(g, fifo_intr_en_1_r(), 0);
3900
3901 nvgpu_log_fn(g, "done");
3902 return 0;
3903}
3904
3905bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
3906{
3907 if (gk20a_readl(g, fifo_intr_0_r()) &
3908 fifo_intr_0_mmu_fault_pending_f()) {
3909 return true;
3910 } else {
3911 return false;
3912 }
3913}
3914
3915bool gk20a_fifo_is_engine_busy(struct gk20a *g)
3916{
3917 u32 i, host_num_engines;
3918
3919 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3920
3921 for (i = 0; i < host_num_engines; i++) {
3922 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3923 if (fifo_engine_status_engine_v(status) ==
3924 fifo_engine_status_engine_busy_v()) {
3925 return true;
3926 }
3927 }
3928 return false;
3929}
3930
3931int gk20a_fifo_wait_engine_idle(struct gk20a *g)
3932{
3933 struct nvgpu_timeout timeout;
3934 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3935 int ret = -ETIMEDOUT;
3936 u32 i, host_num_engines;
3937
3938 nvgpu_log_fn(g, " ");
3939
3940 host_num_engines =
3941 nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3942
3943 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3944 NVGPU_TIMER_CPU_TIMER);
3945
3946	for (i = 0; i < host_num_engines; i++) {
		/* re-arm per engine so an earlier idle engine does not mask a busy one */
		ret = -ETIMEDOUT;
3947 do {
3948 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3949 if (!fifo_engine_status_engine_v(status)) {
3950 ret = 0;
3951 break;
3952 }
3953
3954 nvgpu_usleep_range(delay, delay * 2);
3955 delay = min_t(unsigned long,
3956 delay << 1, GR_IDLE_CHECK_MAX);
3957 } while (!nvgpu_timeout_expired(&timeout));
3958
3959 if (ret) {
3960 nvgpu_log_info(g, "cannot idle engine %u", i);
3961 break;
3962 }
3963 }
3964
3965 nvgpu_log_fn(g, "done");
3966
3967 return ret;
3968}
3969
3970u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
3971{
3972 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
3973}
3974
3975static const char * const ccsr_chan_status_str[] = {
3976 "idle",
3977 "pending",
3978 "pending_ctx_reload",
3979 "pending_acquire",
3980 "pending_acq_ctx_reload",
3981 "on_pbdma",
3982 "on_pbdma_and_eng",
3983 "on_eng",
3984 "on_eng_pending_acquire",
3985 "on_eng_pending",
3986 "on_pbdma_ctx_reload",
3987 "on_pbdma_and_eng_ctx_reload",
3988 "on_eng_ctx_reload",
3989 "on_eng_pending_ctx_reload",
3990 "on_eng_pending_acq_ctx_reload",
3991};
3992
3993static const char * const pbdma_chan_eng_ctx_status_str[] = {
3994 "invalid",
3995 "valid",
3996 "NA",
3997 "NA",
3998 "NA",
3999 "load",
4000 "save",
4001 "switch",
4002};
4003
4004static const char * const not_found_str[] = {
4005 "NOT FOUND"
4006};
4007
4008const char *gk20a_decode_ccsr_chan_status(u32 index)
4009{
4010 if (index >= ARRAY_SIZE(ccsr_chan_status_str)) {
4011 return not_found_str[0];
4012 } else {
4013 return ccsr_chan_status_str[index];
4014 }
4015}
4016
4017const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index)
4018{
4019 if (index >= ARRAY_SIZE(pbdma_chan_eng_ctx_status_str)) {
4020 return not_found_str[0];
4021 } else {
4022 return pbdma_chan_eng_ctx_status_str[index];
4023 }
4024}
4025
4026bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid)
4027{
4028 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4029
4030 return ccsr_channel_next_v(channel) == ccsr_channel_next_true_v();
4031}
4032
4033bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid)
4034{
4035 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4036 u32 status = ccsr_channel_status_v(channel);
4037
4038 return (status == ccsr_channel_status_pending_ctx_reload_v() ||
4039 status == ccsr_channel_status_pending_acq_ctx_reload_v() ||
4040 status == ccsr_channel_status_on_pbdma_ctx_reload_v() ||
4041 status == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() ||
4042 status == ccsr_channel_status_on_eng_ctx_reload_v() ||
4043 status == ccsr_channel_status_on_eng_pending_ctx_reload_v() ||
4044 status == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v());
4045}
4046
4047void gk20a_dump_channel_status_ramfc(struct gk20a *g,
4048 struct gk20a_debug_output *o,
4049 u32 chid,
4050 struct ch_state *ch_state)
4051{
4052 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4053 u32 status = ccsr_channel_status_v(channel);
4054 u32 syncpointa, syncpointb;
4055 u32 *inst_mem;
4056 struct channel_gk20a *c = g->fifo.channel + chid;
4057 struct nvgpu_semaphore_int *hw_sema = NULL;
4058
4059 if (c->hw_sema) {
4060 hw_sema = c->hw_sema;
4061 }
4062
4063 if (!ch_state) {
4064 return;
4065 }
4066
4067 inst_mem = &ch_state->inst_block[0];
4068
4069 syncpointa = inst_mem[ram_fc_syncpointa_w()];
4070 syncpointb = inst_mem[ram_fc_syncpointb_w()];
4071
4072 gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", chid,
4073 g->name,
4074 ch_state->pid,
4075 ch_state->refs,
4076 ch_state->deterministic ? ", deterministic" : "");
4077 gk20a_debug_output(o, "channel status: %s in use %s %s\n",
4078 ccsr_channel_enable_v(channel) ? "" : "not",
4079 gk20a_decode_ccsr_chan_status(status),
4080 ccsr_channel_busy_v(channel) ? "busy" : "not busy");
4081 gk20a_debug_output(o, "RAMFC : TOP: %016llx PUT: %016llx GET: %016llx "
4082 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
4083 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
4084 (u64)inst_mem[ram_fc_pb_top_level_get_w()] +
4085 ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
4086 (u64)inst_mem[ram_fc_pb_put_w()] +
4087 ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
4088 (u64)inst_mem[ram_fc_pb_get_w()] +
4089 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
4090 (u64)inst_mem[ram_fc_pb_fetch_w()] +
4091 ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
4092 inst_mem[ram_fc_pb_header_w()],
4093 inst_mem[ram_fc_pb_count_w()],
4094 syncpointa,
4095 syncpointb,
4096 inst_mem[ram_fc_semaphorea_w()],
4097 inst_mem[ram_fc_semaphoreb_w()],
4098 inst_mem[ram_fc_semaphorec_w()],
4099 inst_mem[ram_fc_semaphored_w()]);
4100 if (hw_sema) {
4101 gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
4102 "next_val: 0x%08x addr: 0x%010llx\n",
4103 __nvgpu_semaphore_read(hw_sema),
4104 nvgpu_atomic_read(&hw_sema->next_value),
4105 nvgpu_hw_sema_addr(hw_sema));
4106 }
4107
4108#ifdef CONFIG_TEGRA_GK20A_NVHOST
4109 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
4110 && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
4111 pbdma_syncpointb_wait_switch_en_v()))
4112 gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n",
4113			(status == 3 || status == 8) ? "Waiting" : "Waited", /* pending_acquire / on_eng_pending_acquire */
4114 pbdma_syncpointb_syncpt_index_v(syncpointb),
4115 nvgpu_nvhost_syncpt_get_name(g->nvhost_dev,
4116 pbdma_syncpointb_syncpt_index_v(syncpointb)),
4117 pbdma_syncpointa_payload_v(syncpointa));
4118#endif
4119
4120 gk20a_debug_output(o, "\n");
4121}
4122
4123void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
4124 struct gk20a_debug_output *o)
4125{
4126 struct fifo_gk20a *f = &g->fifo;
4127 u32 chid;
4128 struct ch_state **ch_state;
4129
4130 ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
4131 if (!ch_state) {
4132 gk20a_debug_output(o, "cannot alloc memory for channels\n");
4133 return;
4134 }
4135
4136 for (chid = 0; chid < f->num_channels; chid++) {
4137 struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
4138 if (ch != NULL) {
4139 ch_state[chid] =
4140 nvgpu_kmalloc(g, sizeof(struct ch_state) +
4141 ram_in_alloc_size_v());
4142			/* the channel ref taken above is kept for successful
4143			 * allocs and released in the loop below */
4144 if (!ch_state[chid]) {
4145 gk20a_channel_put(ch);
4146 }
4147 }
4148 }
4149
4150 for (chid = 0; chid < f->num_channels; chid++) {
4151 struct channel_gk20a *ch = &f->channel[chid];
4152 if (!ch_state[chid]) {
4153 continue;
4154 }
4155
4156 ch_state[chid]->pid = ch->pid;
4157 ch_state[chid]->refs = nvgpu_atomic_read(&ch->ref_count);
4158 ch_state[chid]->deterministic = ch->deterministic;
4159 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
4160 &ch_state[chid]->inst_block[0],
4161 ram_in_alloc_size_v());
4162 gk20a_channel_put(ch);
4163 }
4164 for (chid = 0; chid < f->num_channels; chid++) {
4165 if (ch_state[chid]) {
4166 g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
4167 ch_state[chid]);
4168 nvgpu_kfree(g, ch_state[chid]);
4169 }
4170 }
4171 nvgpu_kfree(g, ch_state);
4172}
4173
4174void gk20a_dump_pbdma_status(struct gk20a *g,
4175 struct gk20a_debug_output *o)
4176{
4177 u32 i, host_num_pbdma;
4178
4179 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
4180
4181 for (i = 0; i < host_num_pbdma; i++) {
4182 u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
4183 u32 chan_status = fifo_pbdma_status_chan_status_v(status);
4184
4185 gk20a_debug_output(o, "%s pbdma %d: ", g->name, i);
4186 gk20a_debug_output(o,
4187 "id: %d (%s), next_id: %d (%s) chan status: %s\n",
4188 fifo_pbdma_status_id_v(status),
4189 fifo_pbdma_status_id_type_v(status) ?
4190 "tsg" : "channel",
4191 fifo_pbdma_status_next_id_v(status),
4192 fifo_pbdma_status_next_id_type_v(status) ?
4193 "tsg" : "channel",
4194 gk20a_decode_pbdma_chan_eng_ctx_status(chan_status));
4195 gk20a_debug_output(o, "PBDMA_PUT: %016llx PBDMA_GET: %016llx "
4196 "GP_PUT: %08x GP_GET: %08x "
4197 "FETCH: %08x HEADER: %08x\n"
4198 "HDR: %08x SHADOW0: %08x SHADOW1: %08x",
4199 (u64)gk20a_readl(g, pbdma_put_r(i)) +
4200 ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
4201 (u64)gk20a_readl(g, pbdma_get_r(i)) +
4202 ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
4203 gk20a_readl(g, pbdma_gp_put_r(i)),
4204 gk20a_readl(g, pbdma_gp_get_r(i)),
4205 gk20a_readl(g, pbdma_gp_fetch_r(i)),
4206 gk20a_readl(g, pbdma_pb_header_r(i)),
4207 gk20a_readl(g, pbdma_hdr_shadow_r(i)),
4208 gk20a_readl(g, pbdma_gp_shadow_0_r(i)),
4209 gk20a_readl(g, pbdma_gp_shadow_1_r(i)));
4210 }
4211 gk20a_debug_output(o, "\n");
4212}
4213
4214void gk20a_dump_eng_status(struct gk20a *g,
4215 struct gk20a_debug_output *o)
4216{
4217 u32 i, host_num_engines;
4218
4219 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
4220
4221 for (i = 0; i < host_num_engines; i++) {
4222 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
4223 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
4224
4225 gk20a_debug_output(o, "%s eng %d: ", g->name, i);
4226 gk20a_debug_output(o,
4227 "id: %d (%s), next_id: %d (%s), ctx status: %s ",
4228 fifo_engine_status_id_v(status),
4229 fifo_engine_status_id_type_v(status) ?
4230 "tsg" : "channel",
4231 fifo_engine_status_next_id_v(status),
4232 fifo_engine_status_next_id_type_v(status) ?
4233 "tsg" : "channel",
4234 gk20a_decode_pbdma_chan_eng_ctx_status(ctx_status));
4235
4236 if (fifo_engine_status_faulted_v(status)) {
4237 gk20a_debug_output(o, "faulted ");
4238 }
4239 if (fifo_engine_status_engine_v(status)) {
4240 gk20a_debug_output(o, "busy ");
4241 }
4242 gk20a_debug_output(o, "\n");
4243 }
4244 gk20a_debug_output(o, "\n");
4245}
4246
4247void gk20a_fifo_enable_channel(struct channel_gk20a *ch)
4248{
4249 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4250 gk20a_readl(ch->g, ccsr_channel_r(ch->chid)) |
4251 ccsr_channel_enable_set_true_f());
4252}
4253
4254void gk20a_fifo_disable_channel(struct channel_gk20a *ch)
4255{
4256 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4257 gk20a_readl(ch->g,
4258 ccsr_channel_r(ch->chid)) |
4259 ccsr_channel_enable_clr_true_f());
4260}
4261
4262void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
4263{
4264 struct gk20a *g = ch_gk20a->g;
4265
4266 nvgpu_log_fn(g, " ");
4267
4268 if (nvgpu_atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
4269 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->chid),
4270 ccsr_channel_inst_ptr_f(0) |
4271 ccsr_channel_inst_bind_false_f());
4272 }
4273}
4274
4275static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
4276{
4277 u32 addr_lo;
4278 u32 addr_hi;
4279 struct gk20a *g = c->g;
4280
4281 nvgpu_log_fn(g, " ");
4282
4283 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
4284 addr_hi = u64_hi32(c->userd_iova);
4285
4286	nvgpu_log_info(g, "channel %d : set ramfc userd 0x%016llx",
4287 c->chid, (u64)c->userd_iova);
4288
4289 nvgpu_mem_wr32(g, &c->inst_block,
4290 ram_in_ramfc_w() + ram_fc_userd_w(),
4291 nvgpu_aperture_mask(g, &g->fifo.userd,
4292 pbdma_userd_target_sys_mem_ncoh_f(),
4293 pbdma_userd_target_sys_mem_coh_f(),
4294 pbdma_userd_target_vid_mem_f()) |
4295 pbdma_userd_addr_f(addr_lo));
4296
4297 nvgpu_mem_wr32(g, &c->inst_block,
4298 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
4299 pbdma_userd_hi_addr_f(addr_hi));
4300
4301 return 0;
4302}
4303
4304int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
4305 u64 gpfifo_base, u32 gpfifo_entries,
4306 unsigned long timeout,
4307 u32 flags)
4308{
4309 struct gk20a *g = c->g;
4310 struct nvgpu_mem *mem = &c->inst_block;
4311
4312 nvgpu_log_fn(g, " ");
4313
4314 nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v());
4315
4316 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
4317 pbdma_gp_base_offset_f(
4318 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
4319
4320 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
4321 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
4322 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
4323
4324 nvgpu_mem_wr32(g, mem, ram_fc_signature_w(),
4325 c->g->ops.fifo.get_pbdma_signature(c->g));
4326
4327 nvgpu_mem_wr32(g, mem, ram_fc_formats_w(),
4328 pbdma_formats_gp_fermi0_f() |
4329 pbdma_formats_pb_fermi1_f() |
4330 pbdma_formats_mp_fermi0_f());
4331
4332 nvgpu_mem_wr32(g, mem, ram_fc_pb_header_w(),
4333 pbdma_pb_header_priv_user_f() |
4334 pbdma_pb_header_method_zero_f() |
4335 pbdma_pb_header_subchannel_zero_f() |
4336 pbdma_pb_header_level_main_f() |
4337 pbdma_pb_header_first_true_f() |
4338 pbdma_pb_header_type_inc_f());
4339
4340 nvgpu_mem_wr32(g, mem, ram_fc_subdevice_w(),
4341 pbdma_subdevice_id_f(1) |
4342 pbdma_subdevice_status_active_f() |
4343 pbdma_subdevice_channel_dma_enable_f());
4344
4345 nvgpu_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
4346
4347 nvgpu_mem_wr32(g, mem, ram_fc_acquire_w(),
4348 g->ops.fifo.pbdma_acquire_val(timeout));
4349
4350 nvgpu_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
4351 fifo_runlist_timeslice_timeout_128_f() |
4352 fifo_runlist_timeslice_timescale_3_f() |
4353 fifo_runlist_timeslice_enable_true_f());
4354
4355 nvgpu_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
4356 fifo_pb_timeslice_timeout_16_f() |
4357 fifo_pb_timeslice_timescale_0_f() |
4358 fifo_pb_timeslice_enable_true_f());
4359
4360 nvgpu_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->chid));
4361
4362 if (c->is_privileged_channel) {
4363 gk20a_fifo_setup_ramfc_for_privileged_channel(c);
4364 }
4365
4366 return gk20a_fifo_commit_userd(c);
4367}
4368
4369void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
4370{
4371 struct gk20a *g = c->g;
4372 struct nvgpu_mem *mem = &c->inst_block;
4373
4374 nvgpu_log_info(g, "channel %d : set ramfc privileged_channel", c->chid);
4375
4376 /* Enable HCE priv mode for phys mode transfer */
4377 nvgpu_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
4378 pbdma_hce_ctrl_hce_priv_mode_yes_f());
4379}
4380
4381int gk20a_fifo_setup_userd(struct channel_gk20a *c)
4382{
4383 struct gk20a *g = c->g;
4384 struct nvgpu_mem *mem;
4385 u32 offset;
4386
4387 nvgpu_log_fn(g, " ");
4388
4389 if (nvgpu_mem_is_valid(&c->usermode_userd)) {
4390 mem = &c->usermode_userd;
4391 offset = 0;
4392 } else {
4393 mem = &g->fifo.userd;
4394 offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
4395 }
4396
4397 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
4398 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
4399 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
4400 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
4401 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
4402 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
4403 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
4404 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
4405 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
4406 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
4407
4408 return 0;
4409}
4410
4411int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
4412{
4413 int err;
4414
4415 nvgpu_log_fn(g, " ");
4416
4417 err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
4418 if (err) {
4419 return err;
4420 }
4421
4422	nvgpu_log_info(g, "channel %d inst block physical addr: 0x%016llx",
4423 ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
4424
4425 nvgpu_log_fn(g, "done");
4426 return 0;
4427}
4428
4429void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
4430{
4431 nvgpu_free_inst_block(g, &ch->inst_block);
4432}
4433
4434u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
4435{
4436 return gk20a_bar1_readl(g,
4437 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
4438}
4439
4440u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c)
4441{
4442 u32 lo = gk20a_bar1_readl(g,
4443 c->userd_gpu_va + sizeof(u32) * ram_userd_get_w());
4444 u32 hi = gk20a_bar1_readl(g,
4445 c->userd_gpu_va + sizeof(u32) * ram_userd_get_hi_w());
4446
4447 return ((u64)hi << 32) | lo;
4448}
4449
4450void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
4451{
4452 gk20a_bar1_writel(g,
4453 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
4454 c->gpfifo.put);
4455}
4456
4457u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
4458{
4459 u32 val, exp, man;
4460 unsigned int val_len;
4461
4462 val = pbdma_acquire_retry_man_2_f() |
4463 pbdma_acquire_retry_exp_2_f();
4464
4465 if (!timeout) {
4466 return val;
4467 }
4468
4469 timeout *= 80UL;
4470 do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
4471 timeout *= 1000000UL; /* ms -> ns */
4472 do_div(timeout, 1024); /* in unit of 1024ns */
4473 val_len = fls(timeout >> 32) + 32;
4474 if (val_len == 32) {
4475 val_len = fls(timeout);
4476 }
4477 if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
4478 exp = pbdma_acquire_timeout_exp_max_v();
4479 man = pbdma_acquire_timeout_man_max_v();
4480 } else if (val_len > 16) {
4481 exp = val_len - 16;
4482 man = timeout >> exp;
4483 } else {
4484 exp = 0;
4485 man = timeout;
4486 }
4487
4488 val |= pbdma_acquire_timeout_exp_f(exp) |
4489 pbdma_acquire_timeout_man_f(man) |
4490 pbdma_acquire_timeout_en_enable_f();
4491
4492 return val;
4493}
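
/*
 * Worked example (illustrative sketch, not part of the original driver) of
 * the timeout encoding above. For a hypothetical 3000 ms channel watchdog:
 * 3000 ms * 80% = 2400 ms = 2400000000 ns, or 2343750 units of 1024 ns.
 * That value needs 22 bits, so exp = 22 - 16 = 6 and man = 2343750 >> 6 =
 * 36621. The helper below mirrors that math with the driver's u32/u64 types;
 * it omits the pbdma_acquire_timeout_exp_max_v() clamp applied above.
 */
static void example_acquire_timeout_fields(u64 timeout_ms, u32 *exp, u32 *man)
{
	u64 units = (timeout_ms * 80ULL / 100ULL) * 1000000ULL / 1024ULL;
	u32 bits = 0U;
	u64 v;

	/* bit length of "units", i.e. what fls() computes in the function above */
	for (v = units; v != 0ULL; v >>= 1) {
		bits++;
	}

	if (bits > 16U) {		/* mantissa field is 16 bits wide */
		*exp = bits - 16U;
		*man = (u32)(units >> *exp);
	} else {
		*exp = 0U;
		*man = (u32)units;
	}
}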
4494
4495const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
4496{
4497 switch (interleave_level) {
4498 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
4499 return "LOW";
4500
4501 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
4502 return "MEDIUM";
4503
4504 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
4505 return "HIGH";
4506
4507 default:
4508 return "?";
4509 }
4510}
4511
4512u32 gk20a_fifo_get_sema_wait_cmd_size(void)
4513{
4514 return 8;
4515}
4516
4517u32 gk20a_fifo_get_sema_incr_cmd_size(void)
4518{
4519 return 10;
4520}
4521
4522void gk20a_fifo_add_sema_cmd(struct gk20a *g,
4523 struct nvgpu_semaphore *s, u64 sema_va,
4524 struct priv_cmd_entry *cmd,
4525 u32 off, bool acquire, bool wfi)
4526{
4527 nvgpu_log_fn(g, " ");
4528
4529 /* semaphore_a */
4530 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
4531 /* offset_upper */
4532 nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff);
4533 /* semaphore_b */
4534 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
4535 /* offset */
4536 nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff);
4537
4538 if (acquire) {
4539 /* semaphore_c */
4540 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4541 /* payload */
4542 nvgpu_mem_wr32(g, cmd->mem, off++,
4543 nvgpu_semaphore_get_value(s));
4544 /* semaphore_d */
4545 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4546 /* operation: acq_geq, switch_en */
4547 nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
4548 } else {
4549 /* semaphore_c */
4550 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4551 /* payload */
4552 nvgpu_mem_wr32(g, cmd->mem, off++,
4553 nvgpu_semaphore_get_value(s));
4554 /* semaphore_d */
4555 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4556 /* operation: release, wfi */
4557 nvgpu_mem_wr32(g, cmd->mem, off++,
4558 0x2 | ((wfi ? 0x0 : 0x1) << 20));
4559 /* non_stall_int */
4560 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
4561 /* ignored */
4562 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4563 }
4564}
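
/*
 * Illustrative sketch (not part of the original driver) of how the literal
 * values written by gk20a_fifo_add_sema_cmd() decompose. The helper names
 * are made up; the field interpretation is inferred from the code above:
 * SEMAPHORE_A/B carry the upper 8 and lower 32 bits of the semaphore VA,
 * and the SEMAPHORE_D word selects the operation (0x4 acquire_geq with
 * switch_en in bit 12, 0x2 release with bit 20 apparently suppressing WFI).
 */
static u32 example_sema_offset_upper(u64 sema_va)
{
	return (u32)((sema_va >> 32) & 0xff);	/* SEMAPHORE_A payload */
}

static u32 example_sema_offset_lower(u64 sema_va)
{
	return (u32)(sema_va & 0xffffffff);	/* SEMAPHORE_B payload */
}

static u32 example_sema_d_acquire_geq(void)
{
	return 0x4 | (0x1 << 12);	/* acq_geq + switch_en, as used above */
}

static u32 example_sema_d_release(bool wfi)
{
	return 0x2 | ((wfi ? 0x0 : 0x1) << 20);	/* release; bit 20 set when !wfi */
}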
4565
4566#ifdef CONFIG_TEGRA_GK20A_NVHOST
4567void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
4568 struct priv_cmd_entry *cmd, u32 off,
4569 u32 id, u32 thresh, u64 gpu_va)
4570{
4571 nvgpu_log_fn(g, " ");
4572
4573 off = cmd->off + off;
4574 /* syncpoint_a */
4575 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4576 /* payload */
4577 nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
4578 /* syncpoint_b */
4579 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4580 /* syncpt_id, switch_en, wait */
4581 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
4582}
4583
4584u32 gk20a_fifo_get_syncpt_wait_cmd_size(void)
4585{
4586 return 4;
4587}
4588
4589u32 gk20a_fifo_get_syncpt_incr_per_release(void)
4590{
4591 return 2;
4592}
4593
4594void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
4595 bool wfi_cmd, struct priv_cmd_entry *cmd,
4596 u32 id, u64 gpu_va)
4597{
4598 u32 off = cmd->off;
4599
4600 nvgpu_log_fn(g, " ");
4601 if (wfi_cmd) {
4602 /* wfi */
4603 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001E);
4604 /* handle, ignored */
4605 nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000);
4606 }
4607 /* syncpoint_a */
4608 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4609 /* payload, ignored */
4610 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4611 /* syncpoint_b */
4612 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4613 /* syncpt_id, incr */
4614 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4615 /* syncpoint_b */
4616 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4617 /* syncpt_id, incr */
4618 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4619
4620}
4621
4622u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd)
4623{
4624	if (wfi_cmd) {
4625		return 8;
4626	}
4627	return 6;
4628}
4629
4630void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
4631 struct nvgpu_mem *syncpt_buf)
4632{
4633
4634}
4635
4636int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
4637 u32 syncpt_id, struct nvgpu_mem *syncpt_buf)
4638{
4639 return 0;
4640}
4641#endif