path: root/include/gk20a
Diffstat (limited to 'include/gk20a')
-rw-r--r--  include/gk20a/ce2_gk20a.c                     576
-rw-r--r--  include/gk20a/ce2_gk20a.h                     156
-rw-r--r--  include/gk20a/clk_gk20a.h                     134
-rw-r--r--  include/gk20a/css_gr_gk20a.c                  636
-rw-r--r--  include/gk20a/css_gr_gk20a.h                  151
-rw-r--r--  include/gk20a/dbg_gpu_gk20a.c                 388
-rw-r--r--  include/gk20a/dbg_gpu_gk20a.h                 147
-rw-r--r--  include/gk20a/fecs_trace_gk20a.c              744
-rw-r--r--  include/gk20a/fecs_trace_gk20a.h              45
-rw-r--r--  include/gk20a/fence_gk20a.c                   319
-rw-r--r--  include/gk20a/fence_gk20a.h                   100
-rw-r--r--  include/gk20a/fifo_gk20a.c                    4649
-rw-r--r--  include/gk20a/fifo_gk20a.h                    471
-rw-r--r--  include/gk20a/flcn_gk20a.c                    759
-rw-r--r--  include/gk20a/flcn_gk20a.h                    29
-rw-r--r--  include/gk20a/gk20a.c                         590
-rw-r--r--  include/gk20a/gk20a.h                         33
-rw-r--r--  include/gk20a/gr_ctx_gk20a.c                  486
-rw-r--r--  include/gk20a/gr_ctx_gk20a.h                  206
-rw-r--r--  include/gk20a/gr_ctx_gk20a_sim.c              356
-rw-r--r--  include/gk20a/gr_gk20a.c                      8998
-rw-r--r--  include/gk20a/gr_gk20a.h                      851
-rw-r--r--  include/gk20a/gr_pri_gk20a.h                  261
-rw-r--r--  include/gk20a/hw_bus_gk20a.h                  171
-rw-r--r--  include/gk20a/hw_ccsr_gk20a.h                 163
-rw-r--r--  include/gk20a/hw_ce2_gk20a.h                  87
-rw-r--r--  include/gk20a/hw_ctxsw_prog_gk20a.h           447
-rw-r--r--  include/gk20a/hw_falcon_gk20a.h               559
-rw-r--r--  include/gk20a/hw_fb_gk20a.h                   263
-rw-r--r--  include/gk20a/hw_fifo_gk20a.h                 619
-rw-r--r--  include/gk20a/hw_flush_gk20a.h                187
-rw-r--r--  include/gk20a/hw_gmmu_gk20a.h                 283
-rw-r--r--  include/gk20a/hw_gr_gk20a.h                   3807
-rw-r--r--  include/gk20a/hw_ltc_gk20a.h                  455
-rw-r--r--  include/gk20a/hw_mc_gk20a.h                   291
-rw-r--r--  include/gk20a/hw_pbdma_gk20a.h                575
-rw-r--r--  include/gk20a/hw_perf_gk20a.h                 211
-rw-r--r--  include/gk20a/hw_pram_gk20a.h                 63
-rw-r--r--  include/gk20a/hw_pri_ringmaster_gk20a.h       159
-rw-r--r--  include/gk20a/hw_pri_ringstation_fbp_gk20a.h  231
-rw-r--r--  include/gk20a/hw_pri_ringstation_gpc_gk20a.h  79
-rw-r--r--  include/gk20a/hw_pri_ringstation_sys_gk20a.h  91
-rw-r--r--  include/gk20a/hw_proj_gk20a.h                 167
-rw-r--r--  include/gk20a/hw_pwr_gk20a.h                  823
-rw-r--r--  include/gk20a/hw_ram_gk20a.h                  443
-rw-r--r--  include/gk20a/hw_therm_gk20a.h                367
-rw-r--r--  include/gk20a/hw_timer_gk20a.h                127
-rw-r--r--  include/gk20a/hw_top_gk20a.h                  211
-rw-r--r--  include/gk20a/hw_trim_gk20a.h                 315
-rw-r--r--  include/gk20a/mm_gk20a.c                      654
-rw-r--r--  include/gk20a/mm_gk20a.h                      155
-rw-r--r--  include/gk20a/pmu_gk20a.c                     879
-rw-r--r--  include/gk20a/pmu_gk20a.h                     80
-rw-r--r--  include/gk20a/regops_gk20a.c                  472
-rw-r--r--  include/gk20a/regops_gk20a.h                  90
55 files changed, 34609 insertions, 0 deletions
diff --git a/include/gk20a/ce2_gk20a.c b/include/gk20a/ce2_gk20a.c
new file mode 100644
index 0000000..2a40b08
--- /dev/null
+++ b/include/gk20a/ce2_gk20a.c
@@ -0,0 +1,576 @@
1/*
2 * GK20A Graphics Copy Engine (gr host)
3 *
4 * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/dma.h>
27#include <nvgpu/os_sched.h>
28#include <nvgpu/log.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/io.h>
31#include <nvgpu/utils.h>
32#include <nvgpu/channel.h>
33#include <nvgpu/power_features/cg.h>
34
35#include "gk20a.h"
36#include "gk20a/fence_gk20a.h"
37
38#include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
39#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
40#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
41#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
42#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
44#include <nvgpu/barrier.h>
45
46/*
47 * Copy engine defines line size in pixels
48 */
49#define MAX_CE_SHIFT 31 /* 4Gpixels -1 */
50#define MAX_CE_MASK ((u32) (~(~0U << MAX_CE_SHIFT)))
51#define MAX_CE_ALIGN(a) (a & MAX_CE_MASK)
52
53
54static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
55{
56 nvgpu_log(g, gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
57
58 return ce2_intr_status_nonblockpipe_pending_f();
59}
60
61static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
62{
63 nvgpu_log(g, gpu_dbg_intr, "ce2 blocking pipe interrupt\n");
64
65 return ce2_intr_status_blockpipe_pending_f();
66}
67
68static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
69{
70 nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt\n");
71
72 return ce2_intr_status_launcherr_pending_f();
73}
74
75void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
76{
77 u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
78 u32 clear_intr = 0;
79
80 nvgpu_log(g, gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr);
81
82	/* clear blocking interrupts: they exhibit broken behavior */
83 if (ce2_intr & ce2_intr_status_blockpipe_pending_f()) {
84 clear_intr |= ce2_blockpipe_isr(g, ce2_intr);
85 }
86
87 if (ce2_intr & ce2_intr_status_launcherr_pending_f()) {
88 clear_intr |= ce2_launcherr_isr(g, ce2_intr);
89 }
90
91 gk20a_writel(g, ce2_intr_status_r(), clear_intr);
92 return;
93}
94
95u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
96{
97 u32 ops = 0;
98 u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
99
100 nvgpu_log(g, gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);
101
102 if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) {
103 gk20a_writel(g, ce2_intr_status_r(),
104 ce2_nonblockpipe_isr(g, ce2_intr));
105 ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE |
106 GK20A_NONSTALL_OPS_POST_EVENTS);
107 }
108 return ops;
109}
110
111/* static CE app api */
112static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
113{
114 u32 i;
115
116 for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
117 struct gk20a_fence **fence = &ce_ctx->postfences[i];
118 if (*fence) {
119 gk20a_fence_put(*fence);
120 }
121 *fence = NULL;
122 }
123}
124
125/* this API is expected to be called with nvgpu_mutex_acquire(&ce_app->app_mutex) held */
126static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
127{
128 struct nvgpu_list_node *list = &ce_ctx->list;
129
130 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
131
132 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
133
134 if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
135 gk20a_ce_put_fences(ce_ctx);
136 nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
137 }
138
139 /*
140 * free the channel
141 * gk20a_channel_close() will also unbind the channel from TSG
142 */
143 gk20a_channel_close(ce_ctx->ch);
144 nvgpu_ref_put(&ce_ctx->tsg->refcount, gk20a_tsg_release);
145
146 /* housekeeping on app */
147 if (list->prev && list->next) {
148 nvgpu_list_del(list);
149 }
150
151 nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
152 nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);
153
154 nvgpu_kfree(ce_ctx->g, ce_ctx);
155}
156
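As a minimal usage sketch of the locking rule noted above the function (hypothetical caller; ce_app and ce_ctx are assumed to be valid, mirroring what gk20a_ce_delete_context_priv() does later in this file):

	nvgpu_mutex_acquire(&ce_app->app_mutex);
	gk20a_ce_delete_gpu_context(ce_ctx);
	nvgpu_mutex_release(&ce_app->app_mutex);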
157static inline unsigned int gk20a_ce_get_method_size(int request_operation,
158 u64 size)
159{
160 /* failure size */
161 unsigned int methodsize = UINT_MAX;
162 unsigned int iterations = 0;
163 u32 shift;
164 u64 chunk = size;
165 u32 height, width;
166
167 while (chunk) {
168 iterations++;
169
170 shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
171 MAX_CE_SHIFT;
172 width = chunk >> shift;
173 height = 1 << shift;
174 width = MAX_CE_ALIGN(width);
175
176 chunk -= (u64) height * width;
177 }
178
179 if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
180 methodsize = (2 + (16 * iterations)) * sizeof(u32);
181 } else if (request_operation & NVGPU_CE_MEMSET) {
182 methodsize = (2 + (15 * iterations)) * sizeof(u32);
183 }
184
185 return methodsize;
186}
187
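The decomposition loop above can be hard to follow, so here is a hedged standalone sketch of the same arithmetic (userspace C, not part of the driver; POSIX ffs() stands in for the kernel's __ffs(), hence the -1 adjustment):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>

#define MAX_CE_SHIFT 31
#define MAX_CE_MASK ((uint32_t)(~(~0U << MAX_CE_SHIFT)))
#define MAX_CE_ALIGN(a) ((a) & MAX_CE_MASK)

int main(void)
{
	uint64_t chunk = (5ULL << 30) + 4096;	/* example transfer: 5 GiB + 4 KiB */
	unsigned int iterations = 0;

	while (chunk) {
		uint32_t aligned = MAX_CE_ALIGN(chunk);
		uint32_t shift = aligned ? (uint32_t)(ffs((int)aligned) - 1)
					 : MAX_CE_SHIFT;
		uint64_t width = MAX_CE_ALIGN(chunk >> shift);
		uint64_t height = 1ULL << shift;

		iterations++;
		printf("iter %u: width=%llu height=%llu\n", iterations,
		       (unsigned long long)width, (unsigned long long)height);
		chunk -= height * width;
	}
	/* (2 + 16 * iterations) words for a copy, (2 + 15 * iterations) for a memset */
	printf("copy method size = %u bytes\n",
	       (2u + 16u * iterations) * (unsigned int)sizeof(uint32_t));
	return 0;
}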
188int gk20a_ce_prepare_submit(u64 src_buf,
189 u64 dst_buf,
190 u64 size,
191 u32 *cmd_buf_cpu_va,
192 u32 max_cmd_buf_size,
193 unsigned int payload,
194 int launch_flags,
195 int request_operation,
196 u32 dma_copy_class)
197{
198 u32 launch = 0;
199 u32 methodSize = 0;
200 u64 offset = 0;
201 u64 chunk_size = 0;
202 u64 chunk = size;
203
204 /* failure case handling */
205 if ((gk20a_ce_get_method_size(request_operation, size) >
206 max_cmd_buf_size) || (!size) ||
207 (request_operation > NVGPU_CE_MEMSET)) {
208 return 0;
209 }
210
211 /* set the channel object */
212 cmd_buf_cpu_va[methodSize++] = 0x20018000;
213 cmd_buf_cpu_va[methodSize++] = dma_copy_class;
214
215 /*
216	 * The purpose is to clear the memory in 2D rectangles. We use ffs to
217	 * determine the number of lines to copy. The only constraint is that the
218	 * maximum number of pixels per line is 4Gpix - 1, which is awkward for
219	 * calculation, so we settle on 2Gpix per line to make the calculation
220	 * more agreeable.
221 */
222
223	/* The copy engine in 2D mode can have (2^32 - 1) x (2^32 - 1) pixels in
224	 * a single submit, so we clear a range of up to 2Gpix spread across
225	 * multiple lines. Because we want byte-aligned copies we will be
226	 * setting 1 byte pixels */
227
228 /*
229 * per iteration
230 * <------------------------- 40 bits ------------------------------>
231 * 1 <------ ffs ------->
232 * <-----------up to 30 bits----------->
233 */
234 while (chunk) {
235 u32 width, height, shift;
236
237 /*
238 * We will be aligning to bytes, making the maximum number of
239 * pix per line 2Gb
240 */
241
242 shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
243 MAX_CE_SHIFT;
244 height = chunk >> shift;
245 width = 1 << shift;
246 height = MAX_CE_ALIGN(height);
247
248 chunk_size = (u64) height * width;
249
250 /* reset launch flag */
251 launch = 0;
252
253 if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
254 /* setup the source */
255 cmd_buf_cpu_va[methodSize++] = 0x20028100;
256 cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf +
257 offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
258 cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf +
259 offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
260
261 cmd_buf_cpu_va[methodSize++] = 0x20018098;
262 if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) {
263 cmd_buf_cpu_va[methodSize++] = 0x00000000;
264 } else if (launch_flags &
265 NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) {
266 cmd_buf_cpu_va[methodSize++] = 0x00000002;
267 } else {
268 cmd_buf_cpu_va[methodSize++] = 0x00000001;
269 }
270
271 launch |= 0x00001000;
272 } else if (request_operation & NVGPU_CE_MEMSET) {
273 /* Remap from component A on 1 byte wide pixels */
274 cmd_buf_cpu_va[methodSize++] = 0x200181c2;
275 cmd_buf_cpu_va[methodSize++] = 0x00000004;
276
277 cmd_buf_cpu_va[methodSize++] = 0x200181c0;
278 cmd_buf_cpu_va[methodSize++] = payload;
279
280 launch |= 0x00000400;
281 } else {
282 /* Illegal size */
283 return 0;
284 }
285
286 /* setup the destination/output */
287 cmd_buf_cpu_va[methodSize++] = 0x20068102;
288 cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf +
289 offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
290 cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf +
291 offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
292 /* Pitch in/out */
293 cmd_buf_cpu_va[methodSize++] = width;
294 cmd_buf_cpu_va[methodSize++] = width;
295 /* width and line count */
296 cmd_buf_cpu_va[methodSize++] = width;
297 cmd_buf_cpu_va[methodSize++] = height;
298
299 cmd_buf_cpu_va[methodSize++] = 0x20018099;
300 if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) {
301 cmd_buf_cpu_va[methodSize++] = 0x00000000;
302 } else if (launch_flags &
303 NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) {
304 cmd_buf_cpu_va[methodSize++] = 0x00000002;
305 } else {
306 cmd_buf_cpu_va[methodSize++] = 0x00000001;
307 }
308
309 launch |= 0x00002005;
310
311 if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR) {
312 launch |= 0x00000000;
313 } else {
314 launch |= 0x00000080;
315 }
316
317 if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR) {
318 launch |= 0x00000000;
319 } else {
320 launch |= 0x00000100;
321 }
322
323 cmd_buf_cpu_va[methodSize++] = 0x200180c0;
324 cmd_buf_cpu_va[methodSize++] = launch;
325 offset += chunk_size;
326 chunk -= chunk_size;
327 }
328
329 return methodSize;
330}
331
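A hedged caller sketch for the routine above; dst_gpu_va and dma_copy_class are assumptions supplied by the caller, the flag and size constants come from ce2_gk20a.h, and a 1 MiB memset fits comfortably in one kickoff-sized buffer:

	u32 cmd_buf[NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)];
	u32 words;

	/* build the methods for a 1 MiB memset of 0xAA bytes */
	words = gk20a_ce_prepare_submit(0ULL,		/* src unused for memset */
			dst_gpu_va,			/* assumed destination GPU VA */
			1024ULL * 1024ULL,		/* size in bytes */
			cmd_buf,
			sizeof(cmd_buf),
			0xAAAAAAAAU,			/* payload pattern */
			NVGPU_CE_DST_LOCATION_LOCAL_FB |
			NVGPU_CE_DST_MEMORY_LAYOUT_PITCH,
			NVGPU_CE_MEMSET,
			dma_copy_class);		/* assumed per-chip copy class */
	if (words == 0) {
		/* rejected: zero size, bad operation, or buffer too small */
	}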
332/* global CE app related apis */
333int gk20a_init_ce_support(struct gk20a *g)
334{
335 struct gk20a_ce_app *ce_app = &g->ce_app;
336 int err;
337 u32 ce_reset_mask;
338
339 ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g);
340
341 g->ops.mc.reset(g, ce_reset_mask);
342
343 nvgpu_cg_slcg_ce2_load_enable(g);
344
345 nvgpu_cg_blcg_ce_load_enable(g);
346
347 if (ce_app->initialised) {
348		/* this is assumed to happen during the poweron/poweroff GPU sequence */
349 ce_app->app_state = NVGPU_CE_ACTIVE;
350 return 0;
351 }
352
353 nvgpu_log(g, gpu_dbg_fn, "ce: init");
354
355 err = nvgpu_mutex_init(&ce_app->app_mutex);
356 if (err) {
357 return err;
358 }
359
360 nvgpu_mutex_acquire(&ce_app->app_mutex);
361
362 nvgpu_init_list_node(&ce_app->allocated_contexts);
363 ce_app->ctx_count = 0;
364 ce_app->next_ctx_id = 0;
365 ce_app->initialised = true;
366 ce_app->app_state = NVGPU_CE_ACTIVE;
367
368 nvgpu_mutex_release(&ce_app->app_mutex);
369
370 if (g->ops.ce2.init_prod_values != NULL) {
371 g->ops.ce2.init_prod_values(g);
372 }
373
374 nvgpu_log(g, gpu_dbg_cde_ctx, "ce: init finished");
375
376 return 0;
377}
378
379void gk20a_ce_destroy(struct gk20a *g)
380{
381 struct gk20a_ce_app *ce_app = &g->ce_app;
382 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
383
384 if (!ce_app->initialised) {
385 return;
386 }
387
388 ce_app->app_state = NVGPU_CE_SUSPEND;
389 ce_app->initialised = false;
390
391 nvgpu_mutex_acquire(&ce_app->app_mutex);
392
393 nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
394 &ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
395 gk20a_ce_delete_gpu_context(ce_ctx);
396 }
397
398 nvgpu_init_list_node(&ce_app->allocated_contexts);
399 ce_app->ctx_count = 0;
400 ce_app->next_ctx_id = 0;
401
402 nvgpu_mutex_release(&ce_app->app_mutex);
403
404 nvgpu_mutex_destroy(&ce_app->app_mutex);
405}
406
407void gk20a_ce_suspend(struct gk20a *g)
408{
409 struct gk20a_ce_app *ce_app = &g->ce_app;
410
411 if (!ce_app->initialised) {
412 return;
413 }
414
415 ce_app->app_state = NVGPU_CE_SUSPEND;
416
417 return;
418}
419
420/* CE app utility functions */
421u32 gk20a_ce_create_context(struct gk20a *g,
422 int runlist_id,
423 int timeslice,
424 int runlist_level)
425{
426 struct gk20a_gpu_ctx *ce_ctx;
427 struct gk20a_ce_app *ce_app = &g->ce_app;
428 struct nvgpu_setup_bind_args setup_bind_args;
429 u32 ctx_id = ~0;
430 int err = 0;
431
432 if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
433 return ctx_id;
434 }
435
436 ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx));
437 if (!ce_ctx) {
438 return ctx_id;
439 }
440
441 err = nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex);
442 if (err) {
443 nvgpu_kfree(g, ce_ctx);
444 return ctx_id;
445 }
446
447 ce_ctx->g = g;
448
449 ce_ctx->cmd_buf_read_queue_offset = 0;
450
451 ce_ctx->vm = g->mm.ce.vm;
452
453 /* allocate a tsg if needed */
454 ce_ctx->tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
455 if (!ce_ctx->tsg) {
456 nvgpu_err(g, "ce: gk20a tsg not available");
457 err = -ENOMEM;
458 goto end;
459 }
460
461	/* the kernel client always needs a privileged channel */
462 ce_ctx->ch = gk20a_open_new_channel(g, runlist_id, true,
463 nvgpu_current_pid(g), nvgpu_current_tid(g));
464 if (!ce_ctx->ch) {
465 nvgpu_err(g, "ce: gk20a channel not available");
466 err = -ENOMEM;
467 goto end;
468 }
469 ce_ctx->ch->timeout.enabled = false;
470
471 /* bind the channel to the vm */
472 err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
473 if (err) {
474 nvgpu_err(g, "ce: could not bind vm");
475 goto end;
476 }
477
478 err = gk20a_tsg_bind_channel(ce_ctx->tsg, ce_ctx->ch);
479 if (err) {
480 nvgpu_err(g, "ce: unable to bind to tsg");
481 goto end;
482 }
483
484 setup_bind_args.num_gpfifo_entries = 1024;
485 setup_bind_args.num_inflight_jobs = 0;
486 setup_bind_args.flags = 0;
487 /* allocate gpfifo (1024 should be more than enough) */
488 err = nvgpu_channel_setup_bind(ce_ctx->ch, &setup_bind_args);
489 if (err) {
490 nvgpu_err(g, "ce: unable to setup and bind channel");
491 goto end;
492 }
493
494 /* allocate command buffer from sysmem */
495 err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
496 NVGPU_CE_MAX_INFLIGHT_JOBS *
497 NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
498 &ce_ctx->cmd_buf_mem);
499 if (err) {
500 nvgpu_err(g,
501 "ce: could not allocate command buffer for CE context");
502 goto end;
503 }
504
505 memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);
506
507 /* -1 means default channel timeslice value */
508 if (timeslice != -1) {
509 err = gk20a_fifo_tsg_set_timeslice(ce_ctx->tsg, timeslice);
510 if (err) {
511 nvgpu_err(g,
512 "ce: could not set the channel timeslice value for CE context");
513 goto end;
514 }
515 }
516
517 /* -1 means default channel runlist level */
518 if (runlist_level != -1) {
519 err = gk20a_tsg_set_runlist_interleave(ce_ctx->tsg,
520 runlist_level);
521 if (err) {
522 nvgpu_err(g,
523 "ce: could not set the runlist interleave for CE context");
524 goto end;
525 }
526 }
527
528 nvgpu_mutex_acquire(&ce_app->app_mutex);
529 ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
530 nvgpu_list_add(&ce_ctx->list, &ce_app->allocated_contexts);
531 ++ce_app->next_ctx_id;
532 ++ce_app->ctx_count;
533 nvgpu_mutex_release(&ce_app->app_mutex);
534
535 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
536
537end:
538 if (ctx_id == (u32)~0) {
539 nvgpu_mutex_acquire(&ce_app->app_mutex);
540 gk20a_ce_delete_gpu_context(ce_ctx);
541 nvgpu_mutex_release(&ce_app->app_mutex);
542 }
543 return ctx_id;
544
545}
546
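A short usage sketch for the helper above (hedged; ce_runlist_id is an assumption that would normally come from the FIFO engine info, and -1 selects the defaults as noted in the comments):

	u32 ce_ctx_id = gk20a_ce_create_context(g,
			ce_runlist_id,	/* assumed: a valid async CE runlist id */
			-1,		/* default channel timeslice */
			-1);		/* default runlist interleave level */
	if (ce_ctx_id == (u32)~0) {
		/* creation failed; nothing to clean up */
	}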
547void gk20a_ce_delete_context(struct gk20a *g,
548 u32 ce_ctx_id)
549{
550 gk20a_ce_delete_context_priv(g, ce_ctx_id);
551}
552
553void gk20a_ce_delete_context_priv(struct gk20a *g,
554 u32 ce_ctx_id)
555{
556 struct gk20a_ce_app *ce_app = &g->ce_app;
557 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
558
559 if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
560 return;
561 }
562
563 nvgpu_mutex_acquire(&ce_app->app_mutex);
564
565 nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
566 &ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
567 if (ce_ctx->ctx_id == ce_ctx_id) {
568 gk20a_ce_delete_gpu_context(ce_ctx);
569 --ce_app->ctx_count;
570 break;
571 }
572 }
573
574 nvgpu_mutex_release(&ce_app->app_mutex);
575 return;
576}
diff --git a/include/gk20a/ce2_gk20a.h b/include/gk20a/ce2_gk20a.h
new file mode 100644
index 0000000..df3a0e8
--- /dev/null
+++ b/include/gk20a/ce2_gk20a.h
@@ -0,0 +1,156 @@
1/*
2 * drivers/video/tegra/host/gk20a/fifo_gk20a.h
3 *
4 * GK20A graphics copy engine (gr host)
5 *
6 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26#ifndef NVGPU_GK20A_CE2_GK20A_H
27#define NVGPU_GK20A_CE2_GK20A_H
28
29struct channel_gk20a;
30struct tsg_gk20a;
31
32void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
33u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
34
35/* CE command utility macros */
36#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
37#define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff
38
39#define NVGPU_CE_MAX_INFLIGHT_JOBS 32
40#define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256
41
42/* dma launch_flags */
43enum {
44 /* location */
45 NVGPU_CE_SRC_LOCATION_COHERENT_SYSMEM = (1 << 0),
46 NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM = (1 << 1),
47 NVGPU_CE_SRC_LOCATION_LOCAL_FB = (1 << 2),
48 NVGPU_CE_DST_LOCATION_COHERENT_SYSMEM = (1 << 3),
49 NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM = (1 << 4),
50 NVGPU_CE_DST_LOCATION_LOCAL_FB = (1 << 5),
51
52 /* memory layout */
53 NVGPU_CE_SRC_MEMORY_LAYOUT_PITCH = (1 << 6),
54 NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 7),
55 NVGPU_CE_DST_MEMORY_LAYOUT_PITCH = (1 << 8),
56 NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 9),
57
58 /* transfer type */
59 NVGPU_CE_DATA_TRANSFER_TYPE_PIPELINED = (1 << 10),
60 NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED = (1 << 11),
61};
62
63/* CE operation mode */
64enum {
65 NVGPU_CE_PHYS_MODE_TRANSFER = (1 << 0),
66 NVGPU_CE_MEMSET = (1 << 1),
67};
68
69/* CE app state machine flags */
70enum {
71 NVGPU_CE_ACTIVE = (1 << 0),
72 NVGPU_CE_SUSPEND = (1 << 1),
73};
74
75/* gpu context state machine flags */
76enum {
77 NVGPU_CE_GPU_CTX_ALLOCATED = (1 << 0),
78 NVGPU_CE_GPU_CTX_DELETED = (1 << 1),
79};
80
81/* global ce app db */
82struct gk20a_ce_app {
83 bool initialised;
84 struct nvgpu_mutex app_mutex;
85 int app_state;
86
87 struct nvgpu_list_node allocated_contexts;
88 u32 ctx_count;
89 u32 next_ctx_id;
90};
91
92/* ce context db */
93struct gk20a_gpu_ctx {
94 struct gk20a *g;
95 u32 ctx_id;
96 struct nvgpu_mutex gpu_ctx_mutex;
97 int gpu_ctx_state;
98
99 /* tsg related data */
100 struct tsg_gk20a *tsg;
101
102 /* channel related data */
103 struct channel_gk20a *ch;
104 struct vm_gk20a *vm;
105
106 /* cmd buf mem_desc */
107 struct nvgpu_mem cmd_buf_mem;
108 struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
109
110 struct nvgpu_list_node list;
111
112 u32 cmd_buf_read_queue_offset;
113};
114
115static inline struct gk20a_gpu_ctx *
116gk20a_gpu_ctx_from_list(struct nvgpu_list_node *node)
117{
118 return (struct gk20a_gpu_ctx *)
119 ((uintptr_t)node - offsetof(struct gk20a_gpu_ctx, list));
120};
121
122/* global CE app related apis */
123int gk20a_init_ce_support(struct gk20a *g);
124void gk20a_ce_suspend(struct gk20a *g);
125void gk20a_ce_destroy(struct gk20a *g);
126
127/* CE app utility functions */
128u32 gk20a_ce_create_context(struct gk20a *g,
129 int runlist_id,
130 int timeslice,
131 int runlist_level);
132int gk20a_ce_execute_ops(struct gk20a *g,
133 u32 ce_ctx_id,
134 u64 src_buf,
135 u64 dst_buf,
136 u64 size,
137 unsigned int payload,
138 int launch_flags,
139 int request_operation,
140 u32 submit_flags,
141 struct gk20a_fence **gk20a_fence_out);
142void gk20a_ce_delete_context_priv(struct gk20a *g,
143 u32 ce_ctx_id);
144void gk20a_ce_delete_context(struct gk20a *g,
145 u32 ce_ctx_id);
146int gk20a_ce_prepare_submit(u64 src_buf,
147 u64 dst_buf,
148 u64 size,
149 u32 *cmd_buf_cpu_va,
150 u32 max_cmd_buf_size,
151 unsigned int payload,
152 int launch_flags,
153 int request_operation,
154 u32 dma_copy_class);
155
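As an illustration of how the flag groups above combine for gk20a_ce_execute_ops() (hedged sketch; g, ce_ctx_id, src_gpu_va, dst_gpu_va and size are assumed to come from the caller, and the submit_flags value of 0 is an assumption):

	struct gk20a_fence *ce_fence = NULL;
	int err;

	/* pipelined copy from non-coherent sysmem into vidmem */
	err = gk20a_ce_execute_ops(g, ce_ctx_id,
			src_gpu_va, dst_gpu_va, size,
			0,					/* payload unused for copies */
			NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM |
			NVGPU_CE_DST_LOCATION_LOCAL_FB |
			NVGPU_CE_DATA_TRANSFER_TYPE_PIPELINED,
			NVGPU_CE_PHYS_MODE_TRANSFER,
			0,
			&ce_fence);
	/* on success, ce_fence points to the post-fence for the submitted work */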
156#endif /*NVGPU_GK20A_CE2_GK20A_H*/
diff --git a/include/gk20a/clk_gk20a.h b/include/gk20a/clk_gk20a.h
new file mode 100644
index 0000000..b8ec942
--- /dev/null
+++ b/include/gk20a/clk_gk20a.h
@@ -0,0 +1,134 @@
1/*
2 * Copyright (c) 2011 - 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22#ifndef CLK_GK20A_H
23#define CLK_GK20A_H
24
25#include <nvgpu/lock.h>
26
27#if defined(CONFIG_COMMON_CLK)
28#include <linux/clk-provider.h>
29#endif
30
31#define GPUFREQ_TABLE_END ~(u32)1
32enum {
33 /* only one PLL for gk20a */
34 GK20A_GPC_PLL = 0,
35 /* 2 PLL revisions for gm20b */
36 GM20B_GPC_PLL_B1,
37 GM20B_GPC_PLL_C1,
38};
39
40enum gpc_pll_mode {
41 GPC_PLL_MODE_F = 0, /* fixed frequency mode a.k.a legacy mode */
42 GPC_PLL_MODE_DVFS, /* DVFS mode a.k.a NA mode */
43};
44
45struct na_dvfs {
46 u32 n_int;
47 u32 sdm_din;
48 int dfs_coeff;
49 int dfs_det_max;
50 int dfs_ext_cal;
51 int uv_cal;
52 int mv;
53};
54
55struct pll {
56 u32 id;
57 u32 clk_in; /* KHz */
58 u32 M;
59 u32 N;
60 u32 PL;
61 u32 freq; /* KHz */
62 bool enabled;
63 enum gpc_pll_mode mode;
64 struct na_dvfs dvfs;
65};
66
67struct pll_parms {
68 u32 min_freq, max_freq; /* KHz */
69 u32 min_vco, max_vco; /* KHz */
70 u32 min_u, max_u; /* KHz */
71 u32 min_M, max_M;
72 u32 min_N, max_N;
73 u32 min_PL, max_PL;
74 /* NA mode parameters*/
75 int coeff_slope, coeff_offs; /* coeff = slope * V + offs */
76 int uvdet_slope, uvdet_offs; /* uV = slope * det + offs */
77 u32 vco_ctrl;
78 /*
79 * Timing parameters in us. Lock timeout is applied to locking in fixed
80 * frequency mode and to dynamic ramp in any mode; does not affect lock
82 * latency, since the lock/ramp done status bit is polled. NA mode lock
83 * and IDDQ exit delays set the time of the respective operations with
83 * no status polling.
84 */
85 u32 lock_timeout;
86 u32 na_lock_delay;
87 u32 iddq_exit_delay;
88 /* NA mode DFS control */
89 u32 dfs_ctrl;
90};
91
92struct namemap_cfg;
93
94struct clk_gk20a {
95 struct gk20a *g;
96#if defined(CONFIG_COMMON_CLK)
97 struct clk *tegra_clk;
98 struct clk *tegra_clk_parent;
99 struct clk_hw hw;
100#endif
101 struct pll gpc_pll;
102 struct pll gpc_pll_last;
103 struct nvgpu_mutex clk_mutex;
104 struct namemap_cfg *clk_namemap;
105 u32 namemap_num;
106 u32 *namemap_xlat_table;
107 bool sw_ready;
108 bool clk_hw_on;
109 bool debugfs_set;
110 int pll_poweron_uv;
111 unsigned long dvfs_safe_max_freq;
112};
113
114#if defined(CONFIG_COMMON_CLK)
115#define to_clk_gk20a(_hw) container_of(_hw, struct clk_gk20a, hw)
116#endif
117
118struct gpu_ops;
119
120#define KHZ 1000
121#define MHZ 1000000
122
123static inline unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
124{
125 /* convert the kHz gpc2clk frequency to Hz gpcpll frequency */
126 return (rate * KHZ) / 2;
127}
128static inline unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
129{
130 /* convert the Hz gpcpll frequency to kHz gpc2clk frequency */
131 return (rate * 2) / KHZ;
132}
133
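A worked example of the conversion helpers above (values are illustrative only):

	/* rate_gpc2clk_to_gpu(2000000 kHz)   = (2000000 * 1000) / 2 = 1000000000 Hz (1 GHz) */
	/* rate_gpu_to_gpc2clk(1000000000 Hz) = (1000000000 * 2) / 1000 = 2000000 kHz */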
134#endif /* CLK_GK20A_H */
diff --git a/include/gk20a/css_gr_gk20a.c b/include/gk20a/css_gr_gk20a.c
new file mode 100644
index 0000000..28a3d49
--- /dev/null
+++ b/include/gk20a/css_gr_gk20a.c
@@ -0,0 +1,636 @@
1/*
2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
3 *
4 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/bitops.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/lock.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/mm.h>
30#include <nvgpu/sizes.h>
31#include <nvgpu/barrier.h>
32#include <nvgpu/log.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/io.h>
35#include <nvgpu/utils.h>
36#include <nvgpu/channel.h>
37#include <nvgpu/unit.h>
38
39#include "gk20a.h"
40#include "css_gr_gk20a.h"
41
42#include <nvgpu/hw/gk20a/hw_perf_gk20a.h>
43
44/* check client for pointed perfmon ownership */
45#define CONTAINS_PERFMON(cl, pm) \
46 ((cl)->perfmon_start <= (pm) && \
47 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
48
49/* address of fifo entry by offset */
50#define CSS_FIFO_ENTRY(fifo, offs) \
51 ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
52
53/* calculate area capacity in number of fifo entries */
54#define CSS_FIFO_ENTRY_CAPACITY(s) \
55 (((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \
56 / sizeof(struct gk20a_cs_snapshot_fifo_entry))
57
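A worked sizing example for the capacity macro above (hedged standalone sketch; the 64-byte header and 32-byte entry sizes are what the structs in css_gr_gk20a.h add up to, but verify sizeof on the actual build):

#include <stdio.h>

int main(void)
{
	unsigned long buf_size = 8ul * 1024 * 1024;	/* CSS_MIN_HW_SNAPSHOT_SIZE */
	unsigned long fifo_header = 64;			/* 16 x u32 fields */
	unsigned long fifo_entry = 32;			/* packed snapshot entry */

	/* mirrors CSS_FIFO_ENTRY_CAPACITY() */
	printf("capacity = %lu entries\n",
	       (buf_size - fifo_header) / fifo_entry);	/* 262142 */
	return 0;
}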
58/* reserved to indicate failures with data */
59#define CSS_FIRST_PERFMON_ID 32
60/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
61#define CSS_MAX_PERFMON_IDS 256
62
63/* reports whether the hw queue overflowed */
64bool css_hw_get_overflow_status(struct gk20a *g)
65{
66 const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
67 return st == (gk20a_readl(g, perf_pmasys_control_r()) & st);
68}
69
70/* returns how many snapshot entries are pending */
71u32 css_hw_get_pending_snapshots(struct gk20a *g)
72{
73 return gk20a_readl(g, perf_pmasys_mem_bytes_r()) /
74 sizeof(struct gk20a_cs_snapshot_fifo_entry);
75}
76
77/* informs hw how many snapshots have been processed (frees up fifo space) */
78void css_hw_set_handled_snapshots(struct gk20a *g, u32 done)
79{
80 if (done > 0) {
81 gk20a_writel(g, perf_pmasys_mem_bump_r(),
82 done * sizeof(struct gk20a_cs_snapshot_fifo_entry));
83 }
84}
85
86/* disable streaming to memory */
87static void css_hw_reset_streaming(struct gk20a *g)
88{
89 u32 engine_status;
90
91 /* reset the perfmon */
92 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
93
94	/* RBUFEMPTY must be set -- otherwise we'll pick up */
95	/* snapshots that have been queued up earlier */
96 engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
97 WARN_ON(0 == (engine_status
98 & perf_pmasys_enginestatus_rbufempty_empty_f()));
99
100 /* turn off writes */
101 gk20a_writel(g, perf_pmasys_control_r(),
102 perf_pmasys_control_membuf_clear_status_doit_f());
103
104	/* mark all pending snapshots as handled */
105 css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
106}
107
108/*
109 * WARNING: all css_gr_XXX functions are local and expected to be called
110 * from locked context (protected by cs_lock)
111 */
112
113static int css_gr_create_shared_data(struct gr_gk20a *gr)
114{
115 struct gk20a_cs_snapshot *data;
116
117 if (gr->cs_data)
118 return 0;
119
120 data = nvgpu_kzalloc(gr->g, sizeof(*data));
121 if (!data)
122 return -ENOMEM;
123
124 nvgpu_init_list_node(&data->clients);
125 gr->cs_data = data;
126
127 return 0;
128}
129
130int css_hw_enable_snapshot(struct channel_gk20a *ch,
131 struct gk20a_cs_snapshot_client *cs_client)
132{
133 struct gk20a *g = ch->g;
134 struct mm_gk20a *mm = &g->mm;
135 struct gr_gk20a *gr = &g->gr;
136 struct gk20a_cs_snapshot *data = gr->cs_data;
137 u32 snapshot_size = cs_client->snapshot_size;
138 int ret;
139
140 u32 virt_addr_lo;
141 u32 virt_addr_hi;
142 u32 inst_pa_page;
143
144 if (data->hw_snapshot)
145 return 0;
146
147 if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
148 snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;
149
150 ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
151 &data->hw_memdesc);
152 if (ret)
153 return ret;
154
155	/* the perf output buffer may not cross a 4GB boundary - with a */
156	/* separate va smaller than that it won't, but check anyway */
157 if (!data->hw_memdesc.cpu_va ||
158 data->hw_memdesc.size < snapshot_size ||
159 data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
160 ret = -EFAULT;
161 goto failed_allocation;
162 }
163
164 data->hw_snapshot =
165 (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
166 data->hw_end = data->hw_snapshot +
167 snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
168 data->hw_get = data->hw_snapshot;
169 memset(data->hw_snapshot, 0xff, snapshot_size);
170
171 /* address and size are aligned to 32 bytes, the lowest bits read back
172 * as zeros */
173 virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
174 virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);
175
176 css_hw_reset_streaming(g);
177
178 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
179 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
180 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
181 gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
182
183 /* this field is aligned to 4K */
184 inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
185
186 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
187 * should be written last */
188 gk20a_writel(g, perf_pmasys_mem_block_r(),
189 perf_pmasys_mem_block_base_f(inst_pa_page) |
190 nvgpu_aperture_mask(g, &mm->hwpm.inst_block,
191 perf_pmasys_mem_block_target_sys_ncoh_f(),
192 perf_pmasys_mem_block_target_sys_coh_f(),
193 perf_pmasys_mem_block_target_lfb_f()) |
194 perf_pmasys_mem_block_valid_true_f());
195
196 nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n");
197
198 return 0;
199
200failed_allocation:
201 if (data->hw_memdesc.size) {
202 nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
203 memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
204 }
205 data->hw_snapshot = NULL;
206
207 return ret;
208}
209
210void css_hw_disable_snapshot(struct gr_gk20a *gr)
211{
212 struct gk20a *g = gr->g;
213 struct gk20a_cs_snapshot *data = gr->cs_data;
214
215 if (!data->hw_snapshot)
216 return;
217
218 css_hw_reset_streaming(g);
219
220 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
221 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
222 perf_pmasys_outbaseupper_ptr_f(0));
223 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
224
225 gk20a_writel(g, perf_pmasys_mem_block_r(),
226 perf_pmasys_mem_block_base_f(0) |
227 perf_pmasys_mem_block_valid_false_f() |
228 perf_pmasys_mem_block_target_f(0));
229
230 nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
231 memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
232 data->hw_snapshot = NULL;
233
234 nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n");
235}
236
237static void css_gr_free_shared_data(struct gr_gk20a *gr)
238{
239 struct gk20a *g = gr->g;
240
241 if (gr->cs_data) {
242 /* the clients list is expected to be empty */
243 g->ops.css.disable_snapshot(gr);
244
245 /* release the objects */
246 nvgpu_kfree(gr->g, gr->cs_data);
247 gr->cs_data = NULL;
248 }
249}
250
251
252struct gk20a_cs_snapshot_client*
253css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon)
254{
255 struct gk20a_cs_snapshot_client *client;
256
257 nvgpu_list_for_each_entry(client, clients,
258 gk20a_cs_snapshot_client, list) {
259 if (CONTAINS_PERFMON(client, perfmon))
260 return client;
261 }
262
263 return NULL;
264}
265
266static int css_gr_flush_snapshots(struct channel_gk20a *ch)
267{
268 struct gk20a *g = ch->g;
269 struct gr_gk20a *gr = &g->gr;
270 struct gk20a_cs_snapshot *css = gr->cs_data;
271 struct gk20a_cs_snapshot_client *cur;
272 u32 pending, completed;
273 bool hw_overflow;
274 int err;
275
276 /* variables for iterating over HW entries */
277 u32 sid;
278 struct gk20a_cs_snapshot_fifo_entry *src;
279
280	/* because the buffer is shared with userspace, we only update the */
281	/* overflow counters and the put field in the fifo header */
282 struct gk20a_cs_snapshot_fifo *dst;
283 struct gk20a_cs_snapshot_fifo_entry *dst_get;
284 struct gk20a_cs_snapshot_fifo_entry *dst_put;
285 struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
286 struct gk20a_cs_snapshot_fifo_entry *dst_head;
287 struct gk20a_cs_snapshot_fifo_entry *dst_tail;
288
289 if (!css)
290 return -EINVAL;
291
292 if (nvgpu_list_empty(&css->clients))
293 return -EBADF;
294
295 /* check data available */
296 err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
297 if (err)
298 return err;
299
300 if (!pending)
301 return 0;
302
303 if (hw_overflow) {
304 nvgpu_list_for_each_entry(cur, &css->clients,
305 gk20a_cs_snapshot_client, list) {
306 cur->snapshot->hw_overflow_events_occured++;
307 }
308
309 nvgpu_warn(g, "cyclestats: hardware overflow detected");
310 }
311
312 /* process all items in HW buffer */
313 sid = 0;
314 completed = 0;
315 cur = NULL;
316 dst = NULL;
317 dst_put = NULL;
318 src = css->hw_get;
319
320	/* process all completed records */
321 while (sid < pending && 0 == src->zero0) {
322		/* we may have a new perfmon_id which requires switching */
323		/* to a new client -> let's forget the current one */
324 if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) {
325 dst->put = (char *)dst_put - (char *)dst;
326 dst = NULL;
327 cur = NULL;
328 }
329
330		/* now we have to select a new current client; */
331		/* the client selection rate depends on experiment */
332		/* activity, but on Android it usually happens 1-2 times */
333 if (!cur) {
334 cur = css_gr_search_client(&css->clients,
335 src->perfmon_id);
336 if (cur) {
337 /* found - setup all required data */
338 dst = cur->snapshot;
339 dst_get = CSS_FIFO_ENTRY(dst, dst->get);
340 dst_put = CSS_FIFO_ENTRY(dst, dst->put);
341 dst_head = CSS_FIFO_ENTRY(dst, dst->start);
342 dst_tail = CSS_FIFO_ENTRY(dst, dst->end);
343
344 dst_nxt = dst_put + 1;
345 if (dst_nxt == dst_tail)
346 dst_nxt = dst_head;
347 } else {
348 /* client not found - skipping this entry */
349 nvgpu_warn(g, "cyclestats: orphaned perfmon %u",
350 src->perfmon_id);
351 goto next_hw_fifo_entry;
352 }
353 }
354
355 /* check for software overflows */
356 if (dst_nxt == dst_get) {
357 /* no data copy, no pointer updates */
358 dst->sw_overflow_events_occured++;
359 nvgpu_warn(g, "cyclestats: perfmon %u soft overflow",
360 src->perfmon_id);
361 } else {
362 *dst_put = *src;
363 completed++;
364
365 dst_put = dst_nxt++;
366
367 if (dst_nxt == dst_tail)
368 dst_nxt = dst_head;
369 }
370
371next_hw_fifo_entry:
372 sid++;
373 if (++src >= css->hw_end)
374 src = css->hw_snapshot;
375 }
376
377 /* update client put pointer if necessary */
378 if (cur && dst)
379 dst->put = (char *)dst_put - (char *)dst;
380
381	/* re-initialize the HW buffer after processing, taking wrapping into account */
382 if (css->hw_get < src) {
383 memset(css->hw_get, 0xff, (src - css->hw_get) * sizeof(*src));
384 } else {
385 memset(css->hw_snapshot, 0xff,
386 (src - css->hw_snapshot) * sizeof(*src));
387 memset(css->hw_get, 0xff,
388 (css->hw_end - css->hw_get) * sizeof(*src));
389 }
390 gr->cs_data->hw_get = src;
391
392 if (g->ops.css.set_handled_snapshots)
393 g->ops.css.set_handled_snapshots(g, sid);
394
395 if (completed != sid) {
396		/* not all entries were processed correctly: some problems are */
397		/* reported as overflows, some as orphaned perfmons, but it is */
398		/* better to also give a summary notification about it */
399 nvgpu_warn(g, "cyclestats: completed %u from %u entries",
400 completed, pending);
401 }
402
403 return 0;
404}
405
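The flush loop above implements the shared-fifo protocol described in its comments: advance put toward get, wrap from the end back to the start, and count a software overflow instead of overwriting unread data. A hedged standalone model of that protocol, with the byte offsets replaced by plain indices:

#include <stdio.h>

#define SLOTS 4	/* illustrative capacity; the real one comes from snapshot_size */

struct demo_fifo {
	unsigned int get;	/* consumer index (advanced by userspace) */
	unsigned int put;	/* producer index (advanced by the kernel) */
	unsigned int sw_overflows;
	unsigned int data[SLOTS];
};

/* mirrors the dst_put/dst_nxt/dst_get handling in css_gr_flush_snapshots() */
static void demo_push(struct demo_fifo *f, unsigned int value)
{
	unsigned int nxt = (f->put + 1) % SLOTS;

	if (nxt == f->get) {
		f->sw_overflows++;	/* no copy, no pointer update */
		return;
	}
	f->data[f->put] = value;
	f->put = nxt;
}

int main(void)
{
	struct demo_fifo f = { 0 };
	unsigned int i;

	for (i = 0; i < 6; i++)
		demo_push(&f, i);
	printf("put=%u get=%u sw_overflows=%u\n", f.put, f.get, f.sw_overflows);
	return 0;
}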
406u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
407 u32 count)
408{
409 unsigned long *pids = data->perfmon_ids;
410 unsigned int f;
411
412 f = bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS,
413 CSS_FIRST_PERFMON_ID, count, 0);
414 if (f > CSS_MAX_PERFMON_IDS)
415 f = 0;
416 else
417 bitmap_set(pids, f, count);
418
419 return f;
420}
421
422u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
423 u32 start,
424 u32 count)
425{
426 unsigned long *pids = data->perfmon_ids;
427 u32 end = start + count;
428 u32 cnt = 0;
429
430 if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) {
431 bitmap_clear(pids, start, count);
432 cnt = count;
433 }
434
435 return cnt;
436}
437
438
439static int css_gr_free_client_data(struct gk20a *g,
440 struct gk20a_cs_snapshot *data,
441 struct gk20a_cs_snapshot_client *client)
442{
443 int ret = 0;
444
445 if (client->list.next && client->list.prev)
446 nvgpu_list_del(&client->list);
447
448 if (client->perfmon_start && client->perfmon_count
449 && g->ops.css.release_perfmon_ids) {
450 if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
451 client->perfmon_start, client->perfmon_count))
452 ret = -EINVAL;
453 }
454
455 return ret;
456}
457
458static int css_gr_create_client_data(struct gk20a *g,
459 struct gk20a_cs_snapshot *data,
460 u32 perfmon_count,
461 struct gk20a_cs_snapshot_client *cur)
462{
463 /*
464	 * Special handling in case of rm-server:
465	 *
466	 * the client snapshot buffer will not be mapped
467	 * in the rm-server case; it is only mapped on
468	 * the guest side
469 */
470 if (cur->snapshot) {
471 memset(cur->snapshot, 0, sizeof(*cur->snapshot));
472 cur->snapshot->start = sizeof(*cur->snapshot);
473		/* ensure that all fifo entries can fit here */
474 cur->snapshot->end =
475 CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size)
476 * sizeof(struct gk20a_cs_snapshot_fifo_entry)
477 + sizeof(struct gk20a_cs_snapshot_fifo);
478 cur->snapshot->get = cur->snapshot->start;
479 cur->snapshot->put = cur->snapshot->start;
480 }
481
482 cur->perfmon_count = perfmon_count;
483
484 /* In virtual case, perfmon ID allocation is handled by the server
485 * at the time of the attach (allocate_perfmon_ids is NULL in this case)
486 */
487 if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
488 cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
489 cur->perfmon_count);
490 if (!cur->perfmon_start)
491 return -ENOENT;
492 }
493
494 nvgpu_list_add_tail(&cur->list, &data->clients);
495
496 return 0;
497}
498
499
500int gr_gk20a_css_attach(struct channel_gk20a *ch,
501 u32 perfmon_count,
502 u32 *perfmon_start,
503 struct gk20a_cs_snapshot_client *cs_client)
504{
505 int ret = 0;
506 struct gk20a *g = ch->g;
507 struct gr_gk20a *gr;
508
509 /* we must have a placeholder to store pointer to client structure */
510 if (!cs_client)
511 return -EINVAL;
512
513 if (!perfmon_count ||
514 perfmon_count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID)
515 return -EINVAL;
516
517 nvgpu_speculation_barrier();
518
519 gr = &g->gr;
520
521 nvgpu_mutex_acquire(&gr->cs_lock);
522
523 ret = css_gr_create_shared_data(gr);
524 if (ret)
525 goto failed;
526
527 ret = css_gr_create_client_data(g, gr->cs_data,
528 perfmon_count,
529 cs_client);
530 if (ret)
531 goto failed;
532
533 ret = g->ops.css.enable_snapshot(ch, cs_client);
534 if (ret)
535 goto failed;
536
537 if (perfmon_start)
538 *perfmon_start = cs_client->perfmon_start;
539
540 nvgpu_mutex_release(&gr->cs_lock);
541
542 return 0;
543
544failed:
545 if (gr->cs_data) {
546 if (cs_client) {
547 css_gr_free_client_data(g, gr->cs_data, cs_client);
548 cs_client = NULL;
549 }
550
551 if (nvgpu_list_empty(&gr->cs_data->clients))
552 css_gr_free_shared_data(gr);
553 }
554 nvgpu_mutex_release(&gr->cs_lock);
555
556 if (perfmon_start)
557 *perfmon_start = 0;
558
559 return ret;
560}
561
562int gr_gk20a_css_detach(struct channel_gk20a *ch,
563 struct gk20a_cs_snapshot_client *cs_client)
564{
565 int ret = 0;
566 struct gk20a *g = ch->g;
567 struct gr_gk20a *gr;
568
569 if (!cs_client)
570 return -EINVAL;
571
572 gr = &g->gr;
573 nvgpu_mutex_acquire(&gr->cs_lock);
574 if (gr->cs_data) {
575 struct gk20a_cs_snapshot *data = gr->cs_data;
576
577 if (g->ops.css.detach_snapshot)
578 g->ops.css.detach_snapshot(ch, cs_client);
579
580 ret = css_gr_free_client_data(g, data, cs_client);
581 if (nvgpu_list_empty(&data->clients))
582 css_gr_free_shared_data(gr);
583 } else {
584 ret = -EBADF;
585 }
586 nvgpu_mutex_release(&gr->cs_lock);
587
588 return ret;
589}
590
591int gr_gk20a_css_flush(struct channel_gk20a *ch,
592 struct gk20a_cs_snapshot_client *cs_client)
593{
594 int ret = 0;
595 struct gk20a *g = ch->g;
596 struct gr_gk20a *gr;
597
598 if (!cs_client)
599 return -EINVAL;
600
601 gr = &g->gr;
602 nvgpu_mutex_acquire(&gr->cs_lock);
603 ret = css_gr_flush_snapshots(ch);
604 nvgpu_mutex_release(&gr->cs_lock);
605
606 return ret;
607}
608
609/* helper function with locking to clean up snapshot code in gr_gk20a.c */
610void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
611{
612 struct gr_gk20a *gr = &g->gr;
613
614 nvgpu_mutex_acquire(&gr->cs_lock);
615 css_gr_free_shared_data(gr);
616 nvgpu_mutex_release(&gr->cs_lock);
617 nvgpu_mutex_destroy(&gr->cs_lock);
618}
619
620int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
621 bool *hw_overflow)
622{
623 struct gk20a *g = ch->g;
624 struct gr_gk20a *gr = &g->gr;
625 struct gk20a_cs_snapshot *css = gr->cs_data;
626
627 if (!css->hw_snapshot)
628 return -EINVAL;
629
630 *pending = css_hw_get_pending_snapshots(g);
631 if (!*pending)
632 return 0;
633
634 *hw_overflow = css_hw_get_overflow_status(g);
635 return 0;
636}
diff --git a/include/gk20a/css_gr_gk20a.h b/include/gk20a/css_gr_gk20a.h
new file mode 100644
index 0000000..bf8890b
--- /dev/null
+++ b/include/gk20a/css_gr_gk20a.h
@@ -0,0 +1,151 @@
1/*
2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
3 *
4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef CSS_GR_GK20A_H
26#define CSS_GR_GK20A_H
27
28#include <nvgpu/nvgpu_mem.h>
29#include <nvgpu/list.h>
30
31/* the minimal size of HW buffer - should be enough to avoid HW overflows */
32#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
33
34struct gk20a;
35struct gr_gk20a;
36struct channel_gk20a;
37
38/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
39struct gk20a_cs_snapshot_fifo {
40 /* layout description of the buffer */
41 u32 start;
42 u32 end;
43
44 /* snafu bits */
45 u32 hw_overflow_events_occured;
46 u32 sw_overflow_events_occured;
47
48	/* the kernel copies new entries to put and
49	 * increments put. if put == get then
50	 * overflowEventsOccured++
51 */
52 u32 put;
53 u32 _reserved10;
54 u32 _reserved11;
55 u32 _reserved12;
56
57 /* the driver/client reads from get until
58 * put==get, get++ */
59 u32 get;
60 u32 _reserved20;
61 u32 _reserved21;
62 u32 _reserved22;
63
64 /* unused */
65 u32 _reserved30;
66 u32 _reserved31;
67 u32 _reserved32;
68 u32 _reserved33;
69};
70
71/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
72struct gk20a_cs_snapshot_fifo_entry {
73 /* global 48 timestamp */
74 u32 timestamp31_00:32;
75 u32 timestamp39_32:8;
76
77 /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
78 u32 perfmon_id:8;
79
80 /* typically samples_counter is wired to #pmtrigger count */
81 u32 samples_counter:12;
82
83 /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
84 u32 ds:1;
85 u32 sz:1;
86 u32 zero0:1;
87 u32 zero1:1;
88
89 /* counter results */
90 u32 event_cnt:32;
91 u32 trigger0_cnt:32;
92 u32 trigger1_cnt:32;
93 u32 sample_cnt:32;
94
95 /* Local PmTrigger results for Maxwell+ or padding otherwise */
96 u16 local_trigger_b_count:16;
97 u16 book_mark_b:16;
98 u16 local_trigger_a_count:16;
99 u16 book_mark_a:16;
100};
101
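Given the bitfield layout above, a consumer reassembles the entry timestamp from its two fields; a hedged one-liner (entry is an assumed pointer to a populated gk20a_cs_snapshot_fifo_entry, and the result carries the 40 bits the entry actually stores):

	u64 ts = ((u64)entry->timestamp39_32 << 32) | (u64)entry->timestamp31_00;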
102/* cycle stats snapshot client data (e.g. associated with channel) */
103struct gk20a_cs_snapshot_client {
104 struct nvgpu_list_node list;
105 struct gk20a_cs_snapshot_fifo *snapshot;
106 u32 snapshot_size;
107 u32 perfmon_start;
108 u32 perfmon_count;
109};
110
111static inline struct gk20a_cs_snapshot_client *
112gk20a_cs_snapshot_client_from_list(struct nvgpu_list_node *node)
113{
114 return (struct gk20a_cs_snapshot_client *)
115 ((uintptr_t)node - offsetof(struct gk20a_cs_snapshot_client, list));
116};
117
118/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
119#define CSS_MAX_PERFMON_IDS 256
120
121/* local definitions to avoid hardcoded sizes and shifts */
122#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
123
124/* cycle stats snapshot control structure for one HW entry and many clients */
125struct gk20a_cs_snapshot {
126 unsigned long perfmon_ids[PM_BITMAP_SIZE];
127 struct nvgpu_list_node clients;
128 struct nvgpu_mem hw_memdesc;
129 /* pointer to allocated cpu_va memory where GPU place data */
130 struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
131 struct gk20a_cs_snapshot_fifo_entry *hw_end;
132 struct gk20a_cs_snapshot_fifo_entry *hw_get;
133};
134
135bool css_hw_get_overflow_status(struct gk20a *g);
136u32 css_hw_get_pending_snapshots(struct gk20a *g);
137void css_hw_set_handled_snapshots(struct gk20a *g, u32 done);
138int css_hw_enable_snapshot(struct channel_gk20a *ch,
139 struct gk20a_cs_snapshot_client *cs_client);
140void css_hw_disable_snapshot(struct gr_gk20a *gr);
141u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
142 u32 count);
143u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
144 u32 start,
145 u32 count);
146int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
147 bool *hw_overflow);
148struct gk20a_cs_snapshot_client*
149css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon);
150
151#endif /* CSS_GR_GK20A_H */
diff --git a/include/gk20a/dbg_gpu_gk20a.c b/include/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 0000000..1686d01
--- /dev/null
+++ b/include/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,388 @@
1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver
3 *
4 * Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/kmem.h>
26#include <nvgpu/log.h>
27#include <nvgpu/vm.h>
28#include <nvgpu/atomic.h>
29#include <nvgpu/mm.h>
30#include <nvgpu/bug.h>
31#include <nvgpu/io.h>
32#include <nvgpu/utils.h>
33#include <nvgpu/channel.h>
34#include <nvgpu/unit.h>
35#include <nvgpu/power_features/power_features.h>
36
37#include "gk20a.h"
38#include "gr_gk20a.h"
39#include "dbg_gpu_gk20a.h"
40#include "regops_gk20a.h"
41
42#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_perf_gk20a.h>
44
45static void gk20a_perfbuf_reset_streaming(struct gk20a *g)
46{
47 u32 engine_status;
48 u32 num_unread_bytes;
49
50 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
51
52 engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
53 WARN_ON(0u ==
54 (engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));
55
56 gk20a_writel(g, perf_pmasys_control_r(),
57 perf_pmasys_control_membuf_clear_status_doit_f());
58
59 num_unread_bytes = gk20a_readl(g, perf_pmasys_mem_bytes_r());
60 if (num_unread_bytes != 0u) {
61 gk20a_writel(g, perf_pmasys_mem_bump_r(), num_unread_bytes);
62 }
63}
64
65/*
66 * API to get first channel from the list of all channels
67 * bound to the debug session
68 */
69struct channel_gk20a *
70nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
71{
72 struct dbg_session_channel_data *ch_data;
73 struct channel_gk20a *ch;
74 struct gk20a *g = dbg_s->g;
75
76 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
77 if (nvgpu_list_empty(&dbg_s->ch_list)) {
78 nvgpu_mutex_release(&dbg_s->ch_list_lock);
79 return NULL;
80 }
81
82 ch_data = nvgpu_list_first_entry(&dbg_s->ch_list,
83 dbg_session_channel_data,
84 ch_entry);
85 ch = g->fifo.channel + ch_data->chid;
86
87 nvgpu_mutex_release(&dbg_s->ch_list_lock);
88
89 return ch;
90}
91
92void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
93{
94 struct dbg_session_data *session_data;
95 struct dbg_session_gk20a *dbg_s;
96 struct gk20a *g = ch->g;
97
98 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
99
100 /* guard against the session list being modified */
101 nvgpu_mutex_acquire(&ch->dbg_s_lock);
102
103 nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
104 dbg_session_data, dbg_s_entry) {
105 dbg_s = session_data->dbg_s;
106 if (dbg_s->dbg_events.events_enabled) {
107 nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d",
108 dbg_s->id);
109 nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
110 dbg_s->dbg_events.num_pending_events);
111
112 dbg_s->dbg_events.num_pending_events++;
113
114 nvgpu_dbg_session_post_event(dbg_s);
115 }
116 }
117
118 nvgpu_mutex_release(&ch->dbg_s_lock);
119}
120
121bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
122{
123 struct dbg_session_data *session_data;
124 struct dbg_session_gk20a *dbg_s;
125 bool broadcast = false;
126 struct gk20a *g = ch->g;
127
128 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
129
130 /* guard against the session list being modified */
131 nvgpu_mutex_acquire(&ch->dbg_s_lock);
132
133 nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
134 dbg_session_data, dbg_s_entry) {
135 dbg_s = session_data->dbg_s;
136 if (dbg_s->broadcast_stop_trigger) {
137 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
138 "stop trigger broadcast enabled");
139 broadcast = true;
140 break;
141 }
142 }
143
144 nvgpu_mutex_release(&ch->dbg_s_lock);
145
146 return broadcast;
147}
148
149int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch)
150{
151 struct dbg_session_data *session_data;
152 struct dbg_session_gk20a *dbg_s;
153 struct gk20a *g = ch->g;
154
155 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
156
157 /* guard against the session list being modified */
158 nvgpu_mutex_acquire(&ch->dbg_s_lock);
159
160 nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
161 dbg_session_data, dbg_s_entry) {
162 dbg_s = session_data->dbg_s;
163 if (dbg_s->broadcast_stop_trigger) {
164 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
165 "stop trigger broadcast disabled");
166 dbg_s->broadcast_stop_trigger = false;
167 }
168 }
169
170 nvgpu_mutex_release(&ch->dbg_s_lock);
171
172 return 0;
173}
174
175u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
176 bool mode)
177{
178 u32 err = 0U;
179 struct gk20a *g = dbg_s->g;
180
181 if (dbg_s->is_pg_disabled != mode) {
182 if (mode == false) {
183 g->dbg_powergating_disabled_refcount--;
184 }
185
186 /*
187 * Allow powergate disable or enable only if
188 * the global pg disabled refcount is zero
189 */
190 if (g->dbg_powergating_disabled_refcount == 0) {
191 err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
192 mode);
193 }
194
195 if (mode) {
196 g->dbg_powergating_disabled_refcount++;
197 }
198
199 dbg_s->is_pg_disabled = mode;
200 }
201
202 return err;
203}
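
The refcounting above ensures that only the first session to disable powergating and the last session to re-enable it actually reach dbg_set_powergate(); sessions in between only adjust g->dbg_powergating_disabled_refcount. A minimal standalone sketch of that counting rule, using hypothetical names and plain C in place of the nvgpu types:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the per-device and per-session state. */
static int disable_refcount;      /* like g->dbg_powergating_disabled_refcount */
static bool hw_pg_enabled = true; /* pretend hardware powergate state */

/* Mirror of the counting rule: touch HW only when the count is zero. */
static void set_powergate_locked(bool *session_pg_disabled, bool disable)
{
	if (*session_pg_disabled == disable)
		return;                   /* no change for this session */

	if (!disable)
		disable_refcount--;       /* re-enabling: drop our vote first */

	if (disable_refcount == 0)
		hw_pg_enabled = !disable; /* only the first/last session reaches HW */

	if (disable)
		disable_refcount++;       /* disabling: add our vote afterwards */

	*session_pg_disabled = disable;
}

int main(void)
{
	bool s1 = false, s2 = false;

	set_powergate_locked(&s1, true);   /* first session disables PG in HW */
	set_powergate_locked(&s2, true);   /* second session: count only */
	assert(!hw_pg_enabled);

	set_powergate_locked(&s1, false);  /* still one session holding it */
	assert(!hw_pg_enabled);

	set_powergate_locked(&s2, false);  /* last session restores PG */
	assert(hw_pg_enabled);

	printf("refcount=%d hw_pg_enabled=%d\n", disable_refcount, hw_pg_enabled);
	return 0;
}
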
204
205int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate)
206{
207 int err = 0;
208 struct gk20a *g = dbg_s->g;
209
210 /* This function must be called with g->dbg_sessions_lock held */
211
212 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s",
213 g->name, disable_powergate ? "disable" : "enable");
214
215 /*
216 * Powergate mode here refers to railgate+powergate+clockgate
217 * so in case slcg/blcg/elcg are disabled and railgating is enabled,
218 * disable railgating and then set is_pg_disabled = true
219 * Similarly re-enable railgating and not other features if they are not
220 * enabled when powermode=MODE_ENABLE
221 */
222 if (disable_powergate) {
223 /* save off current powergate, clk state.
224 * set gpu module's can_powergate = 0.
225 * set gpu module's clk to max.
226 * while *a* debug session is active there will be no power or
227 * clocking state changes allowed from mainline code (but they
228 * should be saved).
229 */
230
231 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
232 "module busy");
233 err = gk20a_busy(g);
234 if (err) {
235 return err;
236 }
237
238 err = nvgpu_cg_pg_disable(g);
239
240 if (err == 0) {
241 dbg_s->is_pg_disabled = true;
242 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
243 "pg disabled");
244 }
245 } else {
246 /* restore (can) powergate, clk state */
247 /* release pending exceptions to fault/be handled as usual */
248		/* TBD: ordering of these? */
249
250 err = nvgpu_cg_pg_enable(g);
251
252 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
253 gk20a_idle(g);
254
255 if (err == 0) {
256 dbg_s->is_pg_disabled = false;
257 nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
258 "pg enabled");
259 }
260 }
261
262 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done",
263 g->name, disable_powergate ? "disable" : "enable");
264 return err;
265}
266
267bool nvgpu_check_and_set_global_reservation(
268 struct dbg_session_gk20a *dbg_s,
269 struct dbg_profiler_object_data *prof_obj)
270{
271 struct gk20a *g = dbg_s->g;
272
273 if (g->profiler_reservation_count == 0) {
274 g->global_profiler_reservation_held = true;
275 g->profiler_reservation_count = 1;
276 dbg_s->has_profiler_reservation = true;
277 prof_obj->has_reservation = true;
278 return true;
279 }
280 return false;
281}
282
283bool nvgpu_check_and_set_context_reservation(
284 struct dbg_session_gk20a *dbg_s,
285 struct dbg_profiler_object_data *prof_obj)
286{
287 struct gk20a *g = dbg_s->g;
288
289 /* Assumes that we've already checked that no global reservation
290 * is in effect.
291 */
292 g->profiler_reservation_count++;
293 dbg_s->has_profiler_reservation = true;
294 prof_obj->has_reservation = true;
295 return true;
296}
297
298void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s,
299 struct dbg_profiler_object_data *prof_obj)
300{
301 struct gk20a *g = dbg_s->g;
302
303 g->profiler_reservation_count--;
304 if (g->profiler_reservation_count < 0) {
305 nvgpu_err(g, "Negative reservation count!");
306 }
307 dbg_s->has_profiler_reservation = false;
308 prof_obj->has_reservation = false;
309 if (prof_obj->ch == NULL) {
310 g->global_profiler_reservation_held = false;
311 }
312}
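
Taken together, the three helpers above implement a simple reservation policy: a global (device-wide) reservation is granted only when no reservation of any kind exists, per-context reservations just increment the shared count once a global holder has been ruled out, and release decrements the count and drops the global flag when the released object was not bound to a channel. A rough illustrative model of that policy (hypothetical names, locking omitted; the driver runs this under dbg_sessions_lock):

#include <assert.h>
#include <stdbool.h>

/* Hypothetical device-wide profiler reservation state. */
struct prof_state {
	int  reservation_count;
	bool global_held;
};

static bool try_global(struct prof_state *s)
{
	if (s->reservation_count != 0)
		return false;            /* any existing holder blocks a global claim */
	s->global_held = true;
	s->reservation_count = 1;
	return true;
}

static void take_context(struct prof_state *s)
{
	/* caller must already have rejected the request if global_held is set */
	s->reservation_count++;
}

static void release(struct prof_state *s, bool was_global)
{
	s->reservation_count--;
	if (was_global)
		s->global_held = false;
}

int main(void)
{
	struct prof_state s = { 0, false };

	assert(try_global(&s));          /* empty -> global granted */
	assert(!try_global(&s));         /* second global must fail */
	release(&s, true);

	take_context(&s);                /* two per-context reservations */
	take_context(&s);
	assert(!try_global(&s));         /* global blocked while any exist */
	release(&s, false);
	release(&s, false);
	assert(try_global(&s));
	return 0;
}
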
313
314int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
315{
316 struct mm_gk20a *mm = &g->mm;
317 u32 virt_addr_lo;
318 u32 virt_addr_hi;
319 u32 inst_pa_page;
320 int err;
321
322 err = gk20a_busy(g);
323 if (err) {
324 nvgpu_err(g, "failed to poweron");
325 return err;
326 }
327
328 err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
329 if (err) {
330 return err;
331 }
332
333 g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
334
335 gk20a_perfbuf_reset_streaming(g);
336
337 virt_addr_lo = u64_lo32(offset);
338 virt_addr_hi = u64_hi32(offset);
339
340 /* address and size are aligned to 32 bytes, the lowest bits read back
341 * as zeros */
342 gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
343 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
344 perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
345 gk20a_writel(g, perf_pmasys_outsize_r(), size);
346
347 /* this field is aligned to 4K */
348 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
349
350 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
351 * should be written last */
352 gk20a_writel(g, perf_pmasys_mem_block_r(),
353 perf_pmasys_mem_block_base_f(inst_pa_page) |
354 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
355 perf_pmasys_mem_block_target_sys_ncoh_f(),
356 perf_pmasys_mem_block_target_sys_coh_f(),
357 perf_pmasys_mem_block_target_lfb_f()) |
358 perf_pmasys_mem_block_valid_true_f());
359
360 gk20a_idle(g);
361 return 0;
362}
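
The enable path above splits the 64-bit target GPU VA across the OUTBASE/OUTBASEUPPER registers and programs the instance block as a 4 KB page index (its address shifted right by 12), writing MEM_BLOCK last since that write triggers the bind. A tiny standalone sketch of just the address arithmetic, with made-up example values (the real field encodings come from the hw_perf headers):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t lo32(uint64_t v) { return (uint32_t)(v & 0xffffffffu); }
static uint32_t hi32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t buf_va  = 0x0000000123456000ull; /* 32-byte aligned GPU VA */
	uint64_t inst_pa = 0x00000000abcde000ull; /* 4 KB-aligned inst block */

	uint32_t outbase      = lo32(buf_va);               /* low word  */
	uint32_t outbaseupper = hi32(buf_va);                /* high word */
	uint32_t inst_page    = (uint32_t)(inst_pa >> 12);   /* 4 KB page index */

	assert((buf_va & 0x1fu) == 0);   /* low 5 bits read back as zeros */
	assert((inst_pa & 0xfffu) == 0); /* instance block is 4 KB aligned */

	printf("outbase=%08x outbaseupper=%08x mem_block page=%x\n",
	       (unsigned)outbase, (unsigned)outbaseupper, (unsigned)inst_page);
	return 0;
}
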
363
364/* must be called with dbg_sessions_lock held */
365int gk20a_perfbuf_disable_locked(struct gk20a *g)
366{
367 int err = gk20a_busy(g);
368 if (err) {
369 nvgpu_err(g, "failed to poweron");
370 return err;
371 }
372
373 gk20a_perfbuf_reset_streaming(g);
374
375 gk20a_writel(g, perf_pmasys_outbase_r(), 0);
376 gk20a_writel(g, perf_pmasys_outbaseupper_r(),
377 perf_pmasys_outbaseupper_ptr_f(0));
378 gk20a_writel(g, perf_pmasys_outsize_r(), 0);
379
380 gk20a_writel(g, perf_pmasys_mem_block_r(),
381 perf_pmasys_mem_block_base_f(0) |
382 perf_pmasys_mem_block_valid_false_f() |
383 perf_pmasys_mem_block_target_f(0));
384
385 gk20a_idle(g);
386
387 return 0;
388}
diff --git a/include/gk20a/dbg_gpu_gk20a.h b/include/gk20a/dbg_gpu_gk20a.h
new file mode 100644
index 0000000..fb5ae1f
--- /dev/null
+++ b/include/gk20a/dbg_gpu_gk20a.h
@@ -0,0 +1,147 @@
1/*
2 * Tegra GK20A GPU Debugger Driver
3 *
4 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef DBG_GPU_H
25#define DBG_GPU_H
26
27#include <nvgpu/cond.h>
28#include <nvgpu/lock.h>
29#include <nvgpu/list.h>
30
31struct gk20a;
32struct channel_gk20a;
33struct dbg_session_gk20a;
34
35/* used by the interrupt handler to post events */
36void gk20a_dbg_gpu_post_events(struct channel_gk20a *fault_ch);
37
38struct channel_gk20a *
39nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s);
40
41struct dbg_gpu_session_events {
42 struct nvgpu_cond wait_queue;
43 bool events_enabled;
44 int num_pending_events;
45};
46
47struct dbg_session_gk20a {
48 /* dbg session id used for trace/prints */
49 int id;
50
51 /* profiler session, if any */
52 bool is_profiler;
53
54 /* has a valid profiler reservation */
55 bool has_profiler_reservation;
56
57 /* power enabled or disabled */
58 bool is_pg_disabled;
59
60 /* timeouts enabled or disabled */
61 bool is_timeout_disabled;
62
63 struct gk20a *g;
64
65 /* list of bound channels, if any */
66 struct nvgpu_list_node ch_list;
67 struct nvgpu_mutex ch_list_lock;
68
69 /* event support */
70 struct dbg_gpu_session_events dbg_events;
71
72 bool broadcast_stop_trigger;
73
74 struct nvgpu_mutex ioctl_lock;
75};
76
77struct dbg_session_data {
78 struct dbg_session_gk20a *dbg_s;
79 struct nvgpu_list_node dbg_s_entry;
80};
81
82static inline struct dbg_session_data *
83dbg_session_data_from_dbg_s_entry(struct nvgpu_list_node *node)
84{
85 return (struct dbg_session_data *)
86 ((uintptr_t)node - offsetof(struct dbg_session_data, dbg_s_entry));
87};
88
89struct dbg_session_channel_data {
90 int channel_fd;
91 u32 chid;
92 struct nvgpu_list_node ch_entry;
93 struct dbg_session_data *session_data;
94 int (*unbind_single_channel)(struct dbg_session_gk20a *dbg_s,
95 struct dbg_session_channel_data *ch_data);
96};
97
98static inline struct dbg_session_channel_data *
99dbg_session_channel_data_from_ch_entry(struct nvgpu_list_node *node)
100{
101 return (struct dbg_session_channel_data *)
102 ((uintptr_t)node - offsetof(struct dbg_session_channel_data, ch_entry));
103};
104
105struct dbg_profiler_object_data {
106 int session_id;
107 u32 prof_handle;
108 struct channel_gk20a *ch;
109 bool has_reservation;
110 struct nvgpu_list_node prof_obj_entry;
111};
112
113static inline struct dbg_profiler_object_data *
114dbg_profiler_object_data_from_prof_obj_entry(struct nvgpu_list_node *node)
115{
116 return (struct dbg_profiler_object_data *)
117 ((uintptr_t)node - offsetof(struct dbg_profiler_object_data, prof_obj_entry));
118};
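
The *_from_*_entry helpers above all recover the containing structure from an embedded nvgpu_list_node by subtracting the member offset, i.e. the classic container_of pattern. A minimal standalone illustration of the same trick with a hypothetical container type:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct list_node { struct list_node *next, *prev; };

/* Hypothetical container with an embedded list node, like dbg_session_data. */
struct session {
	int id;
	struct list_node entry;
};

static struct session *session_from_entry(struct list_node *node)
{
	/* subtract the member offset to get back to the containing struct */
	return (struct session *)((uintptr_t)node - offsetof(struct session, entry));
}

int main(void)
{
	struct session s = { .id = 42 };

	assert(session_from_entry(&s.entry) == &s);
	assert(session_from_entry(&s.entry)->id == 42);
	return 0;
}
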
119
120bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch);
121int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch);
122
123int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate);
124bool nvgpu_check_and_set_global_reservation(
125 struct dbg_session_gk20a *dbg_s,
126 struct dbg_profiler_object_data *prof_obj);
127bool nvgpu_check_and_set_context_reservation(
128 struct dbg_session_gk20a *dbg_s,
129 struct dbg_profiler_object_data *prof_obj);
130void nvgpu_release_profiler_reservation(struct dbg_session_gk20a *dbg_s,
131 struct dbg_profiler_object_data *prof_obj);
132int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size);
133int gk20a_perfbuf_disable_locked(struct gk20a *g);
134
135void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s);
136u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
137 bool mode);
138
139/* PM Context Switch Mode */
140/* This mode says that the pms are not to be context switched. */
141#define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
142/* This mode says that the pms in Mode-B are to be context switched */
143#define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001)
144/* This mode says that the pms in Mode-E (stream out) are to be context switched. */
145#define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
146
147#endif /* DBG_GPU_H */
diff --git a/include/gk20a/fecs_trace_gk20a.c b/include/gk20a/fecs_trace_gk20a.c
new file mode 100644
index 0000000..5c1c5e0
--- /dev/null
+++ b/include/gk20a/fecs_trace_gk20a.c
@@ -0,0 +1,744 @@
1/*
2 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/kmem.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/enabled.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/hashtable.h>
28#include <nvgpu/circ_buf.h>
29#include <nvgpu/thread.h>
30#include <nvgpu/barrier.h>
31#include <nvgpu/mm.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/ctxsw_trace.h>
34#include <nvgpu/io.h>
35#include <nvgpu/utils.h>
36#include <nvgpu/timers.h>
37#include <nvgpu/channel.h>
38
39#include "fecs_trace_gk20a.h"
40#include "gk20a.h"
41#include "gr_gk20a.h"
42
43#include <nvgpu/log.h>
44#include <nvgpu/fecs_trace.h>
45
46#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
47#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
48
49struct gk20a_fecs_trace_hash_ent {
50 u32 context_ptr;
51 pid_t pid;
52 struct hlist_node node;
53};
54
55struct gk20a_fecs_trace {
56
57 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
58 struct nvgpu_mutex hash_lock;
59 struct nvgpu_mutex poll_lock;
60 struct nvgpu_thread poll_task;
61 bool init;
62 struct nvgpu_mutex enable_lock;
63 u32 enable_count;
64};
65
66#ifdef CONFIG_GK20A_CTXSW_TRACE
67u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
68{
69 return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
70}
71
72u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
73{
74 return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
75}
76
77u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
78{
79 return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
80}
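
The two accessors above split each 64-bit record word into a tag kept in the upper bits and a timestamp in the rest, using the field helpers from hw_ctxsw_prog_gk20a.h. A standalone sketch of that split, assuming purely for illustration an 8-bit tag in bits 63:56 (the real field width and position are defined by the hardware header):

#include <assert.h>
#include <stdint.h>

/* Illustrative layout only: tag in bits 63:56, timestamp below it. */
#define TS_TAG_SHIFT 56
#define TS_TAG_MASK  0xffull

static uint32_t ts_tag(uint64_t ts)
{
	return (uint32_t)((ts >> TS_TAG_SHIFT) & TS_TAG_MASK);
}

static uint64_t ts_value(uint64_t ts)
{
	return ts & ~(TS_TAG_MASK << TS_TAG_SHIFT);
}

int main(void)
{
	uint64_t raw = ((uint64_t)0xa5 << TS_TAG_SHIFT) | 0x123456789abull;

	assert(ts_tag(raw) == 0xa5);
	assert(ts_value(raw) == 0x123456789abull);
	return 0;
}
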
81
82static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
83{
84 return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
85}
86
87int gk20a_fecs_trace_num_ts(void)
88{
89 return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
90 - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
91}
92
93struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
94 struct gk20a *g, int idx)
95{
96 struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
97
98 return (struct gk20a_fecs_trace_record *)
99 ((u8 *) mem->cpu_va
100 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
101}
102
103bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
104{
105 /*
106 * testing magic_hi should suffice. magic_lo is sometimes used
107 * as a sequence number in experimental ucode.
108 */
109 return (r->magic_hi
110 == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
111}
112
113int gk20a_fecs_trace_get_read_index(struct gk20a *g)
114{
115 return gr_gk20a_elpg_protected_call(g,
116 gk20a_readl(g, gr_fecs_mailbox1_r()));
117}
118
119int gk20a_fecs_trace_get_write_index(struct gk20a *g)
120{
121 return gr_gk20a_elpg_protected_call(g,
122 gk20a_readl(g, gr_fecs_mailbox0_r()));
123}
124
125static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
126{
127 nvgpu_log(g, gpu_dbg_ctxsw, "set read=%d", index);
128 return gr_gk20a_elpg_protected_call(g,
129 (gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
130}
131
132void gk20a_fecs_trace_hash_dump(struct gk20a *g)
133{
134 u32 bkt;
135 struct gk20a_fecs_trace_hash_ent *ent;
136 struct gk20a_fecs_trace *trace = g->fecs_trace;
137
138 nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");
139
140 nvgpu_mutex_acquire(&trace->hash_lock);
141 hash_for_each(trace->pid_hash_table, bkt, ent, node)
142 {
143 nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
144 ent, bkt, ent->context_ptr, ent->pid);
145
146 }
147 nvgpu_mutex_release(&trace->hash_lock);
148}
149
150static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
151{
152 struct gk20a_fecs_trace_hash_ent *he;
153 struct gk20a_fecs_trace *trace = g->fecs_trace;
154
155 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
156 "adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
157
158 he = nvgpu_kzalloc(g, sizeof(*he));
159 if (unlikely(!he)) {
160 nvgpu_warn(g,
161 "can't alloc new hash entry for context_ptr=%x pid=%d",
162 context_ptr, pid);
163 return -ENOMEM;
164 }
165
166 he->context_ptr = context_ptr;
167 he->pid = pid;
168 nvgpu_mutex_acquire(&trace->hash_lock);
169 hash_add(trace->pid_hash_table, &he->node, context_ptr);
170 nvgpu_mutex_release(&trace->hash_lock);
171 return 0;
172}
173
174static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
175{
176 struct hlist_node *tmp;
177 struct gk20a_fecs_trace_hash_ent *ent;
178 struct gk20a_fecs_trace *trace = g->fecs_trace;
179
180 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
181 "freeing hash entry context_ptr=%x", context_ptr);
182
183 nvgpu_mutex_acquire(&trace->hash_lock);
184 hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
185 context_ptr) {
186 if (ent->context_ptr == context_ptr) {
187 hash_del(&ent->node);
188 nvgpu_log(g, gpu_dbg_ctxsw,
189 "freed hash entry=%p context_ptr=%x", ent,
190 ent->context_ptr);
191 nvgpu_kfree(g, ent);
192 break;
193 }
194 }
195 nvgpu_mutex_release(&trace->hash_lock);
196}
197
198static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
199{
200 u32 bkt;
201 struct hlist_node *tmp;
202 struct gk20a_fecs_trace_hash_ent *ent;
203 struct gk20a_fecs_trace *trace = g->fecs_trace;
204
205 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
206
207 nvgpu_mutex_acquire(&trace->hash_lock);
208 hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
209 hash_del(&ent->node);
210 nvgpu_kfree(g, ent);
211 }
212 nvgpu_mutex_release(&trace->hash_lock);
213
214}
215
216static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
217{
218 struct gk20a_fecs_trace_hash_ent *ent;
219 struct gk20a_fecs_trace *trace = g->fecs_trace;
220 pid_t pid = 0;
221
222 nvgpu_mutex_acquire(&trace->hash_lock);
223 hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
224 if (ent->context_ptr == context_ptr) {
225 nvgpu_log(g, gpu_dbg_ctxsw,
226 "found context_ptr=%x -> pid=%d",
227 ent->context_ptr, ent->pid);
228 pid = ent->pid;
229 break;
230 }
231 }
232 nvgpu_mutex_release(&trace->hash_lock);
233
234 return pid;
235}
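
The hash table manipulated above maps a FECS context pointer (the instance block address shifted down to a 4 KB page index) to the owning process ID so that raw trace records can be attributed to a PID; entries are added at channel bind, removed at unbind, and lookups fall back to pid 0 for unknown contexts. A toy illustration of that mapping using a flat array instead of the kernel hashtable (hypothetical sizes and names, no bounds checking):

#include <assert.h>
#include <stdint.h>

/* Hypothetical flat table standing in for the kernel hashtable. */
struct ent { uint32_t context_ptr; int pid; };
static struct ent tbl[16];
static int nents;

static void map_add(uint32_t ctx, int pid)
{
	tbl[nents++] = (struct ent){ ctx, pid };
}

static int map_find(uint32_t ctx)
{
	for (int i = 0; i < nents; i++)
		if (tbl[i].context_ptr == ctx)
			return tbl[i].pid;
	return 0;   /* unknown context: same "pid 0" fallback as the driver */
}

int main(void)
{
	map_add(0xabcdeu, 1234);    /* bind: context_ptr -> tgid */
	assert(map_find(0xabcdeu) == 1234);
	assert(map_find(0x12345u) == 0);
	return 0;
}
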
236
237/*
238 * Converts HW entry format to userspace-facing format and pushes it to the
239 * queue.
240 */
241static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
242{
243 int i;
244 struct nvgpu_gpu_ctxsw_trace_entry entry = { };
245 struct gk20a_fecs_trace *trace = g->fecs_trace;
246 pid_t cur_pid;
247 pid_t new_pid;
248 int count = 0;
249
250 /* for now, only one VM */
251 const int vmid = 0;
252
253 struct gk20a_fecs_trace_record *r =
254 gk20a_fecs_trace_get_record(g, index);
255
256 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
257 "consuming record trace=%p read=%d record=%p", trace, index, r);
258
259 if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
260 nvgpu_warn(g,
261 "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
262 trace, index, r, r->magic_lo, r->magic_hi);
263 return -EINVAL;
264 }
265
266 /* Clear magic_hi to detect cases where CPU could read write index
267	 * before the FECS record is actually written to DRAM. This should
268	 * not happen, as we force FECS writes to SYSMEM by reading through PRAMIN.
269 */
270 r->magic_hi = 0;
271
272 cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
273 new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
274
275 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
276 "context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
277 r->context_ptr, cur_pid, r->new_context_ptr, new_pid);
278
279 entry.context_id = r->context_id;
280 entry.vmid = vmid;
281
282 /* break out FECS record into trace events */
283 for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
284
285 entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
286 entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
287 entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
288
289 nvgpu_log(g, gpu_dbg_ctxsw,
290 "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
291 entry.tag, entry.timestamp, r->context_id,
292 r->new_context_id);
293
294 switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
295 case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
296 case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
297 entry.context_id = r->new_context_id;
298 entry.pid = new_pid;
299 break;
300
301 case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
302 case NVGPU_GPU_CTXSW_TAG_FE_ACK:
303 case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
304 case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
305 case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
306 case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
307 case NVGPU_GPU_CTXSW_TAG_SAVE_END:
308 entry.context_id = r->context_id;
309 entry.pid = cur_pid;
310 break;
311
312 default:
313 /* tags are not guaranteed to start at the beginning */
314 WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP));
315 continue;
316 }
317
318 nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
319 entry.tag, entry.context_id, entry.pid);
320
321 if (!entry.context_id)
322 continue;
323
324 gk20a_ctxsw_trace_write(g, &entry);
325 count++;
326 }
327
328 gk20a_ctxsw_trace_wake_up(g, vmid);
329 return count;
330}
331
332int gk20a_fecs_trace_poll(struct gk20a *g)
333{
334 struct gk20a_fecs_trace *trace = g->fecs_trace;
335
336 int read = 0;
337 int write = 0;
338 int cnt;
339 int err;
340
341 err = gk20a_busy(g);
342 if (unlikely(err))
343 return err;
344
345 nvgpu_mutex_acquire(&trace->poll_lock);
346 write = gk20a_fecs_trace_get_write_index(g);
347 if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
348 nvgpu_err(g,
349 "failed to acquire write index, write=%d", write);
350 err = write;
351 goto done;
352 }
353
354 read = gk20a_fecs_trace_get_read_index(g);
355
356 cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
357 if (!cnt)
358 goto done;
359
360 nvgpu_log(g, gpu_dbg_ctxsw,
361 "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
362 read, gk20a_fecs_trace_get_read_index(g), write, cnt);
363
364 /* Ensure all FECS writes have made it to SYSMEM */
365 g->ops.mm.fb_flush(g);
366
367 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
368		/* Bits 30:0 of MAILBOX1 represent the actual read pointer value */
369 read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
370 }
371
372 while (read != write) {
373 cnt = gk20a_fecs_trace_ring_read(g, read);
374 if (cnt > 0) {
375 nvgpu_log(g, gpu_dbg_ctxsw,
376 "number of trace entries added: %d", cnt);
377 }
378
379 /* Get to next record. */
380 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
381 }
382
383 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
384 /*
385 * In the next step, read pointer is going to be updated.
386 * So, MSB of read pointer should be set back to 1. This will
387 * keep FECS trace enabled.
388 */
389 read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
390 }
391
392	/* ensure FECS records have been updated before incrementing read index */
393 nvgpu_wmb();
394 gk20a_fecs_trace_set_read_index(g, read);
395
396done:
397 nvgpu_mutex_release(&trace->poll_lock);
398 gk20a_idle(g);
399 return err;
400}
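
The poll loop above treats the trace buffer as a power-of-two ring: the write index comes from MAILBOX0, the read index from MAILBOX1, CIRC_CNT() gives the number of pending records, and the read index wraps with a mask rather than a modulo. A standalone sketch of that index arithmetic (NUM_RECORDS is an illustrative value; the driver's constant is GK20A_FECS_TRACE_NUM_RECORDS):

#include <assert.h>
#include <stdio.h>

#define NUM_RECORDS 128   /* must be a power of two, as the driver asserts */

/* CIRC_CNT equivalent: number of filled slots between read and write. */
static int circ_cnt(int write, int read)
{
	return (write - read) & (NUM_RECORDS - 1);
}

int main(void)
{
	int read = 120, write = 5;          /* write index has wrapped */
	int pending = circ_cnt(write, read);

	assert(pending == 13);

	while (read != write) {
		/* consume_record(read) would go here */
		read = (read + 1) & (NUM_RECORDS - 1);
	}
	assert(circ_cnt(write, read) == 0);
	printf("consumed %d records\n", pending);
	return 0;
}
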
401
402static int gk20a_fecs_trace_periodic_polling(void *arg)
403{
404 struct gk20a *g = (struct gk20a *)arg;
405 struct gk20a_fecs_trace *trace = g->fecs_trace;
406
407 pr_info("%s: running\n", __func__);
408
409 while (!nvgpu_thread_should_stop(&trace->poll_task)) {
410
411 nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
412 GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);
413
414 gk20a_fecs_trace_poll(g);
415 }
416
417 return 0;
418}
419
420size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
421{
422 return GK20A_FECS_TRACE_NUM_RECORDS
423 * ctxsw_prog_record_timestamp_record_size_in_bytes_v();
424}
425
426int gk20a_fecs_trace_init(struct gk20a *g)
427{
428 struct gk20a_fecs_trace *trace;
429 int err;
430
431 trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
432 if (!trace) {
433 nvgpu_warn(g, "failed to allocate fecs_trace");
434 return -ENOMEM;
435 }
436 g->fecs_trace = trace;
437
438 err = nvgpu_mutex_init(&trace->poll_lock);
439 if (err)
440 goto clean;
441 err = nvgpu_mutex_init(&trace->hash_lock);
442 if (err)
443 goto clean_poll_lock;
444
445 err = nvgpu_mutex_init(&trace->enable_lock);
446 if (err)
447 goto clean_hash_lock;
448
449 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
450 hash_init(trace->pid_hash_table);
451
452 __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
453
454 trace->enable_count = 0;
455 trace->init = true;
456
457 return 0;
458
459clean_hash_lock:
460 nvgpu_mutex_destroy(&trace->hash_lock);
461
462clean_poll_lock:
463 nvgpu_mutex_destroy(&trace->poll_lock);
464clean:
465 nvgpu_kfree(g, trace);
466 g->fecs_trace = NULL;
467 return err;
468}
469
470int gk20a_fecs_trace_bind_channel(struct gk20a *g,
471 struct channel_gk20a *ch)
472{
473 /*
474 * map our circ_buf to the context space and store the GPU VA
475 * in the context header.
476 */
477
478 u32 lo;
479 u32 hi;
480 u64 addr;
481 struct tsg_gk20a *tsg;
482 struct nvgpu_gr_ctx *ch_ctx;
483 struct gk20a_fecs_trace *trace = g->fecs_trace;
484 struct nvgpu_mem *mem;
485 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
486 u32 aperture_mask;
487
488 tsg = tsg_gk20a_from_ch(ch);
489 if (tsg == NULL) {
490 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
491 return -EINVAL;
492 }
493
494 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
495 "chid=%d context_ptr=%x inst_block=%llx",
496 ch->chid, context_ptr,
497 nvgpu_inst_block_addr(g, &ch->inst_block));
498
499 tsg = tsg_gk20a_from_ch(ch);
500 if (!tsg)
501 return -EINVAL;
502
503 ch_ctx = &tsg->gr_ctx;
504 mem = &ch_ctx->mem;
505
506 if (!trace)
507 return -ENOMEM;
508
509 mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
510
511 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
512 addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
513 nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
514 aperture_mask = 0;
515 } else {
516 addr = nvgpu_inst_block_addr(g, mem);
517 nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
518 aperture_mask = nvgpu_aperture_mask(g, mem,
519 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
520 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
521 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
522 }
523 if (!addr)
524 return -ENOMEM;
525
526 lo = u64_lo32(addr);
527 hi = u64_hi32(addr);
528
529 mem = &ch_ctx->mem;
530
531 nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
532 lo, GK20A_FECS_TRACE_NUM_RECORDS);
533
534 nvgpu_mem_wr(g, mem,
535 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
536 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
537 GK20A_FECS_TRACE_NUM_RECORDS));
538
539 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
540 mem = &ch->ctx_header;
541
542 nvgpu_mem_wr(g, mem,
543 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
544 lo);
545 nvgpu_mem_wr(g, mem,
546 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
547 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
548 aperture_mask);
549
550	/* pid (process identifier) in user space corresponds to tgid (thread
551 * group id) in kernel space.
552 */
553 gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);
554
555 return 0;
556}
557
558int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
559{
560 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
561
562 if (g->fecs_trace) {
563 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
564 "ch=%p context_ptr=%x", ch, context_ptr);
565
566 if (g->ops.fecs_trace.is_enabled(g)) {
567 if (g->ops.fecs_trace.flush)
568 g->ops.fecs_trace.flush(g);
569 gk20a_fecs_trace_poll(g);
570 }
571 gk20a_fecs_trace_hash_del(g, context_ptr);
572 }
573 return 0;
574}
575
576int gk20a_fecs_trace_reset(struct gk20a *g)
577{
578 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
579
580 if (!g->ops.fecs_trace.is_enabled(g))
581 return 0;
582
583 gk20a_fecs_trace_poll(g);
584 return gk20a_fecs_trace_set_read_index(g, 0);
585}
586
587int gk20a_fecs_trace_deinit(struct gk20a *g)
588{
589 struct gk20a_fecs_trace *trace = g->fecs_trace;
590
591 if (!trace->init)
592 return 0;
593
594 /*
595 * Check if tracer was enabled before attempting to stop the
596 * tracer thread.
597 */
598 if (trace->enable_count > 0) {
599 nvgpu_thread_stop(&trace->poll_task);
600 }
601 gk20a_fecs_trace_free_hash_table(g);
602
603 nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
604 nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
605 nvgpu_mutex_destroy(&g->fecs_trace->enable_lock);
606
607 nvgpu_kfree(g, g->fecs_trace);
608 g->fecs_trace = NULL;
609 return 0;
610}
611
612int gk20a_gr_max_entries(struct gk20a *g,
613 struct nvgpu_gpu_ctxsw_trace_filter *filter)
614{
615 int n;
616 int tag;
617
618 /* Compute number of entries per record, with given filter */
619 for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
620 n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
621
622 /* Return max number of entries generated for the whole ring */
623 return n * GK20A_FECS_TRACE_NUM_RECORDS;
624}
625
626int gk20a_fecs_trace_enable(struct gk20a *g)
627{
628 struct gk20a_fecs_trace *trace = g->fecs_trace;
629 int write;
630 int err = 0;
631
632 if (!trace)
633 return -EINVAL;
634
635 nvgpu_mutex_acquire(&trace->enable_lock);
636 trace->enable_count++;
637
638 if (trace->enable_count == 1U) {
639 /* drop data in hw buffer */
640 if (g->ops.fecs_trace.flush)
641 g->ops.fecs_trace.flush(g);
642
643 write = gk20a_fecs_trace_get_write_index(g);
644
645 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
646 /*
647 * For enabling FECS trace support, MAILBOX1's MSB
648 * (Bit 31:31) should be set to 1. Bits 30:0 represents
649			 * (Bit 31:31) should be set to 1. Bits 30:0 represent the
650			 * actual read pointer value.
651 write = write |
652 (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
653 }
654 gk20a_fecs_trace_set_read_index(g, write);
655
656 /*
657 * FECS ucode does a priv holdoff around the assertion of
658 * context reset. So, pri transactions (e.g. mailbox1 register
659 * write) might fail due to this. Hence, do write with ack
660 * i.e. write and read it back to make sure write happened for
661 * mailbox1.
662 */
663 while (gk20a_fecs_trace_get_read_index(g) != write) {
664 nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
665 gk20a_fecs_trace_set_read_index(g, write);
666 }
667
668 err = nvgpu_thread_create(&trace->poll_task, g,
669 gk20a_fecs_trace_periodic_polling, __func__);
670 if (err) {
671 nvgpu_warn(g,
672 "failed to create FECS polling task");
673 goto done;
674 }
675 }
676
677done:
678 nvgpu_mutex_release(&trace->enable_lock);
679 return err;
680}
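
With NVGPU_FECS_TRACE_FEATURE_CONTROL set, MAILBOX1 carries two things at once, as the comments above describe: bit 31 acts as the tracing on/off switch and bits 30:0 hold the read pointer, so enable sets the MSB, disable clears it, and the poll path masks it off before using the pointer. A small standalone sketch of that bit packing:

#include <assert.h>
#include <stdint.h>

#define FEATURE_CONTROL_BIT  31u
#define FEATURE_CONTROL_MASK (1u << FEATURE_CONTROL_BIT)

static uint32_t enable_with_ptr(uint32_t read_ptr)
{
	return read_ptr | FEATURE_CONTROL_MASK;    /* MSB = 1: trace enabled */
}

static uint32_t pointer_bits(uint32_t mailbox1)
{
	return mailbox1 & ~FEATURE_CONTROL_MASK;   /* bits 30:0: read pointer */
}

int main(void)
{
	uint32_t mb1 = enable_with_ptr(42);

	assert(mb1 & FEATURE_CONTROL_MASK);
	assert(pointer_bits(mb1) == 42);

	mb1 = pointer_bits(mb1);                   /* disable: clear the MSB */
	assert(!(mb1 & FEATURE_CONTROL_MASK));
	return 0;
}
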
681
682int gk20a_fecs_trace_disable(struct gk20a *g)
683{
684 struct gk20a_fecs_trace *trace = g->fecs_trace;
685 int read = 0;
686
687 if (trace == NULL) {
688 return -EINVAL;
689 }
690
691 nvgpu_mutex_acquire(&trace->enable_lock);
692 if (trace->enable_count <= 0U) {
693 nvgpu_mutex_release(&trace->enable_lock);
694 return 0;
695 }
696 trace->enable_count--;
697 if (trace->enable_count == 0U) {
698 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
699 /*
700 * For disabling FECS trace support, MAILBOX1's MSB
701 * (Bit 31:31) should be set to 0.
702 */
703 read = gk20a_fecs_trace_get_read_index(g) &
704 (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
705
706 gk20a_fecs_trace_set_read_index(g, read);
707
708 /*
709 * FECS ucode does a priv holdoff around the assertion
710 * of context reset. So, pri transactions (e.g.
711 * mailbox1 register write) might fail due to this.
712 * Hence, do write with ack i.e. write and read it back
713 * to make sure write happened for mailbox1.
714 */
715 while (gk20a_fecs_trace_get_read_index(g) != read) {
716 nvgpu_log(g, gpu_dbg_ctxsw,
717 "mailbox1 update failed");
718 gk20a_fecs_trace_set_read_index(g, read);
719 }
720 }
721
722 nvgpu_thread_stop(&trace->poll_task);
723
724 }
725 nvgpu_mutex_release(&trace->enable_lock);
726
727 return -EPERM;
728}
729
730bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
731{
732 struct gk20a_fecs_trace *trace = g->fecs_trace;
733
734 return (trace && nvgpu_thread_is_running(&trace->poll_task));
735}
736
737void gk20a_fecs_trace_reset_buffer(struct gk20a *g)
738{
739 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
740
741 gk20a_fecs_trace_set_read_index(g,
742 gk20a_fecs_trace_get_write_index(g));
743}
744#endif /* CONFIG_GK20A_CTXSW_TRACE */
diff --git a/include/gk20a/fecs_trace_gk20a.h b/include/gk20a/fecs_trace_gk20a.h
new file mode 100644
index 0000000..d33e619
--- /dev/null
+++ b/include/gk20a/fecs_trace_gk20a.h
@@ -0,0 +1,45 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef NVGPU_GK20A_FECS_TRACE_GK20A_H
24#define NVGPU_GK20A_FECS_TRACE_GK20A_H
25
26struct gk20a;
27struct channel_gk20a;
28struct nvgpu_gpu_ctxsw_trace_filter;
29
30int gk20a_fecs_trace_poll(struct gk20a *g);
31int gk20a_fecs_trace_init(struct gk20a *g);
32int gk20a_fecs_trace_bind_channel(struct gk20a *g,
33 struct channel_gk20a *ch);
34int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch);
35int gk20a_fecs_trace_reset(struct gk20a *g);
36int gk20a_fecs_trace_deinit(struct gk20a *g);
37int gk20a_gr_max_entries(struct gk20a *g,
38 struct nvgpu_gpu_ctxsw_trace_filter *filter);
39int gk20a_fecs_trace_enable(struct gk20a *g);
40int gk20a_fecs_trace_disable(struct gk20a *g);
41bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
42size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
43void gk20a_fecs_trace_reset_buffer(struct gk20a *g);
44
45#endif /* NVGPU_GK20A_FECS_TRACE_GK20A_H */
diff --git a/include/gk20a/fence_gk20a.c b/include/gk20a/fence_gk20a.c
new file mode 100644
index 0000000..af42130
--- /dev/null
+++ b/include/gk20a/fence_gk20a.c
@@ -0,0 +1,319 @@
1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include "fence_gk20a.h"
24
25#include <nvgpu/semaphore.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/soc.h>
28#include <nvgpu/nvhost.h>
29#include <nvgpu/barrier.h>
30#include <nvgpu/os_fence.h>
31#include <nvgpu/channel.h>
32
33#include "gk20a.h"
34
35struct gk20a_fence_ops {
36 int (*wait)(struct gk20a_fence *, long timeout);
37 bool (*is_expired)(struct gk20a_fence *);
38 void *(*free)(struct nvgpu_ref *);
39};
40
41static void gk20a_fence_free(struct nvgpu_ref *ref)
42{
43 struct gk20a_fence *f =
44 container_of(ref, struct gk20a_fence, ref);
45 struct gk20a *g = f->g;
46
47 if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
48 f->os_fence.ops->drop_ref(&f->os_fence);
49 }
50
51 if (f->semaphore) {
52 nvgpu_semaphore_put(f->semaphore);
53 }
54
55 if (f->allocator) {
56 if (nvgpu_alloc_initialized(f->allocator)) {
57 nvgpu_free(f->allocator, (u64)(uintptr_t)f);
58 }
59 } else {
60 nvgpu_kfree(g, f);
61 }
62}
63
64void gk20a_fence_put(struct gk20a_fence *f)
65{
66 if (f) {
67 nvgpu_ref_put(&f->ref, gk20a_fence_free);
68 }
69}
70
71struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
72{
73 if (f) {
74 nvgpu_ref_get(&f->ref);
75 }
76 return f;
77}
78
79inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
80{
81 bool valid = f->valid;
82
83 nvgpu_smp_rmb();
84 return valid;
85}
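
gk20a_fence_is_valid pairs its read barrier with the write barrier issued just before f->valid is set in the creation paths later in this file, so a reader that sees valid == true also sees the fence fields written before it. A minimal standalone sketch of that publish/observe pairing, using C11 acquire/release atomics in place of nvgpu_smp_wmb()/nvgpu_smp_rmb() (illustrative only, hypothetical fence type):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical fence with a payload published under a 'valid' flag. */
struct fence {
	int         syncpt_value;
	atomic_bool valid;
};

static void publish(struct fence *f, int value)
{
	f->syncpt_value = value;
	/* release: commit previous writes before setting the valid flag */
	atomic_store_explicit(&f->valid, true, memory_order_release);
}

static bool observe(struct fence *f, int *value)
{
	/* acquire: pairs with the release store above */
	if (!atomic_load_explicit(&f->valid, memory_order_acquire))
		return false;
	*value = f->syncpt_value;
	return true;
}

int main(void)
{
	struct fence f;
	int v;

	f.syncpt_value = 0;
	atomic_init(&f.valid, false);

	assert(!observe(&f, &v));
	publish(&f, 7);
	assert(observe(&f, &v) && v == 7);
	return 0;
}
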
86
87int gk20a_fence_install_fd(struct gk20a_fence *f, int fd)
88{
89 if (!f || !gk20a_fence_is_valid(f) ||
90 !nvgpu_os_fence_is_initialized(&f->os_fence)) {
91 return -EINVAL;
92 }
93
94 f->os_fence.ops->install_fence(&f->os_fence, fd);
95
96 return 0;
97}
98
99int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
100 unsigned long timeout)
101{
102 if (f && gk20a_fence_is_valid(f)) {
103 if (!nvgpu_platform_is_silicon(g)) {
104 timeout = MAX_SCHEDULE_TIMEOUT;
105 }
106 return f->ops->wait(f, timeout);
107 }
108 return 0;
109}
110
111bool gk20a_fence_is_expired(struct gk20a_fence *f)
112{
113 if (f && gk20a_fence_is_valid(f) && f->ops) {
114 return f->ops->is_expired(f);
115 } else {
116 return true;
117 }
118}
119
120int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
121{
122 int err;
123 size_t size;
124 struct gk20a_fence *fence_pool = NULL;
125
126 size = sizeof(struct gk20a_fence);
127 if (count <= UINT_MAX / size) {
128 size = count * size;
129 fence_pool = nvgpu_vzalloc(c->g, size);
130 }
131
132 if (!fence_pool) {
133 return -ENOMEM;
134 }
135
136 err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator,
137 "fence_pool", (size_t)fence_pool, size,
138 sizeof(struct gk20a_fence), 0);
139 if (err) {
140 goto fail;
141 }
142
143 return 0;
144
145fail:
146 nvgpu_vfree(c->g, fence_pool);
147 return err;
148}
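
Note how the pool allocation above guards the count * sizeof(fence) multiplication with the check count <= UINT_MAX / size before performing it, so a large caller-supplied count cannot wrap the allocation size. A standalone sketch of that overflow check with a hypothetical element size:

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>

/* Returns 1 and sets *out, or 0 on overflow; mirrors the driver's guard. */
static int pool_size(unsigned int count, size_t elem, size_t *out)
{
	if (elem == 0 || count > UINT_MAX / elem)
		return 0;                /* multiplication would wrap */
	*out = (size_t)count * elem;
	return 1;
}

int main(void)
{
	size_t sz;

	assert(pool_size(1024, 96, &sz) && sz == 1024u * 96u);
	assert(!pool_size(UINT_MAX, 96, &sz));   /* rejected, no wrap */
	printf("pool bytes: %zu\n", sz);
	return 0;
}
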
149
150void gk20a_free_fence_pool(struct channel_gk20a *c)
151{
152 if (nvgpu_alloc_initialized(&c->fence_allocator)) {
153 struct gk20a_fence *fence_pool;
154 fence_pool = (struct gk20a_fence *)(uintptr_t)
155 nvgpu_alloc_base(&c->fence_allocator);
156 nvgpu_alloc_destroy(&c->fence_allocator);
157 nvgpu_vfree(c->g, fence_pool);
158 }
159}
160
161struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
162{
163 struct gk20a_fence *fence = NULL;
164
165 if (channel_gk20a_is_prealloc_enabled(c)) {
166 if (nvgpu_alloc_initialized(&c->fence_allocator)) {
167 fence = (struct gk20a_fence *)(uintptr_t)
168 nvgpu_alloc(&c->fence_allocator,
169 sizeof(struct gk20a_fence));
170
171 /* clear the node and reset the allocator pointer */
172 if (fence) {
173 memset(fence, 0, sizeof(*fence));
174 fence->allocator = &c->fence_allocator;
175 }
176 }
177 } else {
178 fence = nvgpu_kzalloc(c->g, sizeof(struct gk20a_fence));
179 }
180
181 if (fence) {
182 nvgpu_ref_init(&fence->ref);
183 fence->g = c->g;
184 }
185
186 return fence;
187}
188
189void gk20a_init_fence(struct gk20a_fence *f,
190 const struct gk20a_fence_ops *ops,
191 struct nvgpu_os_fence os_fence)
192{
193 if (!f) {
194 return;
195 }
196 f->ops = ops;
197 f->syncpt_id = -1;
198 f->semaphore = NULL;
199 f->os_fence = os_fence;
200}
201
202/* Fences that are backed by GPU semaphores: */
203
204static int nvgpu_semaphore_fence_wait(struct gk20a_fence *f, long timeout)
205{
206 if (!nvgpu_semaphore_is_acquired(f->semaphore)) {
207 return 0;
208 }
209
210 return NVGPU_COND_WAIT_INTERRUPTIBLE(
211 f->semaphore_wq,
212 !nvgpu_semaphore_is_acquired(f->semaphore),
213 timeout);
214}
215
216static bool nvgpu_semaphore_fence_is_expired(struct gk20a_fence *f)
217{
218 return !nvgpu_semaphore_is_acquired(f->semaphore);
219}
220
221static const struct gk20a_fence_ops nvgpu_semaphore_fence_ops = {
222 .wait = &nvgpu_semaphore_fence_wait,
223 .is_expired = &nvgpu_semaphore_fence_is_expired,
224};
225
226/* This function takes ownership of the semaphore as well as the os_fence */
227int gk20a_fence_from_semaphore(
228 struct gk20a_fence *fence_out,
229 struct nvgpu_semaphore *semaphore,
230 struct nvgpu_cond *semaphore_wq,
231 struct nvgpu_os_fence os_fence)
232{
233 struct gk20a_fence *f = fence_out;
234
235 gk20a_init_fence(f, &nvgpu_semaphore_fence_ops, os_fence);
236 if (!f) {
237 return -EINVAL;
238 }
239
240
241 f->semaphore = semaphore;
242 f->semaphore_wq = semaphore_wq;
243
244 /* commit previous writes before setting the valid flag */
245 nvgpu_smp_wmb();
246 f->valid = true;
247
248 return 0;
249}
250
251#ifdef CONFIG_TEGRA_GK20A_NVHOST
252/* Fences that are backed by host1x syncpoints: */
253
254static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
255{
256 return nvgpu_nvhost_syncpt_wait_timeout_ext(
257 f->nvhost_dev, f->syncpt_id, f->syncpt_value,
258 (u32)timeout, NULL, NULL);
259}
260
261static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
262{
263
264 /*
265	 * In cases where we don't register a notifier, we can't expect the
266 * syncpt value to be updated. For this case, we force a read
267 * of the value from HW, and then check for expiration.
268 */
269 if (!nvgpu_nvhost_syncpt_is_expired_ext(f->nvhost_dev, f->syncpt_id,
270 f->syncpt_value)) {
271 u32 val;
272
273 if (!nvgpu_nvhost_syncpt_read_ext_check(f->nvhost_dev,
274 f->syncpt_id, &val)) {
275 return nvgpu_nvhost_syncpt_is_expired_ext(
276 f->nvhost_dev,
277 f->syncpt_id, f->syncpt_value);
278 }
279 }
280
281 return true;
282}
283
284static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
285 .wait = &gk20a_syncpt_fence_wait,
286 .is_expired = &gk20a_syncpt_fence_is_expired,
287};
288
289/* This function takes the ownership of the os_fence */
290int gk20a_fence_from_syncpt(
291 struct gk20a_fence *fence_out,
292 struct nvgpu_nvhost_dev *nvhost_dev,
293 u32 id, u32 value, struct nvgpu_os_fence os_fence)
294{
295 struct gk20a_fence *f = fence_out;
296
297 gk20a_init_fence(f, &gk20a_syncpt_fence_ops, os_fence);
298 if (!f)
299 return -EINVAL;
300
301 f->nvhost_dev = nvhost_dev;
302 f->syncpt_id = id;
303 f->syncpt_value = value;
304
305 /* commit previous writes before setting the valid flag */
306 nvgpu_smp_wmb();
307 f->valid = true;
308
309 return 0;
310}
311#else
312int gk20a_fence_from_syncpt(
313 struct gk20a_fence *fence_out,
314 struct nvgpu_nvhost_dev *nvhost_dev,
315 u32 id, u32 value, struct nvgpu_os_fence os_fence)
316{
317 return -EINVAL;
318}
319#endif
diff --git a/include/gk20a/fence_gk20a.h b/include/gk20a/fence_gk20a.h
new file mode 100644
index 0000000..0311279
--- /dev/null
+++ b/include/gk20a/fence_gk20a.h
@@ -0,0 +1,100 @@
1/*
2 * drivers/video/tegra/host/gk20a/fence_gk20a.h
3 *
4 * GK20A Fences
5 *
6 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26#ifndef NVGPU_GK20A_FENCE_GK20A_H
27#define NVGPU_GK20A_FENCE_GK20A_H
28
29#include <nvgpu/types.h>
30#include <nvgpu/kref.h>
31#include <nvgpu/os_fence.h>
32
33struct platform_device;
34struct nvgpu_semaphore;
35struct channel_gk20a;
36struct gk20a;
37struct nvgpu_os_fence;
38
39struct gk20a_fence_ops;
40
41struct gk20a_fence {
42 struct gk20a *g;
43
44 /* Valid for all fence types: */
45 bool valid;
46 struct nvgpu_ref ref;
47 const struct gk20a_fence_ops *ops;
48
49 struct nvgpu_os_fence os_fence;
50
51 /* Valid for fences created from semaphores: */
52 struct nvgpu_semaphore *semaphore;
53 struct nvgpu_cond *semaphore_wq;
54
55 /* Valid for fences created from syncpoints: */
56 struct nvgpu_nvhost_dev *nvhost_dev;
57 u32 syncpt_id;
58 u32 syncpt_value;
59
60 /* Valid for fences part of a pre-allocated fence pool */
61 struct nvgpu_allocator *allocator;
62};
63
64/* Fences can be created from semaphores or syncpoint (id, value) pairs */
65int gk20a_fence_from_semaphore(
66 struct gk20a_fence *fence_out,
67 struct nvgpu_semaphore *semaphore,
68 struct nvgpu_cond *semaphore_wq,
69 struct nvgpu_os_fence os_fence);
70
71int gk20a_fence_from_syncpt(
72 struct gk20a_fence *fence_out,
73 struct nvgpu_nvhost_dev *nvhost_dev,
74 u32 id, u32 value,
75 struct nvgpu_os_fence os_fence);
76
77int gk20a_alloc_fence_pool(
78 struct channel_gk20a *c,
79 unsigned int count);
80
81void gk20a_free_fence_pool(
82 struct channel_gk20a *c);
83
84struct gk20a_fence *gk20a_alloc_fence(
85 struct channel_gk20a *c);
86
87void gk20a_init_fence(struct gk20a_fence *f,
88 const struct gk20a_fence_ops *ops,
89 struct nvgpu_os_fence os_fence);
90
91/* Fence operations */
92void gk20a_fence_put(struct gk20a_fence *f);
93struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f);
94int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
95 unsigned long timeout);
96bool gk20a_fence_is_expired(struct gk20a_fence *f);
97bool gk20a_fence_is_valid(struct gk20a_fence *f);
98int gk20a_fence_install_fd(struct gk20a_fence *f, int fd);
99
100#endif /* NVGPU_GK20A_FENCE_GK20A_H */
diff --git a/include/gk20a/fifo_gk20a.c b/include/gk20a/fifo_gk20a.c
new file mode 100644
index 0000000..4477f7c
--- /dev/null
+++ b/include/gk20a/fifo_gk20a.c
@@ -0,0 +1,4649 @@
1/*
2 * GK20A Graphics FIFO (gr host)
3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/mm.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/timers.h>
30#include <nvgpu/semaphore.h>
31#include <nvgpu/enabled.h>
32#include <nvgpu/kmem.h>
33#include <nvgpu/log.h>
34#include <nvgpu/soc.h>
35#include <nvgpu/atomic.h>
36#include <nvgpu/bug.h>
37#include <nvgpu/log2.h>
38#include <nvgpu/debug.h>
39#include <nvgpu/nvhost.h>
40#include <nvgpu/barrier.h>
41#include <nvgpu/ctxsw_trace.h>
42#include <nvgpu/error_notifier.h>
43#include <nvgpu/ptimer.h>
44#include <nvgpu/io.h>
45#include <nvgpu/utils.h>
46#include <nvgpu/channel.h>
47#include <nvgpu/unit.h>
48#include <nvgpu/power_features/power_features.h>
49#include <nvgpu/power_features/cg.h>
50
51#include "gk20a.h"
52#include "mm_gk20a.h"
53
54#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
55#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
56#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
57#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
58#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
59#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
60
61#include <os/linux/os_linux.h>
62
63#define FECS_METHOD_WFI_RESTORE 0x80000
64#define FECS_MAILBOX_0_ACK_RESTORE 0x4
65
66
67static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
68
69static const char *const pbdma_intr_fault_type_desc[] = {
70 "MEMREQ timeout", "MEMACK_TIMEOUT", "MEMACK_EXTRA acks",
71 "MEMDAT_TIMEOUT", "MEMDAT_EXTRA acks", "MEMFLUSH noack",
72 "MEMOP noack", "LBCONNECT noack", "NONE - was LBREQ",
73 "LBACK_TIMEOUT", "LBACK_EXTRA acks", "LBDAT_TIMEOUT",
74 "LBDAT_EXTRA acks", "GPFIFO won't fit", "GPPTR invalid",
75 "GPENTRY invalid", "GPCRC mismatch", "PBPTR get>put",
76 "PBENTRY invld", "PBCRC mismatch", "NONE - was XBARC",
77 "METHOD invld", "METHODCRC mismat", "DEVICE sw method",
78 "[ENGINE]", "SEMAPHORE invlid", "ACQUIRE timeout",
79 "PRI forbidden", "ILLEGAL SYNCPT", "[NO_CTXSW_SEG]",
80 "PBSEG badsplit", "SIGNATURE bad"
81};
82
83u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
84 u32 engine_id[], u32 engine_id_sz,
85 u32 engine_enum)
86{
87 struct fifo_gk20a *f = NULL;
88 u32 instance_cnt = 0;
89 u32 engine_id_idx;
90 u32 active_engine_id = 0;
91 struct fifo_engine_info_gk20a *info = NULL;
92
93 if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) {
94 f = &g->fifo;
95 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
96 active_engine_id = f->active_engines_list[engine_id_idx];
97 info = &f->engine_info[active_engine_id];
98
99 if (info->engine_enum == engine_enum) {
100 if (instance_cnt < engine_id_sz) {
101 engine_id[instance_cnt] = active_engine_id;
102 ++instance_cnt;
103 } else {
104 nvgpu_log_info(g, "warning engine_id table sz is small %d",
105 engine_id_sz);
106 }
107 }
108 }
109 }
110 return instance_cnt;
111}
112
113struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id)
114{
115 struct fifo_gk20a *f = NULL;
116 u32 engine_id_idx;
117 struct fifo_engine_info_gk20a *info = NULL;
118
119 if (!g) {
120 return info;
121 }
122
123 f = &g->fifo;
124
125 if (engine_id < f->max_engines) {
126 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
127 if (engine_id == f->active_engines_list[engine_id_idx]) {
128 info = &f->engine_info[engine_id];
129 break;
130 }
131 }
132 }
133
134 if (!info) {
135 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
136 }
137
138 return info;
139}
140
141bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id)
142{
143 struct fifo_gk20a *f = NULL;
144 u32 engine_id_idx;
145 bool valid = false;
146
147 if (!g) {
148 return valid;
149 }
150
151 f = &g->fifo;
152
153 if (engine_id < f->max_engines) {
154 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
155 if (engine_id == f->active_engines_list[engine_id_idx]) {
156 valid = true;
157 break;
158 }
159 }
160 }
161
162 if (!valid) {
163 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
164 }
165
166 return valid;
167}
168
169u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g)
170{
171 u32 gr_engine_cnt = 0;
172 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
173
174 /* Consider 1st available GR engine */
175 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
176 1, ENGINE_GR_GK20A);
177
178 if (!gr_engine_cnt) {
179 nvgpu_err(g, "No GR engine available on this device!");
180 }
181
182 return gr_engine_id;
183}
184
185u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
186{
187 u32 reset_mask = 0;
188 u32 engine_enum = ENGINE_INVAL_GK20A;
189 struct fifo_gk20a *f = NULL;
190 u32 engine_id_idx;
191 struct fifo_engine_info_gk20a *engine_info;
192 u32 active_engine_id = 0;
193
194 if (!g) {
195 return reset_mask;
196 }
197
198 f = &g->fifo;
199
200 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
201 active_engine_id = f->active_engines_list[engine_id_idx];
202 engine_info = &f->engine_info[active_engine_id];
203 engine_enum = engine_info->engine_enum;
204
205 if ((engine_enum == ENGINE_GRCE_GK20A) ||
206 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
207 reset_mask |= engine_info->reset_mask;
208 }
209 }
210
211 return reset_mask;
212}
213
214u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g)
215{
216 u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g);
217 u32 engine_enum = ENGINE_INVAL_GK20A;
218 struct fifo_gk20a *f = NULL;
219 u32 engine_id_idx;
220 struct fifo_engine_info_gk20a *engine_info;
221 u32 active_engine_id = 0;
222
223 if (!g) {
224 return ce_runlist_id;
225 }
226
227 f = &g->fifo;
228
229 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
230 active_engine_id = f->active_engines_list[engine_id_idx];
231 engine_info = &f->engine_info[active_engine_id];
232 engine_enum = engine_info->engine_enum;
233
234		/* select the last available ASYNC_CE, if any */
235 if (engine_enum == ENGINE_ASYNC_CE_GK20A) {
236 ce_runlist_id = engine_info->runlist_id;
237 }
238 }
239
240 return ce_runlist_id;
241}
242
243u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
244{
245 u32 gr_engine_cnt = 0;
246 u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
247 struct fifo_engine_info_gk20a *engine_info;
248 u32 gr_runlist_id = ~0;
249
250 /* Consider 1st available GR engine */
251 gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id,
252 1, ENGINE_GR_GK20A);
253
254 if (!gr_engine_cnt) {
255 nvgpu_err(g,
256 "No GR engine available on this device!");
257 goto end;
258 }
259
260 engine_info = gk20a_fifo_get_engine_info(g, gr_engine_id);
261
262 if (engine_info) {
263 gr_runlist_id = engine_info->runlist_id;
264 } else {
265 nvgpu_err(g,
266 "gr_engine_id is not in active list/invalid %d", gr_engine_id);
267 }
268
269end:
270 return gr_runlist_id;
271}
272
273bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
274{
275 struct fifo_gk20a *f = NULL;
276 u32 engine_id_idx;
277 u32 active_engine_id;
278 struct fifo_engine_info_gk20a *engine_info;
279
280 if (!g) {
281 return false;
282 }
283
284 f = &g->fifo;
285
286 for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
287 active_engine_id = f->active_engines_list[engine_id_idx];
288 engine_info = gk20a_fifo_get_engine_info(g, active_engine_id);
289 if (engine_info && (engine_info->runlist_id == runlist_id)) {
290 return true;
291 }
292 }
293
294 return false;
295}
296
297/*
298 * Link engine IDs to MMU IDs and vice versa.
299 */
300
301static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
302{
303 u32 fault_id = FIFO_INVAL_ENGINE_ID;
304 struct fifo_engine_info_gk20a *engine_info;
305
306 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
307
308 if (engine_info) {
309 fault_id = engine_info->fault_id;
310 } else {
311 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
312 }
313 return fault_id;
314}
315
316static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
317{
318 u32 engine_id;
319 u32 active_engine_id;
320 struct fifo_engine_info_gk20a *engine_info;
321 struct fifo_gk20a *f = &g->fifo;
322
323 for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
324 active_engine_id = f->active_engines_list[engine_id];
325 engine_info = &g->fifo.engine_info[active_engine_id];
326
327 if (engine_info->fault_id == fault_id) {
328 break;
329 }
330 active_engine_id = FIFO_INVAL_ENGINE_ID;
331 }
332 return active_engine_id;
333}
334
335int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
336 u32 *inst_id)
337{
338 int ret = ENGINE_INVAL_GK20A;
339
340 nvgpu_log_info(g, "engine type %d", engine_type);
341 if (engine_type == top_device_info_type_enum_graphics_v()) {
342 ret = ENGINE_GR_GK20A;
343 } else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
344 (engine_type <= top_device_info_type_enum_copy2_v())) {
345		/* Assume each CE engine has its own runlist at this point.
346		 * ENGINE_GRCE_GK20A type CEs are identified later by comparing
347		 * their runlist_id with the GR runlist_id in init_engine_info(). */
348 ret = ENGINE_ASYNC_CE_GK20A;
349 /* inst_id starts from CE0 to CE2 */
350 if (inst_id) {
351 *inst_id = (engine_type - top_device_info_type_enum_copy0_v());
352 }
353 }
354
355 return ret;
356}
357
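/*
 * Walk the top-level device info table and populate engine_info[] and
 * active_engines_list[] for every engine entry found.
 */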
358int gk20a_fifo_init_engine_info(struct fifo_gk20a *f)
359{
360 struct gk20a *g = f->g;
361 u32 i;
362 u32 max_info_entries = top_device_info__size_1_v();
363 u32 engine_enum = ENGINE_INVAL_GK20A;
364 u32 engine_id = FIFO_INVAL_ENGINE_ID;
365 u32 runlist_id = ~0;
366 u32 pbdma_id = ~0;
367 u32 intr_id = ~0;
368 u32 reset_id = ~0;
369 u32 inst_id = 0;
370 u32 pri_base = 0;
371 u32 fault_id = 0;
372 u32 gr_runlist_id = ~0;
373 bool found_pbdma_for_runlist = false;
374
375 nvgpu_log_fn(g, " ");
376
377 f->num_engines = 0;
378
379 for (i = 0; i < max_info_entries; i++) {
380 u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
381 u32 entry = top_device_info_entry_v(table_entry);
382 u32 runlist_bit;
383
384 if (entry == top_device_info_entry_enum_v()) {
385 if (top_device_info_engine_v(table_entry)) {
386 engine_id =
387 top_device_info_engine_enum_v(table_entry);
388 nvgpu_log_info(g, "info: engine_id %d",
389 top_device_info_engine_enum_v(table_entry));
390 }
391
392
393 if (top_device_info_runlist_v(table_entry)) {
394 runlist_id =
395 top_device_info_runlist_enum_v(table_entry);
396 nvgpu_log_info(g, "gr info: runlist_id %d", runlist_id);
397
398 runlist_bit = BIT(runlist_id);
399
400 found_pbdma_for_runlist = false;
401 for (pbdma_id = 0; pbdma_id < f->num_pbdma;
402 pbdma_id++) {
403 if (f->pbdma_map[pbdma_id] &
404 runlist_bit) {
405 nvgpu_log_info(g,
406 "gr info: pbdma_map[%d]=%d",
407 pbdma_id,
408 f->pbdma_map[pbdma_id]);
409 found_pbdma_for_runlist = true;
410 break;
411 }
412 }
413
414 if (!found_pbdma_for_runlist) {
415 nvgpu_err(g, "busted pbdma map");
416 return -EINVAL;
417 }
418 }
419
420 if (top_device_info_intr_v(table_entry)) {
421 intr_id =
422 top_device_info_intr_enum_v(table_entry);
423 nvgpu_log_info(g, "gr info: intr_id %d", intr_id);
424 }
425
426 if (top_device_info_reset_v(table_entry)) {
427 reset_id =
428 top_device_info_reset_enum_v(table_entry);
429 nvgpu_log_info(g, "gr info: reset_id %d",
430 reset_id);
431 }
432 } else if (entry == top_device_info_entry_engine_type_v()) {
433 u32 engine_type =
434 top_device_info_type_enum_v(table_entry);
435 engine_enum =
436 g->ops.fifo.engine_enum_from_type(g,
437 engine_type, &inst_id);
438 } else if (entry == top_device_info_entry_data_v()) {
439 /* gk20a doesn't support device_info_data packet parsing */
440 if (g->ops.fifo.device_info_data_parse) {
441 g->ops.fifo.device_info_data_parse(g,
442 table_entry, &inst_id, &pri_base,
443 &fault_id);
444 }
445 }
446
447 if (!top_device_info_chain_v(table_entry)) {
448 if (engine_enum < ENGINE_INVAL_GK20A) {
449 struct fifo_engine_info_gk20a *info =
450 &g->fifo.engine_info[engine_id];
451
452 info->intr_mask |= BIT(intr_id);
453 info->reset_mask |= BIT(reset_id);
454 info->runlist_id = runlist_id;
455 info->pbdma_id = pbdma_id;
456 info->inst_id = inst_id;
457 info->pri_base = pri_base;
458
459 if (engine_enum == ENGINE_GR_GK20A) {
460 gr_runlist_id = runlist_id;
461 }
462
463				/* GR and GR_COPY share the same runlist_id */
464 if ((engine_enum == ENGINE_ASYNC_CE_GK20A) &&
465 (gr_runlist_id == runlist_id)) {
466 engine_enum = ENGINE_GRCE_GK20A;
467 }
468
469 info->engine_enum = engine_enum;
470
471 if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A)) {
472 fault_id = 0x1b;
473 }
474 info->fault_id = fault_id;
475
476				/* engine_id ranges from 0 to NV_HOST_NUM_ENGINES */
477 f->active_engines_list[f->num_engines] = engine_id;
478
479 ++f->num_engines;
480
481 engine_enum = ENGINE_INVAL_GK20A;
482 }
483 }
484 }
485
486 return 0;
487}
488
489u32 gk20a_fifo_act_eng_interrupt_mask(struct gk20a *g, u32 act_eng_id)
490{
491 struct fifo_engine_info_gk20a *engine_info = NULL;
492
493 engine_info = gk20a_fifo_get_engine_info(g, act_eng_id);
494 if (engine_info) {
495 return engine_info->intr_mask;
496 }
497
498 return 0;
499}
500
501u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
502{
503 u32 eng_intr_mask = 0;
504 unsigned int i;
505 u32 active_engine_id = 0;
506 u32 engine_enum = ENGINE_INVAL_GK20A;
507
508 for (i = 0; i < g->fifo.num_engines; i++) {
509 u32 intr_mask;
510 active_engine_id = g->fifo.active_engines_list[i];
511 intr_mask = g->fifo.engine_info[active_engine_id].intr_mask;
512 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
513 if (((engine_enum == ENGINE_GRCE_GK20A) ||
514 (engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
515 (!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall)) {
516 continue;
517 }
518
519 eng_intr_mask |= intr_mask;
520 }
521
522 return eng_intr_mask;
523}
524
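/*
 * Free the runlist buffers, active channel/TSG bitmaps and locks that were
 * set up by init_runlist().
 */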
525void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
526{
527 u32 i;
528 u32 runlist_id;
529 struct fifo_runlist_info_gk20a *runlist;
530 struct gk20a *g = NULL;
531
532 if (!f || !f->runlist_info) {
533 return;
534 }
535
536 g = f->g;
537
538 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
539 runlist = &f->runlist_info[runlist_id];
540 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
541 nvgpu_dma_free(g, &runlist->mem[i]);
542 }
543
544 nvgpu_kfree(g, runlist->active_channels);
545 runlist->active_channels = NULL;
546
547 nvgpu_kfree(g, runlist->active_tsgs);
548 runlist->active_tsgs = NULL;
549
550 nvgpu_mutex_destroy(&runlist->runlist_lock);
551
552 }
553 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
554 f->max_runlists));
555
556 nvgpu_kfree(g, f->runlist_info);
557 f->runlist_info = NULL;
558 f->max_runlists = 0;
559}
560
561static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
562{
563 struct gk20a *g = f->g;
564 unsigned int i = 0;
565
566 nvgpu_log_fn(g, " ");
567
568 nvgpu_channel_worker_deinit(g);
569 /*
570 * Make sure all channels are closed before deleting them.
571 */
572 for (; i < f->num_channels; i++) {
573 struct channel_gk20a *c = f->channel + i;
574 struct tsg_gk20a *tsg = f->tsg + i;
575
576 /*
577		 * Could race, but the worst that happens is we get an error message
578 * from gk20a_free_channel() complaining about multiple closes.
579 */
580 if (c->referenceable) {
581 __gk20a_channel_kill(c);
582 }
583
584 nvgpu_mutex_destroy(&tsg->event_id_list_lock);
585
586 nvgpu_mutex_destroy(&c->ioctl_lock);
587 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
588 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
589 nvgpu_mutex_destroy(&c->sync_lock);
590#if defined(CONFIG_GK20A_CYCLE_STATS)
591 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
592 nvgpu_mutex_destroy(&c->cs_client_mutex);
593#endif
594 nvgpu_mutex_destroy(&c->dbg_s_lock);
595
596 }
597
598 nvgpu_vfree(g, f->channel);
599 nvgpu_vfree(g, f->tsg);
600 if (g->ops.mm.is_bar1_supported(g)) {
601 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
602 } else {
603 nvgpu_dma_free(g, &f->userd);
604 }
605
606 gk20a_fifo_delete_runlist(f);
607
608 nvgpu_kfree(g, f->pbdma_map);
609 f->pbdma_map = NULL;
610 nvgpu_kfree(g, f->engine_info);
611 f->engine_info = NULL;
612 nvgpu_kfree(g, f->active_engines_list);
613 f->active_engines_list = NULL;
614}
615
616/* reads info from hardware and fills in pbdma exception info record */
617static inline void get_exception_pbdma_info(
618 struct gk20a *g,
619 struct fifo_engine_info_gk20a *eng_info)
620{
621 struct fifo_pbdma_exception_info_gk20a *e =
622 &eng_info->pbdma_exception_info;
623
624 u32 pbdma_status_r = e->status_r = gk20a_readl(g,
625 fifo_pbdma_status_r(eng_info->pbdma_id));
626 e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
627 e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
628 fifo_pbdma_status_id_type_chid_v();
629 e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
630 e->next_id_is_chid =
631 fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
632 fifo_pbdma_status_next_id_type_chid_v();
633 e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
634 e->chsw_in_progress =
635 fifo_pbdma_status_chsw_v(pbdma_status_r) ==
636 fifo_pbdma_status_chsw_in_progress_v();
637}
638
639static void fifo_pbdma_exception_status(struct gk20a *g,
640 struct fifo_engine_info_gk20a *eng_info)
641{
642 struct fifo_pbdma_exception_info_gk20a *e;
643 get_exception_pbdma_info(g, eng_info);
644 e = &eng_info->pbdma_exception_info;
645
646 nvgpu_log_fn(g, "pbdma_id %d, "
647 "id_type %s, id %d, chan_status %d, "
648 "next_id_type %s, next_id %d, "
649 "chsw_in_progress %d",
650 eng_info->pbdma_id,
651 e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
652 e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
653 e->chsw_in_progress);
654}
655
656/* reads info from hardware and fills in engine exception info record */
657static inline void get_exception_engine_info(
658 struct gk20a *g,
659 struct fifo_engine_info_gk20a *eng_info)
660{
661 struct fifo_engine_exception_info_gk20a *e =
662 &eng_info->engine_exception_info;
663 u32 engine_status_r = e->status_r =
664 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
665 e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
666 e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
667 fifo_engine_status_id_type_chid_v();
668 e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
669 e->faulted =
670 fifo_engine_status_faulted_v(engine_status_r) ==
671 fifo_engine_status_faulted_true_v();
672 e->idle =
673 fifo_engine_status_engine_v(engine_status_r) ==
674 fifo_engine_status_engine_idle_v();
675 e->ctxsw_in_progress =
676 fifo_engine_status_ctxsw_v(engine_status_r) ==
677 fifo_engine_status_ctxsw_in_progress_v();
678}
679
680static void fifo_engine_exception_status(struct gk20a *g,
681 struct fifo_engine_info_gk20a *eng_info)
682{
683 struct fifo_engine_exception_info_gk20a *e;
684 get_exception_engine_info(g, eng_info);
685 e = &eng_info->engine_exception_info;
686
687 nvgpu_log_fn(g, "engine_id %d, id_type %s, id %d, ctx_status %d, "
688 "faulted %d, idle %d, ctxsw_in_progress %d, ",
689 eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
690 e->id, e->ctx_status_v,
691 e->faulted, e->idle, e->ctxsw_in_progress);
692}
693
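/*
 * Allocate per-runlist state: active channel/TSG bitmaps, runlist buffers,
 * the runlist lock, and the pbdma/engine bitmasks derived from the pbdma map
 * and engine info.
 */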
694static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
695{
696 struct fifo_runlist_info_gk20a *runlist;
697 struct fifo_engine_info_gk20a *engine_info;
698 unsigned int runlist_id;
699 u32 i;
700 size_t runlist_size;
701 u32 active_engine_id, pbdma_id, engine_id;
702 int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
703 NVGPU_DMA_FORCE_CONTIGUOUS : 0;
704 int err = 0;
705
706 nvgpu_log_fn(g, " ");
707
708 f->max_runlists = g->ops.fifo.eng_runlist_base_size();
709 f->runlist_info = nvgpu_kzalloc(g,
710 sizeof(struct fifo_runlist_info_gk20a) *
711 f->max_runlists);
712 if (!f->runlist_info) {
713 goto clean_up_runlist;
714 }
715
716 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
717 f->max_runlists));
718
719 for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
720 runlist = &f->runlist_info[runlist_id];
721
722 runlist->active_channels =
723 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
724 BITS_PER_BYTE));
725 if (!runlist->active_channels) {
726 goto clean_up_runlist;
727 }
728
729 runlist->active_tsgs =
730 nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
731 BITS_PER_BYTE));
732 if (!runlist->active_tsgs) {
733 goto clean_up_runlist;
734 }
735
736 runlist_size = f->runlist_entry_size * f->num_runlist_entries;
737 nvgpu_log(g, gpu_dbg_info,
738 "runlist_entries %d runlist size %zu",
739 f->num_runlist_entries, runlist_size);
740
741 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
742 err = nvgpu_dma_alloc_flags_sys(g, flags,
743 runlist_size,
744 &runlist->mem[i]);
745 if (err) {
746 nvgpu_err(g, "memory allocation failed");
747 goto clean_up_runlist;
748 }
749 }
750
751 err = nvgpu_mutex_init(&runlist->runlist_lock);
752 if (err != 0) {
753 nvgpu_err(g,
754 "Error in runlist_lock mutex initialization");
755 goto clean_up_runlist;
756 }
757
758		/* None of the buffers is pinned if this value doesn't change.
759 Otherwise, one of them (cur_buffer) must have been pinned. */
760 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
761
762 for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
763 if (f->pbdma_map[pbdma_id] & BIT(runlist_id)) {
764 runlist->pbdma_bitmask |= BIT(pbdma_id);
765 }
766 }
767 nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x",
768 runlist_id, runlist->pbdma_bitmask);
769
770 for (engine_id = 0; engine_id < f->num_engines; ++engine_id) {
771 active_engine_id = f->active_engines_list[engine_id];
772 engine_info = &f->engine_info[active_engine_id];
773
774 if (engine_info && engine_info->runlist_id == runlist_id) {
775 runlist->eng_bitmask |= BIT(active_engine_id);
776 }
777 }
778 nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x",
779 runlist_id, runlist->eng_bitmask);
780 }
781
782 nvgpu_log_fn(g, "done");
783 return 0;
784
785clean_up_runlist:
786 gk20a_fifo_delete_runlist(f);
787 nvgpu_log_fn(g, "fail");
788 return err;
789}
790
791u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g)
792{
793 u32 intr_0_error_mask =
794 fifo_intr_0_bind_error_pending_f() |
795 fifo_intr_0_sched_error_pending_f() |
796 fifo_intr_0_chsw_error_pending_f() |
797 fifo_intr_0_fb_flush_timeout_pending_f() |
798 fifo_intr_0_dropped_mmu_fault_pending_f() |
799 fifo_intr_0_mmu_fault_pending_f() |
800 fifo_intr_0_lb_error_pending_f() |
801 fifo_intr_0_pio_error_pending_f();
802
803 return intr_0_error_mask;
804}
805
806static u32 gk20a_fifo_intr_0_en_mask(struct gk20a *g)
807{
808 u32 intr_0_en_mask;
809
810 intr_0_en_mask = g->ops.fifo.intr_0_error_mask(g);
811
812 intr_0_en_mask |= fifo_intr_0_runlist_event_pending_f() |
813 fifo_intr_0_pbdma_intr_pending_f();
814
815 return intr_0_en_mask;
816}
817
818int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
819{
820 u32 intr_stall;
821 u32 mask;
822 u32 timeout;
823 unsigned int i;
824 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
825
826 nvgpu_log_fn(g, " ");
827
828 /* enable pmc pfifo */
829 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_FIFO));
830
831 nvgpu_cg_slcg_fifo_load_enable(g);
832
833 nvgpu_cg_blcg_fifo_load_enable(g);
834
835 timeout = gk20a_readl(g, fifo_fb_timeout_r());
836 timeout = set_field(timeout, fifo_fb_timeout_period_m(),
837 fifo_fb_timeout_period_max_f());
838 nvgpu_log_info(g, "fifo_fb_timeout reg val = 0x%08x", timeout);
839 gk20a_writel(g, fifo_fb_timeout_r(), timeout);
840
841 /* write pbdma timeout value */
842 for (i = 0; i < host_num_pbdma; i++) {
843 timeout = gk20a_readl(g, pbdma_timeout_r(i));
844 timeout = set_field(timeout, pbdma_timeout_period_m(),
845 pbdma_timeout_period_max_f());
846 nvgpu_log_info(g, "pbdma_timeout reg val = 0x%08x", timeout);
847 gk20a_writel(g, pbdma_timeout_r(i), timeout);
848 }
849 if (g->ops.fifo.apply_pb_timeout) {
850 g->ops.fifo.apply_pb_timeout(g);
851 }
852
853 if (g->ops.fifo.apply_ctxsw_timeout_intr) {
854 g->ops.fifo.apply_ctxsw_timeout_intr(g);
855 } else {
856 timeout = g->fifo_eng_timeout_us;
857 timeout = scale_ptimer(timeout,
858 ptimer_scalingfactor10x(g->ptimer_src_freq));
859 timeout |= fifo_eng_timeout_detection_enabled_f();
860 gk20a_writel(g, fifo_eng_timeout_r(), timeout);
861 }
862
863 /* clear and enable pbdma interrupt */
864 for (i = 0; i < host_num_pbdma; i++) {
865 gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
866 gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
867
868 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
869 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
870 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
871 nvgpu_log_info(g, "pbdma id:%u, intr_en_0 0x%08x", i, intr_stall);
872 gk20a_writel(g, pbdma_intr_en_0_r(i), intr_stall);
873 intr_stall = gk20a_readl(g, pbdma_intr_stall_1_r(i));
874 /*
875 * For bug 2082123
876 * Mask the unused HCE_RE_ILLEGAL_OP bit from the interrupt.
877 */
878 intr_stall &= ~pbdma_intr_stall_1_hce_illegal_op_enabled_f();
879 nvgpu_log_info(g, "pbdma id:%u, intr_en_1 0x%08x", i, intr_stall);
880 gk20a_writel(g, pbdma_intr_en_1_r(i), intr_stall);
881 }
882
883 /* reset runlist interrupts */
884 gk20a_writel(g, fifo_intr_runlist_r(), ~0);
885
886 /* clear and enable pfifo interrupt */
887 gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
888 mask = gk20a_fifo_intr_0_en_mask(g);
889 nvgpu_log_info(g, "fifo_intr_en_0 0x%08x", mask);
890 gk20a_writel(g, fifo_intr_en_0_r(), mask);
891 nvgpu_log_info(g, "fifo_intr_en_1 = 0x80000000");
892 gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);
893
894 nvgpu_log_fn(g, "done");
895
896 return 0;
897}
898
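/*
 * Software-side FIFO setup: allocate the channel, TSG, pbdma and engine
 * tables, then build the runlists.
 */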
899int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
900{
901 struct fifo_gk20a *f = &g->fifo;
902 unsigned int chid, i;
903 int err = 0;
904
905 nvgpu_log_fn(g, " ");
906
907 f->g = g;
908
909 err = nvgpu_mutex_init(&f->intr.isr.mutex);
910 if (err) {
911 nvgpu_err(g, "failed to init isr.mutex");
912 return err;
913 }
914
915 err = nvgpu_mutex_init(&f->engines_reset_mutex);
916 if (err) {
917 nvgpu_err(g, "failed to init engines_reset_mutex");
918 return err;
919 }
920
921 g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
922
923 f->num_channels = g->ops.fifo.get_num_fifos(g);
924 f->runlist_entry_size = g->ops.fifo.runlist_entry_size();
925 f->num_runlist_entries = fifo_eng_runlist_length_max_v();
926 f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
927 f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
928
929 f->userd_entry_size = 1 << ram_userd_base_shift_v();
930
931 f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
932 f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg));
933 f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map));
934 f->engine_info = nvgpu_kzalloc(g, f->max_engines *
935 sizeof(*f->engine_info));
936 f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32));
937
938 if (!(f->channel && f->tsg && f->pbdma_map && f->engine_info &&
939 f->active_engines_list)) {
940 err = -ENOMEM;
941 goto clean_up;
942 }
943 memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32)));
944
945 /* pbdma map needs to be in place before calling engine info init */
946 for (i = 0; i < f->num_pbdma; ++i) {
947 f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
948 }
949
950 g->ops.fifo.init_engine_info(f);
951
952 err = init_runlist(g, f);
953 if (err) {
954 nvgpu_err(g, "failed to init runlist");
955 goto clean_up;
956 }
957
958 nvgpu_init_list_node(&f->free_chs);
959
960 err = nvgpu_mutex_init(&f->free_chs_mutex);
961 if (err) {
962 nvgpu_err(g, "failed to init free_chs_mutex");
963 goto clean_up;
964 }
965
966 for (chid = 0; chid < f->num_channels; chid++) {
967 gk20a_init_channel_support(g, chid);
968 gk20a_init_tsg_support(g, chid);
969 }
970
971 err = nvgpu_mutex_init(&f->tsg_inuse_mutex);
972 if (err) {
973 nvgpu_err(g, "failed to init tsg_inuse_mutex");
974 goto clean_up;
975 }
976
977 f->remove_support = gk20a_remove_fifo_support;
978
979 f->deferred_reset_pending = false;
980
981 err = nvgpu_mutex_init(&f->deferred_reset_mutex);
982 if (err) {
983 nvgpu_err(g, "failed to init deferred_reset_mutex");
984 goto clean_up;
985 }
986
987 nvgpu_log_fn(g, "done");
988 return 0;
989
990clean_up:
991 nvgpu_err(g, "fail");
992
993 nvgpu_vfree(g, f->channel);
994 f->channel = NULL;
995 nvgpu_vfree(g, f->tsg);
996 f->tsg = NULL;
997 nvgpu_kfree(g, f->pbdma_map);
998 f->pbdma_map = NULL;
999 nvgpu_kfree(g, f->engine_info);
1000 f->engine_info = NULL;
1001 nvgpu_kfree(g, f->active_engines_list);
1002 f->active_engines_list = NULL;
1003
1004 return err;
1005}
1006
1007int gk20a_init_fifo_setup_sw(struct gk20a *g)
1008{
1009 struct fifo_gk20a *f = &g->fifo;
1010 unsigned int chid;
1011 u64 userd_base;
1012 int err = 0;
1013
1014 nvgpu_log_fn(g, " ");
1015
1016 if (f->sw_ready) {
1017 nvgpu_log_fn(g, "skip init");
1018 return 0;
1019 }
1020
1021 err = gk20a_init_fifo_setup_sw_common(g);
1022 if (err) {
1023 nvgpu_err(g, "fail: err: %d", err);
1024 return err;
1025 }
1026
1027 if (g->ops.mm.is_bar1_supported(g)) {
1028 err = nvgpu_dma_alloc_map_sys(g->mm.bar1.vm,
1029 f->userd_entry_size * f->num_channels,
1030 &f->userd);
1031 } else {
1032 err = nvgpu_dma_alloc_sys(g, f->userd_entry_size *
1033 f->num_channels, &f->userd);
1034 }
1035 if (err) {
1036 nvgpu_err(g, "userd memory allocation failed");
1037 goto clean_up;
1038 }
1039 nvgpu_log(g, gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
1040
1041 userd_base = nvgpu_mem_get_addr(g, &f->userd);
1042 for (chid = 0; chid < f->num_channels; chid++) {
1043 f->channel[chid].userd_iova = userd_base +
1044 chid * f->userd_entry_size;
1045 f->channel[chid].userd_gpu_va =
1046 f->userd.gpu_va + chid * f->userd_entry_size;
1047 }
1048
1049 err = nvgpu_channel_worker_init(g);
1050 if (err) {
1051 goto clean_up;
1052 }
1053
1054 f->sw_ready = true;
1055
1056 nvgpu_log_fn(g, "done");
1057 return 0;
1058
1059clean_up:
1060 nvgpu_log_fn(g, "fail");
1061 if (nvgpu_mem_is_valid(&f->userd)) {
1062 if (g->ops.mm.is_bar1_supported(g)) {
1063 nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
1064 } else {
1065 nvgpu_dma_free(g, &f->userd);
1066 }
1067 }
1068
1069 return err;
1070}
1071
1072void gk20a_fifo_handle_runlist_event(struct gk20a *g)
1073{
1074 u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
1075
1076 nvgpu_log(g, gpu_dbg_intr, "runlist event %08x",
1077 runlist_event);
1078
1079 gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
1080}
1081
1082int gk20a_init_fifo_setup_hw(struct gk20a *g)
1083{
1084 struct fifo_gk20a *f = &g->fifo;
1085
1086 nvgpu_log_fn(g, " ");
1087
1088 /* test write, read through bar1 @ userd region before
1089 * turning on the snooping */
1090 {
1091 struct fifo_gk20a *f = &g->fifo;
1092 u32 v, v1 = 0x33, v2 = 0x55;
1093
1094 u32 bar1_vaddr = f->userd.gpu_va;
1095 volatile u32 *cpu_vaddr = f->userd.cpu_va;
1096
1097 nvgpu_log_info(g, "test bar1 @ vaddr 0x%x",
1098 bar1_vaddr);
1099
1100 v = gk20a_bar1_readl(g, bar1_vaddr);
1101
1102 *cpu_vaddr = v1;
1103 nvgpu_mb();
1104
1105 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
1106			nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, "
1107				"GPU read 0x%x", *cpu_vaddr, gk20a_bar1_readl(g, bar1_vaddr));
1108 return -EINVAL;
1109 }
1110
1111 gk20a_bar1_writel(g, bar1_vaddr, v2);
1112
1113 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
1114			nvgpu_err(g, "bar1 broken @ gk20a: GPU wrote 0x%x, "
1115				"CPU read 0x%x", gk20a_bar1_readl(g, bar1_vaddr), *cpu_vaddr);
1116 return -EINVAL;
1117 }
1118
1119 /* is it visible to the cpu? */
1120 if (*cpu_vaddr != v2) {
1121 nvgpu_err(g,
1122 "cpu didn't see bar1 write @ %p!",
1123 cpu_vaddr);
1124 }
1125
1126 /* put it back */
1127 gk20a_bar1_writel(g, bar1_vaddr, v);
1128 }
1129
1130 /*XXX all manner of flushes and caching worries, etc */
1131
1132 /* set the base for the userd region now */
1133 gk20a_writel(g, fifo_bar1_base_r(),
1134 fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
1135 fifo_bar1_base_valid_true_f());
1136
1137 nvgpu_log_fn(g, "done");
1138
1139 return 0;
1140}
1141
1142int gk20a_init_fifo_support(struct gk20a *g)
1143{
1144 u32 err;
1145
1146 err = g->ops.fifo.setup_sw(g);
1147 if (err) {
1148 return err;
1149 }
1150
1151 if (g->ops.fifo.init_fifo_setup_hw) {
1152 err = g->ops.fifo.init_fifo_setup_hw(g);
1153 }
1154 if (err) {
1155 return err;
1156 }
1157
1158 return err;
1159}
1160
1161/* return with a reference to the channel, caller must put it back */
1162struct channel_gk20a *
1163gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
1164{
1165 struct fifo_gk20a *f = &g->fifo;
1166 unsigned int ci;
1167 if (unlikely(!f->channel)) {
1168 return NULL;
1169 }
1170 for (ci = 0; ci < f->num_channels; ci++) {
1171 struct channel_gk20a *ch;
1172 u64 ch_inst_ptr;
1173
1174 ch = gk20a_channel_from_id(g, ci);
1175 /* only alive channels are searched */
1176 if (!ch) {
1177 continue;
1178 }
1179
1180 ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
1181 if (inst_ptr == ch_inst_ptr) {
1182 return ch;
1183 }
1184
1185 gk20a_channel_put(ch);
1186 }
1187 return NULL;
1188}
1189
1190/* fault info/descriptions.
1191 * tbd: move to setup
1192 */
1193static const char * const gk20a_fault_type_descs[] = {
1194 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
1195 "pde size",
1196 "pte",
1197 "va limit viol",
1198 "unbound inst",
1199 "priv viol",
1200 "ro viol",
1201 "wo viol",
1202 "pitch mask",
1203 "work creation",
1204 "bad aperture",
1205 "compression failure",
1206 "bad kind",
1207 "region viol",
1208 "dual ptes",
1209 "poisoned",
1210};
1211/* engine subid descriptions */
1212static const char * const engine_subid_descs[] = {
1213 "gpc",
1214 "hub",
1215};
1216
1217static const char * const gk20a_hub_client_descs[] = {
1218 "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
1219 "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
1220 "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
1221 "scc nb", "sec", "ssync", "gr copy", "xv", "mmu nb",
1222 "msenc", "d falcon", "sked", "a falcon", "n/a",
1223};
1224
1225static const char * const gk20a_gpc_client_descs[] = {
1226 "l1 0", "t1 0", "pe 0",
1227 "l1 1", "t1 1", "pe 1",
1228 "l1 2", "t1 2", "pe 2",
1229 "l1 3", "t1 3", "pe 3",
1230 "rast", "gcc", "gpccs",
1231 "prop 0", "prop 1", "prop 2", "prop 3",
1232 "l1 4", "t1 4", "pe 4",
1233 "l1 5", "t1 5", "pe 5",
1234 "l1 6", "t1 6", "pe 6",
1235 "l1 7", "t1 7", "pe 7",
1236};
1237
1238static const char * const does_not_exist[] = {
1239 "does not exist"
1240};
1241
1242/* fill in mmu fault desc */
1243void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault)
1244{
1245 if (mmfault->fault_type >= ARRAY_SIZE(gk20a_fault_type_descs)) {
1246 WARN_ON(mmfault->fault_type >=
1247 ARRAY_SIZE(gk20a_fault_type_descs));
1248 } else {
1249 mmfault->fault_type_desc =
1250 gk20a_fault_type_descs[mmfault->fault_type];
1251 }
1252}
1253
1254/* fill in mmu fault client description */
1255void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault)
1256{
1257 if (mmfault->client_id >= ARRAY_SIZE(gk20a_hub_client_descs)) {
1258 WARN_ON(mmfault->client_id >=
1259 ARRAY_SIZE(gk20a_hub_client_descs));
1260 } else {
1261 mmfault->client_id_desc =
1262 gk20a_hub_client_descs[mmfault->client_id];
1263 }
1264}
1265
1266/* fill in mmu fault gpc description */
1267void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault)
1268{
1269 if (mmfault->client_id >= ARRAY_SIZE(gk20a_gpc_client_descs)) {
1270 WARN_ON(mmfault->client_id >=
1271 ARRAY_SIZE(gk20a_gpc_client_descs));
1272 } else {
1273 mmfault->client_id_desc =
1274 gk20a_gpc_client_descs[mmfault->client_id];
1275 }
1276}
1277
1278static void get_exception_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1279 struct mmu_fault_info *mmfault)
1280{
1281 g->ops.fifo.get_mmu_fault_info(g, mmu_fault_id, mmfault);
1282
1283 /* parse info */
1284 mmfault->fault_type_desc = does_not_exist[0];
1285 if (g->ops.fifo.get_mmu_fault_desc) {
1286 g->ops.fifo.get_mmu_fault_desc(mmfault);
1287 }
1288
1289 if (mmfault->client_type >= ARRAY_SIZE(engine_subid_descs)) {
1290 WARN_ON(mmfault->client_type >= ARRAY_SIZE(engine_subid_descs));
1291 mmfault->client_type_desc = does_not_exist[0];
1292 } else {
1293 mmfault->client_type_desc =
1294 engine_subid_descs[mmfault->client_type];
1295 }
1296
1297 mmfault->client_id_desc = does_not_exist[0];
1298 if ((mmfault->client_type ==
1299 fifo_intr_mmu_fault_info_engine_subid_hub_v())
1300 && g->ops.fifo.get_mmu_fault_client_desc) {
1301 g->ops.fifo.get_mmu_fault_client_desc(mmfault);
1302 } else if ((mmfault->client_type ==
1303 fifo_intr_mmu_fault_info_engine_subid_gpc_v())
1304 && g->ops.fifo.get_mmu_fault_gpc_desc) {
1305 g->ops.fifo.get_mmu_fault_gpc_desc(mmfault);
1306 }
1307}
1308
1309/* reads info from hardware and fills in mmu fault info record */
1310void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
1311 struct mmu_fault_info *mmfault)
1312{
1313 u32 fault_info;
1314 u32 addr_lo, addr_hi;
1315
1316 nvgpu_log_fn(g, "mmu_fault_id %d", mmu_fault_id);
1317
1318 memset(mmfault, 0, sizeof(*mmfault));
1319
1320 fault_info = gk20a_readl(g,
1321 fifo_intr_mmu_fault_info_r(mmu_fault_id));
1322 mmfault->fault_type =
1323 fifo_intr_mmu_fault_info_type_v(fault_info);
1324 mmfault->access_type =
1325 fifo_intr_mmu_fault_info_write_v(fault_info);
1326 mmfault->client_type =
1327 fifo_intr_mmu_fault_info_engine_subid_v(fault_info);
1328 mmfault->client_id =
1329 fifo_intr_mmu_fault_info_client_v(fault_info);
1330
1331 addr_lo = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(mmu_fault_id));
1332 addr_hi = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(mmu_fault_id));
1333 mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
1334	/* note: ignoring aperture on gk20a... */
1335 mmfault->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
1336 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(mmu_fault_id)));
1337 /* note: inst_ptr is a 40b phys addr. */
1338 mmfault->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
1339}
1340
1341void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
1342{
1343 u32 engine_enum = ENGINE_INVAL_GK20A;
1344 struct fifo_engine_info_gk20a *engine_info;
1345
1346 nvgpu_log_fn(g, " ");
1347
1348 if (!g) {
1349 return;
1350 }
1351
1352 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1353
1354 if (engine_info) {
1355 engine_enum = engine_info->engine_enum;
1356 }
1357
1358 if (engine_enum == ENGINE_INVAL_GK20A) {
1359 nvgpu_err(g, "unsupported engine_id %d", engine_id);
1360 }
1361
1362 if (engine_enum == ENGINE_GR_GK20A) {
1363 if (g->support_pmu) {
1364 if (nvgpu_pg_elpg_disable(g) != 0 ) {
1365 nvgpu_err(g, "failed to set disable elpg");
1366 }
1367 }
1368
1369#ifdef CONFIG_GK20A_CTXSW_TRACE
1370 /*
1371 * Resetting engine will alter read/write index. Need to flush
1372 * circular buffer before re-enabling FECS.
1373 */
1374 if (g->ops.fecs_trace.reset)
1375 g->ops.fecs_trace.reset(g);
1376#endif
1377 if (!nvgpu_platform_is_simulation(g)) {
1378 /*HALT_PIPELINE method, halt GR engine*/
1379 if (gr_gk20a_halt_pipe(g)) {
1380 nvgpu_err(g, "failed to HALT gr pipe");
1381 }
1382 /*
1383 * resetting engine using mc_enable_r() is not
1384 * enough, we do full init sequence
1385 */
1386 nvgpu_log(g, gpu_dbg_info, "resetting gr engine");
1387 gk20a_gr_reset(g);
1388 } else {
1389 nvgpu_log(g, gpu_dbg_info,
1390 "HALT gr pipe not supported and "
1391 "gr cannot be reset without halting gr pipe");
1392 }
1393 if (g->support_pmu) {
1394 if (nvgpu_pg_elpg_enable(g) != 0 ) {
1395 nvgpu_err(g, "failed to set enable elpg");
1396 }
1397 }
1398 }
1399 if ((engine_enum == ENGINE_GRCE_GK20A) ||
1400 (engine_enum == ENGINE_ASYNC_CE_GK20A)) {
1401 g->ops.mc.reset(g, engine_info->reset_mask);
1402 }
1403}
1404
1405static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
1406{
1407 u32 intr;
1408
1409 intr = gk20a_readl(g, fifo_intr_chsw_error_r());
1410 nvgpu_err(g, "chsw: %08x", intr);
1411 gk20a_fecs_dump_falcon_stats(g);
1412 gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
1413}
1414
1415static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
1416{
1417 u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1418 nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id);
1419}
1420
1421bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid)
1422{
1423 return (engine_subid == fifo_intr_mmu_fault_info_engine_subid_gpc_v());
1424}
1425
1426bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
1427 u32 engine_subid, bool fake_fault)
1428{
1429 u32 engine_enum = ENGINE_INVAL_GK20A;
1430 struct fifo_engine_info_gk20a *engine_info;
1431
1432 if (!g) {
1433 return false;
1434 }
1435
1436 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
1437
1438 if (engine_info) {
1439 engine_enum = engine_info->engine_enum;
1440 }
1441
1442 if (engine_enum == ENGINE_INVAL_GK20A) {
1443 return false;
1444 }
1445
1446 /* channel recovery is only deferred if an sm debugger
1447	   is attached and MMU debug mode is enabled */
1448 if (!g->ops.gr.sm_debugger_attached(g) ||
1449 !g->ops.fb.is_debug_mode_enabled(g)) {
1450 return false;
1451 }
1452
1453 /* if this fault is fake (due to RC recovery), don't defer recovery */
1454 if (fake_fault) {
1455 return false;
1456 }
1457
1458 if (engine_enum != ENGINE_GR_GK20A) {
1459 return false;
1460 }
1461
1462 return g->ops.fifo.is_fault_engine_subid_gpc(g, engine_subid);
1463}
1464
1465/* caller must hold a channel reference */
1466static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
1467 struct channel_gk20a *refch)
1468{
1469 bool verbose = false;
1470 if (!refch) {
1471 return verbose;
1472 }
1473
1474 if (nvgpu_is_error_notifier_set(refch,
1475 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
1476 verbose = refch->timeout_debug_dump;
1477 }
1478
1479 return verbose;
1480}
1481
1482/* caller must hold a channel reference */
1483static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
1484 struct channel_gk20a *refch)
1485{
1486 if (refch) {
1487 /* mark channel as faulted */
1488 gk20a_channel_set_timedout(refch);
1489
1490 /* unblock pending waits */
1491 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
1492 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
1493 }
1494}
1495
1496/* caller must hold a channel reference */
1497bool gk20a_fifo_error_ch(struct gk20a *g,
1498 struct channel_gk20a *refch)
1499{
1500 bool verbose;
1501
1502 verbose = gk20a_fifo_ch_timeout_debug_dump_state(g, refch);
1503 gk20a_fifo_set_has_timedout_and_wake_up_wqs(g, refch);
1504
1505 return verbose;
1506}
1507
1508bool gk20a_fifo_error_tsg(struct gk20a *g,
1509 struct tsg_gk20a *tsg)
1510{
1511 struct channel_gk20a *ch = NULL;
1512 bool verbose = false;
1513
1514 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1515 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1516 if (gk20a_channel_get(ch)) {
1517 if (gk20a_fifo_error_ch(g, ch)) {
1518 verbose = true;
1519 }
1520 gk20a_channel_put(ch);
1521 }
1522 }
1523 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1524
1525 return verbose;
1526
1527}
1528/* caller must hold a channel reference */
1529void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
1530 struct channel_gk20a *refch)
1531{
1532 nvgpu_err(g,
1533 "channel %d generated a mmu fault", refch->chid);
1534 g->ops.fifo.set_error_notifier(refch,
1535 NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
1536}
1537
1538void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
1539 struct tsg_gk20a *tsg)
1540{
1541 struct channel_gk20a *ch = NULL;
1542
1543 nvgpu_err(g,
1544 "TSG %d generated a mmu fault", tsg->tsgid);
1545
1546 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1547 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1548 if (gk20a_channel_get(ch)) {
1549 gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1550 gk20a_channel_put(ch);
1551 }
1552 }
1553 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1554
1555}
1556
1557void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
1558{
1559 struct channel_gk20a *ch = NULL;
1560
1561 nvgpu_log_fn(g, " ");
1562
1563 g->ops.fifo.disable_tsg(tsg);
1564
1565 if (preempt) {
1566 g->ops.fifo.preempt_tsg(g, tsg);
1567 }
1568
1569 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1570 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1571 if (gk20a_channel_get(ch)) {
1572 gk20a_channel_set_timedout(ch);
1573 if (ch->g->ops.fifo.ch_abort_clean_up) {
1574 ch->g->ops.fifo.ch_abort_clean_up(ch);
1575 }
1576 gk20a_channel_put(ch);
1577 }
1578 }
1579 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1580}
1581
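/*
 * Handle engine resets that were deferred while an SM debugger was attached
 * (done during channel free); runs with ctxsw disabled so engine assignments
 * cannot change underneath us.
 */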
1582int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
1583{
1584 unsigned long engine_id, engines = 0U;
1585 struct tsg_gk20a *tsg;
1586 bool deferred_reset_pending;
1587 struct fifo_gk20a *f = &g->fifo;
1588
1589 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1590
1591 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1592 deferred_reset_pending = g->fifo.deferred_reset_pending;
1593 nvgpu_mutex_release(&f->deferred_reset_mutex);
1594
1595 if (!deferred_reset_pending) {
1596 nvgpu_mutex_release(&g->dbg_sessions_lock);
1597 return 0;
1598 }
1599
1600 gr_gk20a_disable_ctxsw(g);
1601
1602 tsg = tsg_gk20a_from_ch(ch);
1603 if (tsg != NULL) {
1604 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
1605 } else {
1606 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
1607 }
1608
1609 if (engines == 0U) {
1610 goto clean_up;
1611 }
1612
1613 /*
1614 * If deferred reset is set for an engine, and channel is running
1615 * on that engine, reset it
1616 */
1617 for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32) {
1618 if (BIT(engine_id) & engines) {
1619 gk20a_fifo_reset_engine(g, engine_id);
1620 }
1621 }
1622
1623 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1624 g->fifo.deferred_fault_engines = 0;
1625 g->fifo.deferred_reset_pending = false;
1626 nvgpu_mutex_release(&f->deferred_reset_mutex);
1627
1628clean_up:
1629 gr_gk20a_enable_ctxsw(g);
1630 nvgpu_mutex_release(&g->dbg_sessions_lock);
1631
1632 return 0;
1633}
1634
1635static bool gk20a_fifo_handle_mmu_fault_locked(
1636 struct gk20a *g,
1637 u32 mmu_fault_engines, /* queried from HW if 0 */
1638 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1639 bool id_is_tsg)
1640{
1641 bool fake_fault;
1642 unsigned long fault_id;
1643 unsigned long engine_mmu_fault_id;
1644 bool verbose = true;
1645 u32 grfifo_ctl;
1646
1647 bool deferred_reset_pending = false;
1648 struct fifo_gk20a *f = &g->fifo;
1649
1650 nvgpu_log_fn(g, " ");
1651
1652 /* Disable power management */
1653 if (g->support_pmu) {
1654 if (nvgpu_cg_pg_disable(g) != 0) {
1655 nvgpu_warn(g, "fail to disable power mgmt");
1656 }
1657 }
1658
1659 /* Disable fifo access */
1660 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
1661 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
1662 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
1663
1664 gk20a_writel(g, gr_gpfifo_ctl_r(),
1665 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
1666 gr_gpfifo_ctl_semaphore_access_f(0));
1667
1668 if (mmu_fault_engines) {
1669 fault_id = mmu_fault_engines;
1670 fake_fault = true;
1671 } else {
1672 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1673 fake_fault = false;
1674 gk20a_debug_dump(g);
1675 }
1676
1677 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1678 g->fifo.deferred_reset_pending = false;
1679 nvgpu_mutex_release(&f->deferred_reset_mutex);
1680
1681 /* go through all faulted engines */
1682 for_each_set_bit(engine_mmu_fault_id, &fault_id, 32) {
1683 /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
1684 * engines. Convert engine_mmu_id to engine_id */
1685 u32 engine_id = gk20a_mmu_id_to_engine_id(g,
1686 engine_mmu_fault_id);
1687 struct mmu_fault_info mmfault_info;
1688 struct channel_gk20a *ch = NULL;
1689 struct tsg_gk20a *tsg = NULL;
1690 struct channel_gk20a *refch = NULL;
1691 /* read and parse engine status */
1692 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1693 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1694 bool ctxsw = (ctx_status ==
1695 fifo_engine_status_ctx_status_ctxsw_switch_v()
1696 || ctx_status ==
1697 fifo_engine_status_ctx_status_ctxsw_save_v()
1698 || ctx_status ==
1699 fifo_engine_status_ctx_status_ctxsw_load_v());
1700
1701 get_exception_mmu_fault_info(g, engine_mmu_fault_id,
1702 &mmfault_info);
1703 trace_gk20a_mmu_fault(mmfault_info.fault_addr,
1704 mmfault_info.fault_type,
1705 mmfault_info.access_type,
1706 mmfault_info.inst_ptr,
1707 engine_id,
1708 mmfault_info.client_type_desc,
1709 mmfault_info.client_id_desc,
1710 mmfault_info.fault_type_desc);
1711 nvgpu_err(g, "%s mmu fault on engine %d, "
1712 "engine subid %d (%s), client %d (%s), "
1713 "addr 0x%llx, type %d (%s), access_type 0x%08x,"
1714 "inst_ptr 0x%llx",
1715 fake_fault ? "fake" : "",
1716 engine_id,
1717 mmfault_info.client_type,
1718 mmfault_info.client_type_desc,
1719 mmfault_info.client_id, mmfault_info.client_id_desc,
1720 mmfault_info.fault_addr,
1721 mmfault_info.fault_type,
1722 mmfault_info.fault_type_desc,
1723 mmfault_info.access_type, mmfault_info.inst_ptr);
1724
1725 if (ctxsw) {
1726 gk20a_fecs_dump_falcon_stats(g);
1727 nvgpu_err(g, "gr_status_r : 0x%x",
1728 gk20a_readl(g, gr_status_r()));
1729 }
1730
1731 /* get the channel/TSG */
1732 if (fake_fault) {
1733 /* use next_id if context load is failing */
1734 u32 id, type;
1735
1736 if (hw_id == ~(u32)0) {
1737 id = (ctx_status ==
1738 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1739 fifo_engine_status_next_id_v(status) :
1740 fifo_engine_status_id_v(status);
1741 type = (ctx_status ==
1742 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1743 fifo_engine_status_next_id_type_v(status) :
1744 fifo_engine_status_id_type_v(status);
1745 } else {
1746 id = hw_id;
1747 type = id_is_tsg ?
1748 fifo_engine_status_id_type_tsgid_v() :
1749 fifo_engine_status_id_type_chid_v();
1750 }
1751
1752 if (type == fifo_engine_status_id_type_tsgid_v()) {
1753 tsg = &g->fifo.tsg[id];
1754 } else if (type == fifo_engine_status_id_type_chid_v()) {
1755 ch = &g->fifo.channel[id];
1756 refch = gk20a_channel_get(ch);
1757 if (refch != NULL) {
1758 tsg = tsg_gk20a_from_ch(refch);
1759 }
1760 }
1761 } else {
1762 /* read channel based on instruction pointer */
1763 ch = gk20a_refch_from_inst_ptr(g,
1764 mmfault_info.inst_ptr);
1765 refch = ch;
1766 if (refch != NULL) {
1767 tsg = tsg_gk20a_from_ch(refch);
1768 }
1769 }
1770
1771 /* check if engine reset should be deferred */
1772 if (engine_id != FIFO_INVAL_ENGINE_ID) {
1773 bool defer = gk20a_fifo_should_defer_engine_reset(g,
1774 engine_id, mmfault_info.client_type,
1775 fake_fault);
1776 if ((ch || tsg) && defer) {
1777 g->fifo.deferred_fault_engines |= BIT(engine_id);
1778
1779 /* handled during channel free */
1780 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
1781 g->fifo.deferred_reset_pending = true;
1782 nvgpu_mutex_release(&f->deferred_reset_mutex);
1783
1784 deferred_reset_pending = true;
1785
1786 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
1787 "sm debugger attached,"
1788 " deferring channel recovery to channel free");
1789 } else {
1790 gk20a_fifo_reset_engine(g, engine_id);
1791 }
1792 }
1793
1794#ifdef CONFIG_GK20A_CTXSW_TRACE
1795 if (tsg) {
1796 gk20a_ctxsw_trace_tsg_reset(g, tsg);
1797 }
1798#endif
1799 /*
1800 * Disable the channel/TSG from hw and increment syncpoints.
1801 */
1802 if (tsg) {
1803 if (deferred_reset_pending) {
1804 gk20a_disable_tsg(tsg);
1805 } else {
1806 if (!fake_fault) {
1807 gk20a_fifo_set_ctx_mmu_error_tsg(g,
1808 tsg);
1809 }
1810 verbose = gk20a_fifo_error_tsg(g, tsg);
1811 gk20a_fifo_abort_tsg(g, tsg, false);
1812 }
1813
1814 /* put back the ref taken early above */
1815 if (refch) {
1816 gk20a_channel_put(ch);
1817 }
1818 } else if (refch != NULL) {
1819 nvgpu_err(g, "mmu error in unbound channel %d",
1820 ch->chid);
1821 gk20a_channel_put(ch);
1822 } else if (mmfault_info.inst_ptr ==
1823 nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
1824 nvgpu_err(g, "mmu fault from bar1");
1825 } else if (mmfault_info.inst_ptr ==
1826 nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
1827 nvgpu_err(g, "mmu fault from pmu");
1828 } else {
1829 nvgpu_err(g, "couldn't locate channel for mmu fault");
1830 }
1831 }
1832
1833 /* clear interrupt */
1834 gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
1835
1836 /* resume scheduler */
1837 gk20a_writel(g, fifo_error_sched_disable_r(),
1838 gk20a_readl(g, fifo_error_sched_disable_r()));
1839
1840 /* Re-enable fifo access */
1841 gk20a_writel(g, gr_gpfifo_ctl_r(),
1842 gr_gpfifo_ctl_access_enabled_f() |
1843 gr_gpfifo_ctl_semaphore_access_enabled_f());
1844
1845 /* It is safe to enable ELPG again. */
1846 if (g->support_pmu) {
1847 if (nvgpu_cg_pg_enable(g) != 0) {
1848 nvgpu_warn(g, "fail to enable power mgmt");
1849 }
1850 }
1851
1852 return verbose;
1853}
1854
1855static bool gk20a_fifo_handle_mmu_fault(
1856 struct gk20a *g,
1857 u32 mmu_fault_engines, /* queried from HW if 0 */
1858 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
1859 bool id_is_tsg)
1860{
1861 u32 rlid;
1862 bool verbose;
1863
1864 nvgpu_log_fn(g, " ");
1865
1866 nvgpu_log_info(g, "acquire engines_reset_mutex");
1867 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1868
1869 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
1870 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1871 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
1872 }
1873
1874 verbose = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
1875 hw_id, id_is_tsg);
1876
1877 nvgpu_log_info(g, "release runlist_lock for all runlists");
1878 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
1879 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
1880 }
1881
1882 nvgpu_log_info(g, "release engines_reset_mutex");
1883 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
1884
1885 return verbose;
1886}
1887
1888static void gk20a_fifo_get_faulty_id_type(struct gk20a *g, int engine_id,
1889 u32 *id, u32 *type)
1890{
1891 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1892 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1893
1894 /* use next_id if context load is failing */
1895 *id = (ctx_status ==
1896 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1897 fifo_engine_status_next_id_v(status) :
1898 fifo_engine_status_id_v(status);
1899
1900 *type = (ctx_status ==
1901 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1902 fifo_engine_status_next_id_type_v(status) :
1903 fifo_engine_status_id_type_v(status);
1904}
1905
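/*
 * Return a bitmask of busy engines whose current context (or next context,
 * while a ctxsw load is in progress) matches the given channel/TSG id.
 */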
1906static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1907{
1908 unsigned int i;
1909 u32 engines = 0;
1910
1911 for (i = 0; i < g->fifo.num_engines; i++) {
1912 u32 active_engine_id = g->fifo.active_engines_list[i];
1913 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
1914 u32 ctx_status =
1915 fifo_engine_status_ctx_status_v(status);
1916 u32 ctx_id = (ctx_status ==
1917 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1918 fifo_engine_status_next_id_v(status) :
1919 fifo_engine_status_id_v(status);
1920 u32 type = (ctx_status ==
1921 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1922 fifo_engine_status_next_id_type_v(status) :
1923 fifo_engine_status_id_type_v(status);
1924 bool busy = fifo_engine_status_engine_v(status) ==
1925 fifo_engine_status_engine_busy_v();
1926 if (busy && ctx_id == id) {
1927 if ((is_tsg && type ==
1928 fifo_engine_status_id_type_tsgid_v()) ||
1929 (!is_tsg && type ==
1930 fifo_engine_status_id_type_chid_v())) {
1931 engines |= BIT(active_engine_id);
1932 }
1933 }
1934 }
1935
1936 return engines;
1937}
1938
1939void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
1940 bool verbose, u32 rc_type)
1941{
1942 u32 engines;
1943
1944 /* stop context switching to prevent engine assignments from
1945 changing until channel is recovered */
1946 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1947 gr_gk20a_disable_ctxsw(g);
1948
1949 engines = gk20a_fifo_engines_on_id(g, ch->chid, false);
1950
1951 if (engines) {
1952 gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
1953 rc_type);
1954 } else {
1955 gk20a_channel_abort(ch, false);
1956
1957 if (gk20a_fifo_error_ch(g, ch)) {
1958 gk20a_debug_dump(g);
1959 }
1960 }
1961
1962 gr_gk20a_enable_ctxsw(g);
1963 nvgpu_mutex_release(&g->dbg_sessions_lock);
1964}
1965
1966void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1967 bool verbose, u32 rc_type)
1968{
1969 u32 engines = 0U;
1970 int err;
1971
1972 /* stop context switching to prevent engine assignments from
1973 changing until TSG is recovered */
1974 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1975
1976 /* disable tsg so that it does not get scheduled again */
1977 g->ops.fifo.disable_tsg(tsg);
1978
1979 /*
1980 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
1981 * fifo_engine_status register. Also while the engine is held in reset
1982 * h/w passes busy/idle straight through. fifo_engine_status registers
1983 * are correct in that there is no context switch outstanding
1984 * as the CTXSW is aborted when reset is asserted.
1985 */
1986 nvgpu_log_info(g, "acquire engines_reset_mutex");
1987 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
1988
1989 /*
1990 * stop context switching to prevent engine assignments from
1991 * changing until engine status is checked to make sure tsg
1992 * being recovered is not loaded on the engines
1993 */
1994 err = gr_gk20a_disable_ctxsw(g);
1995
1996 if (err != 0) {
1997 /* if failed to disable ctxsw, just abort tsg */
1998 nvgpu_err(g, "failed to disable ctxsw");
1999 } else {
2000 /* recover engines if tsg is loaded on the engines */
2001 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
2002
2003 /*
2004		 * It is ok to enable ctxsw before the tsg is recovered. If engines
2005		 * is 0, no engine recovery is needed; if it is non-zero,
2006		 * gk20a_fifo_recover will call get_engines_mask_on_id again.
2007		 * By that time, if the tsg is no longer on the engine, the engine
2008		 * need not be reset.
2009 */
2010 err = gr_gk20a_enable_ctxsw(g);
2011 if (err != 0) {
2012 nvgpu_err(g, "failed to enable ctxsw");
2013 }
2014 }
2015
2016 nvgpu_log_info(g, "release engines_reset_mutex");
2017 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2018
2019 if (engines) {
2020 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
2021 rc_type);
2022 } else {
2023 if (gk20a_fifo_error_tsg(g, tsg) && verbose) {
2024 gk20a_debug_dump(g);
2025 }
2026
2027 gk20a_fifo_abort_tsg(g, tsg, false);
2028 }
2029
2030 nvgpu_mutex_release(&g->dbg_sessions_lock);
2031}
2032
2033void gk20a_fifo_teardown_mask_intr(struct gk20a *g)
2034{
2035 u32 val;
2036
2037 val = gk20a_readl(g, fifo_intr_en_0_r());
2038 val &= ~(fifo_intr_en_0_sched_error_m() |
2039 fifo_intr_en_0_mmu_fault_m());
2040 gk20a_writel(g, fifo_intr_en_0_r(), val);
2041 gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());
2042}
2043
2044void gk20a_fifo_teardown_unmask_intr(struct gk20a *g)
2045{
2046 u32 val;
2047
2048 val = gk20a_readl(g, fifo_intr_en_0_r());
2049 val |= fifo_intr_en_0_mmu_fault_f(1) | fifo_intr_en_0_sched_error_f(1);
2050 gk20a_writel(g, fifo_intr_en_0_r(), val);
2051
2052}
2053
2054void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
2055 u32 hw_id, unsigned int id_type, unsigned int rc_type,
2056 struct mmu_fault_info *mmfault)
2057{
2058 unsigned long engine_id, i;
2059 unsigned long _engine_ids = __engine_ids;
2060 unsigned long engine_ids = 0;
2061 u32 mmu_fault_engines = 0;
2062 u32 ref_type;
2063 u32 ref_id;
2064 u32 ref_id_is_tsg = false;
2065 bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false;
2066 bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false;
2067 u32 rlid;
2068
2069 nvgpu_log_info(g, "acquire engines_reset_mutex");
2070 nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
2071
2072 nvgpu_log_info(g, "acquire runlist_lock for all runlists");
2073 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2074 nvgpu_mutex_acquire(&g->fifo.runlist_info[rlid].runlist_lock);
2075 }
2076
2077 if (id_is_known) {
2078 engine_ids = gk20a_fifo_engines_on_id(g, hw_id, id_is_tsg);
2079 ref_id = hw_id;
2080 ref_type = id_is_tsg ?
2081 fifo_engine_status_id_type_tsgid_v() :
2082 fifo_engine_status_id_type_chid_v();
2083 ref_id_is_tsg = id_is_tsg;
2084		/* at least one engine will get passed during sched err */
2085 engine_ids |= __engine_ids;
2086 for_each_set_bit(engine_id, &engine_ids, 32) {
2087 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id);
2088
2089 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2090 mmu_fault_engines |= BIT(mmu_id);
2091 }
2092 }
2093 } else {
2094 /* store faulted engines in advance */
2095 for_each_set_bit(engine_id, &_engine_ids, 32) {
2096 gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id,
2097 &ref_type);
2098 if (ref_type == fifo_engine_status_id_type_tsgid_v()) {
2099 ref_id_is_tsg = true;
2100 } else {
2101 ref_id_is_tsg = false;
2102 }
2103 /* Reset *all* engines that use the
2104			 * same channel as the faulty engine */
2105 for (i = 0; i < g->fifo.num_engines; i++) {
2106 u32 active_engine_id = g->fifo.active_engines_list[i];
2107 u32 type;
2108 u32 id;
2109
2110 gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type);
2111 if (ref_type == type && ref_id == id) {
2112 u32 mmu_id = gk20a_engine_id_to_mmu_id(g, active_engine_id);
2113
2114 engine_ids |= BIT(active_engine_id);
2115 if (mmu_id != FIFO_INVAL_ENGINE_ID) {
2116 mmu_fault_engines |= BIT(mmu_id);
2117 }
2118 }
2119 }
2120 }
2121 }
2122
2123 if (mmu_fault_engines) {
2124 g->ops.fifo.teardown_mask_intr(g);
2125 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
2126 gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id,
2127 ref_id_is_tsg);
2128
2129 g->ops.fifo.teardown_unmask_intr(g);
2130 }
2131
2132 nvgpu_log_info(g, "release runlist_lock for all runlists");
2133 for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
2134 nvgpu_mutex_release(&g->fifo.runlist_info[rlid].runlist_lock);
2135 }
2136
2137 nvgpu_log_info(g, "release engines_reset_mutex");
2138 nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
2139}
2140
2141void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
2142 u32 hw_id, bool id_is_tsg,
2143 bool id_is_known, bool verbose, int rc_type)
2144{
2145 unsigned int id_type;
2146
2147 if (verbose) {
2148 gk20a_debug_dump(g);
2149 }
2150
2151 if (g->ops.ltc.flush) {
2152 g->ops.ltc.flush(g);
2153 }
2154
2155 if (id_is_known) {
2156 id_type = id_is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
2157 } else {
2158 id_type = ID_TYPE_UNKNOWN;
2159 }
2160
2161 g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type,
2162 rc_type, NULL);
2163}
2164
2165/* force reset channel and tsg */
2166int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
2167 u32 err_code, bool verbose)
2168{
2169 struct channel_gk20a *ch_tsg = NULL;
2170 struct gk20a *g = ch->g;
2171
2172 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2173
2174 if (tsg != NULL) {
2175 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2176
2177 nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
2178 channel_gk20a, ch_entry) {
2179 if (gk20a_channel_get(ch_tsg)) {
2180 g->ops.fifo.set_error_notifier(ch_tsg,
2181 err_code);
2182 gk20a_channel_put(ch_tsg);
2183 }
2184 }
2185
2186 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2187 gk20a_fifo_recover_tsg(g, tsg, verbose,
2188 RC_TYPE_FORCE_RESET);
2189 } else {
2190 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2191 }
2192
2193 return 0;
2194}
2195
2196int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
2197{
2198 struct gk20a *g = ch->g;
2199
2200 if (gk20a_fifo_channel_status_is_next(g, ch->chid)) {
2201 nvgpu_err(g, "Channel %d to be removed from TSG %d has NEXT set!",
2202 ch->chid, ch->tsgid);
2203 return -EINVAL;
2204 }
2205
2206 if (g->ops.fifo.tsg_verify_status_ctx_reload) {
2207 g->ops.fifo.tsg_verify_status_ctx_reload(ch);
2208 }
2209
2210 if (g->ops.fifo.tsg_verify_status_faulted) {
2211 g->ops.fifo.tsg_verify_status_faulted(ch);
2212 }
2213
2214 return 0;
2215}
2216
2217static bool gk20a_fifo_tsg_is_multi_channel(struct tsg_gk20a *tsg)
2218{
2219 bool ret = false;
2220
2221 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2222 if (nvgpu_list_first_entry(&tsg->ch_list, channel_gk20a,
2223 ch_entry) !=
2224 nvgpu_list_last_entry(&tsg->ch_list, channel_gk20a,
2225 ch_entry)) {
2226 ret = true;
2227 }
2228 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2229
2230 return ret;
2231}
2232
2233int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
2234{
2235 struct gk20a *g = ch->g;
2236 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
2237 int err;
2238 bool tsg_timedout = false;
2239
2240 if (tsg == NULL) {
2241 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
2242 return 0;
2243 }
2244
2245 /* If one channel in TSG times out, we disable all channels */
2246 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2247 tsg_timedout = gk20a_channel_check_timedout(ch);
2248 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2249
2250 /* Disable TSG and examine status before unbinding channel */
2251 g->ops.fifo.disable_tsg(tsg);
2252
2253 err = g->ops.fifo.preempt_tsg(g, tsg);
2254 if (err != 0) {
2255 goto fail_enable_tsg;
2256 }
2257
2258 /*
2259 * State validation is only necessary if there are multiple channels in
2260 * the TSG.
2261 */
2262 if (gk20a_fifo_tsg_is_multi_channel(tsg) &&
2263 g->ops.fifo.tsg_verify_channel_status && !tsg_timedout) {
2264 err = g->ops.fifo.tsg_verify_channel_status(ch);
2265 if (err) {
2266 goto fail_enable_tsg;
2267 }
2268 }
2269
2270 /* Channel should be seen as TSG channel while updating runlist */
2271 err = channel_gk20a_update_runlist(ch, false);
2272 if (err) {
2273 goto fail_enable_tsg;
2274 }
2275
2276 while (ch->mmu_debug_mode_refcnt > 0U) {
2277 err = nvgpu_tsg_set_mmu_debug_mode(ch, false);
2278 if (err != 0) {
2279 nvgpu_err(g, "disable mmu debug mode failed ch:%u",
2280 ch->chid);
2281 break;
2282 }
2283 }
2284
2285 /* Remove channel from TSG and re-enable rest of the channels */
2286 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
2287 nvgpu_list_del(&ch->ch_entry);
2288 ch->tsgid = NVGPU_INVALID_TSG_ID;
2289
2290 /* another thread could have re-enabled the channel because it was
2291 * still on the list at that time, so make sure it's truly disabled
2292 */
2293 g->ops.fifo.disable_channel(ch);
2294 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
2295
2296	/*
2297	 * Don't re-enable all channels if the TSG has timed out already.
2298	 *
2299	 * Note that we could also skip disabling and preempting the TSG in
2300	 * case of a timeout, but we keep that to ensure the TSG is kicked out.
2301	 */
2302 if (!tsg_timedout) {
2303 g->ops.fifo.enable_tsg(tsg);
2304 }
2305
2306 if (ch->g->ops.fifo.ch_abort_clean_up) {
2307 ch->g->ops.fifo.ch_abort_clean_up(ch);
2308 }
2309
2310 return 0;
2311
2312fail_enable_tsg:
2313 if (!tsg_timedout) {
2314 g->ops.fifo.enable_tsg(tsg);
2315 }
2316 return err;
2317}
2318
2319u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
2320 int *__id, bool *__is_tsg)
2321{
2322 u32 engine_id;
2323 int id = -1;
2324 bool is_tsg = false;
2325 u32 mailbox2;
2326 u32 active_engine_id = FIFO_INVAL_ENGINE_ID;
2327
2328 for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) {
2329 u32 status;
2330 u32 ctx_status;
2331 bool failing_engine;
2332
2333 active_engine_id = g->fifo.active_engines_list[engine_id];
2334 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
2335 ctx_status = fifo_engine_status_ctx_status_v(status);
2336
2337 /* we are interested in busy engines */
2338 failing_engine = fifo_engine_status_engine_v(status) ==
2339 fifo_engine_status_engine_busy_v();
2340
2341 /* ..that are doing context switch */
2342 failing_engine = failing_engine &&
2343 (ctx_status ==
2344 fifo_engine_status_ctx_status_ctxsw_switch_v()
2345 || ctx_status ==
2346 fifo_engine_status_ctx_status_ctxsw_save_v()
2347 || ctx_status ==
2348 fifo_engine_status_ctx_status_ctxsw_load_v());
2349
2350 if (!failing_engine) {
2351 active_engine_id = FIFO_INVAL_ENGINE_ID;
2352 continue;
2353 }
2354
2355 if (ctx_status ==
2356 fifo_engine_status_ctx_status_ctxsw_load_v()) {
2357 id = fifo_engine_status_next_id_v(status);
2358 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2359 fifo_engine_status_next_id_type_chid_v();
2360 } else if (ctx_status ==
2361 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
2362 mailbox2 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(2));
2363 if (mailbox2 & FECS_METHOD_WFI_RESTORE) {
2364 id = fifo_engine_status_next_id_v(status);
2365 is_tsg = fifo_engine_status_next_id_type_v(status) !=
2366 fifo_engine_status_next_id_type_chid_v();
2367 } else {
2368 id = fifo_engine_status_id_v(status);
2369 is_tsg = fifo_engine_status_id_type_v(status) !=
2370 fifo_engine_status_id_type_chid_v();
2371 }
2372 } else {
2373 id = fifo_engine_status_id_v(status);
2374 is_tsg = fifo_engine_status_id_type_v(status) !=
2375 fifo_engine_status_id_type_chid_v();
2376 }
2377 break;
2378 }
2379
2380 *__id = id;
2381 *__is_tsg = is_tsg;
2382
2383 return active_engine_id;
2384}
2385
2386bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
2387 bool *verbose, u32 *ms)
2388{
2389 bool recover = false;
2390 bool progress = false;
2391 struct gk20a *g = ch->g;
2392
2393 if (gk20a_channel_get(ch)) {
2394 recover = gk20a_channel_update_and_check_timeout(ch,
2395 g->fifo_eng_timeout_us / 1000,
2396 &progress);
2397 *verbose = ch->timeout_debug_dump;
2398 *ms = ch->timeout_accumulated_ms;
2399 if (recover) {
2400 g->ops.fifo.set_error_notifier(ch,
2401 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2402 }
2403
2404 gk20a_channel_put(ch);
2405 }
2406 return recover;
2407}
2408
2409bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
2410 bool *verbose, u32 *ms)
2411{
2412 struct channel_gk20a *ch;
2413 bool recover = false;
2414 bool progress = false;
2415 struct gk20a *g = tsg->g;
2416
2417 *verbose = false;
2418 *ms = g->fifo_eng_timeout_us / 1000;
2419
2420 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2421
2422 /* check if there was some progress on any of the TSG channels.
2423 * fifo recovery is needed if at least one channel reached the
2424 * maximum timeout without progress (update in gpfifo pointers).
2425 */
2426 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
2427 if (gk20a_channel_get(ch)) {
2428 recover = gk20a_channel_update_and_check_timeout(ch,
2429 *ms, &progress);
2430 if (progress || recover) {
2431 break;
2432 }
2433 gk20a_channel_put(ch);
2434 }
2435 }
2436
2437 if (recover) {
2438 /*
2439 * if one channel is presumed dead (no progress for too long),
2440 * then fifo recovery is needed. we can't really figure out
2441 * which channel caused the problem, so set timeout error
2442 * notifier for all channels.
2443 */
2444 nvgpu_log_info(g, "timeout on tsg=%d ch=%d",
2445 tsg->tsgid, ch->chid);
2446 *ms = ch->timeout_accumulated_ms;
2447 gk20a_channel_put(ch);
2448 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2449 channel_gk20a, ch_entry) {
2450 if (gk20a_channel_get(ch)) {
2451 ch->g->ops.fifo.set_error_notifier(ch,
2452 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2453 if (ch->timeout_debug_dump) {
2454 *verbose = true;
2455 }
2456 gk20a_channel_put(ch);
2457 }
2458 }
2459 } else if (progress) {
2460 /*
2461 * if at least one channel in the TSG made some progress, reset
2462 * accumulated timeout for all channels in the TSG. In
2463 * particular, this resets timeout for channels that already
2464 * completed their work
2465 */
2466 nvgpu_log_info(g, "progress on tsg=%d ch=%d",
2467 tsg->tsgid, ch->chid);
2468 gk20a_channel_put(ch);
2469 *ms = g->fifo_eng_timeout_us / 1000;
2470 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2471 channel_gk20a, ch_entry) {
2472 if (gk20a_channel_get(ch)) {
2473 ch->timeout_accumulated_ms = *ms;
2474 gk20a_channel_put(ch);
2475 }
2476 }
2477 }
2478
2479	/* if we could not detect progress on any of the channels, but none
2480	 * of them has reached the timeout, there is nothing more to do:
2481	 * timeout_accumulated_ms has been updated for all of them.
2482	 */
2483 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2484 return recover;
2485}
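
/*
 * Summary of gk20a_fifo_check_tsg_ctxsw_timeout() above (a descriptive
 * note, not new behavior):
 *   recover  - at least one channel hit its accumulated ctxsw timeout with
 *              no gpfifo progress; the timeout notifier is set on every
 *              channel in the TSG and the caller is expected to recover.
 *   progress - some channel advanced its gpfifo pointers; the accumulated
 *              timeout is reset for all channels in the TSG.
 *   neither  - timeouts were only accumulated; nothing more to do yet.
 */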
2486
2487bool gk20a_fifo_handle_sched_error(struct gk20a *g)
2488{
2489 u32 sched_error;
2490 u32 engine_id;
2491 int id = -1;
2492 bool is_tsg = false;
2493 bool ret = false;
2494
2495 /* read the scheduler error register */
2496 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
2497
2498 engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);
2499 /*
2500 * Could not find the engine
2501 * Possible Causes:
2502 * a)
2503 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
2504 * fifo_engine_status register. Also while the engine is held in reset
2505 * h/w passes busy/idle straight through. fifo_engine_status registers
2506 * are correct in that there is no context switch outstanding
2507 * as the CTXSW is aborted when reset is asserted.
2508 * This is just a side effect of how gv100 and earlier versions of
2509 * ctxsw_timeout behave.
2510 * With gv11b and later, h/w snaps the context at the point of error
2511 * so that s/w can see the tsg_id which caused the HW timeout.
2512 * b)
2513	 * If engines are not busy and the ctxsw state is valid, then the intr
2514	 * occurred in the past, and if the ctxsw state has moved on to VALID
2515	 * from LOAD or SAVE, it means that whatever timed out eventually
2516	 * finished anyway. The problem with this is that s/w cannot conclude
2517	 * which context caused the problem, as more switches may have occurred
2518	 * before the intr was handled.
2519 */
2520 if (engine_id == FIFO_INVAL_ENGINE_ID) {
2521		nvgpu_info(g, "fifo sched error: 0x%08x, failed to find engine "
2522			"that is busy doing ctxsw. "
2523			"Maybe ctxsw already happened", sched_error);
2524 ret = false;
2525 goto err;
2526 }
2527
2528 /* could not find the engine - should never happen */
2529 if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
2530 nvgpu_err(g, "fifo sched error : 0x%08x, failed to find engine",
2531 sched_error);
2532 ret = false;
2533 goto err;
2534 }
2535
2536 if (fifo_intr_sched_error_code_f(sched_error) ==
2537 fifo_intr_sched_error_code_ctxsw_timeout_v()) {
2538 struct fifo_gk20a *f = &g->fifo;
2539 u32 ms = 0;
2540 bool verbose = false;
2541
2542 if (is_tsg) {
2543 ret = g->ops.fifo.check_tsg_ctxsw_timeout(
2544 &f->tsg[id], &verbose, &ms);
2545 } else {
2546 ret = g->ops.fifo.check_ch_ctxsw_timeout(
2547 &f->channel[id], &verbose, &ms);
2548 }
2549
2550 if (ret) {
2551 nvgpu_err(g,
2552 "fifo sched ctxsw timeout error: "
2553 "engine=%u, %s=%d, ms=%u",
2554 engine_id, is_tsg ? "tsg" : "ch", id, ms);
2555 /*
2556 * Cancel all channels' timeout since SCHED error might
2557 * trigger multiple watchdogs at a time
2558 */
2559 gk20a_channel_timeout_restart_all_channels(g);
2560 gk20a_fifo_recover(g, BIT(engine_id), id,
2561 is_tsg, true, verbose,
2562 RC_TYPE_CTXSW_TIMEOUT);
2563 } else {
2564 nvgpu_log_info(g,
2565 "fifo is waiting for ctx switch for %d ms, "
2566 "%s=%d", ms, is_tsg ? "tsg" : "ch", id);
2567 }
2568 } else {
2569 nvgpu_err(g,
2570 "fifo sched error : 0x%08x, engine=%u, %s=%d",
2571 sched_error, engine_id, is_tsg ? "tsg" : "ch", id);
2572 }
2573
2574err:
2575 return ret;
2576}
2577
2578static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
2579{
2580 bool print_channel_reset_log = false;
2581 u32 handled = 0;
2582
2583 nvgpu_log_fn(g, "fifo_intr=0x%08x", fifo_intr);
2584
2585 if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
2586 /* pio mode is unused. this shouldn't happen, ever. */
2587 /* should we clear it or just leave it pending? */
2588 nvgpu_err(g, "fifo pio error!");
2589 BUG_ON(1);
2590 }
2591
2592 if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
2593 u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
2594 nvgpu_err(g, "fifo bind error: 0x%08x", bind_error);
2595 print_channel_reset_log = true;
2596 handled |= fifo_intr_0_bind_error_pending_f();
2597 }
2598
2599 if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
2600 print_channel_reset_log = g->ops.fifo.handle_sched_error(g);
2601 handled |= fifo_intr_0_sched_error_pending_f();
2602 }
2603
2604 if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
2605 gk20a_fifo_handle_chsw_fault(g);
2606 handled |= fifo_intr_0_chsw_error_pending_f();
2607 }
2608
2609 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
2610 if (gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false)) {
2611 print_channel_reset_log = true;
2612 }
2613 handled |= fifo_intr_0_mmu_fault_pending_f();
2614 }
2615
2616 if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
2617 gk20a_fifo_handle_dropped_mmu_fault(g);
2618 handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
2619 }
2620
2621 print_channel_reset_log = !g->fifo.deferred_reset_pending
2622 && print_channel_reset_log;
2623
2624 if (print_channel_reset_log) {
2625 unsigned int engine_id;
2626 nvgpu_err(g,
2627 "channel reset initiated from %s; intr=0x%08x",
2628 __func__, fifo_intr);
2629 for (engine_id = 0;
2630 engine_id < g->fifo.num_engines;
2631 engine_id++) {
2632 u32 active_engine_id = g->fifo.active_engines_list[engine_id];
2633 u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
2634 nvgpu_log_fn(g, "enum:%d -> engine_id:%d", engine_enum,
2635 active_engine_id);
2636 fifo_pbdma_exception_status(g,
2637 &g->fifo.engine_info[active_engine_id]);
2638 fifo_engine_exception_status(g,
2639 &g->fifo.engine_info[active_engine_id]);
2640 }
2641 }
2642
2643 return handled;
2644}
2645
2646static inline void gk20a_fifo_reset_pbdma_header(struct gk20a *g, int pbdma_id)
2647{
2648 gk20a_writel(g, pbdma_pb_header_r(pbdma_id),
2649 pbdma_pb_header_first_true_f() |
2650 pbdma_pb_header_type_non_inc_f());
2651}
2652
2653void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
2654 int pbdma_method_index)
2655{
2656 u32 pbdma_method_stride;
2657 u32 pbdma_method_reg;
2658
2659 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2660 pbdma_method0_r(pbdma_id);
2661
2662 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2663 (pbdma_method_index * pbdma_method_stride);
2664
2665 gk20a_writel(g, pbdma_method_reg,
2666 pbdma_method0_valid_true_f() |
2667 pbdma_method0_first_true_f() |
2668 pbdma_method0_addr_f(
2669 pbdma_udma_nop_r() >> 2));
2670}
2671
2672static bool gk20a_fifo_is_sw_method_subch(struct gk20a *g, int pbdma_id,
2673 int pbdma_method_index)
2674{
2675 u32 pbdma_method_stride;
2676 u32 pbdma_method_reg, pbdma_method_subch;
2677
2678 pbdma_method_stride = pbdma_method1_r(pbdma_id) -
2679 pbdma_method0_r(pbdma_id);
2680
2681 pbdma_method_reg = pbdma_method0_r(pbdma_id) +
2682 (pbdma_method_index * pbdma_method_stride);
2683
2684 pbdma_method_subch = pbdma_method0_subch_v(
2685 gk20a_readl(g, pbdma_method_reg));
2686
2687 if (pbdma_method_subch == 5 ||
2688 pbdma_method_subch == 6 ||
2689 pbdma_method_subch == 7) {
2690 return true;
2691 }
2692
2693 return false;
2694}
2695
2696unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
2697 u32 pbdma_intr_0, u32 *handled, u32 *error_notifier)
2698{
2699 struct fifo_gk20a *f = &g->fifo;
2700 unsigned int rc_type = RC_TYPE_NO_RC;
2701 int i;
2702 unsigned long pbdma_intr_err;
2703 u32 bit;
2704
2705 if ((f->intr.pbdma.device_fatal_0 |
2706 f->intr.pbdma.channel_fatal_0 |
2707 f->intr.pbdma.restartable_0) & pbdma_intr_0) {
2708
2709 pbdma_intr_err = (unsigned long)pbdma_intr_0;
2710 for_each_set_bit(bit, &pbdma_intr_err, 32) {
2711 nvgpu_err(g, "PBDMA intr %s Error",
2712 pbdma_intr_fault_type_desc[bit]);
2713 }
2714
2715 nvgpu_err(g,
2716 "pbdma_intr_0(%d):0x%08x PBH: %08x "
2717			"SHADOW: %08x gp shadow0: %08x gp shadow1: %08x "
2718 "M0: %08x %08x %08x %08x ",
2719 pbdma_id, pbdma_intr_0,
2720 gk20a_readl(g, pbdma_pb_header_r(pbdma_id)),
2721 gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id)),
2722 gk20a_readl(g, pbdma_gp_shadow_0_r(pbdma_id)),
2723 gk20a_readl(g, pbdma_gp_shadow_1_r(pbdma_id)),
2724 gk20a_readl(g, pbdma_method0_r(pbdma_id)),
2725 gk20a_readl(g, pbdma_method1_r(pbdma_id)),
2726 gk20a_readl(g, pbdma_method2_r(pbdma_id)),
2727 gk20a_readl(g, pbdma_method3_r(pbdma_id))
2728 );
2729
2730 rc_type = RC_TYPE_PBDMA_FAULT;
2731 *handled |= ((f->intr.pbdma.device_fatal_0 |
2732 f->intr.pbdma.channel_fatal_0 |
2733 f->intr.pbdma.restartable_0) &
2734 pbdma_intr_0);
2735 }
2736
2737 if (pbdma_intr_0 & pbdma_intr_0_acquire_pending_f()) {
2738 u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id));
2739
2740 val &= ~pbdma_acquire_timeout_en_enable_f();
2741 gk20a_writel(g, pbdma_acquire_r(pbdma_id), val);
2742 if (nvgpu_is_timeouts_enabled(g)) {
2743 rc_type = RC_TYPE_PBDMA_FAULT;
2744 nvgpu_err(g,
2745 "semaphore acquire timeout!");
2746 *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
2747 }
2748 *handled |= pbdma_intr_0_acquire_pending_f();
2749 }
2750
2751 if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) {
2752 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2753 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2754 rc_type = RC_TYPE_PBDMA_FAULT;
2755 }
2756
2757 if (pbdma_intr_0 & pbdma_intr_0_method_pending_f()) {
2758 gk20a_fifo_reset_pbdma_method(g, pbdma_id, 0);
2759 rc_type = RC_TYPE_PBDMA_FAULT;
2760 }
2761
2762 if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
2763 *error_notifier =
2764 NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
2765 rc_type = RC_TYPE_PBDMA_FAULT;
2766 }
2767
2768 if (pbdma_intr_0 & pbdma_intr_0_device_pending_f()) {
2769 gk20a_fifo_reset_pbdma_header(g, pbdma_id);
2770
2771 for (i = 0; i < 4; i++) {
2772 if (gk20a_fifo_is_sw_method_subch(g,
2773 pbdma_id, i)) {
2774 gk20a_fifo_reset_pbdma_method(g,
2775 pbdma_id, i);
2776 }
2777 }
2778 rc_type = RC_TYPE_PBDMA_FAULT;
2779 }
2780
2781 return rc_type;
2782}
2783
2784unsigned int gk20a_fifo_handle_pbdma_intr_1(struct gk20a *g,
2785 u32 pbdma_id, u32 pbdma_intr_1,
2786 u32 *handled, u32 *error_notifier)
2787{
2788 unsigned int rc_type = RC_TYPE_PBDMA_FAULT;
2789
2790 /*
2791 * all of the interrupts in _intr_1 are "host copy engine"
2792 * related, which is not supported. For now just make them
2793 * channel fatal.
2794 */
2795 nvgpu_err(g, "hce err: pbdma_intr_1(%d):0x%08x",
2796 pbdma_id, pbdma_intr_1);
2797 *handled |= pbdma_intr_1;
2798
2799 return rc_type;
2800}
2801
2802static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2803 struct fifo_gk20a *f, u32 pbdma_id,
2804 u32 error_notifier, u32 status)
2805{
2806 u32 id;
2807
2808 nvgpu_log(g, gpu_dbg_info, "pbdma id %d error notifier %d",
2809 pbdma_id, error_notifier);
2810 /* Remove channel from runlist */
2811 id = fifo_pbdma_status_id_v(status);
2812 if (fifo_pbdma_status_id_type_v(status)
2813 == fifo_pbdma_status_id_type_chid_v()) {
2814 struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
2815
2816 if (ch != NULL) {
2817 g->ops.fifo.set_error_notifier(ch, error_notifier);
2818 gk20a_fifo_recover_ch(g, ch, true, RC_TYPE_PBDMA_FAULT);
2819 gk20a_channel_put(ch);
2820 }
2821 } else if (fifo_pbdma_status_id_type_v(status)
2822 == fifo_pbdma_status_id_type_tsgid_v()) {
2823 struct tsg_gk20a *tsg = &f->tsg[id];
2824 struct channel_gk20a *ch = NULL;
2825
2826 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
2827 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
2828 channel_gk20a, ch_entry) {
2829 if (gk20a_channel_get(ch)) {
2830 g->ops.fifo.set_error_notifier(ch,
2831 error_notifier);
2832 gk20a_channel_put(ch);
2833 }
2834 }
2835 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2836 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PBDMA_FAULT);
2837 }
2838}
2839
2840u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2841 u32 pbdma_id, unsigned int rc)
2842{
2843 u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
2844 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
2845
2846 u32 handled = 0;
2847 u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
2848 unsigned int rc_type = RC_TYPE_NO_RC;
2849 u32 pbdma_status_info = 0;
2850
2851 if (pbdma_intr_0) {
2852 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2853 "pbdma id %d intr_0 0x%08x pending",
2854 pbdma_id, pbdma_intr_0);
2855
2856 if (g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, pbdma_intr_0,
2857 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2858 rc_type = RC_TYPE_PBDMA_FAULT;
2859
2860 pbdma_status_info = gk20a_readl(g,
2861 fifo_pbdma_status_r(pbdma_id));
2862 }
2863 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
2864 }
2865
2866 if (pbdma_intr_1) {
2867 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2868 "pbdma id %d intr_1 0x%08x pending",
2869 pbdma_id, pbdma_intr_1);
2870
2871 if (g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, pbdma_intr_1,
2872 &handled, &error_notifier) != RC_TYPE_NO_RC) {
2873 rc_type = RC_TYPE_PBDMA_FAULT;
2874
2875 pbdma_status_info = gk20a_readl(g,
2876 fifo_pbdma_status_r(pbdma_id));
2877 }
2878 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
2879 }
2880
2881 if (rc == RC_YES && rc_type == RC_TYPE_PBDMA_FAULT) {
2882 gk20a_fifo_pbdma_fault_rc(g, f, pbdma_id, error_notifier,
2883 pbdma_status_info);
2884 }
2885
2886 return handled;
2887}
2888
2889static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
2890{
2891 struct fifo_gk20a *f = &g->fifo;
2892 u32 clear_intr = 0, i;
2893 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
2894 u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());
2895
2896 for (i = 0; i < host_num_pbdma; i++) {
2897 if (fifo_intr_pbdma_id_status_v(pbdma_pending, i)) {
2898 nvgpu_log(g, gpu_dbg_intr, "pbdma id %d intr pending", i);
2899 clear_intr |=
2900 gk20a_fifo_handle_pbdma_intr(g, f, i, RC_YES);
2901 }
2902 }
2903 return fifo_intr_0_pbdma_intr_pending_f();
2904}
2905
2906void gk20a_fifo_isr(struct gk20a *g)
2907{
2908 u32 error_intr_mask;
2909 u32 clear_intr = 0;
2910 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2911
2912 error_intr_mask = g->ops.fifo.intr_0_error_mask(g);
2913
2914 if (g->fifo.sw_ready) {
2915 /* note we're not actually in an "isr", but rather
2916 * in a threaded interrupt context... */
2917 nvgpu_mutex_acquire(&g->fifo.intr.isr.mutex);
2918
2919 nvgpu_log(g, gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
2920
2921 /* handle runlist update */
2922 if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
2923 gk20a_fifo_handle_runlist_event(g);
2924 clear_intr |= fifo_intr_0_runlist_event_pending_f();
2925 }
2926 if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f()) {
2927 clear_intr |= fifo_pbdma_isr(g, fifo_intr);
2928 }
2929
2930 if (g->ops.fifo.handle_ctxsw_timeout) {
2931 g->ops.fifo.handle_ctxsw_timeout(g, fifo_intr);
2932 }
2933
2934 if (unlikely((fifo_intr & error_intr_mask) != 0U)) {
2935 clear_intr |= fifo_error_isr(g, fifo_intr);
2936 }
2937
2938 nvgpu_mutex_release(&g->fifo.intr.isr.mutex);
2939 }
2940 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2941
2942 return;
2943}
2944
2945u32 gk20a_fifo_nonstall_isr(struct gk20a *g)
2946{
2947 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
2948 u32 clear_intr = 0;
2949
2950 nvgpu_log(g, gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);
2951
2952 if (fifo_intr & fifo_intr_0_channel_intr_pending_f()) {
2953 clear_intr = fifo_intr_0_channel_intr_pending_f();
2954 }
2955
2956 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2957
2958 return GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE;
2959}
2960
2961void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
2962{
2963 if (is_tsg) {
2964 gk20a_writel(g, fifo_preempt_r(),
2965 fifo_preempt_id_f(id) |
2966 fifo_preempt_type_tsg_f());
2967 } else {
2968 gk20a_writel(g, fifo_preempt_r(),
2969 fifo_preempt_chid_f(id) |
2970 fifo_preempt_type_channel_f());
2971 }
2972}
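
/*
 * Illustrative flow (a sketch; this mirrors __locked_fifo_preempt() below,
 * whose callers take the runlist locks and the PMU FIFO mutex first): the
 * register write above only issues the preempt request, so completion has
 * to be polled separately.
 *
 *	gk20a_fifo_issue_preempt(g, id, is_tsg);
 *	g->ops.fifo.is_preempt_pending(g, id,
 *		is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL);
 */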
2973
2974static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g)
2975{
2976	/* Use fifo_eng_timeout converted to ms for preempt
2977	 * polling. gr_idle_timeout, i.e. 3000 ms, is not appropriate
2978	 * for polling preempt completion, because the context switch
2979	 * timeout gets triggered every 100 ms and context switch
2980	 * recovery happens every 3000 ms */
2981
2982 return g->fifo_eng_timeout_us / 1000;
2983}
2984
2985int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
2986 unsigned int id_type)
2987{
2988 struct nvgpu_timeout timeout;
2989 u32 delay = GR_IDLE_CHECK_DEFAULT;
2990 int ret = -EBUSY;
2991
2992 nvgpu_timeout_init(g, &timeout, gk20a_fifo_get_preempt_timeout(g),
2993 NVGPU_TIMER_CPU_TIMER);
2994 do {
2995 if (!(gk20a_readl(g, fifo_preempt_r()) &
2996 fifo_preempt_pending_true_f())) {
2997 ret = 0;
2998 break;
2999 }
3000
3001 nvgpu_usleep_range(delay, delay * 2);
3002 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3003 } while (!nvgpu_timeout_expired(&timeout));
3004
3005 if (ret) {
3006 nvgpu_err(g, "preempt timeout: id: %u id_type: %d ",
3007 id, id_type);
3008 }
3009 return ret;
3010}
3011
3012void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3013{
3014 struct channel_gk20a *ch = NULL;
3015
3016 nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
3017
3018 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3019 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3020 channel_gk20a, ch_entry) {
3021 if (!gk20a_channel_get(ch)) {
3022 continue;
3023 }
3024 g->ops.fifo.set_error_notifier(ch,
3025 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3026 gk20a_channel_put(ch);
3027 }
3028 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3029 gk20a_fifo_recover_tsg(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
3030}
3031
3032void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
3033{
3034 nvgpu_err(g, "preempt channel %d timeout", ch->chid);
3035
3036 g->ops.fifo.set_error_notifier(ch,
3037 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
3038 gk20a_fifo_recover_ch(g, ch, true,
3039 RC_TYPE_PREEMPT_TIMEOUT);
3040}
3041
3042int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
3043{
3044 int ret;
3045 unsigned int id_type;
3046
3047 nvgpu_log_fn(g, "id: %d is_tsg: %d", id, is_tsg);
3048
3049 /* issue preempt */
3050 gk20a_fifo_issue_preempt(g, id, is_tsg);
3051
3052 id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
3053
3054 /* wait for preempt */
3055 ret = g->ops.fifo.is_preempt_pending(g, id, id_type);
3056
3057 return ret;
3058}
3059
3060int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
3061{
3062 struct fifo_gk20a *f = &g->fifo;
3063 u32 ret = 0;
3064 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3065 u32 mutex_ret = 0;
3066 u32 i;
3067
3068 nvgpu_log_fn(g, "chid: %d", ch->chid);
3069
3070 /* we have no idea which runlist we are using. lock all */
3071 for (i = 0; i < g->fifo.max_runlists; i++) {
3072 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3073 }
3074
3075 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3076
3077 ret = __locked_fifo_preempt(g, ch->chid, false);
3078
3079 if (!mutex_ret) {
3080 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3081 }
3082
3083 for (i = 0; i < g->fifo.max_runlists; i++) {
3084 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3085 }
3086
3087 if (ret) {
3088 if (nvgpu_platform_is_silicon(g)) {
3089 nvgpu_err(g, "preempt timed out for chid: %u, "
3090 "ctxsw timeout will trigger recovery if needed",
3091 ch->chid);
3092 } else {
3093 gk20a_fifo_preempt_timeout_rc(g, ch);
3094 }
3095 }
3096
3097 return ret;
3098}
3099
3100int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
3101{
3102 struct fifo_gk20a *f = &g->fifo;
3103 u32 ret = 0;
3104 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3105 u32 mutex_ret = 0;
3106 u32 i;
3107
3108 nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
3109
3110 /* we have no idea which runlist we are using. lock all */
3111 for (i = 0; i < g->fifo.max_runlists; i++) {
3112 nvgpu_mutex_acquire(&f->runlist_info[i].runlist_lock);
3113 }
3114
3115 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3116
3117 ret = __locked_fifo_preempt(g, tsg->tsgid, true);
3118
3119 if (!mutex_ret) {
3120 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3121 }
3122
3123 for (i = 0; i < g->fifo.max_runlists; i++) {
3124 nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
3125 }
3126
3127 if (ret) {
3128 if (nvgpu_platform_is_silicon(g)) {
3129 nvgpu_err(g, "preempt timed out for tsgid: %u, "
3130 "ctxsw timeout will trigger recovery if needed",
3131 tsg->tsgid);
3132 } else {
3133 gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
3134 }
3135 }
3136
3137 return ret;
3138}
3139
3140int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch)
3141{
3142 int err;
3143 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
3144
3145 if (tsg != NULL) {
3146 err = g->ops.fifo.preempt_tsg(ch->g, tsg);
3147 } else {
3148 err = g->ops.fifo.preempt_channel(ch->g, ch);
3149 }
3150
3151 return err;
3152}
3153
3154static void gk20a_fifo_sched_disable_rw(struct gk20a *g, u32 runlists_mask,
3155 u32 runlist_state)
3156{
3157 u32 reg_val;
3158
3159 reg_val = gk20a_readl(g, fifo_sched_disable_r());
3160
3161 if (runlist_state == RUNLIST_DISABLED) {
3162 reg_val |= runlists_mask;
3163 } else {
3164 reg_val &= (~runlists_mask);
3165 }
3166
3167 gk20a_writel(g, fifo_sched_disable_r(), reg_val);
3168
3169}
3170
3171void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
3172 u32 runlist_state)
3173{
3174 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3175 u32 mutex_ret;
3176
3177 nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
3178 runlists_mask, runlist_state);
3179
3180 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3181
3182 gk20a_fifo_sched_disable_rw(g, runlists_mask, runlist_state);
3183
3184 if (!mutex_ret) {
3185 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3186 }
3187}
3188
3189void gk20a_fifo_enable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3190{
3191 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3192 tsg->runlist_id), RUNLIST_ENABLED);
3193
3194}
3195
3196void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg)
3197{
3198 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3199 tsg->runlist_id), RUNLIST_DISABLED);
3200}
3201
3202int gk20a_fifo_enable_engine_activity(struct gk20a *g,
3203 struct fifo_engine_info_gk20a *eng_info)
3204{
3205 nvgpu_log(g, gpu_dbg_info, "start");
3206
3207 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3208 eng_info->runlist_id), RUNLIST_ENABLED);
3209 return 0;
3210}
3211
3212int gk20a_fifo_enable_all_engine_activity(struct gk20a *g)
3213{
3214 unsigned int i;
3215 int err = 0, ret = 0;
3216
3217 for (i = 0; i < g->fifo.num_engines; i++) {
3218 u32 active_engine_id = g->fifo.active_engines_list[i];
3219 err = gk20a_fifo_enable_engine_activity(g,
3220 &g->fifo.engine_info[active_engine_id]);
3221 if (err) {
3222 nvgpu_err(g,
3223 "failed to enable engine %d activity", active_engine_id);
3224 ret = err;
3225 }
3226 }
3227
3228 return ret;
3229}
3230
3231int gk20a_fifo_disable_engine_activity(struct gk20a *g,
3232 struct fifo_engine_info_gk20a *eng_info,
3233 bool wait_for_idle)
3234{
3235 u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
3236 u32 pbdma_chid = FIFO_INVAL_CHANNEL_ID;
3237 u32 engine_chid = FIFO_INVAL_CHANNEL_ID;
3238 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3239 int mutex_ret;
3240 struct channel_gk20a *ch = NULL;
3241 int err = 0;
3242
3243 nvgpu_log_fn(g, " ");
3244
3245 gr_stat =
3246 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3247 if (fifo_engine_status_engine_v(gr_stat) ==
3248 fifo_engine_status_engine_busy_v() && !wait_for_idle) {
3249 return -EBUSY;
3250 }
3251
3252 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3253
3254 gk20a_fifo_set_runlist_state(g, fifo_sched_disable_runlist_m(
3255 eng_info->runlist_id), RUNLIST_DISABLED);
3256
3257 /* chid from pbdma status */
3258 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
3259 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
3260 if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
3261 chan_stat == fifo_pbdma_status_chan_status_chsw_save_v()) {
3262 pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
3263 } else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
3264 chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v()) {
3265 pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
3266 }
3267
3268 if (pbdma_chid != FIFO_INVAL_CHANNEL_ID) {
3269 ch = gk20a_channel_from_id(g, pbdma_chid);
3270 if (ch != NULL) {
3271 err = g->ops.fifo.preempt_channel(g, ch);
3272 gk20a_channel_put(ch);
3273 }
3274 if (err != 0) {
3275 goto clean_up;
3276 }
3277 }
3278
3279 /* chid from engine status */
3280 eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
3281 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
3282 if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
3283 ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v()) {
3284 engine_chid = fifo_engine_status_id_v(eng_stat);
3285 } else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
3286 ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3287 engine_chid = fifo_engine_status_next_id_v(eng_stat);
3288 }
3289
3290	if (engine_chid != FIFO_INVAL_CHANNEL_ID && engine_chid != pbdma_chid) {
3291 ch = gk20a_channel_from_id(g, engine_chid);
3292 if (ch != NULL) {
3293 err = g->ops.fifo.preempt_channel(g, ch);
3294 gk20a_channel_put(ch);
3295 }
3296 if (err != 0) {
3297 goto clean_up;
3298 }
3299 }
3300
3301clean_up:
3302 if (!mutex_ret) {
3303 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3304 }
3305
3306 if (err) {
3307 nvgpu_log_fn(g, "failed");
3308 if (gk20a_fifo_enable_engine_activity(g, eng_info)) {
3309 nvgpu_err(g,
3310 "failed to enable gr engine activity");
3311 }
3312 } else {
3313 nvgpu_log_fn(g, "done");
3314 }
3315 return err;
3316}
3317
3318int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
3319 bool wait_for_idle)
3320{
3321 unsigned int i;
3322 int err = 0, ret = 0;
3323 u32 active_engine_id;
3324
3325 for (i = 0; i < g->fifo.num_engines; i++) {
3326 active_engine_id = g->fifo.active_engines_list[i];
3327 err = gk20a_fifo_disable_engine_activity(g,
3328 &g->fifo.engine_info[active_engine_id],
3329 wait_for_idle);
3330 if (err) {
3331 nvgpu_err(g, "failed to disable engine %d activity",
3332 active_engine_id);
3333 ret = err;
3334 break;
3335 }
3336 }
3337
3338 if (err) {
3339 while (i-- != 0) {
3340 active_engine_id = g->fifo.active_engines_list[i];
3341 err = gk20a_fifo_enable_engine_activity(g,
3342 &g->fifo.engine_info[active_engine_id]);
3343 if (err) {
3344 nvgpu_err(g,
3345 "failed to re-enable engine %d activity",
3346 active_engine_id);
3347 }
3348 }
3349 }
3350
3351 return ret;
3352}
3353
3354static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
3355{
3356 struct fifo_gk20a *f = &g->fifo;
3357 u32 engines = 0;
3358 unsigned int i;
3359
3360 for (i = 0; i < f->num_engines; i++) {
3361 u32 active_engine_id = g->fifo.active_engines_list[i];
3362 u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id));
3363 bool engine_busy = fifo_engine_status_engine_v(status) ==
3364 fifo_engine_status_engine_busy_v();
3365
3366 if (engine_busy &&
3367 (f->engine_info[active_engine_id].runlist_id == runlist_id)) {
3368 engines |= BIT(active_engine_id);
3369 }
3370 }
3371
3372 if (engines) {
3373 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true,
3374 RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
3375 }
3376}
3377
3378int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
3379{
3380 struct nvgpu_timeout timeout;
3381 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3382 int ret = -ETIMEDOUT;
3383
3384 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3385 NVGPU_TIMER_CPU_TIMER);
3386
3387 do {
3388 if ((gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
3389 fifo_eng_runlist_pending_true_f()) == 0) {
3390 ret = 0;
3391 break;
3392 }
3393
3394 nvgpu_usleep_range(delay, delay * 2);
3395 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3396 } while (!nvgpu_timeout_expired(&timeout));
3397
3398 if (ret) {
3399 nvgpu_err(g, "runlist wait timeout: runlist id: %u",
3400 runlist_id);
3401 }
3402
3403 return ret;
3404}
3405
3406void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist)
3407{
3408
3409 u32 runlist_entry_0 = ram_rl_entry_id_f(tsg->tsgid) |
3410 ram_rl_entry_type_tsg_f() |
3411 ram_rl_entry_tsg_length_f(tsg->num_active_channels);
3412
3413 if (tsg->timeslice_timeout) {
3414 runlist_entry_0 |=
3415 ram_rl_entry_timeslice_scale_f(tsg->timeslice_scale) |
3416 ram_rl_entry_timeslice_timeout_f(tsg->timeslice_timeout);
3417 } else {
3418 runlist_entry_0 |=
3419 ram_rl_entry_timeslice_scale_f(
3420 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) |
3421 ram_rl_entry_timeslice_timeout_f(
3422 NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT);
3423 }
3424
3425 runlist[0] = runlist_entry_0;
3426 runlist[1] = 0;
3427
3428}
3429
3430u32 gk20a_fifo_default_timeslice_us(struct gk20a *g)
3431{
3432 return (((u64)(NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT <<
3433 NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) *
3434 (u64)g->ptimer_src_freq) /
3435 (u64)PTIMER_REF_FREQ_HZ);
3436}
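
/*
 * Worked example for the formula above (the numbers are illustrative
 * assumptions, not values taken from this file): if the default timeslice
 * timeout encoding were 128, the default scale were 3, and ptimer_src_freq
 * equalled PTIMER_REF_FREQ_HZ, then
 *
 *	timeslice_us = ((128 << 3) * freq) / freq = 1024 us
 *
 * i.e. roughly a 1 ms default timeslice; a faster or slower ptimer source
 * clock scales the result proportionally.
 */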
3437
3438void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist)
3439{
3440 runlist[0] = ram_rl_entry_chid_f(ch->chid);
3441 runlist[1] = 0;
3442}
3443
3444/* recursively construct a runlist with interleaved bare channels and TSGs */
3445u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
3446 struct fifo_runlist_info_gk20a *runlist,
3447 u32 cur_level,
3448 u32 *runlist_entry,
3449 bool interleave_enabled,
3450 bool prev_empty,
3451 u32 *entries_left)
3452{
3453 bool last_level = cur_level == NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
3454 struct channel_gk20a *ch;
3455 bool skip_next = false;
3456 u32 tsgid, count = 0;
3457 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3458 struct gk20a *g = f->g;
3459
3460 nvgpu_log_fn(g, " ");
3461
3462 /* for each TSG, T, on this level, insert all higher-level channels
3463 and TSGs before inserting T. */
3464 for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
3465 struct tsg_gk20a *tsg = &f->tsg[tsgid];
3466
3467 if (tsg->interleave_level != cur_level) {
3468 continue;
3469 }
3470
3471 if (!last_level && !skip_next) {
3472 runlist_entry = gk20a_runlist_construct_locked(f,
3473 runlist,
3474 cur_level + 1,
3475 runlist_entry,
3476 interleave_enabled,
3477 false,
3478 entries_left);
3479 if (!interleave_enabled) {
3480 skip_next = true;
3481 }
3482 }
3483
3484 if (*entries_left == 0U) {
3485 return NULL;
3486 }
3487
3488 /* add TSG entry */
3489 nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
3490 f->g->ops.fifo.get_tsg_runlist_entry(tsg, runlist_entry);
3491 nvgpu_log_info(g, "tsg runlist count %d runlist [0] %x [1] %x\n",
3492 count, runlist_entry[0], runlist_entry[1]);
3493 runlist_entry += runlist_entry_words;
3494 count++;
3495 (*entries_left)--;
3496
3497 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
3498 /* add runnable channels bound to this TSG */
3499 nvgpu_list_for_each_entry(ch, &tsg->ch_list,
3500 channel_gk20a, ch_entry) {
3501 if (!test_bit((int)ch->chid,
3502 runlist->active_channels)) {
3503 continue;
3504 }
3505
3506 if (*entries_left == 0U) {
3507 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3508 return NULL;
3509 }
3510
3511 nvgpu_log_info(g, "add channel %d to runlist",
3512 ch->chid);
3513 f->g->ops.fifo.get_ch_runlist_entry(ch, runlist_entry);
3514 nvgpu_log_info(g,
3515 "run list count %d runlist [0] %x [1] %x\n",
3516 count, runlist_entry[0], runlist_entry[1]);
3517 count++;
3518 runlist_entry += runlist_entry_words;
3519 (*entries_left)--;
3520 }
3521 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
3522 }
3523
3524 /* append entries from higher level if this level is empty */
3525 if (!count && !last_level) {
3526 runlist_entry = gk20a_runlist_construct_locked(f,
3527 runlist,
3528 cur_level + 1,
3529 runlist_entry,
3530 interleave_enabled,
3531 true,
3532 entries_left);
3533 }
3534
3535 /*
3536 * if previous and this level have entries, append
3537 * entries from higher level.
3538 *
3539 * ex. dropping from MEDIUM to LOW, need to insert HIGH
3540 */
3541 if (interleave_enabled && count && !prev_empty && !last_level) {
3542 runlist_entry = gk20a_runlist_construct_locked(f,
3543 runlist,
3544 cur_level + 1,
3545 runlist_entry,
3546 interleave_enabled,
3547 false,
3548 entries_left);
3549 }
3550 return runlist_entry;
3551}
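
/*
 * Illustrative ordering produced by the recursion above (a sketch, assuming
 * one TSG per interleave level): with interleaving enabled, higher levels
 * are emitted before and between lower-level entries, so starting from the
 * LOW level the runlist comes out roughly as
 *
 *	HIGH, MEDIUM, HIGH, LOW
 *
 * With interleaving disabled, each higher level is emitted only once
 * (skip_next), giving HIGH, MEDIUM, LOW.
 */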
3552
3553int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
3554 u32 id,
3555 u32 runlist_id,
3556 u32 new_level)
3557{
3558 nvgpu_log_fn(g, " ");
3559
3560 g->fifo.tsg[id].interleave_level = new_level;
3561
3562 return 0;
3563}
3564
3565int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
3566{
3567 struct gk20a *g = tsg->g;
3568
3569 if (timeslice < g->min_timeslice_us ||
3570 timeslice > g->max_timeslice_us) {
3571 return -EINVAL;
3572 }
3573
3574 gk20a_channel_get_timescale_from_timeslice(g, timeslice,
3575 &tsg->timeslice_timeout, &tsg->timeslice_scale);
3576
3577 tsg->timeslice_us = timeslice;
3578
3579 return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
3580}
3581
3582void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
3583 u32 count, u32 buffer_index)
3584{
3585 struct fifo_runlist_info_gk20a *runlist = NULL;
3586 u64 runlist_iova;
3587 u32 val_wrote;
3588 struct nvgpu_os_linux *l;
3589
3590 runlist = &g->fifo.runlist_info[runlist_id];
3591 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]);
3592
3593
3594 if (count != 0) {
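		/* Debug dump: print the runlist buffer's kernel VA, physical,
		 * DMA and IOVA addresses, plus basic device state, before
		 * programming the runlist base register below. */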
3595 printk(KERN_INFO "Runlist base register: %0x\n", fifo_runlist_base_r());
3596 printk(KERN_INFO "Runlist KVA: %px\n", (void*)(runlist->mem[buffer_index].cpu_va));
3597 printk(KERN_INFO "Runlist PA: %px\n", (void*)virt_to_phys((runlist->mem[buffer_index].cpu_va)));
3598 printk(KERN_INFO "Runlist dma_address: %px\n", (void*)(runlist->mem[buffer_index].priv.sgt->sgl->dma_address));
3599 printk(KERN_INFO "Runlist pages KVA: %px\n", (void*)(runlist->mem[buffer_index].priv.pages));
3600 printk(KERN_INFO "Runlist pages PA: %px\n", (void*)virt_to_phys(runlist->mem[buffer_index].priv.pages));
3602 printk(KERN_INFO "Runlist page_to_phys %px + offset %px\n", (void*)(page_to_phys(sg_page(runlist->mem[buffer_index].priv.sgt->sgl))), (void*)(runlist->mem[buffer_index].priv.sgt->sgl->offset));
3603 printk(KERN_INFO "Runlist IOVA: %px\n", (void*)runlist_iova);
3604 printk(KERN_INFO "Using struct gk20* %px\n", g);
3605 printk(KERN_INFO "g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n", g->name, g->power_on, g->sw_ready, g->is_virtual);
3606 printk(KERN_INFO "COHERENT_SYSMEM? %d, iommuable? %d\n", nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM), nvgpu_iommuable(g));
3607 l = container_of(g, struct nvgpu_os_linux, g);
3608 printk(KERN_INFO "l->regs %px\n", l->regs);
3609 gk20a_writel(g, fifo_runlist_base_r(),
3610 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3611 nvgpu_aperture_mask(g, &runlist->mem[buffer_index],
3612 fifo_runlist_base_target_sys_mem_ncoh_f(),
3613 fifo_runlist_base_target_sys_mem_coh_f(),
3614 fifo_runlist_base_target_vid_mem_f()));
3615 val_wrote = nvgpu_readl(g, 0x2270);
3616 printk(KERN_INFO "Wrote runlist base as %0llx\n", (u64)(val_wrote & 0x0fffffff) << 12);
3617 }
3618
3619 gk20a_writel(g, fifo_runlist_r(),
3620 fifo_runlist_engine_f(runlist_id) |
3621 fifo_eng_runlist_length_f(count));
3622}
3623
3624int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3625 u32 chid, bool add,
3626 bool wait_for_finish)
3627{
3628 int ret = 0;
3629 struct fifo_gk20a *f = &g->fifo;
3630 struct fifo_runlist_info_gk20a *runlist = NULL;
3631 u32 *runlist_entry_base = NULL;
3632 u64 runlist_iova;
3633 u32 new_buf;
3634 struct channel_gk20a *ch = NULL;
3635 struct tsg_gk20a *tsg = NULL;
3636 u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
3637
3638 runlist = &f->runlist_info[runlist_id];
3639
3640 /* valid channel, add/remove it from active list.
3641 Otherwise, keep active list untouched for suspend/resume. */
3642 if (chid != FIFO_INVAL_CHANNEL_ID) {
3643 ch = &f->channel[chid];
3644 tsg = tsg_gk20a_from_ch(ch);
3645
3646 if (add) {
3647 if (test_and_set_bit(chid,
3648 runlist->active_channels) == 1) {
3649 return 0;
3650 }
3651 if (tsg && ++tsg->num_active_channels) {
3652 set_bit((int)f->channel[chid].tsgid,
3653 runlist->active_tsgs);
3654 }
3655 } else {
3656 if (test_and_clear_bit(chid,
3657 runlist->active_channels) == 0) {
3658 return 0;
3659 }
3660 if (tsg && --tsg->num_active_channels == 0) {
3661 clear_bit((int)f->channel[chid].tsgid,
3662 runlist->active_tsgs);
3663 }
3664 }
3665 }
3666
3667 new_buf = !runlist->cur_buffer;
3668
3669 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[new_buf]);
3670
3671 nvgpu_log_info(g, "runlist_id : %d, switch to new buffer 0x%16llx",
3672 runlist_id, (u64)runlist_iova);
3673
3674 if (!runlist_iova) {
3675 ret = -EINVAL;
3676 goto clean_up;
3677 }
3678
3679 runlist_entry_base = runlist->mem[new_buf].cpu_va;
3680 if (!runlist_entry_base) {
3681 ret = -ENOMEM;
3682 goto clean_up;
3683 }
3684
3685 if (chid != FIFO_INVAL_CHANNEL_ID || /* add/remove a valid channel */
3686 add /* resume to add all channels back */) {
3687 u32 max_entries = f->num_runlist_entries;
3688 u32 *runlist_end;
3689
3690 runlist_end = gk20a_runlist_construct_locked(f,
3691 runlist,
3692 0,
3693 runlist_entry_base,
3694 g->runlist_interleave,
3695 true,
3696 &max_entries);
3697 if (!runlist_end) {
3698 ret = -E2BIG;
3699 goto clean_up;
3700 }
3701 runlist->count = (runlist_end - runlist_entry_base) /
3702 runlist_entry_words;
3703 WARN_ON(runlist->count > f->num_runlist_entries);
3704 } else {
3705 /* suspend to remove all channels */
3706 runlist->count = 0;
3707 }
3708
3709 g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
3710
3711 if (wait_for_finish) {
3712 ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
3713
3714 if (ret == -ETIMEDOUT) {
3715 nvgpu_err(g, "runlist %d update timeout", runlist_id);
3716 /* trigger runlist update timeout recovery */
3717 return ret;
3718
3719 } else if (ret == -EINTR) {
3720 nvgpu_err(g, "runlist update interrupted");
3721 }
3722 }
3723
3724 runlist->cur_buffer = new_buf;
3725
3726clean_up:
3727 return ret;
3728}
3729
3730int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 chid,
3731 bool add, bool wait_for_finish)
3732{
3733 u32 ret = -EINVAL;
3734 u32 runlist_id = 0;
3735 u32 errcode;
3736 unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
3737
3738 if (!g) {
3739 goto end;
3740 }
3741
3742 ret = 0;
3743 for_each_set_bit(runlist_id, &ulong_runlist_ids, 32) {
3744 /* Capture the last failure error code */
3745 errcode = g->ops.fifo.update_runlist(g, runlist_id, chid, add, wait_for_finish);
3746 if (errcode) {
3747 nvgpu_err(g,
3748 "failed to update_runlist %d %d", runlist_id, errcode);
3749 ret = errcode;
3750 }
3751 }
3752end:
3753 return ret;
3754}
3755
3756/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
3757static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
3758 bool wait_preempt)
3759{
3760 struct gk20a *g = ch->g;
3761 struct fifo_runlist_info_gk20a *runlist =
3762 &g->fifo.runlist_info[ch->runlist_id];
3763 int ret = 0;
3764 u32 gr_eng_id = 0;
3765 u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
3766 u32 preempt_id;
3767 u32 preempt_type = 0;
3768
3769 if (1 != gk20a_fifo_get_engine_ids(
3770 g, &gr_eng_id, 1, ENGINE_GR_GK20A)) {
3771 return ret;
3772 }
3773 if (!(runlist->eng_bitmask & (1 << gr_eng_id))) {
3774 return ret;
3775 }
3776
3777 if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
3778 fifo_preempt_pending_true_f()) {
3779 return ret;
3780 }
3781
3782 fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3783 engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
3784 ctxstat = fifo_engine_status_ctx_status_v(engstat);
3785 if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
3786 /* host switching to next context, preempt that if needed */
3787 preempt_id = fifo_engine_status_next_id_v(engstat);
3788 preempt_type = fifo_engine_status_next_id_type_v(engstat);
3789 } else {
3790 return ret;
3791 }
3792 if (preempt_id == ch->tsgid && preempt_type) {
3793 return ret;
3794 }
3795 fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
3796 if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
3797 fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
3798 /* preempt useless if FECS acked save and started restore */
3799 return ret;
3800 }
3801
3802 gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
3803#ifdef TRACEPOINTS_ENABLED
3804 trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
3805 fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
3806 gk20a_readl(g, fifo_preempt_r()));
3807#endif
3808 if (wait_preempt) {
3809 g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type);
3810 }
3811#ifdef TRACEPOINTS_ENABLED
3812 trace_gk20a_reschedule_preempted_next(ch->chid);
3813#endif
3814 return ret;
3815}
3816
3817int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
3818{
3819 return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
3820}
3821
3822/* trigger host to expire current timeslice and reschedule runlist from front */
3823int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
3824 bool wait_preempt)
3825{
3826 struct gk20a *g = ch->g;
3827 struct fifo_runlist_info_gk20a *runlist;
3828 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3829 u32 mutex_ret;
3830 int ret = 0;
3831
3832 runlist = &g->fifo.runlist_info[ch->runlist_id];
3833 if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
3834 return -EBUSY;
3835 }
3836
3837 mutex_ret = nvgpu_pmu_mutex_acquire(
3838 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3839
3840 g->ops.fifo.runlist_hw_submit(
3841 g, ch->runlist_id, runlist->count, runlist->cur_buffer);
3842
3843 if (preempt_next) {
3844 __locked_fifo_reschedule_preempt_next(ch, wait_preempt);
3845 }
3846
3847 gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
3848
3849 if (!mutex_ret) {
3850 nvgpu_pmu_mutex_release(
3851 &g->pmu, PMU_MUTEX_ID_FIFO, &token);
3852 }
3853 nvgpu_mutex_release(&runlist->runlist_lock);
3854
3855 return ret;
3856}
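
/*
 * Descriptive note (no new behavior): nvgpu_fifo_reschedule_runlist() simply
 * resubmits the current runlist buffer so host re-evaluates it from the
 * front, and with preempt_next it additionally preempts whatever context
 * host is switching in on GR, skipping the preempt when it would be useless
 * (the next context already belongs to the caller's TSG, or FECS has already
 * acked the save and started the restore).
 */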
3857
3858/* add/remove a channel from runlist
3859 special cases below: runlist->active_channels will NOT be changed.
3860 (chid == ~0 && !add) means remove all active channels from runlist.
3861 (chid == ~0 && add) means restore all active channels on runlist. */
3862int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 chid,
3863 bool add, bool wait_for_finish)
3864{
3865 struct fifo_runlist_info_gk20a *runlist = NULL;
3866 struct fifo_gk20a *f = &g->fifo;
3867 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
3868 u32 mutex_ret;
3869 int ret = 0;
3870
3871 nvgpu_log_fn(g, " ");
3872
3873 runlist = &f->runlist_info[runlist_id];
3874
3875 nvgpu_mutex_acquire(&runlist->runlist_lock);
3876
3877 mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3878
3879 ret = gk20a_fifo_update_runlist_locked(g, runlist_id, chid, add,
3880 wait_for_finish);
3881
3882 if (!mutex_ret) {
3883 nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3884 }
3885
3886 nvgpu_mutex_release(&runlist->runlist_lock);
3887
3888 if (ret == -ETIMEDOUT) {
3889 gk20a_fifo_runlist_reset_engines(g, runlist_id);
3890 }
3891
3892 return ret;
3893}
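
/*
 * Illustrative usage of the special cases documented above (a sketch, not
 * a call site in this file):
 *
 *	gk20a_fifo_update_runlist(g, rlid, FIFO_INVAL_CHANNEL_ID, false, true);
 *		removes all active channels (suspend path)
 *	gk20a_fifo_update_runlist(g, rlid, FIFO_INVAL_CHANNEL_ID, true, true);
 *		restores all active channels (resume path)
 *
 * In both cases runlist->active_channels itself is left untouched.
 */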
3894
3895int gk20a_fifo_suspend(struct gk20a *g)
3896{
3897 nvgpu_log_fn(g, " ");
3898
3899 /* stop bar1 snooping */
3900 if (g->ops.mm.is_bar1_supported(g)) {
3901 gk20a_writel(g, fifo_bar1_base_r(),
3902 fifo_bar1_base_valid_false_f());
3903 }
3904
3905 /* disable fifo intr */
3906 gk20a_writel(g, fifo_intr_en_0_r(), 0);
3907 gk20a_writel(g, fifo_intr_en_1_r(), 0);
3908
3909 nvgpu_log_fn(g, "done");
3910 return 0;
3911}
3912
3913bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
3914{
3915 if (gk20a_readl(g, fifo_intr_0_r()) &
3916 fifo_intr_0_mmu_fault_pending_f()) {
3917 return true;
3918 } else {
3919 return false;
3920 }
3921}
3922
3923bool gk20a_fifo_is_engine_busy(struct gk20a *g)
3924{
3925 u32 i, host_num_engines;
3926
3927 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3928
3929 for (i = 0; i < host_num_engines; i++) {
3930 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3931 if (fifo_engine_status_engine_v(status) ==
3932 fifo_engine_status_engine_busy_v()) {
3933 return true;
3934 }
3935 }
3936 return false;
3937}
3938
3939int gk20a_fifo_wait_engine_idle(struct gk20a *g)
3940{
3941 struct nvgpu_timeout timeout;
3942 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
3943 int ret = -ETIMEDOUT;
3944 u32 i, host_num_engines;
3945
3946 nvgpu_log_fn(g, " ");
3947
3948 host_num_engines =
3949 nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
3950
3951 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3952 NVGPU_TIMER_CPU_TIMER);
3953
3954 for (i = 0; i < host_num_engines; i++) {
3955 do {
3956 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
3957 if (!fifo_engine_status_engine_v(status)) {
3958 ret = 0;
3959 break;
3960 }
3961
3962 nvgpu_usleep_range(delay, delay * 2);
3963 delay = min_t(unsigned long,
3964 delay << 1, GR_IDLE_CHECK_MAX);
3965 } while (!nvgpu_timeout_expired(&timeout));
3966
3967 if (ret) {
3968 nvgpu_log_info(g, "cannot idle engine %u", i);
3969 break;
3970 }
3971 }
3972
3973 nvgpu_log_fn(g, "done");
3974
3975 return ret;
3976}
3977
3978u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
3979{
3980 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
3981}
3982
3983static const char * const ccsr_chan_status_str[] = {
3984 "idle",
3985 "pending",
3986 "pending_ctx_reload",
3987 "pending_acquire",
3988 "pending_acq_ctx_reload",
3989 "on_pbdma",
3990 "on_pbdma_and_eng",
3991 "on_eng",
3992 "on_eng_pending_acquire",
3993 "on_eng_pending",
3994 "on_pbdma_ctx_reload",
3995 "on_pbdma_and_eng_ctx_reload",
3996 "on_eng_ctx_reload",
3997 "on_eng_pending_ctx_reload",
3998 "on_eng_pending_acq_ctx_reload",
3999};
4000
4001static const char * const pbdma_chan_eng_ctx_status_str[] = {
4002 "invalid",
4003 "valid",
4004 "NA",
4005 "NA",
4006 "NA",
4007 "load",
4008 "save",
4009 "switch",
4010};
4011
4012static const char * const not_found_str[] = {
4013 "NOT FOUND"
4014};
4015
4016const char *gk20a_decode_ccsr_chan_status(u32 index)
4017{
4018 if (index >= ARRAY_SIZE(ccsr_chan_status_str)) {
4019 return not_found_str[0];
4020 } else {
4021 return ccsr_chan_status_str[index];
4022 }
4023}
4024
4025const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index)
4026{
4027 if (index >= ARRAY_SIZE(pbdma_chan_eng_ctx_status_str)) {
4028 return not_found_str[0];
4029 } else {
4030 return pbdma_chan_eng_ctx_status_str[index];
4031 }
4032}
4033
4034bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid)
4035{
4036 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4037
4038 return ccsr_channel_next_v(channel) == ccsr_channel_next_true_v();
4039}
4040
4041bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid)
4042{
4043 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4044 u32 status = ccsr_channel_status_v(channel);
4045
4046 return (status == ccsr_channel_status_pending_ctx_reload_v() ||
4047 status == ccsr_channel_status_pending_acq_ctx_reload_v() ||
4048 status == ccsr_channel_status_on_pbdma_ctx_reload_v() ||
4049 status == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() ||
4050 status == ccsr_channel_status_on_eng_ctx_reload_v() ||
4051 status == ccsr_channel_status_on_eng_pending_ctx_reload_v() ||
4052 status == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v());
4053}
4054
4055void gk20a_dump_channel_status_ramfc(struct gk20a *g,
4056 struct gk20a_debug_output *o,
4057 u32 chid,
4058 struct ch_state *ch_state)
4059{
4060 u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
4061 u32 status = ccsr_channel_status_v(channel);
4062 u32 syncpointa, syncpointb;
4063 u32 *inst_mem;
4064 struct channel_gk20a *c = g->fifo.channel + chid;
4065 struct nvgpu_semaphore_int *hw_sema = NULL;
4066
4067 if (c->hw_sema) {
4068 hw_sema = c->hw_sema;
4069 }
4070
4071 if (!ch_state) {
4072 return;
4073 }
4074
4075 inst_mem = &ch_state->inst_block[0];
4076
4077 syncpointa = inst_mem[ram_fc_syncpointa_w()];
4078 syncpointb = inst_mem[ram_fc_syncpointb_w()];
4079
4080 gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", chid,
4081 g->name,
4082 ch_state->pid,
4083 ch_state->refs,
4084 ch_state->deterministic ? ", deterministic" : "");
4085 gk20a_debug_output(o, "channel status: %s in use %s %s\n",
4086 ccsr_channel_enable_v(channel) ? "" : "not",
4087 gk20a_decode_ccsr_chan_status(status),
4088 ccsr_channel_busy_v(channel) ? "busy" : "not busy");
4089 gk20a_debug_output(o, "RAMFC : TOP: %016llx PUT: %016llx GET: %016llx "
4090 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
4091 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
4092 (u64)inst_mem[ram_fc_pb_top_level_get_w()] +
4093 ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
4094 (u64)inst_mem[ram_fc_pb_put_w()] +
4095 ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
4096 (u64)inst_mem[ram_fc_pb_get_w()] +
4097 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
4098 (u64)inst_mem[ram_fc_pb_fetch_w()] +
4099 ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
4100 inst_mem[ram_fc_pb_header_w()],
4101 inst_mem[ram_fc_pb_count_w()],
4102 syncpointa,
4103 syncpointb,
4104 inst_mem[ram_fc_semaphorea_w()],
4105 inst_mem[ram_fc_semaphoreb_w()],
4106 inst_mem[ram_fc_semaphorec_w()],
4107 inst_mem[ram_fc_semaphored_w()]);
4108 if (hw_sema) {
4109 gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
4110 "next_val: 0x%08x addr: 0x%010llx\n",
4111 __nvgpu_semaphore_read(hw_sema),
4112 nvgpu_atomic_read(&hw_sema->next_value),
4113 nvgpu_hw_sema_addr(hw_sema));
4114 }
4115
4116#ifdef CONFIG_TEGRA_GK20A_NVHOST
4117 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
4118 && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
4119 pbdma_syncpointb_wait_switch_en_v()))
4120 gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n",
4121 (status == 3 || status == 8) ? "Waiting" : "Waited",
4122 pbdma_syncpointb_syncpt_index_v(syncpointb),
4123 nvgpu_nvhost_syncpt_get_name(g->nvhost_dev,
4124 pbdma_syncpointb_syncpt_index_v(syncpointb)),
4125 pbdma_syncpointa_payload_v(syncpointa));
4126#endif
4127
4128 gk20a_debug_output(o, "\n");
4129}
4130
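/*
 * The dump below works from a snapshot: channel refs are taken and
 * per-channel buffers allocated first, each instance block is copied out
 * while the ref is held, and the output is formatted afterwards from the
 * copies.
 */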
4131void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
4132 struct gk20a_debug_output *o)
4133{
4134 struct fifo_gk20a *f = &g->fifo;
4135 u32 chid;
4136 struct ch_state **ch_state;
4137
4138 ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
4139 if (!ch_state) {
4140 gk20a_debug_output(o, "cannot alloc memory for channels\n");
4141 return;
4142 }
4143
4144 for (chid = 0; chid < f->num_channels; chid++) {
4145 struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
4146 if (ch != NULL) {
4147 ch_state[chid] =
4148 nvgpu_kmalloc(g, sizeof(struct ch_state) +
4149 ram_in_alloc_size_v());
4150 			/* the channel ref taken above is kept for the
4151 			 * loop below when the alloc succeeds */
4152 if (!ch_state[chid]) {
4153 gk20a_channel_put(ch);
4154 }
4155 }
4156 }
4157
4158 for (chid = 0; chid < f->num_channels; chid++) {
4159 struct channel_gk20a *ch = &f->channel[chid];
4160 if (!ch_state[chid]) {
4161 continue;
4162 }
4163
4164 ch_state[chid]->pid = ch->pid;
4165 ch_state[chid]->refs = nvgpu_atomic_read(&ch->ref_count);
4166 ch_state[chid]->deterministic = ch->deterministic;
4167 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
4168 &ch_state[chid]->inst_block[0],
4169 ram_in_alloc_size_v());
4170 gk20a_channel_put(ch);
4171 }
4172 for (chid = 0; chid < f->num_channels; chid++) {
4173 if (ch_state[chid]) {
4174 g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
4175 ch_state[chid]);
4176 nvgpu_kfree(g, ch_state[chid]);
4177 }
4178 }
4179 nvgpu_kfree(g, ch_state);
4180}
4181
4182void gk20a_dump_pbdma_status(struct gk20a *g,
4183 struct gk20a_debug_output *o)
4184{
4185 u32 i, host_num_pbdma;
4186
4187 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
4188
4189 for (i = 0; i < host_num_pbdma; i++) {
4190 u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
4191 u32 chan_status = fifo_pbdma_status_chan_status_v(status);
4192
4193 gk20a_debug_output(o, "%s pbdma %d: ", g->name, i);
4194 gk20a_debug_output(o,
4195 "id: %d (%s), next_id: %d (%s) chan status: %s\n",
4196 fifo_pbdma_status_id_v(status),
4197 fifo_pbdma_status_id_type_v(status) ?
4198 "tsg" : "channel",
4199 fifo_pbdma_status_next_id_v(status),
4200 fifo_pbdma_status_next_id_type_v(status) ?
4201 "tsg" : "channel",
4202 gk20a_decode_pbdma_chan_eng_ctx_status(chan_status));
4203 gk20a_debug_output(o, "PBDMA_PUT: %016llx PBDMA_GET: %016llx "
4204 "GP_PUT: %08x GP_GET: %08x "
4205 "FETCH: %08x HEADER: %08x\n"
4206 "HDR: %08x SHADOW0: %08x SHADOW1: %08x",
4207 (u64)gk20a_readl(g, pbdma_put_r(i)) +
4208 ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
4209 (u64)gk20a_readl(g, pbdma_get_r(i)) +
4210 ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
4211 gk20a_readl(g, pbdma_gp_put_r(i)),
4212 gk20a_readl(g, pbdma_gp_get_r(i)),
4213 gk20a_readl(g, pbdma_gp_fetch_r(i)),
4214 gk20a_readl(g, pbdma_pb_header_r(i)),
4215 gk20a_readl(g, pbdma_hdr_shadow_r(i)),
4216 gk20a_readl(g, pbdma_gp_shadow_0_r(i)),
4217 gk20a_readl(g, pbdma_gp_shadow_1_r(i)));
4218 }
4219 gk20a_debug_output(o, "\n");
4220}
4221
4222void gk20a_dump_eng_status(struct gk20a *g,
4223 struct gk20a_debug_output *o)
4224{
4225 u32 i, host_num_engines;
4226
4227 host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
4228
4229 for (i = 0; i < host_num_engines; i++) {
4230 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
4231 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
4232
4233 gk20a_debug_output(o, "%s eng %d: ", g->name, i);
4234 gk20a_debug_output(o,
4235 "id: %d (%s), next_id: %d (%s), ctx status: %s ",
4236 fifo_engine_status_id_v(status),
4237 fifo_engine_status_id_type_v(status) ?
4238 "tsg" : "channel",
4239 fifo_engine_status_next_id_v(status),
4240 fifo_engine_status_next_id_type_v(status) ?
4241 "tsg" : "channel",
4242 gk20a_decode_pbdma_chan_eng_ctx_status(ctx_status));
4243
4244 if (fifo_engine_status_faulted_v(status)) {
4245 gk20a_debug_output(o, "faulted ");
4246 }
4247 if (fifo_engine_status_engine_v(status)) {
4248 gk20a_debug_output(o, "busy ");
4249 }
4250 gk20a_debug_output(o, "\n");
4251 }
4252 gk20a_debug_output(o, "\n");
4253}
4254
4255void gk20a_fifo_enable_channel(struct channel_gk20a *ch)
4256{
4257 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4258 gk20a_readl(ch->g, ccsr_channel_r(ch->chid)) |
4259 ccsr_channel_enable_set_true_f());
4260}
4261
4262void gk20a_fifo_disable_channel(struct channel_gk20a *ch)
4263{
4264 gk20a_writel(ch->g, ccsr_channel_r(ch->chid),
4265 gk20a_readl(ch->g,
4266 ccsr_channel_r(ch->chid)) |
4267 ccsr_channel_enable_clr_true_f());
4268}
4269
4270void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
4271{
4272 struct gk20a *g = ch_gk20a->g;
4273
4274 nvgpu_log_fn(g, " ");
4275
4276 if (nvgpu_atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
4277 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->chid),
4278 ccsr_channel_inst_ptr_f(0) |
4279 ccsr_channel_inst_bind_false_f());
4280 }
4281}
4282
4283static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
4284{
4285 u32 addr_lo;
4286 u32 addr_hi;
4287 struct gk20a *g = c->g;
4288
4289 nvgpu_log_fn(g, " ");
4290
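	/*
	 * The USERD pointer is written as two RAMFC words: the low word
	 * carries the aperture target plus the base-shifted address, the
	 * high word the upper 32 bits of the IOVA.
	 */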
4291 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
4292 addr_hi = u64_hi32(c->userd_iova);
4293
4294 nvgpu_log_info(g, "channel %d : set ramfc userd 0x%16llx",
4295 c->chid, (u64)c->userd_iova);
4296
4297 nvgpu_mem_wr32(g, &c->inst_block,
4298 ram_in_ramfc_w() + ram_fc_userd_w(),
4299 nvgpu_aperture_mask(g, &g->fifo.userd,
4300 pbdma_userd_target_sys_mem_ncoh_f(),
4301 pbdma_userd_target_sys_mem_coh_f(),
4302 pbdma_userd_target_vid_mem_f()) |
4303 pbdma_userd_addr_f(addr_lo));
4304
4305 nvgpu_mem_wr32(g, &c->inst_block,
4306 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
4307 pbdma_userd_hi_addr_f(addr_hi));
4308
4309 return 0;
4310}
4311
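/*
 * Programs the channel's RAMFC: GPFIFO base and size, PBDMA signature and
 * formats, default runlist/PB timeslices and the acquire timeout, then
 * commits the USERD pointer via gk20a_fifo_commit_userd().
 */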
4312int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
4313 u64 gpfifo_base, u32 gpfifo_entries,
4314 unsigned long timeout,
4315 u32 flags)
4316{
4317 struct gk20a *g = c->g;
4318 struct nvgpu_mem *mem = &c->inst_block;
4319
4320 nvgpu_log_fn(g, " ");
4321
4322 nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v());
4323
4324 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
4325 pbdma_gp_base_offset_f(
4326 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
4327
4328 nvgpu_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
4329 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
4330 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
4331
4332 nvgpu_mem_wr32(g, mem, ram_fc_signature_w(),
4333 c->g->ops.fifo.get_pbdma_signature(c->g));
4334
4335 nvgpu_mem_wr32(g, mem, ram_fc_formats_w(),
4336 pbdma_formats_gp_fermi0_f() |
4337 pbdma_formats_pb_fermi1_f() |
4338 pbdma_formats_mp_fermi0_f());
4339
4340 nvgpu_mem_wr32(g, mem, ram_fc_pb_header_w(),
4341 pbdma_pb_header_priv_user_f() |
4342 pbdma_pb_header_method_zero_f() |
4343 pbdma_pb_header_subchannel_zero_f() |
4344 pbdma_pb_header_level_main_f() |
4345 pbdma_pb_header_first_true_f() |
4346 pbdma_pb_header_type_inc_f());
4347
4348 nvgpu_mem_wr32(g, mem, ram_fc_subdevice_w(),
4349 pbdma_subdevice_id_f(1) |
4350 pbdma_subdevice_status_active_f() |
4351 pbdma_subdevice_channel_dma_enable_f());
4352
4353 nvgpu_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
4354
4355 nvgpu_mem_wr32(g, mem, ram_fc_acquire_w(),
4356 g->ops.fifo.pbdma_acquire_val(timeout));
4357
4358 nvgpu_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
4359 fifo_runlist_timeslice_timeout_128_f() |
4360 fifo_runlist_timeslice_timescale_3_f() |
4361 fifo_runlist_timeslice_enable_true_f());
4362
4363 nvgpu_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
4364 fifo_pb_timeslice_timeout_16_f() |
4365 fifo_pb_timeslice_timescale_0_f() |
4366 fifo_pb_timeslice_enable_true_f());
4367
4368 nvgpu_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->chid));
4369
4370 if (c->is_privileged_channel) {
4371 gk20a_fifo_setup_ramfc_for_privileged_channel(c);
4372 }
4373
4374 return gk20a_fifo_commit_userd(c);
4375}
4376
4377void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
4378{
4379 struct gk20a *g = c->g;
4380 struct nvgpu_mem *mem = &c->inst_block;
4381
4382 nvgpu_log_info(g, "channel %d : set ramfc privileged_channel", c->chid);
4383
4384 /* Enable HCE priv mode for phys mode transfer */
4385 nvgpu_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
4386 pbdma_hce_ctrl_hce_priv_mode_yes_f());
4387}
4388
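/*
 * Clears the channel's USERD words (GP/PB get/put pointers and related
 * fields). Channels set up for usermode submit use their own USERD
 * allocation; others use their slot in the global USERD region.
 */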
4389int gk20a_fifo_setup_userd(struct channel_gk20a *c)
4390{
4391 struct gk20a *g = c->g;
4392 struct nvgpu_mem *mem;
4393 u32 offset;
4394
4395 nvgpu_log_fn(g, " ");
4396
4397 if (nvgpu_mem_is_valid(&c->usermode_userd)) {
4398 mem = &c->usermode_userd;
4399 offset = 0;
4400 } else {
4401 mem = &g->fifo.userd;
4402 offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
4403 }
4404
4405 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
4406 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
4407 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
4408 nvgpu_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
4409 nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
4410 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
4411 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
4412 nvgpu_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
4413 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
4414 nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
4415
4416 return 0;
4417}
4418
4419int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
4420{
4421 int err;
4422
4423 nvgpu_log_fn(g, " ");
4424
4425 err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
4426 if (err) {
4427 return err;
4428 }
4429
4430 nvgpu_log_info(g, "channel %d inst block physical addr: 0x%16llx",
4431 ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
4432
4433 nvgpu_log_fn(g, "done");
4434 return 0;
4435}
4436
4437void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
4438{
4439 nvgpu_free_inst_block(g, &ch->inst_block);
4440}
4441
4442u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
4443{
4444 return gk20a_bar1_readl(g,
4445 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
4446}
4447
4448u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c)
4449{
4450 u32 lo = gk20a_bar1_readl(g,
4451 c->userd_gpu_va + sizeof(u32) * ram_userd_get_w());
4452 u32 hi = gk20a_bar1_readl(g,
4453 c->userd_gpu_va + sizeof(u32) * ram_userd_get_hi_w());
4454
4455 return ((u64)hi << 32) | lo;
4456}
4457
4458void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
4459{
4460 gk20a_bar1_writel(g,
4461 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
4462 c->gpfifo.put);
4463}
4464
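/*
 * The acquire timeout is encoded as mantissa * 2^exp in units of 1024 ns.
 * The requested timeout is first scaled to 80% of the channel watchdog
 * value; if it does not fit in the 16-bit mantissa it is shifted into the
 * exponent, clamping at the hardware maximum.
 */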
4465u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
4466{
4467 u32 val, exp, man;
4468 unsigned int val_len;
4469
4470 val = pbdma_acquire_retry_man_2_f() |
4471 pbdma_acquire_retry_exp_2_f();
4472
4473 if (!timeout) {
4474 return val;
4475 }
4476
4477 timeout *= 80UL;
4478 do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
4479 timeout *= 1000000UL; /* ms -> ns */
4480 do_div(timeout, 1024); /* in unit of 1024ns */
4481 val_len = fls(timeout >> 32) + 32;
4482 if (val_len == 32) {
4483 val_len = fls(timeout);
4484 }
4485 	if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* mantissa: 16 bits */
4486 exp = pbdma_acquire_timeout_exp_max_v();
4487 man = pbdma_acquire_timeout_man_max_v();
4488 } else if (val_len > 16) {
4489 exp = val_len - 16;
4490 man = timeout >> exp;
4491 } else {
4492 exp = 0;
4493 man = timeout;
4494 }
4495
4496 val |= pbdma_acquire_timeout_exp_f(exp) |
4497 pbdma_acquire_timeout_man_f(man) |
4498 pbdma_acquire_timeout_en_enable_f();
4499
4500 return val;
4501}
4502
4503const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
4504{
4505 switch (interleave_level) {
4506 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
4507 return "LOW";
4508
4509 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
4510 return "MEDIUM";
4511
4512 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
4513 return "HIGH";
4514
4515 default:
4516 return "?";
4517 }
4518}
4519
4520u32 gk20a_fifo_get_sema_wait_cmd_size(void)
4521{
4522 return 8;
4523}
4524
4525u32 gk20a_fifo_get_sema_incr_cmd_size(void)
4526{
4527 return 10;
4528}
4529
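/*
 * Emits the semaphore GPU VA via the SEMAPHORE_A/B methods, then either an
 * acquire (acq_geq with channel switch enabled) or a release (optionally
 * without WFI) followed by a non-stall interrupt. The resulting sizes match
 * gk20a_fifo_get_sema_wait_cmd_size() (8 words) and
 * gk20a_fifo_get_sema_incr_cmd_size() (10 words) above.
 */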
4530void gk20a_fifo_add_sema_cmd(struct gk20a *g,
4531 struct nvgpu_semaphore *s, u64 sema_va,
4532 struct priv_cmd_entry *cmd,
4533 u32 off, bool acquire, bool wfi)
4534{
4535 nvgpu_log_fn(g, " ");
4536
4537 /* semaphore_a */
4538 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
4539 /* offset_upper */
4540 nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff);
4541 /* semaphore_b */
4542 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
4543 /* offset */
4544 nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff);
4545
4546 if (acquire) {
4547 /* semaphore_c */
4548 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4549 /* payload */
4550 nvgpu_mem_wr32(g, cmd->mem, off++,
4551 nvgpu_semaphore_get_value(s));
4552 /* semaphore_d */
4553 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4554 /* operation: acq_geq, switch_en */
4555 nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
4556 } else {
4557 /* semaphore_c */
4558 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4559 /* payload */
4560 nvgpu_mem_wr32(g, cmd->mem, off++,
4561 nvgpu_semaphore_get_value(s));
4562 /* semaphore_d */
4563 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4564 /* operation: release, wfi */
4565 nvgpu_mem_wr32(g, cmd->mem, off++,
4566 0x2 | ((wfi ? 0x0 : 0x1) << 20));
4567 /* non_stall_int */
4568 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
4569 /* ignored */
4570 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4571 }
4572}
4573
4574#ifdef CONFIG_TEGRA_GK20A_NVHOST
4575void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
4576 struct priv_cmd_entry *cmd, u32 off,
4577 u32 id, u32 thresh, u64 gpu_va)
4578{
4579 nvgpu_log_fn(g, " ");
4580
4581 off = cmd->off + off;
4582 /* syncpoint_a */
4583 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4584 /* payload */
4585 nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
4586 /* syncpoint_b */
4587 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4588 /* syncpt_id, switch_en, wait */
4589 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
4590}
4591
4592u32 gk20a_fifo_get_syncpt_wait_cmd_size(void)
4593{
4594 return 4;
4595}
4596
4597u32 gk20a_fifo_get_syncpt_incr_per_release(void)
4598{
4599 return 2;
4600}
4601
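/*
 * Each release increments the syncpoint twice (matching
 * gk20a_fifo_get_syncpt_incr_per_release()); with the optional WFI pair
 * prepended this gives the 6- or 8-word sizes reported by
 * gk20a_fifo_get_syncpt_incr_cmd_size().
 */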
4602void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
4603 bool wfi_cmd, struct priv_cmd_entry *cmd,
4604 u32 id, u64 gpu_va)
4605{
4606 u32 off = cmd->off;
4607
4608 nvgpu_log_fn(g, " ");
4609 if (wfi_cmd) {
4610 /* wfi */
4611 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001E);
4612 /* handle, ignored */
4613 nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000);
4614 }
4615 /* syncpoint_a */
4616 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
4617 /* payload, ignored */
4618 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4619 /* syncpoint_b */
4620 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4621 /* syncpt_id, incr */
4622 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4623 /* syncpoint_b */
4624 nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
4625 /* syncpt_id, incr */
4626 nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
4627
4628}
4629
4630u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd)
4631{
4632 if (wfi_cmd)
4633 return 8;
4634 else
4635 return 6;
4636}
4637
4638void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
4639 struct nvgpu_mem *syncpt_buf)
4640{
4641
4642}
4643
4644int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
4645 u32 syncpt_id, struct nvgpu_mem *syncpt_buf)
4646{
4647 return 0;
4648}
4649#endif
diff --git a/include/gk20a/fifo_gk20a.h b/include/gk20a/fifo_gk20a.h
new file mode 100644
index 0000000..26365ca
--- /dev/null
+++ b/include/gk20a/fifo_gk20a.h
@@ -0,0 +1,471 @@
1/*
2 * GK20A graphics fifo (gr host)
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef FIFO_GK20A_H
25#define FIFO_GK20A_H
26
27#include <nvgpu/kref.h>
28
29struct gk20a_debug_output;
30struct mmu_fault_info;
31struct nvgpu_semaphore;
32struct channel_gk20a;
33struct tsg_gk20a;
34
35enum {
36 NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW = 0,
37 NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM,
38 NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH,
39 NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS,
40};
41
42#define MAX_RUNLIST_BUFFERS 2
43
44#define FIFO_INVAL_ENGINE_ID ((u32)~0)
45#define FIFO_INVAL_CHANNEL_ID ((u32)~0)
46#define FIFO_INVAL_TSG_ID ((u32)~0)
47#define FIFO_INVAL_RUNLIST_ID ((u32)~0)
48
49#define ID_TYPE_CHANNEL 0
50#define ID_TYPE_TSG 1
51#define ID_TYPE_UNKNOWN ((u32)~0)
52
53#define RC_YES 1
54#define RC_NO 0
55
56#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000
57
58#define RC_TYPE_NO_RC 0
59#define RC_TYPE_MMU_FAULT 1
60#define RC_TYPE_PBDMA_FAULT 2
61#define RC_TYPE_GR_FAULT 3
62#define RC_TYPE_PREEMPT_TIMEOUT 4
63#define RC_TYPE_CTXSW_TIMEOUT 5
64#define RC_TYPE_RUNLIST_UPDATE_TIMEOUT 6
65#define RC_TYPE_FORCE_RESET 7
66#define RC_TYPE_SCHED_ERR 8
67
68#define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL
69#define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL
70
71/*
72 * Number of entries in the kickoff latency buffer used to build the
73 * profiling histogram. The value is chosen to be statistically
74 * significant for a histogram with 5% steps.
75 */
76#ifdef CONFIG_DEBUG_FS
77#define FIFO_PROFILING_ENTRIES 16384
78#endif
79
80#define RUNLIST_DISABLED 0
81#define RUNLIST_ENABLED 1
82
83/* generally corresponds to the "pbdma" engine */
84
85struct fifo_runlist_info_gk20a {
86 unsigned long *active_channels;
87 unsigned long *active_tsgs;
88 /* Each engine has its own SW and HW runlist buffer.*/
89 struct nvgpu_mem mem[MAX_RUNLIST_BUFFERS];
90 u32 cur_buffer;
91 u32 total_entries;
92 u32 pbdma_bitmask; /* pbdmas supported for this runlist*/
93 u32 eng_bitmask; /* engines using this runlist */
94 u32 reset_eng_bitmask; /* engines to be reset during recovery */
95 u32 count; /* cached runlist_hw_submit parameter */
96 bool stopped;
97 bool support_tsg;
98 /* protect ch/tsg/runlist preempt & runlist update */
99 struct nvgpu_mutex runlist_lock;
100};
101
102enum {
103 ENGINE_GR_GK20A = 0U,
104 ENGINE_GRCE_GK20A = 1U,
105 ENGINE_ASYNC_CE_GK20A = 2U,
106 ENGINE_INVAL_GK20A = 3U,
107};
108
109struct fifo_pbdma_exception_info_gk20a {
110 u32 status_r; /* raw register value from hardware */
111 u32 id, next_id;
112 u32 chan_status_v; /* raw value from hardware */
113 bool id_is_chid, next_id_is_chid;
114 bool chsw_in_progress;
115};
116
117struct fifo_engine_exception_info_gk20a {
118 u32 status_r; /* raw register value from hardware */
119 u32 id, next_id;
120 u32 ctx_status_v; /* raw value from hardware */
121 bool id_is_chid, next_id_is_chid;
122 bool faulted, idle, ctxsw_in_progress;
123};
124
125struct fifo_engine_info_gk20a {
126 u32 engine_id;
127 u32 runlist_id;
128 u32 intr_mask;
129 u32 reset_mask;
130 u32 pbdma_id;
131 u32 inst_id;
132 u32 pri_base;
133 u32 fault_id;
134 u32 engine_enum;
135 struct fifo_pbdma_exception_info_gk20a pbdma_exception_info;
136 struct fifo_engine_exception_info_gk20a engine_exception_info;
137};
138
139enum {
140 PROFILE_IOCTL_ENTRY = 0U,
141 PROFILE_ENTRY,
142 PROFILE_JOB_TRACKING,
143 PROFILE_APPEND,
144 PROFILE_END,
145 PROFILE_IOCTL_EXIT,
146 PROFILE_MAX
147};
148
149struct fifo_profile_gk20a {
150 u64 timestamp[PROFILE_MAX];
151};
152
153struct fifo_gk20a {
154 struct gk20a *g;
155 unsigned int num_channels;
156 unsigned int runlist_entry_size;
157 unsigned int num_runlist_entries;
158
159 unsigned int num_pbdma;
160 u32 *pbdma_map;
161
162 struct fifo_engine_info_gk20a *engine_info;
163 u32 max_engines;
164 u32 num_engines;
165 u32 *active_engines_list;
166
167 struct fifo_runlist_info_gk20a *runlist_info;
168 u32 max_runlists;
169#ifdef CONFIG_DEBUG_FS
170 struct {
171 struct fifo_profile_gk20a *data;
172 nvgpu_atomic_t get;
173 bool enabled;
174 u64 *sorted;
175 struct nvgpu_ref ref;
176 struct nvgpu_mutex lock;
177 } profile;
178#endif
179 struct nvgpu_mem userd;
180 u32 userd_entry_size;
181
182 unsigned int used_channels;
183 struct channel_gk20a *channel;
184 /* zero-kref'd channels here */
185 struct nvgpu_list_node free_chs;
186 struct nvgpu_mutex free_chs_mutex;
187 struct nvgpu_mutex engines_reset_mutex;
188
189 struct tsg_gk20a *tsg;
190 struct nvgpu_mutex tsg_inuse_mutex;
191
192 void (*remove_support)(struct fifo_gk20a *);
193 bool sw_ready;
194 struct {
195 /* share info between isrs and non-isr code */
196 struct {
197 struct nvgpu_mutex mutex;
198 } isr;
199 struct {
200 u32 device_fatal_0;
201 u32 channel_fatal_0;
202 u32 restartable_0;
203 } pbdma;
204 struct {
205
206 } engine;
207
208
209 } intr;
210
211 unsigned long deferred_fault_engines;
212 bool deferred_reset_pending;
213 struct nvgpu_mutex deferred_reset_mutex;
214
215 u32 max_subctx_count;
216 u32 channel_base;
217};
218
219struct ch_state {
220 int pid;
221 int refs;
222 bool deterministic;
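	/* followed by a copy of the channel's instance block,
	 * allocated with ram_in_alloc_size_v() extra bytes */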
223 u32 inst_block[0];
224};
225
226int gk20a_init_fifo_support(struct gk20a *g);
227
228int gk20a_init_fifo_setup_hw(struct gk20a *g);
229
230void gk20a_fifo_isr(struct gk20a *g);
231u32 gk20a_fifo_nonstall_isr(struct gk20a *g);
232
233int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch);
234int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
235int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch);
236
237int gk20a_fifo_enable_engine_activity(struct gk20a *g,
238 struct fifo_engine_info_gk20a *eng_info);
239int gk20a_fifo_enable_all_engine_activity(struct gk20a *g);
240int gk20a_fifo_disable_engine_activity(struct gk20a *g,
241 struct fifo_engine_info_gk20a *eng_info,
242 bool wait_for_idle);
243int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
244 bool wait_for_idle);
245void gk20a_fifo_enable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
246void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
247
248u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
249
250int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
251int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
252 bool wait_preempt);
253
254int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
255 bool add, bool wait_for_finish);
256
257int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
258 u32 chid, bool add,
259 bool wait_for_finish);
260int gk20a_fifo_suspend(struct gk20a *g);
261
262bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
263
264void gk20a_fifo_recover(struct gk20a *g,
265 u32 engine_ids, /* if zero, will be queried from HW */
266 u32 hw_id, /* if ~0, will be queried from HW */
267 bool id_is_tsg, /* ignored if hw_id == ~0 */
268 bool id_is_known, bool verbose, int rc_type);
269void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
270 bool verbose, u32 rc_type);
271void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
272 bool verbose, u32 rc_type);
273int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
274 u32 err_code, bool verbose);
275void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id);
276int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
277int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch);
278
279void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
280 unsigned long fault_id);
281int gk20a_fifo_wait_engine_idle(struct gk20a *g);
282bool gk20a_fifo_is_engine_busy(struct gk20a *g);
283u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
284u32 gk20a_fifo_act_eng_interrupt_mask(struct gk20a *g, u32 act_eng_id);
285u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
286u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
287 int *__id, bool *__is_tsg);
288void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
289 struct tsg_gk20a *tsg);
290void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt);
291void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
292 struct channel_gk20a *refch);
293bool gk20a_fifo_error_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
294bool gk20a_fifo_error_ch(struct gk20a *g, struct channel_gk20a *refch);
295
296void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
297int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
298 u32 id,
299 u32 runlist_id,
300 u32 new_level);
301int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
302
303const char *gk20a_fifo_interleave_level_name(u32 interleave_level);
304
305int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
306 u32 *inst_id);
307
308u32 gk20a_fifo_get_engine_ids(struct gk20a *g, u32 engine_id[],
309 u32 engine_id_sz, u32 engine_enum);
310
311void gk20a_fifo_delete_runlist(struct fifo_gk20a *f);
312
313struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g,
314 u32 engine_id);
315
316bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id);
317
318u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g);
319
320int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch);
321
322u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g);
323
324u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g);
325
326u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g);
327
328bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id);
329
330int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 chid,
331 bool add, bool wait_for_finish);
332
333int gk20a_fifo_init_engine_info(struct fifo_gk20a *f);
334
335void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist);
336void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
337void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
338 u32 runlist_state);
339
340u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
341void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
342u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c);
343
344bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid);
345#ifdef CONFIG_DEBUG_FS
346struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g);
347void gk20a_fifo_profile_release(struct gk20a *g,
348 struct fifo_profile_gk20a *profile);
349void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx);
350#else
351static inline struct fifo_profile_gk20a *
352gk20a_fifo_profile_acquire(struct gk20a *g)
353{
354 return NULL;
355}
356static inline void gk20a_fifo_profile_release(struct gk20a *g,
357 struct fifo_profile_gk20a *profile)
358{
359}
360static inline void gk20a_fifo_profile_snapshot(
361 struct fifo_profile_gk20a *profile, int idx)
362{
363}
364#endif
365
366void gk20a_dump_channel_status_ramfc(struct gk20a *g,
367 struct gk20a_debug_output *o,
368 u32 chid,
369 struct ch_state *ch_state);
370void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
371 struct gk20a_debug_output *o);
372void gk20a_dump_pbdma_status(struct gk20a *g,
373 struct gk20a_debug_output *o);
374void gk20a_dump_eng_status(struct gk20a *g,
375 struct gk20a_debug_output *o);
376const char *gk20a_decode_ccsr_chan_status(u32 index);
377const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index);
378void gk20a_fifo_enable_channel(struct channel_gk20a *ch);
379void gk20a_fifo_disable_channel(struct channel_gk20a *ch);
380
381bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid);
382bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid);
383int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch);
384
385struct channel_gk20a *gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr);
386void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a);
387
388u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g);
389
390int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
391 unsigned int id_type);
392int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
393void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
394void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch);
395int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
396 u64 gpfifo_base, u32 gpfifo_entries,
397 unsigned long timeout, u32 flags);
398void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c);
399int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
400void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch);
401int gk20a_fifo_setup_userd(struct channel_gk20a *c);
402u32 gk20a_fifo_pbdma_acquire_val(u64 timeout);
403
404
405u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
406 struct fifo_runlist_info_gk20a *runlist,
407 u32 cur_level,
408 u32 *runlist_entry,
409 bool interleave_enabled,
410 bool prev_empty,
411 u32 *entries_left);
412void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
413 u32 count, u32 buffer_index);
414int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id);
415int gk20a_init_fifo_setup_sw_common(struct gk20a *g);
416int gk20a_init_fifo_setup_sw(struct gk20a *g);
417void gk20a_fifo_handle_runlist_event(struct gk20a *g);
418bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
419 u32 engine_subid, bool fake_fault);
420
421void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
422 u32 hw_id, unsigned int id_type, unsigned int rc_type,
423 struct mmu_fault_info *mmfault);
424
425bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
426 bool *verbose, u32 *ms);
427bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
428 bool *verbose, u32 *ms);
429void gk20a_fifo_teardown_mask_intr(struct gk20a *g);
430void gk20a_fifo_teardown_unmask_intr(struct gk20a *g);
431bool gk20a_fifo_handle_sched_error(struct gk20a *g);
432
433void gk20a_fifo_reset_pbdma_method(struct gk20a *g, int pbdma_id,
434 int pbdma_method_index);
435unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
436 u32 pbdma_intr_0, u32 *handled, u32 *error_notifier);
437unsigned int gk20a_fifo_handle_pbdma_intr_1(struct gk20a *g, u32 pbdma_id,
438 u32 pbdma_intr_1, u32 *handled, u32 *error_notifier);
439u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
440 u32 pbdma_id, unsigned int rc);
441
442u32 gk20a_fifo_default_timeslice_us(struct gk20a *g);
443
444#ifdef CONFIG_TEGRA_GK20A_NVHOST
445void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
446 struct priv_cmd_entry *cmd, u32 off,
447 u32 id, u32 thresh, u64 gpu_va);
448u32 gk20a_fifo_get_syncpt_wait_cmd_size(void);
449u32 gk20a_fifo_get_syncpt_incr_per_release(void);
450void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
451 bool wfi_cmd, struct priv_cmd_entry *cmd,
452 u32 id, u64 gpu_va);
453u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd);
454void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
455 struct nvgpu_mem *syncpt_buf);
456int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
457 u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
458#endif
459
460void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
461 struct mmu_fault_info *mmfault);
462void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault);
463void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault);
464void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault);
465u32 gk20a_fifo_get_sema_wait_cmd_size(void);
466u32 gk20a_fifo_get_sema_incr_cmd_size(void);
467void gk20a_fifo_add_sema_cmd(struct gk20a *g,
468 struct nvgpu_semaphore *s, u64 sema_va,
469 struct priv_cmd_entry *cmd,
470 u32 off, bool acquire, bool wfi);
471#endif /* FIFO_GK20A_H */
diff --git a/include/gk20a/flcn_gk20a.c b/include/gk20a/flcn_gk20a.c
new file mode 100644
index 0000000..fdcaef9
--- /dev/null
+++ b/include/gk20a/flcn_gk20a.c
@@ -0,0 +1,759 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22#include <nvgpu/falcon.h>
23#include <nvgpu/pmu.h>
24#include <nvgpu/io.h>
25
26#include "gk20a/gk20a.h"
27#include "gk20a/flcn_gk20a.h"
28
29#include <nvgpu/hw/gm20b/hw_falcon_gm20b.h>
30
31static int gk20a_flcn_reset(struct nvgpu_falcon *flcn)
32{
33 struct gk20a *g = flcn->g;
34 u32 base_addr = flcn->flcn_base;
35 u32 unit_status = 0;
36 int status = 0;
37
38 if (flcn->flcn_engine_dep_ops.reset_eng) {
39 /* falcon & engine reset */
40 status = flcn->flcn_engine_dep_ops.reset_eng(g);
41 } else {
42 /* do falcon CPU hard reset */
43 unit_status = gk20a_readl(g, base_addr +
44 falcon_falcon_cpuctl_r());
45 gk20a_writel(g, base_addr + falcon_falcon_cpuctl_r(),
46 (unit_status | falcon_falcon_cpuctl_hreset_f(1)));
47 }
48
49 return status;
50}
51
52static bool gk20a_flcn_clear_halt_interrupt_status(struct nvgpu_falcon *flcn)
53{
54 struct gk20a *g = flcn->g;
55 u32 base_addr = flcn->flcn_base;
56 u32 data = 0;
57 bool status = false;
58
59 gk20a_writel(g, base_addr + falcon_falcon_irqsclr_r(),
60 gk20a_readl(g, base_addr + falcon_falcon_irqsclr_r()) |
61 (0x10));
62 data = gk20a_readl(g, (base_addr + falcon_falcon_irqstat_r()));
63
64 if ((data & falcon_falcon_irqstat_halt_true_f()) !=
65 falcon_falcon_irqstat_halt_true_f()) {
66		/* halt irq is clear */
67 status = true;
68 }
69
70 return status;
71}
72
73static void gk20a_flcn_set_irq(struct nvgpu_falcon *flcn, bool enable)
74{
75 struct gk20a *g = flcn->g;
76 u32 base_addr = flcn->flcn_base;
77
78 if (!flcn->is_interrupt_enabled) {
79 nvgpu_warn(g, "Interrupt not supported on flcn 0x%x ",
80 flcn->flcn_id);
81 /* Keep interrupt disabled */
82 enable = false;
83 }
84
85 if (enable) {
86 gk20a_writel(g, base_addr + falcon_falcon_irqmset_r(),
87 flcn->intr_mask);
88 gk20a_writel(g, base_addr + falcon_falcon_irqdest_r(),
89 flcn->intr_dest);
90 } else {
91 gk20a_writel(g, base_addr + falcon_falcon_irqmclr_r(),
92 0xffffffff);
93 }
94}
95
96static bool gk20a_is_falcon_cpu_halted(struct nvgpu_falcon *flcn)
97{
98 struct gk20a *g = flcn->g;
99 u32 base_addr = flcn->flcn_base;
100
101 return (gk20a_readl(g, base_addr + falcon_falcon_cpuctl_r()) &
102 falcon_falcon_cpuctl_halt_intr_m() ?
103 true : false);
104}
105
106static bool gk20a_is_falcon_idle(struct nvgpu_falcon *flcn)
107{
108 struct gk20a *g = flcn->g;
109 u32 base_addr = flcn->flcn_base;
110 u32 unit_status = 0;
111 bool status = false;
112
113 unit_status = gk20a_readl(g,
114 base_addr + falcon_falcon_idlestate_r());
115
116 if (falcon_falcon_idlestate_falcon_busy_v(unit_status) == 0 &&
117 falcon_falcon_idlestate_ext_busy_v(unit_status) == 0) {
118 status = true;
119 } else {
120 status = false;
121 }
122
123 return status;
124}
125
126static bool gk20a_is_falcon_scrubbing_done(struct nvgpu_falcon *flcn)
127{
128 struct gk20a *g = flcn->g;
129 u32 base_addr = flcn->flcn_base;
130 u32 unit_status = 0;
131 bool status = false;
132
133 unit_status = gk20a_readl(g,
134 base_addr + falcon_falcon_dmactl_r());
135
136 if (unit_status & (falcon_falcon_dmactl_dmem_scrubbing_m() |
137 falcon_falcon_dmactl_imem_scrubbing_m())) {
138 status = false;
139 } else {
140 status = true;
141 }
142
143 return status;
144}
145
146static u32 gk20a_falcon_get_mem_size(struct nvgpu_falcon *flcn,
147 enum flcn_mem_type mem_type)
148{
149 struct gk20a *g = flcn->g;
150 u32 mem_size = 0;
151 u32 hw_cfg_reg = gk20a_readl(g,
152 flcn->flcn_base + falcon_falcon_hwcfg_r());
153
154 if (mem_type == MEM_DMEM) {
155 mem_size = falcon_falcon_hwcfg_dmem_size_v(hw_cfg_reg)
156 << GK20A_PMU_DMEM_BLKSIZE2;
157 } else {
158 mem_size = falcon_falcon_hwcfg_imem_size_v(hw_cfg_reg)
159 << GK20A_PMU_DMEM_BLKSIZE2;
160 }
161
162 return mem_size;
163}
164
165static int flcn_mem_overflow_check(struct nvgpu_falcon *flcn,
166 u32 offset, u32 size, enum flcn_mem_type mem_type)
167{
168 struct gk20a *g = flcn->g;
169 u32 mem_size = 0;
170
171 if (size == 0) {
172 nvgpu_err(g, "size is zero");
173 return -EINVAL;
174 }
175
176 if (offset & 0x3) {
177 nvgpu_err(g, "offset (0x%08x) not 4-byte aligned", offset);
178 return -EINVAL;
179 }
180
181 mem_size = gk20a_falcon_get_mem_size(flcn, mem_type);
182 if (!(offset <= mem_size && (offset + size) <= mem_size)) {
183 nvgpu_err(g, "flcn-id 0x%x, copy overflow ",
184 flcn->flcn_id);
185 nvgpu_err(g, "total size 0x%x, offset 0x%x, copy size 0x%x",
186 mem_size, offset, size);
187 return -EINVAL;
188 }
189
190 return 0;
191}
192
193static int gk20a_flcn_copy_from_dmem(struct nvgpu_falcon *flcn,
194 u32 src, u8 *dst, u32 size, u8 port)
195{
196 struct gk20a *g = flcn->g;
197 u32 base_addr = flcn->flcn_base;
198 u32 i, words, bytes;
199 u32 data, addr_mask;
200 u32 *dst_u32 = (u32 *)dst;
201
202 nvgpu_log_fn(g, " src dmem offset - %x, size - %x", src, size);
203
204 if (flcn_mem_overflow_check(flcn, src, size, MEM_DMEM)) {
205 nvgpu_err(g, "incorrect parameters");
206 return -EINVAL;
207 }
208
209 nvgpu_mutex_acquire(&flcn->copy_lock);
210
211 words = size >> 2;
212 bytes = size & 0x3;
213
214 addr_mask = falcon_falcon_dmemc_offs_m() |
215 falcon_falcon_dmemc_blk_m();
216
217 src &= addr_mask;
218
219 gk20a_writel(g, base_addr + falcon_falcon_dmemc_r(port),
220 src | falcon_falcon_dmemc_aincr_f(1));
221
222 for (i = 0; i < words; i++) {
223 dst_u32[i] = gk20a_readl(g,
224 base_addr + falcon_falcon_dmemd_r(port));
225 }
226
227 if (bytes > 0) {
228 data = gk20a_readl(g, base_addr + falcon_falcon_dmemd_r(port));
229 for (i = 0; i < bytes; i++) {
230 dst[(words << 2) + i] = ((u8 *)&data)[i];
231 }
232 }
233
234 nvgpu_mutex_release(&flcn->copy_lock);
235 return 0;
236}
237
238static int gk20a_flcn_copy_to_dmem(struct nvgpu_falcon *flcn,
239 u32 dst, u8 *src, u32 size, u8 port)
240{
241 struct gk20a *g = flcn->g;
242 u32 base_addr = flcn->flcn_base;
243 u32 i, words, bytes;
244 u32 data, addr_mask;
245 u32 *src_u32 = (u32 *)src;
246
247 nvgpu_log_fn(g, "dest dmem offset - %x, size - %x", dst, size);
248
249 if (flcn_mem_overflow_check(flcn, dst, size, MEM_DMEM)) {
250 nvgpu_err(g, "incorrect parameters");
251 return -EINVAL;
252 }
253
254 nvgpu_mutex_acquire(&flcn->copy_lock);
255
256 words = size >> 2;
257 bytes = size & 0x3;
258
259 addr_mask = falcon_falcon_dmemc_offs_m() |
260 falcon_falcon_dmemc_blk_m();
261
262 dst &= addr_mask;
263
264 gk20a_writel(g, base_addr + falcon_falcon_dmemc_r(port),
265 dst | falcon_falcon_dmemc_aincw_f(1));
266
267 for (i = 0; i < words; i++) {
268 gk20a_writel(g,
269 base_addr + falcon_falcon_dmemd_r(port), src_u32[i]);
270 }
271
272 if (bytes > 0) {
273 data = 0;
274 for (i = 0; i < bytes; i++) {
275 ((u8 *)&data)[i] = src[(words << 2) + i];
276 }
277 gk20a_writel(g, base_addr + falcon_falcon_dmemd_r(port), data);
278 }
279
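	/*
	 * Sanity check: with auto-increment enabled, DMEMC should now point
	 * just past the last word written; warn if it does not.
	 */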
280 size = ALIGN(size, 4);
281 data = gk20a_readl(g,
282 base_addr + falcon_falcon_dmemc_r(port)) & addr_mask;
283 if (data != ((dst + size) & addr_mask)) {
284 nvgpu_warn(g, "copy failed. bytes written %d, expected %d",
285 data - dst, size);
286 }
287
288 nvgpu_mutex_release(&flcn->copy_lock);
289
290 return 0;
291}
292
293static int gk20a_flcn_copy_from_imem(struct nvgpu_falcon *flcn, u32 src,
294 u8 *dst, u32 size, u8 port)
295{
296 struct gk20a *g = flcn->g;
297 u32 base_addr = flcn->flcn_base;
298 u32 *dst_u32 = (u32 *)dst;
299 u32 words = 0;
300 u32 bytes = 0;
301 u32 data = 0;
302 u32 blk = 0;
303 u32 i = 0;
304
305 nvgpu_log_info(g, "download %d bytes from 0x%x", size, src);
306
307 if (flcn_mem_overflow_check(flcn, src, size, MEM_IMEM)) {
308 nvgpu_err(g, "incorrect parameters");
309 return -EINVAL;
310 }
311
312 nvgpu_mutex_acquire(&flcn->copy_lock);
313
314 words = size >> 2;
315 bytes = size & 0x3;
316 blk = src >> 8;
317
318 nvgpu_log_info(g, "download %d words from 0x%x block %d",
319 words, src, blk);
320
321 gk20a_writel(g, base_addr + falcon_falcon_imemc_r(port),
322 falcon_falcon_imemc_offs_f(src >> 2) |
323 falcon_falcon_imemc_blk_f(blk) |
324 falcon_falcon_dmemc_aincr_f(1));
325
326 for (i = 0; i < words; i++) {
327 dst_u32[i] = gk20a_readl(g,
328 base_addr + falcon_falcon_imemd_r(port));
329 }
330
331 if (bytes > 0) {
332 data = gk20a_readl(g, base_addr + falcon_falcon_imemd_r(port));
333 for (i = 0; i < bytes; i++) {
334 dst[(words << 2) + i] = ((u8 *)&data)[i];
335 }
336 }
337
338 nvgpu_mutex_release(&flcn->copy_lock);
339
340 return 0;
341}
342
343static int gk20a_flcn_copy_to_imem(struct nvgpu_falcon *flcn, u32 dst,
344 u8 *src, u32 size, u8 port, bool sec, u32 tag)
345{
346 struct gk20a *g = flcn->g;
347 u32 base_addr = flcn->flcn_base;
348 u32 *src_u32 = (u32 *)src;
349 u32 words = 0;
350 u32 blk = 0;
351 u32 i = 0;
352
353 nvgpu_log_info(g, "upload %d bytes to 0x%x", size, dst);
354
355 if (flcn_mem_overflow_check(flcn, dst, size, MEM_IMEM)) {
356 nvgpu_err(g, "incorrect parameters");
357 return -EINVAL;
358 }
359
360 nvgpu_mutex_acquire(&flcn->copy_lock);
361
362 words = size >> 2;
363 blk = dst >> 8;
364
365 nvgpu_log_info(g, "upload %d words to 0x%x block %d, tag 0x%x",
366 words, dst, blk, tag);
367
368 gk20a_writel(g, base_addr + falcon_falcon_imemc_r(port),
369 falcon_falcon_imemc_offs_f(dst >> 2) |
370 falcon_falcon_imemc_blk_f(blk) |
371 /* Set Auto-Increment on write */
372 falcon_falcon_imemc_aincw_f(1) |
373 falcon_falcon_imemc_secure_f(sec ? 1U : 0U));
374
375 for (i = 0; i < words; i++) {
376 if (i % 64 == 0) {
377 /* tag is always 256B aligned */
378 gk20a_writel(g, base_addr + falcon_falcon_imemt_r(0),
379 tag);
380 tag++;
381 }
382
383 gk20a_writel(g, base_addr + falcon_falcon_imemd_r(port),
384 src_u32[i]);
385 }
386
387 /* WARNING : setting remaining bytes in block to 0x0 */
388 while (i % 64) {
389 gk20a_writel(g, base_addr + falcon_falcon_imemd_r(port), 0);
390 i++;
391 }
392
393 nvgpu_mutex_release(&flcn->copy_lock);
394
395 return 0;
396}
397
398static int gk20a_falcon_bootstrap(struct nvgpu_falcon *flcn,
399 u32 boot_vector)
400{
401 struct gk20a *g = flcn->g;
402 u32 base_addr = flcn->flcn_base;
403
404 nvgpu_log_info(g, "boot vec 0x%x", boot_vector);
405
406 gk20a_writel(g, base_addr + falcon_falcon_dmactl_r(),
407 falcon_falcon_dmactl_require_ctx_f(0));
408
409 gk20a_writel(g, base_addr + falcon_falcon_bootvec_r(),
410 falcon_falcon_bootvec_vec_f(boot_vector));
411
412 gk20a_writel(g, base_addr + falcon_falcon_cpuctl_r(),
413 falcon_falcon_cpuctl_startcpu_f(1));
414
415 return 0;
416}
417
418static u32 gk20a_falcon_mailbox_read(struct nvgpu_falcon *flcn,
419 u32 mailbox_index)
420{
421 struct gk20a *g = flcn->g;
422 u32 data = 0;
423
424 if (mailbox_index < FALCON_MAILBOX_COUNT) {
425 data = gk20a_readl(g, flcn->flcn_base + (mailbox_index ?
426 falcon_falcon_mailbox1_r() :
427 falcon_falcon_mailbox0_r()));
428 } else {
429 nvgpu_err(g, "incorrect mailbox id %d", mailbox_index);
430 }
431
432 return data;
433}
434
435static void gk20a_falcon_mailbox_write(struct nvgpu_falcon *flcn,
436 u32 mailbox_index, u32 data)
437{
438 struct gk20a *g = flcn->g;
439
440 if (mailbox_index < FALCON_MAILBOX_COUNT) {
441 gk20a_writel(g, flcn->flcn_base + (mailbox_index ?
442 falcon_falcon_mailbox1_r() :
443 falcon_falcon_mailbox0_r()),
444 data);
445 } else {
446 nvgpu_err(g, "incorrect mailbox id %d", mailbox_index);
447 }
448}
449
450static int gk20a_falcon_bl_bootstrap(struct nvgpu_falcon *flcn,
451 struct nvgpu_falcon_bl_info *bl_info)
452{
453 struct gk20a *g = flcn->g;
454 u32 base_addr = flcn->flcn_base;
455 u32 virt_addr = 0;
456 u32 dst = 0;
457 int err = 0;
458
459	/* copy bootloader interface structure to dmem */
460 err = gk20a_flcn_copy_to_dmem(flcn, 0, (u8 *)bl_info->bl_desc,
461 bl_info->bl_desc_size, (u8)0);
462 if (err != 0) {
463 goto exit;
464 }
465
466 /* copy bootloader to TOP of IMEM */
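	/* hwcfg reports IMEM size in 256-byte blocks, hence the << 8 */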
467 dst = (falcon_falcon_hwcfg_imem_size_v(gk20a_readl(g,
468 base_addr + falcon_falcon_hwcfg_r())) << 8) - bl_info->bl_size;
469
470 err = gk20a_flcn_copy_to_imem(flcn, dst, (u8 *)(bl_info->bl_src),
471 bl_info->bl_size, (u8)0, false, bl_info->bl_start_tag);
472 if (err != 0) {
473 goto exit;
474 }
475
476 gk20a_falcon_mailbox_write(flcn, FALCON_MAILBOX_0, 0xDEADA5A5U);
477
478 virt_addr = bl_info->bl_start_tag << 8;
479
480 err = gk20a_falcon_bootstrap(flcn, virt_addr);
481
482exit:
483 if (err != 0) {
484 nvgpu_err(g, "falcon id-0x%x bootstrap failed", flcn->flcn_id);
485 }
486
487 return err;
488}
489
490static void gk20a_falcon_dump_imblk(struct nvgpu_falcon *flcn)
491{
492 struct gk20a *g = flcn->g;
493 u32 base_addr = flcn->flcn_base;
494 u32 i = 0, j = 0;
495 u32 data[8] = {0};
496 u32 block_count = 0;
497
498 block_count = falcon_falcon_hwcfg_imem_size_v(gk20a_readl(g,
499 flcn->flcn_base + falcon_falcon_hwcfg_r()));
500
501 /* block_count must be multiple of 8 */
502 block_count &= ~0x7;
503 nvgpu_err(g, "FALCON IMEM BLK MAPPING (PA->VA) (%d TOTAL):",
504 block_count);
505
506 for (i = 0; i < block_count; i += 8) {
507 for (j = 0; j < 8; j++) {
508 gk20a_writel(g, flcn->flcn_base +
509 falcon_falcon_imctl_debug_r(),
510 falcon_falcon_imctl_debug_cmd_f(0x2) |
511 falcon_falcon_imctl_debug_addr_blk_f(i + j));
512
513 data[j] = gk20a_readl(g, base_addr +
514 falcon_falcon_imstat_r());
515 }
516
517 nvgpu_err(g, " %#04x: %#010x %#010x %#010x %#010x",
518 i, data[0], data[1], data[2], data[3]);
519 nvgpu_err(g, " %#04x: %#010x %#010x %#010x %#010x",
520 i + 4, data[4], data[5], data[6], data[7]);
521 }
522}
523
524static void gk20a_falcon_dump_pc_trace(struct nvgpu_falcon *flcn)
525{
526 struct gk20a *g = flcn->g;
527 u32 base_addr = flcn->flcn_base;
528 u32 trace_pc_count = 0;
529 u32 pc = 0;
530 u32 i = 0;
531
532 if (gk20a_readl(g, base_addr + falcon_falcon_sctl_r()) & 0x02) {
533 nvgpu_err(g, " falcon is in HS mode, PC TRACE dump not supported");
534 return;
535 }
536
537 trace_pc_count = falcon_falcon_traceidx_maxidx_v(gk20a_readl(g,
538 base_addr + falcon_falcon_traceidx_r()));
539 nvgpu_err(g,
540 "PC TRACE (TOTAL %d ENTRIES. entry 0 is the most recent branch):",
541 trace_pc_count);
542
543 for (i = 0; i < trace_pc_count; i++) {
544 gk20a_writel(g, base_addr + falcon_falcon_traceidx_r(),
545 falcon_falcon_traceidx_idx_f(i));
546
547 pc = falcon_falcon_tracepc_pc_v(gk20a_readl(g,
548 base_addr + falcon_falcon_tracepc_r()));
549 nvgpu_err(g, "FALCON_TRACEPC(%d) : %#010x", i, pc);
550 }
551}
552
553void gk20a_falcon_dump_stats(struct nvgpu_falcon *flcn)
554{
555 struct gk20a *g = flcn->g;
556 u32 base_addr = flcn->flcn_base;
557 unsigned int i;
558
559 nvgpu_err(g, "<<< FALCON id-%d DEBUG INFORMATION - START >>>",
560 flcn->flcn_id);
561
562 /* imblk dump */
563 gk20a_falcon_dump_imblk(flcn);
564 /* PC trace dump */
565 gk20a_falcon_dump_pc_trace(flcn);
566
567 nvgpu_err(g, "FALCON ICD REGISTERS DUMP");
568
569 for (i = 0; i < 4; i++) {
570 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
571 falcon_falcon_icd_cmd_opc_rreg_f() |
572 falcon_falcon_icd_cmd_idx_f(FALCON_REG_PC));
573 nvgpu_err(g, "FALCON_REG_PC : 0x%x",
574 gk20a_readl(g, base_addr +
575 falcon_falcon_icd_rdata_r()));
576
577 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
578 falcon_falcon_icd_cmd_opc_rreg_f() |
579 falcon_falcon_icd_cmd_idx_f(FALCON_REG_SP));
580 nvgpu_err(g, "FALCON_REG_SP : 0x%x",
581 gk20a_readl(g, base_addr +
582 falcon_falcon_icd_rdata_r()));
583 }
584
585 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
586 falcon_falcon_icd_cmd_opc_rreg_f() |
587 falcon_falcon_icd_cmd_idx_f(FALCON_REG_IMB));
588 nvgpu_err(g, "FALCON_REG_IMB : 0x%x",
589 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
590
591 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
592 falcon_falcon_icd_cmd_opc_rreg_f() |
593 falcon_falcon_icd_cmd_idx_f(FALCON_REG_DMB));
594 nvgpu_err(g, "FALCON_REG_DMB : 0x%x",
595 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
596
597 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
598 falcon_falcon_icd_cmd_opc_rreg_f() |
599 falcon_falcon_icd_cmd_idx_f(FALCON_REG_CSW));
600 nvgpu_err(g, "FALCON_REG_CSW : 0x%x",
601 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
602
603 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
604 falcon_falcon_icd_cmd_opc_rreg_f() |
605 falcon_falcon_icd_cmd_idx_f(FALCON_REG_CTX));
606 nvgpu_err(g, "FALCON_REG_CTX : 0x%x",
607 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
608
609 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
610 falcon_falcon_icd_cmd_opc_rreg_f() |
611 falcon_falcon_icd_cmd_idx_f(FALCON_REG_EXCI));
612 nvgpu_err(g, "FALCON_REG_EXCI : 0x%x",
613 gk20a_readl(g, base_addr + falcon_falcon_icd_rdata_r()));
614
615 for (i = 0; i < 6; i++) {
616 gk20a_writel(g, base_addr + falcon_falcon_icd_cmd_r(),
617 falcon_falcon_icd_cmd_opc_rreg_f() |
618 falcon_falcon_icd_cmd_idx_f(
619 falcon_falcon_icd_cmd_opc_rstat_f()));
620 nvgpu_err(g, "FALCON_REG_RSTAT[%d] : 0x%x", i,
621 gk20a_readl(g, base_addr +
622 falcon_falcon_icd_rdata_r()));
623 }
624
625 nvgpu_err(g, " FALCON REGISTERS DUMP");
626 nvgpu_err(g, "falcon_falcon_os_r : %d",
627 gk20a_readl(g, base_addr + falcon_falcon_os_r()));
628 nvgpu_err(g, "falcon_falcon_cpuctl_r : 0x%x",
629 gk20a_readl(g, base_addr + falcon_falcon_cpuctl_r()));
630 nvgpu_err(g, "falcon_falcon_idlestate_r : 0x%x",
631 gk20a_readl(g, base_addr + falcon_falcon_idlestate_r()));
632 nvgpu_err(g, "falcon_falcon_mailbox0_r : 0x%x",
633 gk20a_readl(g, base_addr + falcon_falcon_mailbox0_r()));
634 nvgpu_err(g, "falcon_falcon_mailbox1_r : 0x%x",
635 gk20a_readl(g, base_addr + falcon_falcon_mailbox1_r()));
636 nvgpu_err(g, "falcon_falcon_irqstat_r : 0x%x",
637 gk20a_readl(g, base_addr + falcon_falcon_irqstat_r()));
638 nvgpu_err(g, "falcon_falcon_irqmode_r : 0x%x",
639 gk20a_readl(g, base_addr + falcon_falcon_irqmode_r()));
640 nvgpu_err(g, "falcon_falcon_irqmask_r : 0x%x",
641 gk20a_readl(g, base_addr + falcon_falcon_irqmask_r()));
642 nvgpu_err(g, "falcon_falcon_irqdest_r : 0x%x",
643 gk20a_readl(g, base_addr + falcon_falcon_irqdest_r()));
644 nvgpu_err(g, "falcon_falcon_debug1_r : 0x%x",
645 gk20a_readl(g, base_addr + falcon_falcon_debug1_r()));
646 nvgpu_err(g, "falcon_falcon_debuginfo_r : 0x%x",
647 gk20a_readl(g, base_addr + falcon_falcon_debuginfo_r()));
648 nvgpu_err(g, "falcon_falcon_bootvec_r : 0x%x",
649 gk20a_readl(g, base_addr + falcon_falcon_bootvec_r()));
650 nvgpu_err(g, "falcon_falcon_hwcfg_r : 0x%x",
651 gk20a_readl(g, base_addr + falcon_falcon_hwcfg_r()));
652 nvgpu_err(g, "falcon_falcon_engctl_r : 0x%x",
653 gk20a_readl(g, base_addr + falcon_falcon_engctl_r()));
654 nvgpu_err(g, "falcon_falcon_curctx_r : 0x%x",
655 gk20a_readl(g, base_addr + falcon_falcon_curctx_r()));
656 nvgpu_err(g, "falcon_falcon_nxtctx_r : 0x%x",
657 gk20a_readl(g, base_addr + falcon_falcon_nxtctx_r()));
658 nvgpu_err(g, "falcon_falcon_exterrstat_r : 0x%x",
659 gk20a_readl(g, base_addr + falcon_falcon_exterrstat_r()));
660 nvgpu_err(g, "falcon_falcon_exterraddr_r : 0x%x",
661 gk20a_readl(g, base_addr + falcon_falcon_exterraddr_r()));
662}
663
664static void gk20a_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn)
665{
666 struct gk20a *g = flcn->g;
667 struct nvgpu_falcon_engine_dependency_ops *flcn_eng_dep_ops =
668 &flcn->flcn_engine_dep_ops;
669
670 switch (flcn->flcn_id) {
671 case FALCON_ID_PMU:
672 flcn_eng_dep_ops->reset_eng = nvgpu_pmu_reset;
673 flcn_eng_dep_ops->queue_head = g->ops.pmu.pmu_queue_head;
674 flcn_eng_dep_ops->queue_tail = g->ops.pmu.pmu_queue_tail;
675 break;
676 default:
677		/* The NULL assignment makes sure the CPU hard reset in
678		 * gk20a_flcn_reset() gets executed when the falcon does not
679		 * need an engine-specific reset implementation.
680		 */
681 flcn_eng_dep_ops->reset_eng = NULL;
682 break;
683 }
684}
685
686void gk20a_falcon_ops(struct nvgpu_falcon *flcn)
687{
688 struct nvgpu_falcon_ops *flcn_ops = &flcn->flcn_ops;
689
690 flcn_ops->reset = gk20a_flcn_reset;
691 flcn_ops->set_irq = gk20a_flcn_set_irq;
692 flcn_ops->clear_halt_interrupt_status =
693 gk20a_flcn_clear_halt_interrupt_status;
694 flcn_ops->is_falcon_cpu_halted = gk20a_is_falcon_cpu_halted;
695 flcn_ops->is_falcon_idle = gk20a_is_falcon_idle;
696 flcn_ops->is_falcon_scrubbing_done = gk20a_is_falcon_scrubbing_done;
697 flcn_ops->copy_from_dmem = gk20a_flcn_copy_from_dmem;
698 flcn_ops->copy_to_dmem = gk20a_flcn_copy_to_dmem;
699 flcn_ops->copy_to_imem = gk20a_flcn_copy_to_imem;
700 flcn_ops->copy_from_imem = gk20a_flcn_copy_from_imem;
701 flcn_ops->bootstrap = gk20a_falcon_bootstrap;
702 flcn_ops->dump_falcon_stats = gk20a_falcon_dump_stats;
703 flcn_ops->mailbox_read = gk20a_falcon_mailbox_read;
704 flcn_ops->mailbox_write = gk20a_falcon_mailbox_write;
705 flcn_ops->bl_bootstrap = gk20a_falcon_bl_bootstrap;
706
707 gk20a_falcon_engine_dependency_ops(flcn);
708}
709
710int gk20a_falcon_hal_sw_init(struct nvgpu_falcon *flcn)
711{
712 struct gk20a *g = flcn->g;
713 int err = 0;
714
715 switch (flcn->flcn_id) {
716 case FALCON_ID_PMU:
717 flcn->flcn_base = FALCON_PWR_BASE;
718 flcn->is_falcon_supported = true;
719 flcn->is_interrupt_enabled = true;
720 break;
721 case FALCON_ID_SEC2:
722 flcn->flcn_base = FALCON_SEC_BASE;
723 flcn->is_falcon_supported = false;
724 flcn->is_interrupt_enabled = false;
725 break;
726 case FALCON_ID_FECS:
727 flcn->flcn_base = FALCON_FECS_BASE;
728 flcn->is_falcon_supported = true;
729 flcn->is_interrupt_enabled = false;
730 break;
731 case FALCON_ID_GPCCS:
732 flcn->flcn_base = FALCON_GPCCS_BASE;
733 flcn->is_falcon_supported = true;
734 flcn->is_interrupt_enabled = false;
735 break;
736 case FALCON_ID_NVDEC:
737 flcn->flcn_base = FALCON_NVDEC_BASE;
738 flcn->is_falcon_supported = false;
739 flcn->is_interrupt_enabled = false;
740 break;
741 default:
742 flcn->is_falcon_supported = false;
743 break;
744 }
745
746 if (flcn->is_falcon_supported) {
747 err = nvgpu_mutex_init(&flcn->copy_lock);
748 if (err != 0) {
749 nvgpu_err(g, "Error in flcn.copy_lock mutex initialization");
750 } else {
751 gk20a_falcon_ops(flcn);
752 }
753 } else {
754 nvgpu_log_info(g, "falcon 0x%x not supported on %s",
755 flcn->flcn_id, g->name);
756 }
757
758 return err;
759}
diff --git a/include/gk20a/flcn_gk20a.h b/include/gk20a/flcn_gk20a.h
new file mode 100644
index 0000000..9d27b38
--- /dev/null
+++ b/include/gk20a/flcn_gk20a.h
@@ -0,0 +1,29 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22#ifndef NVGPU_GK20A_FLCN_GK20A_H
23#define NVGPU_GK20A_FLCN_GK20A_H
24
25void gk20a_falcon_ops(struct nvgpu_falcon *flcn);
26int gk20a_falcon_hal_sw_init(struct nvgpu_falcon *flcn);
27void gk20a_falcon_dump_stats(struct nvgpu_falcon *flcn);
28
29#endif /* NVGPU_GK20A_FLCN_GK20A_H */
diff --git a/include/gk20a/gk20a.c b/include/gk20a/gk20a.c
new file mode 100644
index 0000000..c3068b7
--- /dev/null
+++ b/include/gk20a/gk20a.c
@@ -0,0 +1,590 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/allocator.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/soc.h>
30#include <nvgpu/enabled.h>
31#include <nvgpu/pmu.h>
32#include <nvgpu/gmmu.h>
33#include <nvgpu/ltc.h>
34#include <nvgpu/vidmem.h>
35#include <nvgpu/mm.h>
36#include <nvgpu/ctxsw_trace.h>
37#include <nvgpu/soc.h>
38#include <nvgpu/clk_arb.h>
39#include <nvgpu/therm.h>
40#include <nvgpu/mc.h>
41#include <nvgpu/channel_sync.h>
42
43#include <trace/events/gk20a.h>
44
45#include "gk20a.h"
46
47#include "dbg_gpu_gk20a.h"
48#include "pstate/pstate.h"
49
50void __nvgpu_check_gpu_state(struct gk20a *g)
51{
52 u32 boot_0 = 0xffffffff;
53
54 boot_0 = nvgpu_mc_boot_0(g, NULL, NULL, NULL);
55 if (boot_0 == 0xffffffff) {
56 nvgpu_err(g, "GPU has disappeared from bus!!");
57 nvgpu_err(g, "Rebooting system!!");
58 nvgpu_kernel_restart(NULL);
59 }
60}
61
62void __gk20a_warn_on_no_regs(void)
63{
64 WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
65}
66
67static void gk20a_mask_interrupts(struct gk20a *g)
68{
69 if (g->ops.mc.intr_mask != NULL) {
70 g->ops.mc.intr_mask(g);
71 }
72
73 if (g->ops.mc.log_pending_intrs != NULL) {
74 g->ops.mc.log_pending_intrs(g);
75 }
76}
77
78int gk20a_prepare_poweroff(struct gk20a *g)
79{
80 int ret = 0;
81
82 nvgpu_log_fn(g, " ");
83
84 if (g->ops.fifo.channel_suspend) {
85 ret = g->ops.fifo.channel_suspend(g);
86 if (ret) {
87 return ret;
88 }
89 }
90
91 /* disable elpg before gr or fifo suspend */
92 if (g->ops.pmu.is_pmu_supported(g)) {
93 ret |= nvgpu_pmu_destroy(g);
94 }
95
96 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
97 ret |= nvgpu_sec2_destroy(g);
98 }
99
100 ret |= gk20a_gr_suspend(g);
101 ret |= nvgpu_mm_suspend(g);
102 ret |= gk20a_fifo_suspend(g);
103
104 gk20a_ce_suspend(g);
105
106 /* Disable GPCPLL */
107 if (g->ops.clk.suspend_clk_support) {
108 ret |= g->ops.clk.suspend_clk_support(g);
109 }
110
111 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
112 gk20a_deinit_pstate_support(g);
113 }
114
115 gk20a_mask_interrupts(g);
116
117 g->power_on = false;
118
119 return ret;
120}
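/*
 * Note: the return codes above are ORed together, so poweroff keeps tearing
 * the remaining units down even if an earlier step fails; the caller only
 * sees a single non-zero value when anything in the sequence went wrong.
 */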
121
122int gk20a_finalize_poweron(struct gk20a *g)
123{
124 int err = 0;
125#if defined(CONFIG_TEGRA_GK20A_NVHOST)
126 u32 nr_pages;
127#endif
128
129 u32 fuse_status;
130
131 nvgpu_log_fn(g, " ");
132
133 if (g->power_on) {
134 return 0;
135 }
136
137 g->power_on = true;
138
139 /*
140 * Before probing the GPU make sure the GPU's state is cleared. This is
141 * relevant for rebind operations.
142 */
143 if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
144 g->ops.xve.reset_gpu(g);
145 g->gpu_reset_done = true;
146 }
147
148 if (g->ops.clock_gating.slcg_acb_load_gating_prod != NULL) {
149 g->ops.clock_gating.slcg_acb_load_gating_prod(g, true);
150 }
151
152 /*
153 * Do this early so any early VMs that get made are capable of mapping
154 * buffers.
155 */
156 err = nvgpu_pd_cache_init(g);
157 if (err) {
158 return err;
159 }
160
161 /* init interface layer support for PMU falcon */
162 err = nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
163 if (err != 0) {
164 nvgpu_err(g, "failed to sw init FALCON_ID_PMU");
165 goto done;
166 }
167 err = nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
168 if (err != 0) {
169 nvgpu_err(g, "failed to sw init FALCON_ID_SEC2");
170 goto done;
171 }
172 err = nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);
173 if (err != 0) {
174 nvgpu_err(g, "failed to sw init FALCON_ID_NVDEC");
175 goto done;
176 }
177 err = nvgpu_flcn_sw_init(g, FALCON_ID_GSPLITE);
178 if (err != 0) {
179 nvgpu_err(g, "failed to sw init FALCON_ID_GSPLITE");
180 goto done;
181 }
182
183 if (g->ops.acr.acr_sw_init != NULL &&
184 nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
185 g->ops.acr.acr_sw_init(g, &g->acr);
186 }
187
188 if (g->ops.bios.init) {
189 err = g->ops.bios.init(g);
190 }
191 if (err) {
192 goto done;
193 }
194
195 g->ops.bus.init_hw(g);
196
197 if (g->ops.clk.disable_slowboot) {
198 g->ops.clk.disable_slowboot(g);
199 }
200
201 g->ops.priv_ring.enable_priv_ring(g);
202
203	/* TBD: move this after graphics init, where blcg/slcg are enabled.
204	   This function removes SlowdownOnBoot, which applies a 32x divider
205	   on the gpcpll bypass path. The purpose of slowdown is to save power
206	   during boot, but it also significantly slows down gk20a init on
207	   simulation and emulation. We should remove SOB after the graphics
208	   power saving features (blcg/slcg) are enabled. For now, do it here. */
209 if (g->ops.clk.init_clk_support) {
210 err = g->ops.clk.init_clk_support(g);
211 if (err) {
212 nvgpu_err(g, "failed to init gk20a clk");
213 goto done;
214 }
215 }
216
217 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
218 err = g->ops.nvlink.init(g);
219 if (err) {
220 nvgpu_err(g, "failed to init nvlink");
221 goto done;
222 }
223 }
224
225 if (g->ops.fb.init_fbpa) {
226 err = g->ops.fb.init_fbpa(g);
227 if (err) {
228 nvgpu_err(g, "failed to init fbpa");
229 goto done;
230 }
231 }
232
233 if (g->ops.fb.mem_unlock) {
234 err = g->ops.fb.mem_unlock(g);
235 if (err) {
236 nvgpu_err(g, "failed to unlock memory");
237 goto done;
238 }
239 }
240
241 err = g->ops.fifo.reset_enable_hw(g);
242
243 if (err) {
244 nvgpu_err(g, "failed to reset gk20a fifo");
245 goto done;
246 }
247
248 err = nvgpu_init_ltc_support(g);
249 if (err) {
250 nvgpu_err(g, "failed to init ltc");
251 goto done;
252 }
253
254 err = nvgpu_init_mm_support(g);
255 if (err) {
256 nvgpu_err(g, "failed to init gk20a mm");
257 goto done;
258 }
259
260 err = gk20a_init_fifo_support(g);
261 if (err) {
262 nvgpu_err(g, "failed to init gk20a fifo");
263 goto done;
264 }
265
266 if (g->ops.therm.elcg_init_idle_filters) {
267 g->ops.therm.elcg_init_idle_filters(g);
268 }
269
270 g->ops.mc.intr_enable(g);
271
272 /*
273 * Power gate the chip as per the TPC PG mask
274 * and the fuse_status register.
275 * If TPC PG mask is invalid halt the GPU poweron.
276 */
277 g->can_tpc_powergate = false;
278 fuse_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0);
279
280 if (g->ops.tpc.tpc_powergate) {
281 err = g->ops.tpc.tpc_powergate(g, fuse_status);
282 }
283
284 if (err) {
285 nvgpu_err(g, "failed to power ON GPU");
286 goto done;
287 }
288
289 nvgpu_mutex_acquire(&g->tpc_pg_lock);
290
291 if (g->can_tpc_powergate) {
292 if (g->ops.gr.powergate_tpc != NULL)
293 g->ops.gr.powergate_tpc(g);
294 }
295
296 err = gk20a_enable_gr_hw(g);
297 if (err) {
298 nvgpu_err(g, "failed to enable gr");
299 nvgpu_mutex_release(&g->tpc_pg_lock);
300 goto done;
301 }
302
303 if (g->ops.pmu.is_pmu_supported(g)) {
304 if (g->ops.pmu.prepare_ucode) {
305 err = g->ops.pmu.prepare_ucode(g);
306 }
307 if (err) {
308 nvgpu_err(g, "failed to init pmu ucode");
309 nvgpu_mutex_release(&g->tpc_pg_lock);
310 goto done;
311 }
312 }
313
314 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
315 err = gk20a_init_pstate_support(g);
316 if (err) {
317 nvgpu_err(g, "failed to init pstates");
318 nvgpu_mutex_release(&g->tpc_pg_lock);
319 goto done;
320 }
321 }
322
323 if (g->acr.bootstrap_hs_acr != NULL &&
324 nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
325 err = g->acr.bootstrap_hs_acr(g, &g->acr, &g->acr.acr);
326 if (err != 0) {
327 nvgpu_err(g, "ACR bootstrap failed");
328 nvgpu_mutex_release(&g->tpc_pg_lock);
329 goto done;
330 }
331 }
332
333 if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
334 err = nvgpu_init_sec2_support(g);
335 if (err != 0) {
336 nvgpu_err(g, "failed to init sec2");
337 nvgpu_mutex_release(&g->tpc_pg_lock);
338 goto done;
339 }
340 }
341
342 if (g->ops.pmu.is_pmu_supported(g)) {
343 err = nvgpu_init_pmu_support(g);
344 if (err) {
345 nvgpu_err(g, "failed to init gk20a pmu");
346 nvgpu_mutex_release(&g->tpc_pg_lock);
347 goto done;
348 }
349 }
350
351 err = gk20a_init_gr_support(g);
352 if (err) {
353 nvgpu_err(g, "failed to init gk20a gr");
354 nvgpu_mutex_release(&g->tpc_pg_lock);
355 goto done;
356 }
357
358 nvgpu_mutex_release(&g->tpc_pg_lock);
359
360 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
361 err = gk20a_init_pstate_pmu_support(g);
362 if (err) {
363 nvgpu_err(g, "failed to init pstates");
364 goto done;
365 }
366 }
367
368 if (g->ops.pmu_ver.clk.clk_set_boot_clk && nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
369 g->ops.pmu_ver.clk.clk_set_boot_clk(g);
370 } else {
371 err = nvgpu_clk_arb_init_arbiter(g);
372 if (err) {
373 nvgpu_err(g, "failed to init clk arb");
374 goto done;
375 }
376 }
377
378 err = nvgpu_init_therm_support(g);
379 if (err) {
380 nvgpu_err(g, "failed to init gk20a therm");
381 goto done;
382 }
383
384 err = g->ops.chip_init_gpu_characteristics(g);
385 if (err) {
386 nvgpu_err(g, "failed to init gk20a gpu characteristics");
387 goto done;
388 }
389
390#ifdef CONFIG_GK20A_CTXSW_TRACE
391 err = gk20a_ctxsw_trace_init(g);
392 if (err)
393 nvgpu_warn(g, "could not initialize ctxsw tracing");
394#endif
395
396 /* Restore the debug setting */
397 g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);
398
399 gk20a_init_ce_support(g);
400
401 if (g->ops.xve.available_speeds) {
402 u32 speed;
403
404 if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && g->ops.xve.disable_aspm) {
405 g->ops.xve.disable_aspm(g);
406 }
407
408 g->ops.xve.available_speeds(g, &speed);
409
410 /* Set to max speed */
411 speed = 1 << (fls(speed) - 1);
412 err = g->ops.xve.set_speed(g, speed);
413 if (err) {
414 nvgpu_err(g, "Failed to set PCIe bus speed!");
415 goto done;
416 }
417 }
418
419#if defined(CONFIG_TEGRA_GK20A_NVHOST)
420 if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) {
421 if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
422 nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
423 __nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
424 g->syncpt_unit_base, nr_pages);
425 }
426 }
427#endif
428
429 if (g->ops.fifo.channel_resume) {
430 g->ops.fifo.channel_resume(g);
431 }
432
433done:
434 if (err) {
435 g->power_on = false;
436 }
437
438 return err;
439}
440
441int gk20a_wait_for_idle(struct gk20a *g)
442{
443 int wait_length = 150; /* 3 second overall max wait. */
444 int target_usage_count = 0;
445
446 if (!g) {
447 return -ENODEV;
448 }
449
450 while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count)
451 && (wait_length-- >= 0)) {
452 nvgpu_msleep(20);
453 }
454
455 if (wait_length < 0) {
456 nvgpu_warn(g, "Timed out waiting for idle (%d)!\n",
457 nvgpu_atomic_read(&g->usage_count));
458 return -ETIMEDOUT;
459 }
460
461 return 0;
462}
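/*
 * The polling budget above is about 150 iterations of 20 ms, matching the
 * "3 second overall max wait" comment; a negative wait_length after the
 * loop is treated as a timeout and reported with the current usage_count.
 */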
463
464int gk20a_init_gpu_characteristics(struct gk20a *g)
465{
466 __nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
467 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
468 __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);
469
470 if (IS_ENABLED(CONFIG_SYNC)) {
471 __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);
472 }
473
474 if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) {
475 __nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);
476 }
477
478 /*
479 * Fast submits are supported as long as the user doesn't request
480 * anything that depends on job tracking. (Here, fast means strictly no
481 * metadata, just the gpfifo contents are copied and gp_put updated).
482 */
483 __nvgpu_set_enabled(g,
484 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
485 true);
486
487 /*
488 * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
489 * and other heavy stuff, which prevents deterministic submits. This is
490 * supported otherwise, provided that the user doesn't request anything
491 * that depends on deferred cleanup.
492 */
493 if (!nvgpu_channel_sync_needs_os_fence_framework(g)) {
494 __nvgpu_set_enabled(g,
495 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
496 true);
497 }
498
499 __nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
500
501 __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
502 __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
503
504 if (g->ops.clk_arb.get_arbiter_clk_domains != NULL &&
505 g->ops.clk.support_clk_freq_controller) {
506 __nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);
507 }
508
509 g->ops.gr.detect_sm_arch(g);
510
511 if (g->ops.gr.init_cyclestats) {
512 g->ops.gr.init_cyclestats(g);
513 }
514
515 g->ops.gr.get_rop_l2_en_mask(g);
516
517 return 0;
518}
519
520/*
521 * Free the gk20a struct.
522 */
523static void gk20a_free_cb(struct nvgpu_ref *refcount)
524{
525 struct gk20a *g = container_of(refcount,
526 struct gk20a, refcount);
527
528 nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!");
529
530 gk20a_ce_destroy(g);
531
532 if (g->remove_support) {
533 g->remove_support(g);
534 }
535
536 if (g->free) {
537 g->free(g);
538 }
539}
540
541/**
542 * gk20a_get() - Increment ref count on driver
543 *
544 * @g - The driver to increment
545 * This will fail if the driver is in the process of being released. In that
546 * case it will return NULL. Otherwise a pointer to the driver passed in will
547 * be returned.
548 */
549struct gk20a * __must_check gk20a_get(struct gk20a *g)
550{
551 int success;
552
553 /*
554 * Handle the possibility we are still freeing the gk20a struct while
555 * gk20a_get() is called. Unlikely but plausible race condition. Ideally
556 * the code will never be in such a situation that this race is
557 * possible.
558 */
559 success = nvgpu_ref_get_unless_zero(&g->refcount);
560
561 nvgpu_log(g, gpu_dbg_shutdown, "GET: refs currently %d %s",
562 nvgpu_atomic_read(&g->refcount.refcount),
563 success ? "" : "(FAILED)");
564
565 return success ? g : NULL;
566}
567
568/**
569 * gk20a_put() - Decrement ref count on driver
570 *
571 * @g - The driver to decrement
572 *
573 * Decrement the driver ref-count. If necessary, also free the underlying driver
574 * memory.
575 */
576void gk20a_put(struct gk20a *g)
577{
578 /*
579 * Note - this is racy; two instances of this could run before the
580 * actual kref_put() runs, and you could see something like:
581 *
582 * ... PUT: refs currently 2
583 * ... PUT: refs currently 2
584 * ... Freeing GK20A struct!
585 */
586 nvgpu_log(g, gpu_dbg_shutdown, "PUT: refs currently %d",
587 nvgpu_atomic_read(&g->refcount.refcount));
588
589 nvgpu_ref_put(&g->refcount, gk20a_free_cb);
590}
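/*
 * Usage sketch (illustration only, not part of the original file): callers
 * that may race with driver teardown take a reference before touching the
 * device and drop it when done, e.g.
 *
 *	struct gk20a *g = gk20a_get(some_gk20a_ptr);
 *
 *	if (g == NULL)
 *		return -ENODEV;	// driver is being released
 *	// ... use the device ...
 *	gk20a_put(g);
 *
 * "some_gk20a_ptr" stands in for however the caller already obtained the
 * struct gk20a pointer; it is a placeholder, not an existing helper.
 */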
diff --git a/include/gk20a/gk20a.h b/include/gk20a/gk20a.h
new file mode 100644
index 0000000..16a2453
--- /dev/null
+++ b/include/gk20a/gk20a.h
@@ -0,0 +1,33 @@
1/*
2 * This file is used as a temporary redirection header for <nvgpu/gk20a.h>
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * GK20A Graphics
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27#ifndef GK20A_GK20A_H
28#define GK20A_GK20A_H
29
30/* no new headers should be added here */
31#include <nvgpu/gk20a.h>
32
33#endif
diff --git a/include/gk20a/gr_ctx_gk20a.c b/include/gk20a/gr_ctx_gk20a.c
new file mode 100644
index 0000000..8b9ac32
--- /dev/null
+++ b/include/gk20a/gr_ctx_gk20a.c
@@ -0,0 +1,486 @@
1/*
2 * GK20A Graphics Context
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h>
28#include <nvgpu/firmware.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/io.h>
31
32#include "gk20a.h"
33#include "gr_ctx_gk20a.h"
34
35#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
36
37static int gr_gk20a_alloc_load_netlist_u32(struct gk20a *g, u32 *src, u32 len,
38 struct u32_list_gk20a *u32_list)
39{
40 u32_list->count = (len + sizeof(u32) - 1) / sizeof(u32);
41 if (!alloc_u32_list_gk20a(g, u32_list)) {
42 return -ENOMEM;
43 }
44
45 memcpy(u32_list->l, src, len);
46
47 return 0;
48}
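/*
 * Note: the count computed above is a round-up division, so a byte length
 * that is not a multiple of sizeof(u32) still gets enough u32 slots; the
 * av/av64/aiv loaders below instead truncate to whole entries.
 */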
49
50static int gr_gk20a_alloc_load_netlist_av(struct gk20a *g, u32 *src, u32 len,
51 struct av_list_gk20a *av_list)
52{
53 av_list->count = len / sizeof(struct av_gk20a);
54 if (!alloc_av_list_gk20a(g, av_list)) {
55 return -ENOMEM;
56 }
57
58 memcpy(av_list->l, src, len);
59
60 return 0;
61}
62
63static int gr_gk20a_alloc_load_netlist_av64(struct gk20a *g, u32 *src, u32 len,
64 struct av64_list_gk20a *av64_list)
65{
66 av64_list->count = len / sizeof(struct av64_gk20a);
67 if (!alloc_av64_list_gk20a(g, av64_list)) {
68 return -ENOMEM;
69 }
70
71 memcpy(av64_list->l, src, len);
72
73 return 0;
74}
75
76static int gr_gk20a_alloc_load_netlist_aiv(struct gk20a *g, u32 *src, u32 len,
77 struct aiv_list_gk20a *aiv_list)
78{
79 aiv_list->count = len / sizeof(struct aiv_gk20a);
80 if (!alloc_aiv_list_gk20a(g, aiv_list)) {
81 return -ENOMEM;
82 }
83
84 memcpy(aiv_list->l, src, len);
85
86 return 0;
87}
88
89static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
90{
91 struct nvgpu_firmware *netlist_fw;
92 struct netlist_image *netlist = NULL;
93 char name[MAX_NETLIST_NAME];
94 u32 i, major_v = ~0, major_v_hw, netlist_num;
95 int net, max, err = -ENOENT;
96
97 nvgpu_log_fn(g, " ");
98
99 if (g->ops.gr_ctx.is_fw_defined()) {
100 net = NETLIST_FINAL;
101 max = 0;
102 major_v_hw = ~0;
103 g->gr.ctx_vars.dynamic = false;
104 } else {
105 net = NETLIST_SLOT_A;
106 max = MAX_NETLIST;
107 major_v_hw = gk20a_readl(g,
108 gr_fecs_ctx_state_store_major_rev_id_r());
109 g->gr.ctx_vars.dynamic = true;
110 }
111
112 for (; net < max; net++) {
113 if (g->ops.gr_ctx.get_netlist_name(g, net, name) != 0) {
114 nvgpu_warn(g, "invalid netlist index %d", net);
115 continue;
116 }
117
118 netlist_fw = nvgpu_request_firmware(g, name, 0);
119 if (!netlist_fw) {
120 nvgpu_warn(g, "failed to load netlist %s", name);
121 continue;
122 }
123
124 netlist = (struct netlist_image *)netlist_fw->data;
125
126 for (i = 0; i < netlist->header.regions; i++) {
127 u32 *src = (u32 *)((u8 *)netlist + netlist->regions[i].data_offset);
128 u32 size = netlist->regions[i].data_size;
129
130 switch (netlist->regions[i].region_id) {
131 case NETLIST_REGIONID_FECS_UCODE_DATA:
132 nvgpu_log_info(g, "NETLIST_REGIONID_FECS_UCODE_DATA");
133 err = gr_gk20a_alloc_load_netlist_u32(g,
134 src, size, &g->gr.ctx_vars.ucode.fecs.data);
135 if (err) {
136 goto clean_up;
137 }
138 break;
139 case NETLIST_REGIONID_FECS_UCODE_INST:
140 nvgpu_log_info(g, "NETLIST_REGIONID_FECS_UCODE_INST");
141 err = gr_gk20a_alloc_load_netlist_u32(g,
142 src, size, &g->gr.ctx_vars.ucode.fecs.inst);
143 if (err) {
144 goto clean_up;
145 }
146 break;
147 case NETLIST_REGIONID_GPCCS_UCODE_DATA:
148 nvgpu_log_info(g, "NETLIST_REGIONID_GPCCS_UCODE_DATA");
149 err = gr_gk20a_alloc_load_netlist_u32(g,
150 src, size, &g->gr.ctx_vars.ucode.gpccs.data);
151 if (err) {
152 goto clean_up;
153 }
154 break;
155 case NETLIST_REGIONID_GPCCS_UCODE_INST:
156 nvgpu_log_info(g, "NETLIST_REGIONID_GPCCS_UCODE_INST");
157 err = gr_gk20a_alloc_load_netlist_u32(g,
158 src, size, &g->gr.ctx_vars.ucode.gpccs.inst);
159 if (err) {
160 goto clean_up;
161 }
162 break;
163 case NETLIST_REGIONID_SW_BUNDLE_INIT:
164 nvgpu_log_info(g, "NETLIST_REGIONID_SW_BUNDLE_INIT");
165 err = gr_gk20a_alloc_load_netlist_av(g,
166 src, size, &g->gr.ctx_vars.sw_bundle_init);
167 if (err) {
168 goto clean_up;
169 }
170 break;
171 case NETLIST_REGIONID_SW_METHOD_INIT:
172 nvgpu_log_info(g, "NETLIST_REGIONID_SW_METHOD_INIT");
173 err = gr_gk20a_alloc_load_netlist_av(g,
174 src, size, &g->gr.ctx_vars.sw_method_init);
175 if (err) {
176 goto clean_up;
177 }
178 break;
179 case NETLIST_REGIONID_SW_CTX_LOAD:
180 nvgpu_log_info(g, "NETLIST_REGIONID_SW_CTX_LOAD");
181 err = gr_gk20a_alloc_load_netlist_aiv(g,
182 src, size, &g->gr.ctx_vars.sw_ctx_load);
183 if (err) {
184 goto clean_up;
185 }
186 break;
187 case NETLIST_REGIONID_SW_NON_CTX_LOAD:
188 nvgpu_log_info(g, "NETLIST_REGIONID_SW_NON_CTX_LOAD");
189 err = gr_gk20a_alloc_load_netlist_av(g,
190 src, size, &g->gr.ctx_vars.sw_non_ctx_load);
191 if (err) {
192 goto clean_up;
193 }
194 break;
195 case NETLIST_REGIONID_SWVEIDBUNDLEINIT:
196 nvgpu_log_info(g,
197 "NETLIST_REGIONID_SW_VEID_BUNDLE_INIT");
198 err = gr_gk20a_alloc_load_netlist_av(g,
199 src, size,
200 &g->gr.ctx_vars.sw_veid_bundle_init);
201 if (err) {
202 goto clean_up;
203 }
204 break;
205 case NETLIST_REGIONID_CTXREG_SYS:
206 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_SYS");
207 err = gr_gk20a_alloc_load_netlist_aiv(g,
208 src, size, &g->gr.ctx_vars.ctxsw_regs.sys);
209 if (err) {
210 goto clean_up;
211 }
212 break;
213 case NETLIST_REGIONID_CTXREG_GPC:
214 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_GPC");
215 err = gr_gk20a_alloc_load_netlist_aiv(g,
216 src, size, &g->gr.ctx_vars.ctxsw_regs.gpc);
217 if (err) {
218 goto clean_up;
219 }
220 break;
221 case NETLIST_REGIONID_CTXREG_TPC:
222 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_TPC");
223 err = gr_gk20a_alloc_load_netlist_aiv(g,
224 src, size, &g->gr.ctx_vars.ctxsw_regs.tpc);
225 if (err) {
226 goto clean_up;
227 }
228 break;
229 case NETLIST_REGIONID_CTXREG_ZCULL_GPC:
230 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_ZCULL_GPC");
231 err = gr_gk20a_alloc_load_netlist_aiv(g,
232 src, size, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
233 if (err) {
234 goto clean_up;
235 }
236 break;
237 case NETLIST_REGIONID_CTXREG_PPC:
238 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PPC");
239 err = gr_gk20a_alloc_load_netlist_aiv(g,
240 src, size, &g->gr.ctx_vars.ctxsw_regs.ppc);
241 if (err) {
242 goto clean_up;
243 }
244 break;
245 case NETLIST_REGIONID_CTXREG_PM_SYS:
246 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_SYS");
247 err = gr_gk20a_alloc_load_netlist_aiv(g,
248 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_sys);
249 if (err) {
250 goto clean_up;
251 }
252 break;
253 case NETLIST_REGIONID_CTXREG_PM_GPC:
254 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_GPC");
255 err = gr_gk20a_alloc_load_netlist_aiv(g,
256 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_gpc);
257 if (err) {
258 goto clean_up;
259 }
260 break;
261 case NETLIST_REGIONID_CTXREG_PM_TPC:
262 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_TPC");
263 err = gr_gk20a_alloc_load_netlist_aiv(g,
264 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_tpc);
265 if (err) {
266 goto clean_up;
267 }
268 break;
269 case NETLIST_REGIONID_BUFFER_SIZE:
270 g->gr.ctx_vars.buffer_size = *src;
271 nvgpu_log_info(g, "NETLIST_REGIONID_BUFFER_SIZE : %d",
272 g->gr.ctx_vars.buffer_size);
273 break;
274 case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX:
275 g->gr.ctx_vars.regs_base_index = *src;
276 nvgpu_log_info(g, "NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %u",
277 g->gr.ctx_vars.regs_base_index);
278 break;
279 case NETLIST_REGIONID_MAJORV:
280 major_v = *src;
281 nvgpu_log_info(g, "NETLIST_REGIONID_MAJORV : %d",
282 major_v);
283 break;
284 case NETLIST_REGIONID_NETLIST_NUM:
285 netlist_num = *src;
286 nvgpu_log_info(g, "NETLIST_REGIONID_NETLIST_NUM : %d",
287 netlist_num);
288 break;
289 case NETLIST_REGIONID_CTXREG_PMPPC:
290 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMPPC");
291 err = gr_gk20a_alloc_load_netlist_aiv(g,
292 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ppc);
293 if (err) {
294 goto clean_up;
295 }
296 break;
297 case NETLIST_REGIONID_NVPERF_CTXREG_SYS:
298 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_CTXREG_SYS");
299 err = gr_gk20a_alloc_load_netlist_aiv(g,
300 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys);
301 if (err) {
302 goto clean_up;
303 }
304 break;
305 case NETLIST_REGIONID_NVPERF_FBP_CTXREGS:
306 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_CTXREGS");
307 err = gr_gk20a_alloc_load_netlist_aiv(g,
308 src, size, &g->gr.ctx_vars.ctxsw_regs.fbp);
309 if (err) {
310 goto clean_up;
311 }
312 break;
313 case NETLIST_REGIONID_NVPERF_CTXREG_GPC:
314 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_CTXREG_GPC");
315 err = gr_gk20a_alloc_load_netlist_aiv(g,
316 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_gpc);
317 if (err) {
318 goto clean_up;
319 }
320 break;
321 case NETLIST_REGIONID_NVPERF_FBP_ROUTER:
322 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_ROUTER");
323 err = gr_gk20a_alloc_load_netlist_aiv(g,
324 src, size, &g->gr.ctx_vars.ctxsw_regs.fbp_router);
325 if (err) {
326 goto clean_up;
327 }
328 break;
329 case NETLIST_REGIONID_NVPERF_GPC_ROUTER:
330 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_GPC_ROUTER");
331 err = gr_gk20a_alloc_load_netlist_aiv(g,
332 src, size, &g->gr.ctx_vars.ctxsw_regs.gpc_router);
333 if (err) {
334 goto clean_up;
335 }
336 break;
337 case NETLIST_REGIONID_CTXREG_PMLTC:
338 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMLTC");
339 err = gr_gk20a_alloc_load_netlist_aiv(g,
340 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ltc);
341 if (err) {
342 goto clean_up;
343 }
344 break;
345 case NETLIST_REGIONID_CTXREG_PMFBPA:
346 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMFBPA");
347 err = gr_gk20a_alloc_load_netlist_aiv(g,
348 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_fbpa);
349 if (err) {
350 goto clean_up;
351 }
352 break;
353 case NETLIST_REGIONID_NVPERF_SYS_ROUTER:
354 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_SYS_ROUTER");
355 err = gr_gk20a_alloc_load_netlist_aiv(g,
356 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router);
357 if (err) {
358 goto clean_up;
359 }
360 break;
361 case NETLIST_REGIONID_NVPERF_PMA:
362 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMA");
363 err = gr_gk20a_alloc_load_netlist_aiv(g,
364 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_pma);
365 if (err) {
366 goto clean_up;
367 }
368 break;
369 case NETLIST_REGIONID_CTXREG_PMROP:
370 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMROP");
371 err = gr_gk20a_alloc_load_netlist_aiv(g,
372 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_rop);
373 if (err) {
374 goto clean_up;
375 }
376 break;
377 case NETLIST_REGIONID_CTXREG_PMUCGPC:
378 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMUCGPC");
379 err = gr_gk20a_alloc_load_netlist_aiv(g,
380 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc);
381 if (err) {
382 goto clean_up;
383 }
384 break;
385 case NETLIST_REGIONID_CTXREG_ETPC:
386 nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_ETPC");
387 err = gr_gk20a_alloc_load_netlist_aiv(g,
388 src, size, &g->gr.ctx_vars.ctxsw_regs.etpc);
389 if (err) {
390 goto clean_up;
391 }
392 break;
393 case NETLIST_REGIONID_SW_BUNDLE64_INIT:
394 nvgpu_log_info(g, "NETLIST_REGIONID_SW_BUNDLE64_INIT");
395 err = gr_gk20a_alloc_load_netlist_av64(g,
396 src, size,
397 &g->gr.ctx_vars.sw_bundle64_init);
398 if (err) {
399 goto clean_up;
400 }
401 break;
402 case NETLIST_REGIONID_NVPERF_PMCAU:
403 nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMCAU");
404 err = gr_gk20a_alloc_load_netlist_aiv(g,
405 src, size,
406 &g->gr.ctx_vars.ctxsw_regs.pm_cau);
407 if (err) {
408 goto clean_up;
409 }
410 break;
411
412 default:
413 nvgpu_log_info(g, "unrecognized region %d skipped", i);
414 break;
415 }
416 }
417
418 if (net != NETLIST_FINAL && major_v != major_v_hw) {
419 nvgpu_log_info(g, "skip %s: major_v 0x%08x doesn't match hw 0x%08x",
420 name, major_v, major_v_hw);
421 goto clean_up;
422 }
423
424 g->gr.ctx_vars.valid = true;
425 g->gr.netlist = net;
426
427 nvgpu_release_firmware(g, netlist_fw);
428 nvgpu_log_fn(g, "done");
429 goto done;
430
431clean_up:
432 g->gr.ctx_vars.valid = false;
433 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.inst.l);
434 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.data.l);
435 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.inst.l);
436 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.data.l);
437 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle_init.l);
438 nvgpu_kfree(g, g->gr.ctx_vars.sw_method_init.l);
439 nvgpu_kfree(g, g->gr.ctx_vars.sw_ctx_load.l);
440 nvgpu_kfree(g, g->gr.ctx_vars.sw_non_ctx_load.l);
441 nvgpu_kfree(g, g->gr.ctx_vars.sw_veid_bundle_init.l);
442 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.sys.l);
443 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.gpc.l);
444 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.tpc.l);
445 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l);
446 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.ppc.l);
447 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
448 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
449 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
450 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_ppc.l);
451 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_sys.l);
452 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.fbp.l);
453 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_gpc.l);
454 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.fbp_router.l);
455 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.gpc_router.l);
456 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_ltc.l);
457 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
458 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_sys_router.l);
459 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.perf_pma.l);
460 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_rop.l);
461 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_ucgpc.l);
462 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.etpc.l);
463 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle64_init.l);
464 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_cau.l);
465 nvgpu_release_firmware(g, netlist_fw);
466 err = -ENOENT;
467 }
468
469done:
470 if (g->gr.ctx_vars.valid) {
471 nvgpu_log_info(g, "netlist image %s loaded", name);
472 return 0;
473 } else {
474 nvgpu_err(g, "failed to load netlist image!!");
475 return err;
476 }
477}
478
479int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr)
480{
481 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
482 return gr_gk20a_init_ctx_vars_sim(g, gr);
483 } else {
484 return gr_gk20a_init_ctx_vars_fw(g, gr);
485 }
486}
diff --git a/include/gk20a/gr_ctx_gk20a.h b/include/gk20a/gr_ctx_gk20a.h
new file mode 100644
index 0000000..e75472c
--- /dev/null
+++ b/include/gk20a/gr_ctx_gk20a.h
@@ -0,0 +1,206 @@
1/*
2 * GK20A Graphics Context
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef NVGPU_GK20A_GR_CTX_GK20A_H
25#define NVGPU_GK20A_GR_CTX_GK20A_H
26
27#include <nvgpu/kmem.h>
28
29struct gr_gk20a;
30
31/* emulation netlists, match majorV with HW */
32#define GK20A_NETLIST_IMAGE_A "NETA_img.bin"
33#define GK20A_NETLIST_IMAGE_B "NETB_img.bin"
34#define GK20A_NETLIST_IMAGE_C "NETC_img.bin"
35#define GK20A_NETLIST_IMAGE_D "NETD_img.bin"
36
37/*
38 * To support multiple ARCHes within the same GPU family, a path like
39 * ARCH/NETIMAGE must be provided so that the correct netimage within
40 * the GPU family is picked up.
41 * Example: gm20x can support gm204 or gm206, so the path
42 * for the netimage is gm204/NETC_img.bin; the '/' char
43 * is inserted at the null terminator of "GAxxx"
44 * to build the complete path, e.g. gm204/NETC_img.bin.
45 */
46#define GPU_ARCH "GAxxx"
47
48union __max_name {
49#ifdef GK20A_NETLIST_IMAGE_A
50 char __name_a[sizeof(GK20A_NETLIST_IMAGE_A)];
51#endif
52#ifdef GK20A_NETLIST_IMAGE_B
53 char __name_b[sizeof(GK20A_NETLIST_IMAGE_B)];
54#endif
55#ifdef GK20A_NETLIST_IMAGE_C
56 char __name_c[sizeof(GK20A_NETLIST_IMAGE_C)];
57#endif
58#ifdef GK20A_NETLIST_IMAGE_D
59 char __name_d[sizeof(GK20A_NETLIST_IMAGE_D)];
60#endif
61};
62
63#define MAX_NETLIST_NAME (sizeof(GPU_ARCH) + sizeof(union __max_name))
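/*
 * sizeof(union __max_name) evaluates to the longest of the netlist image
 * names defined above, so MAX_NETLIST_NAME leaves room for the "GAxxx/"
 * style arch prefix, the longest file name and its terminating NUL.
 */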
64
65/* index for emulation netlists */
66#define NETLIST_FINAL -1
67#define NETLIST_SLOT_A 0
68#define NETLIST_SLOT_B 1
69#define NETLIST_SLOT_C 2
70#define NETLIST_SLOT_D 3
71#define MAX_NETLIST 4
72
73/* netlist regions */
74#define NETLIST_REGIONID_FECS_UCODE_DATA 0
75#define NETLIST_REGIONID_FECS_UCODE_INST 1
76#define NETLIST_REGIONID_GPCCS_UCODE_DATA 2
77#define NETLIST_REGIONID_GPCCS_UCODE_INST 3
78#define NETLIST_REGIONID_SW_BUNDLE_INIT 4
79#define NETLIST_REGIONID_SW_CTX_LOAD 5
80#define NETLIST_REGIONID_SW_NON_CTX_LOAD 6
81#define NETLIST_REGIONID_SW_METHOD_INIT 7
82#define NETLIST_REGIONID_CTXREG_SYS 8
83#define NETLIST_REGIONID_CTXREG_GPC 9
84#define NETLIST_REGIONID_CTXREG_TPC 10
85#define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11
86#define NETLIST_REGIONID_CTXREG_PM_SYS 12
87#define NETLIST_REGIONID_CTXREG_PM_GPC 13
88#define NETLIST_REGIONID_CTXREG_PM_TPC 14
89#define NETLIST_REGIONID_MAJORV 15
90#define NETLIST_REGIONID_BUFFER_SIZE 16
91#define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17
92#define NETLIST_REGIONID_NETLIST_NUM 18
93#define NETLIST_REGIONID_CTXREG_PPC 19
94#define NETLIST_REGIONID_CTXREG_PMPPC 20
95#define NETLIST_REGIONID_NVPERF_CTXREG_SYS 21
96#define NETLIST_REGIONID_NVPERF_FBP_CTXREGS 22
97#define NETLIST_REGIONID_NVPERF_CTXREG_GPC 23
98#define NETLIST_REGIONID_NVPERF_FBP_ROUTER 24
99#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25
100#define NETLIST_REGIONID_CTXREG_PMLTC 26
101#define NETLIST_REGIONID_CTXREG_PMFBPA 27
102#define NETLIST_REGIONID_SWVEIDBUNDLEINIT 28
103#define NETLIST_REGIONID_NVPERF_SYS_ROUTER 29
104#define NETLIST_REGIONID_NVPERF_PMA 30
105#define NETLIST_REGIONID_CTXREG_PMROP 31
106#define NETLIST_REGIONID_CTXREG_PMUCGPC 32
107#define NETLIST_REGIONID_CTXREG_ETPC 33
108#define NETLIST_REGIONID_SW_BUNDLE64_INIT 34
109#define NETLIST_REGIONID_NVPERF_PMCAU 35
110
111struct netlist_region {
112 u32 region_id;
113 u32 data_size;
114 u32 data_offset;
115};
116
117struct netlist_image_header {
118 u32 version;
119 u32 regions;
120};
121
122struct netlist_image {
123 struct netlist_image_header header;
124 struct netlist_region regions[1];
125};
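/*
 * A netlist image is parsed as header.regions entries of struct
 * netlist_region laid out directly after the header; each entry's
 * data_offset and data_size are relative to the start of the image,
 * which is how gr_gk20a_init_ctx_vars_fw() walks the firmware blob.
 */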
126
127struct av_gk20a {
128 u32 addr;
129 u32 value;
130};
131struct av64_gk20a {
132 u32 addr;
133 u32 value_lo;
134 u32 value_hi;
135};
136struct aiv_gk20a {
137 u32 addr;
138 u32 index;
139 u32 value;
140};
141struct aiv_list_gk20a {
142 struct aiv_gk20a *l;
143 u32 count;
144};
145struct av_list_gk20a {
146 struct av_gk20a *l;
147 u32 count;
148};
149struct av64_list_gk20a {
150 struct av64_gk20a *l;
151 u32 count;
152};
153struct u32_list_gk20a {
154 u32 *l;
155 u32 count;
156};
157
158struct ctxsw_buf_offset_map_entry {
159 u32 addr; /* Register address */
160 u32 offset; /* Offset in ctxt switch buffer */
161};
162
163static inline
164struct av_gk20a *alloc_av_list_gk20a(struct gk20a *g, struct av_list_gk20a *avl)
165{
166 avl->l = nvgpu_kzalloc(g, avl->count * sizeof(*avl->l));
167 return avl->l;
168}
169
170static inline
171struct av64_gk20a *alloc_av64_list_gk20a(struct gk20a *g, struct av64_list_gk20a *avl)
172{
173 avl->l = nvgpu_kzalloc(g, avl->count * sizeof(*avl->l));
174 return avl->l;
175}
176
177static inline
178struct aiv_gk20a *alloc_aiv_list_gk20a(struct gk20a *g,
179 struct aiv_list_gk20a *aivl)
180{
181 aivl->l = nvgpu_kzalloc(g, aivl->count * sizeof(*aivl->l));
182 return aivl->l;
183}
184
185static inline
186u32 *alloc_u32_list_gk20a(struct gk20a *g, struct u32_list_gk20a *u32l)
187{
188 u32l->l = nvgpu_kzalloc(g, u32l->count * sizeof(*u32l->l));
189 return u32l->l;
190}
191
192struct gr_ucode_gk20a {
193 struct {
194 struct u32_list_gk20a inst;
195 struct u32_list_gk20a data;
196 } gpccs, fecs;
197};
198
199/* main entry for grctx loading */
200int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
201int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr);
202
203struct gpu_ops;
204void gk20a_init_gr_ctx(struct gpu_ops *gops);
205
206#endif /*NVGPU_GK20A_GR_CTX_GK20A_H*/
diff --git a/include/gk20a/gr_ctx_gk20a_sim.c b/include/gk20a/gr_ctx_gk20a_sim.c
new file mode 100644
index 0000000..ce65c77
--- /dev/null
+++ b/include/gk20a/gr_ctx_gk20a_sim.c
@@ -0,0 +1,356 @@
1/*
2 * GK20A Graphics Context for Simulation
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a.h"
26#include <nvgpu/sim.h>
27#include "gr_ctx_gk20a.h"
28
29#include <nvgpu/log.h>
30
31int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
32{
33 int err = -ENOMEM;
34 u32 i, temp;
35
36 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info,
37 "querying grctx info from chiplib");
38
39 g->gr.ctx_vars.dynamic = true;
40 g->gr.netlist = GR_NETLIST_DYNAMIC;
41
42 if (g->sim->esc_readl == NULL) {
43 nvgpu_err(g, "Invalid pointer to query function.");
44 err = -ENOENT;
45 goto fail;
46 }
47
48 /* query sizes and counts */
49 g->sim->esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
50 &g->gr.ctx_vars.ucode.fecs.inst.count);
51 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
52 &g->gr.ctx_vars.ucode.fecs.data.count);
53 g->sim->esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
54 &g->gr.ctx_vars.ucode.gpccs.inst.count);
55 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
56 &g->gr.ctx_vars.ucode.gpccs.data.count);
57 g->sim->esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
58 g->gr.ctx_vars.buffer_size = temp << 2;
59 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
60 &g->gr.ctx_vars.sw_bundle_init.count);
61 g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
62 &g->gr.ctx_vars.sw_method_init.count);
63 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
64 &g->gr.ctx_vars.sw_ctx_load.count);
65 g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT_SIZE", 0,
66 &g->gr.ctx_vars.sw_veid_bundle_init.count);
67 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT_SIZE", 0,
68 &g->gr.ctx_vars.sw_bundle64_init.count);
69
70 g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG_SIZE", 0,
71 &g->gr.ctx_vars.sw_non_ctx_load.count);
72 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
73 &g->gr.ctx_vars.ctxsw_regs.sys.count);
74 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
75 &g->gr.ctx_vars.ctxsw_regs.gpc.count);
76 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
77 &g->gr.ctx_vars.ctxsw_regs.tpc.count);
78 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
79 &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
80 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
81 &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
82 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
83 &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
84 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
85 &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);
86 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
87 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
88 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0,
89 &g->gr.ctx_vars.ctxsw_regs.etpc.count);
90 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
91 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
92
93 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.inst) == NULL) {
94 goto fail;
95 }
96 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.data) == NULL) {
97 goto fail;
98 }
99 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.gpccs.inst) == NULL) {
100 goto fail;
101 }
102 if (alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.gpccs.data) == NULL) {
103 goto fail;
104 }
105 if (alloc_av_list_gk20a(g, &g->gr.ctx_vars.sw_bundle_init) == NULL) {
106 goto fail;
107 }
108 if (alloc_av64_list_gk20a(g,
109 &g->gr.ctx_vars.sw_bundle64_init) == NULL) {
110 goto fail;
111 }
112 if (alloc_av_list_gk20a(g, &g->gr.ctx_vars.sw_method_init) == NULL) {
113 goto fail;
114 }
115 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.sw_ctx_load) == NULL) {
116 goto fail;
117 }
118 if (alloc_av_list_gk20a(g, &g->gr.ctx_vars.sw_non_ctx_load) == NULL) {
119 goto fail;
120 }
121 if (alloc_av_list_gk20a(g,
122 &g->gr.ctx_vars.sw_veid_bundle_init) == NULL) {
123 goto fail;
124 }
125 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.sys) == NULL) {
126 goto fail;
127 }
128 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.gpc) == NULL) {
129 goto fail;
130 }
131 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.tpc) == NULL) {
132 goto fail;
133 }
134 if (alloc_aiv_list_gk20a(g,
135 &g->gr.ctx_vars.ctxsw_regs.zcull_gpc) == NULL) {
136 goto fail;
137 }
138 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.ppc) == NULL) {
139 goto fail;
140 }
141 if (alloc_aiv_list_gk20a(g,
142 &g->gr.ctx_vars.ctxsw_regs.pm_sys) == NULL) {
143 goto fail;
144 }
145 if (alloc_aiv_list_gk20a(g,
146 &g->gr.ctx_vars.ctxsw_regs.pm_gpc) == NULL) {
147 goto fail;
148 }
149 if (alloc_aiv_list_gk20a(g,
150 &g->gr.ctx_vars.ctxsw_regs.pm_tpc) == NULL) {
151 goto fail;
152 }
153 if (alloc_aiv_list_gk20a(g, &g->gr.ctx_vars.ctxsw_regs.etpc) == NULL) {
154 goto fail;
155 }
156
157 for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++) {
158 g->sim->esc_readl(g, "GRCTX_UCODE_INST_FECS",
159 i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]);
160 }
161
162 for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++) {
163 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_FECS",
164 i, &g->gr.ctx_vars.ucode.fecs.data.l[i]);
165 }
166
167 for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++) {
168 g->sim->esc_readl(g, "GRCTX_UCODE_INST_GPCCS",
169 i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);
170 }
171
172 for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++) {
173 g->sim->esc_readl(g, "GRCTX_UCODE_DATA_GPCCS",
174 i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]);
175 }
176
177 for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
178 struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;
179 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR",
180 i, &l[i].addr);
181 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE",
182 i, &l[i].value);
183 }
184
185 for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
186 struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;
187 g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR",
188 i, &l[i].addr);
189 g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE",
190 i, &l[i].value);
191 }
192
193 for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
194 struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;
195 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR",
196 i, &l[i].addr);
197 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX",
198 i, &l[i].index);
199 g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE",
200 i, &l[i].value);
201 }
202
203 for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
204 struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;
205 g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG:REG",
206 i, &l[i].addr);
207 g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG:VALUE",
208 i, &l[i].value);
209 }
210
211 for (i = 0; i < g->gr.ctx_vars.sw_veid_bundle_init.count; i++) {
212 struct av_gk20a *l = g->gr.ctx_vars.sw_veid_bundle_init.l;
213
214 g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT:ADDR",
215 i, &l[i].addr);
216 g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT:VALUE",
217 i, &l[i].value);
218 }
219
220 for (i = 0; i < g->gr.ctx_vars.sw_bundle64_init.count; i++) {
221 struct av64_gk20a *l = g->gr.ctx_vars.sw_bundle64_init.l;
222
223 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:ADDR",
224 i, &l[i].addr);
225 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:VALUE_LO",
226 i, &l[i].value_lo);
227 g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:VALUE_HI",
228 i, &l[i].value_hi);
229 }
230
231 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
232 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;
233 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR",
234 i, &l[i].addr);
235 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX",
236 i, &l[i].index);
237 g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE",
238 i, &l[i].value);
239 }
240
241 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
242 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;
243 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR",
244 i, &l[i].addr);
245 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX",
246 i, &l[i].index);
247 g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE",
248 i, &l[i].value);
249 }
250
251 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
252 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;
253 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR",
254 i, &l[i].addr);
255 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX",
256 i, &l[i].index);
257 g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE",
258 i, &l[i].value);
259 }
260
261 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
262 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;
263 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR",
264 i, &l[i].addr);
265 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX",
266 i, &l[i].index);
267 g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE",
268 i, &l[i].value);
269 }
270
271 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
272 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;
273 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR",
274 i, &l[i].addr);
275 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX",
276 i, &l[i].index);
277 g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE",
278 i, &l[i].value);
279 }
280
281 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
282 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;
283 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR",
284 i, &l[i].addr);
285 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX",
286 i, &l[i].index);
287 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE",
288 i, &l[i].value);
289 }
290
291 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
292 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;
293 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR",
294 i, &l[i].addr);
295 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX",
296 i, &l[i].index);
297 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE",
298 i, &l[i].value);
299 }
300
301 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
302 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;
303 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR",
304 i, &l[i].addr);
305 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX",
306 i, &l[i].index);
307 g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE",
308 i, &l[i].value);
309 }
310
311 nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_ETPC");
312 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
313 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.etpc.l;
314 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:ADDR",
315 i, &l[i].addr);
316 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:INDEX",
317 i, &l[i].index);
318 g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:VALUE",
319 i, &l[i].value);
320 nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn,
321				"addr:0x%08x index:0x%08x value:0x%08x",
322 l[i].addr, l[i].index, l[i].value);
323 }
324
325 g->gr.ctx_vars.valid = true;
326
327 g->sim->esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
328 &g->gr.ctx_vars.regs_base_index);
329
330 nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib");
331 return 0;
332fail:
333 nvgpu_err(g, "failed querying grctx info from chiplib");
334
335 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.inst.l);
336 nvgpu_kfree(g, g->gr.ctx_vars.ucode.fecs.data.l);
337 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.inst.l);
338 nvgpu_kfree(g, g->gr.ctx_vars.ucode.gpccs.data.l);
339 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle_init.l);
340 nvgpu_kfree(g, g->gr.ctx_vars.sw_bundle64_init.l);
341 nvgpu_kfree(g, g->gr.ctx_vars.sw_method_init.l);
342 nvgpu_kfree(g, g->gr.ctx_vars.sw_ctx_load.l);
343 nvgpu_kfree(g, g->gr.ctx_vars.sw_non_ctx_load.l);
344 nvgpu_kfree(g, g->gr.ctx_vars.sw_veid_bundle_init.l);
345 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.sys.l);
346 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.gpc.l);
347 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.tpc.l);
348 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l);
349 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.ppc.l);
350 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
351 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
352 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
353 nvgpu_kfree(g, g->gr.ctx_vars.ctxsw_regs.etpc.l);
354
355 return err;
356}
diff --git a/include/gk20a/gr_gk20a.c b/include/gk20a/gr_gk20a.c
new file mode 100644
index 0000000..7bcf528
--- /dev/null
+++ b/include/gk20a/gr_gk20a.c
@@ -0,0 +1,8998 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/dma.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/log.h>
31#include <nvgpu/bsearch.h>
32#include <nvgpu/sort.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/firmware.h>
35#include <nvgpu/enabled.h>
36#include <nvgpu/debug.h>
37#include <nvgpu/barrier.h>
38#include <nvgpu/mm.h>
39#include <nvgpu/ctxsw_trace.h>
40#include <nvgpu/error_notifier.h>
41#include <nvgpu/ecc.h>
42#include <nvgpu/io.h>
43#include <nvgpu/utils.h>
44#include <nvgpu/channel.h>
45#include <nvgpu/unit.h>
46#include <nvgpu/power_features/pg.h>
47#include <nvgpu/power_features/cg.h>
48
49#include "gk20a.h"
50#include "gr_gk20a.h"
51#include "gk20a/fecs_trace_gk20a.h"
52#include "gr_ctx_gk20a.h"
53#include "gr_pri_gk20a.h"
54#include "regops_gk20a.h"
55#include "dbg_gpu_gk20a.h"
56
57#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
58#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
59#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
60#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
61#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
62#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
63#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
64#include <nvgpu/hw/gk20a/hw_pri_ringmaster_gk20a.h>
65#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
66#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
67
68#define BLK_SIZE (256)
69#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
70#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
71#define NV_PCFG_BASE 0x00088000
72#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020
73#define FE_PWR_MODE_TIMEOUT_MAX 2000
74#define FE_PWR_MODE_TIMEOUT_DEFAULT 10
75#define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000
76#define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT 10
77#define FECS_ARB_CMD_TIMEOUT_MAX 40
78#define FECS_ARB_CMD_TIMEOUT_DEFAULT 2
79
80static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g);
81
82static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
83 struct vm_gk20a *vm,
84 struct nvgpu_gr_ctx *gr_ctx);
85
86/* channel patch ctx buffer */
87static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
88 struct channel_gk20a *c);
89static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
90 struct vm_gk20a *vm,
91 struct nvgpu_gr_ctx *gr_ctx);
92
93/* golden ctx image */
94static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
95 struct channel_gk20a *c);
96
97int gr_gk20a_get_ctx_id(struct gk20a *g,
98 struct channel_gk20a *c,
99 u32 *ctx_id)
100{
101 struct tsg_gk20a *tsg;
102 struct nvgpu_gr_ctx *gr_ctx = NULL;
103 struct nvgpu_mem *mem = NULL;
104
105 tsg = tsg_gk20a_from_ch(c);
106 if (tsg == NULL) {
107 return -EINVAL;
108 }
109
110 gr_ctx = &tsg->gr_ctx;
111 mem = &gr_ctx->mem;
112
113 /* Channel gr_ctx buffer is gpu cacheable.
114 Flush and invalidate before cpu update. */
115 g->ops.mm.l2_flush(g, true);
116
117 *ctx_id = nvgpu_mem_rd(g, mem,
118 ctxsw_prog_main_image_context_id_o());
119 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", *ctx_id);
120
121 return 0;
122}
123
124void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
125{
126 unsigned int i;
127
128 nvgpu_err(g, "gr_fecs_os_r : %d",
129 gk20a_readl(g, gr_fecs_os_r()));
130 nvgpu_err(g, "gr_fecs_cpuctl_r : 0x%x",
131 gk20a_readl(g, gr_fecs_cpuctl_r()));
132 nvgpu_err(g, "gr_fecs_idlestate_r : 0x%x",
133 gk20a_readl(g, gr_fecs_idlestate_r()));
134 nvgpu_err(g, "gr_fecs_mailbox0_r : 0x%x",
135 gk20a_readl(g, gr_fecs_mailbox0_r()));
136 nvgpu_err(g, "gr_fecs_mailbox1_r : 0x%x",
137 gk20a_readl(g, gr_fecs_mailbox1_r()));
138 nvgpu_err(g, "gr_fecs_irqstat_r : 0x%x",
139 gk20a_readl(g, gr_fecs_irqstat_r()));
140 nvgpu_err(g, "gr_fecs_irqmode_r : 0x%x",
141 gk20a_readl(g, gr_fecs_irqmode_r()));
142 nvgpu_err(g, "gr_fecs_irqmask_r : 0x%x",
143 gk20a_readl(g, gr_fecs_irqmask_r()));
144 nvgpu_err(g, "gr_fecs_irqdest_r : 0x%x",
145 gk20a_readl(g, gr_fecs_irqdest_r()));
146 nvgpu_err(g, "gr_fecs_debug1_r : 0x%x",
147 gk20a_readl(g, gr_fecs_debug1_r()));
148 nvgpu_err(g, "gr_fecs_debuginfo_r : 0x%x",
149 gk20a_readl(g, gr_fecs_debuginfo_r()));
150 nvgpu_err(g, "gr_fecs_ctxsw_status_1_r : 0x%x",
151 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
152
153 for (i = 0; i < g->ops.gr.fecs_ctxsw_mailbox_size(); i++) {
154 nvgpu_err(g, "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x",
155 i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i)));
156 }
157
158 nvgpu_err(g, "gr_fecs_engctl_r : 0x%x",
159 gk20a_readl(g, gr_fecs_engctl_r()));
160 nvgpu_err(g, "gr_fecs_curctx_r : 0x%x",
161 gk20a_readl(g, gr_fecs_curctx_r()));
162 nvgpu_err(g, "gr_fecs_nxtctx_r : 0x%x",
163 gk20a_readl(g, gr_fecs_nxtctx_r()));
164
165 gk20a_writel(g, gr_fecs_icd_cmd_r(),
166 gr_fecs_icd_cmd_opc_rreg_f() |
167 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
168 nvgpu_err(g, "FECS_FALCON_REG_IMB : 0x%x",
169 gk20a_readl(g, gr_fecs_icd_rdata_r()));
170
171 gk20a_writel(g, gr_fecs_icd_cmd_r(),
172 gr_fecs_icd_cmd_opc_rreg_f() |
173 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
174 nvgpu_err(g, "FECS_FALCON_REG_DMB : 0x%x",
175 gk20a_readl(g, gr_fecs_icd_rdata_r()));
176
177 gk20a_writel(g, gr_fecs_icd_cmd_r(),
178 gr_fecs_icd_cmd_opc_rreg_f() |
179 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
180 nvgpu_err(g, "FECS_FALCON_REG_CSW : 0x%x",
181 gk20a_readl(g, gr_fecs_icd_rdata_r()));
182
183 gk20a_writel(g, gr_fecs_icd_cmd_r(),
184 gr_fecs_icd_cmd_opc_rreg_f() |
185 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
186 nvgpu_err(g, "FECS_FALCON_REG_CTX : 0x%x",
187 gk20a_readl(g, gr_fecs_icd_rdata_r()));
188
189 gk20a_writel(g, gr_fecs_icd_cmd_r(),
190 gr_fecs_icd_cmd_opc_rreg_f() |
191 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
192 nvgpu_err(g, "FECS_FALCON_REG_EXCI : 0x%x",
193 gk20a_readl(g, gr_fecs_icd_rdata_r()));
194
195 for (i = 0; i < 4; i++) {
196 gk20a_writel(g, gr_fecs_icd_cmd_r(),
197 gr_fecs_icd_cmd_opc_rreg_f() |
198 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC));
199 nvgpu_err(g, "FECS_FALCON_REG_PC : 0x%x",
200 gk20a_readl(g, gr_fecs_icd_rdata_r()));
201
202 gk20a_writel(g, gr_fecs_icd_cmd_r(),
203 gr_fecs_icd_cmd_opc_rreg_f() |
204 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP));
205 nvgpu_err(g, "FECS_FALCON_REG_SP : 0x%x",
206 gk20a_readl(g, gr_fecs_icd_rdata_r()));
207 }
208}
209
210static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
211{
212 u32 i, ucode_u32_size;
213 const u32 *ucode_u32_data;
214 u32 checksum;
215
216 nvgpu_log_fn(g, " ");
217
218 gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
219 gr_gpccs_dmemc_blk_f(0) |
220 gr_gpccs_dmemc_aincw_f(1)));
221
222 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
223 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
224
225 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
226 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
227 checksum += ucode_u32_data[i];
228 }
229
230 gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
231 gr_fecs_dmemc_blk_f(0) |
232 gr_fecs_dmemc_aincw_f(1)));
233
234 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
235 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
236
237 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
238 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
239 checksum += ucode_u32_data[i];
240 }
241 nvgpu_log_fn(g, "done");
242}
243
244static void gr_gk20a_load_falcon_imem(struct gk20a *g)
245{
246 u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
247 const u32 *ucode_u32_data;
248 u32 tag, i, pad_start, pad_end;
249 u32 checksum;
250
251 nvgpu_log_fn(g, " ");
252
253 cfg = gk20a_readl(g, gr_fecs_cfg_r());
254 fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
255
256 cfg = gk20a_readl(g, gr_gpc0_cfg_r());
257 gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
258
259 /* Use the broadcast address to access all of the GPCCS units. */
260 gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
261 gr_gpccs_imemc_blk_f(0) |
262 gr_gpccs_imemc_aincw_f(1)));
263
264 /* Setup the tags for the instruction memory. */
265 tag = 0;
266 gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
267
268 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
269 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
270
271 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
272 if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
273 tag++;
274 gk20a_writel(g, gr_gpccs_imemt_r(0),
275 gr_gpccs_imemt_tag_f(tag));
276 }
277 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
278 checksum += ucode_u32_data[i];
279 }
280
281 pad_start = i * 4U;
282 pad_end = pad_start + (256U - pad_start % 256U) + 256U;
283 for (i = pad_start;
284 (i < gpccs_imem_size * 256U) && (i < pad_end);
285 i += 4U) {
286 if ((i != 0U) && ((i % 256U) == 0U)) {
287 tag++;
288 gk20a_writel(g, gr_gpccs_imemt_r(0),
289 gr_gpccs_imemt_tag_f(tag));
290 }
291 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
292 }
293
294 gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
295 gr_fecs_imemc_blk_f(0) |
296 gr_fecs_imemc_aincw_f(1)));
297
298 /* Setup the tags for the instruction memory. */
299 tag = 0;
300 gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
301
302 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
303 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
304
305 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
306 if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
307 tag++;
308 gk20a_writel(g, gr_fecs_imemt_r(0),
309 gr_fecs_imemt_tag_f(tag));
310 }
311 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
312 checksum += ucode_u32_data[i];
313 }
314
315 pad_start = i * 4U;
316 pad_end = pad_start + (256U - pad_start % 256U) + 256U;
317 for (i = pad_start;
318 (i < fecs_imem_size * 256U) && i < pad_end;
319 i += 4U) {
320 if ((i != 0U) && ((i % 256U) == 0U)) {
321 tag++;
322 gk20a_writel(g, gr_fecs_imemt_r(0),
323 gr_fecs_imemt_tag_f(tag));
324 }
325 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
326 }
327}
328
329int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
330 u32 expect_delay)
331{
332 u32 delay = expect_delay;
333 bool ctxsw_active;
334 bool gr_busy;
335 u32 gr_engine_id;
336 u32 engine_status;
337 bool ctx_status_invalid;
338 struct nvgpu_timeout timeout;
339
340 nvgpu_log_fn(g, " ");
341
342 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
343
344 nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);
345
346 do {
347 /* fmodel: host gets fifo_engine_status(gr) from gr
348 only when gr_status is read */
349 (void) gk20a_readl(g, gr_status_r());
350
351 engine_status = gk20a_readl(g,
352 fifo_engine_status_r(gr_engine_id));
353
354 ctxsw_active = engine_status &
355 fifo_engine_status_ctxsw_in_progress_f();
356
357 ctx_status_invalid =
358 (fifo_engine_status_ctx_status_v(engine_status) ==
359 fifo_engine_status_ctx_status_invalid_v());
360
361 gr_busy = gk20a_readl(g, gr_engine_status_r()) &
362 gr_engine_status_value_busy_f();
363
364 if (ctx_status_invalid || (!gr_busy && !ctxsw_active)) {
365 nvgpu_log_fn(g, "done");
366 return 0;
367 }
368
369 nvgpu_usleep_range(delay, delay * 2);
370 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
371
372 } while (nvgpu_timeout_expired(&timeout) == 0);
373
374 nvgpu_err(g,
375 "timeout, ctxsw busy : %d, gr busy : %d",
376 ctxsw_active, gr_busy);
377
378 return -EAGAIN;
379}
380
381int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
382 u32 expect_delay)
383{
384 u32 val;
385 u32 delay = expect_delay;
386 struct nvgpu_timeout timeout;
387
388 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
389 return 0;
390 }
391
392 nvgpu_log_fn(g, " ");
393
394 nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);
395
396 do {
397 val = gk20a_readl(g, gr_status_r());
398
399 if (gr_status_fe_method_lower_v(val) == 0U) {
400 nvgpu_log_fn(g, "done");
401 return 0;
402 }
403
404 nvgpu_usleep_range(delay, delay * 2);
405 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
406 } while (nvgpu_timeout_expired(&timeout) == 0);
407
408 nvgpu_err(g,
409 "timeout, fe busy : %x", val);
410
411 return -EAGAIN;
412}
413
414int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
415 u32 *mailbox_ret, u32 opc_success,
416 u32 mailbox_ok, u32 opc_fail,
417 u32 mailbox_fail, bool sleepduringwait)
418{
419 struct nvgpu_timeout timeout;
420 u32 delay = GR_FECS_POLL_INTERVAL;
421 u32 check = WAIT_UCODE_LOOP;
422 u32 reg;
423
424 nvgpu_log_fn(g, " ");
425
426 if (sleepduringwait) {
427 delay = GR_IDLE_CHECK_DEFAULT;
428 }
429
430 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
431 NVGPU_TIMER_CPU_TIMER);
432
433 while (check == WAIT_UCODE_LOOP) {
434 if (nvgpu_timeout_expired(&timeout)) {
435 check = WAIT_UCODE_TIMEOUT;
436 }
437
438 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
439
440 if (mailbox_ret) {
441 *mailbox_ret = reg;
442 }
443
444 switch (opc_success) {
445 case GR_IS_UCODE_OP_EQUAL:
446 if (reg == mailbox_ok) {
447 check = WAIT_UCODE_OK;
448 }
449 break;
450 case GR_IS_UCODE_OP_NOT_EQUAL:
451 if (reg != mailbox_ok) {
452 check = WAIT_UCODE_OK;
453 }
454 break;
455 case GR_IS_UCODE_OP_AND:
456 if (reg & mailbox_ok) {
457 check = WAIT_UCODE_OK;
458 }
459 break;
460 case GR_IS_UCODE_OP_LESSER:
461 if (reg < mailbox_ok) {
462 check = WAIT_UCODE_OK;
463 }
464 break;
465 case GR_IS_UCODE_OP_LESSER_EQUAL:
466 if (reg <= mailbox_ok) {
467 check = WAIT_UCODE_OK;
468 }
469 break;
470 case GR_IS_UCODE_OP_SKIP:
471			/* no success check */
472 break;
473 default:
474 nvgpu_err(g,
475 "invalid success opcode 0x%x", opc_success);
476
477 check = WAIT_UCODE_ERROR;
478 break;
479 }
480
481 switch (opc_fail) {
482 case GR_IS_UCODE_OP_EQUAL:
483 if (reg == mailbox_fail) {
484 check = WAIT_UCODE_ERROR;
485 }
486 break;
487 case GR_IS_UCODE_OP_NOT_EQUAL:
488 if (reg != mailbox_fail) {
489 check = WAIT_UCODE_ERROR;
490 }
491 break;
492 case GR_IS_UCODE_OP_AND:
493 if (reg & mailbox_fail) {
494 check = WAIT_UCODE_ERROR;
495 }
496 break;
497 case GR_IS_UCODE_OP_LESSER:
498 if (reg < mailbox_fail) {
499 check = WAIT_UCODE_ERROR;
500 }
501 break;
502 case GR_IS_UCODE_OP_LESSER_EQUAL:
503 if (reg <= mailbox_fail) {
504 check = WAIT_UCODE_ERROR;
505 }
506 break;
507 case GR_IS_UCODE_OP_SKIP:
508			/* no check on fail */
509 break;
510 default:
511 nvgpu_err(g,
512 "invalid fail opcode 0x%x", opc_fail);
513 check = WAIT_UCODE_ERROR;
514 break;
515 }
516
517 if (sleepduringwait) {
518 nvgpu_usleep_range(delay, delay * 2);
519 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
520 } else {
521 nvgpu_udelay(delay);
522 }
523 }
524
525 if (check == WAIT_UCODE_TIMEOUT) {
526 nvgpu_err(g,
527 "timeout waiting on mailbox=%d value=0x%08x",
528 mailbox_id, reg);
529 gk20a_fecs_dump_falcon_stats(g);
530 gk20a_gr_debug_dump(g);
531 return -1;
532 } else if (check == WAIT_UCODE_ERROR) {
533 nvgpu_err(g,
534 "ucode method failed on mailbox=%d value=0x%08x",
535 mailbox_id, reg);
536 gk20a_fecs_dump_falcon_stats(g);
537 return -1;
538 }
539
540 nvgpu_log_fn(g, "done");
541 return 0;
542}
543
544int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
545 struct fecs_method_op_gk20a op,
546 bool sleepduringwait)
547{
548 int ret;
549
550 if (op.mailbox.id != 0) {
551 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
552 op.mailbox.data);
553 }
554
555 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
556 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
557
558 gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
559 gk20a_writel(g, gr_fecs_method_push_r(),
560 gr_fecs_method_push_adr_f(op.method.addr));
561
562	/* op.mailbox.id == 4 cases require waiting for completion
563	 * on op.mailbox.id == 0, so remap the id before the wait */
564 if (op.mailbox.id == 4) {
565 op.mailbox.id = 0;
566 }
567
568 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
569 op.cond.ok, op.mailbox.ok,
570 op.cond.fail, op.mailbox.fail,
571 sleepduringwait);
572 if (ret) {
573		nvgpu_err(g, "fecs method: data=0x%08x push adr=0x%08x",
574 op.method.data, op.method.addr);
575 }
576
577 return ret;
578}
579
580/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...).
581 * We should replace most, if not all, fecs method calls with this instead. */
582int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
583 struct fecs_method_op_gk20a op,
584 bool sleepduringwait)
585{
586 struct gr_gk20a *gr = &g->gr;
587 int ret;
588
589 nvgpu_mutex_acquire(&gr->fecs_mutex);
590
591 ret = gr_gk20a_submit_fecs_method_op_locked(g, op, sleepduringwait);
592
593 nvgpu_mutex_release(&gr->fecs_mutex);
594
595 return ret;
596}
597
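A minimal usage sketch of the helper above, assuming a caller that already holds a valid struct gk20a pointer: example_submit_fecs_method() is hypothetical, the method address and data are placeholders, and the mailbox/condition values mirror the pass/fail convention used by gr_gk20a_ctrl_ctxsw() later in this file.

static int example_submit_fecs_method(struct gk20a *g, u32 method_addr,
				      u32 method_data, u32 *mailbox_ret)
{
	/* wait (with sleeps) until mailbox 1 reports PASS or FAIL */
	return gr_gk20a_submit_fecs_method_op(g,
		(struct fecs_method_op_gk20a) {
			.method.addr = method_addr,
			.method.data = method_data,
			.mailbox = { .id = 1,
				.data = ~0, .clr = ~0, .ret = mailbox_ret,
				.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
				.fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
			.cond.ok = GR_IS_UCODE_OP_EQUAL,
			.cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
}
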
598/* Sideband mailbox writes are done a bit differently */
599int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
600 struct fecs_method_op_gk20a op)
601{
602 struct gr_gk20a *gr = &g->gr;
603 int ret;
604
605 nvgpu_mutex_acquire(&gr->fecs_mutex);
606
607 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id),
608 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
609
610 gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
611 gk20a_writel(g, gr_fecs_method_push_r(),
612 gr_fecs_method_push_adr_f(op.method.addr));
613
614 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
615 op.cond.ok, op.mailbox.ok,
616 op.cond.fail, op.mailbox.fail,
617 false);
618 if (ret) {
619		nvgpu_err(g, "fecs method: data=0x%08x push adr=0x%08x",
620 op.method.data, op.method.addr);
621 }
622
623 nvgpu_mutex_release(&gr->fecs_mutex);
624
625 return ret;
626}
627
628static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
629{
630 return gr_gk20a_submit_fecs_method_op(g,
631 (struct fecs_method_op_gk20a) {
632 .method.addr = fecs_method,
633 .method.data = ~0,
634 .mailbox = { .id = 1, /*sideband?*/
635 .data = ~0, .clr = ~0, .ret = ret,
636 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
637 .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
638 .cond.ok = GR_IS_UCODE_OP_EQUAL,
639 .cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
640}
641
642/**
643 * Stop processing (stall) context switches at FECS.
644 * If FECS is sent the stop_ctxsw method, ELPG entry/exit cannot happen
645 * and may time out. This can manifest as different error signatures
646 * depending on when the stop_ctxsw method is sent with respect to the
647 * PMU ELPG sequence: it may appear as a PMU halt, an abort, or
648 * possibly an external error.
649 */
650int gr_gk20a_disable_ctxsw(struct gk20a *g)
651{
652 int err = 0;
653
654 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
655
656 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
657 g->ctxsw_disable_count++;
658 if (g->ctxsw_disable_count == 1) {
659 err = nvgpu_pg_elpg_disable(g);
660 if (err != 0) {
661 nvgpu_err(g, "failed to disable elpg. not safe to "
662 "stop_ctxsw");
663 /* stop ctxsw command is not sent */
664 g->ctxsw_disable_count--;
665 } else {
666 err = gr_gk20a_ctrl_ctxsw(g,
667 gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
668 if (err != 0) {
669 nvgpu_err(g, "failed to stop fecs ctxsw");
670 /* stop ctxsw failed */
671 g->ctxsw_disable_count--;
672 }
673 }
674 } else {
675 nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d",
676 g->ctxsw_disable_count);
677 }
678 nvgpu_mutex_release(&g->ctxsw_disable_lock);
679
680 return err;
681}
682
683/* Start processing (continue) context switches at FECS */
684int gr_gk20a_enable_ctxsw(struct gk20a *g)
685{
686 int err = 0;
687
688 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
689
690 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
691
692 if (g->ctxsw_disable_count == 0) {
693 goto ctxsw_already_enabled;
694 }
695 g->ctxsw_disable_count--;
696 WARN_ON(g->ctxsw_disable_count < 0);
697 if (g->ctxsw_disable_count == 0) {
698 err = gr_gk20a_ctrl_ctxsw(g,
699 gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
700 if (err != 0) {
701 nvgpu_err(g, "failed to start fecs ctxsw");
702 } else {
703 if (nvgpu_pg_elpg_enable(g) != 0) {
704 nvgpu_err(g, "failed to enable elpg "
705 "after start_ctxsw");
706 }
707 }
708 } else {
709 nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet",
710 g->ctxsw_disable_count);
711 }
712ctxsw_already_enabled:
713 nvgpu_mutex_release(&g->ctxsw_disable_lock);
714
715 return err;
716}
717
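A minimal sketch of how the disable/enable pair above is meant to bracket work that must not race with FECS context switching; example_update_with_ctxsw_stalled() is hypothetical. Because the calls nest via ctxsw_disable_count, independent callers compose safely.

static int example_update_with_ctxsw_stalled(struct gk20a *g)
{
	int err = gr_gk20a_disable_ctxsw(g);

	if (err != 0) {
		return err;
	}

	/* ... modify state that must not race with FECS ctxsw ... */

	return gr_gk20a_enable_ctxsw(g);
}
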
718int gr_gk20a_halt_pipe(struct gk20a *g)
719{
720 return gr_gk20a_submit_fecs_method_op(g,
721 (struct fecs_method_op_gk20a) {
722 .method.addr =
723 gr_fecs_method_push_adr_halt_pipeline_v(),
724 .method.data = ~0,
725 .mailbox = { .id = 1, /*sideband?*/
726 .data = ~0, .clr = ~0, .ret = NULL,
727 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
728 .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
729 .cond.ok = GR_IS_UCODE_OP_EQUAL,
730 .cond.fail = GR_IS_UCODE_OP_EQUAL }, false);
731}
732
733
734int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
735{
736 u32 addr_lo;
737 u32 addr_hi;
738
739 nvgpu_log_fn(c->g, " ");
740
741 addr_lo = u64_lo32(gpu_va) >> 12;
742 addr_hi = u64_hi32(gpu_va);
743
744 nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(),
745 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
746 ram_in_gr_wfi_ptr_lo_f(addr_lo));
747
748 nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(),
749 ram_in_gr_wfi_ptr_hi_f(addr_hi));
750
751 return 0;
752}
753
754/*
755 * Context state can be written directly, or "patched" at times. So that the
756 * code can be used in either situation, it is written using a series of
757 * _ctx_patch_write(..., patch) statements. However, any necessary map overhead
758 * should be minimized; thus, bundle the sequence of these writes together, and
759 * open and close them with _ctx_patch_write_begin/_ctx_patch_write_end.
760 */
761
762int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
763 struct nvgpu_gr_ctx *gr_ctx,
764 bool update_patch_count)
765{
766 if (update_patch_count) {
767 /* reset patch count if ucode has already processed it */
768 gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
769 &gr_ctx->mem,
770 ctxsw_prog_main_image_patch_count_o());
771 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
772 gr_ctx->patch_ctx.data_count);
773 }
774 return 0;
775}
776
777void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
778 struct nvgpu_gr_ctx *gr_ctx,
779 bool update_patch_count)
780{
781 /* Write context count to context image if it is mapped */
782 if (update_patch_count) {
783 nvgpu_mem_wr(g, &gr_ctx->mem,
784 ctxsw_prog_main_image_patch_count_o(),
785 gr_ctx->patch_ctx.data_count);
786 nvgpu_log(g, gpu_dbg_info, "write patch count %d",
787 gr_ctx->patch_ctx.data_count);
788 }
789}
790
791void gr_gk20a_ctx_patch_write(struct gk20a *g,
792 struct nvgpu_gr_ctx *gr_ctx,
793 u32 addr, u32 data, bool patch)
794{
795 if (patch) {
796 u32 patch_slot = gr_ctx->patch_ctx.data_count *
797 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
798 if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE(
799 gr_ctx->patch_ctx.mem.size) -
800 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
801 nvgpu_err(g, "failed to access patch_slot %d",
802 patch_slot);
803 return;
804 }
805 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr);
806 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data);
807 gr_ctx->patch_ctx.data_count++;
808 nvgpu_log(g, gpu_dbg_info,
809 "patch addr = 0x%x data = 0x%x data_count %d",
810 addr, data, gr_ctx->patch_ctx.data_count);
811 } else {
812 gk20a_writel(g, addr, data);
813 }
814}
815
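A minimal sketch of the begin/write/end pattern described above; example_patch_two_regs() is hypothetical and the data values are placeholders. With patch == true each write is appended to the patch buffer for the ucode to apply, while patch == false falls through to a direct gk20a_writel().

static int example_patch_two_regs(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
	int err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);

	if (err != 0) {
		return err;
	}

	/* each call consumes one (addr, data) slot in patch_ctx.mem */
	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), 0, true);
	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), 0, true);

	gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
	return 0;
}
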
816static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
817{
818 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
819 ram_in_base_shift_v();
820 u32 aperture = nvgpu_aperture_mask(g, inst_block,
821 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
822 gr_fecs_current_ctx_target_sys_mem_coh_f(),
823 gr_fecs_current_ctx_target_vid_mem_f());
824
825 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
826 gr_fecs_current_ctx_valid_f(1);
827}
828
829int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
830 struct channel_gk20a *c)
831{
832 u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
833 >> ram_in_base_shift_v());
834 u32 data = fecs_current_ctx_data(g, &c->inst_block);
835 u32 ret;
836
837 nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x",
838 c->chid, inst_base_ptr);
839
840 ret = gr_gk20a_submit_fecs_method_op(g,
841 (struct fecs_method_op_gk20a) {
842 .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
843 .method.data = data,
844 .mailbox = { .id = 0, .data = 0,
845 .clr = 0x30,
846 .ret = NULL,
847 .ok = 0x10,
848 .fail = 0x20, },
849 .cond.ok = GR_IS_UCODE_OP_AND,
850 .cond.fail = GR_IS_UCODE_OP_AND}, true);
851 if (ret) {
852 nvgpu_err(g,
853 "bind channel instance failed");
854 }
855
856 return ret;
857}
858
859void gr_gk20a_write_zcull_ptr(struct gk20a *g,
860 struct nvgpu_mem *mem, u64 gpu_va)
861{
862 u32 va = u64_lo32(gpu_va >> 8);
863
864 nvgpu_mem_wr(g, mem,
865 ctxsw_prog_main_image_zcull_ptr_o(), va);
866}
867
868void gr_gk20a_write_pm_ptr(struct gk20a *g,
869 struct nvgpu_mem *mem, u64 gpu_va)
870{
871 u32 va = u64_lo32(gpu_va >> 8);
872
873 nvgpu_mem_wr(g, mem,
874 ctxsw_prog_main_image_pm_ptr_o(), va);
875}
876
877static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
878{
879 struct tsg_gk20a *tsg;
880 struct nvgpu_gr_ctx *gr_ctx = NULL;
881 struct nvgpu_mem *mem = NULL;
882 struct nvgpu_mem *ctxheader = &c->ctx_header;
883 int ret = 0;
884
885 nvgpu_log_fn(g, " ");
886
887 tsg = tsg_gk20a_from_ch(c);
888 if (tsg == NULL) {
889 return -EINVAL;
890 }
891
892 gr_ctx = &tsg->gr_ctx;
893 mem = &gr_ctx->mem;
894
895 if (gr_ctx->zcull_ctx.gpu_va == 0 &&
896 gr_ctx->zcull_ctx.ctx_sw_mode ==
897 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
898 return -EINVAL;
899 }
900
901 ret = gk20a_disable_channel_tsg(g, c);
902 if (ret) {
903 nvgpu_err(g, "failed to disable channel/TSG");
904 return ret;
905 }
906 ret = gk20a_fifo_preempt(g, c);
907 if (ret) {
908 gk20a_enable_channel_tsg(g, c);
909 nvgpu_err(g, "failed to preempt channel/TSG");
910 return ret;
911 }
912
913 nvgpu_mem_wr(g, mem,
914 ctxsw_prog_main_image_zcull_o(),
915 gr_ctx->zcull_ctx.ctx_sw_mode);
916
917 if (ctxheader->gpu_va) {
918 g->ops.gr.write_zcull_ptr(g, ctxheader,
919 gr_ctx->zcull_ctx.gpu_va);
920 } else {
921 g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
922 }
923
924 gk20a_enable_channel_tsg(g, c);
925
926 return ret;
927}
928
929u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc)
930{
931 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
932 u32 gpc_offset = gpc_stride * gpc;
933
934 return gpc_offset;
935}
936
937u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc)
938{
939 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
940 GPU_LIT_TPC_IN_GPC_STRIDE);
941 u32 tpc_offset = tpc_in_gpc_stride * tpc;
942
943 return tpc_offset;
944}
945
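The two stride helpers above are typically summed with a register's GPC0/TPC0 base offset to address one specific unit instance. A sketch, where 'reg' stands in for any hypothetical per-TPC register offset:

static u32 example_read_per_tpc_reg(struct gk20a *g, u32 gpc, u32 tpc, u32 reg)
{
	/* reg is the GPC0/TPC0 offset of a per-TPC register (placeholder) */
	return gk20a_readl(g, reg + gk20a_gr_gpc_offset(g, gpc) +
			   gk20a_gr_tpc_offset(g, tpc));
}
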
946int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
947 struct channel_gk20a *c, bool patch)
948{
949 struct gr_gk20a *gr = &g->gr;
950 struct tsg_gk20a *tsg;
951 struct nvgpu_gr_ctx *gr_ctx = NULL;
952 u64 addr;
953 u32 size;
954
955 nvgpu_log_fn(g, " ");
956
957 tsg = tsg_gk20a_from_ch(c);
958 if (tsg == NULL) {
959 return -EINVAL;
960 }
961
962 gr_ctx = &tsg->gr_ctx;
963 if (patch) {
964 int err;
965 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
966 if (err != 0) {
967 return err;
968 }
969 }
970
971 /* global pagepool buffer */
972 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
973 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
974 (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
975 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
976
977 size = gr->global_ctx_buffer[PAGEPOOL].mem.size /
978 gr_scc_pagepool_total_pages_byte_granularity_v();
979
980 if (size == g->ops.gr.pagepool_default_size(g)) {
981 size = gr_scc_pagepool_total_pages_hwmax_v();
982 }
983
984 nvgpu_log_info(g, "pagepool buffer addr : 0x%016llx, size : %d",
985 addr, size);
986
987 g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
988
989 /* global bundle cb */
990 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
991 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
992 (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
993 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
994
995 size = gr->bundle_cb_default_size;
996
997 nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %d",
998 addr, size);
999
1000 g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
1001
1002 /* global attrib cb */
1003 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
1004 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
1005 (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
1006 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1007
1008 nvgpu_log_info(g, "attrib cb addr : 0x%016llx", addr);
1009 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch);
1010 g->ops.gr.commit_global_cb_manager(g, c, patch);
1011
1012 if (patch) {
1013 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
1014 }
1015
1016 return 0;
1017}
1018
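The shift pairs in the function above pack bits 39:8 of the 40-bit buffer GPU VA into a 32-bit register field (assuming the *_39_8_align_bits_v() helpers return 8, as the field names suggest). For example, a pagepool VA of 0x1_2345_6700 gives (0x23456700 >> 8) | (0x1 << 24) = 0x01234567, i.e. the VA right-shifted by 8.
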
1019int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
1020{
1021 struct gr_gk20a *gr = &g->gr;
1022 struct nvgpu_gr_ctx *gr_ctx = NULL;
1023 u32 gpm_pd_cfg;
1024 u32 pd_ab_dist_cfg0;
1025 u32 ds_debug;
1026 u32 mpc_vtg_debug;
1027 u32 pe_vaf;
1028 u32 pe_vsc_vpc;
1029
1030 nvgpu_log_fn(g, " ");
1031
1032 gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
1033 pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
1034 ds_debug = gk20a_readl(g, gr_ds_debug_r());
1035 mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
1036
1037 if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
1038 pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
1039 pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
1040
1041 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
1042 pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
1043 pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
1044 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
1045 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
1046 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
1047
1048 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
1049 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
1050 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
1051 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
1052 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
1053 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
1054 } else {
1055 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
1056 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
1057 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
1058 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
1059
1060 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
1061 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
1062 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
1063 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
1064 }
1065
1066 return 0;
1067}
1068
1069/*
1070 * Return map tiles count for given index
1071 * Return 0 if index is out-of-bounds
1072 */
1073static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index)
1074{
1075 if (index >= gr->map_tile_count) {
1076 return 0;
1077 }
1078
1079 return gr->map_tiles[index];
1080}
1081
1082int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1083{
1084 u32 norm_entries, norm_shift;
1085 u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
1086 u32 map0, map1, map2, map3, map4, map5;
1087
1088 if (gr->map_tiles == NULL) {
1089 return -1;
1090 }
1091
1092 nvgpu_log_fn(g, " ");
1093
1094 gk20a_writel(g, gr_crstr_map_table_cfg_r(),
1095 gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
1096 gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
1097
1098 map0 = gr_crstr_gpc_map0_tile0_f(gr_gk20a_get_map_tile_count(gr, 0)) |
1099 gr_crstr_gpc_map0_tile1_f(gr_gk20a_get_map_tile_count(gr, 1)) |
1100 gr_crstr_gpc_map0_tile2_f(gr_gk20a_get_map_tile_count(gr, 2)) |
1101 gr_crstr_gpc_map0_tile3_f(gr_gk20a_get_map_tile_count(gr, 3)) |
1102 gr_crstr_gpc_map0_tile4_f(gr_gk20a_get_map_tile_count(gr, 4)) |
1103 gr_crstr_gpc_map0_tile5_f(gr_gk20a_get_map_tile_count(gr, 5));
1104
1105 map1 = gr_crstr_gpc_map1_tile6_f(gr_gk20a_get_map_tile_count(gr, 6)) |
1106 gr_crstr_gpc_map1_tile7_f(gr_gk20a_get_map_tile_count(gr, 7)) |
1107 gr_crstr_gpc_map1_tile8_f(gr_gk20a_get_map_tile_count(gr, 8)) |
1108 gr_crstr_gpc_map1_tile9_f(gr_gk20a_get_map_tile_count(gr, 9)) |
1109 gr_crstr_gpc_map1_tile10_f(gr_gk20a_get_map_tile_count(gr, 10)) |
1110 gr_crstr_gpc_map1_tile11_f(gr_gk20a_get_map_tile_count(gr, 11));
1111
1112 map2 = gr_crstr_gpc_map2_tile12_f(gr_gk20a_get_map_tile_count(gr, 12)) |
1113 gr_crstr_gpc_map2_tile13_f(gr_gk20a_get_map_tile_count(gr, 13)) |
1114 gr_crstr_gpc_map2_tile14_f(gr_gk20a_get_map_tile_count(gr, 14)) |
1115 gr_crstr_gpc_map2_tile15_f(gr_gk20a_get_map_tile_count(gr, 15)) |
1116 gr_crstr_gpc_map2_tile16_f(gr_gk20a_get_map_tile_count(gr, 16)) |
1117 gr_crstr_gpc_map2_tile17_f(gr_gk20a_get_map_tile_count(gr, 17));
1118
1119 map3 = gr_crstr_gpc_map3_tile18_f(gr_gk20a_get_map_tile_count(gr, 18)) |
1120 gr_crstr_gpc_map3_tile19_f(gr_gk20a_get_map_tile_count(gr, 19)) |
1121 gr_crstr_gpc_map3_tile20_f(gr_gk20a_get_map_tile_count(gr, 20)) |
1122 gr_crstr_gpc_map3_tile21_f(gr_gk20a_get_map_tile_count(gr, 21)) |
1123 gr_crstr_gpc_map3_tile22_f(gr_gk20a_get_map_tile_count(gr, 22)) |
1124 gr_crstr_gpc_map3_tile23_f(gr_gk20a_get_map_tile_count(gr, 23));
1125
1126 map4 = gr_crstr_gpc_map4_tile24_f(gr_gk20a_get_map_tile_count(gr, 24)) |
1127 gr_crstr_gpc_map4_tile25_f(gr_gk20a_get_map_tile_count(gr, 25)) |
1128 gr_crstr_gpc_map4_tile26_f(gr_gk20a_get_map_tile_count(gr, 26)) |
1129 gr_crstr_gpc_map4_tile27_f(gr_gk20a_get_map_tile_count(gr, 27)) |
1130 gr_crstr_gpc_map4_tile28_f(gr_gk20a_get_map_tile_count(gr, 28)) |
1131 gr_crstr_gpc_map4_tile29_f(gr_gk20a_get_map_tile_count(gr, 29));
1132
1133 map5 = gr_crstr_gpc_map5_tile30_f(gr_gk20a_get_map_tile_count(gr, 30)) |
1134 gr_crstr_gpc_map5_tile31_f(gr_gk20a_get_map_tile_count(gr, 31)) |
1135 gr_crstr_gpc_map5_tile32_f(0) |
1136 gr_crstr_gpc_map5_tile33_f(0) |
1137 gr_crstr_gpc_map5_tile34_f(0) |
1138 gr_crstr_gpc_map5_tile35_f(0);
1139
1140 gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
1141 gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
1142 gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
1143 gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
1144 gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
1145 gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
1146
1147 switch (gr->tpc_count) {
1148 case 1:
1149 norm_shift = 4;
1150 break;
1151 case 2:
1152 case 3:
1153 norm_shift = 3;
1154 break;
1155 case 4:
1156 case 5:
1157 case 6:
1158 case 7:
1159 norm_shift = 2;
1160 break;
1161 case 8:
1162 case 9:
1163 case 10:
1164 case 11:
1165 case 12:
1166 case 13:
1167 case 14:
1168 case 15:
1169 norm_shift = 1;
1170 break;
1171 default:
1172 norm_shift = 0;
1173 break;
1174 }
1175
1176 norm_entries = gr->tpc_count << norm_shift;
1177 coeff5_mod = (1 << 5) % norm_entries;
1178 coeff6_mod = (1 << 6) % norm_entries;
1179 coeff7_mod = (1 << 7) % norm_entries;
1180 coeff8_mod = (1 << 8) % norm_entries;
1181 coeff9_mod = (1 << 9) % norm_entries;
1182 coeff10_mod = (1 << 10) % norm_entries;
1183 coeff11_mod = (1 << 11) % norm_entries;
1184
1185 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
1186 gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
1187 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
1188 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
1189 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
1190 gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
1191
1192 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
1193 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
1194 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
1195 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
1196 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
1197 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
1198 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
1199
1200 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
1201 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
1202 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
1203 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
1204 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
1205 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
1206
1207 gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
1208 gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
1209 gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
1210
1211 gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
1212 gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
1213 gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
1214 gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
1215 gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
1216 gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
1217
1218 return 0;
1219}
1220
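As a worked example of the normalization above: with gr->tpc_count == 3, norm_shift is 3, so norm_entries = 3 << 3 = 24, and the coefficients coeff5_mod through coeff11_mod become 32 % 24 = 8, 64 % 24 = 16, 128 % 24 = 8, 256 % 24 = 16, 512 % 24 = 8, 1024 % 24 = 16 and 2048 % 24 = 8.
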
1221static inline u32 count_bits(u32 mask)
1222{
1223 u32 temp = mask;
1224 u32 count;
1225 for (count = 0; temp != 0; count++) {
1226 temp &= temp - 1;
1227 }
1228
1229 return count;
1230}
1231
1232int gr_gk20a_init_sm_id_table(struct gk20a *g)
1233{
1234 u32 gpc, tpc;
1235 u32 sm_id = 0;
1236
1237 for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) {
1238 for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
1239
1240 if (tpc < g->gr.gpc_tpc_count[gpc]) {
1241 g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
1242 g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
1243 g->gr.sm_to_cluster[sm_id].sm_index = 0;
1244 g->gr.sm_to_cluster[sm_id].global_tpc_index =
1245 sm_id;
1246 sm_id++;
1247 }
1248 }
1249 }
1250 g->gr.no_of_sm = sm_id;
1251 return 0;
1252}
1253
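The loop above assigns SM IDs TPC-major across GPCs. For a hypothetical configuration with gpc_count == 2 and gpc_tpc_count == {2, 1}, sm_id 0 maps to (gpc 0, tpc 0), sm_id 1 to (gpc 1, tpc 0) and sm_id 2 to (gpc 0, tpc 1), giving no_of_sm == 3.
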
1254/*
1255 * Return number of TPCs in a GPC
1256 * Return 0 if GPC index is invalid i.e. GPC is disabled
1257 */
1258u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index)
1259{
1260 if (gpc_index >= gr->gpc_count) {
1261 return 0;
1262 }
1263
1264 return gr->gpc_tpc_count[gpc_index];
1265}
1266
1267int gr_gk20a_init_fs_state(struct gk20a *g)
1268{
1269 struct gr_gk20a *gr = &g->gr;
1270 u32 tpc_index, gpc_index;
1271 u32 sm_id = 0, gpc_id = 0;
1272 u32 tpc_per_gpc;
1273 u32 fuse_tpc_mask;
1274 u32 reg_index;
1275 int err;
1276
1277 nvgpu_log_fn(g, " ");
1278
1279 if (g->ops.gr.init_sm_id_table) {
1280 err = g->ops.gr.init_sm_id_table(g);
1281 if (err != 0) {
1282 return err;
1283 }
1284
1285 /* Is table empty ? */
1286 if (g->gr.no_of_sm == 0) {
1287 return -EINVAL;
1288 }
1289 }
1290
1291 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
1292 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
1293 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
1294
1295 g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
1296
1297 if (g->ops.gr.program_active_tpc_counts) {
1298 g->ops.gr.program_active_tpc_counts(g, gpc_index);
1299 }
1300 }
1301
1302 for (reg_index = 0, gpc_id = 0;
1303 reg_index < gr_pd_num_tpc_per_gpc__size_1_v();
1304 reg_index++, gpc_id += 8) {
1305
1306 tpc_per_gpc =
1307 gr_pd_num_tpc_per_gpc_count0_f(gr_gk20a_get_tpc_count(gr, gpc_id + 0)) |
1308 gr_pd_num_tpc_per_gpc_count1_f(gr_gk20a_get_tpc_count(gr, gpc_id + 1)) |
1309 gr_pd_num_tpc_per_gpc_count2_f(gr_gk20a_get_tpc_count(gr, gpc_id + 2)) |
1310 gr_pd_num_tpc_per_gpc_count3_f(gr_gk20a_get_tpc_count(gr, gpc_id + 3)) |
1311 gr_pd_num_tpc_per_gpc_count4_f(gr_gk20a_get_tpc_count(gr, gpc_id + 4)) |
1312 gr_pd_num_tpc_per_gpc_count5_f(gr_gk20a_get_tpc_count(gr, gpc_id + 5)) |
1313 gr_pd_num_tpc_per_gpc_count6_f(gr_gk20a_get_tpc_count(gr, gpc_id + 6)) |
1314 gr_pd_num_tpc_per_gpc_count7_f(gr_gk20a_get_tpc_count(gr, gpc_id + 7));
1315
1316 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(reg_index), tpc_per_gpc);
1317 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(reg_index), tpc_per_gpc);
1318 }
1319
1320 /* gr__setup_pd_mapping stubbed for gk20a */
1321 g->ops.gr.setup_rop_mapping(g, gr);
1322 if (g->ops.gr.setup_alpha_beta_tables) {
1323 g->ops.gr.setup_alpha_beta_tables(g, gr);
1324 }
1325
1326 for (gpc_index = 0;
1327 gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
1328 gpc_index += 4) {
1329
1330 gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
1331 (gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) != 0U) ||
1332 (gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) != 0U) ||
1333 (gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) != 0U) ||
1334 (gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]) != 0U));
1335 }
1336
1337 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
1338 if ((g->tpc_fs_mask_user != 0U) &&
1339 (fuse_tpc_mask == BIT32(gr->max_tpc_count) - 1U)) {
1340 u32 val = g->tpc_fs_mask_user;
1341 val &= (0x1U << gr->max_tpc_count) - 1U;
1342 gk20a_writel(g, gr_cwd_fs_r(),
1343 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1344 gr_cwd_fs_num_tpcs_f(hweight32(val)));
1345 } else {
1346 gk20a_writel(g, gr_cwd_fs_r(),
1347 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1348 gr_cwd_fs_num_tpcs_f(gr->tpc_count));
1349 }
1350
1351 gk20a_writel(g, gr_bes_zrop_settings_r(),
1352 gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
1353 gk20a_writel(g, gr_bes_crop_settings_r(),
1354 gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
1355
1356 return 0;
1357}
1358
1359int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
1360{
1361 struct gk20a *g = c->g;
1362 int ret;
1363
1364 nvgpu_log_fn(g, " ");
1365
1366 ret = gr_gk20a_submit_fecs_method_op(g,
1367 (struct fecs_method_op_gk20a) {
1368 .method.addr = save_type,
1369 .method.data = fecs_current_ctx_data(g, &c->inst_block),
1370 .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
1371 .ok = 1, .fail = 2,
1372 },
1373 .cond.ok = GR_IS_UCODE_OP_AND,
1374 .cond.fail = GR_IS_UCODE_OP_AND,
1375 }, true);
1376
1377 if (ret) {
1378 nvgpu_err(g, "save context image failed");
1379 }
1380
1381 return ret;
1382}
1383
1384u32 gk20a_init_sw_bundle(struct gk20a *g)
1385{
1386 struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
1387 u32 last_bundle_data = 0;
1388 u32 err = 0;
1389 unsigned int i;
1390
1391 /* disable fe_go_idle */
1392 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1393 gr_fe_go_idle_timeout_count_disabled_f());
1394 /* enable pipe mode override */
1395 gk20a_writel(g, gr_pipe_bundle_config_r(),
1396 gr_pipe_bundle_config_override_pipe_mode_enabled_f());
1397
1398 /* load bundle init */
1399 for (i = 0; i < sw_bundle_init->count; i++) {
1400 if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
1401 gk20a_writel(g, gr_pipe_bundle_data_r(),
1402 sw_bundle_init->l[i].value);
1403 last_bundle_data = sw_bundle_init->l[i].value;
1404 }
1405
1406 gk20a_writel(g, gr_pipe_bundle_address_r(),
1407 sw_bundle_init->l[i].addr);
1408
1409 if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
1410 GR_GO_IDLE_BUNDLE) {
1411 err = gr_gk20a_wait_idle(g,
1412 gk20a_get_gr_idle_timeout(g),
1413 GR_IDLE_CHECK_DEFAULT);
1414 if (err != 0U) {
1415 goto error;
1416 }
1417 }
1418
1419 err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g),
1420 GR_IDLE_CHECK_DEFAULT);
1421 if (err != 0U) {
1422 goto error;
1423 }
1424 }
1425
1426 if ((err == 0U) && (g->ops.gr.init_sw_veid_bundle != NULL)) {
1427 err = g->ops.gr.init_sw_veid_bundle(g);
1428 if (err != 0U) {
1429 goto error;
1430 }
1431 }
1432
1433 if (g->ops.gr.init_sw_bundle64) {
1434 err = g->ops.gr.init_sw_bundle64(g);
1435 if (err != 0U) {
1436 goto error;
1437 }
1438 }
1439
1440 /* disable pipe mode override */
1441 gk20a_writel(g, gr_pipe_bundle_config_r(),
1442 gr_pipe_bundle_config_override_pipe_mode_disabled_f());
1443
1444 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1445 GR_IDLE_CHECK_DEFAULT);
1446
1447 /* restore fe_go_idle */
1448 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1449 gr_fe_go_idle_timeout_count_prod_f());
1450
1451 return err;
1452
1453error:
1454 /* in case of error skip waiting for GR idle - just restore state */
1455 gk20a_writel(g, gr_pipe_bundle_config_r(),
1456 gr_pipe_bundle_config_override_pipe_mode_disabled_f());
1457
1458 /* restore fe_go_idle */
1459 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1460 gr_fe_go_idle_timeout_count_prod_f());
1461
1462 return err;
1463}
1464
1465/* init global golden image from a fresh gr_ctx in channel ctx.
1466 save a copy in local_golden_image in ctx_vars */
1467static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1468 struct channel_gk20a *c)
1469{
1470 struct gr_gk20a *gr = &g->gr;
1471 struct tsg_gk20a *tsg;
1472 struct nvgpu_gr_ctx *gr_ctx = NULL;
1473 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1474 u32 ctx_header_words;
1475 u32 i;
1476 u32 data;
1477 struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1478 struct nvgpu_mem *gr_mem;
1479 u32 err = 0;
1480 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
1481 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
1482 u32 last_method_data = 0;
1483
1484 nvgpu_log_fn(g, " ");
1485
1486 tsg = tsg_gk20a_from_ch(c);
1487 if (tsg == NULL) {
1488 return -EINVAL;
1489 }
1490
1491 gr_ctx = &tsg->gr_ctx;
1492 gr_mem = &gr_ctx->mem;
1493
1494	/* golden ctx is global to all channels. Although only the first
1495	   channel initializes the golden image, the driver needs to prevent
1496	   multiple channels from initializing the golden ctx at the same time */
1497 nvgpu_mutex_acquire(&gr->ctx_mutex);
1498
1499 if (gr->ctx_vars.golden_image_initialized) {
1500 goto clean_up;
1501 }
1502 if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
1503 struct nvgpu_timeout timeout;
1504
1505 nvgpu_timeout_init(g, &timeout,
1506 FE_PWR_MODE_TIMEOUT_MAX /
1507 FE_PWR_MODE_TIMEOUT_DEFAULT,
1508 NVGPU_TIMER_RETRY_TIMER);
1509 gk20a_writel(g, gr_fe_pwr_mode_r(),
1510 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f());
1511 do {
1512 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1513 if (req == gr_fe_pwr_mode_req_done_v()) {
1514 break;
1515 }
1516 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT);
1517 } while (nvgpu_timeout_expired_msg(&timeout,
1518 "timeout forcing FE on") == 0);
1519 }
1520
1521
1522 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
1523 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
1524 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
1525 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
1526 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
1527 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
1528 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
1529 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
1530 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
1531 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
1532 (void) gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
1533 nvgpu_udelay(10);
1534
1535 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
1536 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
1537 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
1538 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
1539 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
1540 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
1541 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
1542 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
1543 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
1544 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
1545 (void) gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
1546 nvgpu_udelay(10);
1547
1548 if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
1549 struct nvgpu_timeout timeout;
1550
1551 nvgpu_timeout_init(g, &timeout,
1552 FE_PWR_MODE_TIMEOUT_MAX /
1553 FE_PWR_MODE_TIMEOUT_DEFAULT,
1554 NVGPU_TIMER_RETRY_TIMER);
1555 gk20a_writel(g, gr_fe_pwr_mode_r(),
1556 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_auto_f());
1557
1558 do {
1559 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1560 if (req == gr_fe_pwr_mode_req_done_v()) {
1561 break;
1562 }
1563 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT);
1564 } while (nvgpu_timeout_expired_msg(&timeout,
1565 "timeout setting FE power to auto") == 0);
1566 }
1567
1568 /* clear scc ram */
1569 gk20a_writel(g, gr_scc_init_r(),
1570 gr_scc_init_ram_trigger_f());
1571
1572 err = gr_gk20a_fecs_ctx_bind_channel(g, c);
1573 if (err != 0U) {
1574 goto clean_up;
1575 }
1576
1577 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1578 GR_IDLE_CHECK_DEFAULT);
1579
1580 /* load ctx init */
1581 for (i = 0; i < sw_ctx_load->count; i++) {
1582 gk20a_writel(g, sw_ctx_load->l[i].addr,
1583 sw_ctx_load->l[i].value);
1584 }
1585
1586 if (g->ops.gr.init_preemption_state) {
1587 g->ops.gr.init_preemption_state(g);
1588 }
1589
1590 if (g->ops.clock_gating.blcg_gr_load_gating_prod) {
1591 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
1592 }
1593
1594 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1595 GR_IDLE_CHECK_DEFAULT);
1596 if (err != 0U) {
1597 goto clean_up;
1598 }
1599
1600 /* disable fe_go_idle */
1601 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1602 gr_fe_go_idle_timeout_count_disabled_f());
1603
1604 err = g->ops.gr.commit_global_ctx_buffers(g, c, false);
1605 if (err != 0U) {
1606 goto clean_up;
1607 }
1608
1609 /* override a few ctx state registers */
1610 g->ops.gr.commit_global_timeslice(g, c);
1611
1612 /* floorsweep anything left */
1613 err = g->ops.gr.init_fs_state(g);
1614 if (err != 0U) {
1615 goto clean_up;
1616 }
1617
1618 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1619 GR_IDLE_CHECK_DEFAULT);
1620 if (err != 0U) {
1621 goto restore_fe_go_idle;
1622 }
1623
1624 err = gk20a_init_sw_bundle(g);
1625 if (err != 0U) {
1626 goto clean_up;
1627 }
1628
1629restore_fe_go_idle:
1630 /* restore fe_go_idle */
1631 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1632 gr_fe_go_idle_timeout_count_prod_f());
1633
1634 if ((err != 0U) || (gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1635 GR_IDLE_CHECK_DEFAULT) != 0)) {
1636 goto clean_up;
1637 }
1638
1639 /* load method init */
1640 if (sw_method_init->count) {
1641 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
1642 sw_method_init->l[0].value);
1643 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
1644 gr_pri_mme_shadow_raw_index_write_trigger_f() |
1645 sw_method_init->l[0].addr);
1646 last_method_data = sw_method_init->l[0].value;
1647 }
1648 for (i = 1; i < sw_method_init->count; i++) {
1649 if (sw_method_init->l[i].value != last_method_data) {
1650 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
1651 sw_method_init->l[i].value);
1652 last_method_data = sw_method_init->l[i].value;
1653 }
1654 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
1655 gr_pri_mme_shadow_raw_index_write_trigger_f() |
1656 sw_method_init->l[i].addr);
1657 }
1658
1659 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1660 GR_IDLE_CHECK_DEFAULT);
1661 if (err != 0U) {
1662 goto clean_up;
1663 }
1664
1665 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
1666 ctx_header_words >>= 2;
1667
1668 g->ops.mm.l2_flush(g, true);
1669
1670 for (i = 0; i < ctx_header_words; i++) {
1671 data = nvgpu_mem_rd32(g, gr_mem, i);
1672 nvgpu_mem_wr32(g, gold_mem, i, data);
1673 }
1674 nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
1675 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1676
1677 g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
1678
1679 err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1680 if (err != 0U) {
1681 goto clean_up;
1682 }
1683
1684 gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
1685
1686
1687
1688 if (gr->ctx_vars.local_golden_image == NULL) {
1689
1690 gr->ctx_vars.local_golden_image =
1691 nvgpu_vzalloc(g, gr->ctx_vars.golden_image_size);
1692
1693 if (gr->ctx_vars.local_golden_image == NULL) {
1694 err = -ENOMEM;
1695 goto clean_up;
1696 }
1697 nvgpu_mem_rd_n(g, gold_mem, 0,
1698 gr->ctx_vars.local_golden_image,
1699 gr->ctx_vars.golden_image_size);
1700
1701 }
1702
1703 err = g->ops.gr.commit_inst(c, gr_mem->gpu_va);
1704 if (err != 0U) {
1705 goto clean_up;
1706 }
1707
1708 gr->ctx_vars.golden_image_initialized = true;
1709
1710 gk20a_writel(g, gr_fecs_current_ctx_r(),
1711 gr_fecs_current_ctx_valid_false_f());
1712
1713clean_up:
1714 if (err != 0U) {
1715 nvgpu_err(g, "fail");
1716 } else {
1717 nvgpu_log_fn(g, "done");
1718 }
1719
1720 nvgpu_mutex_release(&gr->ctx_mutex);
1721 return err;
1722}
1723
1724int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1725 struct channel_gk20a *c,
1726 bool enable_smpc_ctxsw)
1727{
1728 struct tsg_gk20a *tsg;
1729 struct nvgpu_gr_ctx *gr_ctx = NULL;
1730 struct nvgpu_mem *mem = NULL;
1731 u32 data;
1732 int ret;
1733
1734 nvgpu_log_fn(g, " ");
1735
1736 tsg = tsg_gk20a_from_ch(c);
1737 if (tsg == NULL) {
1738 return -EINVAL;
1739 }
1740
1741 gr_ctx = &tsg->gr_ctx;
1742 mem = &gr_ctx->mem;
1743 if (!nvgpu_mem_is_valid(mem)) {
1744 nvgpu_err(g, "no graphics context allocated");
1745 return -EFAULT;
1746 }
1747
1748 ret = gk20a_disable_channel_tsg(g, c);
1749 if (ret) {
1750 nvgpu_err(g, "failed to disable channel/TSG");
1751 goto out;
1752 }
1753 ret = gk20a_fifo_preempt(g, c);
1754 if (ret) {
1755 gk20a_enable_channel_tsg(g, c);
1756 nvgpu_err(g, "failed to preempt channel/TSG");
1757 goto out;
1758 }
1759
1760 /* Channel gr_ctx buffer is gpu cacheable.
1761 Flush and invalidate before cpu update. */
1762 g->ops.mm.l2_flush(g, true);
1763
1764 data = nvgpu_mem_rd(g, mem,
1765 ctxsw_prog_main_image_pm_o());
1766
1767 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
1768 data |= enable_smpc_ctxsw ?
1769 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
1770 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
1771
1772 nvgpu_mem_wr(g, mem,
1773 ctxsw_prog_main_image_pm_o(), data);
1774
1775out:
1776 gk20a_enable_channel_tsg(g, c);
1777 return ret;
1778}
1779
1780int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1781 struct channel_gk20a *c,
1782 u64 gpu_va,
1783 u32 mode)
1784{
1785 struct tsg_gk20a *tsg;
1786 struct nvgpu_mem *gr_mem = NULL;
1787 struct nvgpu_gr_ctx *gr_ctx;
1788 struct pm_ctx_desc *pm_ctx;
1789 u32 data;
1790 u64 virt_addr = 0;
1791 struct nvgpu_mem *ctxheader = &c->ctx_header;
1792 int ret;
1793
1794 nvgpu_log_fn(g, " ");
1795
1796 tsg = tsg_gk20a_from_ch(c);
1797 if (tsg == NULL) {
1798 return -EINVAL;
1799 }
1800
1801 gr_ctx = &tsg->gr_ctx;
1802 pm_ctx = &gr_ctx->pm_ctx;
1803 gr_mem = &gr_ctx->mem;
1804 if (!nvgpu_mem_is_valid(gr_mem)) {
1805 nvgpu_err(g, "no graphics context allocated");
1806 return -EFAULT;
1807 }
1808
1809 if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
1810 (g->ops.gr.get_hw_accessor_stream_out_mode == NULL)) {
1811 nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
1812 return -EINVAL;
1813 }
1814
1815 switch (mode) {
1816 case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
1817 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1818 return 0;
1819 }
1820 break;
1821 case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
1822 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
1823 return 0;
1824 }
1825 break;
1826 case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1827 if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
1828 return 0;
1829 }
1830 break;
1831 default:
1832 nvgpu_err(g, "invalid hwpm context switch mode");
1833 return -EINVAL;
1834 }
1835
1836 ret = gk20a_disable_channel_tsg(g, c);
1837 if (ret) {
1838 nvgpu_err(g, "failed to disable channel/TSG");
1839 return ret;
1840 }
1841
1842 ret = gk20a_fifo_preempt(g, c);
1843 if (ret) {
1844 gk20a_enable_channel_tsg(g, c);
1845 nvgpu_err(g, "failed to preempt channel/TSG");
1846 return ret;
1847 }
1848
1849 /* Channel gr_ctx buffer is gpu cacheable.
1850 Flush and invalidate before cpu update. */
1851 g->ops.mm.l2_flush(g, true);
1852
1853 if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
1854 /* Allocate buffer if necessary */
1855 if (pm_ctx->mem.gpu_va == 0) {
1856 ret = nvgpu_dma_alloc_sys(g,
1857 g->gr.ctx_vars.pm_ctxsw_image_size,
1858 &pm_ctx->mem);
1859 if (ret) {
1860 c->g->ops.fifo.enable_channel(c);
1861 nvgpu_err(g,
1862 "failed to allocate pm ctxt buffer");
1863 return ret;
1864 }
1865
1866 pm_ctx->mem.gpu_va = nvgpu_gmmu_map_fixed(c->vm,
1867 &pm_ctx->mem,
1868 gpu_va,
1869 pm_ctx->mem.size,
1870 NVGPU_VM_MAP_CACHEABLE,
1871 gk20a_mem_flag_none, true,
1872 pm_ctx->mem.aperture);
1873 if (pm_ctx->mem.gpu_va == 0ULL) {
1874 nvgpu_err(g,
1875 "failed to map pm ctxt buffer");
1876 nvgpu_dma_free(g, &pm_ctx->mem);
1877 c->g->ops.fifo.enable_channel(c);
1878 return -ENOMEM;
1879 }
1880 }
1881
1882 if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
1883 (g->ops.gr.init_hwpm_pmm_register != NULL)) {
1884 g->ops.gr.init_hwpm_pmm_register(g);
1885 }
1886 }
1887
1888 data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
1889 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1890
1891 switch (mode) {
1892 case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
1893 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
1894 virt_addr = pm_ctx->mem.gpu_va;
1895 break;
1896 case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1897 pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
1898 virt_addr = pm_ctx->mem.gpu_va;
1899 break;
1900 case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
1901 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1902 virt_addr = 0;
1903 }
1904
1905 data |= pm_ctx->pm_mode;
1906
1907 nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
1908
1909 if (ctxheader->gpu_va) {
1910 struct channel_gk20a *ch;
1911
1912 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
1913 nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
1914 g->ops.gr.write_pm_ptr(g, &ch->ctx_header, virt_addr);
1915 }
1916 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
1917 } else {
1918 g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
1919 }
1920
1921 /* enable channel */
1922 gk20a_enable_channel_tsg(g, c);
1923
1924 return 0;
1925}
1926
1927void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
1928 struct nvgpu_mem *mem)
1929{
1930 nvgpu_mem_wr(g, mem,
1931 ctxsw_prog_main_image_num_save_ops_o(), 0);
1932 nvgpu_mem_wr(g, mem,
1933 ctxsw_prog_main_image_num_restore_ops_o(), 0);
1934}
1935
1936 /* load a saved fresh copy of the golden image into the channel gr_ctx */
1937int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1938 struct channel_gk20a *c)
1939{
1940 struct gr_gk20a *gr = &g->gr;
1941 struct tsg_gk20a *tsg;
1942 struct nvgpu_gr_ctx *gr_ctx;
1943 u32 virt_addr_lo;
1944 u32 virt_addr_hi;
1945 u64 virt_addr = 0;
1946 u32 v, data;
1947 int ret = 0;
1948 struct nvgpu_mem *mem;
1949
1950 nvgpu_log_fn(g, " ");
1951
1952 tsg = tsg_gk20a_from_ch(c);
1953 if (tsg == NULL) {
1954 return -EINVAL;
1955 }
1956
1957 gr_ctx = &tsg->gr_ctx;
1958 mem = &gr_ctx->mem;
1959 if (gr->ctx_vars.local_golden_image == NULL) {
1960 return -EINVAL;
1961 }
1962
1963 /* Channel gr_ctx buffer is gpu cacheable.
1964 Flush and invalidate before cpu update. */
1965 g->ops.mm.l2_flush(g, true);
1966
1967 nvgpu_mem_wr_n(g, mem, 0,
1968 gr->ctx_vars.local_golden_image,
1969 gr->ctx_vars.golden_image_size);
1970
1971 if (g->ops.gr.init_ctxsw_hdr_data) {
1972 g->ops.gr.init_ctxsw_hdr_data(g, mem);
1973 }
1974
1975 if ((g->ops.gr.enable_cde_in_fecs != NULL) && c->cde) {
1976 g->ops.gr.enable_cde_in_fecs(g, mem);
1977 }
1978
1979 /* set priv access map */
1980 virt_addr_lo =
1981 u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1982 virt_addr_hi =
1983 u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1984
1985 if (g->allow_all) {
1986 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
1987 } else {
1988 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
1989 }
1990
1991 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1992 data);
1993
1994 nvgpu_mem_wr(g, mem,
1995 ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1996 virt_addr_lo);
1997 nvgpu_mem_wr(g, mem,
1998 ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1999 virt_addr_hi);
2000
2001 /* disable verif features */
2002 v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
2003 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
2004 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
2005 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
2006
2007 if (g->ops.gr.update_ctxsw_preemption_mode) {
2008 g->ops.gr.update_ctxsw_preemption_mode(g, c, mem);
2009 }
2010
2011 if (g->ops.gr.update_boosted_ctx) {
2012 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
2013 }
2014
2015 virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
2016 virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
2017
2018 nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
2019 gr_ctx->patch_ctx.data_count);
2020 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
2021 gr_ctx->patch_ctx.data_count);
2022
2023 nvgpu_mem_wr(g, mem,
2024 ctxsw_prog_main_image_patch_adr_lo_o(),
2025 virt_addr_lo);
2026 nvgpu_mem_wr(g, mem,
2027 ctxsw_prog_main_image_patch_adr_hi_o(),
2028 virt_addr_hi);
2029
2030 /* Update main header region of the context buffer with the info needed
2031 * for PM context switching, including mode and possibly a pointer to
2032 * the PM backing store.
2033 */
2034 if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
2035 if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
2036 nvgpu_err(g,
2037 "context switched pm with no pm buffer!");
2038 return -EFAULT;
2039 }
2040
2041 virt_addr = gr_ctx->pm_ctx.mem.gpu_va;
2042 } else {
2043 virt_addr = 0;
2044 }
2045
2046 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
2047 data = data & ~ctxsw_prog_main_image_pm_mode_m();
2048 data |= gr_ctx->pm_ctx.pm_mode;
2049
2050 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
2051
2052 g->ops.gr.write_pm_ptr(g, mem, virt_addr);
2053
2054 return ret;
2055}
2056
2057static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
2058{
2059 nvgpu_log_fn(g, " ");
2060
2061 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
2062 gr_fecs_ctxsw_mailbox_clear_value_f(~0));
2063
2064 gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
2065 gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
2066
2067 gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
2068 gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
2069
2070 nvgpu_log_fn(g, "done");
2071}
2072
2073static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
2074{
2075 struct mm_gk20a *mm = &g->mm;
2076 struct vm_gk20a *vm = mm->pmu.vm;
2077 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2078 int err;
2079
2080 err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
2081 if (err != 0) {
2082 return err;
2083 }
2084
2085 g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
2086
2087 /* Map ucode surface to GMMU */
2088 ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
2089 &ucode_info->surface_desc,
2090 ucode_info->surface_desc.size,
2091 0, /* flags */
2092 gk20a_mem_flag_read_only,
2093 false,
2094 ucode_info->surface_desc.aperture);
2095 if (ucode_info->surface_desc.gpu_va == 0ULL) {
2096 nvgpu_err(g, "failed to update gmmu ptes");
2097 return -ENOMEM;
2098 }
2099
2100 return 0;
2101}
2102
2103static void gr_gk20a_init_ctxsw_ucode_segment(
2104 struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
2105{
2106 p_seg->offset = *offset;
2107 p_seg->size = size;
2108 *offset = ALIGN(*offset + size, BLK_SIZE);
2109}
2110
2111static void gr_gk20a_init_ctxsw_ucode_segments(
2112 struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
2113 struct gk20a_ctxsw_bootloader_desc *bootdesc,
2114 u32 code_size, u32 data_size)
2115{
2116 u32 boot_size = ALIGN(bootdesc->size, sizeof(u32));
2117 segments->boot_entry = bootdesc->entry_point;
2118 segments->boot_imem_offset = bootdesc->imem_offset;
2119 gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_size);
2120 gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size);
2121 gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size);
2122}
2123
2124static int gr_gk20a_copy_ctxsw_ucode_segments(
2125 struct gk20a *g,
2126 struct nvgpu_mem *dst,
2127 struct gk20a_ctxsw_ucode_segments *segments,
2128 u32 *bootimage,
2129 u32 *code, u32 *data)
2130{
2131 unsigned int i;
2132
2133 nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage,
2134 segments->boot.size);
2135 nvgpu_mem_wr_n(g, dst, segments->code.offset, code,
2136 segments->code.size);
2137 nvgpu_mem_wr_n(g, dst, segments->data.offset, data,
2138 segments->data.size);
2139
2140 /* compute a "checksum" for the boot binary to detect its version */
2141 segments->boot_signature = 0;
2142 for (i = 0; i < segments->boot.size / sizeof(u32); i++) {
2143 segments->boot_signature += bootimage[i];
2144 }
2145
2146 return 0;
2147}
2148
2149int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2150{
2151 struct mm_gk20a *mm = &g->mm;
2152 struct vm_gk20a *vm = mm->pmu.vm;
2153 struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
2154 struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
2155 struct nvgpu_firmware *fecs_fw;
2156 struct nvgpu_firmware *gpccs_fw;
2157 u32 *fecs_boot_image;
2158 u32 *gpccs_boot_image;
2159 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2160 u32 ucode_size;
2161 int err = 0;
2162
2163 fecs_fw = nvgpu_request_firmware(g, GK20A_FECS_UCODE_IMAGE, 0);
2164 if (fecs_fw == NULL) {
2165 nvgpu_err(g, "failed to load fecs ucode!!");
2166 return -ENOENT;
2167 }
2168
2169 fecs_boot_desc = (void *)fecs_fw->data;
2170 fecs_boot_image = (void *)(fecs_fw->data +
2171 sizeof(struct gk20a_ctxsw_bootloader_desc));
2172
2173 gpccs_fw = nvgpu_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE, 0);
2174 if (gpccs_fw == NULL) {
2175 nvgpu_release_firmware(g, fecs_fw);
2176 nvgpu_err(g, "failed to load gpccs ucode!!");
2177 return -ENOENT;
2178 }
2179
2180 gpccs_boot_desc = (void *)gpccs_fw->data;
2181 gpccs_boot_image = (void *)(gpccs_fw->data +
2182 sizeof(struct gk20a_ctxsw_bootloader_desc));
2183
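 /* Lay out the FECS and GPCCS boot/code/data segments back to back in a single
  * surface; ucode_size accumulates the running total as each segment is added. */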
2184 ucode_size = 0;
2185 gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size,
2186 fecs_boot_desc,
2187 g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32),
2188 g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32));
2189 gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size,
2190 gpccs_boot_desc,
2191 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
2192 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
2193
2194 err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc);
2195 if (err != 0) {
2196 goto clean_up;
2197 }
2198
2199 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2200 &ucode_info->fecs,
2201 fecs_boot_image,
2202 g->gr.ctx_vars.ucode.fecs.inst.l,
2203 g->gr.ctx_vars.ucode.fecs.data.l);
2204
2205 nvgpu_release_firmware(g, fecs_fw);
2206 fecs_fw = NULL;
2207
2208 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2209 &ucode_info->gpccs,
2210 gpccs_boot_image,
2211 g->gr.ctx_vars.ucode.gpccs.inst.l,
2212 g->gr.ctx_vars.ucode.gpccs.data.l);
2213
2214 nvgpu_release_firmware(g, gpccs_fw);
2215 gpccs_fw = NULL;
2216
2217 err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
2218 if (err != 0) {
2219 goto clean_up;
2220 }
2221
2222 return 0;
2223
2224clean_up:
2225 if (ucode_info->surface_desc.gpu_va) {
2226 nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc,
2227 ucode_info->surface_desc.gpu_va);
2228 }
2229 nvgpu_dma_free(g, &ucode_info->surface_desc);
2230
2231 nvgpu_release_firmware(g, gpccs_fw);
2232 gpccs_fw = NULL;
2233 nvgpu_release_firmware(g, fecs_fw);
2234 fecs_fw = NULL;
2235
2236 return err;
2237}
2238
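/* Poll until the FECS arbiter has consumed the last context command and then
 * until it reports not busy; each wait is bounded by FECS_ARB_CMD_TIMEOUT_MAX
 * worth of FECS_ARB_CMD_TIMEOUT_DEFAULT delays. */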
2239static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g)
2240{
2241 int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2242 u32 val;
2243
2244 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2245 while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) {
2246 nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
2247 retries--;
2248 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2249 }
2250
2251 if (retries == 0) {
2252 nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
2253 gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
2254 }
2255
2256 retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2257 while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
2258 gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
2259 (retries != 0)) {
2260 nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
2261 retries--;
2262 }
2263 if (retries == 0) {
2264 nvgpu_err(g,
2265 "arbiter idle timeout, fecs ctxsw status: 0x%08x",
2266 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
2267 }
2268}
2269
2270void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2271{
2272 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2273 int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2274 u64 inst_ptr;
2275
2276 while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
2277 gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
2278 (retries != 0)) {
2279 nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
2280 retries--;
2281 }
2282 if (retries == 0) {
2283 nvgpu_err(g,
2284 "arbiter idle timeout, status: %08x",
2285 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
2286 }
2287
2288 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
2289
2290 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2291 gk20a_writel(g, gr_fecs_new_ctx_r(),
2292 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2293 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2294 gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
2295 gr_fecs_new_ctx_target_sys_mem_coh_f(),
2296 gr_fecs_new_ctx_target_vid_mem_f()) |
2297 gr_fecs_new_ctx_valid_m());
2298
2299 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2300 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2301 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2302 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
2303 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
2304 gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
2305
2306 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
2307
2308 /* Wait for arbiter command to complete */
2309 gr_gk20a_wait_for_fecs_arb_idle(g);
2310
2311 gk20a_writel(g, gr_fecs_current_ctx_r(),
2312 gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) |
2313 gr_fecs_current_ctx_target_m() |
2314 gr_fecs_current_ctx_valid_m());
2315 /* Send command to arbiter to flush */
2316 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
2317
2318 gr_gk20a_wait_for_fecs_arb_idle(g);
2319
2320}
2321
2322void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
2323 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2324{
2325 u32 addr_code32;
2326 u32 addr_data32;
2327
2328 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
2329 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
2330
2331 /*
2332 * Copy falcon bootloader header into dmem at offset 0.
2333 * Configure dmem port 0 for auto-incrementing writes starting at dmem
2334 * offset 0.
2335 */
2336 gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
2337 gr_fecs_dmemc_offs_f(0) |
2338 gr_fecs_dmemc_blk_f(0) |
2339 gr_fecs_dmemc_aincw_f(1));
2340
2341 /* Write out the actual data */
2342 switch (segments->boot_signature) {
2343 case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED:
2344 case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE:
2345 case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
2346 case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
2347 case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
2348 case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
2349 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2350 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2351 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2352 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2353 /* fallthrough */
2354 case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
2355 case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
2356 case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
2357 case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
2358 case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
2359 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2360 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2361 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2362 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2363 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
2364 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2365 addr_code32);
2366 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2367 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2368 segments->code.size);
2369 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2370 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2371 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2372 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2373 addr_data32);
2374 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2375 segments->data.size);
2376 break;
2377 case FALCON_UCODE_SIG_T12X_FECS_OLDER:
2378 case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
2379 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2380 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2381 addr_code32);
2382 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2383 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2384 segments->code.size);
2385 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2386 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2387 addr_data32);
2388 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2389 segments->data.size);
2390 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2391 addr_code32);
2392 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2393 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2394 break;
2395 default:
2396 nvgpu_err(g,
2397 "unknown falcon ucode boot signature 0x%08x"
2398 " with reg_offset 0x%08x",
2399 segments->boot_signature, reg_offset);
2400 BUG();
2401 }
2402}
2403
2404void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
2405 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2406{
2407 u32 addr_load32;
2408 u32 blocks;
2409 u32 b;
2410 u32 dst;
2411
2412 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
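 /* Round the boot image up to whole 256-byte IMEM blocks and convert the size
  * to a block count. */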
2413 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
2414
2415 /*
2416 * Set the base FB address for the DMA transfer. Subtract off the 256
2417 * byte IMEM block offset such that the relative FB and IMEM offsets
2418 * match, allowing the IMEM tags to be properly created.
2419 */
2420
2421 dst = segments->boot_imem_offset;
2422 gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
2423 (addr_load32 - (dst >> 8)));
2424
2425 for (b = 0; b < blocks; b++) {
2426 /* Setup destination IMEM offset */
2427 gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
2428 dst + (b << 8));
2429
2430 /* Setup source offset (relative to BASE) */
2431 gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
2432 dst + (b << 8));
2433
2434 gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
2435 gr_fecs_dmatrfcmd_imem_f(0x01) |
2436 gr_fecs_dmatrfcmd_write_f(0x00) |
2437 gr_fecs_dmatrfcmd_size_f(0x06) |
2438 gr_fecs_dmatrfcmd_ctxdma_f(0));
2439 }
2440
2441 /* Specify the falcon boot vector */
2442 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
2443 gr_fecs_bootvec_vec_f(segments->boot_entry));
2444}
2445
2446static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
2447{
2448 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2449 u64 addr_base = ucode_info->surface_desc.gpu_va;
2450
2451 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
2452
2453 gr_gk20a_load_falcon_bind_instblk(g);
2454
2455 g->ops.gr.falcon_load_ucode(g, addr_base,
2456 &g->ctxsw_ucode_info.fecs, 0);
2457
2458 g->ops.gr.falcon_load_ucode(g, addr_base,
2459 &g->ctxsw_ucode_info.gpccs,
2460 gr_gpcs_gpccs_falcon_hwcfg_r() -
2461 gr_fecs_falcon_hwcfg_r());
2462}
2463
2464int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
2465{
2466 int err;
2467
2468 nvgpu_log_fn(g, " ");
2469
2470 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
2471 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
2472 gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
2473 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
2474 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
2475 }
2476
2477 /*
2478 * In case bootloader is not supported, revert to the old way of
2479 * loading gr ucode, without the faster bootstrap routine.
2480 */
2481 if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
2482 gr_gk20a_load_falcon_dmem(g);
2483 gr_gk20a_load_falcon_imem(g);
2484 gr_gk20a_start_falcon_ucode(g);
2485 } else {
2486 if (!g->gr.skip_ucode_init) {
2487 err = gr_gk20a_init_ctxsw_ucode(g);
2488
2489 if (err != 0) {
2490 return err;
2491 }
2492 }
2493 gr_gk20a_load_falcon_with_bootloader(g);
2494 g->gr.skip_ucode_init = true;
2495 }
2496 nvgpu_log_fn(g, "done");
2497 return 0;
2498}
2499
2500int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g)
2501{
2502 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
2503 gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
2504 gk20a_writel(g, gr_fecs_method_push_r(),
2505 gr_fecs_method_push_adr_set_watchdog_timeout_f());
2506
2507 return 0;
2508}
2509
2510static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
2511{
2512 u32 ret;
2513
2514 nvgpu_log_fn(g, " ");
2515
2516 ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL,
2517 GR_IS_UCODE_OP_EQUAL,
2518 eUcodeHandshakeInitComplete,
2519 GR_IS_UCODE_OP_SKIP, 0, false);
2520 if (ret) {
2521 nvgpu_err(g, "falcon ucode init timeout");
2522 return ret;
2523 }
2524
2525 if (nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP) ||
2526 nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
2527 gk20a_writel(g, gr_fecs_current_ctx_r(),
2528 gr_fecs_current_ctx_valid_false_f());
2529 }
2530
2531 ret = g->ops.gr.set_fecs_watchdog_timeout(g);
2532 if (ret) {
2533 nvgpu_err(g, "fail to set watchdog timeout");
2534 return ret;
2535 }
2536
2537 nvgpu_log_fn(g, "done");
2538 return 0;
2539}
2540
2541int gr_gk20a_init_ctx_state(struct gk20a *g)
2542{
2543 u32 ret;
2544 struct fecs_method_op_gk20a op = {
2545 .mailbox = { .id = 0, .data = 0,
2546 .clr = ~0, .ok = 0, .fail = 0},
2547 .method.data = 0,
2548 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
2549 .cond.fail = GR_IS_UCODE_OP_SKIP,
2550 };
2551
2552 nvgpu_log_fn(g, " ");
2553 /* query ctxsw image sizes, if the golden context has not been created yet */
2554 if (!g->gr.ctx_vars.golden_image_initialized) {
2555 op.method.addr =
2556 gr_fecs_method_push_adr_discover_image_size_v();
2557 op.mailbox.ret = &g->gr.ctx_vars.golden_image_size;
2558 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2559 if (ret) {
2560 nvgpu_err(g,
2561 "query golden image size failed");
2562 return ret;
2563 }
2564 op.method.addr =
2565 gr_fecs_method_push_adr_discover_zcull_image_size_v();
2566 op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size;
2567 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2568 if (ret) {
2569 nvgpu_err(g,
2570 "query zcull ctx image size failed");
2571 return ret;
2572 }
2573 op.method.addr =
2574 gr_fecs_method_push_adr_discover_pm_image_size_v();
2575 op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size;
2576 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2577 if (ret) {
2578 nvgpu_err(g,
2579 "query pm ctx image size failed");
2580 return ret;
2581 }
2582 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
2583#ifdef CONFIG_GK20A_CTXSW_TRACE
2584 g->gr.ctx_vars.fecs_trace_buffer_size =
2585 gk20a_fecs_trace_buffer_size(g);
2586#endif
2587 }
2588
2589 nvgpu_log_fn(g, "done");
2590 return 0;
2591}
2592
2593void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
2594 struct gr_ctx_buffer_desc *desc)
2595{
2596 if (desc == NULL) {
2597 return;
2598 }
2599 nvgpu_dma_free(g, &desc->mem);
2600 desc->destroy = NULL;
2601}
2602
2603int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
2604 struct gr_ctx_buffer_desc *desc,
2605 size_t size)
2606{
2607 int err = 0;
2608
2609 nvgpu_log_fn(g, " ");
2610
2611 if (nvgpu_mem_is_valid(&desc->mem)) {
2612 return 0;
2613 }
2614
2615 err = nvgpu_dma_alloc_sys(g, size, &desc->mem);
2616 if (err != 0) {
2617 return err;
2618 }
2619
2620 desc->destroy = gk20a_gr_destroy_ctx_buffer;
2621
2622 return err;
2623}
2624
2625static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
2626{
2627 struct gr_gk20a *gr = &g->gr;
2628 u32 i;
2629
2630 for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
2631 /* destroy exists iff buffer is allocated */
2632 if (gr->global_ctx_buffer[i].destroy) {
2633 gr->global_ctx_buffer[i].destroy(g,
2634 &gr->global_ctx_buffer[i]);
2635 }
2636 }
2637
2638 nvgpu_log_fn(g, "done");
2639}
2640
2641int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2642{
2643 struct gr_gk20a *gr = &g->gr;
2644 int attr_buffer_size, err;
2645
2646 u32 cb_buffer_size = gr->bundle_cb_default_size *
2647 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
2648
2649 u32 pagepool_buffer_size = g->ops.gr.pagepool_default_size(g) *
2650 gr_scc_pagepool_total_pages_byte_granularity_v();
2651
2652 nvgpu_log_fn(g, " ");
2653
2654 attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
2655
2656 nvgpu_log_info(g, "cb_buffer_size : %d", cb_buffer_size);
2657
2658 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[CIRCULAR],
2659 cb_buffer_size);
2660 if (err != 0) {
2661 goto clean_up;
2662 }
2663
2664 if (g->ops.secure_alloc) {
2665 err = g->ops.secure_alloc(g,
2666 &gr->global_ctx_buffer[CIRCULAR_VPR],
2667 cb_buffer_size);
2668 if (err != 0) {
2669 goto clean_up;
2670 }
2671 }
2672
2673 nvgpu_log_info(g, "pagepool_buffer_size : %d", pagepool_buffer_size);
2674
2675 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[PAGEPOOL],
2676 pagepool_buffer_size);
2677 if (err != 0) {
2678 goto clean_up;
2679 }
2680
2681 if (g->ops.secure_alloc) {
2682 err = g->ops.secure_alloc(g,
2683 &gr->global_ctx_buffer[PAGEPOOL_VPR],
2684 pagepool_buffer_size);
2685 if (err != 0) {
2686 goto clean_up;
2687 }
2688 }
2689
2690 nvgpu_log_info(g, "attr_buffer_size : %d", attr_buffer_size);
2691
2692 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[ATTRIBUTE],
2693 attr_buffer_size);
2694 if (err != 0) {
2695 goto clean_up;
2696 }
2697
2698 if (g->ops.secure_alloc) {
2699 err = g->ops.secure_alloc(g,
2700 &gr->global_ctx_buffer[ATTRIBUTE_VPR],
2701 attr_buffer_size);
2702 if (err != 0) {
2703 goto clean_up;
2704 }
2705 }
2706
2707 nvgpu_log_info(g, "golden_image_size : %d",
2708 gr->ctx_vars.golden_image_size);
2709
2710 err = gk20a_gr_alloc_ctx_buffer(g,
2711 &gr->global_ctx_buffer[GOLDEN_CTX],
2712 gr->ctx_vars.golden_image_size);
2713 if (err != 0) {
2714 goto clean_up;
2715 }
2716
2717 nvgpu_log_info(g, "priv_access_map_size : %d",
2718 gr->ctx_vars.priv_access_map_size);
2719
2720 err = gk20a_gr_alloc_ctx_buffer(g,
2721 &gr->global_ctx_buffer[PRIV_ACCESS_MAP],
2722 gr->ctx_vars.priv_access_map_size);
2723
2724 if (err != 0) {
2725 goto clean_up;
2726 }
2727
2728#ifdef CONFIG_GK20A_CTXSW_TRACE
2729 nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
2730 gr->ctx_vars.fecs_trace_buffer_size);
2731
2732 err = nvgpu_dma_alloc_sys(g,
2733 gr->ctx_vars.fecs_trace_buffer_size,
2734 &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
2735 if (err != 0) {
2736 goto clean_up;
2737 }
2738
2739 gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
2740 gk20a_gr_destroy_ctx_buffer;
2741#endif
2742
2743 nvgpu_log_fn(g, "done");
2744 return 0;
2745
2746 clean_up:
2747 nvgpu_err(g, "fail");
2748 gr_gk20a_free_global_ctx_buffers(g);
2749 return -ENOMEM;
2750}
2751
2752static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
2753 struct vm_gk20a *vm,
2754 struct nvgpu_gr_ctx *gr_ctx)
2755{
2756 u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
2757 u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size;
2758 int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
2759 u32 i;
2760
2761 nvgpu_log_fn(g, " ");
2762
2763 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
2764 if (g_bfr_index[i]) {
2765 struct nvgpu_mem *mem;
2766
2767 /*
2768 * Translate from VA index to buffer index to determine
2769 * the correct struct nvgpu_mem to use. Handles the VPR
2770 * vs non-VPR difference in context images.
2771 */
2772 mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem;
2773
2774 nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]);
2775 }
2776 }
2777
2778 memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
2779 memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size));
2780 memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
2781
2782 gr_ctx->global_ctx_buffer_mapped = false;
2783}
2784
2785int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2786 struct channel_gk20a *c)
2787{
2788 struct tsg_gk20a *tsg;
2789 struct vm_gk20a *ch_vm = c->vm;
2790 u64 *g_bfr_va;
2791 u64 *g_bfr_size;
2792 int *g_bfr_index;
2793 struct gr_gk20a *gr = &g->gr;
2794 struct nvgpu_mem *mem;
2795 u64 gpu_va;
2796
2797 nvgpu_log_fn(g, " ");
2798
2799 tsg = tsg_gk20a_from_ch(c);
2800 if (tsg == NULL) {
2801 return -EINVAL;
2802 }
2803
2804 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
2805 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
2806 g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index;
2807
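 /* For the circular, attribute and page-pool buffers below, prefer the VPR
  * copy when the channel is a VPR channel and a VPR copy exists; record the
  * chosen backing buffer in g_bfr_index and map it into the channel VM. */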
2808 /* Circular Buffer */
2809 if (c->vpr &&
2810 nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) {
2811 mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem;
2812 g_bfr_index[CIRCULAR_VA] = CIRCULAR_VPR;
2813 } else {
2814 mem = &gr->global_ctx_buffer[CIRCULAR].mem;
2815 g_bfr_index[CIRCULAR_VA] = CIRCULAR;
2816 }
2817
2818 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2819 NVGPU_VM_MAP_CACHEABLE,
2820 gk20a_mem_flag_none, true, mem->aperture);
2821 if (gpu_va == 0ULL) {
2822 goto clean_up;
2823 }
2824 g_bfr_va[CIRCULAR_VA] = gpu_va;
2825 g_bfr_size[CIRCULAR_VA] = mem->size;
2826
2827 /* Attribute Buffer */
2828 if (c->vpr &&
2829 nvgpu_mem_is_valid(&gr->global_ctx_buffer[ATTRIBUTE_VPR].mem)) {
2830 mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem;
2831 g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE_VPR;
2832 } else {
2833 mem = &gr->global_ctx_buffer[ATTRIBUTE].mem;
2834 g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE;
2835 }
2836
2837 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2838 NVGPU_VM_MAP_CACHEABLE,
2839 gk20a_mem_flag_none, false, mem->aperture);
2840 if (gpu_va == 0ULL) {
2841 goto clean_up;
2842 }
2843 g_bfr_va[ATTRIBUTE_VA] = gpu_va;
2844 g_bfr_size[ATTRIBUTE_VA] = mem->size;
2845
2846 /* Page Pool */
2847 if (c->vpr &&
2848 nvgpu_mem_is_valid(&gr->global_ctx_buffer[PAGEPOOL_VPR].mem)) {
2849 mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem;
2850 g_bfr_index[PAGEPOOL_VA] = PAGEPOOL_VPR;
2851 } else {
2852 mem = &gr->global_ctx_buffer[PAGEPOOL].mem;
2853 g_bfr_index[PAGEPOOL_VA] = PAGEPOOL;
2854 }
2855
2856 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2857 NVGPU_VM_MAP_CACHEABLE,
2858 gk20a_mem_flag_none, true, mem->aperture);
2859 if (gpu_va == 0ULL) {
2860 goto clean_up;
2861 }
2862 g_bfr_va[PAGEPOOL_VA] = gpu_va;
2863 g_bfr_size[PAGEPOOL_VA] = mem->size;
2864
2865 /* Golden Image */
2866 mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
2867 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2868 gk20a_mem_flag_none, true, mem->aperture);
2869 if (gpu_va == 0ULL) {
2870 goto clean_up;
2871 }
2872 g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
2873 g_bfr_size[GOLDEN_CTX_VA] = mem->size;
2874 g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX;
2875
2876 /* Priv register Access Map */
2877 mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
2878 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2879 gk20a_mem_flag_none, true, mem->aperture);
2880 if (gpu_va == 0ULL) {
2881 goto clean_up;
2882 }
2883 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
2884 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
2885 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
2886
2887 tsg->gr_ctx.global_ctx_buffer_mapped = true;
2888
2889#ifdef CONFIG_GK20A_CTXSW_TRACE
2890 /* FECS trace buffer */
2891 if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
2892 mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
2893 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2894 gk20a_mem_flag_none, true, mem->aperture);
2895 if (!gpu_va)
2896 goto clean_up;
2897 g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
2898 g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
2899 g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
2900 }
2901#endif
2902
2903 return 0;
2904
2905clean_up:
2906 gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx);
2907
2908 return -ENOMEM;
2909}
2910
2911int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2912 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
2913 u32 class,
2914 u32 padding)
2915{
2916 struct gr_gk20a *gr = &g->gr;
2917 int err = 0;
2918
2919 nvgpu_log_fn(g, " ");
2920
2921 if (gr->ctx_vars.buffer_size == 0) {
2922 return 0;
2923 }
2924
2925 /* alloc channel gr ctx buffer */
2926 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
2927 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
2928
2929 err = nvgpu_dma_alloc(g, gr->ctx_vars.buffer_total_size, &gr_ctx->mem);
2930 if (err != 0) {
2931 return err;
2932 }
2933
2934 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
2935 &gr_ctx->mem,
2936 gr_ctx->mem.size,
2937 0, /* not GPU-cacheable */
2938 gk20a_mem_flag_none, true,
2939 gr_ctx->mem.aperture);
2940 if (gr_ctx->mem.gpu_va == 0ULL) {
2941 goto err_free_mem;
2942 }
2943
2944 return 0;
2945
2946 err_free_mem:
2947 nvgpu_dma_free(g, &gr_ctx->mem);
2948
2949 return err;
2950}
2951
2952static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
2953 struct tsg_gk20a *tsg, u32 class, u32 padding)
2954{
2955 struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
2956 int err;
2957
2958 if (tsg->vm == NULL) {
2959 nvgpu_err(tsg->g, "No address space bound");
2960 return -ENOMEM;
2961 }
2962
2963 err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding);
2964 if (err != 0) {
2965 return err;
2966 }
2967
2968 gr_ctx->tsgid = tsg->tsgid;
2969
2970 return 0;
2971}
2972
2973void gr_gk20a_free_gr_ctx(struct gk20a *g,
2974 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
2975{
2976 nvgpu_log_fn(g, " ");
2977
2978 if (gr_ctx->mem.gpu_va) {
2979 gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
2980 gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx);
2981 gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx);
2982
2983 if ((g->ops.gr.dump_ctxsw_stats != NULL) &&
2984 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) {
2985 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx);
2986 }
2987
2988 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
2989 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
2990 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
2991 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
2992 nvgpu_dma_unmap_free(vm, &gr_ctx->mem);
2993
2994 memset(gr_ctx, 0, sizeof(*gr_ctx));
2995 }
2996}
2997
2998void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
2999{
3000 struct gk20a *g = tsg->g;
3001
3002 if (tsg->vm == NULL) {
3003 nvgpu_err(g, "No address space bound");
3004 return;
3005 }
3006 tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx);
3007}
3008
3009u32 gr_gk20a_get_patch_slots(struct gk20a *g)
3010{
3011 return PATCH_CTX_SLOTS_PER_PAGE;
3012}
3013
3014static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
3015 struct channel_gk20a *c)
3016{
3017 struct tsg_gk20a *tsg;
3018 struct patch_desc *patch_ctx;
3019 struct vm_gk20a *ch_vm = c->vm;
3020 u32 alloc_size;
3021 int err = 0;
3022
3023 nvgpu_log_fn(g, " ");
3024
3025 tsg = tsg_gk20a_from_ch(c);
3026 if (tsg == NULL) {
3027 return -EINVAL;
3028 }
3029
3030 patch_ctx = &tsg->gr_ctx.patch_ctx;
3031 alloc_size = g->ops.gr.get_patch_slots(g) *
3032 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
3033
3034 nvgpu_log(g, gpu_dbg_info, "patch buffer size in entries: %d",
3035 alloc_size);
3036
3037 err = nvgpu_dma_alloc_map_sys(ch_vm,
3038 alloc_size * sizeof(u32), &patch_ctx->mem);
3039 if (err != 0) {
3040 return err;
3041 }
3042
3043 nvgpu_log_fn(g, "done");
3044 return 0;
3045}
3046
3047static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
3048 struct vm_gk20a *vm,
3049 struct nvgpu_gr_ctx *gr_ctx)
3050{
3051 struct patch_desc *patch_ctx = &gr_ctx->patch_ctx;
3052
3053 nvgpu_log_fn(g, " ");
3054
3055 if (patch_ctx->mem.gpu_va) {
3056 nvgpu_gmmu_unmap(vm, &patch_ctx->mem,
3057 patch_ctx->mem.gpu_va);
3058 }
3059
3060 nvgpu_dma_free(g, &patch_ctx->mem);
3061 patch_ctx->data_count = 0;
3062}
3063
3064static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
3065 struct vm_gk20a *vm,
3066 struct nvgpu_gr_ctx *gr_ctx)
3067{
3068 struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx;
3069
3070 nvgpu_log_fn(g, " ");
3071
3072 if (pm_ctx->mem.gpu_va) {
3073 nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
3074
3075 nvgpu_dma_free(g, &pm_ctx->mem);
3076 }
3077}
3078
3079int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3080{
3081 struct gk20a *g = c->g;
3082 struct nvgpu_gr_ctx *gr_ctx;
3083 struct tsg_gk20a *tsg = NULL;
3084 int err = 0;
3085
3086 nvgpu_log_fn(g, " ");
3087
3088 /* An address space needs to have been bound at this point. */
3089 if (!gk20a_channel_as_bound(c) && (c->vm == NULL)) {
3090 nvgpu_err(g,
3091 "not bound to address space at time"
3092 " of grctx allocation");
3093 return -EINVAL;
3094 }
3095
3096 if (!g->ops.gr.is_valid_class(g, class_num)) {
3097 nvgpu_err(g,
3098 "invalid obj class 0x%x", class_num);
3099 err = -EINVAL;
3100 goto out;
3101 }
3102 c->obj_class = class_num;
3103
3104 tsg = tsg_gk20a_from_ch(c);
3105 if (tsg == NULL) {
3106 return -EINVAL;
3107 }
3108
3109 gr_ctx = &tsg->gr_ctx;
3110
3111 if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
3112 tsg->vm = c->vm;
3113 nvgpu_vm_get(tsg->vm);
3114 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
3115 class_num,
3116 flags);
3117 if (err != 0) {
3118 nvgpu_err(g,
3119 "fail to allocate TSG gr ctx buffer");
3120 nvgpu_vm_put(tsg->vm);
3121 tsg->vm = NULL;
3122 goto out;
3123 }
3124
3125 /* allocate patch buffer */
3126 if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
3127 gr_ctx->patch_ctx.data_count = 0;
3128 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
3129 if (err != 0) {
3130 nvgpu_err(g,
3131 "fail to allocate patch buffer");
3132 goto out;
3133 }
3134 }
3135
3136 /* map global buffer to channel gpu_va and commit */
3137 err = g->ops.gr.map_global_ctx_buffers(g, c);
3138 if (err != 0) {
3139 nvgpu_err(g,
3140 "fail to map global ctx buffer");
3141 goto out;
3142 }
3143 g->ops.gr.commit_global_ctx_buffers(g, c, true);
3144
3145 /* commit gr ctx buffer */
3146 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
3147 if (err != 0) {
3148 nvgpu_err(g,
3149 "fail to commit gr ctx buffer");
3150 goto out;
3151 }
3152
3153 /* init golden image */
3154 err = gr_gk20a_init_golden_ctx_image(g, c);
3155 if (err != 0) {
3156 nvgpu_err(g,
3157 "fail to init golden ctx image");
3158 goto out;
3159 }
3160
3161 /* Re-enable ELPG now that golden image has been initialized.
3162 * The PMU PG init code may already have tried to enable elpg, but
3163 * would not have been able to complete this action since the golden
3164 * image hadn't been initialized yet, so do this now.
3165 */
3166 err = nvgpu_pmu_reenable_elpg(g);
3167 if (err != 0) {
3168 nvgpu_err(g, "fail to re-enable elpg");
3169 goto out;
3170 }
3171
3172 /* load golden image */
3173 err = gr_gk20a_load_golden_ctx_image(g, c);
3174 if (err != 0) {
3175 nvgpu_err(g,
3176 "fail to load golden ctx image");
3177 goto out;
3178 }
3179#ifdef CONFIG_GK20A_CTXSW_TRACE
3180 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3181 err = g->ops.fecs_trace.bind_channel(g, c);
3182 if (err != 0) {
3183 nvgpu_warn(g,
3184 "fail to bind channel for ctxsw trace");
3185 }
3186 }
3187#endif
3188
3189 if (g->ops.gr.set_czf_bypass) {
3190 g->ops.gr.set_czf_bypass(g, c);
3191 }
3192
3193 /* PM ctxt switch is off by default */
3194 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
3195 } else {
3196 /* commit gr ctx buffer */
3197 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
3198 if (err != 0) {
3199 nvgpu_err(g,
3200 "fail to commit gr ctx buffer");
3201 goto out;
3202 }
3203#ifdef CONFIG_GK20A_CTXSW_TRACE
3204 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3205 err = g->ops.fecs_trace.bind_channel(g, c);
3206 if (err != 0) {
3207 nvgpu_warn(g,
3208 "fail to bind channel for ctxsw trace");
3209 }
3210 }
3211#endif
3212 }
3213
3214 nvgpu_log_fn(g, "done");
3215 return 0;
3216out:
3217 /* 1. gr_ctx, patch_ctx and global ctx buffer mapping
3218 can be reused so no need to release them.
3219 2. golden image init and load is a one time thing so if
3220 they pass, no need to undo. */
3221 nvgpu_err(g, "fail");
3222 return err;
3223}
3224
3225static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3226{
3227 struct gk20a *g = gr->g;
3228
3229 nvgpu_log_fn(g, " ");
3230
3231 gr_gk20a_free_cyclestats_snapshot_data(g);
3232
3233 gr_gk20a_free_global_ctx_buffers(g);
3234
3235 nvgpu_dma_free(g, &gr->compbit_store.mem);
3236
3237 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
3238
3239 nvgpu_kfree(g, gr->gpc_tpc_count);
3240 nvgpu_kfree(g, gr->gpc_zcb_count);
3241 nvgpu_kfree(g, gr->gpc_ppc_count);
3242 nvgpu_kfree(g, gr->pes_tpc_count[0]);
3243 nvgpu_kfree(g, gr->pes_tpc_count[1]);
3244 nvgpu_kfree(g, gr->pes_tpc_mask[0]);
3245 nvgpu_kfree(g, gr->pes_tpc_mask[1]);
3246 nvgpu_kfree(g, gr->sm_to_cluster);
3247 nvgpu_kfree(g, gr->gpc_skip_mask);
3248 nvgpu_kfree(g, gr->map_tiles);
3249 nvgpu_kfree(g, gr->fbp_rop_l2_en_mask);
3250 gr->gpc_tpc_count = NULL;
3251 gr->gpc_zcb_count = NULL;
3252 gr->gpc_ppc_count = NULL;
3253 gr->pes_tpc_count[0] = NULL;
3254 gr->pes_tpc_count[1] = NULL;
3255 gr->pes_tpc_mask[0] = NULL;
3256 gr->pes_tpc_mask[1] = NULL;
3257 gr->gpc_skip_mask = NULL;
3258 gr->map_tiles = NULL;
3259 gr->fbp_rop_l2_en_mask = NULL;
3260
3261 gr->ctx_vars.valid = false;
3262 nvgpu_kfree(g, gr->ctx_vars.ucode.fecs.inst.l);
3263 nvgpu_kfree(g, gr->ctx_vars.ucode.fecs.data.l);
3264 nvgpu_kfree(g, gr->ctx_vars.ucode.gpccs.inst.l);
3265 nvgpu_kfree(g, gr->ctx_vars.ucode.gpccs.data.l);
3266 nvgpu_kfree(g, gr->ctx_vars.sw_bundle_init.l);
3267 nvgpu_kfree(g, gr->ctx_vars.sw_veid_bundle_init.l);
3268 nvgpu_kfree(g, gr->ctx_vars.sw_method_init.l);
3269 nvgpu_kfree(g, gr->ctx_vars.sw_ctx_load.l);
3270 nvgpu_kfree(g, gr->ctx_vars.sw_non_ctx_load.l);
3271 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.sys.l);
3272 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.gpc.l);
3273 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.tpc.l);
3274 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
3275 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.ppc.l);
3276 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_sys.l);
3277 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_gpc.l);
3278 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_tpc.l);
3279 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_ppc.l);
3280 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.perf_sys.l);
3281 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.fbp.l);
3282 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.perf_gpc.l);
3283 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.fbp_router.l);
3284 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.gpc_router.l);
3285 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_ltc.l);
3286 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_fbpa.l);
3287 nvgpu_kfree(g, gr->ctx_vars.sw_bundle64_init.l);
3288 nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_cau.l);
3289
3290 nvgpu_vfree(g, gr->ctx_vars.local_golden_image);
3291 gr->ctx_vars.local_golden_image = NULL;
3292
3293 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) {
3294 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3295 }
3296 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3297
3298 gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
3299
3300 nvgpu_ecc_remove_support(g);
3301}
3302
3303static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3304{
3305 u32 gpc_index, pes_index;
3306 u32 pes_tpc_mask;
3307 u32 pes_tpc_count;
3308 u32 pes_heavy_index;
3309 u32 gpc_new_skip_mask;
3310 u32 tmp;
3311 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3312 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
3313
3314 tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
3315 gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
3316
3317 tmp = gk20a_readl(g, top_num_gpcs_r());
3318 gr->max_gpc_count = top_num_gpcs_value_v(tmp);
3319
3320 tmp = gk20a_readl(g, top_num_fbps_r());
3321 gr->max_fbps_count = top_num_fbps_value_v(tmp);
3322
3323 gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
3324
3325 if (gr->fbp_rop_l2_en_mask == NULL) {
3326 gr->fbp_rop_l2_en_mask =
3327 nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
3328 if (gr->fbp_rop_l2_en_mask == NULL) {
3329 goto clean_up;
3330 }
3331 } else {
3332 memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count *
3333 sizeof(u32));
3334 }
3335
3336 tmp = gk20a_readl(g, top_tpc_per_gpc_r());
3337 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
3338
3339 gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
3340
3341 tmp = gk20a_readl(g, top_num_fbps_r());
3342 gr->sys_count = top_num_fbps_value_v(tmp);
3343
3344 tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
3345 gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
3346
3347 gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
3348 if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
3349 "too many pes per gpc\n")) {
3350 goto clean_up;
3351 }
3352
3353 gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS);
3354
3355 if (gr->gpc_count == 0U) {
3356 nvgpu_err(g, "gpc_count==0!");
3357 goto clean_up;
3358 }
3359
3360 if (gr->gpc_tpc_count == NULL) {
3361 gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count *
3362 sizeof(u32));
3363 } else {
3364 memset(gr->gpc_tpc_count, 0, gr->gpc_count *
3365 sizeof(u32));
3366 }
3367
3368 if (gr->gpc_tpc_mask == NULL) {
3369 gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count *
3370 sizeof(u32));
3371 } else {
3372 memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count *
3373 sizeof(u32));
3374 }
3375
3376 if (gr->gpc_zcb_count == NULL) {
3377 gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count *
3378 sizeof(u32));
3379 } else {
3380 memset(gr->gpc_zcb_count, 0, gr->gpc_count *
3381 sizeof(u32));
3382 }
3383
3384 if (gr->gpc_ppc_count == NULL) {
3385 gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count *
3386 sizeof(u32));
3387 } else {
3388 memset(gr->gpc_ppc_count, 0, gr->gpc_count *
3389 sizeof(u32));
3390 }
3391
3392 if (gr->gpc_skip_mask == NULL) {
3393 gr->gpc_skip_mask =
3394 nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() *
3395 4 * sizeof(u32));
3396 } else {
3397 memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() *
3398 4 * sizeof(u32));
3399 }
3400
3401 if ((gr->gpc_tpc_count == NULL) || (gr->gpc_tpc_mask == NULL) ||
3402 (gr->gpc_zcb_count == NULL) || (gr->gpc_ppc_count == NULL) ||
3403 (gr->gpc_skip_mask == NULL)) {
3404 goto clean_up;
3405 }
3406
3407 for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) {
3408 if (g->ops.gr.get_gpc_tpc_mask) {
3409 gr->gpc_tpc_mask[gpc_index] =
3410 g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
3411 }
3412 }
3413
3414 gr->ppc_count = 0;
3415 gr->tpc_count = 0;
3416 gr->zcb_count = 0;
3417 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3418 tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r() +
3419 gpc_stride * gpc_index);
3420
3421 gr->gpc_tpc_count[gpc_index] =
3422 gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
3423 gr->tpc_count += gr->gpc_tpc_count[gpc_index];
3424
3425 gr->gpc_zcb_count[gpc_index] =
3426 gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
3427 gr->zcb_count += gr->gpc_zcb_count[gpc_index];
3428
3429 for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
3430 if (gr->pes_tpc_count[pes_index] == NULL) {
3431 gr->pes_tpc_count[pes_index] =
3432 nvgpu_kzalloc(g, gr->gpc_count *
3433 sizeof(u32));
3434 gr->pes_tpc_mask[pes_index] =
3435 nvgpu_kzalloc(g, gr->gpc_count *
3436 sizeof(u32));
3437 if ((gr->pes_tpc_count[pes_index] == NULL) ||
3438 (gr->pes_tpc_mask[pes_index] == NULL)) {
3439 goto clean_up;
3440 }
3441 }
3442
3443 tmp = gk20a_readl(g,
3444 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
3445 gpc_index * gpc_stride);
3446
3447 pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
3448 pes_tpc_count = count_bits(pes_tpc_mask);
3449
3450 /* detect PES presence by seeing if there are
3451 * TPCs connected to it.
3452 */
3453 if (pes_tpc_count != 0) {
3454 gr->gpc_ppc_count[gpc_index]++;
3455 }
3456
3457 gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
3458 gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
3459 }
3460
3461 gr->ppc_count += gr->gpc_ppc_count[gpc_index];
3462
3463 gpc_new_skip_mask = 0;
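 /* When the two PESes of a GPC share their TPCs unevenly (5 TPCs total, or 4
  * split unevenly), record the lowest-numbered TPC of the heavier PES in the
  * skip mask; x ^ (x & (x - 1)) below isolates that lowest set bit. This
  * appears intended to even out the TPC distribution between the PESes. */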
3464 if (gr->pe_count_per_gpc > 1 &&
3465 gr->pes_tpc_count[0][gpc_index] +
3466 gr->pes_tpc_count[1][gpc_index] == 5) {
3467 pes_heavy_index =
3468 gr->pes_tpc_count[0][gpc_index] >
3469 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
3470
3471 gpc_new_skip_mask =
3472 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
3473 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
3474 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
3475
3476 } else if (gr->pe_count_per_gpc > 1 &&
3477 (gr->pes_tpc_count[0][gpc_index] +
3478 gr->pes_tpc_count[1][gpc_index] == 4) &&
3479 (gr->pes_tpc_count[0][gpc_index] !=
3480 gr->pes_tpc_count[1][gpc_index])) {
3481 pes_heavy_index =
3482 gr->pes_tpc_count[0][gpc_index] >
3483 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
3484
3485 gpc_new_skip_mask =
3486 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
3487 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
3488 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
3489 }
3490 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
3491 }
3492
3493 /* allocate for max tpc per gpc */
3494 if (gr->sm_to_cluster == NULL) {
3495 gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count *
3496 gr->max_tpc_per_gpc_count *
3497 sm_per_tpc * sizeof(struct sm_info));
3498 if (!gr->sm_to_cluster)
3499 goto clean_up;
3500 } else {
3501 memset(gr->sm_to_cluster, 0, gr->gpc_count *
3502 gr->max_tpc_per_gpc_count *
3503 sm_per_tpc * sizeof(struct sm_info));
3504 }
3505 gr->no_of_sm = 0;
3506
3507 nvgpu_log_info(g, "fbps: %d", gr->num_fbps);
3508 nvgpu_log_info(g, "max_gpc_count: %d", gr->max_gpc_count);
3509 nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count);
3510 nvgpu_log_info(g, "max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
3511 nvgpu_log_info(g, "max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
3512 nvgpu_log_info(g, "max_tpc_count: %d", gr->max_tpc_count);
3513 nvgpu_log_info(g, "sys_count: %d", gr->sys_count);
3514 nvgpu_log_info(g, "gpc_count: %d", gr->gpc_count);
3515 nvgpu_log_info(g, "pe_count_per_gpc: %d", gr->pe_count_per_gpc);
3516 nvgpu_log_info(g, "tpc_count: %d", gr->tpc_count);
3517 nvgpu_log_info(g, "ppc_count: %d", gr->ppc_count);
3518
3519 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3520 nvgpu_log_info(g, "gpc_tpc_count[%d] : %d",
3521 gpc_index, gr->gpc_tpc_count[gpc_index]);
3522 }
3523 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3524 nvgpu_log_info(g, "gpc_zcb_count[%d] : %d",
3525 gpc_index, gr->gpc_zcb_count[gpc_index]);
3526 }
3527 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3528 nvgpu_log_info(g, "gpc_ppc_count[%d] : %d",
3529 gpc_index, gr->gpc_ppc_count[gpc_index]);
3530 }
3531 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3532 nvgpu_log_info(g, "gpc_skip_mask[%d] : %d",
3533 gpc_index, gr->gpc_skip_mask[gpc_index]);
3534 }
3535 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3536 for (pes_index = 0;
3537 pes_index < gr->pe_count_per_gpc;
3538 pes_index++) {
3539 nvgpu_log_info(g, "pes_tpc_count[%d][%d] : %d",
3540 pes_index, gpc_index,
3541 gr->pes_tpc_count[pes_index][gpc_index]);
3542 }
3543 }
3544
3545 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3546 for (pes_index = 0;
3547 pes_index < gr->pe_count_per_gpc;
3548 pes_index++) {
3549 nvgpu_log_info(g, "pes_tpc_mask[%d][%d] : %d",
3550 pes_index, gpc_index,
3551 gr->pes_tpc_mask[pes_index][gpc_index]);
3552 }
3553 }
3554
3555 g->ops.gr.bundle_cb_defaults(g);
3556 g->ops.gr.cb_size_default(g);
3557 g->ops.gr.calc_global_ctx_buffer_size(g);
3558 gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
3559
3560 nvgpu_log_info(g, "bundle_cb_default_size: %d",
3561 gr->bundle_cb_default_size);
3562 nvgpu_log_info(g, "min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
3563 nvgpu_log_info(g, "bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
3564 nvgpu_log_info(g, "attrib_cb_default_size: %d",
3565 gr->attrib_cb_default_size);
3566 nvgpu_log_info(g, "attrib_cb_size: %d", gr->attrib_cb_size);
3567 nvgpu_log_info(g, "alpha_cb_default_size: %d", gr->alpha_cb_default_size);
3568 nvgpu_log_info(g, "alpha_cb_size: %d", gr->alpha_cb_size);
3569 nvgpu_log_info(g, "timeslice_mode: %d", gr->timeslice_mode);
3570
3571 return 0;
3572
3573clean_up:
3574 return -ENOMEM;
3575}
3576
3577static u32 prime_set[18] = {
3578 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
3579
3580static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3581{
3582 s32 comm_denom;
3583 s32 mul_factor;
3584 s32 *init_frac = NULL;
3585 s32 *init_err = NULL;
3586 s32 *run_err = NULL;
3587 s32 *sorted_num_tpcs = NULL;
3588 s32 *sorted_to_unsorted_gpc_map = NULL;
3589 u32 gpc_index;
3590 u32 gpc_mark = 0;
3591 u32 num_tpc;
3592 u32 max_tpc_count = 0;
3593 u32 swap;
3594 u32 tile_count;
3595 u32 index;
3596 bool delete_map = false;
3597 bool gpc_sorted;
3598 int ret = 0;
3599 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
3600 int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
3601 int map_tile_count = num_gpcs * num_tpc_per_gpc;
3602
3603 init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3604 init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3605 run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3606 sorted_num_tpcs =
3607 nvgpu_kzalloc(g, num_gpcs * num_tpc_per_gpc * sizeof(s32));
3608 sorted_to_unsorted_gpc_map =
3609 nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
3610
3611 if (!((init_frac != NULL) &&
3612 (init_err != NULL) &&
3613 (run_err != NULL) &&
3614 (sorted_num_tpcs != NULL) &&
3615 (sorted_to_unsorted_gpc_map != NULL))) {
3616 ret = -ENOMEM;
3617 goto clean_up;
3618 }
3619
3620 gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
3621
3622 if (gr->tpc_count == 3) {
3623 gr->map_row_offset = 2;
3624 } else if (gr->tpc_count < 3) {
3625 gr->map_row_offset = 1;
3626 } else {
3627 gr->map_row_offset = 3;
3628
3629 for (index = 1; index < 18; index++) {
3630 u32 prime = prime_set[index];
3631 if ((gr->tpc_count % prime) != 0) {
3632 gr->map_row_offset = prime;
3633 break;
3634 }
3635 }
3636 }
3637
3638 switch (gr->tpc_count) {
3639 case 15:
3640 gr->map_row_offset = 6;
3641 break;
3642 case 14:
3643 gr->map_row_offset = 5;
3644 break;
3645 case 13:
3646 gr->map_row_offset = 2;
3647 break;
3648 case 11:
3649 gr->map_row_offset = 7;
3650 break;
3651 case 10:
3652 gr->map_row_offset = 6;
3653 break;
3654 case 7:
3655 case 5:
3656 gr->map_row_offset = 1;
3657 break;
3658 default:
3659 break;
3660 }
3661
3662 if (gr->map_tiles) {
3663 if (gr->map_tile_count != gr->tpc_count) {
3664 delete_map = true;
3665 }
3666
3667 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
3668 if (gr_gk20a_get_map_tile_count(gr, tile_count)
3669 >= gr->tpc_count) {
3670 delete_map = true;
3671 }
3672 }
3673
3674 if (delete_map) {
3675 nvgpu_kfree(g, gr->map_tiles);
3676 gr->map_tiles = NULL;
3677 gr->map_tile_count = 0;
3678 }
3679 }
3680
3681 if (gr->map_tiles == NULL) {
3682 gr->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
3683 if (gr->map_tiles == NULL) {
3684 ret = -ENOMEM;
3685 goto clean_up;
3686 }
3687 gr->map_tile_count = map_tile_count;
3688
3689 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3690 sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
3691 sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
3692 }
3693
3694 gpc_sorted = false;
3695 while (!gpc_sorted) {
3696 gpc_sorted = true;
3697 for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
3698 if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
3699 gpc_sorted = false;
3700 swap = sorted_num_tpcs[gpc_index];
3701 sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
3702 sorted_num_tpcs[gpc_index + 1] = swap;
3703 swap = sorted_to_unsorted_gpc_map[gpc_index];
3704 sorted_to_unsorted_gpc_map[gpc_index] =
3705 sorted_to_unsorted_gpc_map[gpc_index + 1];
3706 sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
3707 }
3708 }
3709 }
3710
3711 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3712 if (gr->gpc_tpc_count[gpc_index] > max_tpc_count) {
3713 max_tpc_count = gr->gpc_tpc_count[gpc_index];
3714 }
3715 }
3716
3717 mul_factor = gr->gpc_count * max_tpc_count;
3718 if (mul_factor & 0x1) {
3719 mul_factor = 2;
3720 } else {
3721 mul_factor = 1;
3722 }
3723
3724 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
3725
3726 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3727 num_tpc = sorted_num_tpcs[gpc_index];
3728
3729 init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
3730
3731 if (num_tpc != 0) {
3732 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
3733 } else {
3734 init_err[gpc_index] = 0;
3735 }
3736
3737 run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
3738 }
3739
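 /* Build map_tiles by interleaving GPC indices in proportion to each GPC's TPC
  * count: every pass adds init_frac to each GPC's running error and emits that
  * GPC's (unsorted) index whenever the error crosses half of comm_denom, a
  * Bresenham-style distribution. */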
3740 while (gpc_mark < gr->tpc_count) {
3741 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3742 if ((run_err[gpc_index] * 2) >= comm_denom) {
3743 gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
3744 run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
3745 } else {
3746 run_err[gpc_index] += init_frac[gpc_index];
3747 }
3748 }
3749 }
3750 }
3751
3752clean_up:
3753 nvgpu_kfree(g, init_frac);
3754 nvgpu_kfree(g, init_err);
3755 nvgpu_kfree(g, run_err);
3756 nvgpu_kfree(g, sorted_num_tpcs);
3757 nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
3758
3759 if (ret) {
3760 nvgpu_err(g, "fail");
3761 } else {
3762 nvgpu_log_fn(g, "done");
3763 }
3764
3765 return ret;
3766}
3767
3768static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
3769{
3770 struct gr_zcull_gk20a *zcull = &gr->zcull;
3771
3772 zcull->aliquot_width = gr->tpc_count * 16;
3773 zcull->aliquot_height = 16;
3774
3775 zcull->width_align_pixels = gr->tpc_count * 16;
3776 zcull->height_align_pixels = 32;
3777
3778 zcull->aliquot_size =
3779 zcull->aliquot_width * zcull->aliquot_height;
3780
3781 /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
3782 zcull->pixel_squares_by_aliquots =
3783 gr->zcb_count * 16 * 16 * gr->tpc_count /
3784 (gr->gpc_count * gr->gpc_tpc_count[0]);
3785
3786 zcull->total_aliquots =
3787 gr_gpc0_zcull_total_ram_size_num_aliquots_f(
3788 gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
3789
3790 return 0;
3791}
3792
3793u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
3794{
3795 /* assuming gr has already been initialized */
3796 return gr->ctx_vars.zcull_ctxsw_image_size;
3797}
3798
3799int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
3800 struct channel_gk20a *c, u64 zcull_va, u32 mode)
3801{
3802 struct tsg_gk20a *tsg;
3803 struct zcull_ctx_desc *zcull_ctx;
3804
3805 tsg = tsg_gk20a_from_ch(c);
3806 if (tsg == NULL) {
3807 return -EINVAL;
3808 }
3809
3810 zcull_ctx = &tsg->gr_ctx.zcull_ctx;
3811 zcull_ctx->ctx_sw_mode = mode;
3812 zcull_ctx->gpu_va = zcull_va;
3813
3814 /* TBD: don't disable channel in sw method processing */
3815 return gr_gk20a_ctx_zcull_setup(g, c);
3816}
3817
3818int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
3819 struct gr_zcull_info *zcull_params)
3820{
3821 struct gr_zcull_gk20a *zcull = &gr->zcull;
3822
3823 zcull_params->width_align_pixels = zcull->width_align_pixels;
3824 zcull_params->height_align_pixels = zcull->height_align_pixels;
3825 zcull_params->pixel_squares_by_aliquots =
3826 zcull->pixel_squares_by_aliquots;
3827 zcull_params->aliquot_total = zcull->total_aliquots;
3828
3829 zcull_params->region_byte_multiplier =
3830 gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
3831 zcull_params->region_header_size =
3832 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
3833 gr_zcull_save_restore_header_bytes_per_gpc_v();
3834
3835 zcull_params->subregion_header_size =
3836 nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
3837 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
3838
3839 zcull_params->subregion_width_align_pixels =
3840 gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
3841 zcull_params->subregion_height_align_pixels =
3842 gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
3843 zcull_params->subregion_count = gr_zcull_subregion_qty_v();
3844
3845 return 0;
3846}
3847
3848int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
3849 struct zbc_entry *color_val, u32 index)
3850{
3851 u32 i;
3852
3853 /* update l2 table */
3854 g->ops.ltc.set_zbc_color_entry(g, color_val, index);
3855
3856 /* update ds table */
3857 gk20a_writel(g, gr_ds_zbc_color_r_r(),
3858 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
3859 gk20a_writel(g, gr_ds_zbc_color_g_r(),
3860 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
3861 gk20a_writel(g, gr_ds_zbc_color_b_r(),
3862 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
3863 gk20a_writel(g, gr_ds_zbc_color_a_r(),
3864 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
3865
3866 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
3867 gr_ds_zbc_color_fmt_val_f(color_val->format));
3868
3869 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3870 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
3871
3872 /* trigger the write */
3873 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3874 gr_ds_zbc_tbl_ld_select_c_f() |
3875 gr_ds_zbc_tbl_ld_action_write_f() |
3876 gr_ds_zbc_tbl_ld_trigger_active_f());
3877
3878 /* update local copy */
3879 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3880 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
3881 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
3882 }
3883 gr->zbc_col_tbl[index].format = color_val->format;
3884 gr->zbc_col_tbl[index].ref_cnt++;
3885
3886 return 0;
3887}
3888
3889int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
3890 struct zbc_entry *depth_val, u32 index)
3891{
3892 /* update l2 table */
3893 g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
3894
3895 /* update ds table */
3896 gk20a_writel(g, gr_ds_zbc_z_r(),
3897 gr_ds_zbc_z_val_f(depth_val->depth));
3898
3899 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
3900 gr_ds_zbc_z_fmt_val_f(depth_val->format));
3901
3902 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3903 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
3904
3905 /* trigger the write */
3906 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3907 gr_ds_zbc_tbl_ld_select_z_f() |
3908 gr_ds_zbc_tbl_ld_action_write_f() |
3909 gr_ds_zbc_tbl_ld_trigger_active_f());
3910
3911 /* update local copy */
3912 gr->zbc_dep_tbl[index].depth = depth_val->depth;
3913 gr->zbc_dep_tbl[index].format = depth_val->format;
3914 gr->zbc_dep_tbl[index].ref_cnt++;
3915
3916 return 0;
3917}
3918
3919void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
3920{
3921 struct fifo_gk20a *f = &g->fifo;
3922 struct fifo_engine_info_gk20a *gr_info = NULL;
3923 u32 ret;
3924 u32 engine_id;
3925
3926 engine_id = gk20a_fifo_get_gr_engine_id(g);
3927 gr_info = (f->engine_info + engine_id);
3928
3929 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
3930 if (ret) {
3931 nvgpu_err(g,
3932 "failed to disable gr engine activity");
3933 return;
3934 }
3935
3936 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
3937 GR_IDLE_CHECK_DEFAULT);
3938 if (ret) {
3939 nvgpu_err(g,
3940 "failed to idle graphics");
3941 goto clean_up;
3942 }
3943
3944 /* update zbc */
3945 g->ops.gr.pmu_save_zbc(g, entries);
3946
3947clean_up:
3948 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
3949 if (ret) {
3950 nvgpu_err(g,
3951 "failed to enable gr engine activity");
3952 }
3953}
3954
3955int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
3956 struct zbc_entry *zbc_val)
3957{
3958 struct zbc_color_table *c_tbl;
3959 struct zbc_depth_table *d_tbl;
3960 u32 i;
3961 int ret = -ENOSPC;
3962 bool added = false;
3963 u32 entries;
3964
3965 /* no endian swap ? */
3966
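	/* Policy: reuse an existing table entry that matches the requested
	 * value (just bump its ref count); otherwise, if there is room,
	 * program the value into the next free slot via the per-chip
	 * add_zbc_color/add_zbc_depth hooks. */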
3967 nvgpu_mutex_acquire(&gr->zbc_lock);
3968 nvgpu_speculation_barrier();
3969 switch (zbc_val->type) {
3970 case GK20A_ZBC_TYPE_COLOR:
3971 /* search existing tables */
3972 for (i = 0; i < gr->max_used_color_index; i++) {
3973
3974 c_tbl = &gr->zbc_col_tbl[i];
3975
3976 if ((c_tbl->ref_cnt != 0U) &&
3977 (c_tbl->format == zbc_val->format) &&
3978 (memcmp(c_tbl->color_ds, zbc_val->color_ds,
3979 sizeof(zbc_val->color_ds)) == 0) &&
3980 (memcmp(c_tbl->color_l2, zbc_val->color_l2,
3981 sizeof(zbc_val->color_l2)) == 0)) {
3982
3983 added = true;
3984 c_tbl->ref_cnt++;
3985 ret = 0;
3986 break;
3987 }
3988 }
3989 /* add new table */
3990 if (!added &&
3991 gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
3992
3993 c_tbl =
3994 &gr->zbc_col_tbl[gr->max_used_color_index];
3995 WARN_ON(c_tbl->ref_cnt != 0);
3996
3997 ret = g->ops.gr.add_zbc_color(g, gr,
3998 zbc_val, gr->max_used_color_index);
3999
4000 if (ret == 0) {
4001 gr->max_used_color_index++;
4002 }
4003 }
4004 break;
4005 case GK20A_ZBC_TYPE_DEPTH:
4006 /* search existing tables */
4007 for (i = 0; i < gr->max_used_depth_index; i++) {
4008
4009 d_tbl = &gr->zbc_dep_tbl[i];
4010
4011 if ((d_tbl->ref_cnt != 0U) &&
4012 (d_tbl->depth == zbc_val->depth) &&
4013 (d_tbl->format == zbc_val->format)) {
4014 added = true;
4015 d_tbl->ref_cnt++;
4016 ret = 0;
4017 break;
4018 }
4019 }
4020 /* add new table */
4021 if (!added &&
4022 gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
4023
4024 d_tbl =
4025 &gr->zbc_dep_tbl[gr->max_used_depth_index];
4026 WARN_ON(d_tbl->ref_cnt != 0);
4027
4028 ret = g->ops.gr.add_zbc_depth(g, gr,
4029 zbc_val, gr->max_used_depth_index);
4030
4031 if (ret == 0) {
4032 gr->max_used_depth_index++;
4033 }
4034 }
4035 break;
4036 case T19X_ZBC:
4037 if (g->ops.gr.add_zbc_type_s) {
4038 added = g->ops.gr.add_zbc_type_s(g, gr, zbc_val, &ret);
4039 } else {
4040 nvgpu_err(g,
4041 "invalid zbc table type %d", zbc_val->type);
4042 ret = -EINVAL;
4043 goto err_mutex;
4044 }
4045 break;
4046 default:
4047 nvgpu_err(g,
4048 "invalid zbc table type %d", zbc_val->type);
4049 ret = -EINVAL;
4050 goto err_mutex;
4051 }
4052
4053 if (!added && ret == 0) {
4054 /* update zbc for elpg only when new entry is added */
4055 entries = max(gr->max_used_color_index,
4056 gr->max_used_depth_index);
4057 g->ops.gr.pmu_save_zbc(g, entries);
4058 }
4059
4060err_mutex:
4061 nvgpu_mutex_release(&gr->zbc_lock);
4062 return ret;
4063}
4064
4065/* get a zbc table entry specified by index
4066 * return table size when type is invalid */
4067int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
4068 struct zbc_query_params *query_params)
4069{
4070 u32 index = query_params->index_size;
4071 u32 i;
4072
4073 nvgpu_speculation_barrier();
4074 switch (query_params->type) {
4075 case GK20A_ZBC_TYPE_INVALID:
4076 query_params->index_size = GK20A_ZBC_TABLE_SIZE;
4077 break;
4078 case GK20A_ZBC_TYPE_COLOR:
4079 if (index >= GK20A_ZBC_TABLE_SIZE) {
4080 nvgpu_err(g,
4081 "invalid zbc color table index");
4082 return -EINVAL;
4083 }
4084
4085 nvgpu_speculation_barrier();
4086 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4087 query_params->color_l2[i] =
4088 gr->zbc_col_tbl[index].color_l2[i];
4089 query_params->color_ds[i] =
4090 gr->zbc_col_tbl[index].color_ds[i];
4091 }
4092 query_params->format = gr->zbc_col_tbl[index].format;
4093 query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
4094 break;
4095 case GK20A_ZBC_TYPE_DEPTH:
4096 if (index >= GK20A_ZBC_TABLE_SIZE) {
4097 nvgpu_err(g,
4098 "invalid zbc depth table index");
4099 return -EINVAL;
4100 }
4101
4102 nvgpu_speculation_barrier();
4103 query_params->depth = gr->zbc_dep_tbl[index].depth;
4104 query_params->format = gr->zbc_dep_tbl[index].format;
4105 query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
4106 break;
4107 case T19X_ZBC:
4108 if (g->ops.gr.zbc_s_query_table) {
4109 return g->ops.gr.zbc_s_query_table(g, gr,
4110 query_params);
4111 } else {
4112 nvgpu_err(g,
4113 "invalid zbc table type");
4114 return -EINVAL;
4115 }
4116 break;
4117 default:
4118 nvgpu_err(g,
4119 "invalid zbc table type");
4120 return -EINVAL;
4121 }
4122
4123 return 0;
4124}
4125
4126static int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
4127{
4128 unsigned int i;
4129 int ret;
4130
4131 for (i = 0; i < gr->max_used_color_index; i++) {
4132 struct zbc_color_table *c_tbl = &gr->zbc_col_tbl[i];
4133 struct zbc_entry zbc_val;
4134
4135 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
4136 memcpy(zbc_val.color_ds,
4137 c_tbl->color_ds, sizeof(zbc_val.color_ds));
4138 memcpy(zbc_val.color_l2,
4139 c_tbl->color_l2, sizeof(zbc_val.color_l2));
4140 zbc_val.format = c_tbl->format;
4141
4142 ret = g->ops.gr.add_zbc_color(g, gr, &zbc_val, i);
4143
4144 if (ret) {
4145 return ret;
4146 }
4147 }
4148 for (i = 0; i < gr->max_used_depth_index; i++) {
4149 struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i];
4150 struct zbc_entry zbc_val;
4151
4152 zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
4153 zbc_val.depth = d_tbl->depth;
4154 zbc_val.format = d_tbl->format;
4155
4156 ret = g->ops.gr.add_zbc_depth(g, gr, &zbc_val, i);
4157 if (ret) {
4158 return ret;
4159 }
4160 }
4161
4162 if (g->ops.gr.load_zbc_s_tbl) {
4163 ret = g->ops.gr.load_zbc_s_tbl(g, gr);
4164 if (ret) {
4165 return ret;
4166 }
4167 }
4168
4169 return 0;
4170}
4171
4172int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
4173{
4174 struct zbc_entry zbc_val;
4175 u32 i = 0;
4176 int err = 0;
4177
4178 err = nvgpu_mutex_init(&gr->zbc_lock);
4179 if (err != 0) {
4180 nvgpu_err(g, "Error in zbc_lock mutex initialization");
4181 return err;
4182 }
4183
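	/* Note: 0x3f800000 below is IEEE-754 1.0f (full alpha / white color
	 * components and the 1.0 depth clear value). */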
4184 /* load default color table */
4185 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
4186
4187 /* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */
4188 zbc_val.format = gr_ds_zbc_color_fmt_val_a8_b8_g8_r8_v();
4189 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4190 zbc_val.color_ds[i] = 0;
4191 zbc_val.color_l2[i] = 0;
4192 }
4193 zbc_val.color_l2[0] = 0xff000000;
4194 zbc_val.color_ds[3] = 0x3f800000;
4195 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4196 if (err != 0) {
4197 goto color_fail;
4198 }
4199
4200 /* Transparent black = (fmt 1 = zero) */
4201 zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
4202 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4203 zbc_val.color_ds[i] = 0;
4204 zbc_val.color_l2[i] = 0;
4205 }
4206 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4207 if (err != 0) {
4208 goto color_fail;
4209 }
4210
4211 /* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */
4212 zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
4213 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
4214 zbc_val.color_ds[i] = 0x3f800000;
4215 zbc_val.color_l2[i] = 0xffffffff;
4216 }
4217 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4218 if (err != 0) {
4219 goto color_fail;
4220 }
4221
4222 gr->max_default_color_index = 3;
4223
4224 /* load default depth table */
4225 zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
4226
4227 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
4228 zbc_val.depth = 0x3f800000;
4229 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4230 if (err != 0) {
4231 goto depth_fail;
4232 }
4233
4234 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
4235 zbc_val.depth = 0;
4236 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
4237 if (err != 0) {
4238 goto depth_fail;
4239 }
4240
4241 gr->max_default_depth_index = 2;
4242
4243 if (g->ops.gr.load_zbc_s_default_tbl) {
4244 err = g->ops.gr.load_zbc_s_default_tbl(g, gr);
4245 if (err != 0) {
4246 return err;
4247 }
4248 }
4249
4250 return 0;
4251
4252color_fail:
4253 nvgpu_err(g, "fail to load default zbc color table");
4254 return err;
4255depth_fail:
4256 nvgpu_err(g, "fail to load default zbc depth table");
4257 return err;
4258}
4259
4260int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
4261 struct zbc_entry *zbc_val)
4262{
4263 struct fifo_gk20a *f = &g->fifo;
4264 struct fifo_engine_info_gk20a *gr_info = NULL;
4265 int ret;
4266 u32 engine_id;
4267
4268 engine_id = gk20a_fifo_get_gr_engine_id(g);
4269 gr_info = (f->engine_info + engine_id);
4270
4271 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
4272 if (ret) {
4273 nvgpu_err(g,
4274 "failed to disable gr engine activity");
4275 return ret;
4276 }
4277
4278 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
4279 GR_IDLE_CHECK_DEFAULT);
4280 if (ret) {
4281 nvgpu_err(g,
4282 "failed to idle graphics");
4283 goto clean_up;
4284 }
4285
4286 ret = gr_gk20a_add_zbc(g, gr, zbc_val);
4287
4288clean_up:
4289 if (gk20a_fifo_enable_engine_activity(g, gr_info)) {
4290 nvgpu_err(g,
4291 "failed to enable gr engine activity");
4292 }
4293
4294 return ret;
4295}
4296
4297int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
4298 struct zbc_entry *zbc_val)
4299{
4300 nvgpu_log_fn(g, " ");
4301
4302 return gr_gk20a_elpg_protected_call(g,
4303 gr_gk20a_add_zbc(g, gr, zbc_val));
4304}
4305
4306void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
4307 u32 *zcull_map_tiles)
4308{
4309 u32 val;
4310
4311 nvgpu_log_fn(g, " ");
4312
4313 if (zcull_num_entries >= 8) {
4314 nvgpu_log_fn(g, "map0");
4315 val =
4316 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(
4317 zcull_map_tiles[0]) |
4318 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(
4319 zcull_map_tiles[1]) |
4320 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(
4321 zcull_map_tiles[2]) |
4322 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(
4323 zcull_map_tiles[3]) |
4324 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(
4325 zcull_map_tiles[4]) |
4326 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(
4327 zcull_map_tiles[5]) |
4328 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(
4329 zcull_map_tiles[6]) |
4330 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(
4331 zcull_map_tiles[7]);
4332
4333 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val);
4334 }
4335
4336 if (zcull_num_entries >= 16) {
4337 nvgpu_log_fn(g, "map1");
4338 val =
4339 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(
4340 zcull_map_tiles[8]) |
4341 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(
4342 zcull_map_tiles[9]) |
4343 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(
4344 zcull_map_tiles[10]) |
4345 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(
4346 zcull_map_tiles[11]) |
4347 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(
4348 zcull_map_tiles[12]) |
4349 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(
4350 zcull_map_tiles[13]) |
4351 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(
4352 zcull_map_tiles[14]) |
4353 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(
4354 zcull_map_tiles[15]);
4355
4356 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val);
4357 }
4358
4359 if (zcull_num_entries >= 24) {
4360 nvgpu_log_fn(g, "map2");
4361 val =
4362 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(
4363 zcull_map_tiles[16]) |
4364 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(
4365 zcull_map_tiles[17]) |
4366 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(
4367 zcull_map_tiles[18]) |
4368 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(
4369 zcull_map_tiles[19]) |
4370 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(
4371 zcull_map_tiles[20]) |
4372 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(
4373 zcull_map_tiles[21]) |
4374 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(
4375 zcull_map_tiles[22]) |
4376 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(
4377 zcull_map_tiles[23]);
4378
4379 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val);
4380 }
4381
4382 if (zcull_num_entries >= 32) {
4383 nvgpu_log_fn(g, "map3");
4384 val =
4385 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(
4386 zcull_map_tiles[24]) |
4387 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(
4388 zcull_map_tiles[25]) |
4389 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(
4390 zcull_map_tiles[26]) |
4391 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(
4392 zcull_map_tiles[27]) |
4393 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(
4394 zcull_map_tiles[28]) |
4395 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(
4396 zcull_map_tiles[29]) |
4397 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(
4398 zcull_map_tiles[30]) |
4399 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(
4400 zcull_map_tiles[31]);
4401
4402 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val);
4403 }
4404
4405}
4406
4407static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4408{
4409 u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
4410 u32 *zcull_map_tiles, *zcull_bank_counters;
4411 u32 map_counter;
4412 u32 rcp_conserv;
4413 u32 offset;
4414 bool floorsweep = false;
4415 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
4416 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
4417 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
4418 GPU_LIT_NUM_TPC_PER_GPC);
4419 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
4420 u32 map_tile_count;
4421
4422 if (gr->map_tiles == NULL) {
4423 return -1;
4424 }
4425
4426 if (zcull_alloc_num % 8 != 0) {
4427 /* Total 8 fields per map reg i.e. tile_0 to tile_7*/
4428 zcull_alloc_num += (zcull_alloc_num % 8);
4429 }
4430 zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
4431
4432 if (zcull_map_tiles == NULL) {
4433 nvgpu_err(g,
4434 "failed to allocate zcull map titles");
4435 return -ENOMEM;
4436 }
4437
4438 zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
4439
4440 if (zcull_bank_counters == NULL) {
4441 nvgpu_err(g,
4442 "failed to allocate zcull bank counters");
4443 nvgpu_kfree(g, zcull_map_tiles);
4444 return -ENOMEM;
4445 }
4446
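	/* For each entry of the GPC map, hand out the next unused zcull bank
	 * within that GPC; zcull_bank_counters[] counts how many banks each
	 * GPC has handed out so far. */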
4447 for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
4448 map_tile_count = gr_gk20a_get_map_tile_count(gr, map_counter);
4449 zcull_map_tiles[map_counter] =
4450 zcull_bank_counters[map_tile_count];
4451 zcull_bank_counters[map_tile_count]++;
4452 }
4453
4454 if (g->ops.gr.program_zcull_mapping != NULL) {
4455 g->ops.gr.program_zcull_mapping(g, zcull_alloc_num,
4456 zcull_map_tiles);
4457 }
4458
4459 nvgpu_kfree(g, zcull_map_tiles);
4460 nvgpu_kfree(g, zcull_bank_counters);
4461
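	/* A GPC with a non-zero zcull bank count below the per-GPC maximum
	 * marks the configuration as floorswept; the programming loop below
	 * then uses the maximum bank count for the hypertile row field
	 * instead of the per-GPC TPC count. */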
4462 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
4463 gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
4464 gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
4465
4466 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
4467 gpc_zcull_count < gpc_tpc_count) {
4468 nvgpu_err(g,
4469 "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
4470 gpc_zcull_count, gpc_tpc_count, gpc_index);
4471 return -EINVAL;
4472 }
4473 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
4474 gpc_zcull_count != 0) {
4475 floorsweep = true;
4476 }
4477 }
4478
4479 /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */
4480 rcp_conserv = DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(),
4481 gr->gpc_tpc_count[0]);
4482
4483 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
4484 offset = gpc_index * gpc_stride;
4485
4486 if (floorsweep) {
4487 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
4488 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
4489 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
4490 gr->max_zcull_per_gpc_count));
4491 } else {
4492 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
4493 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
4494 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
4495 gr->gpc_tpc_count[gpc_index]));
4496 }
4497
4498 gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
4499 gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
4500 gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
4501
4502 gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
4503 gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
4504 }
4505
4506 gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
4507 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
4508
4509 return 0;
4510}
4511
4512void gk20a_gr_enable_exceptions(struct gk20a *g)
4513{
4514 gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
4515 gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
4516 gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
4517 gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
4518 gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
4519 gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
4520}
4521
4522void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
4523{
4524 struct gr_gk20a *gr = &g->gr;
4525 u32 tpc_mask;
4526
4527 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
4528 gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f() |
4529 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
4530
4531 tpc_mask =
4532 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1);
4533
4534 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask);
4535}
4536
4537
4538void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
4539{
4540 /* enable exceptions */
4541 gk20a_writel(g, gr_fe_hww_esr_r(),
4542 gr_fe_hww_esr_en_enable_f() |
4543 gr_fe_hww_esr_reset_active_f());
4544 gk20a_writel(g, gr_memfmt_hww_esr_r(),
4545 gr_memfmt_hww_esr_en_enable_f() |
4546 gr_memfmt_hww_esr_reset_active_f());
4547}
4548
4549void gr_gk20a_fecs_host_int_enable(struct gk20a *g)
4550{
4551 gk20a_writel(g, gr_fecs_host_int_enable_r(),
4552 gr_fecs_host_int_enable_ctxsw_intr1_enable_f() |
4553 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
4554 gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
4555 gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
4556 gr_fecs_host_int_enable_watchdog_enable_f());
4557}
4558
4559static int gk20a_init_gr_setup_hw(struct gk20a *g)
4560{
4561 struct gr_gk20a *gr = &g->gr;
4562 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
4563 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
4564 u32 data;
4565 u32 last_method_data = 0;
4566 u32 i, err;
4567
4568 nvgpu_log_fn(g, " ");
4569
4570 if (g->ops.gr.init_gpc_mmu) {
4571 g->ops.gr.init_gpc_mmu(g);
4572 }
4573
4574 /* load gr floorsweeping registers */
4575 data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
4576 data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
4577 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
4578 gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
4579
4580 gr_gk20a_zcull_init_hw(g, gr);
4581
4582 if (g->ops.priv_ring.set_ppriv_timeout_settings != NULL) {
4583 g->ops.priv_ring.set_ppriv_timeout_settings(g);
4584 }
4585
4586 /* enable fifo access */
4587 gk20a_writel(g, gr_gpfifo_ctl_r(),
4588 gr_gpfifo_ctl_access_enabled_f() |
4589 gr_gpfifo_ctl_semaphore_access_enabled_f());
4590
4591 /* TBD: reload gr ucode when needed */
4592
4593 /* enable interrupts */
4594 gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
4595 gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
4596
4597 /* enable fecs error interrupts */
4598 g->ops.gr.fecs_host_int_enable(g);
4599
4600 g->ops.gr.enable_hww_exceptions(g);
4601 g->ops.gr.set_hww_esr_report_mask(g);
4602
4603 /* enable TPC exceptions per GPC */
4604 if (g->ops.gr.enable_gpc_exceptions) {
4605 g->ops.gr.enable_gpc_exceptions(g);
4606 }
4607
4608 /* enable ECC for L1/SM */
4609 if (g->ops.gr.ecc_init_scrub_reg) {
4610 g->ops.gr.ecc_init_scrub_reg(g);
4611 }
4612
4613 /* TBD: enable per BE exceptions */
4614
4615 /* reset and enable exceptions */
4616 g->ops.gr.enable_exceptions(g);
4617
4618 gr_gk20a_load_zbc_table(g, gr);
4619
4620 if (g->ops.ltc.init_cbc) {
4621 g->ops.ltc.init_cbc(g, gr);
4622 }
4623
4624 if (g->ops.fb.init_cbc) {
4625 g->ops.fb.init_cbc(g, gr);
4626 }
4627
4628 if (g->ops.gr.disable_rd_coalesce) {
4629 g->ops.gr.disable_rd_coalesce(g);
4630 }
4631
4632 /* load ctx init */
4633 for (i = 0; i < sw_ctx_load->count; i++) {
4634 gk20a_writel(g, sw_ctx_load->l[i].addr,
4635 sw_ctx_load->l[i].value);
4636 }
4637
4638 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4639 GR_IDLE_CHECK_DEFAULT);
4640 if (err != 0U) {
4641 goto out;
4642 }
4643
4644 if (g->ops.gr.init_preemption_state) {
4645 err = g->ops.gr.init_preemption_state(g);
4646 if (err != 0U) {
4647 goto out;
4648 }
4649 }
4650
4651 /* disable fe_go_idle */
4652 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
4653 gr_fe_go_idle_timeout_count_disabled_f());
4654
4655 /* override a few ctx state registers */
4656 g->ops.gr.commit_global_timeslice(g, NULL);
4657
4658 /* floorsweep anything left */
4659 err = g->ops.gr.init_fs_state(g);
4660 if (err != 0U) {
4661 goto out;
4662 }
4663
4664 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4665 GR_IDLE_CHECK_DEFAULT);
4666 if (err != 0U) {
4667 goto restore_fe_go_idle;
4668 }
4669
4670restore_fe_go_idle:
4671 /* restore fe_go_idle */
4672 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
4673 gr_fe_go_idle_timeout_count_prod_f());
4674
4675 if ((err != 0U) || (gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4676 GR_IDLE_CHECK_DEFAULT) != 0)) {
4677 goto out;
4678 }
4679
4680 /* load method init */
4681 if (sw_method_init->count) {
4682 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
4683 sw_method_init->l[0].value);
4684 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
4685 gr_pri_mme_shadow_raw_index_write_trigger_f() |
4686 sw_method_init->l[0].addr);
4687 last_method_data = sw_method_init->l[0].value;
4688 }
4689 for (i = 1; i < sw_method_init->count; i++) {
4690 if (sw_method_init->l[i].value != last_method_data) {
4691 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
4692 sw_method_init->l[i].value);
4693 last_method_data = sw_method_init->l[i].value;
4694 }
4695 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
4696 gr_pri_mme_shadow_raw_index_write_trigger_f() |
4697 sw_method_init->l[i].addr);
4698 }
4699
4700 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4701 GR_IDLE_CHECK_DEFAULT);
4702out:
4703 nvgpu_log_fn(g, "done");
4704 return err;
4705}
4706
4707static int gk20a_init_gr_prepare(struct gk20a *g)
4708{
4709 u32 err = 0;
4710
4711 /* reset gr engine */
4712 g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_GRAPH) |
4713 g->ops.mc.reset_mask(g, NVGPU_UNIT_BLG) |
4714 g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
4715
4716 nvgpu_cg_init_gr_load_gating_prod(g);
4717
4718 /* Disable elcg until it gets enabled later in the init*/
4719 nvgpu_cg_elcg_disable_no_wait(g);
4720
4721 /* enable fifo access */
4722 gk20a_writel(g, gr_gpfifo_ctl_r(),
4723 gr_gpfifo_ctl_access_enabled_f() |
4724 gr_gpfifo_ctl_semaphore_access_enabled_f());
4725
4726 if (!g->gr.ctx_vars.valid) {
4727 err = gr_gk20a_init_ctx_vars(g, &g->gr);
4728 if (err != 0U) {
4729 nvgpu_err(g,
4730 "fail to load gr init ctx");
4731 }
4732 }
4733 return err;
4734}
4735
4736static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g)
4737{
4738 struct nvgpu_timeout timeout;
4739 bool fecs_scrubbing;
4740 bool gpccs_scrubbing;
4741
4742 nvgpu_log_fn(g, " ");
4743
4744 nvgpu_timeout_init(g, &timeout,
4745 CTXSW_MEM_SCRUBBING_TIMEOUT_MAX /
4746 CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT,
4747 NVGPU_TIMER_RETRY_TIMER);
4748 do {
4749 fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) &
4750 (gr_fecs_dmactl_imem_scrubbing_m() |
4751 gr_fecs_dmactl_dmem_scrubbing_m());
4752
4753 gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) &
4754 (gr_gpccs_dmactl_imem_scrubbing_m() |
4755 gr_gpccs_dmactl_dmem_scrubbing_m());
4756
4757 if (!fecs_scrubbing && !gpccs_scrubbing) {
4758 nvgpu_log_fn(g, "done");
4759 return 0;
4760 }
4761
4762 nvgpu_udelay(CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT);
4763 } while (nvgpu_timeout_expired(&timeout) == 0);
4764
4765 nvgpu_err(g, "Falcon mem scrubbing timeout");
4766 return -ETIMEDOUT;
4767}
4768
4769static int gr_gk20a_init_ctxsw(struct gk20a *g)
4770{
4771 u32 err = 0;
4772
4773 err = g->ops.gr.load_ctxsw_ucode(g);
4774 if (err != 0U) {
4775 goto out;
4776 }
4777
4778 err = gr_gk20a_wait_ctxsw_ready(g);
4779 if (err != 0U) {
4780 goto out;
4781 }
4782
4783out:
4784 if (err != 0U) {
4785 nvgpu_err(g, "fail");
4786 } else {
4787 nvgpu_log_fn(g, "done");
4788 }
4789
4790 return err;
4791}
4792
4793static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
4794{
4795 struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
4796 u32 i, err = 0;
4797
4798 nvgpu_log_fn(g, " ");
4799
4800 /* enable interrupts */
4801 gk20a_writel(g, gr_intr_r(), ~0);
4802 gk20a_writel(g, gr_intr_en_r(), ~0);
4803
4804 /* load non_ctx init */
4805 for (i = 0; i < sw_non_ctx_load->count; i++) {
4806 gk20a_writel(g, sw_non_ctx_load->l[i].addr,
4807 sw_non_ctx_load->l[i].value);
4808 }
4809
4810 err = gr_gk20a_wait_mem_scrubbing(g);
4811 if (err != 0U) {
4812 goto out;
4813 }
4814
4815 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4816 GR_IDLE_CHECK_DEFAULT);
4817 if (err != 0U) {
4818 goto out;
4819 }
4820
4821out:
4822 if (err != 0U) {
4823 nvgpu_err(g, "fail");
4824 } else {
4825 nvgpu_log_fn(g, "done");
4826 }
4827
4828 return err;
4829}
4830
4831static int gr_gk20a_init_access_map(struct gk20a *g)
4832{
4833 struct gr_gk20a *gr = &g->gr;
4834 struct nvgpu_mem *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
4835 u32 nr_pages =
4836 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
4837 PAGE_SIZE);
4838 u32 *whitelist = NULL;
4839 int w, num_entries = 0;
4840
4841 nvgpu_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
4842
4843 g->ops.gr.get_access_map(g, &whitelist, &num_entries);
4844
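	/* Each whitelisted register corresponds to one bit (one bit per
	 * 32-bit register offset) in the PRIV access map; set that bit. */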
4845 for (w = 0; w < num_entries; w++) {
4846 u32 map_bit, map_byte, map_shift, x;
4847 map_bit = whitelist[w] >> 2;
4848 map_byte = map_bit >> 3;
4849 map_shift = map_bit & 0x7; /* i.e. 0-7 */
4850 nvgpu_log_info(g, "access map addr:0x%x byte:0x%x bit:%d",
4851 whitelist[w], map_byte, map_shift);
4852 x = nvgpu_mem_rd32(g, mem, map_byte / sizeof(u32));
4853 x |= 1 << (
4854 (map_byte % sizeof(u32) * BITS_PER_BYTE)
4855 + map_shift);
4856 nvgpu_mem_wr32(g, mem, map_byte / sizeof(u32), x);
4857 }
4858
4859 return 0;
4860}
4861
4862static int gk20a_init_gr_setup_sw(struct gk20a *g)
4863{
4864 struct gr_gk20a *gr = &g->gr;
4865 int err = 0;
4866
4867 nvgpu_log_fn(g, " ");
4868
4869 if (gr->sw_ready) {
4870 nvgpu_log_fn(g, "skip init");
4871 return 0;
4872 }
4873
4874 gr->g = g;
4875
4876#if defined(CONFIG_GK20A_CYCLE_STATS)
4877 err = nvgpu_mutex_init(&g->gr.cs_lock);
4878 if (err != 0) {
4879 nvgpu_err(g, "Error in gr.cs_lock mutex initialization");
4880 return err;
4881 }
4882#endif
4883
4884 err = gr_gk20a_init_gr_config(g, gr);
4885 if (err != 0) {
4886 goto clean_up;
4887 }
4888
4889 err = gr_gk20a_init_map_tiles(g, gr);
4890 if (err != 0) {
4891 goto clean_up;
4892 }
4893
4894 if (g->ops.ltc.init_comptags) {
4895 err = g->ops.ltc.init_comptags(g, gr);
4896 if (err != 0) {
4897 goto clean_up;
4898 }
4899 }
4900
4901 err = gr_gk20a_init_zcull(g, gr);
4902 if (err != 0) {
4903 goto clean_up;
4904 }
4905
4906 err = g->ops.gr.alloc_global_ctx_buffers(g);
4907 if (err != 0) {
4908 goto clean_up;
4909 }
4910
4911 err = gr_gk20a_init_access_map(g);
4912 if (err != 0) {
4913 goto clean_up;
4914 }
4915
4916 gr_gk20a_load_zbc_default_table(g, gr);
4917
4918 if (g->ops.gr.init_czf_bypass) {
4919 g->ops.gr.init_czf_bypass(g);
4920 }
4921
4922 if (g->ops.gr.init_gfxp_wfi_timeout_count) {
4923 g->ops.gr.init_gfxp_wfi_timeout_count(g);
4924 }
4925
4926 err = nvgpu_mutex_init(&gr->ctx_mutex);
4927 if (err != 0) {
4928 nvgpu_err(g, "Error in gr.ctx_mutex initialization");
4929 goto clean_up;
4930 }
4931
4932 nvgpu_spinlock_init(&gr->ch_tlb_lock);
4933
4934 gr->remove_support = gk20a_remove_gr_support;
4935 gr->sw_ready = true;
4936
4937 err = nvgpu_ecc_init_support(g);
4938 if (err != 0) {
4939 goto clean_up;
4940 }
4941
4942 nvgpu_log_fn(g, "done");
4943 return 0;
4944
4945clean_up:
4946 nvgpu_err(g, "fail");
4947 gk20a_remove_gr_support(gr);
4948 return err;
4949}
4950
4951static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
4952{
4953 struct nvgpu_pmu *pmu = &g->pmu;
4954 struct mm_gk20a *mm = &g->mm;
4955 struct vm_gk20a *vm = mm->pmu.vm;
4956 int err = 0;
4957
4958 u32 size;
4959
4960 nvgpu_log_fn(g, " ");
4961
4962 size = 0;
4963
4964 err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
4965 if (err != 0) {
4966 nvgpu_err(g,
4967 "fail to query fecs pg buffer size");
4968 return err;
4969 }
4970
4971 if (pmu->pg_buf.cpu_va == NULL) {
4972 err = nvgpu_dma_alloc_map_sys(vm, size, &pmu->pg_buf);
4973 if (err != 0) {
4974 nvgpu_err(g, "failed to allocate memory");
4975 return -ENOMEM;
4976 }
4977 }
4978
4979
4980 err = gr_gk20a_fecs_set_reglist_bind_inst(g, &mm->pmu.inst_block);
4981 if (err != 0) {
4982 nvgpu_err(g,
4983 "fail to bind pmu inst to gr");
4984 return err;
4985 }
4986
4987 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.gpu_va);
4988 if (err != 0) {
4989 nvgpu_err(g,
4990 "fail to set pg buffer pmu va");
4991 return err;
4992 }
4993
4994 return err;
4995}
4996
4997int gk20a_init_gr_support(struct gk20a *g)
4998{
4999 int err = 0;
5000
5001 nvgpu_log_fn(g, " ");
5002
5003 g->gr.initialized = false;
5004
5005 /* this is required before gr_gk20a_init_ctx_state */
5006 err = nvgpu_mutex_init(&g->gr.fecs_mutex);
5007 if (err != 0) {
5008 nvgpu_err(g, "Error in gr.fecs_mutex initialization");
5009 return err;
5010 }
5011
5012 err = gr_gk20a_init_ctxsw(g);
5013 if (err != 0) {
5014 return err;
5015 }
5016
5017 /* this appears to query sw state, but fecs actually initializes the
5018 ramchain, etc., so this is hw init */
5019 err = g->ops.gr.init_ctx_state(g);
5020 if (err != 0) {
5021 return err;
5022 }
5023
5024 err = gk20a_init_gr_setup_sw(g);
5025 if (err != 0) {
5026 return err;
5027 }
5028
5029 err = gk20a_init_gr_setup_hw(g);
5030 if (err != 0) {
5031 return err;
5032 }
5033
5034 if (g->can_elpg) {
5035 err = gk20a_init_gr_bind_fecs_elpg(g);
5036 if (err != 0) {
5037 return err;
5038 }
5039 }
5040
5041 /* GR is initialized, signal possible waiters */
5042 g->gr.initialized = true;
5043 nvgpu_cond_signal(&g->gr.init_wq);
5044
5045 return 0;
5046}
5047
5048/* Wait until GR is initialized */
5049void gk20a_gr_wait_initialized(struct gk20a *g)
5050{
5051 NVGPU_COND_WAIT(&g->gr.init_wq, g->gr.initialized, 0);
5052}
5053
5054#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
5055#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
5056#define NVA297_SET_SHADER_EXCEPTIONS 0x1528
5057#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528
5058
5059#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
5060
5061void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data)
5062{
5063 nvgpu_log_fn(g, " ");
5064
5065 if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
5066 gk20a_writel(g,
5067 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 0);
5068 gk20a_writel(g,
5069 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), 0);
5070 } else {
5071 /* setup sm warp esr report masks */
5072 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
5073 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
5074 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
5075 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
5076 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
5077 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
5078 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
5079 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
5080 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
5081 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
5082 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
5083 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
5084 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
5085 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
5086 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
5087 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
5088 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
5089 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
5090 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
5091 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
5092 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
5093
5094 /* setup sm global esr report mask */
5095 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
5096 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
5097 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
5098 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
5099 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
5100 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
5101 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
5102 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
5103 }
5104}
5105
5106int gk20a_enable_gr_hw(struct gk20a *g)
5107{
5108 int err;
5109
5110 nvgpu_log_fn(g, " ");
5111
5112 err = gk20a_init_gr_prepare(g);
5113 if (err != 0) {
5114 return err;
5115 }
5116
5117 err = gk20a_init_gr_reset_enable_hw(g);
5118 if (err != 0) {
5119 return err;
5120 }
5121
5122 nvgpu_log_fn(g, "done");
5123
5124 return 0;
5125}
5126
5127int gk20a_gr_reset(struct gk20a *g)
5128{
5129 int err;
5130 u32 size;
5131
5132 g->gr.initialized = false;
5133
5134 nvgpu_mutex_acquire(&g->gr.fecs_mutex);
5135
5136 err = gk20a_enable_gr_hw(g);
5137 if (err != 0) {
5138 nvgpu_mutex_release(&g->gr.fecs_mutex);
5139 return err;
5140 }
5141
5142 err = gk20a_init_gr_setup_hw(g);
5143 if (err != 0) {
5144 nvgpu_mutex_release(&g->gr.fecs_mutex);
5145 return err;
5146 }
5147
5148 err = gr_gk20a_init_ctxsw(g);
5149 if (err != 0) {
5150 nvgpu_mutex_release(&g->gr.fecs_mutex);
5151 return err;
5152 }
5153
5154 nvgpu_mutex_release(&g->gr.fecs_mutex);
5155
5156 /* this appears to query sw state, but fecs actually initializes the
5157 ramchain, etc., so this is hw init */
5158 err = g->ops.gr.init_ctx_state(g);
5159 if (err != 0) {
5160 return err;
5161 }
5162
5163 size = 0;
5164 err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
5165 if (err != 0) {
5166 nvgpu_err(g,
5167 "fail to query fecs pg buffer size");
5168 return err;
5169 }
5170
5171 err = gr_gk20a_fecs_set_reglist_bind_inst(g, &g->mm.pmu.inst_block);
5172 if (err != 0) {
5173 nvgpu_err(g,
5174 "fail to bind pmu inst to gr");
5175 return err;
5176 }
5177
5178 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.gpu_va);
5179 if (err != 0) {
5180 nvgpu_err(g,
5181 "fail to set pg buffer pmu va");
5182 return err;
5183 }
5184
5185 nvgpu_cg_init_gr_load_gating_prod(g);
5186 nvgpu_cg_elcg_enable_no_wait(g);
5187
5188 /* GR is initialized, signal possible waiters */
5189 g->gr.initialized = true;
5190 nvgpu_cond_signal(&g->gr.init_wq);
5191
5192 return err;
5193}
5194
5195static void gk20a_gr_set_error_notifier(struct gk20a *g,
5196 struct gr_gk20a_isr_data *isr_data, u32 error_notifier)
5197{
5198 struct channel_gk20a *ch;
5199 struct tsg_gk20a *tsg;
5200 struct channel_gk20a *ch_tsg;
5201
5202 ch = isr_data->ch;
5203
5204 if (ch == NULL) {
5205 return;
5206 }
5207
5208 tsg = tsg_gk20a_from_ch(ch);
5209 if (tsg != NULL) {
5210 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
5211 nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
5212 channel_gk20a, ch_entry) {
5213 if (gk20a_channel_get(ch_tsg)) {
5214 g->ops.fifo.set_error_notifier(ch_tsg,
5215 error_notifier);
5216 gk20a_channel_put(ch_tsg);
5217 }
5218
5219 }
5220 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
5221 } else {
5222 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
5223 }
5224}
5225
5226static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
5227 struct gr_gk20a_isr_data *isr_data)
5228{
5229 nvgpu_log_fn(g, " ");
5230 gk20a_gr_set_error_notifier(g, isr_data,
5231 NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
5232 nvgpu_err(g,
5233 "gr semaphore timeout");
5234 return -EINVAL;
5235}
5236
5237static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
5238 struct gr_gk20a_isr_data *isr_data)
5239{
5240 nvgpu_log_fn(g, " ");
5241 gk20a_gr_set_error_notifier(g, isr_data,
5242 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
5243 /* This is an unrecoverable error, reset is needed */
5244 nvgpu_err(g,
5245 "gr semaphore timeout");
5246 return -EINVAL;
5247}
5248
5249static int gk20a_gr_handle_illegal_method(struct gk20a *g,
5250 struct gr_gk20a_isr_data *isr_data)
5251{
5252 int ret = g->ops.gr.handle_sw_method(g, isr_data->addr,
5253 isr_data->class_num, isr_data->offset,
5254 isr_data->data_lo);
5255 if (ret) {
5256 gk20a_gr_set_error_notifier(g, isr_data,
5257 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
5258 nvgpu_err(g, "invalid method class 0x%08x"
5259 ", offset 0x%08x address 0x%08x",
5260 isr_data->class_num, isr_data->offset, isr_data->addr);
5261 }
5262 return ret;
5263}
5264
5265static int gk20a_gr_handle_illegal_class(struct gk20a *g,
5266 struct gr_gk20a_isr_data *isr_data)
5267{
5268 nvgpu_log_fn(g, " ");
5269 gk20a_gr_set_error_notifier(g, isr_data,
5270 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5271 nvgpu_err(g,
5272 "invalid class 0x%08x, offset 0x%08x",
5273 isr_data->class_num, isr_data->offset);
5274 return -EINVAL;
5275}
5276
5277int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
5278 struct gr_gk20a_isr_data *isr_data)
5279{
5280 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
5281 int ret = 0;
5282 u32 chid = isr_data->ch != NULL ?
5283 isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
5284
5285 if (gr_fecs_intr == 0U) {
5286 return 0;
5287 }
5288
5289 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
5290 gk20a_gr_set_error_notifier(g, isr_data,
5291 NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
5292 nvgpu_err(g,
5293 "firmware method error 0x%08x for offset 0x%04x",
5294 gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
5295 isr_data->data_lo);
5296 ret = -1;
5297 } else if ((gr_fecs_intr &
5298 gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
5299 /* currently, recovery is not initiated */
5300 nvgpu_err(g, "fecs watchdog triggered for channel %u, "
5301 "cannot ctxsw anymore !!", chid);
5302 gk20a_fecs_dump_falcon_stats(g);
5303 } else if ((gr_fecs_intr &
5304 gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
5305 u32 mailbox_value = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6));
5306
5307 if (mailbox_value == MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL) {
5308 nvgpu_info(g, "ctxsw intr0 set by ucode, "
5309 "timestamp buffer full");
5310#ifdef CONFIG_GK20A_CTXSW_TRACE
5311 gk20a_fecs_trace_reset_buffer(g);
5312#else
5313 ret = -1;
5314#endif
5315 } else {
5316 nvgpu_err(g,
5317 "ctxsw intr0 set by ucode, error_code: 0x%08x",
5318 mailbox_value);
5319 ret = -1;
5320 }
5321 } else {
5322 nvgpu_err(g,
5323 "unhandled fecs error interrupt 0x%08x for channel %u",
5324 gr_fecs_intr, chid);
5325 gk20a_fecs_dump_falcon_stats(g);
5326 }
5327
5328 gk20a_writel(g, gr_fecs_host_int_clear_r(), gr_fecs_intr);
5329 return ret;
5330}
5331
5332static int gk20a_gr_handle_class_error(struct gk20a *g,
5333 struct gr_gk20a_isr_data *isr_data)
5334{
5335 u32 gr_class_error;
5336 u32 chid = isr_data->ch != NULL ?
5337 isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
5338
5339 nvgpu_log_fn(g, " ");
5340
5341 gr_class_error =
5342 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
5343 gk20a_gr_set_error_notifier(g, isr_data,
5344 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5345 nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
5346 "sub channel 0x%08x mme generated %d,"
5347 " mme pc 0x%08xdata high %d priv status %d"
5348 " unhandled intr 0x%08x for channel %u",
5349 isr_data->class_num, (isr_data->offset << 2),
5350 gr_trapped_addr_subch_v(isr_data->addr),
5351 gr_trapped_addr_mme_generated_v(isr_data->addr),
5352 gr_trapped_data_mme_pc_v(
5353 gk20a_readl(g, gr_trapped_data_mme_r())),
5354 gr_trapped_addr_datahigh_v(isr_data->addr),
5355 gr_trapped_addr_priv_v(isr_data->addr),
5356 gr_class_error, chid);
5357
5358 nvgpu_err(g, "trapped data low 0x%08x",
5359 gk20a_readl(g, gr_trapped_data_lo_r()));
5360 if (gr_trapped_addr_datahigh_v(isr_data->addr)) {
5361 nvgpu_err(g, "trapped data high 0x%08x",
5362 gk20a_readl(g, gr_trapped_data_hi_r()));
5363 }
5364
5365 return -EINVAL;
5366}
5367
5368static int gk20a_gr_handle_firmware_method(struct gk20a *g,
5369 struct gr_gk20a_isr_data *isr_data)
5370{
5371 u32 chid = isr_data->ch != NULL ?
5372 isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
5373
5374 nvgpu_log_fn(g, " ");
5375
5376 gk20a_gr_set_error_notifier(g, isr_data,
5377 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
5378 nvgpu_err(g,
5379 "firmware method 0x%08x, offset 0x%08x for channel %u",
5380 isr_data->class_num, isr_data->offset,
5381 chid);
5382 return -EINVAL;
5383}
5384
5385int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
5386 struct gr_gk20a_isr_data *isr_data)
5387{
5388 struct channel_gk20a *ch = isr_data->ch;
5389 struct tsg_gk20a *tsg;
5390
5391 if (ch == NULL) {
5392 return 0;
5393 }
5394
5395 tsg = tsg_gk20a_from_ch(ch);
5396 if (tsg != NULL) {
5397 g->ops.fifo.post_event_id(tsg,
5398 NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN);
5399
5400 nvgpu_cond_broadcast(&ch->semaphore_wq);
5401 } else {
5402 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
5403 }
5404
5405 return 0;
5406}
5407
5408#if defined(CONFIG_GK20A_CYCLE_STATS)
5409static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
5410 u32 offset)
5411{
5412 /* support only 24-bit 4-byte aligned offsets */
5413 bool valid = !(offset & 0xFF000003);
5414
5415 if (g->allow_all)
5416 return true;
5417
5418 /* whitelist check */
5419 valid = valid &&
5420 is_bar0_global_offset_whitelisted_gk20a(g, offset);
5421 /* resource size check in case there was a problem
5422 * with allocating the assumed size of bar0 */
5423 valid = valid && gk20a_io_valid_reg(g, offset);
5424 return valid;
5425}
5426#endif
5427
5428int gk20a_gr_handle_notify_pending(struct gk20a *g,
5429 struct gr_gk20a_isr_data *isr_data)
5430{
5431 struct channel_gk20a *ch = isr_data->ch;
5432
5433#if defined(CONFIG_GK20A_CYCLE_STATS)
5434 void *virtual_address;
5435 u32 buffer_size;
5436 u32 offset;
5437 bool exit;
5438#endif
5439 if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) {
5440 return 0;
5441 }
5442
5443#if defined(CONFIG_GK20A_CYCLE_STATS)
5444 /* GL will never use payload 0 for cycle state */
5445 if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
5446 return 0;
5447
5448 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
5449
5450 virtual_address = ch->cyclestate.cyclestate_buffer;
5451 buffer_size = ch->cyclestate.cyclestate_buffer_size;
5452 offset = isr_data->data_lo;
5453 exit = false;
5454 while (!exit) {
5455 struct share_buffer_head *sh_hdr;
5456 u32 min_element_size;
5457
5458 /* validate offset */
5459 if (offset + sizeof(struct share_buffer_head) > buffer_size ||
5460 offset + sizeof(struct share_buffer_head) < offset) {
5461 nvgpu_err(g,
5462 "cyclestats buffer overrun at offset 0x%x",
5463 offset);
5464 break;
5465 }
5466
5467 sh_hdr = (struct share_buffer_head *)
5468 ((char *)virtual_address + offset);
5469
5470 min_element_size =
5471 (sh_hdr->operation == OP_END ?
5472 sizeof(struct share_buffer_head) :
5473 sizeof(struct gk20a_cyclestate_buffer_elem));
5474
5475 /* validate sh_hdr->size */
5476 if (sh_hdr->size < min_element_size ||
5477 offset + sh_hdr->size > buffer_size ||
5478 offset + sh_hdr->size < offset) {
5479 nvgpu_err(g,
5480 "bad cyclestate buffer header size at offset 0x%x",
5481 offset);
5482 sh_hdr->failed = true;
5483 break;
5484 }
5485
5486 switch (sh_hdr->operation) {
5487 case OP_END:
5488 exit = true;
5489 break;
5490
5491 case BAR0_READ32:
5492 case BAR0_WRITE32:
5493 {
5494 struct gk20a_cyclestate_buffer_elem *op_elem =
5495 (struct gk20a_cyclestate_buffer_elem *)sh_hdr;
5496 bool valid = is_valid_cyclestats_bar0_offset_gk20a(
5497 g, op_elem->offset_bar0);
5498 u32 raw_reg;
5499 u64 mask_orig;
5500 u64 v;
5501
5502 if (!valid) {
5503 nvgpu_err(g,
5504 "invalid cycletstats op offset: 0x%x",
5505 op_elem->offset_bar0);
5506
5507 sh_hdr->failed = exit = true;
5508 break;
5509 }
5510
5511
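	/* mask covering bits first_bit..last_bit inclusive */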
5512 mask_orig =
5513 ((1ULL <<
5514 (op_elem->last_bit + 1))
5515 -1)&~((1ULL <<
5516 op_elem->first_bit)-1);
5517
5518 raw_reg =
5519 gk20a_readl(g,
5520 op_elem->offset_bar0);
5521
5522 switch (sh_hdr->operation) {
5523 case BAR0_READ32:
5524 op_elem->data =
5525 (raw_reg & mask_orig)
5526 >> op_elem->first_bit;
5527 break;
5528
5529 case BAR0_WRITE32:
5530 v = 0;
5531 if ((unsigned int)mask_orig !=
5532 (unsigned int)~0) {
5533 v = (unsigned int)
5534 (raw_reg & ~mask_orig);
5535 }
5536
5537 v |= ((op_elem->data
5538 << op_elem->first_bit)
5539 & mask_orig);
5540
5541 gk20a_writel(g,
5542 op_elem->offset_bar0,
5543 (unsigned int)v);
5544 break;
5545 default:
5546 /* nop ok?*/
5547 break;
5548 }
5549 }
5550 break;
5551
5552 default:
5553 /* no operation content case */
5554 exit = true;
5555 break;
5556 }
5557 sh_hdr->completed = true;
5558 offset += sh_hdr->size;
5559 }
5560 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
5561#endif
5562 nvgpu_log_fn(g, " ");
5563 nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
5564 return 0;
5565}
5566
5567/* Used by sw interrupt thread to translate current ctx to chid.
5568 * Also used by regops to translate current ctx to chid and tsgid.
5569 * For performance, we don't want to go through 128 channels every time.
5570 * curr_ctx should be the value read from gr_fecs_current_ctx_r().
5571 * A small tlb is used here to cache translation.
5572 *
5573 * Returned channel must be freed with gk20a_channel_put() */
5574static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5575 struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid)
5576{
5577 struct fifo_gk20a *f = &g->fifo;
5578 struct gr_gk20a *gr = &g->gr;
5579 u32 chid = -1;
5580 u32 tsgid = NVGPU_INVALID_TSG_ID;
5581 u32 i;
5582 struct channel_gk20a *ret = NULL;
5583
5584 /* when contexts are unloaded from GR, the valid bit is reset
5585 * but the instance pointer information remains intact.
5586 * This might be called from gr_isr where contexts might be
5587 * unloaded. No need to check ctx_valid bit
5588 */
5589
5590 nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
5591
5592 /* check cache first */
5593 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
5594 if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
5595 chid = gr->chid_tlb[i].chid;
5596 tsgid = gr->chid_tlb[i].tsgid;
5597 ret = gk20a_channel_from_id(g, chid);
5598 goto unlock;
5599 }
5600 }
5601
5602 /* slow path */
5603 for (chid = 0; chid < f->num_channels; chid++) {
5604 struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
5605
5606 if (ch == NULL) {
5607 continue;
5608 }
5609
5610 if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >>
5611 ram_in_base_shift_v()) ==
5612 gr_fecs_current_ctx_ptr_v(curr_ctx)) {
5613 tsgid = ch->tsgid;
5614 /* found it */
5615 ret = ch;
5616 break;
5617 }
5618 gk20a_channel_put(ch);
5619 }
5620
5621 if (ret == NULL) {
5622 goto unlock;
5623 }
5624
5625 /* add to free tlb entry */
5626 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
5627 if (gr->chid_tlb[i].curr_ctx == 0) {
5628 gr->chid_tlb[i].curr_ctx = curr_ctx;
5629 gr->chid_tlb[i].chid = chid;
5630 gr->chid_tlb[i].tsgid = tsgid;
5631 goto unlock;
5632 }
5633 }
5634
5635 /* no free entry, flush one */
5636 gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
5637 gr->chid_tlb[gr->channel_tlb_flush_index].chid = chid;
5638 gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid;
5639
5640 gr->channel_tlb_flush_index =
5641 (gr->channel_tlb_flush_index + 1) &
5642 (GR_CHANNEL_MAP_TLB_SIZE - 1);
5643
5644unlock:
5645 nvgpu_spinlock_release(&gr->ch_tlb_lock);
5646 if (curr_tsgid) {
5647 *curr_tsgid = tsgid;
5648 }
5649 return ret;
5650}
5651
5652int gk20a_gr_lock_down_sm(struct gk20a *g,
5653 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
5654 bool check_errors)
5655{
5656 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5657 u32 dbgr_control0;
5658
5659 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5660 "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
5661
5662 /* assert stop trigger */
5663 dbgr_control0 =
5664 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
5665 dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
5666 gk20a_writel(g,
5667 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
5668
5669 return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
5670 check_errors);
5671}
5672
5673bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
5674{
5675 u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
5676
5677 /* check if an sm debugger is attached.
5678 * assumption: all SMs will have debug mode enabled/disabled
5679 * uniformly. */
5680 if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
5681 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) {
5682 return true;
5683 }
5684
5685 return false;
5686}
5687
5688int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
5689 bool *post_event, struct channel_gk20a *fault_ch,
5690 u32 *hww_global_esr)
5691{
5692 int ret = 0;
5693 bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
5694 bool disable_sm_exceptions = true;
5695 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5696 bool sm_debugger_attached;
5697 u32 global_esr, warp_esr, global_mask;
5698
5699 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
5700
5701 sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
5702
5703 global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
5704 *hww_global_esr = global_esr;
5705 warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
5706 global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g);
5707
5708 if (!sm_debugger_attached) {
5709 nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
5710 global_esr, warp_esr);
5711 return -EFAULT;
5712 }
5713
5714 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5715 "sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);
5716
5717 gr_gk20a_elpg_protected_call(g,
5718 g->ops.gr.record_sm_error_state(g, gpc, tpc, sm, fault_ch));
5719
5720 if (g->ops.gr.pre_process_sm_exception) {
5721 ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
5722 global_esr, warp_esr,
5723 sm_debugger_attached,
5724 fault_ch,
5725 &early_exit,
5726 &ignore_debugger);
5727 if (ret) {
5728 nvgpu_err(g, "could not pre-process sm error!");
5729 return ret;
5730 }
5731 }
5732
5733 if (early_exit) {
5734 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5735 "returning early");
5736 return ret;
5737 }
5738
5739 /*
5740 * Disable forwarding of tpc exceptions,
5741 * the debugger will reenable exceptions after servicing them.
5742 *
5743 * Do not disable exceptions if the only SM exception is BPT_INT
5744 */
5745 if ((global_esr == gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
5746 && (warp_esr == 0)) {
5747 disable_sm_exceptions = false;
5748 }
5749
5750 if (!ignore_debugger && disable_sm_exceptions) {
5751 u32 tpc_exception_en = gk20a_readl(g,
5752 gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
5753 offset);
5754 tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
5755 gk20a_writel(g,
5756 gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + offset,
5757 tpc_exception_en);
5758 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM Exceptions disabled");
5759 }
5760
5761 /* if a debugger is present and an error has occurred, do a warp sync */
5762 if (!ignore_debugger &&
5763 ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
5764 nvgpu_log(g, gpu_dbg_intr, "warp sync needed");
5765 do_warp_sync = true;
5766 }
5767
5768 if (do_warp_sync) {
5769 ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
5770 global_mask, true);
5771 if (ret) {
5772 nvgpu_err(g, "sm did not lock down!");
5773 return ret;
5774 }
5775 }
5776
5777 if (ignore_debugger) {
5778 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5779 "ignore_debugger set, skipping event posting");
5780 } else {
5781 *post_event = true;
5782 }
5783
5784 return ret;
5785}
5786
5787int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
5788 bool *post_event)
5789{
5790 int ret = 0;
5791 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
5792 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
5793 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
5794 u32 esr;
5795
5796 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
5797
5798 esr = gk20a_readl(g,
5799 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
5800 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
5801
5802 gk20a_writel(g,
5803 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
5804 esr);
5805
5806 return ret;
5807}
5808
5809void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
5810 u32 *esr_sm_sel)
5811{
5812 *esr_sm_sel = 1;
5813}
5814
5815static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
5816 bool *post_event, struct channel_gk20a *fault_ch,
5817 u32 *hww_global_esr)
5818{
5819 int ret = 0;
5820 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5821 u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
5822 + offset);
5823 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
5824
5825 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5826 "GPC%d TPC%d: pending exception 0x%x",
5827 gpc, tpc, tpc_exception);
5828
5829	/* check if an SM exception is pending */
5830 if (gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(tpc_exception) ==
5831 gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v()) {
5832 u32 esr_sm_sel, sm;
5833
5834 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5835 "GPC%d TPC%d: SM exception pending", gpc, tpc);
5836
5837 if (g->ops.gr.handle_tpc_sm_ecc_exception) {
5838 g->ops.gr.handle_tpc_sm_ecc_exception(g, gpc, tpc,
5839 post_event, fault_ch, hww_global_esr);
5840 }
5841
5842 g->ops.gr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel);
5843
5844 for (sm = 0; sm < sm_per_tpc; sm++) {
5845
5846 if ((esr_sm_sel & BIT32(sm)) == 0U) {
5847 continue;
5848 }
5849
5850 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5851 "GPC%d TPC%d: SM%d exception pending",
5852 gpc, tpc, sm);
5853
5854 ret |= g->ops.gr.handle_sm_exception(g,
5855 gpc, tpc, sm, post_event, fault_ch,
5856 hww_global_esr);
5857			/* clear the HWWs; this also clears the TPC and
5858			 * GPC exceptions. They should be cleared only
5859			 * if the SM is locked down or empty.
5860			 */
5861 g->ops.gr.clear_sm_hww(g,
5862 gpc, tpc, sm, *hww_global_esr);
5863
5864 }
5865
5866 }
5867
5868	/* check if a TEX exception is pending */
5869 if (gr_gpc0_tpc0_tpccs_tpc_exception_tex_v(tpc_exception) ==
5870 gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v()) {
5871 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5872 "GPC%d TPC%d: TEX exception pending", gpc, tpc);
5873 ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event);
5874 }
5875
5876 if (g->ops.gr.handle_tpc_mpc_exception) {
5877 ret |= g->ops.gr.handle_tpc_mpc_exception(g,
5878 gpc, tpc, post_event);
5879 }
5880
5881 return ret;
5882}
5883
5884static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
5885 struct channel_gk20a *fault_ch, u32 *hww_global_esr)
5886{
5887 int ret = 0;
5888 u32 gpc_offset, gpc, tpc;
5889 struct gr_gk20a *gr = &g->gr;
5890 u32 exception1 = gk20a_readl(g, gr_exception1_r());
5891 u32 gpc_exception;
5892
5893 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " ");
5894
5895 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
5896 if ((exception1 & (1 << gpc)) == 0) {
5897 continue;
5898 }
5899
5900 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5901 "GPC%d exception pending", gpc);
5902
5903 gpc_offset = gk20a_gr_gpc_offset(g, gpc);
5904
5905 gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r()
5906 + gpc_offset);
5907
5908 /* check if any tpc has an exception */
5909 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
5910 if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) &
5911 (1 << tpc)) == 0) {
5912 continue;
5913 }
5914
5915 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5916 "GPC%d: TPC%d exception pending", gpc, tpc);
5917
5918 ret |= gk20a_gr_handle_tpc_exception(g, gpc, tpc,
5919 post_event, fault_ch, hww_global_esr);
5920
5921 }
5922
5923 /* Handle GCC exception */
5924 if ((gr_gpc0_gpccs_gpc_exception_gcc_v(gpc_exception) != 0U) &&
5925 (g->ops.gr.handle_gcc_exception != NULL)) {
5926 int gcc_ret = 0;
5927 gcc_ret = g->ops.gr.handle_gcc_exception(g, gpc, tpc,
5928 post_event, fault_ch, hww_global_esr);
5929 ret |= (ret != 0) ? ret : gcc_ret;
5930 }
5931
5932 /* Handle GPCCS exceptions */
5933 if (g->ops.gr.handle_gpc_gpccs_exception) {
5934 int ret_ecc = 0;
5935 ret_ecc = g->ops.gr.handle_gpc_gpccs_exception(g, gpc,
5936 gpc_exception);
5937 ret |= (ret != 0) ? ret : ret_ecc;
5938 }
5939
5940 /* Handle GPCMMU exceptions */
5941 if (g->ops.gr.handle_gpc_gpcmmu_exception) {
5942 int ret_mmu = 0;
5943
5944 ret_mmu = g->ops.gr.handle_gpc_gpcmmu_exception(g, gpc,
5945 gpc_exception);
5946 ret |= (ret != 0) ? ret : ret_mmu;
5947 }
5948
5949 }
5950
5951 return ret;
5952}
5953
5954static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg,
5955 u32 global_esr)
5956{
5957 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) {
5958 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT);
5959 }
5960
5961 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) {
5962 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE);
5963 }
5964
5965 return 0;
5966}
5967
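/*
 * Top-level PGRAPH stall interrupt handler. In outline it: reads gr_intr,
 * blocks gpfifo access and semaphores, latches the trapped method and the
 * faulting channel/TSG, services each pending interrupt bit (notify,
 * semaphore, illegal method/class, FECS error, class error, firmware
 * method, exceptions), triggers engine recovery if any handler asked for
 * a reset, clears anything left unhandled, re-enables gpfifo access and
 * finally posts pending BPT events for the TSG.
 */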
5968int gk20a_gr_isr(struct gk20a *g)
5969{
5970 struct gr_gk20a_isr_data isr_data;
5971 u32 grfifo_ctl;
5972 u32 obj_table;
5973 bool need_reset = false;
5974 u32 gr_intr = gk20a_readl(g, gr_intr_r());
5975 struct channel_gk20a *ch = NULL;
5976 struct channel_gk20a *fault_ch = NULL;
5977 u32 tsgid = NVGPU_INVALID_TSG_ID;
5978 struct tsg_gk20a *tsg = NULL;
5979 u32 gr_engine_id;
5980 u32 global_esr = 0;
5981 u32 chid;
5982
5983 nvgpu_log_fn(g, " ");
5984 nvgpu_log(g, gpu_dbg_intr, "pgraph intr 0x%08x", gr_intr);
5985
5986 if (gr_intr == 0U) {
5987 return 0;
5988 }
5989
5990 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
5991 if (gr_engine_id != FIFO_INVAL_ENGINE_ID) {
5992 gr_engine_id = BIT(gr_engine_id);
5993 }
5994
5995 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
5996 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
5997 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
5998
5999 gk20a_writel(g, gr_gpfifo_ctl_r(),
6000 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
6001 gr_gpfifo_ctl_semaphore_access_f(0));
6002
6003 isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
6004 isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
6005 isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
6006 isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
6007 isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
6008 isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
6009 obj_table = (isr_data.sub_chan < 4) ? gk20a_readl(g,
6010 gr_fe_object_table_r(isr_data.sub_chan)) : 0;
6011 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
6012
6013 ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, &tsgid);
6014 isr_data.ch = ch;
6015 chid = ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID;
6016
6017 if (ch == NULL) {
6018 nvgpu_err(g, "pgraph intr: 0x%08x, chid: INVALID", gr_intr);
6019 } else {
6020 tsg = tsg_gk20a_from_ch(ch);
6021 if (tsg == NULL) {
6022 nvgpu_err(g, "pgraph intr: 0x%08x, chid: %d "
6023 "not bound to tsg", gr_intr, chid);
6024 }
6025 }
6026
6027 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
6028 "channel %d: addr 0x%08x, "
6029 "data 0x%08x 0x%08x,"
6030 "ctx 0x%08x, offset 0x%08x, "
6031 "subchannel 0x%08x, class 0x%08x",
6032 chid, isr_data.addr,
6033 isr_data.data_hi, isr_data.data_lo,
6034 isr_data.curr_ctx, isr_data.offset,
6035 isr_data.sub_chan, isr_data.class_num);
6036
6037 if (gr_intr & gr_intr_notify_pending_f()) {
6038 g->ops.gr.handle_notify_pending(g, &isr_data);
6039 gk20a_writel(g, gr_intr_r(),
6040 gr_intr_notify_reset_f());
6041 gr_intr &= ~gr_intr_notify_pending_f();
6042 }
6043
6044 if (gr_intr & gr_intr_semaphore_pending_f()) {
6045 g->ops.gr.handle_semaphore_pending(g, &isr_data);
6046 gk20a_writel(g, gr_intr_r(),
6047 gr_intr_semaphore_reset_f());
6048 gr_intr &= ~gr_intr_semaphore_pending_f();
6049 }
6050
6051 if (gr_intr & gr_intr_semaphore_timeout_pending_f()) {
6052 if (gk20a_gr_handle_semaphore_timeout_pending(g,
6053 &isr_data) != 0) {
6054 need_reset = true;
6055 }
6056 gk20a_writel(g, gr_intr_r(),
6057 gr_intr_semaphore_reset_f());
6058 gr_intr &= ~gr_intr_semaphore_pending_f();
6059 }
6060
6061 if (gr_intr & gr_intr_illegal_notify_pending_f()) {
6062 if (gk20a_gr_intr_illegal_notify_pending(g,
6063 &isr_data) != 0) {
6064 need_reset = true;
6065 }
6066 gk20a_writel(g, gr_intr_r(),
6067 gr_intr_illegal_notify_reset_f());
6068 gr_intr &= ~gr_intr_illegal_notify_pending_f();
6069 }
6070
6071 if (gr_intr & gr_intr_illegal_method_pending_f()) {
6072 if (gk20a_gr_handle_illegal_method(g, &isr_data) != 0) {
6073 need_reset = true;
6074 }
6075 gk20a_writel(g, gr_intr_r(),
6076 gr_intr_illegal_method_reset_f());
6077 gr_intr &= ~gr_intr_illegal_method_pending_f();
6078 }
6079
6080 if (gr_intr & gr_intr_illegal_class_pending_f()) {
6081 if (gk20a_gr_handle_illegal_class(g, &isr_data) != 0) {
6082 need_reset = true;
6083 }
6084 gk20a_writel(g, gr_intr_r(),
6085 gr_intr_illegal_class_reset_f());
6086 gr_intr &= ~gr_intr_illegal_class_pending_f();
6087 }
6088
6089 if (gr_intr & gr_intr_fecs_error_pending_f()) {
6090 if (g->ops.gr.handle_fecs_error(g, ch, &isr_data) != 0) {
6091 need_reset = true;
6092 }
6093 gk20a_writel(g, gr_intr_r(),
6094 gr_intr_fecs_error_reset_f());
6095 gr_intr &= ~gr_intr_fecs_error_pending_f();
6096 }
6097
6098 if (gr_intr & gr_intr_class_error_pending_f()) {
6099 if (gk20a_gr_handle_class_error(g, &isr_data) != 0) {
6100 need_reset = true;
6101 }
6102 gk20a_writel(g, gr_intr_r(),
6103 gr_intr_class_error_reset_f());
6104 gr_intr &= ~gr_intr_class_error_pending_f();
6105 }
6106
6107 /* this one happens if someone tries to hit a non-whitelisted
6108 * register using set_falcon[4] */
6109 if (gr_intr & gr_intr_firmware_method_pending_f()) {
6110 if (gk20a_gr_handle_firmware_method(g, &isr_data) != 0) {
6111 need_reset = true;
6112 }
6113 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n");
6114 gk20a_writel(g, gr_intr_r(),
6115 gr_intr_firmware_method_reset_f());
6116 gr_intr &= ~gr_intr_firmware_method_pending_f();
6117 }
6118
6119 if (gr_intr & gr_intr_exception_pending_f()) {
6120 u32 exception = gk20a_readl(g, gr_exception_r());
6121
6122 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
6123
6124 if (exception & gr_exception_fe_m()) {
6125 u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
6126 u32 info = gk20a_readl(g, gr_fe_hww_esr_info_r());
6127
6128 nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x",
6129 fe, info);
6130 gk20a_writel(g, gr_fe_hww_esr_r(),
6131 gr_fe_hww_esr_reset_active_f());
6132 need_reset = true;
6133 }
6134
6135 if (exception & gr_exception_memfmt_m()) {
6136 u32 memfmt = gk20a_readl(g, gr_memfmt_hww_esr_r());
6137
6138 nvgpu_err(g, "memfmt exception: esr %08x", memfmt);
6139 gk20a_writel(g, gr_memfmt_hww_esr_r(),
6140 gr_memfmt_hww_esr_reset_active_f());
6141 need_reset = true;
6142 }
6143
6144 if (exception & gr_exception_pd_m()) {
6145 u32 pd = gk20a_readl(g, gr_pd_hww_esr_r());
6146
6147 nvgpu_err(g, "pd exception: esr 0x%08x", pd);
6148 gk20a_writel(g, gr_pd_hww_esr_r(),
6149 gr_pd_hww_esr_reset_active_f());
6150 need_reset = true;
6151 }
6152
6153 if (exception & gr_exception_scc_m()) {
6154 u32 scc = gk20a_readl(g, gr_scc_hww_esr_r());
6155
6156 nvgpu_err(g, "scc exception: esr 0x%08x", scc);
6157 gk20a_writel(g, gr_scc_hww_esr_r(),
6158 gr_scc_hww_esr_reset_active_f());
6159 need_reset = true;
6160 }
6161
6162 if (exception & gr_exception_ds_m()) {
6163 u32 ds = gk20a_readl(g, gr_ds_hww_esr_r());
6164
6165 nvgpu_err(g, "ds exception: esr: 0x%08x", ds);
6166 gk20a_writel(g, gr_ds_hww_esr_r(),
6167 gr_ds_hww_esr_reset_task_f());
6168 need_reset = true;
6169 }
6170
6171 if (exception & gr_exception_ssync_m()) {
6172 if (g->ops.gr.handle_ssync_hww) {
6173 if (g->ops.gr.handle_ssync_hww(g) != 0) {
6174 need_reset = true;
6175 }
6176 } else {
6177 nvgpu_err(g, "unhandled ssync exception");
6178 }
6179 }
6180
6181 if (exception & gr_exception_mme_m()) {
6182 u32 mme = gk20a_readl(g, gr_mme_hww_esr_r());
6183 u32 info = gk20a_readl(g, gr_mme_hww_esr_info_r());
6184
6185 nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x",
6186 mme, info);
6187 gk20a_writel(g, gr_mme_hww_esr_r(),
6188 gr_mme_hww_esr_reset_active_f());
6189 need_reset = true;
6190 }
6191
6192 if (exception & gr_exception_sked_m()) {
6193 u32 sked = gk20a_readl(g, gr_sked_hww_esr_r());
6194
6195 nvgpu_err(g, "sked exception: esr 0x%08x", sked);
6196 gk20a_writel(g, gr_sked_hww_esr_r(),
6197 gr_sked_hww_esr_reset_active_f());
6198 need_reset = true;
6199 }
6200
6201 /* check if a gpc exception has occurred */
6202 if (((exception & gr_exception_gpc_m()) != 0U) &&
6203 !need_reset) {
6204 bool post_event = false;
6205
6206 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
6207 "GPC exception pending");
6208
6209 if (tsg != NULL) {
6210 fault_ch = isr_data.ch;
6211 }
6212
6213 /* fault_ch can be NULL */
6214 /* check if any gpc has an exception */
6215 if (gk20a_gr_handle_gpc_exception(g, &post_event,
6216 fault_ch, &global_esr) != 0) {
6217 need_reset = true;
6218 }
6219
6220 /* signal clients waiting on an event */
6221 if (g->ops.gr.sm_debugger_attached(g) &&
6222 post_event && (fault_ch != NULL)) {
6223 g->ops.debugger.post_events(fault_ch);
6224 }
6225 }
6226
6227 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
6228 gr_intr &= ~gr_intr_exception_pending_f();
6229
6230 if (need_reset) {
6231 nvgpu_err(g, "set gr exception notifier");
6232 gk20a_gr_set_error_notifier(g, &isr_data,
6233 NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
6234 }
6235 }
6236
6237 if (need_reset) {
6238 if (tsg != NULL) {
6239 gk20a_fifo_recover(g, gr_engine_id,
6240 tsgid, true, true, true,
6241 RC_TYPE_GR_FAULT);
6242 } else {
6243 if (ch != NULL) {
6244 nvgpu_err(g, "chid: %d referenceable but not "
6245 "bound to tsg", chid);
6246 }
6247 gk20a_fifo_recover(g, gr_engine_id,
6248 0, false, false, true,
6249 RC_TYPE_GR_FAULT);
6250 }
6251 }
6252
6253 if (gr_intr != 0U) {
6254 /* clear unhandled interrupts */
6255 if (ch == NULL) {
6256 /*
6257 * This is probably an interrupt during
6258 * gk20a_free_channel()
6259 */
6260 nvgpu_err(g, "unhandled gr intr 0x%08x for "
6261 "unreferenceable channel, clearing",
6262 gr_intr);
6263 } else {
6264 nvgpu_err(g, "unhandled gr intr 0x%08x for chid: %d",
6265 gr_intr, chid);
6266 }
6267 gk20a_writel(g, gr_intr_r(), gr_intr);
6268 }
6269
6270 gk20a_writel(g, gr_gpfifo_ctl_r(),
6271 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
6272 gr_gpfifo_ctl_semaphore_access_f(1));
6273
6274
6275 /* Posting of BPT events should be the last thing in this function */
6276 if ((global_esr != 0U) && (tsg != NULL)) {
6277 gk20a_gr_post_bpt_events(g, tsg, global_esr);
6278 }
6279
6280 if (ch) {
6281 gk20a_channel_put(ch);
6282 }
6283
6284 return 0;
6285}
6286
6287u32 gk20a_gr_nonstall_isr(struct gk20a *g)
6288{
6289 u32 ops = 0;
6290 u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r());
6291
6292 nvgpu_log(g, gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr);
6293
6294 if ((gr_intr & gr_intr_nonstall_trap_pending_f()) != 0U) {
6295 /* Clear the interrupt */
6296 gk20a_writel(g, gr_intr_nonstall_r(),
6297 gr_intr_nonstall_trap_pending_f());
6298 ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE |
6299 GK20A_NONSTALL_OPS_POST_EVENTS);
6300 }
6301 return ops;
6302}
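
/*
 * A minimal caller-side sketch. The assumption here is that a nonstall
 * worker acts on the returned ops mask; the two helpers named in the
 * body are placeholders, not functions from this driver:
 *
 *	u32 ops = gk20a_gr_nonstall_isr(g);
 *
 *	if ((ops & GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U)
 *		wake_semaphore_waiters(g);	(placeholder)
 *	if ((ops & GK20A_NONSTALL_OPS_POST_EVENTS) != 0U)
 *		post_nonstall_events(g);	(placeholder)
 */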
6303
6304int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
6305{
6306 BUG_ON(size == NULL);
6307 return gr_gk20a_submit_fecs_method_op(g,
6308 (struct fecs_method_op_gk20a) {
6309 .mailbox.id = 0,
6310 .mailbox.data = 0,
6311 .mailbox.clr = ~0,
6312 .method.data = 1,
6313 .method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(),
6314 .mailbox.ret = size,
6315 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
6316 .mailbox.ok = 0,
6317 .cond.fail = GR_IS_UCODE_OP_SKIP,
6318 .mailbox.fail = 0}, false);
6319}
6320
6321int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
6322 struct nvgpu_mem *inst_block)
6323{
6324 u32 data = fecs_current_ctx_data(g, inst_block);
6325
6326 return gr_gk20a_submit_fecs_method_op(g,
6327 (struct fecs_method_op_gk20a){
6328 .mailbox.id = 4,
6329 .mailbox.data = data,
6330 .mailbox.clr = ~0,
6331 .method.data = 1,
6332 .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),
6333 .mailbox.ret = NULL,
6334 .cond.ok = GR_IS_UCODE_OP_EQUAL,
6335 .mailbox.ok = 1,
6336 .cond.fail = GR_IS_UCODE_OP_SKIP,
6337 .mailbox.fail = 0}, false);
6338}
6339
6340int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
6341{
6342 return gr_gk20a_submit_fecs_method_op(g,
6343 (struct fecs_method_op_gk20a) {
6344 .mailbox.id = 4,
6345 .mailbox.data = u64_lo32(pmu_va >> 8),
6346 .mailbox.clr = ~0,
6347 .method.data = 1,
6348 .method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(),
6349 .mailbox.ret = NULL,
6350 .cond.ok = GR_IS_UCODE_OP_EQUAL,
6351 .mailbox.ok = 1,
6352 .cond.fail = GR_IS_UCODE_OP_SKIP,
6353 .mailbox.fail = 0}, false);
6354}
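
/*
 * A rough sequence sketch for the three reglist helpers above. The
 * ordering is an assumption based on their semantics (query the image
 * size first, then tell FECS which instance block and GPU VA to use);
 * the allocation step in the middle is elided:
 *
 *	u32 size;
 *
 *	err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
 *	... allocate and map a buffer of "size" bytes for the PMU ...
 *	err = gr_gk20a_fecs_set_reglist_bind_inst(g, inst_block);
 *	err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu_va);
 */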
6355
6356int gk20a_gr_suspend(struct gk20a *g)
6357{
6358	int ret = 0;
6359
6360 nvgpu_log_fn(g, " ");
6361
6362 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
6363 GR_IDLE_CHECK_DEFAULT);
6364 if (ret) {
6365 return ret;
6366 }
6367
6368 gk20a_writel(g, gr_gpfifo_ctl_r(),
6369 gr_gpfifo_ctl_access_disabled_f());
6370
6371 /* disable gr intr */
6372 gk20a_writel(g, gr_intr_r(), 0);
6373 gk20a_writel(g, gr_intr_en_r(), 0);
6374
6375 /* disable all exceptions */
6376 gk20a_writel(g, gr_exception_r(), 0);
6377 gk20a_writel(g, gr_exception_en_r(), 0);
6378 gk20a_writel(g, gr_exception1_r(), 0);
6379 gk20a_writel(g, gr_exception1_en_r(), 0);
6380 gk20a_writel(g, gr_exception2_r(), 0);
6381 gk20a_writel(g, gr_exception2_en_r(), 0);
6382
6383 gk20a_gr_flush_channel_tlb(&g->gr);
6384
6385 g->gr.initialized = false;
6386
6387 nvgpu_log_fn(g, "done");
6388 return ret;
6389}
6390
6391static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
6392 u32 addr,
6393 bool is_quad, u32 quad,
6394 u32 *context_buffer,
6395 u32 context_buffer_size,
6396 u32 *priv_offset);
6397
6398static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
6399 u32 addr,
6400 u32 *priv_offset);
6401
6402/* This function will decode a priv address and return the partition type and numbers. */
6403int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6404 enum ctxsw_addr_type *addr_type,
6405 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
6406 u32 *broadcast_flags)
6407{
6408 u32 gpc_addr;
6409
6410 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6411
6412 /* setup defaults */
6413 *addr_type = CTXSW_ADDR_TYPE_SYS;
6414 *broadcast_flags = PRI_BROADCAST_FLAGS_NONE;
6415 *gpc_num = 0;
6416 *tpc_num = 0;
6417 *ppc_num = 0;
6418 *be_num = 0;
6419
6420 if (pri_is_gpc_addr(g, addr)) {
6421 *addr_type = CTXSW_ADDR_TYPE_GPC;
6422 gpc_addr = pri_gpccs_addr_mask(addr);
6423 if (pri_is_gpc_addr_shared(g, addr)) {
6424 *addr_type = CTXSW_ADDR_TYPE_GPC;
6425 *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
6426 } else {
6427 *gpc_num = pri_get_gpc_num(g, addr);
6428 }
6429
6430 if (pri_is_ppc_addr(g, gpc_addr)) {
6431 *addr_type = CTXSW_ADDR_TYPE_PPC;
6432 if (pri_is_ppc_addr_shared(g, gpc_addr)) {
6433 *broadcast_flags |= PRI_BROADCAST_FLAGS_PPC;
6434 return 0;
6435 }
6436 }
6437 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
6438 *addr_type = CTXSW_ADDR_TYPE_TPC;
6439 if (pri_is_tpc_addr_shared(g, gpc_addr)) {
6440 *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC;
6441 return 0;
6442 }
6443 *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6444 }
6445 return 0;
6446 } else if (pri_is_be_addr(g, addr)) {
6447 *addr_type = CTXSW_ADDR_TYPE_BE;
6448 if (pri_is_be_addr_shared(g, addr)) {
6449 *broadcast_flags |= PRI_BROADCAST_FLAGS_BE;
6450 return 0;
6451 }
6452 *be_num = pri_get_be_num(g, addr);
6453 return 0;
6454 } else if (g->ops.ltc.pri_is_ltc_addr(g, addr)) {
6455 *addr_type = CTXSW_ADDR_TYPE_LTCS;
6456 if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) {
6457 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
6458 } else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) {
6459 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
6460 }
6461 return 0;
6462 } else if (pri_is_fbpa_addr(g, addr)) {
6463 *addr_type = CTXSW_ADDR_TYPE_FBPA;
6464 if (pri_is_fbpa_addr_shared(g, addr)) {
6465 *broadcast_flags |= PRI_BROADCAST_FLAGS_FBPA;
6466 return 0;
6467 }
6468 return 0;
6469 } else if ((g->ops.gr.is_egpc_addr != NULL) &&
6470 g->ops.gr.is_egpc_addr(g, addr)) {
6471 return g->ops.gr.decode_egpc_addr(g,
6472 addr, addr_type, gpc_num,
6473 tpc_num, broadcast_flags);
6474 } else {
6475 *addr_type = CTXSW_ADDR_TYPE_SYS;
6476 return 0;
6477 }
6478 /* PPC!?!?!?! */
6479
6480 /*NOTREACHED*/
6481 return -EINVAL;
6482}
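
/*
 * Worked example (illustrative; exact values depend on the chip's pri
 * layout): decoding a gr_gpcs_tpcs_* broadcast register returns
 * addr_type == CTXSW_ADDR_TYPE_TPC with both PRI_BROADCAST_FLAGS_GPC and
 * PRI_BROADCAST_FLAGS_TPC set, while the matching unicast GPC0/TPC1
 * register returns gpc_num == 0, tpc_num == 1 and no broadcast flags:
 *
 *	enum ctxsw_addr_type type;
 *	u32 gpc, tpc, ppc, be, flags;
 *
 *	err = gr_gk20a_decode_priv_addr(g, addr, &type,
 *					&gpc, &tpc, &ppc, &be, &flags);
 */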
6483
6484void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
6485 u32 num_fbpas,
6486 u32 *priv_addr_table, u32 *t)
6487{
6488 u32 fbpa_id;
6489
6490 for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) {
6491 priv_addr_table[(*t)++] = pri_fbpa_addr(g,
6492 pri_fbpa_addr_mask(g, addr), fbpa_id);
6493 }
6494}
6495
6496int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
6497 u32 gpc_num,
6498 u32 *priv_addr_table, u32 *t)
6499{
6500 u32 ppc_num;
6501
6502 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6503
6504 for (ppc_num = 0; ppc_num < g->gr.gpc_ppc_count[gpc_num]; ppc_num++) {
6505 priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr),
6506 gpc_num, ppc_num);
6507 }
6508
6509 return 0;
6510}
6511
6512/*
6513 * The context buffer is indexed using BE broadcast addresses and GPC/TPC
6514 * unicast addresses. This function will convert a BE unicast address to a BE
6515 * broadcast address and split a GPC/TPC broadcast address into a table of
6516 * GPC/TPC addresses. The addresses generated by this function can be
6517 * successfully processed by gr_gk20a_find_priv_offset_in_buffer
6518 */
6519int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6520 u32 addr,
6521 u32 *priv_addr_table,
6522 u32 *num_registers)
6523{
6524 enum ctxsw_addr_type addr_type;
6525 u32 gpc_num, tpc_num, ppc_num, be_num;
6526 u32 priv_addr, gpc_addr;
6527 u32 broadcast_flags;
6528 u32 t;
6529 int err;
6530
6531 t = 0;
6532 *num_registers = 0;
6533
6534 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6535
6536 err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
6537 &gpc_num, &tpc_num, &ppc_num, &be_num,
6538 &broadcast_flags);
6539 nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
6540 if (err != 0) {
6541 return err;
6542 }
6543
6544 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
6545 (addr_type == CTXSW_ADDR_TYPE_BE)) {
6546 /* The BE broadcast registers are included in the compressed PRI
6547 * table. Convert a BE unicast address to a broadcast address
6548 * so that we can look up the offset. */
6549 if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
6550 ((broadcast_flags & PRI_BROADCAST_FLAGS_BE) == 0U)) {
6551 priv_addr_table[t++] = pri_be_shared_addr(g, addr);
6552 } else {
6553 priv_addr_table[t++] = addr;
6554 }
6555
6556 *num_registers = t;
6557 return 0;
6558 }
6559
6560 /* The GPC/TPC unicast registers are included in the compressed PRI
6561 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
6562 * that we can look up the offsets. */
6563 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
6564 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
6565
6566 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) {
6567 for (tpc_num = 0;
6568 tpc_num < g->gr.gpc_tpc_count[gpc_num];
6569 tpc_num++) {
6570 priv_addr_table[t++] =
6571 pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
6572 gpc_num, tpc_num);
6573 }
6574
6575 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
6576 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
6577 priv_addr_table, &t);
6578 if (err != 0) {
6579 return err;
6580 }
6581 } else {
6582 priv_addr = pri_gpc_addr(g,
6583 pri_gpccs_addr_mask(addr),
6584 gpc_num);
6585
6586 gpc_addr = pri_gpccs_addr_mask(priv_addr);
6587 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6588 if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) {
6589 continue;
6590 }
6591
6592 priv_addr_table[t++] = priv_addr;
6593 }
6594 }
6595 } else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
6596 (addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
6597 (g->ops.gr.egpc_etpc_priv_addr_table != NULL)) {
6598 nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
6599 g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num, tpc_num,
6600 broadcast_flags, priv_addr_table, &t);
6601 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
6602 g->ops.ltc.split_lts_broadcast_addr(g, addr,
6603 priv_addr_table, &t);
6604 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
6605 g->ops.ltc.split_ltc_broadcast_addr(g, addr,
6606 priv_addr_table, &t);
6607 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) {
6608 g->ops.gr.split_fbpa_broadcast_addr(g, addr,
6609 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS),
6610 priv_addr_table, &t);
6611 } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) {
6612 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) {
6613 for (tpc_num = 0;
6614 tpc_num < g->gr.gpc_tpc_count[gpc_num];
6615 tpc_num++) {
6616 priv_addr_table[t++] =
6617 pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
6618 gpc_num, tpc_num);
6619 }
6620 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
6621 err = gr_gk20a_split_ppc_broadcast_addr(g,
6622 addr, gpc_num, priv_addr_table, &t);
6623 } else {
6624 priv_addr_table[t++] = addr;
6625 }
6626 }
6627
6628 *num_registers = t;
6629 return 0;
6630}
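
/*
 * Illustrative expansion, assuming a configuration with 2 GPCs and
 * 2 TPCs per GPC: a gr_gpcs_tpcs_* broadcast address fed to
 * gr_gk20a_create_priv_addr_table() comes back as four unicast entries,
 * one per GPC/TPC pair:
 *
 *	priv_addr_table[0] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), 0, 0);
 *	priv_addr_table[1] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), 0, 1);
 *	priv_addr_table[2] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), 1, 0);
 *	priv_addr_table[3] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), 1, 1);
 *	*num_registers = 4;
 */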
6631
6632int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6633 u32 addr,
6634 u32 max_offsets,
6635 u32 *offsets, u32 *offset_addrs,
6636 u32 *num_offsets,
6637 bool is_quad, u32 quad)
6638{
6639 u32 i;
6640 u32 priv_offset = 0;
6641 u32 *priv_registers;
6642 u32 num_registers = 0;
6643 int err = 0;
6644 struct gr_gk20a *gr = &g->gr;
6645 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
6646 u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count *
6647 sm_per_tpc;
6648
6649 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6650
6651	/* implementation is crossed-up if either of these happens */
6652 if (max_offsets > potential_offsets) {
6653 nvgpu_log_fn(g, "max_offsets > potential_offsets");
6654 return -EINVAL;
6655 }
6656
6657 if (!g->gr.ctx_vars.golden_image_initialized) {
6658 return -ENODEV;
6659 }
6660
6661 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
6662 if (priv_registers == NULL) {
6663 nvgpu_log_fn(g, "failed alloc for potential_offsets=%d", potential_offsets);
6664		err = -ENOMEM;
6665 goto cleanup;
6666 }
6667 memset(offsets, 0, sizeof(u32) * max_offsets);
6668 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6669 *num_offsets = 0;
6670
6671 g->ops.gr.create_priv_addr_table(g, addr, &priv_registers[0],
6672 &num_registers);
6673
6674 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6675 nvgpu_log_fn(g, "max_offsets = %d, num_registers = %d",
6676 max_offsets, num_registers);
6677 err = -EINVAL;
6678 goto cleanup;
6679 }
6680
6681 if ((max_offsets == 1) && (num_registers > 1)) {
6682 num_registers = 1;
6683 }
6684
6685 if (g->gr.ctx_vars.local_golden_image == NULL) {
6686 nvgpu_log_fn(g, "no context switch header info to work with");
6687 err = -EINVAL;
6688 goto cleanup;
6689 }
6690
6691 for (i = 0; i < num_registers; i++) {
6692 err = gr_gk20a_find_priv_offset_in_buffer(g,
6693 priv_registers[i],
6694 is_quad, quad,
6695 g->gr.ctx_vars.local_golden_image,
6696 g->gr.ctx_vars.golden_image_size,
6697 &priv_offset);
6698 if (err != 0) {
6699 nvgpu_log_fn(g, "Could not determine priv_offset for addr:0x%x",
6700 addr); /*, grPriRegStr(addr)));*/
6701 goto cleanup;
6702 }
6703
6704 offsets[i] = priv_offset;
6705 offset_addrs[i] = priv_registers[i];
6706 }
6707
6708 *num_offsets = num_registers;
6709cleanup:
6710 if (!IS_ERR_OR_NULL(priv_registers)) {
6711 nvgpu_kfree(g, priv_registers);
6712 }
6713
6714 return err;
6715}
6716
6717int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
6718 u32 addr,
6719 u32 max_offsets,
6720 u32 *offsets, u32 *offset_addrs,
6721 u32 *num_offsets)
6722{
6723 u32 i;
6724 u32 priv_offset = 0;
6725 u32 *priv_registers;
6726 u32 num_registers = 0;
6727 int err = 0;
6728 struct gr_gk20a *gr = &g->gr;
6729 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
6730 u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count *
6731 sm_per_tpc;
6732
6733 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6734
6735	/* implementation is crossed-up if either of these happens */
6736 if (max_offsets > potential_offsets) {
6737 return -EINVAL;
6738 }
6739
6740 if (!g->gr.ctx_vars.golden_image_initialized) {
6741 return -ENODEV;
6742 }
6743
6744 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
6745 if (priv_registers == NULL) {
6746 nvgpu_log_fn(g, "failed alloc for potential_offsets=%d", potential_offsets);
6747 return -ENOMEM;
6748 }
6749 memset(offsets, 0, sizeof(u32) * max_offsets);
6750 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6751 *num_offsets = 0;
6752
6753 g->ops.gr.create_priv_addr_table(g, addr, priv_registers,
6754 &num_registers);
6755
6756 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6757 err = -EINVAL;
6758 goto cleanup;
6759 }
6760
6761 if ((max_offsets == 1) && (num_registers > 1)) {
6762 num_registers = 1;
6763 }
6764
6765 if (g->gr.ctx_vars.local_golden_image == NULL) {
6766 nvgpu_log_fn(g, "no context switch header info to work with");
6767 err = -EINVAL;
6768 goto cleanup;
6769 }
6770
6771 for (i = 0; i < num_registers; i++) {
6772 err = gr_gk20a_find_priv_offset_in_pm_buffer(g,
6773 priv_registers[i],
6774 &priv_offset);
6775 if (err != 0) {
6776 nvgpu_log_fn(g, "Could not determine priv_offset for addr:0x%x",
6777 addr); /*, grPriRegStr(addr)));*/
6778 goto cleanup;
6779 }
6780
6781 offsets[i] = priv_offset;
6782 offset_addrs[i] = priv_registers[i];
6783 }
6784
6785 *num_offsets = num_registers;
6786cleanup:
6787 nvgpu_kfree(g, priv_registers);
6788
6789 return err;
6790}
6791
6792/* Set up some register tables. This looks hacky: our
6793 * register/offset accessors are functions, so they
6794 * can't be used as static initializers. TBD: fix this to
6795 * generate constants, at least on an as-needed basis.
6796 */
6797static const u32 _num_ovr_perf_regs = 17;
6798static u32 _ovr_perf_regs[17] = { 0, };
6799/* Following are the blocks of registers that the ucode
6800 * stores in the extended region. */
6801
6802void gk20a_gr_init_ovr_sm_dsm_perf(void)
6803{
6804 if (_ovr_perf_regs[0] != 0) {
6805 return;
6806 }
6807
6808 _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
6809 _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
6810 _ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
6811 _ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
6812 _ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r();
6813 _ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r();
6814 _ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r();
6815 _ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r();
6816 _ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r();
6817 _ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r();
6818 _ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r();
6819 _ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r();
6820 _ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r();
6821 _ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r();
6822 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
6823 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
6824 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
6825
6826}
6827
6828/* TBD: would like to handle this elsewhere, at a higher level.
6829 * These are currently constructed in a "test-then-write" style,
6830 * which makes it impossible to know externally whether a ctx
6831 * write will actually occur. So later we should put a lazy,
6832 * map-and-hold system in the patch write state. */
6833static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6834 struct channel_gk20a *ch,
6835 u32 addr, u32 data,
6836 struct nvgpu_mem *mem)
6837{
6838 u32 num_gpc = g->gr.gpc_count;
6839 u32 num_tpc;
6840 u32 tpc, gpc, reg;
6841 u32 chk_addr;
6842 u32 vaddr_lo;
6843 u32 vaddr_hi;
6844 u32 tmp;
6845 u32 num_ovr_perf_regs = 0;
6846 u32 *ovr_perf_regs = NULL;
6847 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
6848 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
6849 struct tsg_gk20a *tsg;
6850 struct nvgpu_gr_ctx *gr_ctx;
6851 struct nvgpu_mem *ctxheader = &ch->ctx_header;
6852
6853 tsg = tsg_gk20a_from_ch(ch);
6854 if (tsg == NULL) {
6855 return -EINVAL;
6856 }
6857
6858 gr_ctx = &tsg->gr_ctx;
6859 g->ops.gr.init_ovr_sm_dsm_perf();
6860 g->ops.gr.init_sm_dsm_reg_info();
6861 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs);
6862
6863 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6864
6865 for (reg = 0; reg < num_ovr_perf_regs; reg++) {
6866 for (gpc = 0; gpc < num_gpc; gpc++) {
6867 num_tpc = g->gr.gpc_tpc_count[gpc];
6868 for (tpc = 0; tpc < num_tpc; tpc++) {
6869 chk_addr = ((gpc_stride * gpc) +
6870 (tpc_in_gpc_stride * tpc) +
6871 ovr_perf_regs[reg]);
6872 if (chk_addr != addr) {
6873 continue;
6874 }
6875 /* reset the patch count from previous
6876				   runs, if ucode has already processed
6877 it */
6878 tmp = nvgpu_mem_rd(g, mem,
6879 ctxsw_prog_main_image_patch_count_o());
6880
6881 if (tmp == 0U) {
6882 gr_ctx->patch_ctx.data_count = 0;
6883 }
6884
6885 gr_gk20a_ctx_patch_write(g, gr_ctx,
6886 addr, data, true);
6887
6888 vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
6889 vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
6890
6891 nvgpu_mem_wr(g, mem,
6892 ctxsw_prog_main_image_patch_count_o(),
6893 gr_ctx->patch_ctx.data_count);
6894 if (ctxheader->gpu_va) {
6895 nvgpu_mem_wr(g, ctxheader,
6896 ctxsw_prog_main_image_patch_adr_lo_o(),
6897 vaddr_lo);
6898 nvgpu_mem_wr(g, ctxheader,
6899 ctxsw_prog_main_image_patch_adr_hi_o(),
6900 vaddr_hi);
6901 } else {
6902 nvgpu_mem_wr(g, mem,
6903 ctxsw_prog_main_image_patch_adr_lo_o(),
6904 vaddr_lo);
6905 nvgpu_mem_wr(g, mem,
6906 ctxsw_prog_main_image_patch_adr_hi_o(),
6907 vaddr_hi);
6908 }
6909
6910				/* we're not caching these on the cpu side
6911				   yet, but watch for that later */
6912 return 0;
6913 }
6914 }
6915 }
6916
6917 return 0;
6918}
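
/*
 * In short, the function above redirects an SMPC override register write
 * into the patch context: the (addr, data) pair is appended with
 * gr_gk20a_ctx_patch_write(), and the context image (or the ctx header,
 * when present) is updated with the new patch count and the patch buffer
 * GPU VA so that the ctxsw ucode can apply the write, presumably on the
 * next context load.
 */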
6919
6920#define ILLEGAL_ID ((u32)~0)
6921
6922static inline bool check_main_image_header_magic(u8 *context)
6923{
6924 u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
6925 return magic == ctxsw_prog_main_image_magic_value_v_value_v();
6926}
6927static inline bool check_local_header_magic(u8 *context)
6928{
6929 u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
6930 return magic == ctxsw_prog_local_magic_value_v_value_v();
6931
6932}
6933
6934/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
6935static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
6936{
6937 return 256;
6938}
6939
6940void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
6941 u32 **ovr_perf_regs)
6942{
6943 *num_ovr_perf_regs = _num_ovr_perf_regs;
6944 *ovr_perf_regs = _ovr_perf_regs;
6945}
6946
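/*
 * Layout sketch for the extended ctxsw buffer walked by the function
 * below, derived from the offset math in its body (segments are the
 * 256-byte chunks described by the ctxsw_prog headers):
 *
 *	[FECS extended header segment]
 *	[GPC0 block][GPC1 block]...		one block per GPC, where
 *	  each block = marker,
 *	               per-TPC SM DSM perf control registers,
 *	               marker,
 *	               per-TPC, per-quadrant SM DSM perf counter registers
 *
 * The returned priv_offset is the byte offset of the requested register
 * within the context image.
 */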
6947static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6948 u32 addr,
6949 bool is_quad, u32 quad,
6950 u32 *context_buffer,
6951 u32 context_buffer_size,
6952 u32 *priv_offset)
6953{
6954 u32 i, data32;
6955 u32 gpc_num, tpc_num;
6956 u32 num_gpcs, num_tpcs;
6957 u32 chk_addr;
6958 u32 ext_priv_offset, ext_priv_size;
6959 u8 *context;
6960 u32 offset_to_segment, offset_to_segment_end;
6961 u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
6962 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
6963 u32 num_ext_gpccs_ext_buffer_segments;
6964 u32 inter_seg_offset;
6965 u32 max_tpc_count;
6966 u32 *sm_dsm_perf_ctrl_regs = NULL;
6967 u32 num_sm_dsm_perf_ctrl_regs = 0;
6968 u32 *sm_dsm_perf_regs = NULL;
6969 u32 num_sm_dsm_perf_regs = 0;
6970 u32 buffer_segments_size = 0;
6971 u32 marker_size = 0;
6972 u32 control_register_stride = 0;
6973 u32 perf_register_stride = 0;
6974 struct gr_gk20a *gr = &g->gr;
6975 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
6976 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
6977 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
6978 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
6979 u32 tpc_gpc_mask = (tpc_in_gpc_stride - 1);
6980
6981	/* Only TPC registers live in the extended region, so if this is not
6982	   a TPC reg, return an error so the caller can look elsewhere. */
6983 if (pri_is_gpc_addr(g, addr)) {
6984 u32 gpc_addr = 0;
6985 gpc_num = pri_get_gpc_num(g, addr);
6986 gpc_addr = pri_gpccs_addr_mask(addr);
6987 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
6988 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6989 } else {
6990 return -EINVAL;
6991 }
6992
6993 nvgpu_log_info(g, " gpc = %d tpc = %d",
6994 gpc_num, tpc_num);
6995 } else if ((g->ops.gr.is_etpc_addr != NULL) &&
6996 g->ops.gr.is_etpc_addr(g, addr)) {
6997 g->ops.gr.get_egpc_etpc_num(g, addr, &gpc_num, &tpc_num);
6998 gpc_base = g->ops.gr.get_egpc_base(g);
6999 } else {
7000 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7001 "does not exist in extended region");
7002 return -EINVAL;
7003 }
7004
7005 buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
7006 /* note below is in words/num_registers */
7007 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
7008
7009 context = (u8 *)context_buffer;
7010 /* sanity check main header */
7011 if (!check_main_image_header_magic(context)) {
7012 nvgpu_err(g,
7013 "Invalid main header: magic value");
7014 return -EINVAL;
7015 }
7016 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7017 if (gpc_num >= num_gpcs) {
7018 nvgpu_err(g,
7019 "GPC 0x%08x is greater than total count 0x%08x!",
7020 gpc_num, num_gpcs);
7021 return -EINVAL;
7022 }
7023
7024 data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
7025 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
7026 if (0 == ext_priv_size) {
7027 nvgpu_log_info(g, " No extended memory in context buffer");
7028 return -EINVAL;
7029 }
7030 ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
7031
7032 offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
7033 offset_to_segment_end = offset_to_segment +
7034 (ext_priv_size * buffer_segments_size);
7035
7036 /* check local header magic */
7037 context += ctxsw_prog_ucode_header_size_in_bytes();
7038 if (!check_local_header_magic(context)) {
7039 nvgpu_err(g,
7040 "Invalid local header: magic value");
7041 return -EINVAL;
7042 }
7043
7044 /*
7045 * See if the incoming register address is in the first table of
7046 * registers. We check this by decoding only the TPC addr portion.
7047 * If we get a hit on the TPC bit, we then double check the address
7048 * by computing it from the base gpc/tpc strides. Then make sure
7049 * it is a real match.
7050 */
7051 g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs,
7052 &sm_dsm_perf_regs,
7053 &perf_register_stride);
7054
7055 g->ops.gr.init_sm_dsm_reg_info();
7056
7057 for (i = 0; i < num_sm_dsm_perf_regs; i++) {
7058 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
7059 sm_dsm_perf_reg_id = i;
7060
7061 nvgpu_log_info(g, "register match: 0x%08x",
7062 sm_dsm_perf_regs[i]);
7063
7064 chk_addr = (gpc_base + gpc_stride * gpc_num) +
7065 tpc_in_gpc_base +
7066 (tpc_in_gpc_stride * tpc_num) +
7067 (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask);
7068
7069 if (chk_addr != addr) {
7070 nvgpu_err(g,
7071				   "addr mismatch: 0x%08x != 0x%08x",
7072 addr, chk_addr);
7073 return -EINVAL;
7074 }
7075 break;
7076 }
7077 }
7078
7079	/* Didn't find the reg in supported group 1,
7080	 * so try the second group now. */
7081 g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs,
7082 &sm_dsm_perf_ctrl_regs,
7083 &control_register_stride);
7084
7085 if (ILLEGAL_ID == sm_dsm_perf_reg_id) {
7086 for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) {
7087 if ((addr & tpc_gpc_mask) ==
7088 (sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) {
7089 sm_dsm_perf_ctrl_reg_id = i;
7090
7091 nvgpu_log_info(g, "register match: 0x%08x",
7092 sm_dsm_perf_ctrl_regs[i]);
7093
7094 chk_addr = (gpc_base + gpc_stride * gpc_num) +
7095 tpc_in_gpc_base +
7096 tpc_in_gpc_stride * tpc_num +
7097 (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
7098 tpc_gpc_mask);
7099
7100 if (chk_addr != addr) {
7101 nvgpu_err(g,
7102						  "addr mismatch: 0x%08x != 0x%08x",
7103 addr, chk_addr);
7104 return -EINVAL;
7105
7106 }
7107
7108 break;
7109 }
7110 }
7111 }
7112
7113 if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
7114 (ILLEGAL_ID == sm_dsm_perf_reg_id)) {
7115 return -EINVAL;
7116 }
7117
7118 /* Skip the FECS extended header, nothing there for us now. */
7119 offset_to_segment += buffer_segments_size;
7120
7121 /* skip through the GPCCS extended headers until we get to the data for
7122 * our GPC. The size of each gpc extended segment is enough to hold the
7123	 * max tpc count for the gpcs, in 256b chunks.
7124 */
7125
7126 max_tpc_count = gr->max_tpc_per_gpc_count;
7127
7128 num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2);
7129
7130 offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
7131 buffer_segments_size * gpc_num);
7132
7133 num_tpcs = g->gr.gpc_tpc_count[gpc_num];
7134
7135 /* skip the head marker to start with */
7136 inter_seg_offset = marker_size;
7137
7138 if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) {
7139		/* skip over the control regs of the TPCs before the one we
7140		 * want, then skip to the register in this tpc */
7141 inter_seg_offset = inter_seg_offset +
7142 (tpc_num * control_register_stride) +
7143 sm_dsm_perf_ctrl_reg_id;
7144 } else {
7145 /* skip all the control registers */
7146 inter_seg_offset = inter_seg_offset +
7147 (num_tpcs * control_register_stride);
7148
7149 /* skip the marker between control and counter segments */
7150 inter_seg_offset += marker_size;
7151
7152 /* skip over counter regs of TPCs before the one we want */
7153 inter_seg_offset = inter_seg_offset +
7154 (tpc_num * perf_register_stride) *
7155 ctxsw_prog_extended_num_smpc_quadrants_v();
7156
7157		/* skip over the registers for the quadrants we do not want,
7158		 * then skip to the register in this tpc */
7159 inter_seg_offset = inter_seg_offset +
7160 (perf_register_stride * quad) +
7161 sm_dsm_perf_reg_id;
7162 }
7163
7164	/* set the offset to the segment offset plus the inter-segment
7165	 * offset of our register */
7166 offset_to_segment += (inter_seg_offset * 4);
7167
7168 /* last sanity check: did we somehow compute an offset outside the
7169 * extended buffer? */
7170 if (offset_to_segment > offset_to_segment_end) {
7171 nvgpu_err(g,
7172 "Overflow ctxsw buffer! 0x%08x > 0x%08x",
7173 offset_to_segment, offset_to_segment_end);
7174 return -EINVAL;
7175 }
7176
7177 *priv_offset = offset_to_segment;
7178
7179 return 0;
7180}
7181
7182
7183static int
7184gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
7185 enum ctxsw_addr_type addr_type,
7186 u32 pri_addr,
7187 u32 gpc_num, u32 num_tpcs,
7188 u32 num_ppcs, u32 ppc_mask,
7189 u32 *priv_offset)
7190{
7191 u32 i;
7192 u32 address, base_address;
7193 u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
7194 u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
7195 struct aiv_gk20a *reg;
7196 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
7197 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7198 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
7199 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
7200 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
7201 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7202
7203 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
7204
7205 if (!g->gr.ctx_vars.valid) {
7206 return -EINVAL;
7207 }
7208
7209 /* Process the SYS/BE segment. */
7210 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
7211 (addr_type == CTXSW_ADDR_TYPE_BE)) {
7212 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
7213 reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i];
7214 address = reg->addr;
7215 sys_offset = reg->index;
7216
7217 if (pri_addr == address) {
7218 *priv_offset = sys_offset;
7219 return 0;
7220 }
7221 }
7222 }
7223
7224 /* Process the TPC segment. */
7225 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
7226 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
7227 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
7228 reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
7229 address = reg->addr;
7230 tpc_addr = pri_tpccs_addr_mask(address);
7231 base_address = gpc_base +
7232 (gpc_num * gpc_stride) +
7233 tpc_in_gpc_base +
7234 (tpc_num * tpc_in_gpc_stride);
7235 address = base_address + tpc_addr;
7236 /*
7237 * The data for the TPCs is interleaved in the context buffer.
7238 * Example with num_tpcs = 2
7239 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
7240 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
7241 */
7242 tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
7243
7244 if (pri_addr == address) {
7245 *priv_offset = tpc_offset;
7246 return 0;
7247 }
7248 }
7249 }
7250 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
7251 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
7252 if (g->ops.gr.get_egpc_base == NULL) {
7253 return -EINVAL;
7254 }
7255
7256 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
7257 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
7258 reg = &g->gr.ctx_vars.ctxsw_regs.etpc.l[i];
7259 address = reg->addr;
7260 tpc_addr = pri_tpccs_addr_mask(address);
7261 base_address = g->ops.gr.get_egpc_base(g) +
7262 (gpc_num * gpc_stride) +
7263 tpc_in_gpc_base +
7264 (tpc_num * tpc_in_gpc_stride);
7265 address = base_address + tpc_addr;
7266 /*
7267 * The data for the TPCs is interleaved in the context buffer.
7268 * Example with num_tpcs = 2
7269 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
7270 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
7271 */
7272 tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
7273
7274 if (pri_addr == address) {
7275 *priv_offset = tpc_offset;
7276 nvgpu_log(g,
7277 gpu_dbg_fn | gpu_dbg_gpu_dbg,
7278						"egpc/etpc priv_offset=0x%08x",
7279 *priv_offset);
7280 return 0;
7281 }
7282 }
7283 }
7284 }
7285
7286
7287 /* Process the PPC segment. */
7288 if (addr_type == CTXSW_ADDR_TYPE_PPC) {
7289 for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
7290 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
7291 reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
7292 address = reg->addr;
7293 ppc_addr = pri_ppccs_addr_mask(address);
7294 base_address = gpc_base +
7295 (gpc_num * gpc_stride) +
7296 ppc_in_gpc_base +
7297 (ppc_num * ppc_in_gpc_stride);
7298 address = base_address + ppc_addr;
7299 /*
7300 * The data for the PPCs is interleaved in the context buffer.
7301 * Example with numPpcs = 2
7302 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
7303 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
7304 */
7305 ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4);
7306
7307 if (pri_addr == address) {
7308 *priv_offset = ppc_offset;
7309 return 0;
7310 }
7311 }
7312 }
7313 }
7314
7315
7316 /* Process the GPC segment. */
7317 if (addr_type == CTXSW_ADDR_TYPE_GPC) {
7318 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
7319 reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i];
7320
7321 address = reg->addr;
7322 gpc_addr = pri_gpccs_addr_mask(address);
7323 gpc_offset = reg->index;
7324
7325 base_address = gpc_base + (gpc_num * gpc_stride);
7326 address = base_address + gpc_addr;
7327
7328 if (pri_addr == address) {
7329 *priv_offset = gpc_offset;
7330 return 0;
7331 }
7332 }
7333 }
7334 return -EINVAL;
7335}
7336
7337static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7338 u8 *context,
7339 u32 *num_ppcs, u32 *ppc_mask,
7340 u32 *reg_ppc_count)
7341{
7342 u32 data32;
7343 u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
7344
7345 /*
7346 * if there is only 1 PES_PER_GPC, then we put the PES registers
7347 * in the GPC reglist, so we can't error out if ppc.count == 0
7348 */
7349 if ((!g->gr.ctx_vars.valid) ||
7350 ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) &&
7351 (num_pes_per_gpc > 1))) {
7352 return -EINVAL;
7353 }
7354
7355 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
7356
7357 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
7358 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
7359
7360 *reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count;
7361
7362 return 0;
7363}
7364
7365int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
7366 enum ctxsw_addr_type addr_type,
7367 u32 num_tpcs,
7368 u32 num_ppcs,
7369 u32 reg_list_ppc_count,
7370 u32 *__offset_in_segment)
7371{
7372 u32 offset_in_segment = 0;
7373 struct gr_gk20a *gr = &g->gr;
7374
7375 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
7376 /*
7377 * reg = gr->ctx_vars.ctxsw_regs.tpc.l;
7378 * offset_in_segment = 0;
7379 */
7380 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
7381 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
7382 offset_in_segment =
7383 ((gr->ctx_vars.ctxsw_regs.tpc.count *
7384 num_tpcs) << 2);
7385
7386 nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
7387			"egpc etpc offset_in_segment 0x%08x",
7388 offset_in_segment);
7389 } else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
7390 /*
7391 * The ucode stores TPC data before PPC data.
7392 * Advance offset past TPC data to PPC data.
7393 */
7394 offset_in_segment =
7395 (((gr->ctx_vars.ctxsw_regs.tpc.count +
7396 gr->ctx_vars.ctxsw_regs.etpc.count) *
7397 num_tpcs) << 2);
7398 } else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
7399 /*
7400 * The ucode stores TPC/PPC data before GPC data.
7401 * Advance offset past TPC/PPC data to GPC data.
7402 *
7403 * Note 1 PES_PER_GPC case
7404 */
7405 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
7406 GPU_LIT_NUM_PES_PER_GPC);
7407 if (num_pes_per_gpc > 1) {
7408 offset_in_segment =
7409 ((((gr->ctx_vars.ctxsw_regs.tpc.count +
7410 gr->ctx_vars.ctxsw_regs.etpc.count) *
7411 num_tpcs) << 2) +
7412 ((reg_list_ppc_count * num_ppcs) << 2));
7413 } else {
7414 offset_in_segment =
7415 (((gr->ctx_vars.ctxsw_regs.tpc.count +
7416 gr->ctx_vars.ctxsw_regs.etpc.count) *
7417 num_tpcs) << 2);
7418 }
7419 } else {
7420 nvgpu_log_fn(g, "Unknown address type.");
7421 return -EINVAL;
7422 }
7423
7424 *__offset_in_segment = offset_in_segment;
7425 return 0;
7426}
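
/*
 * The offsets computed above assume the following order of register
 * blocks inside one GPCCS priv segment, with each block holding
 * num_<unit> interleaved copies of the corresponding ctxsw_regs list,
 * 4 bytes per register:
 *
 *	TPC regs | ETPC regs | PPC regs (only if PES_PER_GPC > 1) | GPC regs
 *
 * e.g. for CTXSW_ADDR_TYPE_GPC with more than one PES per GPC:
 *
 *	offset_in_segment = ((tpc.count + etpc.count) * num_tpcs) * 4 +
 *			    (reg_list_ppc_count * num_ppcs) * 4;
 */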
7427
7428/*
7429 * This function will return the 32 bit offset for a priv register if it is
7430 * present in the context buffer. The context buffer is in CPU memory.
7431 */
7432static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7433 u32 addr,
7434 bool is_quad, u32 quad,
7435 u32 *context_buffer,
7436 u32 context_buffer_size,
7437 u32 *priv_offset)
7438{
7439 u32 i, data32;
7440 int err;
7441 enum ctxsw_addr_type addr_type;
7442 u32 broadcast_flags;
7443 u32 gpc_num, tpc_num, ppc_num, be_num;
7444 u32 num_gpcs, num_tpcs, num_ppcs;
7445 u32 offset;
7446 u32 sys_priv_offset, gpc_priv_offset;
7447 u32 ppc_mask, reg_list_ppc_count;
7448 u8 *context;
7449 u32 offset_to_segment, offset_in_segment = 0;
7450
7451 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
7452
7453 err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
7454 &gpc_num, &tpc_num, &ppc_num, &be_num,
7455 &broadcast_flags);
7456 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7457 "addr_type = %d, broadcast_flags: %08x",
7458 addr_type, broadcast_flags);
7459 if (err != 0) {
7460 return err;
7461 }
7462
7463 context = (u8 *)context_buffer;
7464 if (!check_main_image_header_magic(context)) {
7465 nvgpu_err(g,
7466 "Invalid main header: magic value");
7467 return -EINVAL;
7468 }
7469 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7470
7471 /* Parse the FECS local header. */
7472 context += ctxsw_prog_ucode_header_size_in_bytes();
7473 if (!check_local_header_magic(context)) {
7474 nvgpu_err(g,
7475 "Invalid FECS local header: magic value");
7476 return -EINVAL;
7477 }
7478 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7479 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7480 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
7481
7482 /* If found in Ext buffer, ok.
7483 * If it failed and we expected to find it there (quad offset)
7484 * then return the error. Otherwise continue on.
7485 */
7486 err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
7487 addr, is_quad, quad, context_buffer,
7488 context_buffer_size, priv_offset);
7489 if ((err == 0) || ((err != 0) && is_quad)) {
7490 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7491 "err = %d, is_quad = %s",
7492 err, is_quad ? "true" : "false");
7493 return err;
7494 }
7495
7496 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
7497 (addr_type == CTXSW_ADDR_TYPE_BE)) {
7498 /* Find the offset in the FECS segment. */
7499 offset_to_segment = sys_priv_offset *
7500 ctxsw_prog_ucode_header_size_in_bytes();
7501
7502 err = gr_gk20a_process_context_buffer_priv_segment(g,
7503 addr_type, addr,
7504 0, 0, 0, 0,
7505 &offset);
7506 if (err != 0) {
7507 return err;
7508 }
7509
7510 *priv_offset = (offset_to_segment + offset);
7511 return 0;
7512 }
7513
7514 if ((gpc_num + 1) > num_gpcs) {
7515 nvgpu_err(g,
7516 "GPC %d not in this context buffer.",
7517 gpc_num);
7518 return -EINVAL;
7519 }
7520
7521 /* Parse the GPCCS local header(s).*/
7522 for (i = 0; i < num_gpcs; i++) {
7523 context += ctxsw_prog_ucode_header_size_in_bytes();
7524 if (!check_local_header_magic(context)) {
7525 nvgpu_err(g,
7526 "Invalid GPCCS local header: magic value");
7527 return -EINVAL;
7528
7529 }
7530 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7531 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7532
7533 err = gr_gk20a_determine_ppc_configuration(g, context,
7534 &num_ppcs, &ppc_mask,
7535 &reg_list_ppc_count);
7536 if (err != 0) {
7537 nvgpu_err(g, "determine ppc configuration failed");
7538 return err;
7539 }
7540
7541
7542 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
7543
7544 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
7545 nvgpu_err(g,
7546 "GPC %d TPC %d not in this context buffer.",
7547 gpc_num, tpc_num);
7548 return -EINVAL;
7549 }
7550
7551 /* Find the offset in the GPCCS segment.*/
7552 if (i == gpc_num) {
7553 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7554				"gpc_priv_offset 0x%08x",
7555 gpc_priv_offset);
7556 offset_to_segment = gpc_priv_offset *
7557 ctxsw_prog_ucode_header_size_in_bytes();
7558
7559 err = g->ops.gr.get_offset_in_gpccs_segment(g,
7560 addr_type,
7561 num_tpcs, num_ppcs, reg_list_ppc_count,
7562 &offset_in_segment);
7563 if (err != 0) {
7564 return -EINVAL;
7565 }
7566
7567 offset_to_segment += offset_in_segment;
7568 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7569				"offset_to_segment 0x%08x",
7570 offset_to_segment);
7571
7572 err = gr_gk20a_process_context_buffer_priv_segment(g,
7573 addr_type, addr,
7574 i, num_tpcs,
7575 num_ppcs, ppc_mask,
7576 &offset);
7577 if (err != 0) {
7578 return -EINVAL;
7579 }
7580
7581 *priv_offset = offset_to_segment + offset;
7582 return 0;
7583 }
7584 }
7585
7586 return -EINVAL;
7587}
7588
7589static int map_cmp(const void *a, const void *b)
7590{
7591 struct ctxsw_buf_offset_map_entry *e1 =
7592 (struct ctxsw_buf_offset_map_entry *)a;
7593 struct ctxsw_buf_offset_map_entry *e2 =
7594 (struct ctxsw_buf_offset_map_entry *)b;
7595
7596 if (e1->addr < e2->addr) {
7597 return -1;
7598 }
7599
7600 if (e1->addr > e2->addr) {
7601 return 1;
7602 }
7603 return 0;
7604}
7605
7606static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry *map,
7607 struct aiv_list_gk20a *regs,
7608 u32 *count, u32 *offset,
7609 u32 max_cnt, u32 base, u32 mask)
7610{
7611 u32 idx;
7612 u32 cnt = *count;
7613 u32 off = *offset;
7614
7615 if ((cnt + regs->count) > max_cnt) {
7616 return -EINVAL;
7617 }
7618
7619 for (idx = 0; idx < regs->count; idx++) {
7620 if ((base + (regs->l[idx].addr & mask)) < 0xFFF) {
7621 map[cnt].addr = base + (regs->l[idx].addr & mask)
7622 + NV_PCFG_BASE;
7623 } else {
7624 map[cnt].addr = base + (regs->l[idx].addr & mask);
7625 }
7626 map[cnt++].offset = off;
7627 off += 4;
7628 }
7629 *count = cnt;
7630 *offset = off;
7631 return 0;
7632}
7633
7634static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
7635 struct ctxsw_buf_offset_map_entry *map,
7636 struct aiv_list_gk20a *regs,
7637 u32 *count, u32 *offset,
7638 u32 max_cnt, u32 base, u32 mask)
7639{
7640 u32 idx;
7641 u32 cnt = *count;
7642 u32 off = *offset;
7643
7644 if ((cnt + regs->count) > max_cnt) {
7645 return -EINVAL;
7646 }
7647
7648	/* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1.
7649	 * To handle the case of PPC registers being folded into the GPC list,
7650	 * the code below specifically checks for any PPC offsets and adds
7651	 * them using the proper mask.
7652 */
7653 for (idx = 0; idx < regs->count; idx++) {
7654 /* Check if the address is PPC address */
7655 if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) {
7656 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
7657 GPU_LIT_PPC_IN_GPC_BASE);
7658 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
7659 GPU_LIT_PPC_IN_GPC_STRIDE);
7660 /* Use PPC mask instead of the GPC mask provided */
7661 u32 ppcmask = ppc_in_gpc_stride - 1;
7662
7663 map[cnt].addr = base + ppc_in_gpc_base
7664 + (regs->l[idx].addr & ppcmask);
7665 } else {
7666 map[cnt].addr = base + (regs->l[idx].addr & mask);
7667 }
7668 map[cnt++].offset = off;
7669 off += 4;
7670 }
7671 *count = cnt;
7672 *offset = off;
7673 return 0;
7674}
7675
7676static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
7677 struct aiv_list_gk20a *regs,
7678 u32 *count, u32 *offset,
7679 u32 max_cnt, u32 base, u32 mask)
7680{
7681 u32 idx;
7682 u32 cnt = *count;
7683 u32 off = *offset;
7684
7685 if ((cnt + regs->count) > max_cnt) {
7686 return -EINVAL;
7687 }
7688
7689 for (idx = 0; idx < regs->count; idx++) {
7690 map[cnt].addr = base + (regs->l[idx].addr & mask);
7691 map[cnt++].offset = off;
7692 off += 4;
7693 }
7694 *count = cnt;
7695 *offset = off;
7696 return 0;
7697}
7698
7699/* Helper function to add register entries to the register map for all
7700 * subunits
7701 */
7702static int add_ctxsw_buffer_map_entries_subunits(
7703 struct ctxsw_buf_offset_map_entry *map,
7704 struct aiv_list_gk20a *regs,
7705 u32 *count, u32 *offset,
7706 u32 max_cnt, u32 base,
7707 u32 num_units, u32 stride, u32 mask)
7708{
7709 u32 unit;
7710 u32 idx;
7711 u32 cnt = *count;
7712 u32 off = *offset;
7713
7714 if ((cnt + (regs->count * num_units)) > max_cnt) {
7715 return -EINVAL;
7716 }
7717
7718 /* Data is interleaved for units in ctxsw buffer */
7719 for (idx = 0; idx < regs->count; idx++) {
7720 for (unit = 0; unit < num_units; unit++) {
7721 map[cnt].addr = base + (regs->l[idx].addr & mask) +
7722 (unit * stride);
7723 map[cnt++].offset = off;
7724 off += 4;
7725 }
7726 }
7727 *count = cnt;
7728 *offset = off;
7729 return 0;
7730}
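/*
 * Worked example (illustrative only, not from the original source): for a
 * two-register list with num_units = 3, stride S and base B, the loop above
 * emits entries unit-major for each register, so consecutive 4-byte words in
 * the ctxsw buffer hold the same register for successive units. Writing r0
 * and r1 for regs->l[0].addr & mask and regs->l[1].addr & mask:
 *
 *   map[cnt + 0] = { .addr = B + r0,         .offset = off + 0x0 }
 *   map[cnt + 1] = { .addr = B + r0 + 1 * S, .offset = off + 0x4 }
 *   map[cnt + 2] = { .addr = B + r0 + 2 * S, .offset = off + 0x8 }
 *   map[cnt + 3] = { .addr = B + r1,         .offset = off + 0xc }
 *   ... and so on for the remaining (register, unit) pairs.
 */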
7731
7732int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
7733 struct ctxsw_buf_offset_map_entry *map,
7734 struct aiv_list_gk20a *regs,
7735 u32 *count, u32 *offset,
7736 u32 max_cnt, u32 base,
7737 u32 num_fbpas, u32 stride, u32 mask)
7738{
7739 return add_ctxsw_buffer_map_entries_subunits(map, regs, count, offset,
7740 max_cnt, base, num_fbpas, stride, mask);
7741}
7742
7743static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7744 struct ctxsw_buf_offset_map_entry *map,
7745 u32 *count, u32 *offset, u32 max_cnt)
7746{
7747 u32 num_gpcs = g->gr.gpc_count;
7748 u32 num_ppcs, num_tpcs, gpc_num, base;
7749 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
7750 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7751 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
7752 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
7753 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
7754 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7755
7756 for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
7757 num_tpcs = g->gr.gpc_tpc_count[gpc_num];
7758 base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base;
7759 if (add_ctxsw_buffer_map_entries_subunits(map,
7760 &g->gr.ctx_vars.ctxsw_regs.pm_tpc,
7761 count, offset, max_cnt, base, num_tpcs,
7762 tpc_in_gpc_stride,
7763 (tpc_in_gpc_stride - 1))) {
7764 return -EINVAL;
7765 }
7766
7767 num_ppcs = g->gr.gpc_ppc_count[gpc_num];
7768 base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
7769 if (add_ctxsw_buffer_map_entries_subunits(map,
7770 &g->gr.ctx_vars.ctxsw_regs.pm_ppc,
7771 count, offset, max_cnt, base, num_ppcs,
7772 ppc_in_gpc_stride,
7773 (ppc_in_gpc_stride - 1))) {
7774 return -EINVAL;
7775 }
7776
7777 base = gpc_base + (gpc_stride * gpc_num);
7778 if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
7779 &g->gr.ctx_vars.ctxsw_regs.pm_gpc,
7780 count, offset, max_cnt, base,
7781 (gpc_stride - 1))) {
7782 return -EINVAL;
7783 }
7784
7785 base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
7786 if (add_ctxsw_buffer_map_entries(map,
7787 &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc,
7788 count, offset, max_cnt, base, ~0)) {
7789 return -EINVAL;
7790 }
7791
7792 base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num);
7793 if (add_ctxsw_buffer_map_entries(map,
7794 &g->gr.ctx_vars.ctxsw_regs.perf_gpc,
7795 count, offset, max_cnt, base, ~0)) {
7796 return -EINVAL;
7797 }
7798
7799 base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
7800 if (add_ctxsw_buffer_map_entries(map,
7801 &g->gr.ctx_vars.ctxsw_regs.gpc_router,
7802 count, offset, max_cnt, base, ~0)) {
7803 return -EINVAL;
7804 }
7805
7806 /* Counter Aggregation Unit, if available */
7807 if (g->gr.ctx_vars.ctxsw_regs.pm_cau.count) {
7808 base = gpc_base + (gpc_stride * gpc_num)
7809 + tpc_in_gpc_base;
7810 if (add_ctxsw_buffer_map_entries_subunits(map,
7811 &g->gr.ctx_vars.ctxsw_regs.pm_cau,
7812 count, offset, max_cnt, base, num_tpcs,
7813 tpc_in_gpc_stride,
7814 (tpc_in_gpc_stride - 1))) {
7815 return -EINVAL;
7816 }
7817 }
7818
7819 *offset = ALIGN(*offset, 256);
7820 }
7821 return 0;
7822}
7823
7824int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
7825 struct aiv_list_gk20a *regs,
7826 u32 *count, u32 *offset,
7827 u32 max_cnt, u32 base, u32 mask)
7828{
7829 return add_ctxsw_buffer_map_entries(map, regs,
7830 count, offset, max_cnt, base, mask);
7831}
7832
7833/*
7834 * PM CTXSW BUFFER LAYOUT :
7835 *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE
7836 *| |
7837 *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
7838 *|---------------------------------------------|
7839 *| |
7840 *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
7841 *|---------------------------------------------|
7842 *| |
7843 *| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words
7844 *|---------------------------------------------|
7845 *| |
7846 *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words
7847 *|---------------------------------------------|
7848 *| PADDING for 256 byte alignment |
7849 *|---------------------------------------------|<----256 byte aligned
7850 *| LIST_compressed_nv_perf_fbp_ctx_regs |
7851 *| |Space allocated: numRegs * n words (for n FB units)
7852 *|---------------------------------------------|
7853 *| LIST_compressed_nv_perf_fbprouter_ctx_regs |
7854 *| |Space allocated: numRegs * n words (for n FB units)
7855 *|---------------------------------------------|
7856 *| LIST_compressed_pm_fbpa_ctx_regs |
7857 *| |Space allocated: numRegs * n words (for n FB units)
7858 *|---------------------------------------------|
7859 *| LIST_compressed_pm_rop_ctx_regs |
7860 *|---------------------------------------------|
7861 *| LIST_compressed_pm_ltc_ctx_regs |
7862 *| LTC0 LTS0 |
7863 *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
7864 *| LTCn LTS0 |
7865 *| LTC0 LTS1 |
7866 *| LTC1 LTS1 |
7867 *| LTCn LTS1 |
7868 *| LTC0 LTSn |
7869 *| LTC1 LTSn |
7870 *| LTCn LTSn |
7871 *|---------------------------------------------|
7872 *| PADDING for 256 byte alignment |
7873 *|---------------------------------------------|<----256 byte aligned
7874 *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
7875 *| REG0 TPC1 | all the GPC/TPC register lists
7876 *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
7877 *| LIST_pm_ctx_reg_TPC REG1 TPC0 |
7878 *| * numTpcs REG1 TPC1 |
7879 *| LIST_pm_ctx_reg_PPC REG1 TPCn |
7880 *| * numPpcs REGn TPC0 |
7881 *| LIST_pm_ctx_reg_GPC REGn TPC1 |
7882 *| List_pm_ctx_reg_uc_GPC REGn TPCn |
7883 *| LIST_nv_perf_ctx_reg_GPC |
7884 *| LIST_nv_perf_gpcrouter_ctx_reg |
7885 *| LIST_nv_perf_ctx_reg_CAU |
7886 *| ---- |--
7887 *| GPC1 . |
7888 *| . |<----
7889 *|---------------------------------------------|
7890 *= =
7891 *| GPCn |
7892 *= =
7893 *|---------------------------------------------|
7894 */
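/*
 * Worked example (illustrative only): the ALIGN(offset, 256) steps in the
 * code below implement the "PADDING for 256 byte alignment" rows above.
 * For instance, if the SYS/PMA lists end at byte offset 0x2f4, the FBP
 * section that follows starts at ALIGN(0x2f4, 256) = 0x300; likewise each
 * per-GPC region is padded up to the next 256-byte boundary.
 */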
7895
7896static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7897{
7898 u32 hwpm_ctxsw_buffer_size = g->gr.ctx_vars.pm_ctxsw_image_size;
7899 u32 hwpm_ctxsw_reg_count_max;
7900 u32 map_size;
7901 u32 i, count = 0;
7902 u32 offset = 0;
7903 struct ctxsw_buf_offset_map_entry *map;
7904 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
7905 u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
7906 u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
7907 u32 num_ltc = g->ops.gr.get_max_ltc_per_fbp(g) * g->gr.num_fbps;
7908
7909 if (hwpm_ctxsw_buffer_size == 0) {
7910 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7911 "no PM Ctxsw buffer memory in context buffer");
7912 return -EINVAL;
7913 }
7914
7915 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
7916 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
7917
7918 map = nvgpu_big_zalloc(g, map_size);
7919 if (map == NULL) {
7920 return -ENOMEM;
7921 }
7922
7923 /* Add entries from _LIST_pm_ctx_reg_SYS */
7924 if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
7925 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7926 goto cleanup;
7927 }
7928
7929 /* Add entries from _LIST_nv_perf_ctx_reg_SYS */
7930 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys,
7931 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7932 goto cleanup;
7933 }
7934
7935 /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
7936 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router,
7937 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7938 goto cleanup;
7939 }
7940
7941 /* Add entries from _LIST_nv_perf_pma_ctx_reg*/
7942 if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma,
7943 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7944 goto cleanup;
7945 }
7946
7947 offset = ALIGN(offset, 256);
7948
7949 /* Add entries from _LIST_nv_perf_fbp_ctx_regs */
7950 if (add_ctxsw_buffer_map_entries_subunits(map,
7951 &g->gr.ctx_vars.ctxsw_regs.fbp,
7952 &count, &offset,
7953 hwpm_ctxsw_reg_count_max, 0,
7954 g->gr.num_fbps,
7955 g->ops.gr.get_pmm_per_chiplet_offset(),
7956 ~0)) {
7957 goto cleanup;
7958 }
7959
7960 /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
7961 if (add_ctxsw_buffer_map_entries_subunits(map,
7962 &g->gr.ctx_vars.ctxsw_regs.fbp_router,
7963 &count, &offset,
7964 hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps,
7965 NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) {
7966 goto cleanup;
7967 }
7968
7969 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
7970 if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map,
7971 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
7972 &count, &offset,
7973 hwpm_ctxsw_reg_count_max, 0,
7974 num_fbpas, fbpa_stride, ~0)) {
7975 goto cleanup;
7976 }
7977
7978 /* Add entries from _LIST_nv_pm_rop_ctx_regs */
7979 if (add_ctxsw_buffer_map_entries(map,
7980 &g->gr.ctx_vars.ctxsw_regs.pm_rop,
7981 &count, &offset,
7982 hwpm_ctxsw_reg_count_max, 0, ~0)) {
7983 goto cleanup;
7984 }
7985
7986 /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
7987 if (add_ctxsw_buffer_map_entries_subunits(map,
7988 &g->gr.ctx_vars.ctxsw_regs.pm_ltc,
7989 &count, &offset,
7990 hwpm_ctxsw_reg_count_max, 0,
7991 num_ltc, ltc_stride, ~0)) {
7992 goto cleanup;
7993 }
7994
7995 offset = ALIGN(offset, 256);
7996
7997 /* Add GPC entries */
7998 if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
7999 hwpm_ctxsw_reg_count_max)) {
8000 goto cleanup;
8001 }
8002
8003 if (offset > hwpm_ctxsw_buffer_size) {
8004 nvgpu_err(g, "offset > buffer size");
8005 goto cleanup;
8006 }
8007
8008 sort(map, count, sizeof(*map), map_cmp, NULL);
8009
8010 g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map = map;
8011 g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map_count = count;
8012
8013 nvgpu_log_info(g, "Reg Addr => HWPM Ctxt switch buffer offset");
8014
8015 for (i = 0; i < count; i++) {
8016 nvgpu_log_info(g, "%08x => %08x", map[i].addr, map[i].offset);
8017 }
8018
8019 return 0;
8020cleanup:
8021 nvgpu_err(g, "Failed to create HWPM buffer offset map");
8022 nvgpu_big_free(g, map);
8023 return -EINVAL;
8024}
8025
8026/*
8027 * This function will return the 32 bit offset for a priv register if it is
8028 * present in the PM context buffer.
8029 */
8030static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
8031 u32 addr,
8032 u32 *priv_offset)
8033{
8034 struct gr_gk20a *gr = &g->gr;
8035 int err = 0;
8036 u32 count;
8037 struct ctxsw_buf_offset_map_entry *map, *result, map_key;
8038
8039 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
8040
8041 /* Create map of pri address and pm offset if necessary */
8042 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) {
8043 err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g);
8044 if (err != 0) {
8045 return err;
8046 }
8047 }
8048
8049 *priv_offset = 0;
8050
8051 map = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map;
8052 count = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map_count;
8053
8054 map_key.addr = addr;
8055 result = bsearch(&map_key, map, count, sizeof(*map), map_cmp);
8056
8057 if (result) {
8058 *priv_offset = result->offset;
8059 } else {
8060 nvgpu_err(g, "Lookup failed for address 0x%x", addr);
8061 err = -EINVAL;
8062 }
8063 return err;
8064}
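/*
 * Illustrative sketch only (not part of the original driver, hence guarded
 * out with #if 0): how a caller within this file could resolve a priv
 * register address to its byte offset inside the PM ctxsw buffer using the
 * sorted map that is built and bsearch()ed above. The function name is
 * hypothetical.
 */
#if 0
static int example_pm_offset_lookup(struct gk20a *g, u32 reg_addr)
{
	u32 pm_offset = 0U;
	int err;

	/* Builds the offset map on first use, then binary-searches it. */
	err = gr_gk20a_find_priv_offset_in_pm_buffer(g, reg_addr, &pm_offset);
	if (err != 0) {
		return err;
	}

	/* pm_offset now holds the buffer offset of reg_addr's saved value. */
	nvgpu_log_info(g, "0x%08x => 0x%08x", reg_addr, pm_offset);
	return 0;
}
#endif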
8065
8066bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
8067{
8068 int curr_gr_ctx;
8069 u32 curr_gr_tsgid;
8070 struct gk20a *g = ch->g;
8071 struct channel_gk20a *curr_ch;
8072 bool ret = false;
8073 struct tsg_gk20a *tsg;
8074
8075 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
8076
8077 /* when contexts are unloaded from GR, the valid bit is reset
8078 * but the instance pointer information remains intact. So the
8079 * valid bit must be checked to be absolutely certain that a
8080 * valid context is currently resident.
8081 */
8082 if (gr_fecs_current_ctx_valid_v(curr_gr_ctx) == 0U) {
8083		return false;
8084 }
8085
8086 curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
8087 &curr_gr_tsgid);
8088
8089 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
8090 "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
8091 " ch->chid=%d",
8092 (curr_ch != NULL) ? curr_ch->chid : U32_MAX,
8093 curr_gr_tsgid,
8094 ch->tsgid,
8095 ch->chid);
8096
8097 if (curr_ch == NULL) {
8098 return false;
8099 }
8100
8101 if (ch->chid == curr_ch->chid) {
8102 ret = true;
8103 }
8104
8105 tsg = tsg_gk20a_from_ch(ch);
8106 if ((tsg != NULL) && (tsg->tsgid == curr_gr_tsgid)) {
8107 ret = true;
8108 }
8109
8110 gk20a_channel_put(curr_ch);
8111 return ret;
8112}
8113
8114int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8115 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
8116 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
8117 bool ch_is_curr_ctx)
8118{
8119 struct gk20a *g = ch->g;
8120 struct tsg_gk20a *tsg;
8121 struct nvgpu_gr_ctx *gr_ctx;
8122 bool gr_ctx_ready = false;
8123 bool pm_ctx_ready = false;
8124 struct nvgpu_mem *current_mem = NULL;
8125 u32 i, j, offset, v;
8126 struct gr_gk20a *gr = &g->gr;
8127 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8128 u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count *
8129 sm_per_tpc;
8130 u32 *offsets = NULL;
8131 u32 *offset_addrs = NULL;
8132 u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
8133 int err = 0, pass;
8134
8135 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
8136 num_ctx_wr_ops, num_ctx_rd_ops);
8137
8138 tsg = tsg_gk20a_from_ch(ch);
8139 if (tsg == NULL) {
8140 return -EINVAL;
8141 }
8142
8143 gr_ctx = &tsg->gr_ctx;
8144
8145 if (ch_is_curr_ctx) {
8146 for (pass = 0; pass < 2; pass++) {
8147 ctx_op_nr = 0;
8148 for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
8149 /* only do ctx ops and only on the right pass */
8150 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
8151 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
8152 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) {
8153 continue;
8154 }
8155
8156 /* if this is a quad access, setup for special access*/
8157 if ((ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD))
8158 && (g->ops.gr.access_smpc_reg != NULL)) {
8159 g->ops.gr.access_smpc_reg(g,
8160 ctx_ops[i].quad,
8161 ctx_ops[i].offset);
8162 }
8163 offset = ctx_ops[i].offset;
8164
8165 if (pass == 0) { /* write pass */
8166 v = gk20a_readl(g, offset);
8167 v &= ~ctx_ops[i].and_n_mask_lo;
8168 v |= ctx_ops[i].value_lo;
8169 gk20a_writel(g, offset, v);
8170
8171 nvgpu_log(g, gpu_dbg_gpu_dbg,
8172 "direct wr: offset=0x%x v=0x%x",
8173 offset, v);
8174
8175 if (ctx_ops[i].op == REGOP(WRITE_64)) {
8176 v = gk20a_readl(g, offset + 4);
8177 v &= ~ctx_ops[i].and_n_mask_hi;
8178 v |= ctx_ops[i].value_hi;
8179 gk20a_writel(g, offset + 4, v);
8180
8181 nvgpu_log(g, gpu_dbg_gpu_dbg,
8182 "direct wr: offset=0x%x v=0x%x",
8183 offset + 4, v);
8184 }
8185
8186 } else { /* read pass */
8187 ctx_ops[i].value_lo =
8188 gk20a_readl(g, offset);
8189
8190 nvgpu_log(g, gpu_dbg_gpu_dbg,
8191 "direct rd: offset=0x%x v=0x%x",
8192 offset, ctx_ops[i].value_lo);
8193
8194 if (ctx_ops[i].op == REGOP(READ_64)) {
8195 ctx_ops[i].value_hi =
8196 gk20a_readl(g, offset + 4);
8197
8198 nvgpu_log(g, gpu_dbg_gpu_dbg,
8199 "direct rd: offset=0x%x v=0x%x",
8200						   offset + 4, ctx_ops[i].value_hi);
8201 } else {
8202 ctx_ops[i].value_hi = 0;
8203 }
8204 }
8205 ctx_op_nr++;
8206 }
8207 }
8208 goto cleanup;
8209 }
8210
8211 /* they're the same size, so just use one alloc for both */
8212 offsets = nvgpu_kzalloc(g, 2 * sizeof(u32) * max_offsets);
8213 if (offsets == NULL) {
8214 err = -ENOMEM;
8215 goto cleanup;
8216 }
8217 offset_addrs = offsets + max_offsets;
8218
8219 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
8220 if (err != 0) {
8221 goto cleanup;
8222 }
8223
8224 g->ops.mm.l2_flush(g, true);
8225
8226 /* write to appropriate place in context image,
8227 * first have to figure out where that really is */
8228
8229 /* first pass is writes, second reads */
8230 for (pass = 0; pass < 2; pass++) {
8231 ctx_op_nr = 0;
8232 for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
8233 u32 num_offsets;
8234
8235 /* only do ctx ops and only on the right pass */
8236 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
8237 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
8238 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) {
8239 continue;
8240 }
8241
8242 err = gr_gk20a_get_ctx_buffer_offsets(g,
8243 ctx_ops[i].offset,
8244 max_offsets,
8245 offsets, offset_addrs,
8246 &num_offsets,
8247 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
8248 ctx_ops[i].quad);
8249 if (err == 0) {
8250 if (!gr_ctx_ready) {
8251 gr_ctx_ready = true;
8252 }
8253 current_mem = &gr_ctx->mem;
8254 } else {
8255 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
8256 ctx_ops[i].offset,
8257 max_offsets,
8258 offsets, offset_addrs,
8259 &num_offsets);
8260 if (err != 0) {
8261 nvgpu_log(g, gpu_dbg_gpu_dbg,
8262 "ctx op invalid offset: offset=0x%x",
8263 ctx_ops[i].offset);
8264 ctx_ops[i].status =
8265 REGOP(STATUS_INVALID_OFFSET);
8266 continue;
8267 }
8268 if (!pm_ctx_ready) {
8269 /* Make sure ctx buffer was initialized */
8270 if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) {
8271 nvgpu_err(g,
8272 "Invalid ctx buffer");
8273 err = -EINVAL;
8274 goto cleanup;
8275 }
8276 pm_ctx_ready = true;
8277 }
8278 current_mem = &gr_ctx->pm_ctx.mem;
8279 }
8280
8281 /* if this is a quad access, setup for special access*/
8282 if ((ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD)) &&
8283 (g->ops.gr.access_smpc_reg != NULL)) {
8284 g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad,
8285 ctx_ops[i].offset);
8286 }
8287
8288 for (j = 0; j < num_offsets; j++) {
8289				/* sanity check gr ctx offsets so that, worst
8290				 * case, we never write outside the buffer
8291 */
8292 if ((current_mem == &gr_ctx->mem) &&
8293 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) {
8294 continue;
8295 }
8296 if (pass == 0) { /* write pass */
8297 v = nvgpu_mem_rd(g, current_mem, offsets[j]);
8298 v &= ~ctx_ops[i].and_n_mask_lo;
8299 v |= ctx_ops[i].value_lo;
8300 nvgpu_mem_wr(g, current_mem, offsets[j], v);
8301
8302 nvgpu_log(g, gpu_dbg_gpu_dbg,
8303 "context wr: offset=0x%x v=0x%x",
8304 offsets[j], v);
8305
8306 if (ctx_ops[i].op == REGOP(WRITE_64)) {
8307 v = nvgpu_mem_rd(g, current_mem, offsets[j] + 4);
8308 v &= ~ctx_ops[i].and_n_mask_hi;
8309 v |= ctx_ops[i].value_hi;
8310 nvgpu_mem_wr(g, current_mem, offsets[j] + 4, v);
8311
8312 nvgpu_log(g, gpu_dbg_gpu_dbg,
8313 "context wr: offset=0x%x v=0x%x",
8314 offsets[j] + 4, v);
8315 }
8316
8317				/* check to see if we need to add a special
8318				 * WAR for some of the SMPC perf regs */
8319 gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j],
8320 v, current_mem);
8321
8322 } else { /* read pass */
8323 ctx_ops[i].value_lo =
8324 nvgpu_mem_rd(g, current_mem, offsets[0]);
8325
8326 nvgpu_log(g, gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
8327 offsets[0], ctx_ops[i].value_lo);
8328
8329 if (ctx_ops[i].op == REGOP(READ_64)) {
8330 ctx_ops[i].value_hi =
8331 nvgpu_mem_rd(g, current_mem, offsets[0] + 4);
8332
8333 nvgpu_log(g, gpu_dbg_gpu_dbg,
8334 "context rd: offset=0x%x v=0x%x",
8335 offsets[0] + 4, ctx_ops[i].value_hi);
8336 } else {
8337 ctx_ops[i].value_hi = 0;
8338 }
8339 }
8340 }
8341 ctx_op_nr++;
8342 }
8343 }
8344
8345 cleanup:
8346 if (offsets) {
8347 nvgpu_kfree(g, offsets);
8348 }
8349
8350 if (gr_ctx->patch_ctx.mem.cpu_va) {
8351 gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
8352 }
8353
8354 return err;
8355}
8356
8357int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8358 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
8359 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
8360 bool *is_curr_ctx)
8361{
8362 struct gk20a *g = ch->g;
8363 int err, tmp_err;
8364 bool ch_is_curr_ctx;
8365
8366 /* disable channel switching.
8367 * at that point the hardware state can be inspected to
8368 * determine if the context we're interested in is current.
8369 */
8370 err = gr_gk20a_disable_ctxsw(g);
8371 if (err != 0) {
8372 nvgpu_err(g, "unable to stop gr ctxsw");
8373 /* this should probably be ctx-fatal... */
8374 return err;
8375 }
8376
8377 ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
8378 if (is_curr_ctx != NULL) {
8379 *is_curr_ctx = ch_is_curr_ctx;
8380 }
8381 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
8382 ch_is_curr_ctx);
8383
8384 err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
8385 num_ctx_rd_ops, ch_is_curr_ctx);
8386
8387 tmp_err = gr_gk20a_enable_ctxsw(g);
8388 if (tmp_err) {
8389 nvgpu_err(g, "unable to restart ctxsw!");
8390 err = tmp_err;
8391 }
8392
8393 return err;
8394}
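/*
 * Illustrative sketch only (not part of the original driver, hence guarded
 * out with #if 0): issuing a single context-register read through
 * gr_gk20a_exec_ctx_ops(). REGOP(READ_32) is assumed to exist alongside the
 * READ_64/WRITE_32 variants used above, and reg_offset is a placeholder.
 */
#if 0
static int example_read_ctx_reg(struct channel_gk20a *ch, u32 reg_offset,
				u32 *value)
{
	struct nvgpu_dbg_reg_op op;
	bool is_curr_ctx = false;
	int err;

	memset(&op, 0, sizeof(op));
	op.op = REGOP(READ_32);
	op.type = REGOP(TYPE_GR_CTX);
	op.offset = reg_offset;

	/* one op in total: zero context writes, one context read */
	err = gr_gk20a_exec_ctx_ops(ch, &op, 1, 0, 1, &is_curr_ctx);
	if (err == 0) {
		*value = op.value_lo;
	}

	return err;
}
#endif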
8395
8396void gr_gk20a_commit_global_pagepool(struct gk20a *g,
8397 struct nvgpu_gr_ctx *gr_ctx,
8398 u64 addr, u32 size, bool patch)
8399{
8400 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
8401 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
8402
8403 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
8404 gr_scc_pagepool_total_pages_f(size) |
8405 gr_scc_pagepool_valid_true_f(), patch);
8406
8407 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
8408 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
8409
8410 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
8411 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
8412
8413 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(),
8414 gr_pd_pagepool_total_pages_f(size) |
8415 gr_pd_pagepool_valid_true_f(), patch);
8416}
8417
8418void gk20a_init_gr(struct gk20a *g)
8419{
8420 nvgpu_cond_init(&g->gr.init_wq);
8421}
8422
8423int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
8424 u32 global_esr_mask, bool check_errors)
8425{
8426 bool locked_down;
8427 bool no_error_pending;
8428 u32 delay = GR_IDLE_CHECK_DEFAULT;
8429 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
8430 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8431 u32 dbgr_status0 = 0, dbgr_control0 = 0;
8432 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
8433 struct nvgpu_timeout timeout;
8434 u32 warp_esr;
8435
8436 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
8437 "GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
8438
8439 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
8440 NVGPU_TIMER_CPU_TIMER);
8441
8442 /* wait for the sm to lock down */
8443 do {
8444 u32 global_esr = g->ops.gr.get_sm_hww_global_esr(g,
8445 gpc, tpc, sm);
8446 dbgr_status0 = gk20a_readl(g,
8447 gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
8448
8449 warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
8450
8451 locked_down =
8452 (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
8453 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
8454 no_error_pending =
8455 check_errors &&
8456 (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
8457 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
8458 ((global_esr & ~global_esr_mask) == 0);
8459
8460 if (locked_down || no_error_pending) {
8461 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
8462 "GPC%d TPC%d SM%d: locked down SM",
8463 gpc, tpc, sm);
8464 return 0;
8465 }
8466
8467 /* if an mmu fault is pending and mmu debug mode is not
8468 * enabled, the sm will never lock down. */
8469 if (!mmu_debug_mode_enabled &&
8470 (g->ops.mm.mmu_fault_pending(g))) {
8471 nvgpu_err(g,
8472 "GPC%d TPC%d: mmu fault pending,"
8473 " SM%d will never lock down!", gpc, tpc, sm);
8474 return -EFAULT;
8475 }
8476
8477 nvgpu_usleep_range(delay, delay * 2);
8478 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
8479 } while (nvgpu_timeout_expired(&timeout) == 0);
8480
8481 dbgr_control0 = gk20a_readl(g,
8482 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
8483
8484 /* 64 bit read */
8485 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_1_r() + offset) << 32;
8486 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + offset);
8487
8488 /* 64 bit read */
8489 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r() + offset) << 32;
8490 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + offset);
8491
8492 /* 64 bit read */
8493 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r() + offset) << 32;
8494 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + offset);
8495
8496 nvgpu_err(g,
8497 "GPC%d TPC%d: timed out while trying to lock down SM", gpc, tpc);
8498 nvgpu_err(g,
8499 "STATUS0(0x%x)=0x%x CONTROL0=0x%x VALID_MASK=0x%llx PAUSE_MASK=0x%llx TRAP_MASK=0x%llx",
8500 gr_gpc0_tpc0_sm_dbgr_status0_r() + offset, dbgr_status0, dbgr_control0,
8501 warps_valid, warps_paused, warps_trapped);
8502
8503 return -ETIMEDOUT;
8504}
8505
8506void gk20a_gr_suspend_single_sm(struct gk20a *g,
8507 u32 gpc, u32 tpc, u32 sm,
8508 u32 global_esr_mask, bool check_errors)
8509{
8510 int err;
8511 u32 dbgr_control0;
8512 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8513
8514 /* if an SM debugger isn't attached, skip suspend */
8515 if (!g->ops.gr.sm_debugger_attached(g)) {
8516 nvgpu_err(g,
8517 "SM debugger not attached, skipping suspend!");
8518 return;
8519 }
8520
8521 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
8522 "suspending gpc:%d, tpc:%d, sm%d", gpc, tpc, sm);
8523
8524 /* assert stop trigger. */
8525 dbgr_control0 = gk20a_readl(g,
8526 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
8527 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8528 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
8529 dbgr_control0);
8530
8531 err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
8532 global_esr_mask, check_errors);
8533 if (err != 0) {
8534 nvgpu_err(g,
8535 "SuspendSm failed");
8536 return;
8537 }
8538}
8539
8540void gk20a_gr_suspend_all_sms(struct gk20a *g,
8541 u32 global_esr_mask, bool check_errors)
8542{
8543 struct gr_gk20a *gr = &g->gr;
8544 u32 gpc, tpc, sm;
8545 int err;
8546 u32 dbgr_control0;
8547 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8548
8549 /* if an SM debugger isn't attached, skip suspend */
8550 if (!g->ops.gr.sm_debugger_attached(g)) {
8551 nvgpu_err(g,
8552 "SM debugger not attached, skipping suspend!");
8553 return;
8554 }
8555
8556 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "suspending all sms");
8557 /* assert stop trigger. uniformity assumption: all SMs will have
8558 * the same state in dbg_control0.
8559 */
8560 dbgr_control0 =
8561 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
8562 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8563
8564 /* broadcast write */
8565 gk20a_writel(g,
8566 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8567
8568 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
8569 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
8570 for (sm = 0; sm < sm_per_tpc; sm++) {
8571 err = g->ops.gr.wait_for_sm_lock_down(g,
8572 gpc, tpc, sm,
8573 global_esr_mask, check_errors);
8574 if (err != 0) {
8575 nvgpu_err(g, "SuspendAllSms failed");
8576 return;
8577 }
8578 }
8579 }
8580 }
8581}
8582
8583void gk20a_gr_resume_single_sm(struct gk20a *g,
8584 u32 gpc, u32 tpc, u32 sm)
8585{
8586 u32 dbgr_control0;
8587 u32 offset;
8588 /*
8589 * The following requires some clarification. Despite the fact that both
8590 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
8591 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
8592 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
8593 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
8594 * (_DISABLE) as well.
8595	 *
8596 * Advice from the arch group: Disable the stop trigger first, as a
8597 * separate operation, in order to ensure that the trigger has taken
8598 * effect, before enabling the run trigger.
8599 */
8600
8601 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8602
8603 /*De-assert stop trigger */
8604 dbgr_control0 =
8605 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
8606 dbgr_control0 = set_field(dbgr_control0,
8607 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(),
8608 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f());
8609 gk20a_writel(g,
8610 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
8611
8612 /* Run trigger */
8613 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f();
8614 gk20a_writel(g,
8615 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
8616}
8617
8618void gk20a_gr_resume_all_sms(struct gk20a *g)
8619{
8620 u32 dbgr_control0;
8621 /*
8622 * The following requires some clarification. Despite the fact that both
8623 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
8624 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
8625 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
8626 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
8627 * (_DISABLE) as well.
8628	 *
8629 * Advice from the arch group: Disable the stop trigger first, as a
8630 * separate operation, in order to ensure that the trigger has taken
8631 * effect, before enabling the run trigger.
8632 */
8633
8634 /*De-assert stop trigger */
8635 dbgr_control0 =
8636 gk20a_readl(g, gr_gpcs_tpcs_sm_dbgr_control0_r());
8637 dbgr_control0 &= ~gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8638 gk20a_writel(g,
8639 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8640
8641 /* Run trigger */
8642 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f();
8643 gk20a_writel(g,
8644 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8645}
8646
8647int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
8648 struct channel_gk20a *ch, u64 sms, bool enable)
8649{
8650 struct nvgpu_dbg_reg_op *ops;
8651 unsigned int i = 0, sm_id;
8652 int err;
8653 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
8654 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
8655
8656 ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops));
8657 if (ops == NULL) {
8658 return -ENOMEM;
8659 }
8660 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
8661 int gpc, tpc;
8662 u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val;
8663
8664 if ((sms & BIT64(sm_id)) == 0ULL) {
8665 continue;
8666 }
8667
8668 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8669 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8670
8671 tpc_offset = tpc_in_gpc_stride * tpc;
8672 gpc_offset = gpc_stride * gpc;
8673 reg_offset = tpc_offset + gpc_offset;
8674
8675 ops[i].op = REGOP(WRITE_32);
8676 ops[i].type = REGOP(TYPE_GR_CTX);
8677 ops[i].offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
8678
8679 reg_mask = 0;
8680 reg_val = 0;
8681 if (enable) {
8682 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m();
8683 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_f();
8684 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m();
8685 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f();
8686 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m();
8687 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f();
8688 } else {
8689 reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m();
8690 reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_f();
8691 }
8692
8693 ops[i].and_n_mask_lo = reg_mask;
8694 ops[i].value_lo = reg_val;
8695 i++;
8696 }
8697
8698 err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0, NULL);
8699 if (err != 0) {
8700 nvgpu_err(g, "Failed to access register");
8701 }
8702 nvgpu_kfree(g, ops);
8703 return err;
8704}
8705
8706/*
8707 * gr_gk20a_suspend_context()
8708 * This API should be called with the dbg_session lock held
8709 * and ctxsw disabled.
8710 * Returns a bool indicating whether the context was resident
8711 * or not.
8712 */
8713bool gr_gk20a_suspend_context(struct channel_gk20a *ch)
8714{
8715 struct gk20a *g = ch->g;
8716 bool ctx_resident = false;
8717
8718 if (gk20a_is_channel_ctx_resident(ch)) {
8719 g->ops.gr.suspend_all_sms(g, 0, false);
8720 ctx_resident = true;
8721 } else {
8722 gk20a_disable_channel_tsg(g, ch);
8723 }
8724
8725 return ctx_resident;
8726}
8727
8728bool gr_gk20a_resume_context(struct channel_gk20a *ch)
8729{
8730 struct gk20a *g = ch->g;
8731 bool ctx_resident = false;
8732
8733 if (gk20a_is_channel_ctx_resident(ch)) {
8734 g->ops.gr.resume_all_sms(g);
8735 ctx_resident = true;
8736 } else {
8737 gk20a_enable_channel_tsg(g, ch);
8738 }
8739
8740 return ctx_resident;
8741}
8742
8743int gr_gk20a_suspend_contexts(struct gk20a *g,
8744 struct dbg_session_gk20a *dbg_s,
8745 int *ctx_resident_ch_fd)
8746{
8747 int local_ctx_resident_ch_fd = -1;
8748 bool ctx_resident;
8749 struct channel_gk20a *ch;
8750 struct dbg_session_channel_data *ch_data;
8751 int err = 0;
8752
8753 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
8754
8755 err = gr_gk20a_disable_ctxsw(g);
8756 if (err != 0) {
8757 nvgpu_err(g, "unable to stop gr ctxsw");
8758 goto clean_up;
8759 }
8760
8761 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
8762
8763 nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
8764 dbg_session_channel_data, ch_entry) {
8765 ch = g->fifo.channel + ch_data->chid;
8766
8767 ctx_resident = gr_gk20a_suspend_context(ch);
8768 if (ctx_resident) {
8769 local_ctx_resident_ch_fd = ch_data->channel_fd;
8770 }
8771 }
8772
8773 nvgpu_mutex_release(&dbg_s->ch_list_lock);
8774
8775 err = gr_gk20a_enable_ctxsw(g);
8776 if (err != 0) {
8777 nvgpu_err(g, "unable to restart ctxsw!");
8778 }
8779
8780 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
8781
8782clean_up:
8783 nvgpu_mutex_release(&g->dbg_sessions_lock);
8784
8785 return err;
8786}
8787
8788int gr_gk20a_resume_contexts(struct gk20a *g,
8789 struct dbg_session_gk20a *dbg_s,
8790 int *ctx_resident_ch_fd)
8791{
8792 int local_ctx_resident_ch_fd = -1;
8793 bool ctx_resident;
8794 struct channel_gk20a *ch;
8795 int err = 0;
8796 struct dbg_session_channel_data *ch_data;
8797
8798 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
8799
8800 err = gr_gk20a_disable_ctxsw(g);
8801 if (err != 0) {
8802 nvgpu_err(g, "unable to stop gr ctxsw");
8803 goto clean_up;
8804 }
8805
8806 nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
8807 dbg_session_channel_data, ch_entry) {
8808 ch = g->fifo.channel + ch_data->chid;
8809
8810 ctx_resident = gr_gk20a_resume_context(ch);
8811 if (ctx_resident) {
8812 local_ctx_resident_ch_fd = ch_data->channel_fd;
8813 }
8814 }
8815
8816 err = gr_gk20a_enable_ctxsw(g);
8817 if (err != 0) {
8818 nvgpu_err(g, "unable to restart ctxsw!");
8819 }
8820
8821 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
8822
8823clean_up:
8824 nvgpu_mutex_release(&g->dbg_sessions_lock);
8825
8826 return err;
8827}
8828
8829int gr_gk20a_trigger_suspend(struct gk20a *g)
8830{
8831 int err = 0;
8832 u32 dbgr_control0;
8833
8834 /* assert stop trigger. uniformity assumption: all SMs will have
8835 * the same state in dbg_control0. */
8836 dbgr_control0 =
8837 gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
8838 dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
8839
8840 /* broadcast write */
8841 gk20a_writel(g,
8842 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
8843
8844 return err;
8845}
8846
8847int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state)
8848{
8849 int err = 0;
8850 struct gr_gk20a *gr = &g->gr;
8851 u32 gpc, tpc, sm, sm_id;
8852 u32 global_mask;
8853
8854 /* Wait for the SMs to reach full stop. This condition is:
8855 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
8856 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
8857 * masks.
8858 */
8859 global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g);
8860
8861 /* Lock down all SMs */
8862 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
8863
8864 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8865 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8866 sm = g->gr.sm_to_cluster[sm_id].sm_index;
8867
8868 err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
8869 global_mask, false);
8870 if (err != 0) {
8871 nvgpu_err(g, "sm did not lock down!");
8872 return err;
8873 }
8874 }
8875
8876 /* Read the warp status */
8877 g->ops.gr.bpt_reg_info(g, w_state);
8878
8879 return 0;
8880}
8881
8882int gr_gk20a_resume_from_pause(struct gk20a *g)
8883{
8884 int err = 0;
8885 u32 reg_val;
8886
8887 /* Clear the pause mask to tell the GPU we want to resume everyone */
8888 gk20a_writel(g,
8889 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
8890
8891 /* explicitly re-enable forwarding of SM interrupts upon any resume */
8892 reg_val = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
8893 reg_val |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
8894 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), reg_val);
8895
8896	/* Now resume all SMs: write a 0 to the stop trigger,
8897	 * then a 1 to the run trigger */
8898 g->ops.gr.resume_all_sms(g);
8899
8900 return err;
8901}
8902
8903int gr_gk20a_clear_sm_errors(struct gk20a *g)
8904{
8905 int ret = 0;
8906 u32 gpc, tpc, sm;
8907 struct gr_gk20a *gr = &g->gr;
8908 u32 global_esr;
8909 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8910
8911 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
8912
8913 /* check if any tpc has an exception */
8914 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
8915
8916 for (sm = 0; sm < sm_per_tpc; sm++) {
8917 global_esr = g->ops.gr.get_sm_hww_global_esr(g,
8918 gpc, tpc, sm);
8919
8920				/* clearing hwws also causes tpc and gpc
8921 * exceptions to be cleared
8922 */
8923 g->ops.gr.clear_sm_hww(g,
8924 gpc, tpc, sm, global_esr);
8925 }
8926 }
8927 }
8928
8929 return ret;
8930}
8931
8932u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
8933{
8934 struct gr_gk20a *gr = &g->gr;
8935 u32 sm_id, tpc_exception_en = 0;
8936 u32 offset, regval, tpc_offset, gpc_offset;
8937 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
8938 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
8939
8940 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
8941
8942 tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
8943 gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
8944 offset = tpc_offset + gpc_offset;
8945
8946 regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
8947 offset);
8948		/* Each bit represents the corresponding enablement state; bit 0 corresponds to SM0 */
8949 tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
8950 }
8951
8952 return tpc_exception_en;
8953}
8954
8955u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
8956{
8957 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8958 u32 hww_warp_esr = gk20a_readl(g,
8959 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
8960 return hww_warp_esr;
8961}
8962
8963u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
8964{
8965 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
8966
8967 u32 hww_global_esr = gk20a_readl(g,
8968 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
8969
8970 return hww_global_esr;
8971}
8972
8973u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g)
8974{
8975 /*
8976 * These three interrupts don't require locking down the SM. They can
8977 * be handled by usermode clients as they aren't fatal. Additionally,
8978 * usermode clients may wish to allow some warps to execute while others
8979 * are at breakpoints, as opposed to fatal errors where all warps should
8980 * halt.
8981 */
8982 u32 global_esr_mask =
8983 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
8984 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
8985 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
8986
8987 return global_esr_mask;
8988}
8989
8990/* invalidate channel lookup tlb */
8991void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
8992{
8993 nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
8994 memset(gr->chid_tlb, 0,
8995 sizeof(struct gr_channel_map_tlb_entry) *
8996 GR_CHANNEL_MAP_TLB_SIZE);
8997 nvgpu_spinlock_release(&gr->ch_tlb_lock);
8998}
diff --git a/include/gk20a/gr_gk20a.h b/include/gk20a/gr_gk20a.h
new file mode 100644
index 0000000..08b81e8
--- /dev/null
+++ b/include/gk20a/gr_gk20a.h
@@ -0,0 +1,851 @@
1/*
2 * GK20A Graphics Engine
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef GR_GK20A_H
25#define GR_GK20A_H
26
27#include <nvgpu/types.h>
28
29#include "gr_ctx_gk20a.h"
30#include "mm_gk20a.h"
31#include <nvgpu/power_features/pg.h>
32
33#include <nvgpu/comptags.h>
34#include <nvgpu/cond.h>
35
36#define GR_IDLE_CHECK_DEFAULT 10 /* usec */
37#define GR_IDLE_CHECK_MAX 200 /* usec */
38#define GR_FECS_POLL_INTERVAL 5 /* usec */
39
40#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
41#define INVALID_MAX_WAYS 0xFFFFFFFF
42
43#define GK20A_FECS_UCODE_IMAGE "fecs.bin"
44#define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin"
45
46#define GK20A_GR_MAX_PES_PER_GPC 3
47
48#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */
49
50/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
51#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
52#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
53
54/*
55 * allocate a minimum of 1 page (4KB) worth of patch space; this gives
56 * 512 entries of address and data pairs
57 */
58#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2
59#define PATCH_CTX_SLOTS_PER_PAGE \
60 (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32)))
61#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32))
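/*
 * Worked example (illustrative only): with PAGE_SIZE = 4096 and two slots of
 * sizeof(u32) = 4 bytes per patch entry, PATCH_CTX_SLOTS_PER_PAGE evaluates
 * to 4096 / (2 * 4) = 512, matching the "512 entries" figure in the comment
 * above.
 */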
62
63#define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI (1 << 0)
64#define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP (1 << 1)
65
66#define NVGPU_PREEMPTION_MODE_COMPUTE_WFI (1 << 0)
67#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1)
68#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2)
69
70#define CTXSW_INTR0 BIT32(0)
71#define CTXSW_INTR1 BIT32(1)
72
73#define MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL 0x26
74
75struct tsg_gk20a;
76struct channel_gk20a;
77struct nvgpu_warpstate;
78
79enum ctxsw_addr_type;
80
81enum /* global_ctx_buffer */ {
82 CIRCULAR = 0,
83 PAGEPOOL = 1,
84 ATTRIBUTE = 2,
85 CIRCULAR_VPR = 3,
86 PAGEPOOL_VPR = 4,
87 ATTRIBUTE_VPR = 5,
88 GOLDEN_CTX = 6,
89 PRIV_ACCESS_MAP = 7,
90 /* #8 is reserved */
91 FECS_TRACE_BUFFER = 9,
92 NR_GLOBAL_CTX_BUF = 10
93};
94
95/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
96enum /* global_ctx_buffer_va */ {
97 CIRCULAR_VA = 0,
98 PAGEPOOL_VA = 1,
99 ATTRIBUTE_VA = 2,
100 GOLDEN_CTX_VA = 3,
101 PRIV_ACCESS_MAP_VA = 4,
102 /* #5 is reserved */
103 FECS_TRACE_BUFFER_VA = 6,
104 NR_GLOBAL_CTX_BUF_VA = 7
105};
106
107enum {
108 WAIT_UCODE_LOOP,
109 WAIT_UCODE_TIMEOUT,
110 WAIT_UCODE_ERROR,
111 WAIT_UCODE_OK
112};
113
114enum {
115 GR_IS_UCODE_OP_EQUAL,
116 GR_IS_UCODE_OP_NOT_EQUAL,
117 GR_IS_UCODE_OP_AND,
118 GR_IS_UCODE_OP_LESSER,
119 GR_IS_UCODE_OP_LESSER_EQUAL,
120 GR_IS_UCODE_OP_SKIP
121};
122
123enum {
124 eUcodeHandshakeInitComplete = 1,
125 eUcodeHandshakeMethodFinished
126};
127
128enum {
129 ELCG_MODE = (1 << 0),
130 BLCG_MODE = (1 << 1),
131 INVALID_MODE = (1 << 2)
132};
133
134enum {
135 NVGPU_EVENT_ID_BPT_INT = 0,
136 NVGPU_EVENT_ID_BPT_PAUSE,
137 NVGPU_EVENT_ID_BLOCKING_SYNC,
138 NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED,
139 NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE,
140 NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN,
141 NVGPU_EVENT_ID_MAX,
142};
143
144#ifndef GR_GO_IDLE_BUNDLE
145#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */
146#endif
147
148struct gr_channel_map_tlb_entry {
149 u32 curr_ctx;
150 u32 chid;
151 u32 tsgid;
152};
153
154struct gr_zcull_gk20a {
155 u32 aliquot_width;
156 u32 aliquot_height;
157 u32 aliquot_size;
158 u32 total_aliquots;
159
160 u32 width_align_pixels;
161 u32 height_align_pixels;
162 u32 pixel_squares_by_aliquots;
163};
164
165struct gr_zcull_info {
166 u32 width_align_pixels;
167 u32 height_align_pixels;
168 u32 pixel_squares_by_aliquots;
169 u32 aliquot_total;
170 u32 region_byte_multiplier;
171 u32 region_header_size;
172 u32 subregion_header_size;
173 u32 subregion_width_align_pixels;
174 u32 subregion_height_align_pixels;
175 u32 subregion_count;
176};
177
178#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */
179
180#define GK20A_STARTOF_ZBC_TABLE 1U /* index zero reserved to indicate "not ZBCd" */
181#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
182#define GK20A_ZBC_TABLE_SIZE (16 - 1)
183
184#define GK20A_ZBC_TYPE_INVALID 0
185#define GK20A_ZBC_TYPE_COLOR 1
186#define GK20A_ZBC_TYPE_DEPTH 2
187#define T19X_ZBC 3
188
189struct zbc_color_table {
190 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
191 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
192 u32 format;
193 u32 ref_cnt;
194};
195
196struct zbc_depth_table {
197 u32 depth;
198 u32 format;
199 u32 ref_cnt;
200};
201
202struct zbc_s_table {
203 u32 stencil;
204 u32 format;
205 u32 ref_cnt;
206};
207
208struct zbc_entry {
209 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
210 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
211 u32 depth;
212 u32 type; /* color or depth */
213 u32 format;
214};
215
216struct zbc_query_params {
217 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
218 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
219 u32 depth;
220 u32 ref_cnt;
221 u32 format;
222 u32 type; /* color or depth */
223 u32 index_size; /* [out] size, [in] index */
224};
225
226struct sm_info {
227 u32 gpc_index;
228 u32 tpc_index;
229 u32 sm_index;
230 u32 global_tpc_index;
231};
232
233#if defined(CONFIG_GK20A_CYCLE_STATS)
234struct gk20a_cs_snapshot_client;
235struct gk20a_cs_snapshot;
236#endif
237
238struct gr_gk20a_isr_data {
239 u32 addr;
240 u32 data_lo;
241 u32 data_hi;
242 u32 curr_ctx;
243 struct channel_gk20a *ch;
244 u32 offset;
245 u32 sub_chan;
246 u32 class_num;
247};
248
249struct gr_ctx_buffer_desc {
250 void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
251 struct nvgpu_mem mem;
252 void *priv;
253};
254
255struct nvgpu_preemption_modes_rec {
256 u32 graphics_preemption_mode_flags; /* supported preemption modes */
257 u32 compute_preemption_mode_flags; /* supported preemption modes */
258
259 u32 default_graphics_preempt_mode; /* default mode */
260 u32 default_compute_preempt_mode; /* default mode */
261};
262
263struct gr_gk20a {
264 struct gk20a *g;
265 struct {
266 bool dynamic;
267
268 u32 buffer_size;
269 u32 buffer_total_size;
270
271 bool golden_image_initialized;
272 u32 golden_image_size;
273 u32 *local_golden_image;
274
275 u32 hwpm_ctxsw_buffer_offset_map_count;
276 struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;
277
278 u32 zcull_ctxsw_image_size;
279
280 u32 pm_ctxsw_image_size;
281
282 u32 buffer_header_size;
283
284 u32 priv_access_map_size;
285
286 u32 fecs_trace_buffer_size;
287
288 struct gr_ucode_gk20a ucode;
289
290 struct av_list_gk20a sw_bundle_init;
291 struct av_list_gk20a sw_method_init;
292 struct aiv_list_gk20a sw_ctx_load;
293 struct av_list_gk20a sw_non_ctx_load;
294 struct av_list_gk20a sw_veid_bundle_init;
295 struct av64_list_gk20a sw_bundle64_init;
296 struct {
297 struct aiv_list_gk20a sys;
298 struct aiv_list_gk20a gpc;
299 struct aiv_list_gk20a tpc;
300 struct aiv_list_gk20a zcull_gpc;
301 struct aiv_list_gk20a ppc;
302 struct aiv_list_gk20a pm_sys;
303 struct aiv_list_gk20a pm_gpc;
304 struct aiv_list_gk20a pm_tpc;
305 struct aiv_list_gk20a pm_ppc;
306 struct aiv_list_gk20a perf_sys;
307 struct aiv_list_gk20a perf_gpc;
308 struct aiv_list_gk20a fbp;
309 struct aiv_list_gk20a fbp_router;
310 struct aiv_list_gk20a gpc_router;
311 struct aiv_list_gk20a pm_ltc;
312 struct aiv_list_gk20a pm_fbpa;
313 struct aiv_list_gk20a perf_sys_router;
314 struct aiv_list_gk20a perf_pma;
315 struct aiv_list_gk20a pm_rop;
316 struct aiv_list_gk20a pm_ucgpc;
317 struct aiv_list_gk20a etpc;
318 struct aiv_list_gk20a pm_cau;
319 } ctxsw_regs;
320 u32 regs_base_index;
321 bool valid;
322
323 u32 preempt_image_size;
324 bool force_preemption_gfxp;
325 bool force_preemption_cilp;
326 bool dump_ctxsw_stats_on_channel_close;
327 } ctx_vars;
328
329 struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
330 struct nvgpu_mutex fecs_mutex; /* protect fecs method */
331
332#define GR_NETLIST_DYNAMIC -1
333#define GR_NETLIST_STATIC_A 'A'
334 int netlist;
335
336 struct nvgpu_cond init_wq;
337 int initialized;
338
339 u32 num_fbps;
340
341 u32 max_comptag_lines;
342 u32 compbit_backing_size;
343 u32 comptags_per_cacheline;
344 u32 slices_per_ltc;
345 u32 cacheline_size;
346 u32 gobs_per_comptagline_per_slice;
347
348 u32 max_gpc_count;
349 u32 max_fbps_count;
350 u32 max_tpc_per_gpc_count;
351 u32 max_zcull_per_gpc_count;
352 u32 max_tpc_count;
353
354 u32 sys_count;
355 u32 gpc_count;
356 u32 pe_count_per_gpc;
357 u32 ppc_count;
358 u32 *gpc_ppc_count;
359 u32 tpc_count;
360 u32 *gpc_tpc_count;
361 u32 *gpc_tpc_mask;
362 u32 zcb_count;
363 u32 *gpc_zcb_count;
364 u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
365 u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
366 u32 *gpc_skip_mask;
367
368 u32 bundle_cb_default_size;
369 u32 min_gpm_fifo_depth;
370 u32 bundle_cb_token_limit;
371 u32 attrib_cb_default_size;
372 u32 attrib_cb_size;
373 u32 attrib_cb_gfxp_default_size;
374 u32 attrib_cb_gfxp_size;
375 u32 alpha_cb_default_size;
376 u32 alpha_cb_size;
377 u32 timeslice_mode;
378 u32 czf_bypass;
379 u32 pd_max_batches;
380 u32 gfxp_wfi_timeout_count;
381 u32 gfxp_wfi_timeout_unit;
382
383 /*
384	 * The deductible memory size for max_comptag_mem (in MBytes).
385	 * Usually close to the amount of memory the running system is using.
386 */
387 u32 comptag_mem_deduct;
388
389 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
390
391 u8 *map_tiles;
392 u32 map_tile_count;
393 u32 map_row_offset;
394
395 u32 max_comptag_mem; /* max memory size (MB) for comptag */
396 struct compbit_store_desc compbit_store;
397 struct gk20a_comptag_allocator comp_tags;
398
399 struct gr_zcull_gk20a zcull;
400
401 struct nvgpu_mutex zbc_lock;
402 struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
403 struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
404 struct zbc_s_table zbc_s_tbl[GK20A_ZBC_TABLE_SIZE];
405 s32 max_default_color_index;
406 s32 max_default_depth_index;
407 s32 max_default_s_index;
408
409 u32 max_used_color_index;
410 u32 max_used_depth_index;
411 u32 max_used_s_index;
412
413#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must be a power of 2 */
414 struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
415 u32 channel_tlb_flush_index;
416 struct nvgpu_spinlock ch_tlb_lock;
417
418 void (*remove_support)(struct gr_gk20a *gr);
419 bool sw_ready;
420 bool skip_ucode_init;
421
422 struct nvgpu_preemption_modes_rec preemption_mode_rec;
423
424 u32 fecs_feature_override_ecc_val;
425
426 int cilp_preempt_pending_chid;
427
428 u32 fbp_en_mask;
429 u32 *fbp_rop_l2_en_mask;
430 u32 no_of_sm;
431 struct sm_info *sm_to_cluster;
432
433#if defined(CONFIG_GK20A_CYCLE_STATS)
434 struct nvgpu_mutex cs_lock;
435 struct gk20a_cs_snapshot *cs_data;
436#endif
437 u32 max_css_buffer_size;
438};
439
440void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
441
442/* contexts associated with a TSG */
443struct nvgpu_gr_ctx {
444 struct nvgpu_mem mem;
445
446 u32 graphics_preempt_mode;
447 u32 compute_preempt_mode;
448
449 struct nvgpu_mem preempt_ctxsw_buffer;
450 struct nvgpu_mem spill_ctxsw_buffer;
451 struct nvgpu_mem betacb_ctxsw_buffer;
452 struct nvgpu_mem pagepool_ctxsw_buffer;
453 u32 ctx_id;
454 bool ctx_id_valid;
455 bool cilp_preempt_pending;
456 bool boosted_ctx;
457 bool golden_img_loaded;
458
459#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
460 u64 virt_ctx;
461#endif
462
463 struct patch_desc patch_ctx;
464 struct zcull_ctx_desc zcull_ctx;
465 struct pm_ctx_desc pm_ctx;
466 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
467 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
468 int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
469 bool global_ctx_buffer_mapped;
470
471 u32 tsgid;
472};
473
474struct gk20a_ctxsw_ucode_segment {
475 u32 offset;
476 u32 size;
477};
478
479struct gk20a_ctxsw_ucode_segments {
480 u32 boot_entry;
481 u32 boot_imem_offset;
482 u32 boot_signature;
483 struct gk20a_ctxsw_ucode_segment boot;
484 struct gk20a_ctxsw_ucode_segment code;
485 struct gk20a_ctxsw_ucode_segment data;
486};
487
488/* sums over the ucode files, computed as sequences of u32 and matched
489 * against the boot_signature field in the structure above */
490
491/* T18X FECS remains the same as T21X,
492 * so FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED is used
493 * for T18X as well. */
494#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED 0x68edab34
495#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE 0x9121ab5c
496#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5c
497#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78
498#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b
499#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f
500
501#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2
502#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5
503#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3
504#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877
505
506#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d
507#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10
508
509#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da
510
511struct gk20a_ctxsw_ucode_info {
512 u64 *p_va;
513 struct nvgpu_mem inst_blk_desc;
514 struct nvgpu_mem surface_desc;
515 struct gk20a_ctxsw_ucode_segments fecs;
516 struct gk20a_ctxsw_ucode_segments gpccs;
517};
518
519struct gk20a_ctxsw_bootloader_desc {
520 u32 start_offset;
521 u32 size;
522 u32 imem_offset;
523 u32 entry_point;
524};
525
526struct fecs_method_op_gk20a {
527 struct {
528 u32 addr;
529 u32 data;
530 } method;
531
532 struct {
533 u32 id;
534 u32 data;
535 u32 clr;
536 u32 *ret;
537 u32 ok;
538 u32 fail;
539 } mailbox;
540
541 struct {
542 u32 ok;
543 u32 fail;
544 } cond;
545
546};
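As a rough illustration of how a struct fecs_method_op_gk20a might be filled in before being handed to gr_gk20a_submit_fecs_method_op() (declared later in this header), consider the sketch below. The method address/data, mailbox values, and comparison codes are placeholders chosen for the example, not values defined here.

	/* Sketch: issue one FECS method and wait on mailbox 0.
	 * All numeric values below are placeholders. */
	static int example_submit_fecs_op(struct gk20a *g)
	{
		struct fecs_method_op_gk20a op = {
			.method  = { .addr = 0x0, .data = 0x0 },
			.mailbox = { .id = 0U, .data = 0U, .clr = ~0U,
				     .ret = NULL, .ok = 1U, .fail = 2U },
			.cond    = { .ok = 0U, .fail = 0U }, /* placeholder compare-op codes */
		};

		return gr_gk20a_submit_fecs_method_op(g, op, false /* sleepduringwait */);
	}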
547
548struct nvgpu_warpstate {
549 u64 valid_warps[2];
550 u64 trapped_warps[2];
551 u64 paused_warps[2];
552};
553
554struct gpu_ops;
555int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
556 struct channel_gk20a *c);
557void gk20a_init_gr(struct gk20a *g);
558int gk20a_init_gr_support(struct gk20a *g);
559int gk20a_enable_gr_hw(struct gk20a *g);
560int gk20a_gr_reset(struct gk20a *g);
561void gk20a_gr_wait_initialized(struct gk20a *g);
562
563int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
564
565int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
566
567int gk20a_gr_isr(struct gk20a *g);
568u32 gk20a_gr_nonstall_isr(struct gk20a *g);
569
570/* zcull */
571u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
572int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
573 struct channel_gk20a *c, u64 zcull_va, u32 mode);
574int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
575 struct gr_zcull_info *zcull_params);
576void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
577 u32 *zcull_map_tiles);
578/* zbc */
579int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
580 struct zbc_entry *zbc_val);
581int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
582 struct zbc_query_params *query_params);
583int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
584 struct zbc_entry *zbc_val);
585int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
586
587/* pmu */
588int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
589int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
590 struct nvgpu_mem *inst_block);
591int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);
592
593void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);
594
595/* sm */
596bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
597u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g);
598
599#define gr_gk20a_elpg_protected_call(g, func) \
600 ({ \
601 int err = 0; \
602 if (g->support_pmu) {\
603 err = nvgpu_pg_elpg_disable(g);\
604 if (err != 0) {\
605 err = nvgpu_pg_elpg_enable(g); \
606 } \
607 } \
608 if (err == 0) { \
609 err = func; \
610 if (g->support_pmu) {\
611 (void)nvgpu_pg_elpg_enable(g); \
612 } \
613 } \
614 err; \
615 })
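The macro above disables ELPG (when a PMU is present) around an int-returning expression and re-enables it afterwards. A short usage sketch follows; gr_gk20a_init_fs_state() is simply one of the functions declared later in this header, used here only as a plausible callee.

	/* Sketch: run FS-state init with ELPG disabled for the duration. */
	static int example_protected_init(struct gk20a *g)
	{
		return gr_gk20a_elpg_protected_call(g, gr_gk20a_init_fs_state(g));
	}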
616
617int gk20a_gr_suspend(struct gk20a *g);
618
619struct nvgpu_dbg_reg_op;
620int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
621 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
622 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
623 bool *is_curr_ctx);
624int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
625 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
626 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
627 bool ch_is_curr_ctx);
628int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
629 u32 addr,
630 u32 max_offsets,
631 u32 *offsets, u32 *offset_addrs,
632 u32 *num_offsets,
633 bool is_quad, u32 quad);
634int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
635 u32 addr,
636 u32 max_offsets,
637 u32 *offsets, u32 *offset_addrs,
638 u32 *num_offsets);
639int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
640 struct channel_gk20a *c,
641 bool enable_smpc_ctxsw);
642int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
643 struct channel_gk20a *c,
644 u64 gpu_va,
645 u32 mode);
646
647struct nvgpu_gr_ctx;
648void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
649 u32 addr, u32 data, bool patch);
650int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
651 struct nvgpu_gr_ctx *ch_ctx,
652 bool update_patch_count);
653void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
654 struct nvgpu_gr_ctx *ch_ctx,
655 bool update_patch_count);
656void gr_gk20a_commit_global_pagepool(struct gk20a *g,
657 struct nvgpu_gr_ctx *ch_ctx,
658 u64 addr, u32 size, bool patch);
659void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
660void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
661int gr_gk20a_init_fs_state(struct gk20a *g);
662int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
663int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
664int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
665void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
666void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
667 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
668void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
669 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
670
671
672void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
673int gr_gk20a_disable_ctxsw(struct gk20a *g);
674int gr_gk20a_enable_ctxsw(struct gk20a *g);
675void gk20a_gr_resume_single_sm(struct gk20a *g,
676 u32 gpc, u32 tpc, u32 sm);
677void gk20a_gr_resume_all_sms(struct gk20a *g);
678void gk20a_gr_suspend_single_sm(struct gk20a *g,
679 u32 gpc, u32 tpc, u32 sm,
680 u32 global_esr_mask, bool check_errors);
681void gk20a_gr_suspend_all_sms(struct gk20a *g,
682 u32 global_esr_mask, bool check_errors);
683u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
684int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
685 struct channel_gk20a *ch, u64 sms, bool enable);
686bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
687int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
688 struct zbc_entry *color_val, u32 index);
689int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
690 struct zbc_entry *depth_val, u32 index);
691int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
692 struct zbc_entry *zbc_val);
693void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
694int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
695 u32 expect_delay);
696int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
697 bool *post_event, struct channel_gk20a *fault_ch,
698 u32 *hww_global_esr);
699int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
700 bool *post_event);
701int gr_gk20a_init_ctx_state(struct gk20a *g);
702int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
703 struct fecs_method_op_gk20a op,
704 bool sleepduringwait);
705int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
706 struct fecs_method_op_gk20a op,
707 bool sleepduringwait);
708int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
709 struct fecs_method_op_gk20a op);
710int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
711 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
712 u32 class, u32 padding);
713void gr_gk20a_free_gr_ctx(struct gk20a *g,
714 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
715int gr_gk20a_halt_pipe(struct gk20a *g);
716
717#if defined(CONFIG_GK20A_CYCLE_STATS)
718int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
719		u32 perfmon_id_count,	/* in  - number of perfmons */
720		u32 *perfmon_id_start,	/* out - index of first pm */
721		/* in/out - pointer to client data used in later calls */
722 struct gk20a_cs_snapshot_client *css_client);
723
724int gr_gk20a_css_detach(struct channel_gk20a *ch,
725 struct gk20a_cs_snapshot_client *css_client);
726int gr_gk20a_css_flush(struct channel_gk20a *ch,
727 struct gk20a_cs_snapshot_client *css_client);
728
729void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);
730
731#else
732/* stub cleanup function used when cyclestats snapshots are not enabled */
733static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
734{
735 (void)g;
736}
737#endif
738
739void gr_gk20a_fecs_host_int_enable(struct gk20a *g);
740int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
741 struct gr_gk20a_isr_data *isr_data);
742int gk20a_gr_lock_down_sm(struct gk20a *g,
743 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
744 bool check_errors);
745int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
746 u32 global_esr_mask, bool check_errors);
747int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
748 u32 *mailbox_ret, u32 opc_success,
749 u32 mailbox_ok, u32 opc_fail,
750 u32 mailbox_fail, bool sleepduringwait);
751
752int gr_gk20a_get_ctx_id(struct gk20a *g,
753 struct channel_gk20a *c,
754 u32 *ctx_id);
755
756u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
757u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
758
759int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
760 u32 expect_delay);
761
762struct dbg_session_gk20a;
763
764bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
765bool gr_gk20a_resume_context(struct channel_gk20a *ch);
766int gr_gk20a_suspend_contexts(struct gk20a *g,
767 struct dbg_session_gk20a *dbg_s,
768 int *ctx_resident_ch_fd);
769int gr_gk20a_resume_contexts(struct gk20a *g,
770 struct dbg_session_gk20a *dbg_s,
771 int *ctx_resident_ch_fd);
772void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
773void gk20a_gr_enable_exceptions(struct gk20a *g);
774int gr_gk20a_trigger_suspend(struct gk20a *g);
775int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state);
776int gr_gk20a_resume_from_pause(struct gk20a *g);
777int gr_gk20a_clear_sm_errors(struct gk20a *g);
778u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
779
780int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
781
782int gr_gk20a_init_sm_id_table(struct gk20a *g);
783
784int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
785
786void gr_gk20a_write_zcull_ptr(struct gk20a *g,
787 struct nvgpu_mem *mem, u64 gpu_va);
788
789void gr_gk20a_write_pm_ptr(struct gk20a *g,
790 struct nvgpu_mem *mem, u64 gpu_va);
791
792u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
793u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
794void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
795 u32 *esr_sm_sel);
796void gk20a_gr_init_ovr_sm_dsm_perf(void);
797void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
798 u32 **ovr_perf_regs);
799void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
800 struct nvgpu_mem *mem);
801u32 gr_gk20a_get_patch_slots(struct gk20a *g);
802int gk20a_gr_handle_notify_pending(struct gk20a *g,
803 struct gr_gk20a_isr_data *isr_data);
804
805int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
806int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
807 struct channel_gk20a *c);
808int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
809 struct channel_gk20a *c, bool patch);
810
811int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
812 struct channel_gk20a *c);
813u32 gk20a_init_sw_bundle(struct gk20a *g);
814int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
815int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
816 struct gr_gk20a_isr_data *isr_data);
817int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
818 struct ctxsw_buf_offset_map_entry *map,
819 struct aiv_list_gk20a *regs,
820 u32 *count, u32 *offset,
821 u32 max_cnt, u32 base,
822 u32 num_fbpas, u32 stride, u32 mask);
823int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
824 struct aiv_list_gk20a *regs,
825 u32 *count, u32 *offset,
826 u32 max_cnt, u32 base, u32 mask);
827int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
828 enum ctxsw_addr_type *addr_type,
829 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
830 u32 *broadcast_flags);
831int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
832 u32 gpc_num,
833 u32 *priv_addr_table, u32 *t);
834int gr_gk20a_create_priv_addr_table(struct gk20a *g,
835 u32 addr,
836 u32 *priv_addr_table,
837 u32 *num_registers);
838void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
839 u32 num_fbpas,
840 u32 *priv_addr_table, u32 *t);
841int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
842 enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs,
843 u32 reg_list_ppc_count, u32 *__offset_in_segment);
844
845void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
846 struct gr_ctx_buffer_desc *desc);
847int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
848 struct gr_ctx_buffer_desc *desc, size_t size);
849void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr);
850int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g);
851#endif /*__GR_GK20A_H__*/
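Several of the declarations above come in disable/enable or suspend/resume pairs. A hedged sketch of the usual pairing for context-switch control is shown here; the work done between the two calls is a placeholder.

	/* Sketch: stop FECS context switching, do work that must not race
	 * with a context switch, then re-enable it. Error handling minimal. */
	static int example_with_ctxsw_disabled(struct gk20a *g)
	{
		int err = gr_gk20a_disable_ctxsw(g);

		if (err != 0)
			return err;

		/* ... placeholder: inspect or patch context state here ... */

		return gr_gk20a_enable_ctxsw(g);
	}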
diff --git a/include/gk20a/gr_pri_gk20a.h b/include/gk20a/gr_pri_gk20a.h
new file mode 100644
index 0000000..d832d90
--- /dev/null
+++ b/include/gk20a/gr_pri_gk20a.h
@@ -0,0 +1,261 @@
1/*
2 * GK20A Graphics Context Pri Register Addressing
3 *
4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef GR_PRI_GK20A_H
25#define GR_PRI_GK20A_H
26
27/*
 28 * These convenience macros are generally for use in the management/modification
29 * of the context state store for gr/compute contexts.
30 */
31
32/*
33 * GPC pri addressing
34 */
35static inline u32 pri_gpccs_addr_width(void)
36{
37 return 15; /*from where?*/
38}
39static inline u32 pri_gpccs_addr_mask(u32 addr)
40{
41 return addr & ((1 << pri_gpccs_addr_width()) - 1);
42}
43static inline u32 pri_gpc_addr(struct gk20a *g, u32 addr, u32 gpc)
44{
45 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
46 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
47 return gpc_base + (gpc * gpc_stride) + addr;
48}
49static inline bool pri_is_gpc_addr_shared(struct gk20a *g, u32 addr)
50{
51 u32 gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_SHARED_BASE);
52 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
53 return (addr >= gpc_shared_base) &&
54 (addr < gpc_shared_base + gpc_stride);
55}
56static inline bool pri_is_gpc_addr(struct gk20a *g, u32 addr)
57{
58 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
59 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
60 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
61 return ((addr >= gpc_base) &&
62 (addr < gpc_base + num_gpcs * gpc_stride)) ||
63 pri_is_gpc_addr_shared(g, addr);
64}
65static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr)
66{
67 u32 i, start;
68 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
69 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
70 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
71 for (i = 0; i < num_gpcs; i++) {
72 start = gpc_base + (i * gpc_stride);
73 if ((addr >= start) && (addr < (start + gpc_stride)))
74 return i;
75 }
76 return 0;
77}
78
79/*
80 * PPC pri addressing
81 */
82static inline bool pri_is_ppc_addr_shared(struct gk20a *g, u32 addr)
83{
84 u32 ppc_in_gpc_shared_base = nvgpu_get_litter_value(g,
85 GPU_LIT_PPC_IN_GPC_SHARED_BASE);
86 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
87 GPU_LIT_PPC_IN_GPC_STRIDE);
88
89 return ((addr >= ppc_in_gpc_shared_base) &&
90 (addr < (ppc_in_gpc_shared_base + ppc_in_gpc_stride)));
91}
92
93static inline bool pri_is_ppc_addr(struct gk20a *g, u32 addr)
94{
95 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
96 GPU_LIT_PPC_IN_GPC_BASE);
97 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
98 GPU_LIT_NUM_PES_PER_GPC);
99 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
100 GPU_LIT_PPC_IN_GPC_STRIDE);
101
102 return ((addr >= ppc_in_gpc_base) &&
103 (addr < ppc_in_gpc_base + num_pes_per_gpc * ppc_in_gpc_stride))
104 || pri_is_ppc_addr_shared(g, addr);
105}
106
107/*
108 * TPC pri addressing
109 */
110static inline u32 pri_tpccs_addr_width(void)
111{
112 return 11; /* from where? */
113}
114static inline u32 pri_tpccs_addr_mask(u32 addr)
115{
116 return addr & ((1 << pri_tpccs_addr_width()) - 1);
117}
118static inline u32 pri_fbpa_addr_mask(struct gk20a *g, u32 addr)
119{
120 return addr & (nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE) - 1);
121}
122static inline u32 pri_tpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc)
123{
124 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
125 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
126 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
127 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
128 return gpc_base + (gpc * gpc_stride) +
129 tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) +
130 addr;
131}
132static inline bool pri_is_tpc_addr_shared(struct gk20a *g, u32 addr)
133{
134 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
135 u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
136 return (addr >= tpc_in_gpc_shared_base) &&
137 (addr < (tpc_in_gpc_shared_base +
138 tpc_in_gpc_stride));
139}
140static inline u32 pri_fbpa_addr(struct gk20a *g, u32 addr, u32 fbpa)
141{
142 return (nvgpu_get_litter_value(g, GPU_LIT_FBPA_BASE) + addr +
143 (fbpa * nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE)));
144}
145static inline bool pri_is_fbpa_addr_shared(struct gk20a *g, u32 addr)
146{
147 u32 fbpa_shared_base = nvgpu_get_litter_value(g, GPU_LIT_FBPA_SHARED_BASE);
148 u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
149 return ((addr >= fbpa_shared_base) &&
150 (addr < (fbpa_shared_base + fbpa_stride)));
151}
152static inline bool pri_is_fbpa_addr(struct gk20a *g, u32 addr)
153{
154 u32 fbpa_base = nvgpu_get_litter_value(g, GPU_LIT_FBPA_BASE);
155 u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
156 u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
157 return (((addr >= fbpa_base) &&
158 (addr < (fbpa_base + num_fbpas * fbpa_stride)))
159 || pri_is_fbpa_addr_shared(g, addr));
160}
161/*
162 * BE pri addressing
163 */
164static inline u32 pri_becs_addr_width(void)
165{
166 return 10;/* from where? */
167}
168static inline u32 pri_becs_addr_mask(u32 addr)
169{
170 return addr & ((1 << pri_becs_addr_width()) - 1);
171}
172static inline bool pri_is_be_addr_shared(struct gk20a *g, u32 addr)
173{
174 u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE);
175 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
176 return (addr >= rop_shared_base) &&
177 (addr < rop_shared_base + rop_stride);
178}
179static inline u32 pri_be_shared_addr(struct gk20a *g, u32 addr)
180{
181 u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE);
182 return rop_shared_base + pri_becs_addr_mask(addr);
183}
184static inline bool pri_is_be_addr(struct gk20a *g, u32 addr)
185{
186 u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE);
187 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
188 return ((addr >= rop_base) &&
189 (addr < rop_base + g->ltc_count * rop_stride)) ||
190 pri_is_be_addr_shared(g, addr);
191}
192
193static inline u32 pri_get_be_num(struct gk20a *g, u32 addr)
194{
195 u32 i, start;
196 u32 num_fbps = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS);
197 u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE);
198 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
199 for (i = 0; i < num_fbps; i++) {
200 start = rop_base + (i * rop_stride);
201 if ((addr >= start) && (addr < (start + rop_stride)))
202 return i;
203 }
204 return 0;
205}
206
207/*
208 * PPC pri addressing
209 */
210static inline u32 pri_ppccs_addr_width(void)
211{
212 return 9; /* from where? */
213}
214static inline u32 pri_ppccs_addr_mask(u32 addr)
215{
216 return addr & ((1 << pri_ppccs_addr_width()) - 1);
217}
218static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc)
219{
220 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
221 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
222 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
223 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
224 return gpc_base + (gpc * gpc_stride) +
225 ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr;
226}
227
228enum ctxsw_addr_type {
229 CTXSW_ADDR_TYPE_SYS = 0,
230 CTXSW_ADDR_TYPE_GPC = 1,
231 CTXSW_ADDR_TYPE_TPC = 2,
232 CTXSW_ADDR_TYPE_BE = 3,
233 CTXSW_ADDR_TYPE_PPC = 4,
234 CTXSW_ADDR_TYPE_LTCS = 5,
235 CTXSW_ADDR_TYPE_FBPA = 6,
236 CTXSW_ADDR_TYPE_EGPC = 7,
237 CTXSW_ADDR_TYPE_ETPC = 8,
238 CTXSW_ADDR_TYPE_ROP = 9,
239 CTXSW_ADDR_TYPE_FBP = 10,
240};
241
242#define PRI_BROADCAST_FLAGS_NONE 0U
243#define PRI_BROADCAST_FLAGS_GPC BIT32(0)
244#define PRI_BROADCAST_FLAGS_TPC BIT32(1)
245#define PRI_BROADCAST_FLAGS_BE BIT32(2)
246#define PRI_BROADCAST_FLAGS_PPC BIT32(3)
247#define PRI_BROADCAST_FLAGS_LTCS BIT32(4)
248#define PRI_BROADCAST_FLAGS_LTSS BIT32(5)
249#define PRI_BROADCAST_FLAGS_FBPA BIT32(6)
250#define PRI_BROADCAST_FLAGS_EGPC BIT32(7)
251#define PRI_BROADCAST_FLAGS_ETPC BIT32(8)
252#define PRI_BROADCAST_FLAGS_PMMGPC BIT32(9)
253#define PRI_BROADCAST_FLAGS_PMM_GPCS BIT32(10)
254#define PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCA BIT32(11)
255#define PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCB BIT32(12)
256#define PRI_BROADCAST_FLAGS_PMMFBP BIT32(13)
257#define PRI_BROADCAST_FLAGS_PMM_FBPS BIT32(14)
258#define PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC BIT32(15)
259#define PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP BIT32(16)
260
261#endif /* GR_PRI_GK20A_H */
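As a brief illustration of how these pri-addressing helpers compose, the hedged sketch below builds a per-TPC register address from a TPC-relative offset and then recovers the owning GPC index; the offset value is a placeholder.

	/* Sketch: given an offset relative to a TPC's pri space, build the
	 * absolute address for (gpc 0, tpc 0) and map it back to its GPC. */
	static void example_pri_addressing(struct gk20a *g)
	{
		u32 rel_offset = 0x0;                       /* placeholder offset */
		u32 addr = pri_tpc_addr(g, pri_tpccs_addr_mask(rel_offset), 0U, 0U);

		if (pri_is_gpc_addr(g, addr)) {
			u32 gpc = pri_get_gpc_num(g, addr); /* 0 for this example */
			(void)gpc;
		}
	}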
diff --git a/include/gk20a/hw_bus_gk20a.h b/include/gk20a/hw_bus_gk20a.h
new file mode 100644
index 0000000..d3bb9e9
--- /dev/null
+++ b/include/gk20a/hw_bus_gk20a.h
@@ -0,0 +1,171 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_bus_gk20a_h_
57#define _hw_bus_gk20a_h_
58
59static inline u32 bus_bar0_window_r(void)
60{
61 return 0x00001700U;
62}
63static inline u32 bus_bar0_window_base_f(u32 v)
64{
65 return (v & 0xffffffU) << 0U;
66}
67static inline u32 bus_bar0_window_target_vid_mem_f(void)
68{
69 return 0x0U;
70}
71static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void)
72{
73 return 0x2000000U;
74}
75static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void)
76{
77 return 0x3000000U;
78}
79static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void)
80{
81 return 0x00000010U;
82}
83static inline u32 bus_bar1_block_r(void)
84{
85 return 0x00001704U;
86}
87static inline u32 bus_bar1_block_ptr_f(u32 v)
88{
89 return (v & 0xfffffffU) << 0U;
90}
91static inline u32 bus_bar1_block_target_vid_mem_f(void)
92{
93 return 0x0U;
94}
95static inline u32 bus_bar1_block_target_sys_mem_coh_f(void)
96{
97 return 0x20000000U;
98}
99static inline u32 bus_bar1_block_target_sys_mem_ncoh_f(void)
100{
101 return 0x30000000U;
102}
103static inline u32 bus_bar1_block_mode_virtual_f(void)
104{
105 return 0x80000000U;
106}
107static inline u32 bus_bar2_block_r(void)
108{
109 return 0x00001714U;
110}
111static inline u32 bus_bar2_block_ptr_f(u32 v)
112{
113 return (v & 0xfffffffU) << 0U;
114}
115static inline u32 bus_bar2_block_target_vid_mem_f(void)
116{
117 return 0x0U;
118}
119static inline u32 bus_bar2_block_target_sys_mem_coh_f(void)
120{
121 return 0x20000000U;
122}
123static inline u32 bus_bar2_block_target_sys_mem_ncoh_f(void)
124{
125 return 0x30000000U;
126}
127static inline u32 bus_bar2_block_mode_virtual_f(void)
128{
129 return 0x80000000U;
130}
131static inline u32 bus_bar1_block_ptr_shift_v(void)
132{
133 return 0x0000000cU;
134}
135static inline u32 bus_bar2_block_ptr_shift_v(void)
136{
137 return 0x0000000cU;
138}
139static inline u32 bus_intr_0_r(void)
140{
141 return 0x00001100U;
142}
143static inline u32 bus_intr_0_pri_squash_m(void)
144{
145 return 0x1U << 1U;
146}
147static inline u32 bus_intr_0_pri_fecserr_m(void)
148{
149 return 0x1U << 2U;
150}
151static inline u32 bus_intr_0_pri_timeout_m(void)
152{
153 return 0x1U << 3U;
154}
155static inline u32 bus_intr_en_0_r(void)
156{
157 return 0x00001140U;
158}
159static inline u32 bus_intr_en_0_pri_squash_m(void)
160{
161 return 0x1U << 1U;
162}
163static inline u32 bus_intr_en_0_pri_fecserr_m(void)
164{
165 return 0x1U << 2U;
166}
167static inline u32 bus_intr_en_0_pri_timeout_m(void)
168{
169 return 0x1U << 3U;
170}
171#endif
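The naming convention described at the top of this header is easiest to see in use. The hedged sketch below composes a BAR1 block value from an instance-block pointer and a target aperture; gk20a_writel() is assumed as the register-write helper and the pointer value is whatever the caller derived (instance block address shifted by bus_bar1_block_ptr_shift_v()).

	/* Sketch: program BAR1 to point at an instance block in vidmem,
	 * in virtual mode. inst_ptr_12b is the inst block address >> 12. */
	static void example_program_bar1(struct gk20a *g, u32 inst_ptr_12b)
	{
		u32 val = bus_bar1_block_target_vid_mem_f() |
			  bus_bar1_block_mode_virtual_f() |
			  bus_bar1_block_ptr_f(inst_ptr_12b);

		/* gk20a_writel() is assumed here as the register-write helper. */
		gk20a_writel(g, bus_bar1_block_r(), val);
	}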
diff --git a/include/gk20a/hw_ccsr_gk20a.h b/include/gk20a/hw_ccsr_gk20a.h
new file mode 100644
index 0000000..95151f6
--- /dev/null
+++ b/include/gk20a/hw_ccsr_gk20a.h
@@ -0,0 +1,163 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_ccsr_gk20a_h_
57#define _hw_ccsr_gk20a_h_
58
59static inline u32 ccsr_channel_inst_r(u32 i)
60{
61 return 0x00800000U + i*8U;
62}
63static inline u32 ccsr_channel_inst__size_1_v(void)
64{
65 return 0x00000080U;
66}
67static inline u32 ccsr_channel_inst_ptr_f(u32 v)
68{
69 return (v & 0xfffffffU) << 0U;
70}
71static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
72{
73 return 0x0U;
74}
75static inline u32 ccsr_channel_inst_target_sys_mem_coh_f(void)
76{
77 return 0x20000000U;
78}
79static inline u32 ccsr_channel_inst_target_sys_mem_ncoh_f(void)
80{
81 return 0x30000000U;
82}
83static inline u32 ccsr_channel_inst_bind_false_f(void)
84{
85 return 0x0U;
86}
87static inline u32 ccsr_channel_inst_bind_true_f(void)
88{
89 return 0x80000000U;
90}
91static inline u32 ccsr_channel_r(u32 i)
92{
93 return 0x00800004U + i*8U;
94}
95static inline u32 ccsr_channel__size_1_v(void)
96{
97 return 0x00000080U;
98}
99static inline u32 ccsr_channel_enable_v(u32 r)
100{
101 return (r >> 0U) & 0x1U;
102}
103static inline u32 ccsr_channel_enable_set_f(u32 v)
104{
105 return (v & 0x1U) << 10U;
106}
107static inline u32 ccsr_channel_enable_set_true_f(void)
108{
109 return 0x400U;
110}
111static inline u32 ccsr_channel_enable_clr_true_f(void)
112{
113 return 0x800U;
114}
115static inline u32 ccsr_channel_runlist_f(u32 v)
116{
117 return (v & 0xfU) << 16U;
118}
119static inline u32 ccsr_channel_status_v(u32 r)
120{
121 return (r >> 24U) & 0xfU;
122}
123static inline u32 ccsr_channel_status_pending_ctx_reload_v(void)
124{
125 return 0x00000002U;
126}
127static inline u32 ccsr_channel_status_pending_acq_ctx_reload_v(void)
128{
129 return 0x00000004U;
130}
131static inline u32 ccsr_channel_status_on_pbdma_ctx_reload_v(void)
132{
133 return 0x0000000aU;
134}
135static inline u32 ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v(void)
136{
137 return 0x0000000bU;
138}
139static inline u32 ccsr_channel_status_on_eng_ctx_reload_v(void)
140{
141 return 0x0000000cU;
142}
143static inline u32 ccsr_channel_status_on_eng_pending_ctx_reload_v(void)
144{
145 return 0x0000000dU;
146}
147static inline u32 ccsr_channel_status_on_eng_pending_acq_ctx_reload_v(void)
148{
149 return 0x0000000eU;
150}
151static inline u32 ccsr_channel_next_v(u32 r)
152{
153 return (r >> 1U) & 0x1U;
154}
155static inline u32 ccsr_channel_next_true_v(void)
156{
157 return 0x00000001U;
158}
159static inline u32 ccsr_channel_busy_v(u32 r)
160{
161 return (r >> 28U) & 0x1U;
162}
163#endif
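A short, hedged sketch of how these accessors decode a CCSR channel register follows; gk20a_readl() is assumed as the register-read helper.

	/* Sketch: check whether channel 'chid' is enabled and busy, and
	 * fetch its raw status field. */
	static void example_check_channel(struct gk20a *g, u32 chid)
	{
		u32 reg = gk20a_readl(g, ccsr_channel_r(chid));

		bool enabled = ccsr_channel_enable_v(reg) != 0U;
		bool busy    = ccsr_channel_busy_v(reg) != 0U;
		u32  status  = ccsr_channel_status_v(reg);

		(void)enabled; (void)busy; (void)status;
	}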
diff --git a/include/gk20a/hw_ce2_gk20a.h b/include/gk20a/hw_ce2_gk20a.h
new file mode 100644
index 0000000..87481cd
--- /dev/null
+++ b/include/gk20a/hw_ce2_gk20a.h
@@ -0,0 +1,87 @@
1/*
2 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_ce2_gk20a_h_
57#define _hw_ce2_gk20a_h_
58
59static inline u32 ce2_intr_status_r(void)
60{
61 return 0x00106908U;
62}
63static inline u32 ce2_intr_status_blockpipe_pending_f(void)
64{
65 return 0x1U;
66}
67static inline u32 ce2_intr_status_blockpipe_reset_f(void)
68{
69 return 0x1U;
70}
71static inline u32 ce2_intr_status_nonblockpipe_pending_f(void)
72{
73 return 0x2U;
74}
75static inline u32 ce2_intr_status_nonblockpipe_reset_f(void)
76{
77 return 0x2U;
78}
79static inline u32 ce2_intr_status_launcherr_pending_f(void)
80{
81 return 0x4U;
82}
83static inline u32 ce2_intr_status_launcherr_reset_f(void)
84{
85 return 0x4U;
86}
87#endif
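To show how the _pending_f()/_reset_f() pairs are meant to be used together, here is a hedged interrupt-servicing sketch; gk20a_readl()/gk20a_writel() are assumed helpers and the clearing policy is illustrative.

	/* Sketch: acknowledge whichever CE2 interrupt causes are pending. */
	static void example_service_ce2_intr(struct gk20a *g)
	{
		u32 status = gk20a_readl(g, ce2_intr_status_r());
		u32 clear = 0U;

		if (status & ce2_intr_status_blockpipe_pending_f())
			clear |= ce2_intr_status_blockpipe_reset_f();
		if (status & ce2_intr_status_nonblockpipe_pending_f())
			clear |= ce2_intr_status_nonblockpipe_reset_f();
		if (status & ce2_intr_status_launcherr_pending_f())
			clear |= ce2_intr_status_launcherr_reset_f();

		if (clear != 0U)
			gk20a_writel(g, ce2_intr_status_r(), clear);
	}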
diff --git a/include/gk20a/hw_ctxsw_prog_gk20a.h b/include/gk20a/hw_ctxsw_prog_gk20a.h
new file mode 100644
index 0000000..131fd12
--- /dev/null
+++ b/include/gk20a/hw_ctxsw_prog_gk20a.h
@@ -0,0 +1,447 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_ctxsw_prog_gk20a_h_
57#define _hw_ctxsw_prog_gk20a_h_
58
59static inline u32 ctxsw_prog_fecs_header_v(void)
60{
61 return 0x00000100U;
62}
63static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
64{
65 return 0x00000008U;
66}
67static inline u32 ctxsw_prog_main_image_patch_count_o(void)
68{
69 return 0x00000010U;
70}
71static inline u32 ctxsw_prog_main_image_context_id_o(void)
72{
73 return 0x000000f0U;
74}
75static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
76{
77 return 0x00000014U;
78}
79static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void)
80{
81 return 0x00000018U;
82}
83static inline u32 ctxsw_prog_main_image_zcull_o(void)
84{
85 return 0x0000001cU;
86}
87static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void)
88{
89 return 0x00000001U;
90}
91static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void)
92{
93 return 0x00000002U;
94}
95static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void)
96{
97 return 0x00000020U;
98}
99static inline u32 ctxsw_prog_main_image_pm_o(void)
100{
101 return 0x00000028U;
102}
103static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
104{
105 return 0x7U << 0U;
106}
107static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
108{
109 return 0x1U;
110}
111static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
112{
113 return 0x0U;
114}
115static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
116{
117 return 0x7U << 3U;
118}
119static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
120{
121 return 0x8U;
122}
123static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
124{
125 return 0x0U;
126}
127static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
128{
129 return 0x0000002cU;
130}
131static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
132{
133 return 0x000000f4U;
134}
135static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
136{
137 return 0x000000f8U;
138}
139static inline u32 ctxsw_prog_main_image_magic_value_o(void)
140{
141 return 0x000000fcU;
142}
143static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void)
144{
145 return 0x600dc0deU;
146}
147static inline u32 ctxsw_prog_local_priv_register_ctl_o(void)
148{
149 return 0x0000000cU;
150}
151static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r)
152{
153 return (r >> 0U) & 0xffffU;
154}
155static inline u32 ctxsw_prog_local_image_ppc_info_o(void)
156{
157 return 0x000000f4U;
158}
159static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r)
160{
161 return (r >> 0U) & 0xffffU;
162}
163static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r)
164{
165 return (r >> 16U) & 0xffffU;
166}
167static inline u32 ctxsw_prog_local_image_num_tpcs_o(void)
168{
169 return 0x000000f8U;
170}
171static inline u32 ctxsw_prog_local_magic_value_o(void)
172{
173 return 0x000000fcU;
174}
175static inline u32 ctxsw_prog_local_magic_value_v_value_v(void)
176{
177 return 0xad0becabU;
178}
179static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void)
180{
181 return 0x000000ecU;
182}
183static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r)
184{
185 return (r >> 0U) & 0xffffU;
186}
187static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r)
188{
189 return (r >> 16U) & 0xffU;
190}
191static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void)
192{
193 return 0x00000100U;
194}
195static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void)
196{
197 return 0x00000004U;
198}
199static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void)
200{
201 return 0x00000005U;
202}
203static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void)
204{
205 return 0x00000004U;
206}
207static inline u32 ctxsw_prog_extended_num_smpc_quadrants_v(void)
208{
209 return 0x00000004U;
210}
211static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void)
212{
213 return 0x000000a0U;
214}
215static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_s(void)
216{
217 return 2U;
218}
219static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_f(u32 v)
220{
221 return (v & 0x3U) << 0U;
222}
223static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_m(void)
224{
225 return 0x3U << 0U;
226}
227static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_v(u32 r)
228{
229 return (r >> 0U) & 0x3U;
230}
231static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void)
232{
233 return 0x0U;
234}
235static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void)
236{
237 return 0x2U;
238}
239static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void)
240{
241 return 0x000000a4U;
242}
243static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void)
244{
245 return 0x000000a8U;
246}
247static inline u32 ctxsw_prog_main_image_misc_options_o(void)
248{
249 return 0x0000003cU;
250}
251static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void)
252{
253 return 0x1U << 3U;
254}
255static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void)
256{
257 return 0x0U;
258}
259static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_o(void)
260{
261 return 0x000000acU;
262}
263static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(u32 v)
264{
265 return (v & 0xffffU) << 0U;
266}
267static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(void)
268{
269 return 0x000000b0U;
270}
271static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_v_m(void)
272{
273 return 0xfffffffU << 0U;
274}
275static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_m(void)
276{
277 return 0x3U << 28U;
278}
279static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f(void)
280{
281 return 0x0U;
282}
283static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(void)
284{
285 return 0x20000000U;
286}
287static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(void)
288{
289 return 0x30000000U;
290}
291static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(void)
292{
293 return 0x000000b4U;
294}
295static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u32 v)
296{
297 return (v & 0xffffffffU) << 0U;
298}
299static inline u32 ctxsw_prog_record_timestamp_record_size_in_bytes_v(void)
300{
301 return 0x00000080U;
302}
303static inline u32 ctxsw_prog_record_timestamp_record_size_in_words_v(void)
304{
305 return 0x00000020U;
306}
307static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_o(void)
308{
309 return 0x00000000U;
310}
311static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_v_value_v(void)
312{
313 return 0x00000000U;
314}
315static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_o(void)
316{
317 return 0x00000004U;
318}
319static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(void)
320{
321 return 0x600dbeefU;
322}
323static inline u32 ctxsw_prog_record_timestamp_context_id_o(void)
324{
325 return 0x00000008U;
326}
327static inline u32 ctxsw_prog_record_timestamp_context_ptr_o(void)
328{
329 return 0x0000000cU;
330}
331static inline u32 ctxsw_prog_record_timestamp_new_context_id_o(void)
332{
333 return 0x00000010U;
334}
335static inline u32 ctxsw_prog_record_timestamp_new_context_ptr_o(void)
336{
337 return 0x00000014U;
338}
339static inline u32 ctxsw_prog_record_timestamp_timestamp_lo_o(void)
340{
341 return 0x00000018U;
342}
343static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_o(void)
344{
345 return 0x0000001cU;
346}
347static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_f(u32 v)
348{
349 return (v & 0xffffffU) << 0U;
350}
351static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_v(u32 r)
352{
353 return (r >> 0U) & 0xffffffU;
354}
355static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_f(u32 v)
356{
357 return (v & 0xffU) << 24U;
358}
359static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_m(void)
360{
361 return 0xffU << 24U;
362}
363static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_v(u32 r)
364{
365 return (r >> 24U) & 0xffU;
366}
367static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_v(void)
368{
369 return 0x00000001U;
370}
371static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_f(void)
372{
373 return 0x1000000U;
374}
375static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_v(void)
376{
377 return 0x00000002U;
378}
379static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_f(void)
380{
381 return 0x2000000U;
382}
383static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_v(void)
384{
385 return 0x0000000aU;
386}
387static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_f(void)
388{
389 return 0xa000000U;
390}
391static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_v(void)
392{
393 return 0x0000000bU;
394}
395static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_f(void)
396{
397 return 0xb000000U;
398}
399static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_v(void)
400{
401 return 0x0000000cU;
402}
403static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_f(void)
404{
405 return 0xc000000U;
406}
407static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_v(void)
408{
409 return 0x0000000dU;
410}
411static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_f(void)
412{
413 return 0xd000000U;
414}
415static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v(void)
416{
417 return 0x00000003U;
418}
419static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_f(void)
420{
421 return 0x3000000U;
422}
423static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_v(void)
424{
425 return 0x00000004U;
426}
427static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_f(void)
428{
429 return 0x4000000U;
430}
431static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_v(void)
432{
433 return 0x00000005U;
434}
435static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_f(void)
436{
437 return 0x5000000U;
438}
439static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(void)
440{
441 return 0x000000ffU;
442}
443static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_f(void)
444{
445 return 0xff000000U;
446}
447#endif
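The timestamp-record accessors near the end of this header decode the packed tag/timestamp words of a context-switch trace record. A hedged sketch of that decoding is given below; the field splitting follows the accessors above, but the surrounding buffer handling is left out and the word comes from an assumed caller.

	/* Sketch: split a record's timestamp_hi word into its tag and the
	 * upper 24 bits of the timestamp. */
	static void example_decode_ts_hi(u32 ts_hi_word)
	{
		u32 tag   = ctxsw_prog_record_timestamp_timestamp_hi_tag_v(ts_hi_word);
		u32 ts_hi = ctxsw_prog_record_timestamp_timestamp_hi_v_v(ts_hi_word);

		if (tag == ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v()) {
			/* a context save completed at this timestamp */
		}

		(void)ts_hi;
	}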
diff --git a/include/gk20a/hw_falcon_gk20a.h b/include/gk20a/hw_falcon_gk20a.h
new file mode 100644
index 0000000..7b4d87b
--- /dev/null
+++ b/include/gk20a/hw_falcon_gk20a.h
@@ -0,0 +1,559 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_falcon_gk20a_h_
57#define _hw_falcon_gk20a_h_
58
59static inline u32 falcon_falcon_irqsset_r(void)
60{
61 return 0x00000000U;
62}
63static inline u32 falcon_falcon_irqsset_swgen0_set_f(void)
64{
65 return 0x40U;
66}
67static inline u32 falcon_falcon_irqsclr_r(void)
68{
69 return 0x00000004U;
70}
71static inline u32 falcon_falcon_irqstat_r(void)
72{
73 return 0x00000008U;
74}
75static inline u32 falcon_falcon_irqstat_halt_true_f(void)
76{
77 return 0x10U;
78}
79static inline u32 falcon_falcon_irqstat_exterr_true_f(void)
80{
81 return 0x20U;
82}
83static inline u32 falcon_falcon_irqstat_swgen0_true_f(void)
84{
85 return 0x40U;
86}
87static inline u32 falcon_falcon_irqmode_r(void)
88{
89 return 0x0000000cU;
90}
91static inline u32 falcon_falcon_irqmset_r(void)
92{
93 return 0x00000010U;
94}
95static inline u32 falcon_falcon_irqmset_gptmr_f(u32 v)
96{
97 return (v & 0x1U) << 0U;
98}
99static inline u32 falcon_falcon_irqmset_wdtmr_f(u32 v)
100{
101 return (v & 0x1U) << 1U;
102}
103static inline u32 falcon_falcon_irqmset_mthd_f(u32 v)
104{
105 return (v & 0x1U) << 2U;
106}
107static inline u32 falcon_falcon_irqmset_ctxsw_f(u32 v)
108{
109 return (v & 0x1U) << 3U;
110}
111static inline u32 falcon_falcon_irqmset_halt_f(u32 v)
112{
113 return (v & 0x1U) << 4U;
114}
115static inline u32 falcon_falcon_irqmset_exterr_f(u32 v)
116{
117 return (v & 0x1U) << 5U;
118}
119static inline u32 falcon_falcon_irqmset_swgen0_f(u32 v)
120{
121 return (v & 0x1U) << 6U;
122}
123static inline u32 falcon_falcon_irqmset_swgen1_f(u32 v)
124{
125 return (v & 0x1U) << 7U;
126}
127static inline u32 falcon_falcon_irqmclr_r(void)
128{
129 return 0x00000014U;
130}
131static inline u32 falcon_falcon_irqmclr_gptmr_f(u32 v)
132{
133 return (v & 0x1U) << 0U;
134}
135static inline u32 falcon_falcon_irqmclr_wdtmr_f(u32 v)
136{
137 return (v & 0x1U) << 1U;
138}
139static inline u32 falcon_falcon_irqmclr_mthd_f(u32 v)
140{
141 return (v & 0x1U) << 2U;
142}
143static inline u32 falcon_falcon_irqmclr_ctxsw_f(u32 v)
144{
145 return (v & 0x1U) << 3U;
146}
147static inline u32 falcon_falcon_irqmclr_halt_f(u32 v)
148{
149 return (v & 0x1U) << 4U;
150}
151static inline u32 falcon_falcon_irqmclr_exterr_f(u32 v)
152{
153 return (v & 0x1U) << 5U;
154}
155static inline u32 falcon_falcon_irqmclr_swgen0_f(u32 v)
156{
157 return (v & 0x1U) << 6U;
158}
159static inline u32 falcon_falcon_irqmclr_swgen1_f(u32 v)
160{
161 return (v & 0x1U) << 7U;
162}
163static inline u32 falcon_falcon_irqmclr_ext_f(u32 v)
164{
165 return (v & 0xffU) << 8U;
166}
167static inline u32 falcon_falcon_irqmask_r(void)
168{
169 return 0x00000018U;
170}
171static inline u32 falcon_falcon_irqdest_r(void)
172{
173 return 0x0000001cU;
174}
175static inline u32 falcon_falcon_irqdest_host_gptmr_f(u32 v)
176{
177 return (v & 0x1U) << 0U;
178}
179static inline u32 falcon_falcon_irqdest_host_wdtmr_f(u32 v)
180{
181 return (v & 0x1U) << 1U;
182}
183static inline u32 falcon_falcon_irqdest_host_mthd_f(u32 v)
184{
185 return (v & 0x1U) << 2U;
186}
187static inline u32 falcon_falcon_irqdest_host_ctxsw_f(u32 v)
188{
189 return (v & 0x1U) << 3U;
190}
191static inline u32 falcon_falcon_irqdest_host_halt_f(u32 v)
192{
193 return (v & 0x1U) << 4U;
194}
195static inline u32 falcon_falcon_irqdest_host_exterr_f(u32 v)
196{
197 return (v & 0x1U) << 5U;
198}
199static inline u32 falcon_falcon_irqdest_host_swgen0_f(u32 v)
200{
201 return (v & 0x1U) << 6U;
202}
203static inline u32 falcon_falcon_irqdest_host_swgen1_f(u32 v)
204{
205 return (v & 0x1U) << 7U;
206}
207static inline u32 falcon_falcon_irqdest_host_ext_f(u32 v)
208{
209 return (v & 0xffU) << 8U;
210}
211static inline u32 falcon_falcon_irqdest_target_gptmr_f(u32 v)
212{
213 return (v & 0x1U) << 16U;
214}
215static inline u32 falcon_falcon_irqdest_target_wdtmr_f(u32 v)
216{
217 return (v & 0x1U) << 17U;
218}
219static inline u32 falcon_falcon_irqdest_target_mthd_f(u32 v)
220{
221 return (v & 0x1U) << 18U;
222}
223static inline u32 falcon_falcon_irqdest_target_ctxsw_f(u32 v)
224{
225 return (v & 0x1U) << 19U;
226}
227static inline u32 falcon_falcon_irqdest_target_halt_f(u32 v)
228{
229 return (v & 0x1U) << 20U;
230}
231static inline u32 falcon_falcon_irqdest_target_exterr_f(u32 v)
232{
233 return (v & 0x1U) << 21U;
234}
235static inline u32 falcon_falcon_irqdest_target_swgen0_f(u32 v)
236{
237 return (v & 0x1U) << 22U;
238}
239static inline u32 falcon_falcon_irqdest_target_swgen1_f(u32 v)
240{
241 return (v & 0x1U) << 23U;
242}
243static inline u32 falcon_falcon_irqdest_target_ext_f(u32 v)
244{
245 return (v & 0xffU) << 24U;
246}
247static inline u32 falcon_falcon_curctx_r(void)
248{
249 return 0x00000050U;
250}
251static inline u32 falcon_falcon_nxtctx_r(void)
252{
253 return 0x00000054U;
254}
255static inline u32 falcon_falcon_mailbox0_r(void)
256{
257 return 0x00000040U;
258}
259static inline u32 falcon_falcon_mailbox1_r(void)
260{
261 return 0x00000044U;
262}
263static inline u32 falcon_falcon_itfen_r(void)
264{
265 return 0x00000048U;
266}
267static inline u32 falcon_falcon_itfen_ctxen_enable_f(void)
268{
269 return 0x1U;
270}
271static inline u32 falcon_falcon_idlestate_r(void)
272{
273 return 0x0000004cU;
274}
275static inline u32 falcon_falcon_idlestate_falcon_busy_v(u32 r)
276{
277 return (r >> 0U) & 0x1U;
278}
279static inline u32 falcon_falcon_idlestate_ext_busy_v(u32 r)
280{
281 return (r >> 1U) & 0x7fffU;
282}
283static inline u32 falcon_falcon_os_r(void)
284{
285 return 0x00000080U;
286}
287static inline u32 falcon_falcon_engctl_r(void)
288{
289 return 0x000000a4U;
290}
291static inline u32 falcon_falcon_cpuctl_r(void)
292{
293 return 0x00000100U;
294}
295static inline u32 falcon_falcon_cpuctl_startcpu_f(u32 v)
296{
297 return (v & 0x1U) << 1U;
298}
299static inline u32 falcon_falcon_cpuctl_sreset_f(u32 v)
300{
301 return (v & 0x1U) << 2U;
302}
303static inline u32 falcon_falcon_cpuctl_hreset_f(u32 v)
304{
305 return (v & 0x1U) << 3U;
306}
307static inline u32 falcon_falcon_cpuctl_halt_intr_f(u32 v)
308{
309 return (v & 0x1U) << 4U;
310}
311static inline u32 falcon_falcon_cpuctl_halt_intr_m(void)
312{
313 return 0x1U << 4U;
314}
315static inline u32 falcon_falcon_cpuctl_halt_intr_v(u32 r)
316{
317 return (r >> 4U) & 0x1U;
318}
319static inline u32 falcon_falcon_cpuctl_stopped_m(void)
320{
321 return 0x1U << 5U;
322}
323static inline u32 falcon_falcon_imemc_r(u32 i)
324{
325 return 0x00000180U + i*16U;
326}
327static inline u32 falcon_falcon_imemc_offs_f(u32 v)
328{
329 return (v & 0x3fU) << 2U;
330}
331static inline u32 falcon_falcon_imemc_blk_f(u32 v)
332{
333 return (v & 0xffU) << 8U;
334}
335static inline u32 falcon_falcon_imemc_aincw_f(u32 v)
336{
337 return (v & 0x1U) << 24U;
338}
339static inline u32 falcon_falcon_imemc_secure_f(u32 v)
340{
341 return (v & 0x1U) << 28U;
342}
343static inline u32 falcon_falcon_imemd_r(u32 i)
344{
345 return 0x00000184U + i*16U;
346}
347static inline u32 falcon_falcon_imemt_r(u32 i)
348{
349 return 0x00000188U + i*16U;
350}
351static inline u32 falcon_falcon_bootvec_r(void)
352{
353 return 0x00000104U;
354}
355static inline u32 falcon_falcon_bootvec_vec_f(u32 v)
356{
357 return (v & 0xffffffffU) << 0U;
358}
359static inline u32 falcon_falcon_dmactl_r(void)
360{
361 return 0x0000010cU;
362}
363static inline u32 falcon_falcon_dmactl_dmem_scrubbing_m(void)
364{
365 return 0x1U << 1U;
366}
367static inline u32 falcon_falcon_dmactl_imem_scrubbing_m(void)
368{
369 return 0x1U << 2U;
370}
371static inline u32 falcon_falcon_dmactl_require_ctx_f(u32 v)
372{
373 return (v & 0x1U) << 0U;
374}
375static inline u32 falcon_falcon_hwcfg_r(void)
376{
377 return 0x00000108U;
378}
379static inline u32 falcon_falcon_hwcfg_imem_size_v(u32 r)
380{
381 return (r >> 0U) & 0x1ffU;
382}
383static inline u32 falcon_falcon_hwcfg_dmem_size_v(u32 r)
384{
385 return (r >> 9U) & 0x1ffU;
386}
387static inline u32 falcon_falcon_dmatrfbase_r(void)
388{
389 return 0x00000110U;
390}
391static inline u32 falcon_falcon_dmatrfmoffs_r(void)
392{
393 return 0x00000114U;
394}
395static inline u32 falcon_falcon_dmatrfcmd_r(void)
396{
397 return 0x00000118U;
398}
399static inline u32 falcon_falcon_dmatrfcmd_imem_f(u32 v)
400{
401 return (v & 0x1U) << 4U;
402}
403static inline u32 falcon_falcon_dmatrfcmd_write_f(u32 v)
404{
405 return (v & 0x1U) << 5U;
406}
407static inline u32 falcon_falcon_dmatrfcmd_size_f(u32 v)
408{
409 return (v & 0x7U) << 8U;
410}
411static inline u32 falcon_falcon_dmatrfcmd_ctxdma_f(u32 v)
412{
413 return (v & 0x7U) << 12U;
414}
415static inline u32 falcon_falcon_dmatrffboffs_r(void)
416{
417 return 0x0000011cU;
418}
419static inline u32 falcon_falcon_imstat_r(void)
420{
421 return 0x00000144U;
422}
423static inline u32 falcon_falcon_traceidx_r(void)
424{
425 return 0x00000148U;
426}
427static inline u32 falcon_falcon_traceidx_maxidx_v(u32 r)
428{
429 return (r >> 16U) & 0xffU;
430}
431static inline u32 falcon_falcon_traceidx_idx_v(u32 r)
432{
433 return (r >> 0U) & 0xffU;
434}
435static inline u32 falcon_falcon_tracepc_r(void)
436{
437 return 0x0000014cU;
438}
439static inline u32 falcon_falcon_tracepc_pc_v(u32 r)
440{
441 return (r >> 0U) & 0xffffffU;
442}
443static inline u32 falcon_falcon_exterraddr_r(void)
444{
445 return 0x00000168U;
446}
447static inline u32 falcon_falcon_exterrstat_r(void)
448{
449 return 0x0000016cU;
450}
451static inline u32 falcon_falcon_exterrstat_valid_m(void)
452{
453 return 0x1U << 31U;
454}
455static inline u32 falcon_falcon_exterrstat_valid_v(u32 r)
456{
457 return (r >> 31U) & 0x1U;
458}
459static inline u32 falcon_falcon_exterrstat_valid_true_v(void)
460{
461 return 0x00000001U;
462}
463static inline u32 falcon_falcon_icd_cmd_r(void)
464{
465 return 0x00000200U;
466}
467static inline u32 falcon_falcon_icd_cmd_opc_s(void)
468{
469 return 4U;
470}
471static inline u32 falcon_falcon_icd_cmd_opc_f(u32 v)
472{
473 return (v & 0xfU) << 0U;
474}
475static inline u32 falcon_falcon_icd_cmd_opc_m(void)
476{
477 return 0xfU << 0U;
478}
479static inline u32 falcon_falcon_icd_cmd_opc_v(u32 r)
480{
481 return (r >> 0U) & 0xfU;
482}
483static inline u32 falcon_falcon_icd_cmd_opc_rreg_f(void)
484{
485 return 0x8U;
486}
487static inline u32 falcon_falcon_icd_cmd_opc_rstat_f(void)
488{
489 return 0xeU;
490}
491static inline u32 falcon_falcon_icd_cmd_idx_f(u32 v)
492{
493 return (v & 0x1fU) << 8U;
494}
495static inline u32 falcon_falcon_icd_rdata_r(void)
496{
497 return 0x0000020cU;
498}
499static inline u32 falcon_falcon_dmemc_r(u32 i)
500{
501 return 0x000001c0U + i*8U;
502}
503static inline u32 falcon_falcon_dmemc_offs_f(u32 v)
504{
505 return (v & 0x3fU) << 2U;
506}
507static inline u32 falcon_falcon_dmemc_offs_m(void)
508{
509 return 0x3fU << 2U;
510}
511static inline u32 falcon_falcon_dmemc_blk_f(u32 v)
512{
513 return (v & 0xffU) << 8U;
514}
515static inline u32 falcon_falcon_dmemc_blk_m(void)
516{
517 return 0xffU << 8U;
518}
519static inline u32 falcon_falcon_dmemc_aincw_f(u32 v)
520{
521 return (v & 0x1U) << 24U;
522}
523static inline u32 falcon_falcon_dmemc_aincr_f(u32 v)
524{
525 return (v & 0x1U) << 25U;
526}
527static inline u32 falcon_falcon_dmemd_r(u32 i)
528{
529 return 0x000001c4U + i*8U;
530}
531static inline u32 falcon_falcon_debug1_r(void)
532{
533 return 0x00000090U;
534}
535static inline u32 falcon_falcon_debug1_ctxsw_mode_s(void)
536{
537 return 1U;
538}
539static inline u32 falcon_falcon_debug1_ctxsw_mode_f(u32 v)
540{
541 return (v & 0x1U) << 16U;
542}
543static inline u32 falcon_falcon_debug1_ctxsw_mode_m(void)
544{
545 return 0x1U << 16U;
546}
547static inline u32 falcon_falcon_debug1_ctxsw_mode_v(u32 r)
548{
549 return (r >> 16U) & 0x1U;
550}
551static inline u32 falcon_falcon_debug1_ctxsw_mode_init_f(void)
552{
553 return 0x0U;
554}
555static inline u32 falcon_falcon_debuginfo_r(void)
556{
557 return 0x00000094U;
558}
559#endif
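One common way the DMEM window registers defined above get used is an auto-increment read: program falcon_falcon_dmemc_r() once with the block, offset and aincr bit, then pull successive words out of falcon_falcon_dmemd_r(). A hedged sketch, assuming reg_read()/reg_write() MMIO helpers and a caller-supplied falcon unit base offset (neither is part of this header):

/* Illustrative only: reg_read/reg_write and 'base' are assumed helpers. */
extern u32 reg_read(u32 r);
extern void reg_write(u32 r, u32 v);

static void example_falcon_dmem_read(u32 base, u32 blk, u32 offs,
				     u32 *dst, u32 words)
{
	u32 i;

	/* port 0; offs is a word offset within the selected DMEM block */
	reg_write(base + falcon_falcon_dmemc_r(0),
		  falcon_falcon_dmemc_offs_f(offs) |
		  falcon_falcon_dmemc_blk_f(blk) |
		  falcon_falcon_dmemc_aincr_f(1));

	for (i = 0; i < words; i++) {
		/* each dmemd read advances the DMEM address automatically */
		dst[i] = reg_read(base + falcon_falcon_dmemd_r(0));
	}
}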
diff --git a/include/gk20a/hw_fb_gk20a.h b/include/gk20a/hw_fb_gk20a.h
new file mode 100644
index 0000000..42df4f5
--- /dev/null
+++ b/include/gk20a/hw_fb_gk20a.h
@@ -0,0 +1,263 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_fb_gk20a_h_
57#define _hw_fb_gk20a_h_
58
59static inline u32 fb_mmu_ctrl_r(void)
60{
61 return 0x00100c80U;
62}
63static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v)
64{
65 return (v & 0x1U) << 0U;
66}
67static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
68{
69 return 0x0U;
70}
71static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void)
72{
73 return 0x1U;
74}
75static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
76{
77 return (r >> 15U) & 0x1U;
78}
79static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void)
80{
81 return 0x0U;
82}
83static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
84{
85 return (r >> 16U) & 0xffU;
86}
87static inline u32 fb_mmu_invalidate_pdb_r(void)
88{
89 return 0x00100cb8U;
90}
91static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
92{
93 return 0x0U;
94}
95static inline u32 fb_mmu_invalidate_pdb_aperture_sys_mem_f(void)
96{
97 return 0x2U;
98}
99static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
100{
101 return (v & 0xfffffffU) << 4U;
102}
103static inline u32 fb_mmu_invalidate_r(void)
104{
105 return 0x00100cbcU;
106}
107static inline u32 fb_mmu_invalidate_all_va_true_f(void)
108{
109 return 0x1U;
110}
111static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
112{
113 return 0x2U;
114}
115static inline u32 fb_mmu_invalidate_trigger_s(void)
116{
117 return 1U;
118}
119static inline u32 fb_mmu_invalidate_trigger_f(u32 v)
120{
121 return (v & 0x1U) << 31U;
122}
123static inline u32 fb_mmu_invalidate_trigger_m(void)
124{
125 return 0x1U << 31U;
126}
127static inline u32 fb_mmu_invalidate_trigger_v(u32 r)
128{
129 return (r >> 31U) & 0x1U;
130}
131static inline u32 fb_mmu_invalidate_trigger_true_f(void)
132{
133 return 0x80000000U;
134}
135static inline u32 fb_mmu_debug_wr_r(void)
136{
137 return 0x00100cc8U;
138}
139static inline u32 fb_mmu_debug_wr_aperture_s(void)
140{
141 return 2U;
142}
143static inline u32 fb_mmu_debug_wr_aperture_f(u32 v)
144{
145 return (v & 0x3U) << 0U;
146}
147static inline u32 fb_mmu_debug_wr_aperture_m(void)
148{
149 return 0x3U << 0U;
150}
151static inline u32 fb_mmu_debug_wr_aperture_v(u32 r)
152{
153 return (r >> 0U) & 0x3U;
154}
155static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
156{
157 return 0x0U;
158}
159static inline u32 fb_mmu_debug_wr_aperture_sys_mem_coh_f(void)
160{
161 return 0x2U;
162}
163static inline u32 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(void)
164{
165 return 0x3U;
166}
167static inline u32 fb_mmu_debug_wr_vol_false_f(void)
168{
169 return 0x0U;
170}
171static inline u32 fb_mmu_debug_wr_vol_true_v(void)
172{
173 return 0x00000001U;
174}
175static inline u32 fb_mmu_debug_wr_vol_true_f(void)
176{
177 return 0x4U;
178}
179static inline u32 fb_mmu_debug_wr_addr_f(u32 v)
180{
181 return (v & 0xfffffffU) << 4U;
182}
183static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
184{
185 return 0x0000000cU;
186}
187static inline u32 fb_mmu_debug_rd_r(void)
188{
189 return 0x00100cccU;
190}
191static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
192{
193 return 0x0U;
194}
195static inline u32 fb_mmu_debug_rd_aperture_sys_mem_coh_f(void)
196{
197 return 0x2U;
198}
199static inline u32 fb_mmu_debug_rd_aperture_sys_mem_ncoh_f(void)
200{
201 return 0x3U;
202}
203static inline u32 fb_mmu_debug_rd_vol_false_f(void)
204{
205 return 0x0U;
206}
207static inline u32 fb_mmu_debug_rd_addr_f(u32 v)
208{
209 return (v & 0xfffffffU) << 4U;
210}
211static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
212{
213 return 0x0000000cU;
214}
215static inline u32 fb_mmu_debug_ctrl_r(void)
216{
217 return 0x00100cc4U;
218}
219static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
220{
221 return (r >> 16U) & 0x1U;
222}
223static inline u32 fb_mmu_debug_ctrl_debug_m(void)
224{
225 return 0x1U << 16U;
226}
227static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
228{
229 return 0x00000001U;
230}
231static inline u32 fb_mmu_debug_ctrl_debug_enabled_f(void)
232{
233 return 0x10000U;
234}
235static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
236{
237 return 0x00000000U;
238}
239static inline u32 fb_mmu_debug_ctrl_debug_disabled_f(void)
240{
241 return 0x0U;
242}
243static inline u32 fb_mmu_vpr_info_r(void)
244{
245 return 0x00100cd0U;
246}
247static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
248{
249 return (r >> 2U) & 0x1U;
250}
251static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
252{
253 return 0x00000000U;
254}
255static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
256{
257 return 0x00000001U;
258}
259static inline u32 fb_niso_flush_sysmem_addr_r(void)
260{
261 return 0x00100c10U;
262}
263#endif
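A minimal sketch of the invalidate sequence the fb_mmu_* helpers above are built for: point the MMU at the PDB, request an all-VA/all-PDB invalidate, and poll the trigger bit until it drops. reg_read()/reg_write() and the 4KB PDB alignment are assumptions for illustration, not something this header defines:

/* Illustrative only: reg_read/reg_write are assumed MMIO helpers. */
extern u32 reg_read(u32 r);
extern void reg_write(u32 r, u32 v);

static void example_mmu_invalidate_all(u64 pdb_addr, int pdb_in_vidmem)
{
	u32 aperture = pdb_in_vidmem ?
		fb_mmu_invalidate_pdb_aperture_vid_mem_f() :
		fb_mmu_invalidate_pdb_aperture_sys_mem_f();

	/* the PDB pointer is programmed in 4KB units (assumed alignment) */
	reg_write(fb_mmu_invalidate_pdb_r(),
		  fb_mmu_invalidate_pdb_addr_f((u32)(pdb_addr >> 12)) | aperture);

	/* invalidate every VA and PDB entry, then poll the trigger bit */
	reg_write(fb_mmu_invalidate_r(),
		  fb_mmu_invalidate_all_va_true_f() |
		  fb_mmu_invalidate_all_pdb_true_f() |
		  fb_mmu_invalidate_trigger_true_f());

	while (fb_mmu_invalidate_trigger_v(reg_read(fb_mmu_invalidate_r())) != 0U) {
		/* a real caller would bound this loop with a timeout */
	}
}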
diff --git a/include/gk20a/hw_fifo_gk20a.h b/include/gk20a/hw_fifo_gk20a.h
new file mode 100644
index 0000000..e61e386
--- /dev/null
+++ b/include/gk20a/hw_fifo_gk20a.h
@@ -0,0 +1,619 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_fifo_gk20a_h_
57#define _hw_fifo_gk20a_h_
58
59static inline u32 fifo_bar1_base_r(void)
60{
61 return 0x00002254U;
62}
63static inline u32 fifo_bar1_base_ptr_f(u32 v)
64{
65 return (v & 0xfffffffU) << 0U;
66}
67static inline u32 fifo_bar1_base_ptr_align_shift_v(void)
68{
69 return 0x0000000cU;
70}
71static inline u32 fifo_bar1_base_valid_false_f(void)
72{
73 return 0x0U;
74}
75static inline u32 fifo_bar1_base_valid_true_f(void)
76{
77 return 0x10000000U;
78}
79static inline u32 fifo_runlist_base_r(void)
80{
81 return 0x00002270U;
82}
83static inline u32 fifo_runlist_base_ptr_f(u32 v)
84{
85 return (v & 0xfffffffU) << 0U;
86}
87static inline u32 fifo_runlist_base_target_vid_mem_f(void)
88{
89 return 0x0U;
90}
91static inline u32 fifo_runlist_base_target_sys_mem_coh_f(void)
92{
93 return 0x20000000U;
94}
95static inline u32 fifo_runlist_base_target_sys_mem_ncoh_f(void)
96{
97 return 0x30000000U;
98}
99static inline u32 fifo_runlist_r(void)
100{
101 return 0x00002274U;
102}
103static inline u32 fifo_runlist_engine_f(u32 v)
104{
105 return (v & 0xfU) << 20U;
106}
107static inline u32 fifo_eng_runlist_base_r(u32 i)
108{
109 return 0x00002280U + i*8U;
110}
111static inline u32 fifo_eng_runlist_base__size_1_v(void)
112{
113 return 0x00000001U;
114}
115static inline u32 fifo_eng_runlist_r(u32 i)
116{
117 return 0x00002284U + i*8U;
118}
119static inline u32 fifo_eng_runlist__size_1_v(void)
120{
121 return 0x00000001U;
122}
123static inline u32 fifo_eng_runlist_length_f(u32 v)
124{
125 return (v & 0xffffU) << 0U;
126}
127static inline u32 fifo_eng_runlist_length_max_v(void)
128{
129 return 0x0000ffffU;
130}
131static inline u32 fifo_eng_runlist_pending_true_f(void)
132{
133 return 0x100000U;
134}
135static inline u32 fifo_runlist_timeslice_r(u32 i)
136{
137 return 0x00002310U + i*4U;
138}
139static inline u32 fifo_runlist_timeslice_timeout_128_f(void)
140{
141 return 0x80U;
142}
143static inline u32 fifo_runlist_timeslice_timescale_3_f(void)
144{
145 return 0x3000U;
146}
147static inline u32 fifo_runlist_timeslice_enable_true_f(void)
148{
149 return 0x10000000U;
150}
151static inline u32 fifo_eng_timeout_r(void)
152{
153 return 0x00002a0cU;
154}
155static inline u32 fifo_eng_timeout_period_max_f(void)
156{
157 return 0x7fffffffU;
158}
159static inline u32 fifo_eng_timeout_detection_enabled_f(void)
160{
161 return 0x80000000U;
162}
163static inline u32 fifo_eng_timeout_detection_disabled_f(void)
164{
165 return 0x0U;
166}
167static inline u32 fifo_pb_timeslice_r(u32 i)
168{
169 return 0x00002350U + i*4U;
170}
171static inline u32 fifo_pb_timeslice_timeout_16_f(void)
172{
173 return 0x10U;
174}
175static inline u32 fifo_pb_timeslice_timescale_0_f(void)
176{
177 return 0x0U;
178}
179static inline u32 fifo_pb_timeslice_enable_true_f(void)
180{
181 return 0x10000000U;
182}
183static inline u32 fifo_pbdma_map_r(u32 i)
184{
185 return 0x00002390U + i*4U;
186}
187static inline u32 fifo_intr_0_r(void)
188{
189 return 0x00002100U;
190}
191static inline u32 fifo_intr_0_bind_error_pending_f(void)
192{
193 return 0x1U;
194}
195static inline u32 fifo_intr_0_bind_error_reset_f(void)
196{
197 return 0x1U;
198}
199static inline u32 fifo_intr_0_pio_error_pending_f(void)
200{
201 return 0x10U;
202}
203static inline u32 fifo_intr_0_pio_error_reset_f(void)
204{
205 return 0x10U;
206}
207static inline u32 fifo_intr_0_sched_error_pending_f(void)
208{
209 return 0x100U;
210}
211static inline u32 fifo_intr_0_sched_error_reset_f(void)
212{
213 return 0x100U;
214}
215static inline u32 fifo_intr_0_chsw_error_pending_f(void)
216{
217 return 0x10000U;
218}
219static inline u32 fifo_intr_0_chsw_error_reset_f(void)
220{
221 return 0x10000U;
222}
223static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void)
224{
225 return 0x800000U;
226}
227static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void)
228{
229 return 0x800000U;
230}
231static inline u32 fifo_intr_0_lb_error_pending_f(void)
232{
233 return 0x1000000U;
234}
235static inline u32 fifo_intr_0_lb_error_reset_f(void)
236{
237 return 0x1000000U;
238}
239static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void)
240{
241 return 0x8000000U;
242}
243static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void)
244{
245 return 0x8000000U;
246}
247static inline u32 fifo_intr_0_mmu_fault_pending_f(void)
248{
249 return 0x10000000U;
250}
251static inline u32 fifo_intr_0_pbdma_intr_pending_f(void)
252{
253 return 0x20000000U;
254}
255static inline u32 fifo_intr_0_runlist_event_pending_f(void)
256{
257 return 0x40000000U;
258}
259static inline u32 fifo_intr_0_channel_intr_pending_f(void)
260{
261 return 0x80000000U;
262}
263static inline u32 fifo_intr_en_0_r(void)
264{
265 return 0x00002140U;
266}
267static inline u32 fifo_intr_en_0_sched_error_f(u32 v)
268{
269 return (v & 0x1U) << 8U;
270}
271static inline u32 fifo_intr_en_0_sched_error_m(void)
272{
273 return 0x1U << 8U;
274}
275static inline u32 fifo_intr_en_0_mmu_fault_f(u32 v)
276{
277 return (v & 0x1U) << 28U;
278}
279static inline u32 fifo_intr_en_0_mmu_fault_m(void)
280{
281 return 0x1U << 28U;
282}
283static inline u32 fifo_intr_en_1_r(void)
284{
285 return 0x00002528U;
286}
287static inline u32 fifo_intr_bind_error_r(void)
288{
289 return 0x0000252cU;
290}
291static inline u32 fifo_intr_sched_error_r(void)
292{
293 return 0x0000254cU;
294}
295static inline u32 fifo_intr_sched_error_code_f(u32 v)
296{
297 return (v & 0xffU) << 0U;
298}
299static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void)
300{
301 return 0x0000000aU;
302}
303static inline u32 fifo_intr_chsw_error_r(void)
304{
305 return 0x0000256cU;
306}
307static inline u32 fifo_intr_mmu_fault_id_r(void)
308{
309 return 0x0000259cU;
310}
311static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void)
312{
313 return 0x00000000U;
314}
315static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void)
316{
317 return 0x0U;
318}
319static inline u32 fifo_intr_mmu_fault_inst_r(u32 i)
320{
321 return 0x00002800U + i*16U;
322}
323static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r)
324{
325 return (r >> 0U) & 0xfffffffU;
326}
327static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void)
328{
329 return 0x0000000cU;
330}
331static inline u32 fifo_intr_mmu_fault_lo_r(u32 i)
332{
333 return 0x00002804U + i*16U;
334}
335static inline u32 fifo_intr_mmu_fault_hi_r(u32 i)
336{
337 return 0x00002808U + i*16U;
338}
339static inline u32 fifo_intr_mmu_fault_info_r(u32 i)
340{
341 return 0x0000280cU + i*16U;
342}
343static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r)
344{
345 return (r >> 0U) & 0xfU;
346}
347static inline u32 fifo_intr_mmu_fault_info_write_v(u32 r)
348{
349 return (r >> 7U) & 0x1U;
350}
351static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r)
352{
353 return (r >> 6U) & 0x1U;
354}
355static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void)
356{
357 return 0x00000000U;
358}
359static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void)
360{
361 return 0x00000001U;
362}
363static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r)
364{
365 return (r >> 8U) & 0x1fU;
366}
367static inline u32 fifo_intr_pbdma_id_r(void)
368{
369 return 0x000025a0U;
370}
371static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i)
372{
373 return (v & 0x1U) << (0U + i*1U);
374}
375static inline u32 fifo_intr_pbdma_id_status_v(u32 r, u32 i)
376{
377 return (r >> (0U + i*1U)) & 0x1U;
378}
379static inline u32 fifo_intr_pbdma_id_status__size_1_v(void)
380{
381 return 0x00000001U;
382}
383static inline u32 fifo_intr_runlist_r(void)
384{
385 return 0x00002a00U;
386}
387static inline u32 fifo_fb_timeout_r(void)
388{
389 return 0x00002a04U;
390}
391static inline u32 fifo_fb_timeout_period_m(void)
392{
393 return 0x3fffffffU << 0U;
394}
395static inline u32 fifo_fb_timeout_period_max_f(void)
396{
397 return 0x3fffffffU;
398}
399static inline u32 fifo_pb_timeout_r(void)
400{
401 return 0x00002a08U;
402}
403static inline u32 fifo_pb_timeout_detection_enabled_f(void)
404{
405 return 0x80000000U;
406}
407static inline u32 fifo_error_sched_disable_r(void)
408{
409 return 0x0000262cU;
410}
411static inline u32 fifo_sched_disable_r(void)
412{
413 return 0x00002630U;
414}
415static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i)
416{
417 return (v & 0x1U) << (0U + i*1U);
418}
419static inline u32 fifo_sched_disable_runlist_m(u32 i)
420{
421 return 0x1U << (0U + i*1U);
422}
423static inline u32 fifo_sched_disable_true_v(void)
424{
425 return 0x00000001U;
426}
427static inline u32 fifo_preempt_r(void)
428{
429 return 0x00002634U;
430}
431static inline u32 fifo_preempt_pending_true_f(void)
432{
433 return 0x100000U;
434}
435static inline u32 fifo_preempt_type_channel_f(void)
436{
437 return 0x0U;
438}
439static inline u32 fifo_preempt_type_tsg_f(void)
440{
441 return 0x1000000U;
442}
443static inline u32 fifo_preempt_chid_f(u32 v)
444{
445 return (v & 0xfffU) << 0U;
446}
447static inline u32 fifo_preempt_id_f(u32 v)
448{
449 return (v & 0xfffU) << 0U;
450}
451static inline u32 fifo_trigger_mmu_fault_r(u32 i)
452{
453 return 0x00002a30U + i*4U;
454}
455static inline u32 fifo_trigger_mmu_fault_id_f(u32 v)
456{
457 return (v & 0x1fU) << 0U;
458}
459static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v)
460{
461 return (v & 0x1U) << 8U;
462}
463static inline u32 fifo_engine_status_r(u32 i)
464{
465 return 0x00002640U + i*8U;
466}
467static inline u32 fifo_engine_status__size_1_v(void)
468{
469 return 0x00000002U;
470}
471static inline u32 fifo_engine_status_id_v(u32 r)
472{
473 return (r >> 0U) & 0xfffU;
474}
475static inline u32 fifo_engine_status_id_type_v(u32 r)
476{
477 return (r >> 12U) & 0x1U;
478}
479static inline u32 fifo_engine_status_id_type_chid_v(void)
480{
481 return 0x00000000U;
482}
483static inline u32 fifo_engine_status_id_type_tsgid_v(void)
484{
485 return 0x00000001U;
486}
487static inline u32 fifo_engine_status_ctx_status_v(u32 r)
488{
489 return (r >> 13U) & 0x7U;
490}
491static inline u32 fifo_engine_status_ctx_status_invalid_v(void)
492{
493 return 0x00000000U;
494}
495static inline u32 fifo_engine_status_ctx_status_valid_v(void)
496{
497 return 0x00000001U;
498}
499static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void)
500{
501 return 0x00000005U;
502}
503static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void)
504{
505 return 0x00000006U;
506}
507static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void)
508{
509 return 0x00000007U;
510}
511static inline u32 fifo_engine_status_next_id_v(u32 r)
512{
513 return (r >> 16U) & 0xfffU;
514}
515static inline u32 fifo_engine_status_next_id_type_v(u32 r)
516{
517 return (r >> 28U) & 0x1U;
518}
519static inline u32 fifo_engine_status_next_id_type_chid_v(void)
520{
521 return 0x00000000U;
522}
523static inline u32 fifo_engine_status_faulted_v(u32 r)
524{
525 return (r >> 30U) & 0x1U;
526}
527static inline u32 fifo_engine_status_faulted_true_v(void)
528{
529 return 0x00000001U;
530}
531static inline u32 fifo_engine_status_engine_v(u32 r)
532{
533 return (r >> 31U) & 0x1U;
534}
535static inline u32 fifo_engine_status_engine_idle_v(void)
536{
537 return 0x00000000U;
538}
539static inline u32 fifo_engine_status_engine_busy_v(void)
540{
541 return 0x00000001U;
542}
543static inline u32 fifo_engine_status_ctxsw_v(u32 r)
544{
545 return (r >> 15U) & 0x1U;
546}
547static inline u32 fifo_engine_status_ctxsw_in_progress_v(void)
548{
549 return 0x00000001U;
550}
551static inline u32 fifo_engine_status_ctxsw_in_progress_f(void)
552{
553 return 0x8000U;
554}
555static inline u32 fifo_pbdma_status_r(u32 i)
556{
557 return 0x00003080U + i*4U;
558}
559static inline u32 fifo_pbdma_status__size_1_v(void)
560{
561 return 0x00000001U;
562}
563static inline u32 fifo_pbdma_status_id_v(u32 r)
564{
565 return (r >> 0U) & 0xfffU;
566}
567static inline u32 fifo_pbdma_status_id_type_v(u32 r)
568{
569 return (r >> 12U) & 0x1U;
570}
571static inline u32 fifo_pbdma_status_id_type_chid_v(void)
572{
573 return 0x00000000U;
574}
575static inline u32 fifo_pbdma_status_id_type_tsgid_v(void)
576{
577 return 0x00000001U;
578}
579static inline u32 fifo_pbdma_status_chan_status_v(u32 r)
580{
581 return (r >> 13U) & 0x7U;
582}
583static inline u32 fifo_pbdma_status_chan_status_valid_v(void)
584{
585 return 0x00000001U;
586}
587static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void)
588{
589 return 0x00000005U;
590}
591static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void)
592{
593 return 0x00000006U;
594}
595static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void)
596{
597 return 0x00000007U;
598}
599static inline u32 fifo_pbdma_status_next_id_v(u32 r)
600{
601 return (r >> 16U) & 0xfffU;
602}
603static inline u32 fifo_pbdma_status_next_id_type_v(u32 r)
604{
605 return (r >> 28U) & 0x1U;
606}
607static inline u32 fifo_pbdma_status_next_id_type_chid_v(void)
608{
609 return 0x00000000U;
610}
611static inline u32 fifo_pbdma_status_chsw_v(u32 r)
612{
613 return (r >> 15U) & 0x1U;
614}
615static inline u32 fifo_pbdma_status_chsw_in_progress_v(void)
616{
617 return 0x00000001U;
618}
619#endif
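As a usage sketch for the preempt registers above: issue a channel preempt and poll the pending bit until the hardware reports completion. reg_read()/reg_write() are assumed MMIO helpers, and a production caller would add a timeout:

/* Illustrative only: reg_read/reg_write are assumed MMIO helpers. */
extern u32 reg_read(u32 r);
extern void reg_write(u32 r, u32 v);

static void example_preempt_channel(u32 chid)
{
	/* request a channel (not TSG) preempt for this chid */
	reg_write(fifo_preempt_r(),
		  fifo_preempt_chid_f(chid) | fifo_preempt_type_channel_f());

	/* the pending bit stays set while the preempt is in flight */
	while ((reg_read(fifo_preempt_r()) & fifo_preempt_pending_true_f()) != 0U) {
		/* a real caller would bound this loop with a timeout */
	}
}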
diff --git a/include/gk20a/hw_flush_gk20a.h b/include/gk20a/hw_flush_gk20a.h
new file mode 100644
index 0000000..d270b5f
--- /dev/null
+++ b/include/gk20a/hw_flush_gk20a.h
@@ -0,0 +1,187 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_flush_gk20a_h_
57#define _hw_flush_gk20a_h_
58
59static inline u32 flush_l2_system_invalidate_r(void)
60{
61 return 0x00070004U;
62}
63static inline u32 flush_l2_system_invalidate_pending_v(u32 r)
64{
65 return (r >> 0U) & 0x1U;
66}
67static inline u32 flush_l2_system_invalidate_pending_busy_v(void)
68{
69 return 0x00000001U;
70}
71static inline u32 flush_l2_system_invalidate_pending_busy_f(void)
72{
73 return 0x1U;
74}
75static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r)
76{
77 return (r >> 1U) & 0x1U;
78}
79static inline u32 flush_l2_system_invalidate_outstanding_true_v(void)
80{
81 return 0x00000001U;
82}
83static inline u32 flush_l2_flush_dirty_r(void)
84{
85 return 0x00070010U;
86}
87static inline u32 flush_l2_flush_dirty_pending_v(u32 r)
88{
89 return (r >> 0U) & 0x1U;
90}
91static inline u32 flush_l2_flush_dirty_pending_empty_v(void)
92{
93 return 0x00000000U;
94}
95static inline u32 flush_l2_flush_dirty_pending_empty_f(void)
96{
97 return 0x0U;
98}
99static inline u32 flush_l2_flush_dirty_pending_busy_v(void)
100{
101 return 0x00000001U;
102}
103static inline u32 flush_l2_flush_dirty_pending_busy_f(void)
104{
105 return 0x1U;
106}
107static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r)
108{
109 return (r >> 1U) & 0x1U;
110}
111static inline u32 flush_l2_flush_dirty_outstanding_false_v(void)
112{
113 return 0x00000000U;
114}
115static inline u32 flush_l2_flush_dirty_outstanding_false_f(void)
116{
117 return 0x0U;
118}
119static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
120{
121 return 0x00000001U;
122}
123static inline u32 flush_l2_clean_comptags_r(void)
124{
125 return 0x0007000cU;
126}
127static inline u32 flush_l2_clean_comptags_pending_v(u32 r)
128{
129 return (r >> 0U) & 0x1U;
130}
131static inline u32 flush_l2_clean_comptags_pending_empty_v(void)
132{
133 return 0x00000000U;
134}
135static inline u32 flush_l2_clean_comptags_pending_empty_f(void)
136{
137 return 0x0U;
138}
139static inline u32 flush_l2_clean_comptags_pending_busy_v(void)
140{
141 return 0x00000001U;
142}
143static inline u32 flush_l2_clean_comptags_pending_busy_f(void)
144{
145 return 0x1U;
146}
147static inline u32 flush_l2_clean_comptags_outstanding_v(u32 r)
148{
149 return (r >> 1U) & 0x1U;
150}
151static inline u32 flush_l2_clean_comptags_outstanding_false_v(void)
152{
153 return 0x00000000U;
154}
155static inline u32 flush_l2_clean_comptags_outstanding_false_f(void)
156{
157 return 0x0U;
158}
159static inline u32 flush_l2_clean_comptags_outstanding_true_v(void)
160{
161 return 0x00000001U;
162}
163static inline u32 flush_fb_flush_r(void)
164{
165 return 0x00070000U;
166}
167static inline u32 flush_fb_flush_pending_v(u32 r)
168{
169 return (r >> 0U) & 0x1U;
170}
171static inline u32 flush_fb_flush_pending_busy_v(void)
172{
173 return 0x00000001U;
174}
175static inline u32 flush_fb_flush_pending_busy_f(void)
176{
177 return 0x1U;
178}
179static inline u32 flush_fb_flush_outstanding_v(u32 r)
180{
181 return (r >> 1U) & 0x1U;
182}
183static inline u32 flush_fb_flush_outstanding_true_v(void)
184{
185 return 0x00000001U;
186}
187#endif
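The flush registers above follow a kick-and-poll pattern; a hedged sketch of an FB flush, again with reg_read()/reg_write() as stand-in MMIO helpers:

/* Illustrative only: reg_read/reg_write are assumed MMIO helpers. */
extern u32 reg_read(u32 r);
extern void reg_write(u32 r, u32 v);

static void example_fb_flush(void)
{
	u32 r;

	/* writing the busy value kicks off the flush */
	reg_write(flush_fb_flush_r(), flush_fb_flush_pending_busy_f());

	/* poll until neither pending nor outstanding reads back as true */
	do {
		r = reg_read(flush_fb_flush_r());
	} while (flush_fb_flush_pending_v(r) ==
			flush_fb_flush_pending_busy_v() ||
		 flush_fb_flush_outstanding_v(r) ==
			flush_fb_flush_outstanding_true_v());
	/* a real caller would bound this loop with a timeout */
}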
diff --git a/include/gk20a/hw_gmmu_gk20a.h b/include/gk20a/hw_gmmu_gk20a.h
new file mode 100644
index 0000000..a788d1d
--- /dev/null
+++ b/include/gk20a/hw_gmmu_gk20a.h
@@ -0,0 +1,283 @@
1/*
2 * Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_gmmu_gk20a_h_
57#define _hw_gmmu_gk20a_h_
58
59static inline u32 gmmu_pde_aperture_big_w(void)
60{
61 return 0U;
62}
63static inline u32 gmmu_pde_aperture_big_invalid_f(void)
64{
65 return 0x0U;
66}
67static inline u32 gmmu_pde_aperture_big_video_memory_f(void)
68{
69 return 0x1U;
70}
71static inline u32 gmmu_pde_aperture_big_sys_mem_coh_f(void)
72{
73 return 0x2U;
74}
75static inline u32 gmmu_pde_aperture_big_sys_mem_ncoh_f(void)
76{
77 return 0x3U;
78}
79static inline u32 gmmu_pde_size_w(void)
80{
81 return 0U;
82}
83static inline u32 gmmu_pde_size_full_f(void)
84{
85 return 0x0U;
86}
87static inline u32 gmmu_pde_address_big_sys_f(u32 v)
88{
89 return (v & 0xfffffffU) << 4U;
90}
91static inline u32 gmmu_pde_address_big_sys_w(void)
92{
93 return 0U;
94}
95static inline u32 gmmu_pde_aperture_small_w(void)
96{
97 return 1U;
98}
99static inline u32 gmmu_pde_aperture_small_invalid_f(void)
100{
101 return 0x0U;
102}
103static inline u32 gmmu_pde_aperture_small_video_memory_f(void)
104{
105 return 0x1U;
106}
107static inline u32 gmmu_pde_aperture_small_sys_mem_coh_f(void)
108{
109 return 0x2U;
110}
111static inline u32 gmmu_pde_aperture_small_sys_mem_ncoh_f(void)
112{
113 return 0x3U;
114}
115static inline u32 gmmu_pde_vol_small_w(void)
116{
117 return 1U;
118}
119static inline u32 gmmu_pde_vol_small_true_f(void)
120{
121 return 0x4U;
122}
123static inline u32 gmmu_pde_vol_small_false_f(void)
124{
125 return 0x0U;
126}
127static inline u32 gmmu_pde_vol_big_w(void)
128{
129 return 1U;
130}
131static inline u32 gmmu_pde_vol_big_true_f(void)
132{
133 return 0x8U;
134}
135static inline u32 gmmu_pde_vol_big_false_f(void)
136{
137 return 0x0U;
138}
139static inline u32 gmmu_pde_address_small_sys_f(u32 v)
140{
141 return (v & 0xfffffffU) << 4U;
142}
143static inline u32 gmmu_pde_address_small_sys_w(void)
144{
145 return 1U;
146}
147static inline u32 gmmu_pde_address_shift_v(void)
148{
149 return 0x0000000cU;
150}
151static inline u32 gmmu_pde__size_v(void)
152{
153 return 0x00000008U;
154}
155static inline u32 gmmu_pte__size_v(void)
156{
157 return 0x00000008U;
158}
159static inline u32 gmmu_pte_valid_w(void)
160{
161 return 0U;
162}
163static inline u32 gmmu_pte_valid_true_f(void)
164{
165 return 0x1U;
166}
167static inline u32 gmmu_pte_valid_false_f(void)
168{
169 return 0x0U;
170}
171static inline u32 gmmu_pte_privilege_w(void)
172{
173 return 0U;
174}
175static inline u32 gmmu_pte_privilege_true_f(void)
176{
177 return 0x2U;
178}
179static inline u32 gmmu_pte_privilege_false_f(void)
180{
181 return 0x0U;
182}
183static inline u32 gmmu_pte_address_sys_f(u32 v)
184{
185 return (v & 0xfffffffU) << 4U;
186}
187static inline u32 gmmu_pte_address_sys_w(void)
188{
189 return 0U;
190}
191static inline u32 gmmu_pte_address_vid_f(u32 v)
192{
193 return (v & 0x1ffffffU) << 4U;
194}
195static inline u32 gmmu_pte_address_vid_w(void)
196{
197 return 0U;
198}
199static inline u32 gmmu_pte_vol_w(void)
200{
201 return 1U;
202}
203static inline u32 gmmu_pte_vol_true_f(void)
204{
205 return 0x1U;
206}
207static inline u32 gmmu_pte_vol_false_f(void)
208{
209 return 0x0U;
210}
211static inline u32 gmmu_pte_aperture_w(void)
212{
213 return 1U;
214}
215static inline u32 gmmu_pte_aperture_video_memory_f(void)
216{
217 return 0x0U;
218}
219static inline u32 gmmu_pte_aperture_sys_mem_coh_f(void)
220{
221 return 0x4U;
222}
223static inline u32 gmmu_pte_aperture_sys_mem_ncoh_f(void)
224{
225 return 0x6U;
226}
227static inline u32 gmmu_pte_read_only_w(void)
228{
229 return 0U;
230}
231static inline u32 gmmu_pte_read_only_true_f(void)
232{
233 return 0x4U;
234}
235static inline u32 gmmu_pte_write_disable_w(void)
236{
237 return 1U;
238}
239static inline u32 gmmu_pte_write_disable_true_f(void)
240{
241 return 0x80000000U;
242}
243static inline u32 gmmu_pte_read_disable_w(void)
244{
245 return 1U;
246}
247static inline u32 gmmu_pte_read_disable_true_f(void)
248{
249 return 0x40000000U;
250}
251static inline u32 gmmu_pte_comptagline_s(void)
252{
253 return 17U;
254}
255static inline u32 gmmu_pte_comptagline_f(u32 v)
256{
257 return (v & 0x1ffffU) << 12U;
258}
259static inline u32 gmmu_pte_comptagline_w(void)
260{
261 return 1U;
262}
263static inline u32 gmmu_pte_address_shift_v(void)
264{
265 return 0x0000000cU;
266}
267static inline u32 gmmu_pte_kind_f(u32 v)
268{
269 return (v & 0xffU) << 4U;
270}
271static inline u32 gmmu_pte_kind_w(void)
272{
273 return 1U;
274}
275static inline u32 gmmu_pte_kind_invalid_v(void)
276{
277 return 0x000000ffU;
278}
279static inline u32 gmmu_pte_kind_pitch_v(void)
280{
281 return 0x00000000U;
282}
283#endif
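Because the gmmu_pte_* helpers above return word indexes (_w) alongside field values (_f), a PTE is naturally built as a two-word array. Below is a sketch of filling a small-page, system-memory PTE; the choice of the non-coherent aperture, the pitch kind and the 4KB address shift are illustrative assumptions:

/* Illustrative only: the page-frame math and kind choice are assumptions. */
static void example_fill_sysmem_pte(u32 pte[2], u64 phys_addr,
				    int read_only, int cacheable)
{
	/* word 0: valid bit plus the shifted system-memory address */
	pte[gmmu_pte_valid_w()] =
		gmmu_pte_valid_true_f() |
		gmmu_pte_address_sys_f((u32)(phys_addr >>
					     gmmu_pte_address_shift_v()));

	if (read_only)
		pte[gmmu_pte_read_only_w()] |= gmmu_pte_read_only_true_f();

	/* word 1: aperture, volatility and kind live in the second word */
	pte[gmmu_pte_aperture_w()] =
		gmmu_pte_aperture_sys_mem_ncoh_f() |
		(cacheable ? gmmu_pte_vol_false_f() : gmmu_pte_vol_true_f()) |
		gmmu_pte_kind_f(gmmu_pte_kind_pitch_v());
}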
diff --git a/include/gk20a/hw_gr_gk20a.h b/include/gk20a/hw_gr_gk20a.h
new file mode 100644
index 0000000..826108f
--- /dev/null
+++ b/include/gk20a/hw_gr_gk20a.h
@@ -0,0 +1,3807 @@
1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
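The *_pending_f()/*_reset_f() pairs defined below follow a common idiom: test the pending bit in a read of the interrupt register, handle the condition, then write the matching reset value back to clear it. A hedged sketch, once more with reg_read()/reg_write() standing in for the real MMIO helpers:

/* Illustrative only: reg_read/reg_write are assumed MMIO helpers. */
extern u32 reg_read(u32 r);
extern void reg_write(u32 r, u32 v);

static void example_ack_gr_notify_intr(void)
{
	u32 gr_intr = reg_read(gr_intr_r());

	if ((gr_intr & gr_intr_notify_pending_f()) != 0U) {
		/* ... handle the notify interrupt here ... */

		/* writing the reset value clears just this pending bit */
		reg_write(gr_intr_r(), gr_intr_notify_reset_f());
	}
}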
56#ifndef _hw_gr_gk20a_h_
57#define _hw_gr_gk20a_h_
58
59static inline u32 gr_intr_r(void)
60{
61 return 0x00400100U;
62}
63static inline u32 gr_intr_notify_pending_f(void)
64{
65 return 0x1U;
66}
67static inline u32 gr_intr_notify_reset_f(void)
68{
69 return 0x1U;
70}
71static inline u32 gr_intr_semaphore_pending_f(void)
72{
73 return 0x2U;
74}
75static inline u32 gr_intr_semaphore_reset_f(void)
76{
77 return 0x2U;
78}
79static inline u32 gr_intr_semaphore_timeout_not_pending_f(void)
80{
81 return 0x0U;
82}
83static inline u32 gr_intr_semaphore_timeout_pending_f(void)
84{
85 return 0x4U;
86}
87static inline u32 gr_intr_semaphore_timeout_reset_f(void)
88{
89 return 0x4U;
90}
91static inline u32 gr_intr_illegal_method_pending_f(void)
92{
93 return 0x10U;
94}
95static inline u32 gr_intr_illegal_method_reset_f(void)
96{
97 return 0x10U;
98}
99static inline u32 gr_intr_illegal_notify_pending_f(void)
100{
101 return 0x40U;
102}
103static inline u32 gr_intr_illegal_notify_reset_f(void)
104{
105 return 0x40U;
106}
107static inline u32 gr_intr_firmware_method_f(u32 v)
108{
109 return (v & 0x1U) << 8U;
110}
111static inline u32 gr_intr_firmware_method_pending_f(void)
112{
113 return 0x100U;
114}
115static inline u32 gr_intr_firmware_method_reset_f(void)
116{
117 return 0x100U;
118}
119static inline u32 gr_intr_illegal_class_pending_f(void)
120{
121 return 0x20U;
122}
123static inline u32 gr_intr_illegal_class_reset_f(void)
124{
125 return 0x20U;
126}
127static inline u32 gr_intr_fecs_error_pending_f(void)
128{
129 return 0x80000U;
130}
131static inline u32 gr_intr_fecs_error_reset_f(void)
132{
133 return 0x80000U;
134}
135static inline u32 gr_intr_class_error_pending_f(void)
136{
137 return 0x100000U;
138}
139static inline u32 gr_intr_class_error_reset_f(void)
140{
141 return 0x100000U;
142}
143static inline u32 gr_intr_exception_pending_f(void)
144{
145 return 0x200000U;
146}
147static inline u32 gr_intr_exception_reset_f(void)
148{
149 return 0x200000U;
150}
151static inline u32 gr_fecs_intr_r(void)
152{
153 return 0x00400144U;
154}
155static inline u32 gr_class_error_r(void)
156{
157 return 0x00400110U;
158}
159static inline u32 gr_class_error_code_v(u32 r)
160{
161 return (r >> 0U) & 0xffffU;
162}
163static inline u32 gr_intr_nonstall_r(void)
164{
165 return 0x00400120U;
166}
167static inline u32 gr_intr_nonstall_trap_pending_f(void)
168{
169 return 0x2U;
170}
171static inline u32 gr_intr_en_r(void)
172{
173 return 0x0040013cU;
174}
175static inline u32 gr_exception_r(void)
176{
177 return 0x00400108U;
178}
179static inline u32 gr_exception_fe_m(void)
180{
181 return 0x1U << 0U;
182}
183static inline u32 gr_exception_gpc_m(void)
184{
185 return 0x1U << 24U;
186}
187static inline u32 gr_exception_memfmt_m(void)
188{
189 return 0x1U << 1U;
190}
191static inline u32 gr_exception_ds_m(void)
192{
193 return 0x1U << 4U;
194}
195static inline u32 gr_exception_sked_m(void)
196{
197 return 0x1U << 8U;
198}
199static inline u32 gr_exception_pd_m(void)
200{
201 return 0x1U << 2U;
202}
203static inline u32 gr_exception_scc_m(void)
204{
205 return 0x1U << 3U;
206}
207static inline u32 gr_exception_ssync_m(void)
208{
209 return 0x1U << 5U;
210}
211static inline u32 gr_exception_mme_m(void)
212{
213 return 0x1U << 7U;
214}
215static inline u32 gr_exception1_r(void)
216{
217 return 0x00400118U;
218}
219static inline u32 gr_exception1_gpc_0_pending_f(void)
220{
221 return 0x1U;
222}
223static inline u32 gr_exception2_r(void)
224{
225 return 0x0040011cU;
226}
227static inline u32 gr_exception_en_r(void)
228{
229 return 0x00400138U;
230}
231static inline u32 gr_exception_en_fe_m(void)
232{
233 return 0x1U << 0U;
234}
235static inline u32 gr_exception1_en_r(void)
236{
237 return 0x00400130U;
238}
239static inline u32 gr_exception2_en_r(void)
240{
241 return 0x00400134U;
242}
243static inline u32 gr_gpfifo_ctl_r(void)
244{
245 return 0x00400500U;
246}
247static inline u32 gr_gpfifo_ctl_access_f(u32 v)
248{
249 return (v & 0x1U) << 0U;
250}
251static inline u32 gr_gpfifo_ctl_access_disabled_f(void)
252{
253 return 0x0U;
254}
255static inline u32 gr_gpfifo_ctl_access_enabled_f(void)
256{
257 return 0x1U;
258}
259static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v)
260{
261 return (v & 0x1U) << 16U;
262}
263static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_v(void)
264{
265 return 0x00000001U;
266}
267static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void)
268{
269 return 0x10000U;
270}
271static inline u32 gr_gpfifo_status_r(void)
272{
273 return 0x00400504U;
274}
275static inline u32 gr_trapped_addr_r(void)
276{
277 return 0x00400704U;
278}
279static inline u32 gr_trapped_addr_mthd_v(u32 r)
280{
281 return (r >> 2U) & 0xfffU;
282}
283static inline u32 gr_trapped_addr_subch_v(u32 r)
284{
285 return (r >> 16U) & 0x7U;
286}
287static inline u32 gr_trapped_addr_mme_generated_v(u32 r)
288{
289 return (r >> 20U) & 0x1U;
290}
291static inline u32 gr_trapped_addr_datahigh_v(u32 r)
292{
293 return (r >> 24U) & 0x1U;
294}
295static inline u32 gr_trapped_addr_priv_v(u32 r)
296{
297 return (r >> 28U) & 0x1U;
298}
299static inline u32 gr_trapped_addr_status_v(u32 r)
300{
301 return (r >> 31U) & 0x1U;
302}
303static inline u32 gr_trapped_data_lo_r(void)
304{
305 return 0x00400708U;
306}
307static inline u32 gr_trapped_data_hi_r(void)
308{
309 return 0x0040070cU;
310}
311static inline u32 gr_trapped_data_mme_r(void)
312{
313 return 0x00400710U;
314}
315static inline u32 gr_trapped_data_mme_pc_v(u32 r)
316{
317 return (r >> 0U) & 0x7ffU;
318}
319static inline u32 gr_status_r(void)
320{
321 return 0x00400700U;
322}
323static inline u32 gr_status_fe_method_upper_v(u32 r)
324{
325 return (r >> 1U) & 0x1U;
326}
327static inline u32 gr_status_fe_method_lower_v(u32 r)
328{
329 return (r >> 2U) & 0x1U;
330}
331static inline u32 gr_status_fe_method_lower_idle_v(void)
332{
333 return 0x00000000U;
334}
335static inline u32 gr_status_fe_gi_v(u32 r)
336{
337 return (r >> 21U) & 0x1U;
338}
339static inline u32 gr_status_mask_r(void)
340{
341 return 0x00400610U;
342}
343static inline u32 gr_status_1_r(void)
344{
345 return 0x00400604U;
346}
347static inline u32 gr_status_2_r(void)
348{
349 return 0x00400608U;
350}
351static inline u32 gr_engine_status_r(void)
352{
353 return 0x0040060cU;
354}
355static inline u32 gr_engine_status_value_busy_f(void)
356{
357 return 0x1U;
358}
359static inline u32 gr_pri_be0_becs_be_exception_r(void)
360{
361 return 0x00410204U;
362}
363static inline u32 gr_pri_be0_becs_be_exception_en_r(void)
364{
365 return 0x00410208U;
366}
367static inline u32 gr_pri_gpc0_gpccs_gpc_exception_r(void)
368{
369 return 0x00502c90U;
370}
371static inline u32 gr_pri_gpc0_gpccs_gpc_exception_en_r(void)
372{
373 return 0x00502c94U;
374}
375static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_exception_r(void)
376{
377 return 0x00504508U;
378}
379static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
380{
381 return 0x0050450cU;
382}
383static inline u32 gr_activity_0_r(void)
384{
385 return 0x00400380U;
386}
387static inline u32 gr_activity_1_r(void)
388{
389 return 0x00400384U;
390}
391static inline u32 gr_activity_2_r(void)
392{
393 return 0x00400388U;
394}
395static inline u32 gr_activity_4_r(void)
396{
397 return 0x00400390U;
398}
399static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
400{
401 return 0x00501000U;
402}
403static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
404{
405 return 0x00419000U;
406}
407static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
408{
409 return 0x1U << 1U;
410}
411static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
412{
413 return 0x005046a4U;
414}
415static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
416{
417 return 0x00419ea4U;
418}
419static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
420{
421 return 0x1U << 0U;
422}
423static inline u32 gr_pri_sked_activity_r(void)
424{
425 return 0x00407054U;
426}
427static inline u32 gr_pri_gpc0_gpccs_gpc_activity0_r(void)
428{
429 return 0x00502c80U;
430}
431static inline u32 gr_pri_gpc0_gpccs_gpc_activity1_r(void)
432{
433 return 0x00502c84U;
434}
435static inline u32 gr_pri_gpc0_gpccs_gpc_activity2_r(void)
436{
437 return 0x00502c88U;
438}
439static inline u32 gr_pri_gpc0_gpccs_gpc_activity3_r(void)
440{
441 return 0x00502c8cU;
442}
443static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r(void)
444{
445 return 0x00504500U;
446}
447static inline u32 gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r(void)
448{
449 return 0x00501d00U;
450}
451static inline u32 gr_pri_gpcs_gpccs_gpc_activity_0_r(void)
452{
453 return 0x0041ac80U;
454}
455static inline u32 gr_pri_gpcs_gpccs_gpc_activity_1_r(void)
456{
457 return 0x0041ac84U;
458}
459static inline u32 gr_pri_gpcs_gpccs_gpc_activity_2_r(void)
460{
461 return 0x0041ac88U;
462}
463static inline u32 gr_pri_gpcs_gpccs_gpc_activity_3_r(void)
464{
465 return 0x0041ac8cU;
466}
467static inline u32 gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r(void)
468{
469 return 0x0041c500U;
470}
471static inline u32 gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r(void)
472{
473 return 0x00419d00U;
474}
475static inline u32 gr_pri_be0_becs_be_activity0_r(void)
476{
477 return 0x00410200U;
478}
479static inline u32 gr_pri_bes_becs_be_activity0_r(void)
480{
481 return 0x00408a00U;
482}
483static inline u32 gr_pri_ds_mpipe_status_r(void)
484{
485 return 0x00405858U;
486}
487static inline u32 gr_pri_fe_go_idle_on_status_r(void)
488{
489 return 0x00404150U;
490}
491static inline u32 gr_pri_fe_go_idle_check_r(void)
492{
493 return 0x00404158U;
494}
495static inline u32 gr_pri_fe_go_idle_info_r(void)
496{
497 return 0x00404194U;
498}
499static inline u32 gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r(void)
500{
501 return 0x00504238U;
502}
503static inline u32 gr_pri_be0_crop_status1_r(void)
504{
505 return 0x00410134U;
506}
507static inline u32 gr_pri_bes_crop_status1_r(void)
508{
509 return 0x00408934U;
510}
511static inline u32 gr_pri_be0_zrop_status_r(void)
512{
513 return 0x00410048U;
514}
515static inline u32 gr_pri_be0_zrop_status2_r(void)
516{
517 return 0x0041004cU;
518}
519static inline u32 gr_pri_bes_zrop_status_r(void)
520{
521 return 0x00408848U;
522}
523static inline u32 gr_pri_bes_zrop_status2_r(void)
524{
525 return 0x0040884cU;
526}
527static inline u32 gr_pipe_bundle_address_r(void)
528{
529 return 0x00400200U;
530}
531static inline u32 gr_pipe_bundle_address_value_v(u32 r)
532{
533 return (r >> 0U) & 0xffffU;
534}
535static inline u32 gr_pipe_bundle_data_r(void)
536{
537 return 0x00400204U;
538}
539static inline u32 gr_pipe_bundle_config_r(void)
540{
541 return 0x00400208U;
542}
543static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void)
544{
545 return 0x0U;
546}
547static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void)
548{
549 return 0x80000000U;
550}
551static inline u32 gr_fe_hww_esr_r(void)
552{
553 return 0x00404000U;
554}
555static inline u32 gr_fe_hww_esr_reset_active_f(void)
556{
557 return 0x40000000U;
558}
559static inline u32 gr_fe_hww_esr_en_enable_f(void)
560{
561 return 0x80000000U;
562}
563static inline u32 gr_fe_hww_esr_info_r(void)
564{
565 return 0x004041b0U;
566}
567static inline u32 gr_fe_go_idle_timeout_r(void)
568{
569 return 0x00404154U;
570}
571static inline u32 gr_fe_go_idle_timeout_count_f(u32 v)
572{
573 return (v & 0xffffffffU) << 0U;
574}
575static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void)
576{
577 return 0x0U;
578}
579static inline u32 gr_fe_go_idle_timeout_count_prod_f(void)
580{
581 return 0x800U;
582}
583static inline u32 gr_fe_object_table_r(u32 i)
584{
585 return 0x00404200U + i*4U;
586}
587static inline u32 gr_fe_object_table_nvclass_v(u32 r)
588{
589 return (r >> 0U) & 0xffffU;
590}
591static inline u32 gr_pri_mme_shadow_raw_index_r(void)
592{
593 return 0x00404488U;
594}
595static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void)
596{
597 return 0x80000000U;
598}
599static inline u32 gr_pri_mme_shadow_raw_data_r(void)
600{
601 return 0x0040448cU;
602}
603static inline u32 gr_mme_hww_esr_r(void)
604{
605 return 0x00404490U;
606}
607static inline u32 gr_mme_hww_esr_reset_active_f(void)
608{
609 return 0x40000000U;
610}
611static inline u32 gr_mme_hww_esr_en_enable_f(void)
612{
613 return 0x80000000U;
614}
615static inline u32 gr_mme_hww_esr_info_r(void)
616{
617 return 0x00404494U;
618}
619static inline u32 gr_memfmt_hww_esr_r(void)
620{
621 return 0x00404600U;
622}
623static inline u32 gr_memfmt_hww_esr_reset_active_f(void)
624{
625 return 0x40000000U;
626}
627static inline u32 gr_memfmt_hww_esr_en_enable_f(void)
628{
629 return 0x80000000U;
630}
631static inline u32 gr_fecs_cpuctl_r(void)
632{
633 return 0x00409100U;
634}
635static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
636{
637 return (v & 0x1U) << 1U;
638}
639static inline u32 gr_fecs_dmactl_r(void)
640{
641 return 0x0040910cU;
642}
643static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v)
644{
645 return (v & 0x1U) << 0U;
646}
647static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void)
648{
649 return 0x1U << 1U;
650}
651static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void)
652{
653 return 0x1U << 2U;
654}
655static inline u32 gr_fecs_os_r(void)
656{
657 return 0x00409080U;
658}
659static inline u32 gr_fecs_idlestate_r(void)
660{
661 return 0x0040904cU;
662}
663static inline u32 gr_fecs_mailbox0_r(void)
664{
665 return 0x00409040U;
666}
667static inline u32 gr_fecs_mailbox1_r(void)
668{
669 return 0x00409044U;
670}
671static inline u32 gr_fecs_irqstat_r(void)
672{
673 return 0x00409008U;
674}
675static inline u32 gr_fecs_irqmode_r(void)
676{
677 return 0x0040900cU;
678}
679static inline u32 gr_fecs_irqmask_r(void)
680{
681 return 0x00409018U;
682}
683static inline u32 gr_fecs_irqdest_r(void)
684{
685 return 0x0040901cU;
686}
687static inline u32 gr_fecs_curctx_r(void)
688{
689 return 0x00409050U;
690}
691static inline u32 gr_fecs_nxtctx_r(void)
692{
693 return 0x00409054U;
694}
695static inline u32 gr_fecs_engctl_r(void)
696{
697 return 0x004090a4U;
698}
699static inline u32 gr_fecs_debug1_r(void)
700{
701 return 0x00409090U;
702}
703static inline u32 gr_fecs_debuginfo_r(void)
704{
705 return 0x00409094U;
706}
707static inline u32 gr_fecs_icd_cmd_r(void)
708{
709 return 0x00409200U;
710}
711static inline u32 gr_fecs_icd_cmd_opc_s(void)
712{
713 return 4U;
714}
715static inline u32 gr_fecs_icd_cmd_opc_f(u32 v)
716{
717 return (v & 0xfU) << 0U;
718}
719static inline u32 gr_fecs_icd_cmd_opc_m(void)
720{
721 return 0xfU << 0U;
722}
723static inline u32 gr_fecs_icd_cmd_opc_v(u32 r)
724{
725 return (r >> 0U) & 0xfU;
726}
727static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void)
728{
729 return 0x8U;
730}
731static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void)
732{
733 return 0xeU;
734}
735static inline u32 gr_fecs_icd_cmd_idx_f(u32 v)
736{
737 return (v & 0x1fU) << 8U;
738}
739static inline u32 gr_fecs_icd_rdata_r(void)
740{
741 return 0x0040920cU;
742}
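/*
 * Usage sketch for the FECS ICD (internal command/debug) accessors above:
 * a falcon register read can be requested by writing an rreg opcode plus
 * register index to gr_fecs_icd_cmd_r() and then reading the result from
 * gr_fecs_icd_rdata_r(). gk20a_writel()/gk20a_readl() are assumed MMIO
 * helpers, not declared in this header.
 *
 *	gk20a_writel(g, gr_fecs_icd_cmd_r(),
 *		     gr_fecs_icd_cmd_opc_rreg_f() |
 *		     gr_fecs_icd_cmd_idx_f(idx));
 *	val = gk20a_readl(g, gr_fecs_icd_rdata_r());
 */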
743static inline u32 gr_fecs_imemc_r(u32 i)
744{
745 return 0x00409180U + i*16U;
746}
747static inline u32 gr_fecs_imemc_offs_f(u32 v)
748{
749 return (v & 0x3fU) << 2U;
750}
751static inline u32 gr_fecs_imemc_blk_f(u32 v)
752{
753 return (v & 0xffU) << 8U;
754}
755static inline u32 gr_fecs_imemc_aincw_f(u32 v)
756{
757 return (v & 0x1U) << 24U;
758}
759static inline u32 gr_fecs_imemd_r(u32 i)
760{
761 return 0x00409184U + i*16U;
762}
763static inline u32 gr_fecs_imemt_r(u32 i)
764{
765 return 0x00409188U + i*16U;
766}
767static inline u32 gr_fecs_imemt_tag_f(u32 v)
768{
769 return (v & 0xffffU) << 0U;
770}
771static inline u32 gr_fecs_dmemc_r(u32 i)
772{
773 return 0x004091c0U + i*8U;
774}
775static inline u32 gr_fecs_dmemc_offs_s(void)
776{
777 return 6U;
778}
779static inline u32 gr_fecs_dmemc_offs_f(u32 v)
780{
781 return (v & 0x3fU) << 2U;
782}
783static inline u32 gr_fecs_dmemc_offs_m(void)
784{
785 return 0x3fU << 2U;
786}
787static inline u32 gr_fecs_dmemc_offs_v(u32 r)
788{
789 return (r >> 2U) & 0x3fU;
790}
791static inline u32 gr_fecs_dmemc_blk_f(u32 v)
792{
793 return (v & 0xffU) << 8U;
794}
795static inline u32 gr_fecs_dmemc_aincw_f(u32 v)
796{
797 return (v & 0x1U) << 24U;
798}
799static inline u32 gr_fecs_dmemd_r(u32 i)
800{
801 return 0x004091c4U + i*8U;
802}
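/*
 * Sketch of the falcon DMEM window formed by gr_fecs_dmemc_r() and
 * gr_fecs_dmemd_r(): programming dmemc with a block/offset plus the
 * auto-increment-on-write bit lets consecutive writes to dmemd stream a
 * buffer into DMEM. gk20a_writel() is an assumed MMIO helper and port
 * index 0 is only an example.
 *
 *	gk20a_writel(g, gr_fecs_dmemc_r(0),
 *		     gr_fecs_dmemc_offs_f(0) |
 *		     gr_fecs_dmemc_blk_f(blk) |
 *		     gr_fecs_dmemc_aincw_f(1));
 *	for (i = 0; i < words; i++)
 *		gk20a_writel(g, gr_fecs_dmemd_r(0), src[i]);
 */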
803static inline u32 gr_fecs_dmatrfbase_r(void)
804{
805 return 0x00409110U;
806}
807static inline u32 gr_fecs_dmatrfmoffs_r(void)
808{
809 return 0x00409114U;
810}
811static inline u32 gr_fecs_dmatrffboffs_r(void)
812{
813 return 0x0040911cU;
814}
815static inline u32 gr_fecs_dmatrfcmd_r(void)
816{
817 return 0x00409118U;
818}
819static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v)
820{
821 return (v & 0x1U) << 4U;
822}
823static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v)
824{
825 return (v & 0x1U) << 5U;
826}
827static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v)
828{
829 return (v & 0x7U) << 8U;
830}
831static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v)
832{
833 return (v & 0x7U) << 12U;
834}
835static inline u32 gr_fecs_bootvec_r(void)
836{
837 return 0x00409104U;
838}
839static inline u32 gr_fecs_bootvec_vec_f(u32 v)
840{
841 return (v & 0xffffffffU) << 0U;
842}
843static inline u32 gr_fecs_falcon_hwcfg_r(void)
844{
845 return 0x00409108U;
846}
847static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void)
848{
849 return 0x0041a108U;
850}
851static inline u32 gr_fecs_falcon_rm_r(void)
852{
853 return 0x00409084U;
854}
855static inline u32 gr_fecs_current_ctx_r(void)
856{
857 return 0x00409b00U;
858}
859static inline u32 gr_fecs_current_ctx_ptr_f(u32 v)
860{
861 return (v & 0xfffffffU) << 0U;
862}
863static inline u32 gr_fecs_current_ctx_ptr_v(u32 r)
864{
865 return (r >> 0U) & 0xfffffffU;
866}
867static inline u32 gr_fecs_current_ctx_target_s(void)
868{
869 return 2U;
870}
871static inline u32 gr_fecs_current_ctx_target_f(u32 v)
872{
873 return (v & 0x3U) << 28U;
874}
875static inline u32 gr_fecs_current_ctx_target_m(void)
876{
877 return 0x3U << 28U;
878}
879static inline u32 gr_fecs_current_ctx_target_v(u32 r)
880{
881 return (r >> 28U) & 0x3U;
882}
883static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void)
884{
885 return 0x0U;
886}
887static inline u32 gr_fecs_current_ctx_target_sys_mem_coh_f(void)
888{
889 return 0x20000000U;
890}
891static inline u32 gr_fecs_current_ctx_target_sys_mem_ncoh_f(void)
892{
893 return 0x30000000U;
894}
895static inline u32 gr_fecs_current_ctx_valid_s(void)
896{
897 return 1U;
898}
899static inline u32 gr_fecs_current_ctx_valid_f(u32 v)
900{
901 return (v & 0x1U) << 31U;
902}
903static inline u32 gr_fecs_current_ctx_valid_m(void)
904{
905 return 0x1U << 31U;
906}
907static inline u32 gr_fecs_current_ctx_valid_v(u32 r)
908{
909 return (r >> 31U) & 0x1U;
910}
911static inline u32 gr_fecs_current_ctx_valid_false_f(void)
912{
913 return 0x0U;
914}
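/*
 * The gr_fecs_current_ctx_* fields above describe a context pointer word:
 * a 28-bit pointer in bits 27:0, an aperture target in bits 29:28 and a
 * valid flag in bit 31. A minimal sketch of composing such a word,
 * assuming inst_ptr holds a 4 KB-aligned instance block address so only
 * its upper bits are stored in the ptr field:
 *
 *	u32 ctx = gr_fecs_current_ctx_ptr_f((u32)(inst_ptr >> 12)) |
 *		  gr_fecs_current_ctx_target_vid_mem_f() |
 *		  gr_fecs_current_ctx_valid_f(1);
 */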
915static inline u32 gr_fecs_method_data_r(void)
916{
917 return 0x00409500U;
918}
919static inline u32 gr_fecs_method_push_r(void)
920{
921 return 0x00409504U;
922}
923static inline u32 gr_fecs_method_push_adr_f(u32 v)
924{
925 return (v & 0xfffU) << 0U;
926}
927static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void)
928{
929 return 0x00000003U;
930}
931static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void)
932{
933 return 0x3U;
934}
935static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void)
936{
937 return 0x00000010U;
938}
939static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void)
940{
941 return 0x00000009U;
942}
943static inline u32 gr_fecs_method_push_adr_restore_golden_v(void)
944{
945 return 0x00000015U;
946}
947static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void)
948{
949 return 0x00000016U;
950}
951static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void)
952{
953 return 0x00000025U;
954}
955static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void)
956{
957 return 0x00000030U;
958}
959static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void)
960{
961 return 0x00000031U;
962}
963static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void)
964{
965 return 0x00000032U;
966}
967static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void)
968{
969 return 0x00000038U;
970}
971static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void)
972{
973 return 0x00000039U;
974}
975static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
976{
977 return 0x21U;
978}
979static inline u32 gr_fecs_method_push_adr_halt_pipeline_v(void)
980{
981 return 0x00000004U;
982}
983static inline u32 gr_fecs_host_int_status_r(void)
984{
985 return 0x00409c18U;
986}
987static inline u32 gr_fecs_host_int_status_fault_during_ctxsw_f(u32 v)
988{
989 return (v & 0x1U) << 16U;
990}
991static inline u32 gr_fecs_host_int_status_umimp_firmware_method_f(u32 v)
992{
993 return (v & 0x1U) << 17U;
994}
995static inline u32 gr_fecs_host_int_status_umimp_illegal_method_f(u32 v)
996{
997 return (v & 0x1U) << 18U;
998}
999static inline u32 gr_fecs_host_int_status_watchdog_active_f(void)
1000{
1001 return 0x80000U;
1002}
1003static inline u32 gr_fecs_host_int_status_ctxsw_intr_f(u32 v)
1004{
1005 return (v & 0xffffU) << 0U;
1006}
1007static inline u32 gr_fecs_host_int_clear_r(void)
1008{
1009 return 0x00409c20U;
1010}
1011static inline u32 gr_fecs_host_int_clear_ctxsw_intr1_f(u32 v)
1012{
1013 return (v & 0x1U) << 1U;
1014}
1015static inline u32 gr_fecs_host_int_clear_ctxsw_intr1_clear_f(void)
1016{
1017 return 0x2U;
1018}
1019static inline u32 gr_fecs_host_int_enable_r(void)
1020{
1021 return 0x00409c24U;
1022}
1023static inline u32 gr_fecs_host_int_enable_ctxsw_intr1_enable_f(void)
1024{
1025 return 0x2U;
1026}
1027static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void)
1028{
1029 return 0x10000U;
1030}
1031static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void)
1032{
1033 return 0x20000U;
1034}
1035static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void)
1036{
1037 return 0x40000U;
1038}
1039static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void)
1040{
1041 return 0x80000U;
1042}
1043static inline u32 gr_fecs_ctxsw_reset_ctl_r(void)
1044{
1045 return 0x00409614U;
1046}
1047static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void)
1048{
1049 return 0x0U;
1050}
1051static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void)
1052{
1053 return 0x0U;
1054}
1055static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void)
1056{
1057 return 0x0U;
1058}
1059static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void)
1060{
1061 return 0x10U;
1062}
1063static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void)
1064{
1065 return 0x20U;
1066}
1067static inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void)
1068{
1069 return 0x40U;
1070}
1071static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void)
1072{
1073 return 0x0U;
1074}
1075static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void)
1076{
1077 return 0x100U;
1078}
1079static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void)
1080{
1081 return 0x0U;
1082}
1083static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void)
1084{
1085 return 0x200U;
1086}
1087static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void)
1088{
1089 return 1U;
1090}
1091static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v)
1092{
1093 return (v & 0x1U) << 10U;
1094}
1095static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void)
1096{
1097 return 0x1U << 10U;
1098}
1099static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r)
1100{
1101 return (r >> 10U) & 0x1U;
1102}
1103static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void)
1104{
1105 return 0x0U;
1106}
1107static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void)
1108{
1109 return 0x400U;
1110}
1111static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void)
1112{
1113 return 0x0040960cU;
1114}
1115static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i)
1116{
1117 return 0x00409800U + i*4U;
1118}
1119static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void)
1120{
1121 return 0x00000008U;
1122}
1123static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v)
1124{
1125 return (v & 0xffffffffU) << 0U;
1126}
1127static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void)
1128{
1129 return 0x00000001U;
1130}
1131static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void)
1132{
1133 return 0x00000002U;
1134}
1135static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i)
1136{
1137 return 0x00409820U + i*4U;
1138}
1139static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v)
1140{
1141 return (v & 0xffffffffU) << 0U;
1142}
1143static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i)
1144{
1145 return 0x00409840U + i*4U;
1146}
1147static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v)
1148{
1149 return (v & 0xffffffffU) << 0U;
1150}
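/*
 * The ctxsw mailbox registers above form the FECS ucode's completion
 * channel: mailbox 0 is commonly polled against the pass/fail values,
 * while the _set/_clear variants appear to set and clear mailbox bits by
 * write, avoiding a read-modify-write. A hedged polling sketch, with
 * gk20a_readl() as an assumed MMIO helper:
 *
 *	u32 reply = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
 *	bool passed = (reply == gr_fecs_ctxsw_mailbox_value_pass_v());
 *	bool failed = (reply == gr_fecs_ctxsw_mailbox_value_fail_v());
 */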
1151static inline u32 gr_fecs_fs_r(void)
1152{
1153 return 0x00409604U;
1154}
1155static inline u32 gr_fecs_fs_num_available_gpcs_s(void)
1156{
1157 return 5U;
1158}
1159static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v)
1160{
1161 return (v & 0x1fU) << 0U;
1162}
1163static inline u32 gr_fecs_fs_num_available_gpcs_m(void)
1164{
1165 return 0x1fU << 0U;
1166}
1167static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r)
1168{
1169 return (r >> 0U) & 0x1fU;
1170}
1171static inline u32 gr_fecs_fs_num_available_fbps_s(void)
1172{
1173 return 5U;
1174}
1175static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v)
1176{
1177 return (v & 0x1fU) << 16U;
1178}
1179static inline u32 gr_fecs_fs_num_available_fbps_m(void)
1180{
1181 return 0x1fU << 16U;
1182}
1183static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r)
1184{
1185 return (r >> 16U) & 0x1fU;
1186}
1187static inline u32 gr_fecs_cfg_r(void)
1188{
1189 return 0x00409620U;
1190}
1191static inline u32 gr_fecs_cfg_imem_sz_v(u32 r)
1192{
1193 return (r >> 0U) & 0xffU;
1194}
1195static inline u32 gr_fecs_rc_lanes_r(void)
1196{
1197 return 0x00409880U;
1198}
1199static inline u32 gr_fecs_rc_lanes_num_chains_s(void)
1200{
1201 return 6U;
1202}
1203static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v)
1204{
1205 return (v & 0x3fU) << 0U;
1206}
1207static inline u32 gr_fecs_rc_lanes_num_chains_m(void)
1208{
1209 return 0x3fU << 0U;
1210}
1211static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r)
1212{
1213 return (r >> 0U) & 0x3fU;
1214}
1215static inline u32 gr_fecs_ctxsw_status_1_r(void)
1216{
1217 return 0x00409400U;
1218}
1219static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void)
1220{
1221 return 1U;
1222}
1223static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v)
1224{
1225 return (v & 0x1U) << 12U;
1226}
1227static inline u32 gr_fecs_ctxsw_status_1_arb_busy_m(void)
1228{
1229 return 0x1U << 12U;
1230}
1231static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r)
1232{
1233 return (r >> 12U) & 0x1U;
1234}
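/*
 * The arb_busy accessors above show the full per-field quartet used
 * throughout this generated header: _s() returns the field width in bits,
 * _f(v) shifts a value into the field's position, _m() returns the
 * in-place mask, and _v(r) extracts the field from a full register value
 * (with _r() giving the register offset itself). For example, testing the
 * bit in a raw gr_fecs_ctxsw_status_1_r() read:
 *
 *	bool arb_busy =
 *		(status & gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U;
 *	// equivalently: gr_fecs_ctxsw_status_1_arb_busy_v(status) != 0U
 */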
1235static inline u32 gr_fecs_arb_ctx_adr_r(void)
1236{
1237 return 0x00409a24U;
1238}
1239static inline u32 gr_fecs_new_ctx_r(void)
1240{
1241 return 0x00409b04U;
1242}
1243static inline u32 gr_fecs_new_ctx_ptr_s(void)
1244{
1245 return 28U;
1246}
1247static inline u32 gr_fecs_new_ctx_ptr_f(u32 v)
1248{
1249 return (v & 0xfffffffU) << 0U;
1250}
1251static inline u32 gr_fecs_new_ctx_ptr_m(void)
1252{
1253 return 0xfffffffU << 0U;
1254}
1255static inline u32 gr_fecs_new_ctx_ptr_v(u32 r)
1256{
1257 return (r >> 0U) & 0xfffffffU;
1258}
1259static inline u32 gr_fecs_new_ctx_target_s(void)
1260{
1261 return 2U;
1262}
1263static inline u32 gr_fecs_new_ctx_target_f(u32 v)
1264{
1265 return (v & 0x3U) << 28U;
1266}
1267static inline u32 gr_fecs_new_ctx_target_m(void)
1268{
1269 return 0x3U << 28U;
1270}
1271static inline u32 gr_fecs_new_ctx_target_v(u32 r)
1272{
1273 return (r >> 28U) & 0x3U;
1274}
1275static inline u32 gr_fecs_new_ctx_target_vid_mem_f(void)
1276{
1277 return 0x0U;
1278}
1279static inline u32 gr_fecs_new_ctx_target_sys_mem_ncoh_f(void)
1280{
1281 return 0x30000000U;
1282}
1283static inline u32 gr_fecs_new_ctx_target_sys_mem_coh_f(void)
1284{
1285 return 0x20000000U;
1286}
1287static inline u32 gr_fecs_new_ctx_valid_s(void)
1288{
1289 return 1U;
1290}
1291static inline u32 gr_fecs_new_ctx_valid_f(u32 v)
1292{
1293 return (v & 0x1U) << 31U;
1294}
1295static inline u32 gr_fecs_new_ctx_valid_m(void)
1296{
1297 return 0x1U << 31U;
1298}
1299static inline u32 gr_fecs_new_ctx_valid_v(u32 r)
1300{
1301 return (r >> 31U) & 0x1U;
1302}
1303static inline u32 gr_fecs_arb_ctx_ptr_r(void)
1304{
1305 return 0x00409a0cU;
1306}
1307static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void)
1308{
1309 return 28U;
1310}
1311static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v)
1312{
1313 return (v & 0xfffffffU) << 0U;
1314}
1315static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void)
1316{
1317 return 0xfffffffU << 0U;
1318}
1319static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r)
1320{
1321 return (r >> 0U) & 0xfffffffU;
1322}
1323static inline u32 gr_fecs_arb_ctx_ptr_target_s(void)
1324{
1325 return 2U;
1326}
1327static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v)
1328{
1329 return (v & 0x3U) << 28U;
1330}
1331static inline u32 gr_fecs_arb_ctx_ptr_target_m(void)
1332{
1333 return 0x3U << 28U;
1334}
1335static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
1336{
1337 return (r >> 28U) & 0x3U;
1338}
1339static inline u32 gr_fecs_arb_ctx_ptr_target_vid_mem_f(void)
1340{
1341 return 0x0U;
1342}
1343static inline u32 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(void)
1344{
1345 return 0x30000000U;
1346}
1347static inline u32 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(void)
1348{
1349 return 0x20000000U;
1350}
1351static inline u32 gr_fecs_arb_ctx_cmd_r(void)
1352{
1353 return 0x00409a10U;
1354}
1355static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void)
1356{
1357 return 5U;
1358}
1359static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v)
1360{
1361 return (v & 0x1fU) << 0U;
1362}
1363static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void)
1364{
1365 return 0x1fU << 0U;
1366}
1367static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r)
1368{
1369 return (r >> 0U) & 0x1fU;
1370}
1371static inline u32 gr_fecs_ctxsw_status_fe_0_r(void)
1372{
1373 return 0x00409c00U;
1374}
1375static inline u32 gr_gpc0_gpccs_ctxsw_status_gpc_0_r(void)
1376{
1377 return 0x00502c04U;
1378}
1379static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
1380{
1381 return 0x00502400U;
1382}
1383static inline u32 gr_fecs_ctxsw_idlestate_r(void)
1384{
1385 return 0x00409420U;
1386}
1387static inline u32 gr_gpc0_gpccs_ctxsw_idlestate_r(void)
1388{
1389 return 0x00502420U;
1390}
1391static inline u32 gr_rstr2d_gpc_map0_r(void)
1392{
1393 return 0x0040780cU;
1394}
1395static inline u32 gr_rstr2d_gpc_map1_r(void)
1396{
1397 return 0x00407810U;
1398}
1399static inline u32 gr_rstr2d_gpc_map2_r(void)
1400{
1401 return 0x00407814U;
1402}
1403static inline u32 gr_rstr2d_gpc_map3_r(void)
1404{
1405 return 0x00407818U;
1406}
1407static inline u32 gr_rstr2d_gpc_map4_r(void)
1408{
1409 return 0x0040781cU;
1410}
1411static inline u32 gr_rstr2d_gpc_map5_r(void)
1412{
1413 return 0x00407820U;
1414}
1415static inline u32 gr_rstr2d_map_table_cfg_r(void)
1416{
1417 return 0x004078bcU;
1418}
1419static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v)
1420{
1421 return (v & 0xffU) << 0U;
1422}
1423static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v)
1424{
1425 return (v & 0xffU) << 8U;
1426}
1427static inline u32 gr_pd_hww_esr_r(void)
1428{
1429 return 0x00406018U;
1430}
1431static inline u32 gr_pd_hww_esr_reset_active_f(void)
1432{
1433 return 0x40000000U;
1434}
1435static inline u32 gr_pd_hww_esr_en_enable_f(void)
1436{
1437 return 0x80000000U;
1438}
1439static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i)
1440{
1441 return 0x00406028U + i*4U;
1442}
1443static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void)
1444{
1445 return 0x00000004U;
1446}
1447static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v)
1448{
1449 return (v & 0xfU) << 0U;
1450}
1451static inline u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v)
1452{
1453 return (v & 0xfU) << 4U;
1454}
1455static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v)
1456{
1457 return (v & 0xfU) << 8U;
1458}
1459static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v)
1460{
1461 return (v & 0xfU) << 12U;
1462}
1463static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v)
1464{
1465 return (v & 0xfU) << 16U;
1466}
1467static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v)
1468{
1469 return (v & 0xfU) << 20U;
1470}
1471static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v)
1472{
1473 return (v & 0xfU) << 24U;
1474}
1475static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v)
1476{
1477 return (v & 0xfU) << 28U;
1478}
1479static inline u32 gr_pd_ab_dist_cfg0_r(void)
1480{
1481 return 0x004064c0U;
1482}
1483static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void)
1484{
1485 return 0x80000000U;
1486}
1487static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void)
1488{
1489 return 0x0U;
1490}
1491static inline u32 gr_pd_ab_dist_cfg1_r(void)
1492{
1493 return 0x004064c4U;
1494}
1495static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
1496{
1497 return 0xffffU;
1498}
1499static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v)
1500{
1501 return (v & 0x7ffU) << 16U;
1502}
1503static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void)
1504{
1505 return 0x00000080U;
1506}
1507static inline u32 gr_pd_ab_dist_cfg2_r(void)
1508{
1509 return 0x004064c8U;
1510}
1511static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v)
1512{
1513 return (v & 0xfffU) << 0U;
1514}
1515static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void)
1516{
1517 return 0x00000100U;
1518}
1519static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v)
1520{
1521 return (v & 0xfffU) << 16U;
1522}
1523static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void)
1524{
1525 return 0x00000020U;
1526}
1527static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
1528{
1529 return 0x00000062U;
1530}
1531static inline u32 gr_pd_pagepool_r(void)
1532{
1533 return 0x004064ccU;
1534}
1535static inline u32 gr_pd_pagepool_total_pages_f(u32 v)
1536{
1537 return (v & 0xffU) << 0U;
1538}
1539static inline u32 gr_pd_pagepool_valid_true_f(void)
1540{
1541 return 0x80000000U;
1542}
1543static inline u32 gr_pd_dist_skip_table_r(u32 i)
1544{
1545 return 0x004064d0U + i*4U;
1546}
1547static inline u32 gr_pd_dist_skip_table__size_1_v(void)
1548{
1549 return 0x00000008U;
1550}
1551static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v)
1552{
1553 return (v & 0xffU) << 0U;
1554}
1555static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v)
1556{
1557 return (v & 0xffU) << 8U;
1558}
1559static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v)
1560{
1561 return (v & 0xffU) << 16U;
1562}
1563static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v)
1564{
1565 return (v & 0xffU) << 24U;
1566}
1567static inline u32 gr_pd_alpha_ratio_table_r(u32 i)
1568{
1569 return 0x00406800U + i*4U;
1570}
1571static inline u32 gr_pd_alpha_ratio_table__size_1_v(void)
1572{
1573 return 0x00000100U;
1574}
1575static inline u32 gr_pd_alpha_ratio_table_gpc_4n0_mask_f(u32 v)
1576{
1577 return (v & 0xffU) << 0U;
1578}
1579static inline u32 gr_pd_alpha_ratio_table_gpc_4n1_mask_f(u32 v)
1580{
1581 return (v & 0xffU) << 8U;
1582}
1583static inline u32 gr_pd_alpha_ratio_table_gpc_4n2_mask_f(u32 v)
1584{
1585 return (v & 0xffU) << 16U;
1586}
1587static inline u32 gr_pd_alpha_ratio_table_gpc_4n3_mask_f(u32 v)
1588{
1589 return (v & 0xffU) << 24U;
1590}
1591static inline u32 gr_pd_beta_ratio_table_r(u32 i)
1592{
1593 return 0x00406c00U + i*4U;
1594}
1595static inline u32 gr_pd_beta_ratio_table__size_1_v(void)
1596{
1597 return 0x00000100U;
1598}
1599static inline u32 gr_pd_beta_ratio_table_gpc_4n0_mask_f(u32 v)
1600{
1601 return (v & 0xffU) << 0U;
1602}
1603static inline u32 gr_pd_beta_ratio_table_gpc_4n1_mask_f(u32 v)
1604{
1605 return (v & 0xffU) << 8U;
1606}
1607static inline u32 gr_pd_beta_ratio_table_gpc_4n2_mask_f(u32 v)
1608{
1609 return (v & 0xffU) << 16U;
1610}
1611static inline u32 gr_pd_beta_ratio_table_gpc_4n3_mask_f(u32 v)
1612{
1613 return (v & 0xffU) << 24U;
1614}
1615static inline u32 gr_ds_debug_r(void)
1616{
1617 return 0x00405800U;
1618}
1619static inline u32 gr_ds_debug_timeslice_mode_disable_f(void)
1620{
1621 return 0x0U;
1622}
1623static inline u32 gr_ds_debug_timeslice_mode_enable_f(void)
1624{
1625 return 0x8000000U;
1626}
1627static inline u32 gr_ds_zbc_color_r_r(void)
1628{
1629 return 0x00405804U;
1630}
1631static inline u32 gr_ds_zbc_color_r_val_f(u32 v)
1632{
1633 return (v & 0xffffffffU) << 0U;
1634}
1635static inline u32 gr_ds_zbc_color_g_r(void)
1636{
1637 return 0x00405808U;
1638}
1639static inline u32 gr_ds_zbc_color_g_val_f(u32 v)
1640{
1641 return (v & 0xffffffffU) << 0U;
1642}
1643static inline u32 gr_ds_zbc_color_b_r(void)
1644{
1645 return 0x0040580cU;
1646}
1647static inline u32 gr_ds_zbc_color_b_val_f(u32 v)
1648{
1649 return (v & 0xffffffffU) << 0U;
1650}
1651static inline u32 gr_ds_zbc_color_a_r(void)
1652{
1653 return 0x00405810U;
1654}
1655static inline u32 gr_ds_zbc_color_a_val_f(u32 v)
1656{
1657 return (v & 0xffffffffU) << 0U;
1658}
1659static inline u32 gr_ds_zbc_color_fmt_r(void)
1660{
1661 return 0x00405814U;
1662}
1663static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v)
1664{
1665 return (v & 0x7fU) << 0U;
1666}
1667static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void)
1668{
1669 return 0x0U;
1670}
1671static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void)
1672{
1673 return 0x00000001U;
1674}
1675static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void)
1676{
1677 return 0x00000002U;
1678}
1679static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
1680{
1681 return 0x00000004U;
1682}
1683static inline u32 gr_ds_zbc_color_fmt_val_a8_b8_g8_r8_v(void)
1684{
1685 return 0x00000028U;
1686}
1687static inline u32 gr_ds_zbc_z_r(void)
1688{
1689 return 0x00405818U;
1690}
1691static inline u32 gr_ds_zbc_z_val_s(void)
1692{
1693 return 32U;
1694}
1695static inline u32 gr_ds_zbc_z_val_f(u32 v)
1696{
1697 return (v & 0xffffffffU) << 0U;
1698}
1699static inline u32 gr_ds_zbc_z_val_m(void)
1700{
1701 return 0xffffffffU << 0U;
1702}
1703static inline u32 gr_ds_zbc_z_val_v(u32 r)
1704{
1705 return (r >> 0U) & 0xffffffffU;
1706}
1707static inline u32 gr_ds_zbc_z_val__init_v(void)
1708{
1709 return 0x00000000U;
1710}
1711static inline u32 gr_ds_zbc_z_val__init_f(void)
1712{
1713 return 0x0U;
1714}
1715static inline u32 gr_ds_zbc_z_fmt_r(void)
1716{
1717 return 0x0040581cU;
1718}
1719static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v)
1720{
1721 return (v & 0x1U) << 0U;
1722}
1723static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void)
1724{
1725 return 0x0U;
1726}
1727static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void)
1728{
1729 return 0x00000001U;
1730}
1731static inline u32 gr_ds_zbc_tbl_index_r(void)
1732{
1733 return 0x00405820U;
1734}
1735static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v)
1736{
1737 return (v & 0xfU) << 0U;
1738}
1739static inline u32 gr_ds_zbc_tbl_ld_r(void)
1740{
1741 return 0x00405824U;
1742}
1743static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void)
1744{
1745 return 0x0U;
1746}
1747static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void)
1748{
1749 return 0x1U;
1750}
1751static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void)
1752{
1753 return 0x0U;
1754}
1755static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
1756{
1757 return 0x4U;
1758}
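/*
 * A hedged sketch of how the gr_ds_zbc_* registers above combine to load
 * one color entry into the ZBC table: the four color components and the
 * format are staged, the table index is selected, and the load is kicked
 * with the color select / write action / trigger bits. gk20a_writel() is
 * an assumed MMIO helper and the values shown are placeholders.
 *
 *	gk20a_writel(g, gr_ds_zbc_color_r_r(), gr_ds_zbc_color_r_val_f(red));
 *	gk20a_writel(g, gr_ds_zbc_color_g_r(), gr_ds_zbc_color_g_val_f(green));
 *	gk20a_writel(g, gr_ds_zbc_color_b_r(), gr_ds_zbc_color_b_val_f(blue));
 *	gk20a_writel(g, gr_ds_zbc_color_a_r(), gr_ds_zbc_color_a_val_f(alpha));
 *	gk20a_writel(g, gr_ds_zbc_color_fmt_r(), gr_ds_zbc_color_fmt_val_f(fmt));
 *	gk20a_writel(g, gr_ds_zbc_tbl_index_r(), gr_ds_zbc_tbl_index_val_f(idx));
 *	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
 *		     gr_ds_zbc_tbl_ld_select_c_f() |
 *		     gr_ds_zbc_tbl_ld_action_write_f() |
 *		     gr_ds_zbc_tbl_ld_trigger_active_f());
 */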
1759static inline u32 gr_ds_tga_constraintlogic_r(void)
1760{
1761 return 0x00405830U;
1762}
1763static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
1764{
1765 return (v & 0xfffU) << 16U;
1766}
1767static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
1768{
1769 return (v & 0xfffU) << 0U;
1770}
1771static inline u32 gr_ds_hww_esr_r(void)
1772{
1773 return 0x00405840U;
1774}
1775static inline u32 gr_ds_hww_esr_reset_s(void)
1776{
1777 return 1U;
1778}
1779static inline u32 gr_ds_hww_esr_reset_f(u32 v)
1780{
1781 return (v & 0x1U) << 30U;
1782}
1783static inline u32 gr_ds_hww_esr_reset_m(void)
1784{
1785 return 0x1U << 30U;
1786}
1787static inline u32 gr_ds_hww_esr_reset_v(u32 r)
1788{
1789 return (r >> 30U) & 0x1U;
1790}
1791static inline u32 gr_ds_hww_esr_reset_task_v(void)
1792{
1793 return 0x00000001U;
1794}
1795static inline u32 gr_ds_hww_esr_reset_task_f(void)
1796{
1797 return 0x40000000U;
1798}
1799static inline u32 gr_ds_hww_esr_en_enabled_f(void)
1800{
1801 return 0x80000000U;
1802}
1803static inline u32 gr_ds_hww_report_mask_r(void)
1804{
1805 return 0x00405844U;
1806}
1807static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void)
1808{
1809 return 0x1U;
1810}
1811static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void)
1812{
1813 return 0x2U;
1814}
1815static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void)
1816{
1817 return 0x4U;
1818}
1819static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void)
1820{
1821 return 0x8U;
1822}
1823static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void)
1824{
1825 return 0x10U;
1826}
1827static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void)
1828{
1829 return 0x20U;
1830}
1831static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void)
1832{
1833 return 0x40U;
1834}
1835static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void)
1836{
1837 return 0x80U;
1838}
1839static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void)
1840{
1841 return 0x100U;
1842}
1843static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void)
1844{
1845 return 0x200U;
1846}
1847static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void)
1848{
1849 return 0x400U;
1850}
1851static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void)
1852{
1853 return 0x800U;
1854}
1855static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void)
1856{
1857 return 0x1000U;
1858}
1859static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void)
1860{
1861 return 0x2000U;
1862}
1863static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void)
1864{
1865 return 0x4000U;
1866}
1867static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void)
1868{
1869 return 0x8000U;
1870}
1871static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void)
1872{
1873 return 0x10000U;
1874}
1875static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void)
1876{
1877 return 0x20000U;
1878}
1879static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void)
1880{
1881 return 0x40000U;
1882}
1883static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void)
1884{
1885 return 0x80000U;
1886}
1887static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void)
1888{
1889 return 0x100000U;
1890}
1891static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void)
1892{
1893 return 0x200000U;
1894}
1895static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void)
1896{
1897 return 0x400000U;
1898}
1899static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void)
1900{
1901 return 0x800000U;
1902}
1903static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i)
1904{
1905 return 0x00405870U + i*4U;
1906}
1907static inline u32 gr_scc_bundle_cb_base_r(void)
1908{
1909 return 0x00408004U;
1910}
1911static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v)
1912{
1913 return (v & 0xffffffffU) << 0U;
1914}
1915static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void)
1916{
1917 return 0x00000008U;
1918}
1919static inline u32 gr_scc_bundle_cb_size_r(void)
1920{
1921 return 0x00408008U;
1922}
1923static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v)
1924{
1925 return (v & 0x7ffU) << 0U;
1926}
1927static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void)
1928{
1929 return 0x00000018U;
1930}
1931static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void)
1932{
1933 return 0x00000100U;
1934}
1935static inline u32 gr_scc_bundle_cb_size_valid_false_v(void)
1936{
1937 return 0x00000000U;
1938}
1939static inline u32 gr_scc_bundle_cb_size_valid_false_f(void)
1940{
1941 return 0x0U;
1942}
1943static inline u32 gr_scc_bundle_cb_size_valid_true_f(void)
1944{
1945 return 0x80000000U;
1946}
1947static inline u32 gr_scc_pagepool_base_r(void)
1948{
1949 return 0x0040800cU;
1950}
1951static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v)
1952{
1953 return (v & 0xffffffffU) << 0U;
1954}
1955static inline u32 gr_scc_pagepool_base_addr_39_8_align_bits_v(void)
1956{
1957 return 0x00000008U;
1958}
1959static inline u32 gr_scc_pagepool_r(void)
1960{
1961 return 0x00408010U;
1962}
1963static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
1964{
1965 return (v & 0xffU) << 0U;
1966}
1967static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
1968{
1969 return 0x00000000U;
1970}
1971static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
1972{
1973 return 0x00000080U;
1974}
1975static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
1976{
1977 return 0x00000100U;
1978}
1979static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
1980{
1981 return 8U;
1982}
1983static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
1984{
1985 return (v & 0xffU) << 8U;
1986}
1987static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
1988{
1989 return 0xffU << 8U;
1990}
1991static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
1992{
1993 return (r >> 8U) & 0xffU;
1994}
1995static inline u32 gr_scc_pagepool_valid_true_f(void)
1996{
1997 return 0x80000000U;
1998}
1999static inline u32 gr_scc_init_r(void)
2000{
2001 return 0x0040802cU;
2002}
2003static inline u32 gr_scc_init_ram_trigger_f(void)
2004{
2005 return 0x1U;
2006}
2007static inline u32 gr_scc_hww_esr_r(void)
2008{
2009 return 0x00408030U;
2010}
2011static inline u32 gr_scc_hww_esr_reset_active_f(void)
2012{
2013 return 0x40000000U;
2014}
2015static inline u32 gr_scc_hww_esr_en_enable_f(void)
2016{
2017 return 0x80000000U;
2018}
2019static inline u32 gr_sked_hww_esr_r(void)
2020{
2021 return 0x00407020U;
2022}
2023static inline u32 gr_sked_hww_esr_reset_active_f(void)
2024{
2025 return 0x40000000U;
2026}
2027static inline u32 gr_cwd_fs_r(void)
2028{
2029 return 0x00405b00U;
2030}
2031static inline u32 gr_cwd_fs_num_gpcs_f(u32 v)
2032{
2033 return (v & 0xffU) << 0U;
2034}
2035static inline u32 gr_cwd_fs_num_tpcs_f(u32 v)
2036{
2037 return (v & 0xffU) << 8U;
2038}
2039static inline u32 gr_gpc0_fs_gpc_r(void)
2040{
2041 return 0x00502608U;
2042}
2043static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r)
2044{
2045 return (r >> 0U) & 0x1fU;
2046}
2047static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r)
2048{
2049 return (r >> 16U) & 0x1fU;
2050}
2051static inline u32 gr_gpc0_cfg_r(void)
2052{
2053 return 0x00502620U;
2054}
2055static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r)
2056{
2057 return (r >> 0U) & 0xffU;
2058}
2059static inline u32 gr_gpccs_rc_lanes_r(void)
2060{
2061 return 0x00502880U;
2062}
2063static inline u32 gr_gpccs_rc_lanes_num_chains_s(void)
2064{
2065 return 6U;
2066}
2067static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v)
2068{
2069 return (v & 0x3fU) << 0U;
2070}
2071static inline u32 gr_gpccs_rc_lanes_num_chains_m(void)
2072{
2073 return 0x3fU << 0U;
2074}
2075static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
2076{
2077 return (r >> 0U) & 0x3fU;
2078}
2079static inline u32 gr_gpccs_rc_lane_size_r(u32 i)
2080{
2081 return 0x00502910U + i*0U;
2082}
2083static inline u32 gr_gpccs_rc_lane_size__size_1_v(void)
2084{
2085 return 0x00000010U;
2086}
2087static inline u32 gr_gpccs_rc_lane_size_v_s(void)
2088{
2089 return 24U;
2090}
2091static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v)
2092{
2093 return (v & 0xffffffU) << 0U;
2094}
2095static inline u32 gr_gpccs_rc_lane_size_v_m(void)
2096{
2097 return 0xffffffU << 0U;
2098}
2099static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r)
2100{
2101 return (r >> 0U) & 0xffffffU;
2102}
2103static inline u32 gr_gpccs_rc_lane_size_v_0_v(void)
2104{
2105 return 0x00000000U;
2106}
2107static inline u32 gr_gpccs_rc_lane_size_v_0_f(void)
2108{
2109 return 0x0U;
2110}
2111static inline u32 gr_gpc0_zcull_fs_r(void)
2112{
2113 return 0x00500910U;
2114}
2115static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v)
2116{
2117 return (v & 0x1ffU) << 0U;
2118}
2119static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v)
2120{
2121 return (v & 0xfU) << 16U;
2122}
2123static inline u32 gr_gpc0_zcull_ram_addr_r(void)
2124{
2125 return 0x00500914U;
2126}
2127static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v)
2128{
2129 return (v & 0xfU) << 0U;
2130}
2131static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v)
2132{
2133 return (v & 0xfU) << 8U;
2134}
2135static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void)
2136{
2137 return 0x00500918U;
2138}
2139static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v)
2140{
2141 return (v & 0xffffffU) << 0U;
2142}
2143static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void)
2144{
2145 return 0x00800000U;
2146}
2147static inline u32 gr_gpc0_zcull_total_ram_size_r(void)
2148{
2149 return 0x00500920U;
2150}
2151static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v)
2152{
2153 return (v & 0xffffU) << 0U;
2154}
2155static inline u32 gr_gpc0_zcull_zcsize_r(u32 i)
2156{
2157 return 0x00500a04U + i*32U;
2158}
2159static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void)
2160{
2161 return 0x00000040U;
2162}
2163static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void)
2164{
2165 return 0x00000010U;
2166}
2167static inline u32 gr_gpc0_gpm_pd_active_tpcs_r(void)
2168{
2169 return 0x00500c08U;
2170}
2171static inline u32 gr_gpc0_gpm_pd_active_tpcs_num_f(u32 v)
2172{
2173 return (v & 0x7U) << 0U;
2174}
2175static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i)
2176{
2177 return 0x00500c10U + i*4U;
2178}
2179static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v)
2180{
2181 return (v & 0xffU) << 0U;
2182}
2183static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i)
2184{
2185 return 0x00500c30U + i*4U;
2186}
2187static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r)
2188{
2189 return (r >> 0U) & 0xffU;
2190}
2191static inline u32 gr_gpc0_gpm_sd_active_tpcs_r(void)
2192{
2193 return 0x00500c8cU;
2194}
2195static inline u32 gr_gpc0_gpm_sd_active_tpcs_num_f(u32 v)
2196{
2197 return (v & 0x7U) << 0U;
2198}
2199static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void)
2200{
2201 return 0x00504088U;
2202}
2203static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v)
2204{
2205 return (v & 0xffffU) << 0U;
2206}
2207static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_r(void)
2208{
2209 return 0x005044e8U;
2210}
2211static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_value_f(u32 v)
2212{
2213 return (v & 0xffffU) << 0U;
2214}
2215static inline u32 gr_gpc0_tpc0_sm_cfg_r(void)
2216{
2217 return 0x00504698U;
2218}
2219static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
2220{
2221 return (v & 0xffffU) << 0U;
2222}
2223static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
2224{
2225 return (r >> 0U) & 0xffffU;
2226}
2227static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
2228{
2229 return 0x0050469cU;
2230}
2231static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
2232{
2233 return (r >> 0U) & 0xffU;
2234}
2235static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
2236{
2237 return (r >> 8U) & 0xfU;
2238}
2239static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
2240{
2241 return 0x0000000cU;
2242}
2243static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
2244{
2245 return 0x00503018U;
2246}
2247static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void)
2248{
2249 return 0x1U << 0U;
2250}
2251static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void)
2252{
2253 return 0x1U;
2254}
2255static inline u32 gr_gpc0_ppc0_cbm_cfg_r(void)
2256{
2257 return 0x005030c0U;
2258}
2259static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_f(u32 v)
2260{
2261 return (v & 0xffffU) << 0U;
2262}
2263static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_m(void)
2264{
2265 return 0xffffU << 0U;
2266}
2267static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_v(u32 r)
2268{
2269 return (r >> 0U) & 0xffffU;
2270}
2271static inline u32 gr_gpc0_ppc0_cbm_cfg_size_f(u32 v)
2272{
2273 return (v & 0xfffU) << 16U;
2274}
2275static inline u32 gr_gpc0_ppc0_cbm_cfg_size_m(void)
2276{
2277 return 0xfffU << 16U;
2278}
2279static inline u32 gr_gpc0_ppc0_cbm_cfg_size_v(u32 r)
2280{
2281 return (r >> 16U) & 0xfffU;
2282}
2283static inline u32 gr_gpc0_ppc0_cbm_cfg_size_default_v(void)
2284{
2285 return 0x00000240U;
2286}
2287static inline u32 gr_gpc0_ppc0_cbm_cfg_size_granularity_v(void)
2288{
2289 return 0x00000020U;
2290}
2291static inline u32 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(u32 v)
2292{
2293 return (v & 0x1U) << 28U;
2294}
2295static inline u32 gr_gpc0_ppc0_cbm_cfg2_r(void)
2296{
2297 return 0x005030e4U;
2298}
2299static inline u32 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(u32 v)
2300{
2301 return (v & 0xffffU) << 0U;
2302}
2303static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_f(u32 v)
2304{
2305 return (v & 0xfffU) << 16U;
2306}
2307static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_m(void)
2308{
2309 return 0xfffU << 16U;
2310}
2311static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_v(u32 r)
2312{
2313 return (r >> 16U) & 0xfffU;
2314}
2315static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_default_v(void)
2316{
2317 return 0x00000648U;
2318}
2319static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v(void)
2320{
2321 return 0x00000020U;
2322}
2323static inline u32 gr_gpccs_falcon_addr_r(void)
2324{
2325 return 0x0041a0acU;
2326}
2327static inline u32 gr_gpccs_falcon_addr_lsb_s(void)
2328{
2329 return 6U;
2330}
2331static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v)
2332{
2333 return (v & 0x3fU) << 0U;
2334}
2335static inline u32 gr_gpccs_falcon_addr_lsb_m(void)
2336{
2337 return 0x3fU << 0U;
2338}
2339static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r)
2340{
2341 return (r >> 0U) & 0x3fU;
2342}
2343static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void)
2344{
2345 return 0x00000000U;
2346}
2347static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void)
2348{
2349 return 0x0U;
2350}
2351static inline u32 gr_gpccs_falcon_addr_msb_s(void)
2352{
2353 return 6U;
2354}
2355static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v)
2356{
2357 return (v & 0x3fU) << 6U;
2358}
2359static inline u32 gr_gpccs_falcon_addr_msb_m(void)
2360{
2361 return 0x3fU << 6U;
2362}
2363static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r)
2364{
2365 return (r >> 6U) & 0x3fU;
2366}
2367static inline u32 gr_gpccs_falcon_addr_msb_init_v(void)
2368{
2369 return 0x00000000U;
2370}
2371static inline u32 gr_gpccs_falcon_addr_msb_init_f(void)
2372{
2373 return 0x0U;
2374}
2375static inline u32 gr_gpccs_falcon_addr_ext_s(void)
2376{
2377 return 12U;
2378}
2379static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v)
2380{
2381 return (v & 0xfffU) << 0U;
2382}
2383static inline u32 gr_gpccs_falcon_addr_ext_m(void)
2384{
2385 return 0xfffU << 0U;
2386}
2387static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r)
2388{
2389 return (r >> 0U) & 0xfffU;
2390}
2391static inline u32 gr_gpccs_cpuctl_r(void)
2392{
2393 return 0x0041a100U;
2394}
2395static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v)
2396{
2397 return (v & 0x1U) << 1U;
2398}
2399static inline u32 gr_gpccs_dmactl_r(void)
2400{
2401 return 0x0041a10cU;
2402}
2403static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v)
2404{
2405 return (v & 0x1U) << 0U;
2406}
2407static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void)
2408{
2409 return 0x1U << 1U;
2410}
2411static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void)
2412{
2413 return 0x1U << 2U;
2414}
2415static inline u32 gr_gpccs_imemc_r(u32 i)
2416{
2417 return 0x0041a180U + i*16U;
2418}
2419static inline u32 gr_gpccs_imemc_offs_f(u32 v)
2420{
2421 return (v & 0x3fU) << 2U;
2422}
2423static inline u32 gr_gpccs_imemc_blk_f(u32 v)
2424{
2425 return (v & 0xffU) << 8U;
2426}
2427static inline u32 gr_gpccs_imemc_aincw_f(u32 v)
2428{
2429 return (v & 0x1U) << 24U;
2430}
2431static inline u32 gr_gpccs_imemd_r(u32 i)
2432{
2433 return 0x0041a184U + i*16U;
2434}
2435static inline u32 gr_gpccs_imemt_r(u32 i)
2436{
2437 return 0x0041a188U + i*16U;
2438}
2439static inline u32 gr_gpccs_imemt__size_1_v(void)
2440{
2441 return 0x00000004U;
2442}
2443static inline u32 gr_gpccs_imemt_tag_f(u32 v)
2444{
2445 return (v & 0xffffU) << 0U;
2446}
2447static inline u32 gr_gpccs_dmemc_r(u32 i)
2448{
2449 return 0x0041a1c0U + i*8U;
2450}
2451static inline u32 gr_gpccs_dmemc_offs_f(u32 v)
2452{
2453 return (v & 0x3fU) << 2U;
2454}
2455static inline u32 gr_gpccs_dmemc_blk_f(u32 v)
2456{
2457 return (v & 0xffU) << 8U;
2458}
2459static inline u32 gr_gpccs_dmemc_aincw_f(u32 v)
2460{
2461 return (v & 0x1U) << 24U;
2462}
2463static inline u32 gr_gpccs_dmemd_r(u32 i)
2464{
2465 return 0x0041a1c4U + i*8U;
2466}
2467static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i)
2468{
2469 return 0x0041a800U + i*4U;
2470}
2471static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v)
2472{
2473 return (v & 0xffffffffU) << 0U;
2474}
2475static inline u32 gr_gpcs_setup_bundle_cb_base_r(void)
2476{
2477 return 0x00418808U;
2478}
2479static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_s(void)
2480{
2481 return 32U;
2482}
2483static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(u32 v)
2484{
2485 return (v & 0xffffffffU) << 0U;
2486}
2487static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_m(void)
2488{
2489 return 0xffffffffU << 0U;
2490}
2491static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_v(u32 r)
2492{
2493 return (r >> 0U) & 0xffffffffU;
2494}
2495static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_v(void)
2496{
2497 return 0x00000000U;
2498}
2499static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_f(void)
2500{
2501 return 0x0U;
2502}
2503static inline u32 gr_gpcs_setup_bundle_cb_size_r(void)
2504{
2505 return 0x0041880cU;
2506}
2507static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_s(void)
2508{
2509 return 11U;
2510}
2511static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_f(u32 v)
2512{
2513 return (v & 0x7ffU) << 0U;
2514}
2515static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_m(void)
2516{
2517 return 0x7ffU << 0U;
2518}
2519static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_v(u32 r)
2520{
2521 return (r >> 0U) & 0x7ffU;
2522}
2523static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_v(void)
2524{
2525 return 0x00000000U;
2526}
2527static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_f(void)
2528{
2529 return 0x0U;
2530}
2531static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_v(void)
2532{
2533 return 0x00000018U;
2534}
2535static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_f(void)
2536{
2537 return 0x18U;
2538}
2539static inline u32 gr_gpcs_setup_bundle_cb_size_valid_s(void)
2540{
2541 return 1U;
2542}
2543static inline u32 gr_gpcs_setup_bundle_cb_size_valid_f(u32 v)
2544{
2545 return (v & 0x1U) << 31U;
2546}
2547static inline u32 gr_gpcs_setup_bundle_cb_size_valid_m(void)
2548{
2549 return 0x1U << 31U;
2550}
2551static inline u32 gr_gpcs_setup_bundle_cb_size_valid_v(u32 r)
2552{
2553 return (r >> 31U) & 0x1U;
2554}
2555static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_v(void)
2556{
2557 return 0x00000000U;
2558}
2559static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_f(void)
2560{
2561 return 0x0U;
2562}
2563static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_v(void)
2564{
2565 return 0x00000001U;
2566}
2567static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_f(void)
2568{
2569 return 0x80000000U;
2570}
2571static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
2572{
2573 return 0x00418810U;
2574}
2575static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v)
2576{
2577 return (v & 0xfffffffU) << 0U;
2578}
2579static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void)
2580{
2581 return 0x0000000cU;
2582}
2583static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void)
2584{
2585 return 0x80000000U;
2586}
2587static inline u32 gr_crstr_gpc_map0_r(void)
2588{
2589 return 0x00418b08U;
2590}
2591static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v)
2592{
2593 return (v & 0x7U) << 0U;
2594}
2595static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v)
2596{
2597 return (v & 0x7U) << 5U;
2598}
2599static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v)
2600{
2601 return (v & 0x7U) << 10U;
2602}
2603static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v)
2604{
2605 return (v & 0x7U) << 15U;
2606}
2607static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v)
2608{
2609 return (v & 0x7U) << 20U;
2610}
2611static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v)
2612{
2613 return (v & 0x7U) << 25U;
2614}
2615static inline u32 gr_crstr_gpc_map1_r(void)
2616{
2617 return 0x00418b0cU;
2618}
2619static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v)
2620{
2621 return (v & 0x7U) << 0U;
2622}
2623static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v)
2624{
2625 return (v & 0x7U) << 5U;
2626}
2627static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v)
2628{
2629 return (v & 0x7U) << 10U;
2630}
2631static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v)
2632{
2633 return (v & 0x7U) << 15U;
2634}
2635static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v)
2636{
2637 return (v & 0x7U) << 20U;
2638}
2639static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v)
2640{
2641 return (v & 0x7U) << 25U;
2642}
2643static inline u32 gr_crstr_gpc_map2_r(void)
2644{
2645 return 0x00418b10U;
2646}
2647static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v)
2648{
2649 return (v & 0x7U) << 0U;
2650}
2651static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v)
2652{
2653 return (v & 0x7U) << 5U;
2654}
2655static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v)
2656{
2657 return (v & 0x7U) << 10U;
2658}
2659static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v)
2660{
2661 return (v & 0x7U) << 15U;
2662}
2663static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v)
2664{
2665 return (v & 0x7U) << 20U;
2666}
2667static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v)
2668{
2669 return (v & 0x7U) << 25U;
2670}
2671static inline u32 gr_crstr_gpc_map3_r(void)
2672{
2673 return 0x00418b14U;
2674}
2675static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v)
2676{
2677 return (v & 0x7U) << 0U;
2678}
2679static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v)
2680{
2681 return (v & 0x7U) << 5U;
2682}
2683static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v)
2684{
2685 return (v & 0x7U) << 10U;
2686}
2687static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v)
2688{
2689 return (v & 0x7U) << 15U;
2690}
2691static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v)
2692{
2693 return (v & 0x7U) << 20U;
2694}
2695static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v)
2696{
2697 return (v & 0x7U) << 25U;
2698}
2699static inline u32 gr_crstr_gpc_map4_r(void)
2700{
2701 return 0x00418b18U;
2702}
2703static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v)
2704{
2705 return (v & 0x7U) << 0U;
2706}
2707static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v)
2708{
2709 return (v & 0x7U) << 5U;
2710}
2711static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v)
2712{
2713 return (v & 0x7U) << 10U;
2714}
2715static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v)
2716{
2717 return (v & 0x7U) << 15U;
2718}
2719static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v)
2720{
2721 return (v & 0x7U) << 20U;
2722}
2723static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v)
2724{
2725 return (v & 0x7U) << 25U;
2726}
2727static inline u32 gr_crstr_gpc_map5_r(void)
2728{
2729 return 0x00418b1cU;
2730}
2731static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v)
2732{
2733 return (v & 0x7U) << 0U;
2734}
2735static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v)
2736{
2737 return (v & 0x7U) << 5U;
2738}
2739static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v)
2740{
2741 return (v & 0x7U) << 10U;
2742}
2743static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v)
2744{
2745 return (v & 0x7U) << 15U;
2746}
2747static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v)
2748{
2749 return (v & 0x7U) << 20U;
2750}
2751static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v)
2752{
2753 return (v & 0x7U) << 25U;
2754}
2755static inline u32 gr_crstr_map_table_cfg_r(void)
2756{
2757 return 0x00418bb8U;
2758}
2759static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v)
2760{
2761 return (v & 0xffU) << 0U;
2762}
2763static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v)
2764{
2765 return (v & 0xffU) << 8U;
2766}
2767static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void)
2768{
2769 return 0x00418980U;
2770}
2771static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v)
2772{
2773 return (v & 0x7U) << 0U;
2774}
2775static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v)
2776{
2777 return (v & 0x7U) << 4U;
2778}
2779static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v)
2780{
2781 return (v & 0x7U) << 8U;
2782}
2783static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v)
2784{
2785 return (v & 0x7U) << 12U;
2786}
2787static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v)
2788{
2789 return (v & 0x7U) << 16U;
2790}
2791static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v)
2792{
2793 return (v & 0x7U) << 20U;
2794}
2795static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v)
2796{
2797 return (v & 0x7U) << 24U;
2798}
2799static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v)
2800{
2801 return (v & 0x7U) << 28U;
2802}
2803static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void)
2804{
2805 return 0x00418984U;
2806}
2807static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v)
2808{
2809 return (v & 0x7U) << 0U;
2810}
2811static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v)
2812{
2813 return (v & 0x7U) << 4U;
2814}
2815static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v)
2816{
2817 return (v & 0x7U) << 8U;
2818}
2819static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v)
2820{
2821 return (v & 0x7U) << 12U;
2822}
2823static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v)
2824{
2825 return (v & 0x7U) << 16U;
2826}
2827static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v)
2828{
2829 return (v & 0x7U) << 20U;
2830}
2831static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v)
2832{
2833 return (v & 0x7U) << 24U;
2834}
2835static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v)
2836{
2837 return (v & 0x7U) << 28U;
2838}
2839static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void)
2840{
2841 return 0x00418988U;
2842}
2843static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v)
2844{
2845 return (v & 0x7U) << 0U;
2846}
2847static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v)
2848{
2849 return (v & 0x7U) << 4U;
2850}
2851static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v)
2852{
2853 return (v & 0x7U) << 8U;
2854}
2855static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v)
2856{
2857 return (v & 0x7U) << 12U;
2858}
2859static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v)
2860{
2861 return (v & 0x7U) << 16U;
2862}
2863static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v)
2864{
2865 return (v & 0x7U) << 20U;
2866}
2867static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v)
2868{
2869 return (v & 0x7U) << 24U;
2870}
2871static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void)
2872{
2873 return 3U;
2874}
2875static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v)
2876{
2877 return (v & 0x7U) << 28U;
2878}
2879static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void)
2880{
2881 return 0x7U << 28U;
2882}
2883static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r)
2884{
2885 return (r >> 28U) & 0x7U;
2886}
2887static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void)
2888{
2889 return 0x0041898cU;
2890}
2891static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v)
2892{
2893 return (v & 0x7U) << 0U;
2894}
2895static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v)
2896{
2897 return (v & 0x7U) << 4U;
2898}
2899static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v)
2900{
2901 return (v & 0x7U) << 8U;
2902}
2903static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v)
2904{
2905 return (v & 0x7U) << 12U;
2906}
2907static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v)
2908{
2909 return (v & 0x7U) << 16U;
2910}
2911static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v)
2912{
2913 return (v & 0x7U) << 20U;
2914}
2915static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v)
2916{
2917 return (v & 0x7U) << 24U;
2918}
2919static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v)
2920{
2921 return (v & 0x7U) << 28U;
2922}
2923static inline u32 gr_gpcs_gpm_pd_cfg_r(void)
2924{
2925 return 0x00418c6cU;
2926}
2927static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void)
2928{
2929 return 0x0U;
2930}
2931static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void)
2932{
2933 return 0x1U;
2934}
2935static inline u32 gr_gpcs_gcc_pagepool_base_r(void)
2936{
2937 return 0x00419004U;
2938}
2939static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v)
2940{
2941 return (v & 0xffffffffU) << 0U;
2942}
2943static inline u32 gr_gpcs_gcc_pagepool_r(void)
2944{
2945 return 0x00419008U;
2946}
2947static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v)
2948{
2949 return (v & 0xffU) << 0U;
2950}
2951static inline u32 gr_gpcs_tpcs_pe_vaf_r(void)
2952{
2953 return 0x0041980cU;
2954}
2955static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void)
2956{
2957 return 0x10U;
2958}
2959static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void)
2960{
2961 return 0x00419848U;
2962}
2963static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v)
2964{
2965 return (v & 0xfffffffU) << 0U;
2966}
2967static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v)
2968{
2969 return (v & 0x1U) << 28U;
2970}
2971static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void)
2972{
2973 return 0x10000000U;
2974}
2975static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void)
2976{
2977 return 0x00419c00U;
2978}
2979static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void)
2980{
2981 return 0x0U;
2982}
2983static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void)
2984{
2985 return 0x8U;
2986}
2987static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
2988{
2989 return 0x00419e44U;
2990}
2991static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void)
2992{
2993 return 0x2U;
2994}
2995static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void)
2996{
2997 return 0x4U;
2998}
2999static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void)
3000{
3001 return 0x8U;
3002}
3003static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void)
3004{
3005 return 0x10U;
3006}
3007static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void)
3008{
3009 return 0x20U;
3010}
3011static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void)
3012{
3013 return 0x40U;
3014}
3015static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void)
3016{
3017 return 0x80U;
3018}
3019static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void)
3020{
3021 return 0x100U;
3022}
3023static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void)
3024{
3025 return 0x200U;
3026}
3027static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void)
3028{
3029 return 0x400U;
3030}
3031static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void)
3032{
3033 return 0x800U;
3034}
3035static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void)
3036{
3037 return 0x1000U;
3038}
3039static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void)
3040{
3041 return 0x2000U;
3042}
3043static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void)
3044{
3045 return 0x4000U;
3046}
3047static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void)
3048{
3049 return 0x8000U;
3050}
3051static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void)
3052{
3053 return 0x10000U;
3054}
3055static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void)
3056{
3057 return 0x20000U;
3058}
3059static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void)
3060{
3061 return 0x40000U;
3062}
3063static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void)
3064{
3065 return 0x80000U;
3066}
3067static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void)
3068{
3069 return 0x100000U;
3070}
3071static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void)
3072{
3073 return 0x00419e4cU;
3074}
3075static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void)
3076{
3077 return 0x1U;
3078}
3079static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void)
3080{
3081 return 0x2U;
3082}
3083static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void)
3084{
3085 return 0x4U;
3086}
3087static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void)
3088{
3089 return 0x8U;
3090}
3091static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void)
3092{
3093 return 0x10U;
3094}
3095static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
3096{
3097 return 0x20U;
3098}
3099static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void)
3100{
3101 return 0x40U;
3102}
3103static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_r(void)
3104{
3105 return 0x00419d0cU;
3106}
3107static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f(void)
3108{
3109 return 0x2U;
3110}
3111static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f(void)
3112{
3113 return 0x1U;
3114}
3115static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
3116{
3117 return 0x0050450cU;
3118}
3119static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
3120{
3121 return (r >> 1U) & 0x1U;
3122}
3123static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
3124{
3125 return 0x2U;
3126}
3127static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
3128{
3129 return 0x0041ac94U;
3130}
3131static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v)
3132{
3133 return (v & 0xffU) << 16U;
3134}
3135static inline u32 gr_gpc0_gpccs_gpc_exception_r(void)
3136{
3137 return 0x00502c90U;
3138}
3139static inline u32 gr_gpc0_gpccs_gpc_exception_gcc_v(u32 r)
3140{
3141 return (r >> 2U) & 0x1U;
3142}
3143static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r)
3144{
3145 return (r >> 16U) & 0xffU;
3146}
3147static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void)
3148{
3149 return 0x00000001U;
3150}
3151static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
3152{
3153 return 0x00504508U;
3154}
3155static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_v(u32 r)
3156{
3157 return (r >> 0U) & 0x1U;
3158}
3159static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v(void)
3160{
3161 return 0x00000001U;
3162}
3163static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r)
3164{
3165 return (r >> 1U) & 0x1U;
3166}
3167static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v(void)
3168{
3169 return 0x00000001U;
3170}
3171static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
3172{
3173 return 0x00504610U;
3174}
3175static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(void)
3176{
3177 return 0x1U << 0U;
3178}
3179static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
3180{
3181 return (r >> 0U) & 0x1U;
3182}
3183static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
3184{
3185 return 0x00000001U;
3186}
3187static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_f(void)
3188{
3189 return 0x1U;
3190}
3191static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v(void)
3192{
3193 return 0x00000000U;
3194}
3195static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_f(void)
3196{
3197 return 0x0U;
3198}
3199static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
3200{
3201 return 0x80000000U;
3202}
3203static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_disable_f(void)
3204{
3205 return 0x0U;
3206}
3207static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_single_step_mode_enable_f(void)
3208{
3209 return 0x8U;
3210}
3211static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_single_step_mode_disable_f(void)
3212{
3213 return 0x0U;
3214}
3215static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
3216{
3217 return 0x40000000U;
3218}
3219static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m(void)
3220{
3221 return 0x1U << 1U;
3222}
3223static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
3224{
3225 return (r >> 1U) & 0x1U;
3226}
3227static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
3228{
3229 return 0x0U;
3230}
3231static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m(void)
3232{
3233 return 0x1U << 2U;
3234}
3235static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
3236{
3237 return (r >> 2U) & 0x1U;
3238}
3239static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
3240{
3241 return 0x0U;
3242}
3243static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_stop_on_any_warp_disable_v(void)
3244{
3245 return 0x00000000U;
3246}
3247static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_stop_on_any_sm_disable_v(void)
3248{
3249 return 0x00000000U;
3250}
3251static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
3252{
3253 return 0x00504614U;
3254}
3255static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void)
3256{
3257 return 0x00504618U;
3258}
3259static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
3260{
3261 return 0x00504624U;
3262}
3263static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void)
3264{
3265 return 0x00504628U;
3266}
3267static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
3268{
3269 return 0x00504634U;
3270}
3271static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void)
3272{
3273 return 0x00504638U;
3274}
3275static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void)
3276{
3277 return 0x00419e24U;
3278}
3279static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
3280{
3281 return 0x0050460cU;
3282}
3283static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
3284{
3285 return (r >> 0U) & 0x1U;
3286}
3287static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
3288{
3289 return (r >> 4U) & 0x1U;
3290}
3291static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
3292{
3293 return 0x00000001U;
3294}
3295static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_r(void)
3296{
3297 return 0x00419e50U;
3298}
3299static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_int_pending_f(void)
3300{
3301 return 0x10U;
3302}
3303static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(void)
3304{
3305 return 0x20U;
3306}
3307static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_single_step_complete_pending_f(void)
3308{
3309 return 0x40U;
3310}
3311static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_sm_to_sm_fault_pending_f(void)
3312{
3313 return 0x1U;
3314}
3315static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f(void)
3316{
3317 return 0x2U;
3318}
3319static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f(void)
3320{
3321 return 0x4U;
3322}
3323static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f(void)
3324{
3325 return 0x8U;
3326}
3327static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f(void)
3328{
3329 return 0x80000000U;
3330}
3331static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
3332{
3333 return 0x00504650U;
3334}
3335static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
3336{
3337 return 0x10U;
3338}
3339static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
3340{
3341 return 0x20U;
3342}
3343static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void)
3344{
3345 return 0x40U;
3346}
3347static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f(void)
3348{
3349 return 0x1U;
3350}
3351static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_l1_error_pending_f(void)
3352{
3353 return 0x2U;
3354}
3355static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_multiple_warp_errors_pending_f(void)
3356{
3357 return 0x4U;
3358}
3359static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_physical_stack_overflow_error_pending_f(void)
3360{
3361 return 0x8U;
3362}
3363static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_timeout_error_pending_f(void)
3364{
3365 return 0x80000000U;
3366}
3367static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_r(void)
3368{
3369 return 0x00504224U;
3370}
3371static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_intr_pending_f(void)
3372{
3373 return 0x1U;
3374}
3375static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
3376{
3377 return 0x00504648U;
3378}
3379static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r)
3380{
3381 return (r >> 0U) & 0xffffU;
3382}
3383static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void)
3384{
3385 return 0x00000000U;
3386}
3387static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
3388{
3389 return 0x0U;
3390}
3391static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
3392{
3393 return 0x00504770U;
3394}
3395static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void)
3396{
3397 return 0x00419f70U;
3398}
3399static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void)
3400{
3401 return 0x1U << 4U;
3402}
3403static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v)
3404{
3405 return (v & 0x1U) << 4U;
3406}
3407static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void)
3408{
3409 return 0x0050477cU;
3410}
3411static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void)
3412{
3413 return 0x00419f7cU;
3414}
3415static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void)
3416{
3417 return 0x1U << 0U;
3418}
3419static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
3420{
3421 return (v & 0x1U) << 0U;
3422}
3423static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
3424{
3425 return 0x0041be08U;
3426}
3427static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void)
3428{
3429 return 0x4U;
3430}
3431static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void)
3432{
3433 return 0x0041bf00U;
3434}
3435static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void)
3436{
3437 return 0x0041bf04U;
3438}
3439static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void)
3440{
3441 return 0x0041bf08U;
3442}
3443static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void)
3444{
3445 return 0x0041bf0cU;
3446}
3447static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void)
3448{
3449 return 0x0041bf10U;
3450}
3451static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void)
3452{
3453 return 0x0041bf14U;
3454}
3455static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void)
3456{
3457 return 0x0041bfd0U;
3458}
3459static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v)
3460{
3461 return (v & 0xffU) << 0U;
3462}
3463static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v)
3464{
3465 return (v & 0xffU) << 8U;
3466}
3467static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v)
3468{
3469 return (v & 0x1fU) << 16U;
3470}
3471static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v)
3472{
3473 return (v & 0x7U) << 21U;
3474}
3475static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v)
3476{
3477 return (v & 0x1fU) << 24U;
3478}
3479static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void)
3480{
3481 return 0x0041bfd4U;
3482}
3483static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v)
3484{
3485 return (v & 0xffffffU) << 0U;
3486}
3487static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void)
3488{
3489 return 0x0041bfe4U;
3490}
3491static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v)
3492{
3493 return (v & 0x1fU) << 0U;
3494}
3495static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v)
3496{
3497 return (v & 0x1fU) << 5U;
3498}
3499static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v)
3500{
3501 return (v & 0x1fU) << 10U;
3502}
3503static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v)
3504{
3505 return (v & 0x1fU) << 15U;
3506}
3507static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v)
3508{
3509 return (v & 0x1fU) << 20U;
3510}
3511static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v)
3512{
3513 return (v & 0x1fU) << 25U;
3514}
3515static inline u32 gr_gpcs_ppcs_cbm_cfg_r(void)
3516{
3517 return 0x0041bec0U;
3518}
3519static inline u32 gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(void)
3520{
3521 return 0x00000001U;
3522}
3523static inline u32 gr_bes_zrop_settings_r(void)
3524{
3525 return 0x00408850U;
3526}
3527static inline u32 gr_bes_zrop_settings_num_active_fbps_f(u32 v)
3528{
3529 return (v & 0xfU) << 0U;
3530}
3531static inline u32 gr_bes_crop_settings_r(void)
3532{
3533 return 0x00408958U;
3534}
3535static inline u32 gr_bes_crop_settings_num_active_fbps_f(u32 v)
3536{
3537 return (v & 0xfU) << 0U;
3538}
3539static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void)
3540{
3541 return 0x00000020U;
3542}
3543static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void)
3544{
3545 return 0x00000020U;
3546}
3547static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void)
3548{
3549 return 0x000000c0U;
3550}
3551static inline u32 gr_zcull_subregion_qty_v(void)
3552{
3553 return 0x00000010U;
3554}
3555static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void)
3556{
3557 return 0x00504604U;
3558}
3559static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void)
3560{
3561 return 0x00504608U;
3562}
3563static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void)
3564{
3565 return 0x0050465cU;
3566}
3567static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void)
3568{
3569 return 0x00504660U;
3570}
3571static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void)
3572{
3573 return 0x00504664U;
3574}
3575static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void)
3576{
3577 return 0x00504668U;
3578}
3579static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void)
3580{
3581 return 0x0050466cU;
3582}
3583static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void)
3584{
3585 return 0x00504658U;
3586}
3587static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(void)
3588{
3589 return 0x00504670U;
3590}
3591static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void)
3592{
3593 return 0x00504694U;
3594}
3595static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void)
3596{
3597 return 0x00504730U;
3598}
3599static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void)
3600{
3601 return 0x00504734U;
3602}
3603static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void)
3604{
3605 return 0x00504738U;
3606}
3607static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void)
3608{
3609 return 0x0050473cU;
3610}
3611static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void)
3612{
3613 return 0x00504740U;
3614}
3615static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void)
3616{
3617 return 0x00504744U;
3618}
3619static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void)
3620{
3621 return 0x00504748U;
3622}
3623static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void)
3624{
3625 return 0x0050474cU;
3626}
3627static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(void)
3628{
3629 return 0x00504674U;
3630}
3631static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(void)
3632{
3633 return 0x00504678U;
3634}
3635static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(void)
3636{
3637 return 0x0050467cU;
3638}
3639static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(void)
3640{
3641 return 0x00504680U;
3642}
3643static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r(void)
3644{
3645 return 0x00504684U;
3646}
3647static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(void)
3648{
3649 return 0x00504688U;
3650}
3651static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(void)
3652{
3653 return 0x0050468cU;
3654}
3655static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(void)
3656{
3657 return 0x00504690U;
3658}
3659static inline u32 gr_fe_pwr_mode_r(void)
3660{
3661 return 0x00404170U;
3662}
3663static inline u32 gr_fe_pwr_mode_mode_auto_f(void)
3664{
3665 return 0x0U;
3666}
3667static inline u32 gr_fe_pwr_mode_mode_force_on_f(void)
3668{
3669 return 0x2U;
3670}
3671static inline u32 gr_fe_pwr_mode_req_v(u32 r)
3672{
3673 return (r >> 4U) & 0x1U;
3674}
3675static inline u32 gr_fe_pwr_mode_req_send_f(void)
3676{
3677 return 0x10U;
3678}
3679static inline u32 gr_fe_pwr_mode_req_done_v(void)
3680{
3681 return 0x00000000U;
3682}
3683static inline u32 gr_gpc0_tpc0_l1c_dbg_r(void)
3684{
3685 return 0x005044b0U;
3686}
3687static inline u32 gr_gpc0_tpc0_l1c_dbg_cya15_en_f(void)
3688{
3689 return 0x8000000U;
3690}
3691static inline u32 gr_gpcs_tpcs_sm_sch_texlock_r(void)
3692{
3693 return 0x00419ec8U;
3694}
3695static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_m(void)
3696{
3697 return 0x1U << 0U;
3698}
3699static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_disable_f(void)
3700{
3701 return 0x0U;
3702}
3703static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_m(void)
3704{
3705 return 0x1U << 1U;
3706}
3707static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_disable_f(void)
3708{
3709 return 0x0U;
3710}
3711static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_m(void)
3712{
3713 return 0x1U << 2U;
3714}
3715static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_disable_f(void)
3716{
3717 return 0x0U;
3718}
3719static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_m(void)
3720{
3721 return 0x1U << 3U;
3722}
3723static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_disable_f(void)
3724{
3725 return 0x0U;
3726}
3727static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_m(void)
3728{
3729 return 0xffU << 4U;
3730}
3731static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_disable_f(void)
3732{
3733 return 0x0U;
3734}
3735static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_m(void)
3736{
3737 return 0x1U << 16U;
3738}
3739static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f(void)
3740{
3741 return 0x0U;
3742}
3743static inline u32 gr_gpcs_tpcs_sm_sch_macro_sched_r(void)
3744{
3745 return 0x00419eacU;
3746}
3747static inline u32 gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(u32 v)
3748{
3749 return (v & 0x1U) << 2U;
3750}
3751static inline u32 gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_m(void)
3752{
3753 return 0x1U << 2U;
3754}
3755static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void)
3756{
3757 return 0x00419e10U;
3758}
3759static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_f(u32 v)
3760{
3761 return (v & 0x1U) << 0U;
3762}
3763static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_on_v(void)
3764{
3765 return 0x00000001U;
3766}
3767static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(void)
3768{
3769 return 0x1U << 31U;
3770}
3771static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_v(u32 r)
3772{
3773 return (r >> 31U) & 0x1U;
3774}
3775static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(void)
3776{
3777 return 0x80000000U;
3778}
3779static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f(void)
3780{
3781 return 0x0U;
3782}
3783static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(void)
3784{
3785 return 0x1U << 3U;
3786}
3787static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f(void)
3788{
3789 return 0x8U;
3790}
3791static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f(void)
3792{
3793 return 0x0U;
3794}
3795static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void)
3796{
3797 return 0x1U << 30U;
3798}
3799static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_v(u32 r)
3800{
3801 return (r >> 30U) & 0x1U;
3802}
3803static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
3804{
3805 return 0x40000000U;
3806}
3807#endif
diff --git a/include/gk20a/hw_ltc_gk20a.h b/include/gk20a/hw_ltc_gk20a.h
new file mode 100644
index 0000000..efe7f98
--- /dev/null
+++ b/include/gk20a/hw_ltc_gk20a.h
@@ -0,0 +1,455 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
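As a quick illustration of these suffix conventions (an editor's sketch, not part of the generated header; the example_ function name is invented for illustration), the _m()/_f() pair from the ZBC depth-clear register defined later in this file can be combined to update one field inside a previously read register value:

static inline u32 example_set_depth_clear(u32 reg, u32 depth)
{
	/* _m() clears the old field, _f() ORs in the new, shifted value. */
	reg &= ~ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m();
	reg |= ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(depth);
	return reg;
}

The updated value would then be written back at the offset returned by ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(); the actual register read/write helpers are driver-specific and omitted here.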
56#ifndef _hw_ltc_gk20a_h_
57#define _hw_ltc_gk20a_h_
58
59static inline u32 ltc_pltcg_base_v(void)
60{
61 return 0x00140000U;
62}
63static inline u32 ltc_pltcg_extent_v(void)
64{
65 return 0x0017ffffU;
66}
67static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
68{
69 return 0x001410c8U;
70}
71static inline u32 ltc_ltc0_lts0_dstg_cfg0_r(void)
72{
73 return 0x00141200U;
74}
75static inline u32 ltc_ltcs_ltss_dstg_cfg0_r(void)
76{
77 return 0x0017ea00U;
78}
79static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void)
80{
81 return 0x00141104U;
82}
83static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r)
84{
85 return (r >> 0U) & 0xffffU;
86}
87static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r)
88{
89 return (r >> 16U) & 0x3U;
90}
91static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void)
92{
93 return 0x00000000U;
94}
95static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void)
96{
97 return 0x00000001U;
98}
99static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void)
100{
101 return 0x00000002U;
102}
103static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
104{
105 return 0x0017e8c8U;
106}
107static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
108{
109 return 0x1U;
110}
111static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
112{
113 return 0x2U;
114}
115static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
116{
117 return (r >> 2U) & 0x1U;
118}
119static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void)
120{
121 return 0x00000001U;
122}
123static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void)
124{
125 return 0x4U;
126}
127static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void)
128{
129 return 0x001410c8U;
130}
131static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
132{
133 return 0x0017e8ccU;
134}
135static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
136{
137 return (v & 0x1ffffU) << 0U;
138}
139static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
140{
141 return 0x0017e8d0U;
142}
143static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
144{
145 return (v & 0x1ffffU) << 0U;
146}
147static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
148{
149 return 0x0001ffffU;
150}
151static inline u32 ltc_ltcs_ltss_cbc_base_r(void)
152{
153 return 0x0017e8d4U;
154}
155static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void)
156{
157 return 0x0000000bU;
158}
159static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r)
160{
161 return (r >> 0U) & 0x3ffffffU;
162}
163static inline u32 ltc_ltcs_ltss_cbc_param_r(void)
164{
165 return 0x0017e8dcU;
166}
167static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r)
168{
169 return (r >> 0U) & 0xffffU;
170}
171static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r)
172{
173 return (r >> 24U) & 0xfU;
174}
175static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(u32 r)
176{
177 return (r >> 28U) & 0xfU;
178}
179static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void)
180{
181 return 0x0017e91cU;
182}
183static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v)
184{
185 return (v & 0x1fU) << 16U;
186}
187static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void)
188{
189 return 0x0017ea44U;
190}
191static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v)
192{
193 return (v & 0xfU) << 0U;
194}
195static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i)
196{
197 return 0x0017ea48U + i*4U;
198}
199static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void)
200{
201 return 0x00000004U;
202}
203static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void)
204{
205 return 0x0017ea58U;
206}
207static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void)
208{
209 return 32U;
210}
211static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v)
212{
213 return (v & 0xffffffffU) << 0U;
214}
215static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void)
216{
217 return 0xffffffffU << 0U;
218}
219static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r)
220{
221 return (r >> 0U) & 0xffffffffU;
222}
223static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void)
224{
225 return 0x0017e924U;
226}
227static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void)
228{
229 return 0x10000000U;
230}
231static inline u32 ltc_ltcs_ltss_g_elpg_r(void)
232{
233 return 0x0017e828U;
234}
235static inline u32 ltc_ltcs_ltss_g_elpg_flush_v(u32 r)
236{
237 return (r >> 0U) & 0x1U;
238}
239static inline u32 ltc_ltcs_ltss_g_elpg_flush_pending_v(void)
240{
241 return 0x00000001U;
242}
243static inline u32 ltc_ltcs_ltss_g_elpg_flush_pending_f(void)
244{
245 return 0x1U;
246}
247static inline u32 ltc_ltc0_ltss_g_elpg_r(void)
248{
249 return 0x00140828U;
250}
251static inline u32 ltc_ltc0_ltss_g_elpg_flush_v(u32 r)
252{
253 return (r >> 0U) & 0x1U;
254}
255static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_v(void)
256{
257 return 0x00000001U;
258}
259static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
260{
261 return 0x1U;
262}
263static inline u32 ltc_ltc0_ltss_intr_r(void)
264{
265 return 0x00140820U;
266}
267static inline u32 ltc_ltcs_ltss_intr_r(void)
268{
269 return 0x0017e820U;
270}
271static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void)
272{
273 return 0x1U << 20U;
274}
275static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_m(void)
276{
277 return 0x1U << 21U;
278}
279static inline u32 ltc_ltc0_lts0_intr_r(void)
280{
281 return 0x00141020U;
282}
283static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void)
284{
285 return 0x0017e910U;
286}
287static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_v(u32 r)
288{
289 return (r >> 0U) & 0x1U;
290}
291static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_v(void)
292{
293 return 0x00000001U;
294}
295static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f(void)
296{
297 return 0x1U;
298}
299static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_v(u32 r)
300{
301 return (r >> 8U) & 0xfU;
302}
303static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_v(void)
304{
305 return 0x00000003U;
306}
307static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f(void)
308{
309 return 0x300U;
310}
311static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_v(u32 r)
312{
313 return (r >> 28U) & 0x1U;
314}
315static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_v(void)
316{
317 return 0x00000001U;
318}
319static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f(void)
320{
321 return 0x10000000U;
322}
323static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_v(u32 r)
324{
325 return (r >> 29U) & 0x1U;
326}
327static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_v(void)
328{
329 return 0x00000001U;
330}
331static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f(void)
332{
333 return 0x20000000U;
334}
335static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_v(u32 r)
336{
337 return (r >> 30U) & 0x1U;
338}
339static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_v(void)
340{
341 return 0x00000001U;
342}
343static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f(void)
344{
345 return 0x40000000U;
346}
347static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_r(void)
348{
349 return 0x0017e914U;
350}
351static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_v(u32 r)
352{
353 return (r >> 0U) & 0x1U;
354}
355static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_v(void)
356{
357 return 0x00000001U;
358}
359static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f(void)
360{
361 return 0x1U;
362}
363static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_v(u32 r)
364{
365 return (r >> 8U) & 0xfU;
366}
367static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_v(void)
368{
369 return 0x00000003U;
370}
371static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f(void)
372{
373 return 0x300U;
374}
375static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_v(u32 r)
376{
377 return (r >> 16U) & 0x1U;
378}
379static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_v(void)
380{
381 return 0x00000001U;
382}
383static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f(void)
384{
385 return 0x10000U;
386}
387static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_v(u32 r)
388{
389 return (r >> 28U) & 0x1U;
390}
391static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_v(void)
392{
393 return 0x00000001U;
394}
395static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f(void)
396{
397 return 0x10000000U;
398}
399static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_v(u32 r)
400{
401 return (r >> 29U) & 0x1U;
402}
403static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_v(void)
404{
405 return 0x00000001U;
406}
407static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f(void)
408{
409 return 0x20000000U;
410}
411static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_v(u32 r)
412{
413 return (r >> 30U) & 0x1U;
414}
415static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_v(void)
416{
417 return 0x00000001U;
418}
419static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f(void)
420{
421 return 0x40000000U;
422}
423static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_r(void)
424{
425 return 0x00140910U;
426}
427static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_v(u32 r)
428{
429 return (r >> 0U) & 0x1U;
430}
431static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_v(void)
432{
433 return 0x00000001U;
434}
435static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f(void)
436{
437 return 0x1U;
438}
439static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_r(void)
440{
441 return 0x00140914U;
442}
443static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_v(u32 r)
444{
445 return (r >> 0U) & 0x1U;
446}
447static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_v(void)
448{
449 return 0x00000001U;
450}
451static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f(void)
452{
453 return 0x1U;
454}
455#endif
diff --git a/include/gk20a/hw_mc_gk20a.h b/include/gk20a/hw_mc_gk20a.h
new file mode 100644
index 0000000..3ca2a29
--- /dev/null
+++ b/include/gk20a/hw_mc_gk20a.h
@@ -0,0 +1,291 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
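To illustrate the read-side conventions (again an editor's sketch rather than part of the generated header; the example_ names are invented), the _f() constants act as test masks against a raw interrupt read, while _v() extracts identification fields from a raw mc_boot_0_r() value:

static inline u32 example_pfifo_intr_pending(u32 mc_intr_0_val)
{
	/* Non-zero when the PFIFO pending bit is set in the raw register value. */
	return mc_intr_0_val & mc_intr_0_pfifo_pending_f();
}

static inline u32 example_chip_architecture(u32 mc_boot_0_val)
{
	/* _v() shifts the architecture field down so its LSB sits at bit 0. */
	return mc_boot_0_architecture_v(mc_boot_0_val);
}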
56#ifndef _hw_mc_gk20a_h_
57#define _hw_mc_gk20a_h_
58
59static inline u32 mc_boot_0_r(void)
60{
61 return 0x00000000U;
62}
63static inline u32 mc_boot_0_architecture_v(u32 r)
64{
65 return (r >> 24U) & 0x1fU;
66}
67static inline u32 mc_boot_0_implementation_v(u32 r)
68{
69 return (r >> 20U) & 0xfU;
70}
71static inline u32 mc_boot_0_major_revision_v(u32 r)
72{
73 return (r >> 4U) & 0xfU;
74}
75static inline u32 mc_boot_0_minor_revision_v(u32 r)
76{
77 return (r >> 0U) & 0xfU;
78}
79static inline u32 mc_intr_0_r(void)
80{
81 return 0x00000100U;
82}
83static inline u32 mc_intr_0_pfifo_pending_f(void)
84{
85 return 0x100U;
86}
87static inline u32 mc_intr_0_pgraph_pending_f(void)
88{
89 return 0x1000U;
90}
91static inline u32 mc_intr_0_pmu_pending_f(void)
92{
93 return 0x1000000U;
94}
95static inline u32 mc_intr_0_ltc_pending_f(void)
96{
97 return 0x2000000U;
98}
99static inline u32 mc_intr_0_priv_ring_pending_f(void)
100{
101 return 0x40000000U;
102}
103static inline u32 mc_intr_0_pbus_pending_f(void)
104{
105 return 0x10000000U;
106}
107static inline u32 mc_intr_1_r(void)
108{
109 return 0x00000104U;
110}
111static inline u32 mc_intr_mask_0_r(void)
112{
113 return 0x00000640U;
114}
115static inline u32 mc_intr_mask_0_pmu_enabled_f(void)
116{
117 return 0x1000000U;
118}
119static inline u32 mc_intr_en_0_r(void)
120{
121 return 0x00000140U;
122}
123static inline u32 mc_intr_en_0_inta_disabled_f(void)
124{
125 return 0x0U;
126}
127static inline u32 mc_intr_en_0_inta_hardware_f(void)
128{
129 return 0x1U;
130}
131static inline u32 mc_intr_mask_1_r(void)
132{
133 return 0x00000644U;
134}
135static inline u32 mc_intr_mask_1_pmu_s(void)
136{
137 return 1U;
138}
139static inline u32 mc_intr_mask_1_pmu_f(u32 v)
140{
141 return (v & 0x1U) << 24U;
142}
143static inline u32 mc_intr_mask_1_pmu_m(void)
144{
145 return 0x1U << 24U;
146}
147static inline u32 mc_intr_mask_1_pmu_v(u32 r)
148{
149 return (r >> 24U) & 0x1U;
150}
151static inline u32 mc_intr_mask_1_pmu_enabled_f(void)
152{
153 return 0x1000000U;
154}
155static inline u32 mc_intr_en_1_r(void)
156{
157 return 0x00000144U;
158}
159static inline u32 mc_intr_en_1_inta_disabled_f(void)
160{
161 return 0x0U;
162}
163static inline u32 mc_intr_en_1_inta_hardware_f(void)
164{
165 return 0x1U;
166}
167static inline u32 mc_enable_r(void)
168{
169 return 0x00000200U;
170}
171static inline u32 mc_enable_xbar_enabled_f(void)
172{
173 return 0x4U;
174}
175static inline u32 mc_enable_l2_enabled_f(void)
176{
177 return 0x8U;
178}
179static inline u32 mc_enable_pmedia_s(void)
180{
181 return 1U;
182}
183static inline u32 mc_enable_pmedia_f(u32 v)
184{
185 return (v & 0x1U) << 4U;
186}
187static inline u32 mc_enable_pmedia_m(void)
188{
189 return 0x1U << 4U;
190}
191static inline u32 mc_enable_pmedia_v(u32 r)
192{
193 return (r >> 4U) & 0x1U;
194}
195static inline u32 mc_enable_priv_ring_enabled_f(void)
196{
197 return 0x20U;
198}
199static inline u32 mc_enable_ce0_m(void)
200{
201 return 0x1U << 6U;
202}
203static inline u32 mc_enable_pfifo_enabled_f(void)
204{
205 return 0x100U;
206}
207static inline u32 mc_enable_pgraph_enabled_f(void)
208{
209 return 0x1000U;
210}
211static inline u32 mc_enable_pwr_v(u32 r)
212{
213 return (r >> 13U) & 0x1U;
214}
215static inline u32 mc_enable_pwr_disabled_v(void)
216{
217 return 0x00000000U;
218}
219static inline u32 mc_enable_pwr_enabled_f(void)
220{
221 return 0x2000U;
222}
223static inline u32 mc_enable_pfb_enabled_f(void)
224{
225 return 0x100000U;
226}
227static inline u32 mc_enable_ce2_m(void)
228{
229 return 0x1U << 21U;
230}
231static inline u32 mc_enable_ce2_enabled_f(void)
232{
233 return 0x200000U;
234}
235static inline u32 mc_enable_blg_enabled_f(void)
236{
237 return 0x8000000U;
238}
239static inline u32 mc_enable_perfmon_enabled_f(void)
240{
241 return 0x10000000U;
242}
243static inline u32 mc_enable_hub_enabled_f(void)
244{
245 return 0x20000000U;
246}
247static inline u32 mc_enable_pb_r(void)
248{
249 return 0x00000204U;
250}
251static inline u32 mc_enable_pb_0_s(void)
252{
253 return 1U;
254}
255static inline u32 mc_enable_pb_0_f(u32 v)
256{
257 return (v & 0x1U) << 0U;
258}
259static inline u32 mc_enable_pb_0_m(void)
260{
261 return 0x1U << 0U;
262}
263static inline u32 mc_enable_pb_0_v(u32 r)
264{
265 return (r >> 0U) & 0x1U;
266}
267static inline u32 mc_enable_pb_0_enabled_v(void)
268{
269 return 0x00000001U;
270}
271static inline u32 mc_enable_pb_sel_f(u32 v, u32 i)
272{
273 return (v & 0x1U) << (0U + i*1U);
274}
275static inline u32 mc_elpg_enable_r(void)
276{
277 return 0x0000020cU;
278}
279static inline u32 mc_elpg_enable_xbar_enabled_f(void)
280{
281 return 0x4U;
282}
283static inline u32 mc_elpg_enable_pfb_enabled_f(void)
284{
285 return 0x100000U;
286}
287static inline u32 mc_elpg_enable_hub_enabled_f(void)
288{
289 return 0x20000000U;
290}
291#endif
diff --git a/include/gk20a/hw_pbdma_gk20a.h b/include/gk20a/hw_pbdma_gk20a.h
new file mode 100644
index 0000000..2c8f48d
--- /dev/null
+++ b/include/gk20a/hw_pbdma_gk20a.h
@@ -0,0 +1,575 @@
1/*
2 * Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
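One further sketch (editorial, not part of the generated header; the example_ name is invented): the PBDMA registers below are indexed per instance through _r(u32 i), and multi-field values are built by OR'ing _f() helpers together, for example the high word of the GPFIFO base:

static inline u32 example_gp_base_hi(u32 addr_hi, u32 gpfifo_log2_entries)
{
	/* Two _f() helpers OR'd together form the full value to program at
	 * pbdma_gp_base_hi_r(i), where i selects the PBDMA instance. */
	return pbdma_gp_base_hi_offset_f(addr_hi) |
	       pbdma_gp_base_hi_limit2_f(gpfifo_log2_entries);
}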
56#ifndef _hw_pbdma_gk20a_h_
57#define _hw_pbdma_gk20a_h_
58
59static inline u32 pbdma_gp_entry1_r(void)
60{
61 return 0x10000004U;
62}
63static inline u32 pbdma_gp_entry1_get_hi_v(u32 r)
64{
65 return (r >> 0U) & 0xffU;
66}
67static inline u32 pbdma_gp_entry1_length_f(u32 v)
68{
69 return (v & 0x1fffffU) << 10U;
70}
71static inline u32 pbdma_gp_entry1_length_v(u32 r)
72{
73 return (r >> 10U) & 0x1fffffU;
74}
75static inline u32 pbdma_gp_base_r(u32 i)
76{
77 return 0x00040048U + i*8192U;
78}
79static inline u32 pbdma_gp_base__size_1_v(void)
80{
81 return 0x00000001U;
82}
83static inline u32 pbdma_gp_base_offset_f(u32 v)
84{
85 return (v & 0x1fffffffU) << 3U;
86}
87static inline u32 pbdma_gp_base_rsvd_s(void)
88{
89 return 3U;
90}
91static inline u32 pbdma_gp_base_hi_r(u32 i)
92{
93 return 0x0004004cU + i*8192U;
94}
95static inline u32 pbdma_gp_base_hi_offset_f(u32 v)
96{
97 return (v & 0xffU) << 0U;
98}
99static inline u32 pbdma_gp_base_hi_limit2_f(u32 v)
100{
101 return (v & 0x1fU) << 16U;
102}
103static inline u32 pbdma_gp_fetch_r(u32 i)
104{
105 return 0x00040050U + i*8192U;
106}
107static inline u32 pbdma_gp_get_r(u32 i)
108{
109 return 0x00040014U + i*8192U;
110}
111static inline u32 pbdma_gp_put_r(u32 i)
112{
113 return 0x00040000U + i*8192U;
114}
115static inline u32 pbdma_timeout_r(u32 i)
116{
117 return 0x0004012cU + i*8192U;
118}
119static inline u32 pbdma_timeout__size_1_v(void)
120{
121 return 0x00000001U;
122}
123static inline u32 pbdma_timeout_period_m(void)
124{
125 return 0xffffffffU << 0U;
126}
127static inline u32 pbdma_timeout_period_max_f(void)
128{
129 return 0xffffffffU;
130}
131static inline u32 pbdma_pb_fetch_r(u32 i)
132{
133 return 0x00040054U + i*8192U;
134}
135static inline u32 pbdma_pb_fetch_hi_r(u32 i)
136{
137 return 0x00040058U + i*8192U;
138}
139static inline u32 pbdma_get_r(u32 i)
140{
141 return 0x00040018U + i*8192U;
142}
143static inline u32 pbdma_get_hi_r(u32 i)
144{
145 return 0x0004001cU + i*8192U;
146}
147static inline u32 pbdma_put_r(u32 i)
148{
149 return 0x0004005cU + i*8192U;
150}
151static inline u32 pbdma_put_hi_r(u32 i)
152{
153 return 0x00040060U + i*8192U;
154}
155static inline u32 pbdma_formats_r(u32 i)
156{
157 return 0x0004009cU + i*8192U;
158}
159static inline u32 pbdma_formats_gp_fermi0_f(void)
160{
161 return 0x0U;
162}
163static inline u32 pbdma_formats_pb_fermi1_f(void)
164{
165 return 0x100U;
166}
167static inline u32 pbdma_formats_mp_fermi0_f(void)
168{
169 return 0x0U;
170}
171static inline u32 pbdma_pb_header_r(u32 i)
172{
173 return 0x00040084U + i*8192U;
174}
175static inline u32 pbdma_pb_header_priv_user_f(void)
176{
177 return 0x0U;
178}
179static inline u32 pbdma_pb_header_method_zero_f(void)
180{
181 return 0x0U;
182}
183static inline u32 pbdma_pb_header_subchannel_zero_f(void)
184{
185 return 0x0U;
186}
187static inline u32 pbdma_pb_header_level_main_f(void)
188{
189 return 0x0U;
190}
191static inline u32 pbdma_pb_header_first_true_f(void)
192{
193 return 0x400000U;
194}
195static inline u32 pbdma_pb_header_type_inc_f(void)
196{
197 return 0x20000000U;
198}
199static inline u32 pbdma_pb_header_type_non_inc_f(void)
200{
201 return 0x60000000U;
202}
203static inline u32 pbdma_hdr_shadow_r(u32 i)
204{
205 return 0x00040118U + i*8192U;
206}
207static inline u32 pbdma_gp_shadow_0_r(u32 i)
208{
209 return 0x00040110U + i*8192U;
210}
211static inline u32 pbdma_gp_shadow_1_r(u32 i)
212{
213 return 0x00040114U + i*8192U;
214}
215static inline u32 pbdma_subdevice_r(u32 i)
216{
217 return 0x00040094U + i*8192U;
218}
219static inline u32 pbdma_subdevice_id_f(u32 v)
220{
221 return (v & 0xfffU) << 0U;
222}
223static inline u32 pbdma_subdevice_status_active_f(void)
224{
225 return 0x10000000U;
226}
227static inline u32 pbdma_subdevice_channel_dma_enable_f(void)
228{
229 return 0x20000000U;
230}
231static inline u32 pbdma_method0_r(u32 i)
232{
233 return 0x000400c0U + i*8192U;
234}
235static inline u32 pbdma_method0_addr_f(u32 v)
236{
237 return (v & 0xfffU) << 2U;
238}
239static inline u32 pbdma_method0_addr_v(u32 r)
240{
241 return (r >> 2U) & 0xfffU;
242}
243static inline u32 pbdma_method0_subch_v(u32 r)
244{
245 return (r >> 16U) & 0x7U;
246}
247static inline u32 pbdma_method0_first_true_f(void)
248{
249 return 0x400000U;
250}
251static inline u32 pbdma_method0_valid_true_f(void)
252{
253 return 0x80000000U;
254}
255static inline u32 pbdma_method1_r(u32 i)
256{
257 return 0x000400c8U + i*8192U;
258}
259static inline u32 pbdma_method2_r(u32 i)
260{
261 return 0x000400d0U + i*8192U;
262}
263static inline u32 pbdma_method3_r(u32 i)
264{
265 return 0x000400d8U + i*8192U;
266}
267static inline u32 pbdma_data0_r(u32 i)
268{
269 return 0x000400c4U + i*8192U;
270}
271static inline u32 pbdma_target_r(u32 i)
272{
273 return 0x000400acU + i*8192U;
274}
275static inline u32 pbdma_target_engine_sw_f(void)
276{
277 return 0x1fU;
278}
279static inline u32 pbdma_acquire_r(u32 i)
280{
281 return 0x00040030U + i*8192U;
282}
283static inline u32 pbdma_acquire_retry_man_2_f(void)
284{
285 return 0x2U;
286}
287static inline u32 pbdma_acquire_retry_exp_2_f(void)
288{
289 return 0x100U;
290}
291static inline u32 pbdma_acquire_timeout_exp_f(u32 v)
292{
293 return (v & 0xfU) << 11U;
294}
295static inline u32 pbdma_acquire_timeout_exp_max_v(void)
296{
297 return 0x0000000fU;
298}
299static inline u32 pbdma_acquire_timeout_exp_max_f(void)
300{
301 return 0x7800U;
302}
303static inline u32 pbdma_acquire_timeout_man_f(u32 v)
304{
305 return (v & 0xffffU) << 15U;
306}
307static inline u32 pbdma_acquire_timeout_man_max_v(void)
308{
309 return 0x0000ffffU;
310}
311static inline u32 pbdma_acquire_timeout_man_max_f(void)
312{
313 return 0x7fff8000U;
314}
315static inline u32 pbdma_acquire_timeout_en_enable_f(void)
316{
317 return 0x80000000U;
318}
319static inline u32 pbdma_acquire_timeout_en_disable_f(void)
320{
321 return 0x0U;
322}
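/*
 * Illustrative sketch (not part of the generated header): compose a full
 * PBDMA ACQUIRE value from the field helpers above.  The retry settings and
 * the way the timeout is enabled are example choices, not a recommended
 * programming.
 */
static inline u32 pbdma_acquire_example_val(u32 timeout_exp, u32 timeout_man,
					    u32 enable_timeout)
{
	u32 val = pbdma_acquire_retry_man_2_f() |
		  pbdma_acquire_retry_exp_2_f() |
		  pbdma_acquire_timeout_exp_f(timeout_exp) |
		  pbdma_acquire_timeout_man_f(timeout_man);

	return val | (enable_timeout != 0U ?
		      pbdma_acquire_timeout_en_enable_f() :
		      pbdma_acquire_timeout_en_disable_f());
}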
323static inline u32 pbdma_status_r(u32 i)
324{
325 return 0x00040100U + i*8192U;
326}
327static inline u32 pbdma_channel_r(u32 i)
328{
329 return 0x00040120U + i*8192U;
330}
331static inline u32 pbdma_signature_r(u32 i)
332{
333 return 0x00040010U + i*8192U;
334}
335static inline u32 pbdma_signature_hw_valid_f(void)
336{
337 return 0xfaceU;
338}
339static inline u32 pbdma_signature_sw_zero_f(void)
340{
341 return 0x0U;
342}
343static inline u32 pbdma_userd_r(u32 i)
344{
345 return 0x00040008U + i*8192U;
346}
347static inline u32 pbdma_userd_target_vid_mem_f(void)
348{
349 return 0x0U;
350}
351static inline u32 pbdma_userd_target_sys_mem_coh_f(void)
352{
353 return 0x2U;
354}
355static inline u32 pbdma_userd_target_sys_mem_ncoh_f(void)
356{
357 return 0x3U;
358}
359static inline u32 pbdma_userd_addr_f(u32 v)
360{
361 return (v & 0x7fffffU) << 9U;
362}
363static inline u32 pbdma_userd_hi_r(u32 i)
364{
365 return 0x0004000cU + i*8192U;
366}
367static inline u32 pbdma_userd_hi_addr_f(u32 v)
368{
369 return (v & 0xffU) << 0U;
370}
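/*
 * Illustrative sketch (not part of the generated header): split a
 * 512-byte-aligned USERD address into the USERD/USERD_HI register values.
 * Treating bits [39:32] as the high word and OR-ing in a vid-mem target is
 * an assumption made for this example.
 */
static inline u32 pbdma_userd_example_lo(u64 userd_addr)
{
	return pbdma_userd_target_vid_mem_f() |
	       pbdma_userd_addr_f((u32)(userd_addr >> 9));
}
static inline u32 pbdma_userd_example_hi(u64 userd_addr)
{
	return pbdma_userd_hi_addr_f((u32)(userd_addr >> 32));
}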
371static inline u32 pbdma_hce_ctrl_r(u32 i)
372{
373 return 0x000400e4U + i*8192U;
374}
375static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void)
376{
377 return 0x20U;
378}
379static inline u32 pbdma_intr_0_r(u32 i)
380{
381 return 0x00040108U + i*8192U;
382}
383static inline u32 pbdma_intr_0_memreq_v(u32 r)
384{
385 return (r >> 0U) & 0x1U;
386}
387static inline u32 pbdma_intr_0_memreq_pending_f(void)
388{
389 return 0x1U;
390}
391static inline u32 pbdma_intr_0_memack_timeout_pending_f(void)
392{
393 return 0x2U;
394}
395static inline u32 pbdma_intr_0_memack_extra_pending_f(void)
396{
397 return 0x4U;
398}
399static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void)
400{
401 return 0x8U;
402}
403static inline u32 pbdma_intr_0_memdat_extra_pending_f(void)
404{
405 return 0x10U;
406}
407static inline u32 pbdma_intr_0_memflush_pending_f(void)
408{
409 return 0x20U;
410}
411static inline u32 pbdma_intr_0_memop_pending_f(void)
412{
413 return 0x40U;
414}
415static inline u32 pbdma_intr_0_lbconnect_pending_f(void)
416{
417 return 0x80U;
418}
419static inline u32 pbdma_intr_0_lbreq_pending_f(void)
420{
421 return 0x100U;
422}
423static inline u32 pbdma_intr_0_lback_timeout_pending_f(void)
424{
425 return 0x200U;
426}
427static inline u32 pbdma_intr_0_lback_extra_pending_f(void)
428{
429 return 0x400U;
430}
431static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void)
432{
433 return 0x800U;
434}
435static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void)
436{
437 return 0x1000U;
438}
439static inline u32 pbdma_intr_0_gpfifo_pending_f(void)
440{
441 return 0x2000U;
442}
443static inline u32 pbdma_intr_0_gpptr_pending_f(void)
444{
445 return 0x4000U;
446}
447static inline u32 pbdma_intr_0_gpentry_pending_f(void)
448{
449 return 0x8000U;
450}
451static inline u32 pbdma_intr_0_gpcrc_pending_f(void)
452{
453 return 0x10000U;
454}
455static inline u32 pbdma_intr_0_pbptr_pending_f(void)
456{
457 return 0x20000U;
458}
459static inline u32 pbdma_intr_0_pbentry_pending_f(void)
460{
461 return 0x40000U;
462}
463static inline u32 pbdma_intr_0_pbcrc_pending_f(void)
464{
465 return 0x80000U;
466}
467static inline u32 pbdma_intr_0_xbarconnect_pending_f(void)
468{
469 return 0x100000U;
470}
471static inline u32 pbdma_intr_0_method_pending_f(void)
472{
473 return 0x200000U;
474}
475static inline u32 pbdma_intr_0_methodcrc_pending_f(void)
476{
477 return 0x400000U;
478}
479static inline u32 pbdma_intr_0_device_pending_f(void)
480{
481 return 0x800000U;
482}
483static inline u32 pbdma_intr_0_semaphore_pending_f(void)
484{
485 return 0x2000000U;
486}
487static inline u32 pbdma_intr_0_acquire_pending_f(void)
488{
489 return 0x4000000U;
490}
491static inline u32 pbdma_intr_0_pri_pending_f(void)
492{
493 return 0x8000000U;
494}
495static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void)
496{
497 return 0x20000000U;
498}
499static inline u32 pbdma_intr_0_pbseg_pending_f(void)
500{
501 return 0x40000000U;
502}
503static inline u32 pbdma_intr_0_signature_pending_f(void)
504{
505 return 0x80000000U;
506}
507static inline u32 pbdma_intr_1_r(u32 i)
508{
509 return 0x00040148U + i*8192U;
510}
511static inline u32 pbdma_intr_en_0_r(u32 i)
512{
513 return 0x0004010cU + i*8192U;
514}
515static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void)
516{
517 return 0x100U;
518}
519static inline u32 pbdma_intr_en_1_r(u32 i)
520{
521 return 0x0004014cU + i*8192U;
522}
523static inline u32 pbdma_intr_stall_r(u32 i)
524{
525 return 0x0004013cU + i*8192U;
526}
527static inline u32 pbdma_intr_stall_lbreq_enabled_f(void)
528{
529 return 0x100U;
530}
531static inline u32 pbdma_intr_stall_1_r(u32 i)
532{
533 return 0x00040140U + i*8192U;
534}
535static inline u32 pbdma_intr_stall_1_hce_illegal_op_enabled_f(void)
536{
537 return 0x1U;
538}
539static inline u32 pbdma_udma_nop_r(void)
540{
541 return 0x00000008U;
542}
543static inline u32 pbdma_syncpointa_r(u32 i)
544{
545 return 0x000400a4U + i*8192U;
546}
547static inline u32 pbdma_syncpointa_payload_v(u32 r)
548{
549 return (r >> 0U) & 0xffffffffU;
550}
551static inline u32 pbdma_syncpointb_r(u32 i)
552{
553 return 0x000400a8U + i*8192U;
554}
555static inline u32 pbdma_syncpointb_op_v(u32 r)
556{
557 return (r >> 0U) & 0x3U;
558}
559static inline u32 pbdma_syncpointb_op_wait_v(void)
560{
561 return 0x00000000U;
562}
563static inline u32 pbdma_syncpointb_wait_switch_v(u32 r)
564{
565 return (r >> 4U) & 0x1U;
566}
567static inline u32 pbdma_syncpointb_wait_switch_en_v(void)
568{
569 return 0x00000001U;
570}
571static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r)
572{
573 return (r >> 8U) & 0xffU;
574}
575#endif
diff --git a/include/gk20a/hw_perf_gk20a.h b/include/gk20a/hw_perf_gk20a.h
new file mode 100644
index 0000000..a93560f
--- /dev/null
+++ b/include/gk20a/hw_perf_gk20a.h
@@ -0,0 +1,211 @@
1/*
2 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_perf_gk20a_h_
57#define _hw_perf_gk20a_h_
58
59static inline u32 perf_pmasys_control_r(void)
60{
61 return 0x001b4000U;
62}
63static inline u32 perf_pmasys_control_membuf_status_v(u32 r)
64{
65 return (r >> 4U) & 0x1U;
66}
67static inline u32 perf_pmasys_control_membuf_status_overflowed_v(void)
68{
69 return 0x00000001U;
70}
71static inline u32 perf_pmasys_control_membuf_status_overflowed_f(void)
72{
73 return 0x10U;
74}
75static inline u32 perf_pmasys_control_membuf_clear_status_f(u32 v)
76{
77 return (v & 0x1U) << 5U;
78}
79static inline u32 perf_pmasys_control_membuf_clear_status_v(u32 r)
80{
81 return (r >> 5U) & 0x1U;
82}
83static inline u32 perf_pmasys_control_membuf_clear_status_doit_v(void)
84{
85 return 0x00000001U;
86}
87static inline u32 perf_pmasys_control_membuf_clear_status_doit_f(void)
88{
89 return 0x20U;
90}
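/*
 * Illustrative helpers (not part of the generated header): detect a PMA
 * membuf overflow in a read-back PMASYS_CONTROL value and build the write
 * value that clears the status.  Whether other control fields need to be
 * preserved on that write is not specified here.
 */
static inline u32 perf_pmasys_control_example_overflowed(u32 control)
{
	return perf_pmasys_control_membuf_status_v(control) ==
	       perf_pmasys_control_membuf_status_overflowed_v();
}
static inline u32 perf_pmasys_control_example_clear(u32 control)
{
	return control | perf_pmasys_control_membuf_clear_status_doit_f();
}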
91static inline u32 perf_pmasys_mem_block_r(void)
92{
93 return 0x001b4070U;
94}
95static inline u32 perf_pmasys_mem_block_base_f(u32 v)
96{
97 return (v & 0xfffffffU) << 0U;
98}
99static inline u32 perf_pmasys_mem_block_target_f(u32 v)
100{
101 return (v & 0x3U) << 28U;
102}
103static inline u32 perf_pmasys_mem_block_target_v(u32 r)
104{
105 return (r >> 28U) & 0x3U;
106}
107static inline u32 perf_pmasys_mem_block_target_lfb_v(void)
108{
109 return 0x00000000U;
110}
111static inline u32 perf_pmasys_mem_block_target_lfb_f(void)
112{
113 return 0x0U;
114}
115static inline u32 perf_pmasys_mem_block_target_sys_coh_v(void)
116{
117 return 0x00000002U;
118}
119static inline u32 perf_pmasys_mem_block_target_sys_coh_f(void)
120{
121 return 0x20000000U;
122}
123static inline u32 perf_pmasys_mem_block_target_sys_ncoh_v(void)
124{
125 return 0x00000003U;
126}
127static inline u32 perf_pmasys_mem_block_target_sys_ncoh_f(void)
128{
129 return 0x30000000U;
130}
131static inline u32 perf_pmasys_mem_block_valid_f(u32 v)
132{
133 return (v & 0x1U) << 31U;
134}
135static inline u32 perf_pmasys_mem_block_valid_v(u32 r)
136{
137 return (r >> 31U) & 0x1U;
138}
139static inline u32 perf_pmasys_mem_block_valid_true_v(void)
140{
141 return 0x00000001U;
142}
143static inline u32 perf_pmasys_mem_block_valid_true_f(void)
144{
145 return 0x80000000U;
146}
147static inline u32 perf_pmasys_mem_block_valid_false_v(void)
148{
149 return 0x00000000U;
150}
151static inline u32 perf_pmasys_mem_block_valid_false_f(void)
152{
153 return 0x0U;
154}
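/*
 * Illustrative sketch (not part of the generated header): compose a
 * PMASYS_MEM_BLOCK value that points the PMA unit at a buffer in
 * non-coherent system memory.  The unit of 'base' (the 28-bit base field)
 * is whatever the hardware expects and is not specified by this header.
 */
static inline u32 perf_pmasys_mem_block_example(u32 base)
{
	return perf_pmasys_mem_block_base_f(base) |
	       perf_pmasys_mem_block_target_sys_ncoh_f() |
	       perf_pmasys_mem_block_valid_true_f();
}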
155static inline u32 perf_pmasys_outbase_r(void)
156{
157 return 0x001b4074U;
158}
159static inline u32 perf_pmasys_outbase_ptr_f(u32 v)
160{
161 return (v & 0x7ffffffU) << 5U;
162}
163static inline u32 perf_pmasys_outbaseupper_r(void)
164{
165 return 0x001b4078U;
166}
167static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v)
168{
169 return (v & 0xffU) << 0U;
170}
171static inline u32 perf_pmasys_outsize_r(void)
172{
173 return 0x001b407cU;
174}
175static inline u32 perf_pmasys_outsize_numbytes_f(u32 v)
176{
177 return (v & 0x7ffffffU) << 5U;
178}
179static inline u32 perf_pmasys_mem_bytes_r(void)
180{
181 return 0x001b4084U;
182}
183static inline u32 perf_pmasys_mem_bytes_numbytes_f(u32 v)
184{
185 return (v & 0xfffffffU) << 4U;
186}
187static inline u32 perf_pmasys_mem_bump_r(void)
188{
189 return 0x001b4088U;
190}
191static inline u32 perf_pmasys_mem_bump_numbytes_f(u32 v)
192{
193 return (v & 0xfffffffU) << 4U;
194}
195static inline u32 perf_pmasys_enginestatus_r(void)
196{
197 return 0x001b40a4U;
198}
199static inline u32 perf_pmasys_enginestatus_rbufempty_f(u32 v)
200{
201 return (v & 0x1U) << 4U;
202}
203static inline u32 perf_pmasys_enginestatus_rbufempty_empty_v(void)
204{
205 return 0x00000001U;
206}
207static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
208{
209 return 0x10U;
210}
211#endif
diff --git a/include/gk20a/hw_pram_gk20a.h b/include/gk20a/hw_pram_gk20a.h
new file mode 100644
index 0000000..10923e2
--- /dev/null
+++ b/include/gk20a/hw_pram_gk20a.h
@@ -0,0 +1,63 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_pram_gk20a_h_
57#define _hw_pram_gk20a_h_
58
59static inline u32 pram_data032_r(u32 i)
60{
61 return 0x00700000U + i*4U;
62}
63#endif
diff --git a/include/gk20a/hw_pri_ringmaster_gk20a.h b/include/gk20a/hw_pri_ringmaster_gk20a.h
new file mode 100644
index 0000000..ca2775e
--- /dev/null
+++ b/include/gk20a/hw_pri_ringmaster_gk20a.h
@@ -0,0 +1,159 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_pri_ringmaster_gk20a_h_
57#define _hw_pri_ringmaster_gk20a_h_
58
59static inline u32 pri_ringmaster_command_r(void)
60{
61 return 0x0012004cU;
62}
63static inline u32 pri_ringmaster_command_cmd_m(void)
64{
65 return 0x3fU << 0U;
66}
67static inline u32 pri_ringmaster_command_cmd_v(u32 r)
68{
69 return (r >> 0U) & 0x3fU;
70}
71static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void)
72{
73 return 0x00000000U;
74}
75static inline u32 pri_ringmaster_command_cmd_start_ring_f(void)
76{
77 return 0x1U;
78}
79static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void)
80{
81 return 0x2U;
82}
83static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void)
84{
85 return 0x3U;
86}
87static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void)
88{
89 return 0x0U;
90}
91static inline u32 pri_ringmaster_command_data_r(void)
92{
93 return 0x00120048U;
94}
95static inline u32 pri_ringmaster_start_results_r(void)
96{
97 return 0x00120050U;
98}
99static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r)
100{
101 return (r >> 0U) & 0x1U;
102}
103static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void)
104{
105 return 0x00000001U;
106}
107static inline u32 pri_ringmaster_intr_status0_r(void)
108{
109 return 0x00120058U;
110}
111static inline u32 pri_ringmaster_intr_status0_ring_start_conn_fault_v(u32 r)
112{
113 return (r >> 0U) & 0x1U;
114}
115static inline u32 pri_ringmaster_intr_status0_disconnect_fault_v(u32 r)
116{
117 return (r >> 1U) & 0x1U;
118}
119static inline u32 pri_ringmaster_intr_status0_overflow_fault_v(u32 r)
120{
121 return (r >> 2U) & 0x1U;
122}
123static inline u32 pri_ringmaster_intr_status0_gbl_write_error_sys_v(u32 r)
124{
125 return (r >> 8U) & 0x1U;
126}
127static inline u32 pri_ringmaster_intr_status1_r(void)
128{
129 return 0x0012005cU;
130}
131static inline u32 pri_ringmaster_global_ctl_r(void)
132{
133 return 0x00120060U;
134}
135static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void)
136{
137 return 0x1U;
138}
139static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void)
140{
141 return 0x0U;
142}
143static inline u32 pri_ringmaster_enum_fbp_r(void)
144{
145 return 0x00120074U;
146}
147static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r)
148{
149 return (r >> 0U) & 0x1fU;
150}
151static inline u32 pri_ringmaster_enum_gpc_r(void)
152{
153 return 0x00120078U;
154}
155static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r)
156{
157 return (r >> 0U) & 0x1fU;
158}
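/*
 * Illustrative helpers (not part of the generated header): decode the
 * ring-start result and the FBP/GPC enumeration registers.  The arguments
 * are assumed to be raw register read values.
 */
static inline u32 pri_ringmaster_example_ring_connected(u32 start_results)
{
	return pri_ringmaster_start_results_connectivity_v(start_results) ==
	       pri_ringmaster_start_results_connectivity_pass_v();
}
static inline u32 pri_ringmaster_example_num_fbps(u32 enum_fbp)
{
	return pri_ringmaster_enum_fbp_count_v(enum_fbp);
}
static inline u32 pri_ringmaster_example_num_gpcs(u32 enum_gpc)
{
	return pri_ringmaster_enum_gpc_count_v(enum_gpc);
}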
159#endif
diff --git a/include/gk20a/hw_pri_ringstation_fbp_gk20a.h b/include/gk20a/hw_pri_ringstation_fbp_gk20a.h
new file mode 100644
index 0000000..06e08bd
--- /dev/null
+++ b/include/gk20a/hw_pri_ringstation_fbp_gk20a.h
@@ -0,0 +1,231 @@
1/*
2 * drivers/video/tegra/host/gk20a/hw_pri_ringstation_fbp_gk20a.h
3 *
4 * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Function naming determines intended use:
27 *
28 * <x>_r(void) : Returns the offset for register <x>.
29 *
30 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
31 *
32 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
33 *
34 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
35 * and masked to place it at field <y> of register <x>. This value
36 * can be |'d with others to produce a full register value for
37 * register <x>.
38 *
39 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
40 * value can be ~'d and then &'d to clear the value of field <y> for
41 * register <x>.
42 *
43 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
44 * to place it at field <y> of register <x>. This value can be |'d
45 * with others to produce a full register value for <x>.
46 *
47 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
48 * <x> value 'r' after being shifted to place its LSB at bit 0.
49 * This value is suitable for direct comparison with other unshifted
50 * values appropriate for use in field <y> of register <x>.
51 *
52 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
53 * field <y> of register <x>. This value is suitable for direct
54 * comparison with unshifted values appropriate for use in field <y>
55 * of register <x>.
56 */
57
58#ifndef __hw_pri_ringstation_fbp_gk20a_h__
59#define __hw_pri_ringstation_fbp_gk20a_h__
61/* This file is autogenerated. Do not edit. */
61
62static inline u32 pri_ringstation_fbp_master_config_r(u32 i)
63{
64 return 0x00124300+((i)*4);
65}
66static inline u32 pri_ringstation_fbp_master_config__size_1_v(void)
67{
68 return 64;
69}
70static inline u32 pri_ringstation_fbp_master_config_timeout_s(void)
71{
72 return 18;
73}
74static inline u32 pri_ringstation_fbp_master_config_timeout_f(u32 v)
75{
76 return (v & 0x3ffff) << 0;
77}
78static inline u32 pri_ringstation_fbp_master_config_timeout_m(void)
79{
80 return 0x3ffff << 0;
81}
82static inline u32 pri_ringstation_fbp_master_config_timeout_v(u32 r)
83{
84 return (r >> 0) & 0x3ffff;
85}
86static inline u32 pri_ringstation_fbp_master_config_timeout_i_v(void)
87{
88 return 0x00000064;
89}
90static inline u32 pri_ringstation_fbp_master_config_timeout_i_f(void)
91{
92 return 0x64;
93}
94static inline u32 pri_ringstation_fbp_master_config_fs_action_s(void)
95{
96 return 1;
97}
98static inline u32 pri_ringstation_fbp_master_config_fs_action_f(u32 v)
99{
100 return (v & 0x1) << 30;
101}
102static inline u32 pri_ringstation_fbp_master_config_fs_action_m(void)
103{
104 return 0x1 << 30;
105}
106static inline u32 pri_ringstation_fbp_master_config_fs_action_v(u32 r)
107{
108 return (r >> 30) & 0x1;
109}
110static inline u32 pri_ringstation_fbp_master_config_fs_action_error_v(void)
111{
112 return 0x00000000;
113}
114static inline u32 pri_ringstation_fbp_master_config_fs_action_error_f(void)
115{
116 return 0x0;
117}
118static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_v(void)
119{
120 return 0x00000001;
121}
122static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_f(void)
123{
124 return 0x40000000;
125}
126static inline u32 pri_ringstation_fbp_master_config_reset_action_s(void)
127{
128 return 1;
129}
130static inline u32 pri_ringstation_fbp_master_config_reset_action_f(u32 v)
131{
132 return (v & 0x1) << 31;
133}
134static inline u32 pri_ringstation_fbp_master_config_reset_action_m(void)
135{
136 return 0x1 << 31;
137}
138static inline u32 pri_ringstation_fbp_master_config_reset_action_v(u32 r)
139{
140 return (r >> 31) & 0x1;
141}
142static inline u32 pri_ringstation_fbp_master_config_reset_action_error_v(void)
143{
144 return 0x00000000;
145}
146static inline u32 pri_ringstation_fbp_master_config_reset_action_error_f(void)
147{
148 return 0x0;
149}
150static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_v(void)
151{
152 return 0x00000001;
153}
154static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_f(void)
155{
156 return 0x80000000;
157}
158static inline u32 pri_ringstation_fbp_master_config_setup_clocks_s(void)
159{
160 return 3;
161}
162static inline u32 pri_ringstation_fbp_master_config_setup_clocks_f(u32 v)
163{
164 return (v & 0x7) << 20;
165}
166static inline u32 pri_ringstation_fbp_master_config_setup_clocks_m(void)
167{
168 return 0x7 << 20;
169}
170static inline u32 pri_ringstation_fbp_master_config_setup_clocks_v(u32 r)
171{
172 return (r >> 20) & 0x7;
173}
174static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_v(void)
175{
176 return 0x00000000;
177}
178static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_f(void)
179{
180 return 0x0;
181}
182static inline u32 pri_ringstation_fbp_master_config_wait_clocks_s(void)
183{
184 return 3;
185}
186static inline u32 pri_ringstation_fbp_master_config_wait_clocks_f(u32 v)
187{
188 return (v & 0x7) << 24;
189}
190static inline u32 pri_ringstation_fbp_master_config_wait_clocks_m(void)
191{
192 return 0x7 << 24;
193}
194static inline u32 pri_ringstation_fbp_master_config_wait_clocks_v(u32 r)
195{
196 return (r >> 24) & 0x7;
197}
198static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_v(void)
199{
200 return 0x00000000;
201}
202static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_f(void)
203{
204 return 0x0;
205}
206static inline u32 pri_ringstation_fbp_master_config_hold_clocks_s(void)
207{
208 return 3;
209}
210static inline u32 pri_ringstation_fbp_master_config_hold_clocks_f(u32 v)
211{
212 return (v & 0x7) << 27;
213}
214static inline u32 pri_ringstation_fbp_master_config_hold_clocks_m(void)
215{
216 return 0x7 << 27;
217}
218static inline u32 pri_ringstation_fbp_master_config_hold_clocks_v(u32 r)
219{
220 return (r >> 27) & 0x7;
221}
222static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_v(void)
223{
224 return 0x00000000;
225}
226static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_f(void)
227{
228 return 0x0;
229}
230
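/*
 * Illustrative sketch (not part of the generated header): compose a
 * master_config value from the field helpers above, combining the timeout_i
 * constant (presumably the init/default timeout) with "soldier on" for both
 * the fs and reset actions.  This is an example combination, not a
 * recommended programming.
 */
static inline u32 pri_ringstation_fbp_master_config_example(void)
{
	return pri_ringstation_fbp_master_config_timeout_i_f() |
	       pri_ringstation_fbp_master_config_fs_action_soldier_on_f() |
	       pri_ringstation_fbp_master_config_reset_action_soldier_on_f();
}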
231#endif /* __hw_pri_ringstation_fbp_gk20a_h__ */
diff --git a/include/gk20a/hw_pri_ringstation_gpc_gk20a.h b/include/gk20a/hw_pri_ringstation_gpc_gk20a.h
new file mode 100644
index 0000000..6b57429
--- /dev/null
+++ b/include/gk20a/hw_pri_ringstation_gpc_gk20a.h
@@ -0,0 +1,79 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_pri_ringstation_gpc_gk20a_h_
57#define _hw_pri_ringstation_gpc_gk20a_h_
58
59static inline u32 pri_ringstation_gpc_master_config_r(u32 i)
60{
61 return 0x00128300U + i*4U;
62}
63static inline u32 pri_ringstation_gpc_gpc0_priv_error_adr_r(void)
64{
65 return 0x00128120U;
66}
67static inline u32 pri_ringstation_gpc_gpc0_priv_error_wrdat_r(void)
68{
69 return 0x00128124U;
70}
71static inline u32 pri_ringstation_gpc_gpc0_priv_error_info_r(void)
72{
73 return 0x00128128U;
74}
75static inline u32 pri_ringstation_gpc_gpc0_priv_error_code_r(void)
76{
77 return 0x0012812cU;
78}
79#endif
diff --git a/include/gk20a/hw_pri_ringstation_sys_gk20a.h b/include/gk20a/hw_pri_ringstation_sys_gk20a.h
new file mode 100644
index 0000000..e4d5c3b
--- /dev/null
+++ b/include/gk20a/hw_pri_ringstation_sys_gk20a.h
@@ -0,0 +1,91 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_pri_ringstation_sys_gk20a_h_
57#define _hw_pri_ringstation_sys_gk20a_h_
58
59static inline u32 pri_ringstation_sys_master_config_r(u32 i)
60{
61 return 0x00122300U + i*4U;
62}
63static inline u32 pri_ringstation_sys_decode_config_r(void)
64{
65 return 0x00122204U;
66}
67static inline u32 pri_ringstation_sys_decode_config_ring_m(void)
68{
69 return 0x7U << 0U;
70}
71static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void)
72{
73 return 0x1U;
74}
75static inline u32 pri_ringstation_sys_priv_error_adr_r(void)
76{
77 return 0x00122120U;
78}
79static inline u32 pri_ringstation_sys_priv_error_wrdat_r(void)
80{
81 return 0x00122124U;
82}
83static inline u32 pri_ringstation_sys_priv_error_info_r(void)
84{
85 return 0x00122128U;
86}
87static inline u32 pri_ringstation_sys_priv_error_code_r(void)
88{
89 return 0x0012212cU;
90}
91#endif
diff --git a/include/gk20a/hw_proj_gk20a.h b/include/gk20a/hw_proj_gk20a.h
new file mode 100644
index 0000000..10509ca
--- /dev/null
+++ b/include/gk20a/hw_proj_gk20a.h
@@ -0,0 +1,167 @@
1/*
2 * Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_proj_gk20a_h_
57#define _hw_proj_gk20a_h_
58
59static inline u32 proj_gpc_base_v(void)
60{
61 return 0x00500000U;
62}
63static inline u32 proj_gpc_shared_base_v(void)
64{
65 return 0x00418000U;
66}
67static inline u32 proj_gpc_stride_v(void)
68{
69 return 0x00008000U;
70}
71static inline u32 proj_gpc_priv_stride_v(void)
72{
73 return 0x00000800U;
74}
75static inline u32 proj_ltc_stride_v(void)
76{
77 return 0x00002000U;
78}
79static inline u32 proj_lts_stride_v(void)
80{
81 return 0x00000400U;
82}
83static inline u32 proj_fbpa_stride_v(void)
84{
85 return 0x00001000U;
86}
87static inline u32 proj_ppc_in_gpc_base_v(void)
88{
89 return 0x00003000U;
90}
91static inline u32 proj_ppc_in_gpc_shared_base_v(void)
92{
93 return 0x00003e00U;
94}
95static inline u32 proj_ppc_in_gpc_stride_v(void)
96{
97 return 0x00000200U;
98}
99static inline u32 proj_rop_base_v(void)
100{
101 return 0x00410000U;
102}
103static inline u32 proj_rop_shared_base_v(void)
104{
105 return 0x00408800U;
106}
107static inline u32 proj_rop_stride_v(void)
108{
109 return 0x00000400U;
110}
111static inline u32 proj_tpc_in_gpc_base_v(void)
112{
113 return 0x00004000U;
114}
115static inline u32 proj_tpc_in_gpc_stride_v(void)
116{
117 return 0x00000800U;
118}
119static inline u32 proj_tpc_in_gpc_shared_base_v(void)
120{
121 return 0x00001800U;
122}
123static inline u32 proj_host_num_engines_v(void)
124{
125 return 0x00000002U;
126}
127static inline u32 proj_host_num_pbdma_v(void)
128{
129 return 0x00000001U;
130}
131static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void)
132{
133 return 0x00000001U;
134}
135static inline u32 proj_scal_litter_num_fbps_v(void)
136{
137 return 0x00000001U;
138}
139static inline u32 proj_scal_litter_num_fbpas_v(void)
140{
141 return 0x00000001U;
142}
143static inline u32 proj_scal_litter_num_gpcs_v(void)
144{
145 return 0x00000001U;
146}
147static inline u32 proj_scal_litter_num_pes_per_gpc_v(void)
148{
149 return 0x00000001U;
150}
151static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void)
152{
153 return 0x00000001U;
154}
155static inline u32 proj_scal_litter_num_zcull_banks_v(void)
156{
157 return 0x00000004U;
158}
159static inline u32 proj_scal_max_gpcs_v(void)
160{
161 return 0x00000020U;
162}
163static inline u32 proj_scal_max_tpc_per_gpc_v(void)
164{
165 return 0x00000008U;
166}
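/*
 * Illustrative sketch (not part of the generated header): these project
 * constants are normally combined as base + index * stride.  Computing the
 * byte offset of a given GPC, and of a TPC within that GPC, this way is an
 * assumption based on how the base/stride constants are named.
 */
static inline u32 proj_example_gpc_offset(u32 gpc)
{
	return proj_gpc_base_v() + gpc * proj_gpc_stride_v();
}
static inline u32 proj_example_tpc_in_gpc_offset(u32 gpc, u32 tpc)
{
	return proj_example_gpc_offset(gpc) +
	       proj_tpc_in_gpc_base_v() + tpc * proj_tpc_in_gpc_stride_v();
}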
167#endif
diff --git a/include/gk20a/hw_pwr_gk20a.h b/include/gk20a/hw_pwr_gk20a.h
new file mode 100644
index 0000000..2845763
--- /dev/null
+++ b/include/gk20a/hw_pwr_gk20a.h
@@ -0,0 +1,823 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_pwr_gk20a_h_
57#define _hw_pwr_gk20a_h_
58
59static inline u32 pwr_falcon_irqsset_r(void)
60{
61 return 0x0010a000U;
62}
63static inline u32 pwr_falcon_irqsset_swgen0_set_f(void)
64{
65 return 0x40U;
66}
67static inline u32 pwr_falcon_irqsclr_r(void)
68{
69 return 0x0010a004U;
70}
71static inline u32 pwr_falcon_irqstat_r(void)
72{
73 return 0x0010a008U;
74}
75static inline u32 pwr_falcon_irqstat_halt_true_f(void)
76{
77 return 0x10U;
78}
79static inline u32 pwr_falcon_irqstat_exterr_true_f(void)
80{
81 return 0x20U;
82}
83static inline u32 pwr_falcon_irqstat_swgen0_true_f(void)
84{
85 return 0x40U;
86}
87static inline u32 pwr_falcon_irqmode_r(void)
88{
89 return 0x0010a00cU;
90}
91static inline u32 pwr_falcon_irqmset_r(void)
92{
93 return 0x0010a010U;
94}
95static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v)
96{
97 return (v & 0x1U) << 0U;
98}
99static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v)
100{
101 return (v & 0x1U) << 1U;
102}
103static inline u32 pwr_falcon_irqmset_mthd_f(u32 v)
104{
105 return (v & 0x1U) << 2U;
106}
107static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v)
108{
109 return (v & 0x1U) << 3U;
110}
111static inline u32 pwr_falcon_irqmset_halt_f(u32 v)
112{
113 return (v & 0x1U) << 4U;
114}
115static inline u32 pwr_falcon_irqmset_exterr_f(u32 v)
116{
117 return (v & 0x1U) << 5U;
118}
119static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v)
120{
121 return (v & 0x1U) << 6U;
122}
123static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v)
124{
125 return (v & 0x1U) << 7U;
126}
127static inline u32 pwr_falcon_irqmclr_r(void)
128{
129 return 0x0010a014U;
130}
131static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v)
132{
133 return (v & 0x1U) << 0U;
134}
135static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v)
136{
137 return (v & 0x1U) << 1U;
138}
139static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v)
140{
141 return (v & 0x1U) << 2U;
142}
143static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v)
144{
145 return (v & 0x1U) << 3U;
146}
147static inline u32 pwr_falcon_irqmclr_halt_f(u32 v)
148{
149 return (v & 0x1U) << 4U;
150}
151static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v)
152{
153 return (v & 0x1U) << 5U;
154}
155static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v)
156{
157 return (v & 0x1U) << 6U;
158}
159static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v)
160{
161 return (v & 0x1U) << 7U;
162}
163static inline u32 pwr_falcon_irqmclr_ext_f(u32 v)
164{
165 return (v & 0xffU) << 8U;
166}
167static inline u32 pwr_falcon_irqmask_r(void)
168{
169 return 0x0010a018U;
170}
171static inline u32 pwr_falcon_irqdest_r(void)
172{
173 return 0x0010a01cU;
174}
175static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v)
176{
177 return (v & 0x1U) << 0U;
178}
179static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v)
180{
181 return (v & 0x1U) << 1U;
182}
183static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v)
184{
185 return (v & 0x1U) << 2U;
186}
187static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v)
188{
189 return (v & 0x1U) << 3U;
190}
191static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v)
192{
193 return (v & 0x1U) << 4U;
194}
195static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v)
196{
197 return (v & 0x1U) << 5U;
198}
199static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v)
200{
201 return (v & 0x1U) << 6U;
202}
203static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v)
204{
205 return (v & 0x1U) << 7U;
206}
207static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v)
208{
209 return (v & 0xffU) << 8U;
210}
211static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v)
212{
213 return (v & 0x1U) << 16U;
214}
215static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v)
216{
217 return (v & 0x1U) << 17U;
218}
219static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v)
220{
221 return (v & 0x1U) << 18U;
222}
223static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v)
224{
225 return (v & 0x1U) << 19U;
226}
227static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v)
228{
229 return (v & 0x1U) << 20U;
230}
231static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v)
232{
233 return (v & 0x1U) << 21U;
234}
235static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v)
236{
237 return (v & 0x1U) << 22U;
238}
239static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v)
240{
241 return (v & 0x1U) << 23U;
242}
243static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v)
244{
245 return (v & 0xffU) << 24U;
246}
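/*
 * Illustrative sketch (not part of the generated header): build an IRQMSET
 * value that unmasks the halt, exterr, swgen0 and swgen1 interrupts, and a
 * matching IRQDEST value that routes them to the host.  The particular set
 * of interrupts is an example, not the driver's actual PMU configuration.
 */
static inline u32 pwr_falcon_example_irqmset(void)
{
	return pwr_falcon_irqmset_halt_f(1U) |
	       pwr_falcon_irqmset_exterr_f(1U) |
	       pwr_falcon_irqmset_swgen0_f(1U) |
	       pwr_falcon_irqmset_swgen1_f(1U);
}
static inline u32 pwr_falcon_example_irqdest_host(void)
{
	return pwr_falcon_irqdest_host_halt_f(1U) |
	       pwr_falcon_irqdest_host_exterr_f(1U) |
	       pwr_falcon_irqdest_host_swgen0_f(1U) |
	       pwr_falcon_irqdest_host_swgen1_f(1U);
}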
247static inline u32 pwr_falcon_curctx_r(void)
248{
249 return 0x0010a050U;
250}
251static inline u32 pwr_falcon_nxtctx_r(void)
252{
253 return 0x0010a054U;
254}
255static inline u32 pwr_falcon_mailbox0_r(void)
256{
257 return 0x0010a040U;
258}
259static inline u32 pwr_falcon_mailbox1_r(void)
260{
261 return 0x0010a044U;
262}
263static inline u32 pwr_falcon_itfen_r(void)
264{
265 return 0x0010a048U;
266}
267static inline u32 pwr_falcon_itfen_ctxen_enable_f(void)
268{
269 return 0x1U;
270}
271static inline u32 pwr_falcon_idlestate_r(void)
272{
273 return 0x0010a04cU;
274}
275static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r)
276{
277 return (r >> 0U) & 0x1U;
278}
279static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r)
280{
281 return (r >> 1U) & 0x7fffU;
282}
283static inline u32 pwr_falcon_os_r(void)
284{
285 return 0x0010a080U;
286}
287static inline u32 pwr_falcon_engctl_r(void)
288{
289 return 0x0010a0a4U;
290}
291static inline u32 pwr_falcon_cpuctl_r(void)
292{
293 return 0x0010a100U;
294}
295static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
296{
297 return (v & 0x1U) << 1U;
298}
299static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
300{
301 return (v & 0x1U) << 4U;
302}
303static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
304{
305 return 0x1U << 4U;
306}
307static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
308{
309 return (r >> 4U) & 0x1U;
310}
311static inline u32 pwr_falcon_imemc_r(u32 i)
312{
313 return 0x0010a180U + i*16U;
314}
315static inline u32 pwr_falcon_imemc_offs_f(u32 v)
316{
317 return (v & 0x3fU) << 2U;
318}
319static inline u32 pwr_falcon_imemc_blk_f(u32 v)
320{
321 return (v & 0xffU) << 8U;
322}
323static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
324{
325 return (v & 0x1U) << 24U;
326}
327static inline u32 pwr_falcon_imemd_r(u32 i)
328{
329 return 0x0010a184U + i*16U;
330}
331static inline u32 pwr_falcon_imemt_r(u32 i)
332{
333 return 0x0010a188U + i*16U;
334}
335static inline u32 pwr_falcon_bootvec_r(void)
336{
337 return 0x0010a104U;
338}
339static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
340{
341 return (v & 0xffffffffU) << 0U;
342}
343static inline u32 pwr_falcon_dmactl_r(void)
344{
345 return 0x0010a10cU;
346}
347static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void)
348{
349 return 0x1U << 1U;
350}
351static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void)
352{
353 return 0x1U << 2U;
354}
355static inline u32 pwr_falcon_hwcfg_r(void)
356{
357 return 0x0010a108U;
358}
359static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r)
360{
361 return (r >> 0U) & 0x1ffU;
362}
363static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r)
364{
365 return (r >> 9U) & 0x1ffU;
366}
367static inline u32 pwr_falcon_dmatrfbase_r(void)
368{
369 return 0x0010a110U;
370}
371static inline u32 pwr_falcon_dmatrfmoffs_r(void)
372{
373 return 0x0010a114U;
374}
375static inline u32 pwr_falcon_dmatrfcmd_r(void)
376{
377 return 0x0010a118U;
378}
379static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v)
380{
381 return (v & 0x1U) << 4U;
382}
383static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v)
384{
385 return (v & 0x1U) << 5U;
386}
387static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v)
388{
389 return (v & 0x7U) << 8U;
390}
391static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v)
392{
393 return (v & 0x7U) << 12U;
394}
395static inline u32 pwr_falcon_dmatrffboffs_r(void)
396{
397 return 0x0010a11cU;
398}
399static inline u32 pwr_falcon_exterraddr_r(void)
400{
401 return 0x0010a168U;
402}
403static inline u32 pwr_falcon_exterrstat_r(void)
404{
405 return 0x0010a16cU;
406}
407static inline u32 pwr_falcon_exterrstat_valid_m(void)
408{
409 return 0x1U << 31U;
410}
411static inline u32 pwr_falcon_exterrstat_valid_v(u32 r)
412{
413 return (r >> 31U) & 0x1U;
414}
415static inline u32 pwr_falcon_exterrstat_valid_true_v(void)
416{
417 return 0x00000001U;
418}
419static inline u32 pwr_pmu_falcon_icd_cmd_r(void)
420{
421 return 0x0010a200U;
422}
423static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void)
424{
425 return 4U;
426}
427static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v)
428{
429 return (v & 0xfU) << 0U;
430}
431static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void)
432{
433 return 0xfU << 0U;
434}
435static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r)
436{
437 return (r >> 0U) & 0xfU;
438}
439static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void)
440{
441 return 0x8U;
442}
443static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void)
444{
445 return 0xeU;
446}
447static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v)
448{
449 return (v & 0x1fU) << 8U;
450}
451static inline u32 pwr_pmu_falcon_icd_rdata_r(void)
452{
453 return 0x0010a20cU;
454}
455static inline u32 pwr_falcon_dmemc_r(u32 i)
456{
457 return 0x0010a1c0U + i*8U;
458}
459static inline u32 pwr_falcon_dmemc_offs_f(u32 v)
460{
461 return (v & 0x3fU) << 2U;
462}
463static inline u32 pwr_falcon_dmemc_offs_m(void)
464{
465 return 0x3fU << 2U;
466}
467static inline u32 pwr_falcon_dmemc_blk_f(u32 v)
468{
469 return (v & 0xffU) << 8U;
470}
471static inline u32 pwr_falcon_dmemc_blk_m(void)
472{
473 return 0xffU << 8U;
474}
475static inline u32 pwr_falcon_dmemc_aincw_f(u32 v)
476{
477 return (v & 0x1U) << 24U;
478}
479static inline u32 pwr_falcon_dmemc_aincr_f(u32 v)
480{
481 return (v & 0x1U) << 25U;
482}
483static inline u32 pwr_falcon_dmemd_r(u32 i)
484{
485 return 0x0010a1c4U + i*8U;
486}
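/*
 * Illustrative sketch (not part of the generated header): build the DMEMC
 * access word for a byte offset into falcon DMEM, with auto-increment
 * enabled for writes.  Splitting the offset into a 256-byte block number
 * and a word offset within the block follows from the field widths above,
 * but is an assumption made here.
 */
static inline u32 pwr_falcon_example_dmemc_wr(u32 byte_offset)
{
	return pwr_falcon_dmemc_offs_f(byte_offset >> 2U) |
	       pwr_falcon_dmemc_blk_f(byte_offset >> 8U) |
	       pwr_falcon_dmemc_aincw_f(1U);
}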
487static inline u32 pwr_pmu_new_instblk_r(void)
488{
489 return 0x0010a480U;
490}
491static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v)
492{
493 return (v & 0xfffffffU) << 0U;
494}
495static inline u32 pwr_pmu_new_instblk_target_fb_f(void)
496{
497 return 0x0U;
498}
499static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
500{
501 return 0x20000000U;
502}
503static inline u32 pwr_pmu_new_instblk_target_sys_ncoh_f(void)
504{
505 return 0x30000000U;
506}
507static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
508{
509 return (v & 0x1U) << 30U;
510}
511static inline u32 pwr_pmu_mutex_id_r(void)
512{
513 return 0x0010a488U;
514}
515static inline u32 pwr_pmu_mutex_id_value_v(u32 r)
516{
517 return (r >> 0U) & 0xffU;
518}
519static inline u32 pwr_pmu_mutex_id_value_init_v(void)
520{
521 return 0x00000000U;
522}
523static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void)
524{
525 return 0x000000ffU;
526}
527static inline u32 pwr_pmu_mutex_id_release_r(void)
528{
529 return 0x0010a48cU;
530}
531static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v)
532{
533 return (v & 0xffU) << 0U;
534}
535static inline u32 pwr_pmu_mutex_id_release_value_m(void)
536{
537 return 0xffU << 0U;
538}
539static inline u32 pwr_pmu_mutex_id_release_value_init_v(void)
540{
541 return 0x00000000U;
542}
543static inline u32 pwr_pmu_mutex_id_release_value_init_f(void)
544{
545 return 0x0U;
546}
547static inline u32 pwr_pmu_mutex_r(u32 i)
548{
549 return 0x0010a580U + i*4U;
550}
551static inline u32 pwr_pmu_mutex__size_1_v(void)
552{
553 return 0x00000010U;
554}
555static inline u32 pwr_pmu_mutex_value_f(u32 v)
556{
557 return (v & 0xffU) << 0U;
558}
559static inline u32 pwr_pmu_mutex_value_v(u32 r)
560{
561 return (r >> 0U) & 0xffU;
562}
563static inline u32 pwr_pmu_mutex_value_initial_lock_f(void)
564{
565 return 0x0U;
566}
567static inline u32 pwr_pmu_queue_head_r(u32 i)
568{
569 return 0x0010a4a0U + i*4U;
570}
571static inline u32 pwr_pmu_queue_head__size_1_v(void)
572{
573 return 0x00000004U;
574}
575static inline u32 pwr_pmu_queue_head_address_f(u32 v)
576{
577 return (v & 0xffffffffU) << 0U;
578}
579static inline u32 pwr_pmu_queue_head_address_v(u32 r)
580{
581 return (r >> 0U) & 0xffffffffU;
582}
583static inline u32 pwr_pmu_queue_tail_r(u32 i)
584{
585 return 0x0010a4b0U + i*4U;
586}
587static inline u32 pwr_pmu_queue_tail__size_1_v(void)
588{
589 return 0x00000004U;
590}
591static inline u32 pwr_pmu_queue_tail_address_f(u32 v)
592{
593 return (v & 0xffffffffU) << 0U;
594}
595static inline u32 pwr_pmu_queue_tail_address_v(u32 r)
596{
597 return (r >> 0U) & 0xffffffffU;
598}
599static inline u32 pwr_pmu_msgq_head_r(void)
600{
601 return 0x0010a4c8U;
602}
603static inline u32 pwr_pmu_msgq_head_val_f(u32 v)
604{
605 return (v & 0xffffffffU) << 0U;
606}
607static inline u32 pwr_pmu_msgq_head_val_v(u32 r)
608{
609 return (r >> 0U) & 0xffffffffU;
610}
611static inline u32 pwr_pmu_msgq_tail_r(void)
612{
613 return 0x0010a4ccU;
614}
615static inline u32 pwr_pmu_msgq_tail_val_f(u32 v)
616{
617 return (v & 0xffffffffU) << 0U;
618}
619static inline u32 pwr_pmu_msgq_tail_val_v(u32 r)
620{
621 return (r >> 0U) & 0xffffffffU;
622}
623static inline u32 pwr_pmu_idle_mask_r(u32 i)
624{
625 return 0x0010a504U + i*16U;
626}
627static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void)
628{
629 return 0x1U;
630}
631static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void)
632{
633 return 0x200000U;
634}
635static inline u32 pwr_pmu_idle_count_r(u32 i)
636{
637 return 0x0010a508U + i*16U;
638}
639static inline u32 pwr_pmu_idle_count_value_f(u32 v)
640{
641 return (v & 0x7fffffffU) << 0U;
642}
643static inline u32 pwr_pmu_idle_count_value_v(u32 r)
644{
645 return (r >> 0U) & 0x7fffffffU;
646}
647static inline u32 pwr_pmu_idle_count_reset_f(u32 v)
648{
649 return (v & 0x1U) << 31U;
650}
651static inline u32 pwr_pmu_idle_ctrl_r(u32 i)
652{
653 return 0x0010a50cU + i*16U;
654}
655static inline u32 pwr_pmu_idle_ctrl_value_m(void)
656{
657 return 0x3U << 0U;
658}
659static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void)
660{
661 return 0x2U;
662}
663static inline u32 pwr_pmu_idle_ctrl_value_always_f(void)
664{
665 return 0x3U;
666}
667static inline u32 pwr_pmu_idle_ctrl_filter_m(void)
668{
669 return 0x1U << 2U;
670}
671static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
672{
673 return 0x0U;
674}
675static inline u32 pwr_pmu_idle_threshold_r(u32 i)
676{
677 return 0x0010a8a0U + i*4U;
678}
679static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
680{
681 return (v & 0x7fffffffU) << 0U;
682}
683static inline u32 pwr_pmu_idle_intr_r(void)
684{
685 return 0x0010a9e8U;
686}
687static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
688{
689 return (v & 0x1U) << 0U;
690}
691static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
692{
693 return 0x00000000U;
694}
695static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
696{
697 return 0x00000001U;
698}
699static inline u32 pwr_pmu_idle_intr_status_r(void)
700{
701 return 0x0010a9ecU;
702}
703static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
704{
705 return (v & 0x1U) << 0U;
706}
707static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
708{
709 return U32(0x1U) << 0U;
710}
711static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
712{
713 return (r >> 0U) & 0x1U;
714}
715static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
716{
717 return 0x0010a9f0U + i*8U;
718}
719static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i)
720{
721 return 0x0010a9f4U + i*8U;
722}
723static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i)
724{
725 return 0x0010aa30U + i*8U;
726}
727static inline u32 pwr_pmu_debug_r(u32 i)
728{
729 return 0x0010a5c0U + i*4U;
730}
731static inline u32 pwr_pmu_debug__size_1_v(void)
732{
733 return 0x00000004U;
734}
735static inline u32 pwr_pmu_mailbox_r(u32 i)
736{
737 return 0x0010a450U + i*4U;
738}
739static inline u32 pwr_pmu_mailbox__size_1_v(void)
740{
741 return 0x0000000cU;
742}
743static inline u32 pwr_pmu_bar0_addr_r(void)
744{
745 return 0x0010a7a0U;
746}
747static inline u32 pwr_pmu_bar0_data_r(void)
748{
749 return 0x0010a7a4U;
750}
751static inline u32 pwr_pmu_bar0_ctl_r(void)
752{
753 return 0x0010a7acU;
754}
755static inline u32 pwr_pmu_bar0_timeout_r(void)
756{
757 return 0x0010a7a8U;
758}
759static inline u32 pwr_pmu_bar0_fecs_error_r(void)
760{
761 return 0x0010a988U;
762}
763static inline u32 pwr_pmu_bar0_error_status_r(void)
764{
765 return 0x0010a7b0U;
766}
767static inline u32 pwr_pmu_pg_idlefilth_r(u32 i)
768{
769 return 0x0010a6c0U + i*4U;
770}
771static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i)
772{
773 return 0x0010a6e8U + i*4U;
774}
775static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i)
776{
777 return 0x0010a710U + i*4U;
778}
779static inline u32 pwr_pmu_pg_intren_r(u32 i)
780{
781 return 0x0010a760U + i*4U;
782}
783static inline u32 pwr_fbif_transcfg_r(u32 i)
784{
785 return 0x0010a600U + i*4U;
786}
787static inline u32 pwr_fbif_transcfg_target_local_fb_f(void)
788{
789 return 0x0U;
790}
791static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void)
792{
793 return 0x1U;
794}
795static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void)
796{
797 return 0x2U;
798}
799static inline u32 pwr_fbif_transcfg_mem_type_s(void)
800{
801 return 1U;
802}
803static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v)
804{
805 return (v & 0x1U) << 2U;
806}
807static inline u32 pwr_fbif_transcfg_mem_type_m(void)
808{
809 return 0x1U << 2U;
810}
811static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r)
812{
813 return (r >> 2U) & 0x1U;
814}
815static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void)
816{
817 return 0x0U;
818}
819static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void)
820{
821 return 0x4U;
822}
823#endif
diff --git a/include/gk20a/hw_ram_gk20a.h b/include/gk20a/hw_ram_gk20a.h
new file mode 100644
index 0000000..ed385d9
--- /dev/null
+++ b/include/gk20a/hw_ram_gk20a.h
@@ -0,0 +1,443 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_ram_gk20a_h_
57#define _hw_ram_gk20a_h_
58
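/*
 * Usage sketch (illustrative only, not part of the generated header):
 * building the page-directory-base words of an instance block with the
 * helpers defined below. 'g', 'inst_block' and 'pdb_addr' are assumed to
 * come from the caller; the driver code that actually does this is
 * gk20a_mm_init_pdb() in mm_gk20a.c.
 *
 *   u32 pdb_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 *
 *   nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
 *                  ram_in_page_dir_base_target_vid_mem_f() |
 *                  ram_in_page_dir_base_vol_true_f() |
 *                  ram_in_page_dir_base_lo_f(pdb_lo));
 *   nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
 *                  ram_in_page_dir_base_hi_f(u64_hi32(pdb_addr)));
 */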
59static inline u32 ram_in_ramfc_s(void)
60{
61 return 4096U;
62}
63static inline u32 ram_in_ramfc_w(void)
64{
65 return 0U;
66}
67static inline u32 ram_in_page_dir_base_target_f(u32 v)
68{
69 return (v & 0x3U) << 0U;
70}
71static inline u32 ram_in_page_dir_base_target_w(void)
72{
73 return 128U;
74}
75static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
76{
77 return 0x0U;
78}
79static inline u32 ram_in_page_dir_base_target_sys_mem_coh_f(void)
80{
81 return 0x2U;
82}
83static inline u32 ram_in_page_dir_base_target_sys_mem_ncoh_f(void)
84{
85 return 0x3U;
86}
87static inline u32 ram_in_page_dir_base_vol_w(void)
88{
89 return 128U;
90}
91static inline u32 ram_in_page_dir_base_vol_true_f(void)
92{
93 return 0x4U;
94}
95static inline u32 ram_in_page_dir_base_lo_f(u32 v)
96{
97 return (v & 0xfffffU) << 12U;
98}
99static inline u32 ram_in_page_dir_base_lo_w(void)
100{
101 return 128U;
102}
103static inline u32 ram_in_page_dir_base_hi_f(u32 v)
104{
105 return (v & 0xffU) << 0U;
106}
107static inline u32 ram_in_page_dir_base_hi_w(void)
108{
109 return 129U;
110}
111static inline u32 ram_in_adr_limit_lo_f(u32 v)
112{
113 return (v & 0xfffffU) << 12U;
114}
115static inline u32 ram_in_adr_limit_lo_w(void)
116{
117 return 130U;
118}
119static inline u32 ram_in_adr_limit_hi_f(u32 v)
120{
121 return (v & 0xffU) << 0U;
122}
123static inline u32 ram_in_adr_limit_hi_w(void)
124{
125 return 131U;
126}
127static inline u32 ram_in_engine_cs_w(void)
128{
129 return 132U;
130}
131static inline u32 ram_in_engine_cs_wfi_v(void)
132{
133 return 0x00000000U;
134}
135static inline u32 ram_in_engine_cs_wfi_f(void)
136{
137 return 0x0U;
138}
139static inline u32 ram_in_engine_cs_fg_v(void)
140{
141 return 0x00000001U;
142}
143static inline u32 ram_in_engine_cs_fg_f(void)
144{
145 return 0x8U;
146}
147static inline u32 ram_in_gr_cs_w(void)
148{
149 return 132U;
150}
151static inline u32 ram_in_gr_cs_wfi_f(void)
152{
153 return 0x0U;
154}
155static inline u32 ram_in_gr_wfi_target_w(void)
156{
157 return 132U;
158}
159static inline u32 ram_in_gr_wfi_mode_w(void)
160{
161 return 132U;
162}
163static inline u32 ram_in_gr_wfi_mode_physical_v(void)
164{
165 return 0x00000000U;
166}
167static inline u32 ram_in_gr_wfi_mode_physical_f(void)
168{
169 return 0x0U;
170}
171static inline u32 ram_in_gr_wfi_mode_virtual_v(void)
172{
173 return 0x00000001U;
174}
175static inline u32 ram_in_gr_wfi_mode_virtual_f(void)
176{
177 return 0x4U;
178}
179static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v)
180{
181 return (v & 0xfffffU) << 12U;
182}
183static inline u32 ram_in_gr_wfi_ptr_lo_w(void)
184{
185 return 132U;
186}
187static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v)
188{
189 return (v & 0xffU) << 0U;
190}
191static inline u32 ram_in_gr_wfi_ptr_hi_w(void)
192{
193 return 133U;
194}
195static inline u32 ram_in_base_shift_v(void)
196{
197 return 0x0000000cU;
198}
199static inline u32 ram_in_alloc_size_v(void)
200{
201 return 0x00001000U;
202}
203static inline u32 ram_fc_size_val_v(void)
204{
205 return 0x00000200U;
206}
207static inline u32 ram_fc_gp_put_w(void)
208{
209 return 0U;
210}
211static inline u32 ram_fc_userd_w(void)
212{
213 return 2U;
214}
215static inline u32 ram_fc_userd_hi_w(void)
216{
217 return 3U;
218}
219static inline u32 ram_fc_signature_w(void)
220{
221 return 4U;
222}
223static inline u32 ram_fc_gp_get_w(void)
224{
225 return 5U;
226}
227static inline u32 ram_fc_pb_get_w(void)
228{
229 return 6U;
230}
231static inline u32 ram_fc_pb_get_hi_w(void)
232{
233 return 7U;
234}
235static inline u32 ram_fc_pb_top_level_get_w(void)
236{
237 return 8U;
238}
239static inline u32 ram_fc_pb_top_level_get_hi_w(void)
240{
241 return 9U;
242}
243static inline u32 ram_fc_acquire_w(void)
244{
245 return 12U;
246}
247static inline u32 ram_fc_semaphorea_w(void)
248{
249 return 14U;
250}
251static inline u32 ram_fc_semaphoreb_w(void)
252{
253 return 15U;
254}
255static inline u32 ram_fc_semaphorec_w(void)
256{
257 return 16U;
258}
259static inline u32 ram_fc_semaphored_w(void)
260{
261 return 17U;
262}
263static inline u32 ram_fc_gp_base_w(void)
264{
265 return 18U;
266}
267static inline u32 ram_fc_gp_base_hi_w(void)
268{
269 return 19U;
270}
271static inline u32 ram_fc_gp_fetch_w(void)
272{
273 return 20U;
274}
275static inline u32 ram_fc_pb_fetch_w(void)
276{
277 return 21U;
278}
279static inline u32 ram_fc_pb_fetch_hi_w(void)
280{
281 return 22U;
282}
283static inline u32 ram_fc_pb_put_w(void)
284{
285 return 23U;
286}
287static inline u32 ram_fc_pb_put_hi_w(void)
288{
289 return 24U;
290}
291static inline u32 ram_fc_pb_header_w(void)
292{
293 return 33U;
294}
295static inline u32 ram_fc_pb_count_w(void)
296{
297 return 34U;
298}
299static inline u32 ram_fc_subdevice_w(void)
300{
301 return 37U;
302}
303static inline u32 ram_fc_formats_w(void)
304{
305 return 39U;
306}
307static inline u32 ram_fc_syncpointa_w(void)
308{
309 return 41U;
310}
311static inline u32 ram_fc_syncpointb_w(void)
312{
313 return 42U;
314}
315static inline u32 ram_fc_target_w(void)
316{
317 return 43U;
318}
319static inline u32 ram_fc_hce_ctrl_w(void)
320{
321 return 57U;
322}
323static inline u32 ram_fc_chid_w(void)
324{
325 return 58U;
326}
327static inline u32 ram_fc_chid_id_f(u32 v)
328{
329 return (v & 0xfffU) << 0U;
330}
331static inline u32 ram_fc_chid_id_w(void)
332{
333 return 0U;
334}
335static inline u32 ram_fc_runlist_timeslice_w(void)
336{
337 return 62U;
338}
339static inline u32 ram_fc_pb_timeslice_w(void)
340{
341 return 63U;
342}
343static inline u32 ram_userd_base_shift_v(void)
344{
345 return 0x00000009U;
346}
347static inline u32 ram_userd_chan_size_v(void)
348{
349 return 0x00000200U;
350}
351static inline u32 ram_userd_put_w(void)
352{
353 return 16U;
354}
355static inline u32 ram_userd_get_w(void)
356{
357 return 17U;
358}
359static inline u32 ram_userd_ref_w(void)
360{
361 return 18U;
362}
363static inline u32 ram_userd_put_hi_w(void)
364{
365 return 19U;
366}
367static inline u32 ram_userd_ref_threshold_w(void)
368{
369 return 20U;
370}
371static inline u32 ram_userd_top_level_get_w(void)
372{
373 return 22U;
374}
375static inline u32 ram_userd_top_level_get_hi_w(void)
376{
377 return 23U;
378}
379static inline u32 ram_userd_get_hi_w(void)
380{
381 return 24U;
382}
383static inline u32 ram_userd_gp_get_w(void)
384{
385 return 34U;
386}
387static inline u32 ram_userd_gp_put_w(void)
388{
389 return 35U;
390}
391static inline u32 ram_userd_gp_top_level_get_w(void)
392{
393 return 22U;
394}
395static inline u32 ram_userd_gp_top_level_get_hi_w(void)
396{
397 return 23U;
398}
399static inline u32 ram_rl_entry_size_v(void)
400{
401 return 0x00000008U;
402}
403static inline u32 ram_rl_entry_chid_f(u32 v)
404{
405 return (v & 0xfffU) << 0U;
406}
407static inline u32 ram_rl_entry_id_f(u32 v)
408{
409 return (v & 0xfffU) << 0U;
410}
411static inline u32 ram_rl_entry_type_f(u32 v)
412{
413 return (v & 0x1U) << 13U;
414}
415static inline u32 ram_rl_entry_type_chid_f(void)
416{
417 return 0x0U;
418}
419static inline u32 ram_rl_entry_type_tsg_f(void)
420{
421 return 0x2000U;
422}
423static inline u32 ram_rl_entry_timeslice_scale_f(u32 v)
424{
425 return (v & 0xfU) << 14U;
426}
427static inline u32 ram_rl_entry_timeslice_scale_3_f(void)
428{
429 return 0xc000U;
430}
431static inline u32 ram_rl_entry_timeslice_timeout_f(u32 v)
432{
433 return (v & 0xffU) << 18U;
434}
435static inline u32 ram_rl_entry_timeslice_timeout_128_f(void)
436{
437 return 0x2000000U;
438}
439static inline u32 ram_rl_entry_tsg_length_f(u32 v)
440{
441 return (v & 0x3fU) << 26U;
442}
443#endif
diff --git a/include/gk20a/hw_therm_gk20a.h b/include/gk20a/hw_therm_gk20a.h
new file mode 100644
index 0000000..075c9bc
--- /dev/null
+++ b/include/gk20a/hw_therm_gk20a.h
@@ -0,0 +1,367 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_therm_gk20a_h_
57#define _hw_therm_gk20a_h_
58
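/*
 * Usage sketch (illustrative only): a read-modify-write of the per-engine
 * gate control register using the _m()/_f() helpers defined below, i.e.
 * clear the field with its mask, then OR in the new constant. 'g' and the
 * engine index 'i' are assumptions for the example.
 *
 *   u32 gate = gk20a_readl(g, therm_gate_ctrl_r(i));
 *
 *   gate &= ~therm_gate_ctrl_eng_clk_m();
 *   gate |= therm_gate_ctrl_eng_clk_auto_f();
 *   gk20a_writel(g, therm_gate_ctrl_r(i), gate);
 */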
59static inline u32 therm_use_a_r(void)
60{
61 return 0x00020798U;
62}
63static inline u32 therm_use_a_ext_therm_0_enable_f(void)
64{
65 return 0x1U;
66}
67static inline u32 therm_use_a_ext_therm_1_enable_f(void)
68{
69 return 0x2U;
70}
71static inline u32 therm_use_a_ext_therm_2_enable_f(void)
72{
73 return 0x4U;
74}
75static inline u32 therm_evt_ext_therm_0_r(void)
76{
77 return 0x00020700U;
78}
79static inline u32 therm_evt_ext_therm_0_slow_factor_f(u32 v)
80{
81 return (v & 0x3fU) << 8U;
82}
83static inline u32 therm_evt_ext_therm_0_slow_factor_init_v(void)
84{
85 return 0x00000000U;
86}
87static inline u32 therm_evt_ext_therm_0_priority_f(u32 v)
88{
89 return (v & 0x1fU) << 24U;
90}
91static inline u32 therm_evt_ext_therm_1_r(void)
92{
93 return 0x00020704U;
94}
95static inline u32 therm_evt_ext_therm_1_slow_factor_f(u32 v)
96{
97 return (v & 0x3fU) << 8U;
98}
99static inline u32 therm_evt_ext_therm_1_slow_factor_init_v(void)
100{
101 return 0x00000000U;
102}
103static inline u32 therm_evt_ext_therm_1_priority_f(u32 v)
104{
105 return (v & 0x1fU) << 24U;
106}
107static inline u32 therm_evt_ext_therm_2_r(void)
108{
109 return 0x00020708U;
110}
111static inline u32 therm_evt_ext_therm_2_slow_factor_f(u32 v)
112{
113 return (v & 0x3fU) << 8U;
114}
115static inline u32 therm_evt_ext_therm_2_slow_factor_init_v(void)
116{
117 return 0x00000000U;
118}
119static inline u32 therm_evt_ext_therm_2_priority_f(u32 v)
120{
121 return (v & 0x1fU) << 24U;
122}
123static inline u32 therm_weight_1_r(void)
124{
125 return 0x00020024U;
126}
127static inline u32 therm_config1_r(void)
128{
129 return 0x00020050U;
130}
131static inline u32 therm_config2_r(void)
132{
133 return 0x00020130U;
134}
135static inline u32 therm_config2_slowdown_factor_extended_f(u32 v)
136{
137 return (v & 0x1U) << 24U;
138}
139static inline u32 therm_config2_grad_enable_f(u32 v)
140{
141 return (v & 0x1U) << 31U;
142}
143static inline u32 therm_gate_ctrl_r(u32 i)
144{
145 return 0x00020200U + i*4U;
146}
147static inline u32 therm_gate_ctrl_eng_clk_m(void)
148{
149 return 0x3U << 0U;
150}
151static inline u32 therm_gate_ctrl_eng_clk_run_f(void)
152{
153 return 0x0U;
154}
155static inline u32 therm_gate_ctrl_eng_clk_auto_f(void)
156{
157 return 0x1U;
158}
159static inline u32 therm_gate_ctrl_eng_clk_stop_f(void)
160{
161 return 0x2U;
162}
163static inline u32 therm_gate_ctrl_blk_clk_m(void)
164{
165 return 0x3U << 2U;
166}
167static inline u32 therm_gate_ctrl_blk_clk_run_f(void)
168{
169 return 0x0U;
170}
171static inline u32 therm_gate_ctrl_blk_clk_auto_f(void)
172{
173 return 0x4U;
174}
175static inline u32 therm_gate_ctrl_eng_pwr_m(void)
176{
177 return 0x3U << 4U;
178}
179static inline u32 therm_gate_ctrl_eng_pwr_auto_f(void)
180{
181 return 0x10U;
182}
183static inline u32 therm_gate_ctrl_eng_pwr_off_v(void)
184{
185 return 0x00000002U;
186}
187static inline u32 therm_gate_ctrl_eng_pwr_off_f(void)
188{
189 return 0x20U;
190}
191static inline u32 therm_gate_ctrl_eng_idle_filt_exp_f(u32 v)
192{
193 return (v & 0x1fU) << 8U;
194}
195static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void)
196{
197 return 0x1fU << 8U;
198}
199static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v)
200{
201 return (v & 0x7U) << 13U;
202}
203static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void)
204{
205 return 0x7U << 13U;
206}
207static inline u32 therm_gate_ctrl_eng_delay_before_f(u32 v)
208{
209 return (v & 0xfU) << 16U;
210}
211static inline u32 therm_gate_ctrl_eng_delay_before_m(void)
212{
213 return 0xfU << 16U;
214}
215static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v)
216{
217 return (v & 0xfU) << 20U;
218}
219static inline u32 therm_gate_ctrl_eng_delay_after_m(void)
220{
221 return 0xfU << 20U;
222}
223static inline u32 therm_fecs_idle_filter_r(void)
224{
225 return 0x00020288U;
226}
227static inline u32 therm_fecs_idle_filter_value_m(void)
228{
229 return 0xffffffffU << 0U;
230}
231static inline u32 therm_hubmmu_idle_filter_r(void)
232{
233 return 0x0002028cU;
234}
235static inline u32 therm_hubmmu_idle_filter_value_m(void)
236{
237 return 0xffffffffU << 0U;
238}
239static inline u32 therm_clk_slowdown_r(u32 i)
240{
241 return 0x00020160U + i*4U;
242}
243static inline u32 therm_clk_slowdown_idle_factor_f(u32 v)
244{
245 return (v & 0x3fU) << 16U;
246}
247static inline u32 therm_clk_slowdown_idle_factor_m(void)
248{
249 return 0x3fU << 16U;
250}
251static inline u32 therm_clk_slowdown_idle_factor_v(u32 r)
252{
253 return (r >> 16U) & 0x3fU;
254}
255static inline u32 therm_clk_slowdown_idle_factor_disabled_f(void)
256{
257 return 0x0U;
258}
259static inline u32 therm_grad_stepping_table_r(u32 i)
260{
261 return 0x000202c8U + i*4U;
262}
263static inline u32 therm_grad_stepping_table_slowdown_factor0_f(u32 v)
264{
265 return (v & 0x3fU) << 0U;
266}
267static inline u32 therm_grad_stepping_table_slowdown_factor0_m(void)
268{
269 return 0x3fU << 0U;
270}
271static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f(void)
272{
273 return 0x1U;
274}
275static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f(void)
276{
277 return 0x2U;
278}
279static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f(void)
280{
281 return 0x6U;
282}
283static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f(void)
284{
285 return 0xeU;
286}
287static inline u32 therm_grad_stepping_table_slowdown_factor1_f(u32 v)
288{
289 return (v & 0x3fU) << 6U;
290}
291static inline u32 therm_grad_stepping_table_slowdown_factor1_m(void)
292{
293 return 0x3fU << 6U;
294}
295static inline u32 therm_grad_stepping_table_slowdown_factor2_f(u32 v)
296{
297 return (v & 0x3fU) << 12U;
298}
299static inline u32 therm_grad_stepping_table_slowdown_factor2_m(void)
300{
301 return 0x3fU << 12U;
302}
303static inline u32 therm_grad_stepping_table_slowdown_factor3_f(u32 v)
304{
305 return (v & 0x3fU) << 18U;
306}
307static inline u32 therm_grad_stepping_table_slowdown_factor3_m(void)
308{
309 return 0x3fU << 18U;
310}
311static inline u32 therm_grad_stepping_table_slowdown_factor4_f(u32 v)
312{
313 return (v & 0x3fU) << 24U;
314}
315static inline u32 therm_grad_stepping_table_slowdown_factor4_m(void)
316{
317 return 0x3fU << 24U;
318}
319static inline u32 therm_grad_stepping0_r(void)
320{
321 return 0x000202c0U;
322}
323static inline u32 therm_grad_stepping0_feature_s(void)
324{
325 return 1U;
326}
327static inline u32 therm_grad_stepping0_feature_f(u32 v)
328{
329 return (v & 0x1U) << 0U;
330}
331static inline u32 therm_grad_stepping0_feature_m(void)
332{
333 return 0x1U << 0U;
334}
335static inline u32 therm_grad_stepping0_feature_v(u32 r)
336{
337 return (r >> 0U) & 0x1U;
338}
339static inline u32 therm_grad_stepping0_feature_enable_f(void)
340{
341 return 0x1U;
342}
343static inline u32 therm_grad_stepping1_r(void)
344{
345 return 0x000202c4U;
346}
347static inline u32 therm_grad_stepping1_pdiv_duration_f(u32 v)
348{
349 return (v & 0x1ffffU) << 0U;
350}
351static inline u32 therm_clk_timing_r(u32 i)
352{
353 return 0x000203c0U + i*4U;
354}
355static inline u32 therm_clk_timing_grad_slowdown_f(u32 v)
356{
357 return (v & 0x1U) << 16U;
358}
359static inline u32 therm_clk_timing_grad_slowdown_m(void)
360{
361 return 0x1U << 16U;
362}
363static inline u32 therm_clk_timing_grad_slowdown_enabled_f(void)
364{
365 return 0x10000U;
366}
367#endif
diff --git a/include/gk20a/hw_timer_gk20a.h b/include/gk20a/hw_timer_gk20a.h
new file mode 100644
index 0000000..972d68a
--- /dev/null
+++ b/include/gk20a/hw_timer_gk20a.h
@@ -0,0 +1,127 @@
1/*
2 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_timer_gk20a_h_
57#define _hw_timer_gk20a_h_
58
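/*
 * Usage sketch (illustrative only): programming the PRI timeout period and
 * enable bit with the _f() helpers defined below. The period value and the
 * 'g' pointer are assumptions made for the example.
 *
 *   gk20a_writel(g, timer_pri_timeout_r(),
 *                timer_pri_timeout_period_f(0x186a0U) |
 *                timer_pri_timeout_en_en_enabled_f());
 */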
59static inline u32 timer_pri_timeout_r(void)
60{
61 return 0x00009080U;
62}
63static inline u32 timer_pri_timeout_period_f(u32 v)
64{
65 return (v & 0xffffffU) << 0U;
66}
67static inline u32 timer_pri_timeout_period_m(void)
68{
69 return 0xffffffU << 0U;
70}
71static inline u32 timer_pri_timeout_period_v(u32 r)
72{
73 return (r >> 0U) & 0xffffffU;
74}
75static inline u32 timer_pri_timeout_en_f(u32 v)
76{
77 return (v & 0x1U) << 31U;
78}
79static inline u32 timer_pri_timeout_en_m(void)
80{
81 return 0x1U << 31U;
82}
83static inline u32 timer_pri_timeout_en_v(u32 r)
84{
85 return (r >> 31U) & 0x1U;
86}
87static inline u32 timer_pri_timeout_en_en_enabled_f(void)
88{
89 return 0x80000000U;
90}
91static inline u32 timer_pri_timeout_en_en_disabled_f(void)
92{
93 return 0x0U;
94}
95static inline u32 timer_pri_timeout_save_0_r(void)
96{
97 return 0x00009084U;
98}
99static inline u32 timer_pri_timeout_save_0_fecs_tgt_v(u32 r)
100{
101 return (r >> 31U) & 0x1U;
102}
103static inline u32 timer_pri_timeout_save_0_addr_v(u32 r)
104{
105 return (r >> 2U) & 0x3fffffU;
106}
107static inline u32 timer_pri_timeout_save_0_write_v(u32 r)
108{
109 return (r >> 1U) & 0x1U;
110}
111static inline u32 timer_pri_timeout_save_1_r(void)
112{
113 return 0x00009088U;
114}
115static inline u32 timer_pri_timeout_fecs_errcode_r(void)
116{
117 return 0x0000908cU;
118}
119static inline u32 timer_time_0_r(void)
120{
121 return 0x00009400U;
122}
123static inline u32 timer_time_1_r(void)
124{
125 return 0x00009410U;
126}
127#endif
diff --git a/include/gk20a/hw_top_gk20a.h b/include/gk20a/hw_top_gk20a.h
new file mode 100644
index 0000000..be7fa4a
--- /dev/null
+++ b/include/gk20a/hw_top_gk20a.h
@@ -0,0 +1,211 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_top_gk20a_h_
57#define _hw_top_gk20a_h_
58
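/*
 * Usage sketch (illustrative only): walking the device info table with the
 * _v() extractors defined below, picking out enum-type entries. 'g',
 * 'i' and 'engine_id' are assumptions for the example.
 *
 *   for (i = 0U; i < top_device_info__size_1_v(); i++) {
 *           u32 entry = gk20a_readl(g, top_device_info_r(i));
 *
 *           if (top_device_info_entry_v(entry) ==
 *                           top_device_info_entry_enum_v())
 *                   engine_id = top_device_info_engine_enum_v(entry);
 *   }
 */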
59static inline u32 top_num_gpcs_r(void)
60{
61 return 0x00022430U;
62}
63static inline u32 top_num_gpcs_value_v(u32 r)
64{
65 return (r >> 0U) & 0x1fU;
66}
67static inline u32 top_tpc_per_gpc_r(void)
68{
69 return 0x00022434U;
70}
71static inline u32 top_tpc_per_gpc_value_v(u32 r)
72{
73 return (r >> 0U) & 0x1fU;
74}
75static inline u32 top_num_fbps_r(void)
76{
77 return 0x00022438U;
78}
79static inline u32 top_num_fbps_value_v(u32 r)
80{
81 return (r >> 0U) & 0x1fU;
82}
83static inline u32 top_device_info_r(u32 i)
84{
85 return 0x00022700U + i*4U;
86}
87static inline u32 top_device_info__size_1_v(void)
88{
89 return 0x00000040U;
90}
91static inline u32 top_device_info_chain_v(u32 r)
92{
93 return (r >> 31U) & 0x1U;
94}
95static inline u32 top_device_info_chain_enable_v(void)
96{
97 return 0x00000001U;
98}
99static inline u32 top_device_info_engine_enum_v(u32 r)
100{
101 return (r >> 26U) & 0xfU;
102}
103static inline u32 top_device_info_runlist_enum_v(u32 r)
104{
105 return (r >> 21U) & 0xfU;
106}
107static inline u32 top_device_info_intr_enum_v(u32 r)
108{
109 return (r >> 15U) & 0x1fU;
110}
111static inline u32 top_device_info_reset_enum_v(u32 r)
112{
113 return (r >> 9U) & 0x1fU;
114}
115static inline u32 top_device_info_type_enum_v(u32 r)
116{
117 return (r >> 2U) & 0x1fffffffU;
118}
119static inline u32 top_device_info_type_enum_graphics_v(void)
120{
121 return 0x00000000U;
122}
123static inline u32 top_device_info_type_enum_graphics_f(void)
124{
125 return 0x0U;
126}
127static inline u32 top_device_info_type_enum_copy0_v(void)
128{
129 return 0x00000001U;
130}
131static inline u32 top_device_info_type_enum_copy0_f(void)
132{
133 return 0x4U;
134}
135static inline u32 top_device_info_type_enum_copy1_v(void)
136{
137 return 0x00000002U;
138}
139static inline u32 top_device_info_type_enum_copy1_f(void)
140{
141 return 0x8U;
142}
143static inline u32 top_device_info_type_enum_copy2_v(void)
144{
145 return 0x00000003U;
146}
147static inline u32 top_device_info_type_enum_copy2_f(void)
148{
149 return 0xcU;
150}
151static inline u32 top_device_info_engine_v(u32 r)
152{
153 return (r >> 5U) & 0x1U;
154}
155static inline u32 top_device_info_runlist_v(u32 r)
156{
157 return (r >> 4U) & 0x1U;
158}
159static inline u32 top_device_info_intr_v(u32 r)
160{
161 return (r >> 3U) & 0x1U;
162}
163static inline u32 top_device_info_reset_v(u32 r)
164{
165 return (r >> 2U) & 0x1U;
166}
167static inline u32 top_device_info_entry_v(u32 r)
168{
169 return (r >> 0U) & 0x3U;
170}
171static inline u32 top_device_info_entry_not_valid_v(void)
172{
173 return 0x00000000U;
174}
175static inline u32 top_device_info_entry_enum_v(void)
176{
177 return 0x00000002U;
178}
179static inline u32 top_device_info_entry_engine_type_v(void)
180{
181 return 0x00000003U;
182}
183static inline u32 top_device_info_entry_data_v(void)
184{
185 return 0x00000001U;
186}
187static inline u32 top_fs_status_fbp_r(void)
188{
189 return 0x00022548U;
190}
191static inline u32 top_fs_status_fbp_cluster_v(u32 r)
192{
193 return (r >> 0U) & 0xffffU;
194}
195static inline u32 top_fs_status_fbp_cluster_enable_v(void)
196{
197 return 0x00000000U;
198}
199static inline u32 top_fs_status_fbp_cluster_enable_f(void)
200{
201 return 0x0U;
202}
203static inline u32 top_fs_status_fbp_cluster_disable_v(void)
204{
205 return 0x00000001U;
206}
207static inline u32 top_fs_status_fbp_cluster_disable_f(void)
208{
209 return 0x1U;
210}
211#endif
diff --git a/include/gk20a/hw_trim_gk20a.h b/include/gk20a/hw_trim_gk20a.h
new file mode 100644
index 0000000..f28c21f
--- /dev/null
+++ b/include/gk20a/hw_trim_gk20a.h
@@ -0,0 +1,315 @@
1/*
2 * Copyright (c) 2012-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22/*
23 * Function naming determines intended use:
24 *
25 * <x>_r(void) : Returns the offset for register <x>.
26 *
27 * <x>_o(void) : Returns the offset for element <x>.
28 *
29 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
30 *
31 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
32 *
33 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
34 * and masked to place it at field <y> of register <x>. This value
35 * can be |'d with others to produce a full register value for
36 * register <x>.
37 *
38 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
39 * value can be ~'d and then &'d to clear the value of field <y> for
40 * register <x>.
41 *
42 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
43 * to place it at field <y> of register <x>. This value can be |'d
44 * with others to produce a full register value for <x>.
45 *
46 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
47 * <x> value 'r' after being shifted to place its LSB at bit 0.
48 * This value is suitable for direct comparison with other unshifted
49 * values appropriate for use in field <y> of register <x>.
50 *
51 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
52 * field <y> of register <x>. This value is suitable for direct
53 * comparison with unshifted values appropriate for use in field <y>
54 * of register <x>.
55 */
56#ifndef _hw_trim_gk20a_h_
57#define _hw_trim_gk20a_h_
58
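/*
 * Usage sketch (illustrative only): decoding the GPCPLL coefficients with
 * the _v() extractors defined below. 'g' is assumed to be the device
 * struct used by gk20a_readl().
 *
 *   u32 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
 *   u32 mdiv  = trim_sys_gpcpll_coeff_mdiv_v(coeff);
 *   u32 ndiv  = trim_sys_gpcpll_coeff_ndiv_v(coeff);
 *   u32 pldiv = trim_sys_gpcpll_coeff_pldiv_v(coeff);
 */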
59static inline u32 trim_sys_gpcpll_cfg_r(void)
60{
61 return 0x00137000U;
62}
63static inline u32 trim_sys_gpcpll_cfg_enable_m(void)
64{
65 return 0x1U << 0U;
66}
67static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r)
68{
69 return (r >> 0U) & 0x1U;
70}
71static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void)
72{
73 return 0x0U;
74}
75static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void)
76{
77 return 0x1U;
78}
79static inline u32 trim_sys_gpcpll_cfg_iddq_m(void)
80{
81 return 0x1U << 1U;
82}
83static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r)
84{
85 return (r >> 1U) & 0x1U;
86}
87static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void)
88{
89 return 0x00000000U;
90}
91static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void)
92{
93 return 0x1U << 4U;
94}
95static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void)
96{
97 return 0x0U;
98}
99static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void)
100{
101 return 0x10U;
102}
103static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r)
104{
105 return (r >> 17U) & 0x1U;
106}
107static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void)
108{
109 return 0x20000U;
110}
111static inline u32 trim_sys_gpcpll_coeff_r(void)
112{
113 return 0x00137004U;
114}
115static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v)
116{
117 return (v & 0xffU) << 0U;
118}
119static inline u32 trim_sys_gpcpll_coeff_mdiv_m(void)
120{
121 return 0xffU << 0U;
122}
123static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r)
124{
125 return (r >> 0U) & 0xffU;
126}
127static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v)
128{
129 return (v & 0xffU) << 8U;
130}
131static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void)
132{
133 return 0xffU << 8U;
134}
135static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r)
136{
137 return (r >> 8U) & 0xffU;
138}
139static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v)
140{
141 return (v & 0x3fU) << 16U;
142}
143static inline u32 trim_sys_gpcpll_coeff_pldiv_m(void)
144{
145 return 0x3fU << 16U;
146}
147static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r)
148{
149 return (r >> 16U) & 0x3fU;
150}
151static inline u32 trim_sys_sel_vco_r(void)
152{
153 return 0x00137100U;
154}
155static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void)
156{
157 return 0x1U << 0U;
158}
159static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void)
160{
161 return 0x00000000U;
162}
163static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void)
164{
165 return 0x0U;
166}
167static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void)
168{
169 return 0x0U;
170}
171static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void)
172{
173 return 0x1U;
174}
175static inline u32 trim_sys_gpc2clk_out_r(void)
176{
177 return 0x00137250U;
178}
179static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void)
180{
181 return 6U;
182}
183static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v)
184{
185 return (v & 0x3fU) << 0U;
186}
187static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void)
188{
189 return 0x3fU << 0U;
190}
191static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r)
192{
193 return (r >> 0U) & 0x3fU;
194}
195static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void)
196{
197 return 0x3cU;
198}
199static inline u32 trim_sys_gpc2clk_out_vcodiv_s(void)
200{
201 return 6U;
202}
203static inline u32 trim_sys_gpc2clk_out_vcodiv_f(u32 v)
204{
205 return (v & 0x3fU) << 8U;
206}
207static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void)
208{
209 return 0x3fU << 8U;
210}
211static inline u32 trim_sys_gpc2clk_out_vcodiv_v(u32 r)
212{
213 return (r >> 8U) & 0x3fU;
214}
215static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void)
216{
217 return 0x0U;
218}
219static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void)
220{
221 return 0x1U << 31U;
222}
223static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
224{
225 return 0x80000000U;
226}
227static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
228{
229 return 0x00134124U + i*512U;
230}
231static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
232{
233 return (v & 0x3fffU) << 0U;
234}
235static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
236{
237 return 0x10000U;
238}
239static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
240{
241 return 0x100000U;
242}
243static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
244{
245 return 0x1000000U;
246}
247static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
248{
249 return 0x00134128U + i*512U;
250}
251static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
252{
253 return (r >> 0U) & 0xfffffU;
254}
255static inline u32 trim_sys_gpcpll_cfg2_r(void)
256{
257 return 0x0013700cU;
258}
259static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v)
260{
261 return (v & 0xffU) << 24U;
262}
263static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void)
264{
265 return 0xffU << 24U;
266}
267static inline u32 trim_sys_gpcpll_cfg3_r(void)
268{
269 return 0x00137018U;
270}
271static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v)
272{
273 return (v & 0xffU) << 16U;
274}
275static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void)
276{
277 return 0xffU << 16U;
278}
279static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void)
280{
281 return 0x0013701cU;
282}
283static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void)
284{
285 return 0x1U << 22U;
286}
287static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void)
288{
289 return 0x400000U;
290}
291static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void)
292{
293 return 0x0U;
294}
295static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void)
296{
297 return 0x1U << 31U;
298}
299static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void)
300{
301 return 0x80000000U;
302}
303static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void)
304{
305 return 0x0U;
306}
307static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void)
308{
309 return 0x001328a0U;
310}
311static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r)
312{
313 return (r >> 24U) & 0x1U;
314}
315#endif
diff --git a/include/gk20a/mm_gk20a.c b/include/gk20a/mm_gk20a.c
new file mode 100644
index 0000000..10ca84d
--- /dev/null
+++ b/include/gk20a/mm_gk20a.c
@@ -0,0 +1,654 @@
1/*
2 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <trace/events/gk20a.h>
24
25#include <nvgpu/mm.h>
26#include <nvgpu/vm.h>
27#include <nvgpu/vm_area.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/timers.h>
31#include <nvgpu/pramin.h>
32#include <nvgpu/list.h>
33#include <nvgpu/nvgpu_mem.h>
34#include <nvgpu/allocator.h>
35#include <nvgpu/semaphore.h>
36#include <nvgpu/page_allocator.h>
37#include <nvgpu/log.h>
38#include <nvgpu/bug.h>
39#include <nvgpu/log2.h>
40#include <nvgpu/enabled.h>
41#include <nvgpu/vidmem.h>
42#include <nvgpu/sizes.h>
43#include <nvgpu/io.h>
44#include <nvgpu/utils.h>
45#include <nvgpu/channel.h>
46
47#include "gk20a.h"
48#include "mm_gk20a.h"
49#include "fence_gk20a.h"
50
51#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
52#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
53#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
54#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
55
56/*
57 * GPU mapping life cycle
58 * ======================
59 *
60 * Kernel mappings
61 * ---------------
62 *
63 * Kernel mappings are created through vm.map(..., false):
64 *
65 * - Mappings to the same allocations are reused and refcounted.
 66 * - This path does not support deferred unmapping (i.e. the kernel must
 67 *   wait for all hw operations on the buffer to complete before unmapping).
68 * - References to dmabuf are owned and managed by the (kernel) clients of
69 * the gk20a_vm layer.
70 *
71 *
72 * User space mappings
73 * -------------------
74 *
75 * User space mappings are created through as.map_buffer -> vm.map(..., true):
76 *
77 * - Mappings to the same allocations are reused and refcounted.
78 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
79 * until all hw operations have completed).
80 * - References to dmabuf are owned and managed by the vm_gk20a
81 * layer itself. vm.map acquires these refs, and sets
82 * mapped_buffer->own_mem_ref to record that we must release the refs when we
83 * actually unmap.
84 *
85 */
86
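/*
 * Illustrative sketch of the kernel-mapping path described above. The
 * helper names and signature details here are assumptions for the sake of
 * the example, not definitions from this file: a kernel client maps a
 * buffer, waits for all hw work touching it to finish, and only then
 * unmaps it (no deferred unmap on this path).
 *
 *   u64 gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
 *                               gk20a_mem_flag_none, false, mem->aperture);
 *   ...submit work and wait for all hw operations on 'mem' to complete...
 *   nvgpu_gmmu_unmap(vm, mem, gpu_va);
 */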
87/* make sure gk20a_init_mm_support is called before this function */
88int gk20a_init_mm_setup_hw(struct gk20a *g)
89{
90 struct mm_gk20a *mm = &g->mm;
91 int err;
92
93 nvgpu_log_fn(g, " ");
94
95 if (g->ops.fb.set_mmu_page_size) {
96 g->ops.fb.set_mmu_page_size(g);
97 }
98
99 if (g->ops.fb.set_use_full_comp_tag_line) {
100 mm->use_full_comp_tag_line =
101 g->ops.fb.set_use_full_comp_tag_line(g);
102 }
103
104 g->ops.fb.init_hw(g);
105
106 if (g->ops.bus.bar1_bind) {
107 g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);
108 }
109
110 if (g->ops.bus.bar2_bind) {
111 err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block);
112 if (err) {
113 return err;
114 }
115 }
116
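	/* flush the FB; with ||, a second flush is attempted only if the
	   first one reports busy, and -EBUSY is returned only if both fail */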
117 if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g)) {
118 return -EBUSY;
119 }
120
121 nvgpu_log_fn(g, "done");
122 return 0;
123}
124
125/* for gk20a the "video memory" apertures here are misnomers. */
126static inline u32 big_valid_pde0_bits(struct gk20a *g,
127 struct nvgpu_gmmu_pd *pd, u64 addr)
128{
129 u32 pde0_bits =
130 nvgpu_aperture_mask(g, pd->mem,
131 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
132 gmmu_pde_aperture_big_sys_mem_coh_f(),
133 gmmu_pde_aperture_big_video_memory_f()) |
134 gmmu_pde_address_big_sys_f(
135 (u32)(addr >> gmmu_pde_address_shift_v()));
136
137 return pde0_bits;
138}
139
140static inline u32 small_valid_pde1_bits(struct gk20a *g,
141 struct nvgpu_gmmu_pd *pd, u64 addr)
142{
143 u32 pde1_bits =
144 nvgpu_aperture_mask(g, pd->mem,
145 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
146 gmmu_pde_aperture_small_sys_mem_coh_f(),
147 gmmu_pde_aperture_small_video_memory_f()) |
148 gmmu_pde_vol_small_true_f() | /* tbd: why? */
149 gmmu_pde_address_small_sys_f(
150 (u32)(addr >> gmmu_pde_address_shift_v()));
151
152 return pde1_bits;
153}
154
155static void update_gmmu_pde_locked(struct vm_gk20a *vm,
156 const struct gk20a_mmu_level *l,
157 struct nvgpu_gmmu_pd *pd,
158 u32 pd_idx,
159 u64 virt_addr,
160 u64 phys_addr,
161 struct nvgpu_gmmu_attrs *attrs)
162{
163 struct gk20a *g = gk20a_from_vm(vm);
164 bool small_valid, big_valid;
165 u32 pd_offset = pd_offset_from_index(l, pd_idx);
166 u32 pde_v[2] = {0, 0};
167
168 small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
169 big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG;
170
171 pde_v[0] = gmmu_pde_size_full_f();
172 pde_v[0] |= big_valid ?
173 big_valid_pde0_bits(g, pd, phys_addr) :
174 gmmu_pde_aperture_big_invalid_f();
175
176 pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
177 (gmmu_pde_aperture_small_invalid_f() |
178 gmmu_pde_vol_small_false_f()))
179 |
180 (big_valid ? (gmmu_pde_vol_big_true_f()) :
181 gmmu_pde_vol_big_false_f());
182
183 pte_dbg(g, attrs,
184 "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
185 "GPU %#-12llx phys %#-12llx "
186 "[0x%08x, 0x%08x]",
187 pd_idx, l->entry_size, pd_offset,
188 small_valid ? 'S' : '-',
189 big_valid ? 'B' : '-',
190 virt_addr, phys_addr,
191 pde_v[1], pde_v[0]);
192
193 pd_write(g, &vm->pdb, pd_offset + 0, pde_v[0]);
194 pd_write(g, &vm->pdb, pd_offset + 1, pde_v[1]);
195}
196
197static void __update_pte_sparse(u32 *pte_w)
198{
199 pte_w[0] = gmmu_pte_valid_false_f();
200 pte_w[1] |= gmmu_pte_vol_true_f();
201}
202
203static void __update_pte(struct vm_gk20a *vm,
204 u32 *pte_w,
205 u64 phys_addr,
206 struct nvgpu_gmmu_attrs *attrs)
207{
208 struct gk20a *g = gk20a_from_vm(vm);
209 u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
210 u32 pte_valid = attrs->valid ?
211 gmmu_pte_valid_true_f() :
212 gmmu_pte_valid_false_f();
213 u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
214 u32 addr = attrs->aperture == APERTURE_SYSMEM ?
215 gmmu_pte_address_sys_f(phys_shifted) :
216 gmmu_pte_address_vid_f(phys_shifted);
217 int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
218
219 pte_w[0] = pte_valid | addr;
220
221 if (attrs->priv) {
222 pte_w[0] |= gmmu_pte_privilege_true_f();
223 }
224
225 pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
226 gmmu_pte_aperture_sys_mem_ncoh_f(),
227 gmmu_pte_aperture_sys_mem_coh_f(),
228 gmmu_pte_aperture_video_memory_f()) |
229 gmmu_pte_kind_f(attrs->kind_v) |
230 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
231
232 if (attrs->ctag && vm->mm->use_full_comp_tag_line &&
233 phys_addr & 0x10000) {
234 pte_w[1] |= gmmu_pte_comptagline_f(
235 1 << (gmmu_pte_comptagline_s() - 1));
236 }
237
238 if (attrs->rw_flag == gk20a_mem_flag_read_only) {
239 pte_w[0] |= gmmu_pte_read_only_true_f();
240 pte_w[1] |= gmmu_pte_write_disable_true_f();
241 } else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
242 pte_w[1] |= gmmu_pte_read_disable_true_f();
243 }
244
245 if (!attrs->cacheable) {
246 pte_w[1] |= gmmu_pte_vol_true_f();
247 }
248
249 if (attrs->ctag) {
250 attrs->ctag += page_size;
251 }
252}
253
254static void update_gmmu_pte_locked(struct vm_gk20a *vm,
255 const struct gk20a_mmu_level *l,
256 struct nvgpu_gmmu_pd *pd,
257 u32 pd_idx,
258 u64 virt_addr,
259 u64 phys_addr,
260 struct nvgpu_gmmu_attrs *attrs)
261{
262 struct gk20a *g = gk20a_from_vm(vm);
263 u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
264 u32 pd_offset = pd_offset_from_index(l, pd_idx);
265 u32 pte_w[2] = {0, 0};
266 int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
267
268 if (phys_addr) {
269 __update_pte(vm, pte_w, phys_addr, attrs);
270 } else if (attrs->sparse) {
271 __update_pte_sparse(pte_w);
272 }
273
274 pte_dbg(g, attrs,
275 "PTE: i=%-4u size=%-2u offs=%-4u | "
276 "GPU %#-12llx phys %#-12llx "
277 "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
278 "ctag=0x%08x "
279 "[0x%08x, 0x%08x]",
280 pd_idx, l->entry_size, pd_offset,
281 virt_addr, phys_addr,
282 page_size >> 10,
283 nvgpu_gmmu_perm_str(attrs->rw_flag),
284 attrs->kind_v,
285 nvgpu_aperture_str(g, attrs->aperture),
286 attrs->cacheable ? 'C' : '-',
287 attrs->sparse ? 'S' : '-',
288 attrs->priv ? 'P' : '-',
289 attrs->valid ? 'V' : '-',
290 (u32)attrs->ctag >> ctag_shift,
291 pte_w[1], pte_w[0]);
292
293 pd_write(g, pd, pd_offset + 0, pte_w[0]);
294 pd_write(g, pd, pd_offset + 1, pte_w[1]);
295}
296
297u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
298 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
299{
300 /*
301 * big and small page sizes are the same
302 */
303 return GMMU_PAGE_SIZE_SMALL;
304}
305
306u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
307 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
308{
309 /*
310	 * return the invalid page size marker (GMMU_NR_PAGE_SIZES)
311 */
312 return GMMU_NR_PAGE_SIZES;
313}
314
315const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
316 {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
317 .lo_bit = {26, 26},
318 .update_entry = update_gmmu_pde_locked,
319 .entry_size = 8,
320 .get_pgsz = gk20a_get_pde_pgsz},
321 {.hi_bit = {25, 25},
322 .lo_bit = {12, 16},
323 .update_entry = update_gmmu_pte_locked,
324 .entry_size = 8,
325 .get_pgsz = gk20a_get_pte_pgsz},
326 {.update_entry = NULL}
327};
328
329const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
330 {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
331 .lo_bit = {27, 27},
332 .update_entry = update_gmmu_pde_locked,
333 .entry_size = 8,
334 .get_pgsz = gk20a_get_pde_pgsz},
335 {.hi_bit = {26, 26},
336 .lo_bit = {12, 17},
337 .update_entry = update_gmmu_pte_locked,
338 .entry_size = 8,
339 .get_pgsz = gk20a_get_pte_pgsz},
340 {.update_entry = NULL}
341};
342
343int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
344{
345 int err = 0;
346
347 nvgpu_log_fn(ch->g, " ");
348
349 nvgpu_vm_get(vm);
350 ch->vm = vm;
351 err = channel_gk20a_commit_va(ch);
352 if (err) {
353 ch->vm = NULL;
354 }
355
356 nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s",
357 ch->chid, vm->name);
358
359 return err;
360}
361
362void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
363 struct vm_gk20a *vm)
364{
365 u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
366 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
367 u32 pdb_addr_hi = u64_hi32(pdb_addr);
368
369 nvgpu_log_info(g, "pde pa=0x%llx", pdb_addr);
370
371 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
372 nvgpu_aperture_mask(g, vm->pdb.mem,
373 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
374 ram_in_page_dir_base_target_sys_mem_coh_f(),
375 ram_in_page_dir_base_target_vid_mem_f()) |
376 ram_in_page_dir_base_vol_true_f() |
377 ram_in_page_dir_base_lo_f(pdb_addr_lo));
378
379 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
380 ram_in_page_dir_base_hi_f(pdb_addr_hi));
381}
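
A minimal sketch of the address split done by gk20a_mm_init_pdb() above, assuming ram_in_base_shift_v() evaluates to 12 (4 KB alignment) on this chip; the helper and the sample address are illustrative, not part of the driver:

/* Hedged sketch: decompose a sample (hypothetical) PDB address the way
 * gk20a_mm_init_pdb() does, assuming ram_in_base_shift_v() == 12. */
static void pdb_addr_split_sketch(void)
{
	u64 pdb_addr    = 0x123456000ULL;            /* hypothetical, 4 KB aligned */
	u32 pdb_addr_lo = u64_lo32(pdb_addr >> 12);  /* 0x00123456 -> _lo field */
	u32 pdb_addr_hi = u64_hi32(pdb_addr);        /* 0x00000001 -> _hi field */

	(void)pdb_addr_lo;
	(void)pdb_addr_hi;
}
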
382
383void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
384 u32 big_page_size)
385{
386 struct gk20a *g = gk20a_from_vm(vm);
387
388 nvgpu_log_info(g, "inst block phys = 0x%llx, kv = 0x%p",
389 nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
390
391 g->ops.mm.init_pdb(g, inst_block, vm);
392
393 nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
394 u64_lo32(vm->va_limit - 1) & ~0xfff);
395
396 nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
397 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));
398
399 if (big_page_size && g->ops.mm.set_big_page_size) {
400 g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
401 }
402}
403
404int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
405{
406 int err;
407
408 nvgpu_log_fn(g, " ");
409
410 err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
411 if (err) {
412 nvgpu_err(g, "%s: memory allocation failed", __func__);
413 return err;
414 }
415
416 nvgpu_log_fn(g, "done");
417 return 0;
418}
419
420int gk20a_mm_fb_flush(struct gk20a *g)
421{
422 struct mm_gk20a *mm = &g->mm;
423 u32 data;
424 int ret = 0;
425 struct nvgpu_timeout timeout;
426 u32 retries;
427
428 nvgpu_log_fn(g, " ");
429
430 gk20a_busy_noresume(g);
431 if (!g->power_on) {
432 gk20a_idle_nosuspend(g);
433 return 0;
434 }
435
436 retries = 100;
437
438 if (g->ops.mm.get_flush_retries) {
439 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB);
440 }
441
442 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
443
444 nvgpu_mutex_acquire(&mm->l2_op_lock);
445
446 /* Make sure all previous writes are committed to the L2. There's no
447 guarantee that writes are to DRAM. This will be a sysmembar internal
448 to the L2. */
449
450 trace_gk20a_mm_fb_flush(g->name);
451
452 gk20a_writel(g, flush_fb_flush_r(),
453 flush_fb_flush_pending_busy_f());
454
455 do {
456 data = gk20a_readl(g, flush_fb_flush_r());
457
458 if (flush_fb_flush_outstanding_v(data) ==
459 flush_fb_flush_outstanding_true_v() ||
460 flush_fb_flush_pending_v(data) ==
461 flush_fb_flush_pending_busy_v()) {
462 nvgpu_log_info(g, "fb_flush 0x%x", data);
463 nvgpu_udelay(5);
464 } else {
465 break;
466 }
467 } while (!nvgpu_timeout_expired(&timeout));
468
469 if (nvgpu_timeout_peek_expired(&timeout)) {
470 if (g->ops.fb.dump_vpr_info) {
471 g->ops.fb.dump_vpr_info(g);
472 }
473 if (g->ops.fb.dump_wpr_info) {
474 g->ops.fb.dump_wpr_info(g);
475 }
476 ret = -EBUSY;
477 }
478
479 trace_gk20a_mm_fb_flush_done(g->name);
480
481 nvgpu_mutex_release(&mm->l2_op_lock);
482
483 gk20a_idle_nosuspend(g);
484
485 return ret;
486}
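
gk20a_mm_fb_flush() and the L2/CBC maintenance routines below all share one shape: write the *_pending_busy_f() value to kick the operation, then poll the same register until neither the outstanding nor the pending field is set, giving up when the timeout expires. A minimal sketch of that shared loop; the helper name and the callback are illustrative, not an nvgpu API:

/* Hedged sketch of the poll loop used by the flush/invalidate paths in
 * this file. poll_done() stands in for the per-register decode of the
 * outstanding/pending fields; it is a hypothetical callback. */
static int flush_poll_sketch(struct gk20a *g, u32 reg,
			     bool (*poll_done)(u32 data),
			     struct nvgpu_timeout *timeout)
{
	u32 data;

	do {
		data = gk20a_readl(g, reg);
		if (poll_done(data)) {
			return 0;
		}
		nvgpu_udelay(5);
	} while (!nvgpu_timeout_expired(timeout));

	return -EBUSY;
}
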
487
488static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
489{
490 u32 data;
491 struct nvgpu_timeout timeout;
492 u32 retries = 200;
493
494 trace_gk20a_mm_l2_invalidate(g->name);
495
496 if (g->ops.mm.get_flush_retries) {
497 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV);
498 }
499
500 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
501
502 /* Invalidate any clean lines from the L2 so subsequent reads go to
503 DRAM. Dirty lines are not affected by this operation. */
504 gk20a_writel(g, flush_l2_system_invalidate_r(),
505 flush_l2_system_invalidate_pending_busy_f());
506
507 do {
508 data = gk20a_readl(g, flush_l2_system_invalidate_r());
509
510 if (flush_l2_system_invalidate_outstanding_v(data) ==
511 flush_l2_system_invalidate_outstanding_true_v() ||
512 flush_l2_system_invalidate_pending_v(data) ==
513 flush_l2_system_invalidate_pending_busy_v()) {
514 nvgpu_log_info(g, "l2_system_invalidate 0x%x",
515 data);
516 nvgpu_udelay(5);
517 } else {
518 break;
519 }
520 } while (!nvgpu_timeout_expired(&timeout));
521
522 if (nvgpu_timeout_peek_expired(&timeout)) {
523 nvgpu_warn(g, "l2_system_invalidate too many retries");
524 }
525
526 trace_gk20a_mm_l2_invalidate_done(g->name);
527}
528
529void gk20a_mm_l2_invalidate(struct gk20a *g)
530{
531 struct mm_gk20a *mm = &g->mm;
532 gk20a_busy_noresume(g);
533 if (g->power_on) {
534 nvgpu_mutex_acquire(&mm->l2_op_lock);
535 gk20a_mm_l2_invalidate_locked(g);
536 nvgpu_mutex_release(&mm->l2_op_lock);
537 }
538 gk20a_idle_nosuspend(g);
539}
540
541void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
542{
543 struct mm_gk20a *mm = &g->mm;
544 u32 data;
545 struct nvgpu_timeout timeout;
546 u32 retries = 2000;
547
548 nvgpu_log_fn(g, " ");
549
550 gk20a_busy_noresume(g);
551 if (!g->power_on) {
552 goto hw_was_off;
553 }
554
555 if (g->ops.mm.get_flush_retries) {
556 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH);
557 }
558
559 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
560
561 nvgpu_mutex_acquire(&mm->l2_op_lock);
562
563 trace_gk20a_mm_l2_flush(g->name);
564
565 /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
566 as clean, so subsequent reads might hit in the L2. */
567 gk20a_writel(g, flush_l2_flush_dirty_r(),
568 flush_l2_flush_dirty_pending_busy_f());
569
570 do {
571 data = gk20a_readl(g, flush_l2_flush_dirty_r());
572
573 if (flush_l2_flush_dirty_outstanding_v(data) ==
574 flush_l2_flush_dirty_outstanding_true_v() ||
575 flush_l2_flush_dirty_pending_v(data) ==
576 flush_l2_flush_dirty_pending_busy_v()) {
577 nvgpu_log_info(g, "l2_flush_dirty 0x%x", data);
578 nvgpu_udelay(5);
579 } else {
580 break;
581 }
582 } while (!nvgpu_timeout_expired_msg(&timeout,
583 "l2_flush_dirty too many retries"));
584
585 trace_gk20a_mm_l2_flush_done(g->name);
586
587 if (invalidate) {
588 gk20a_mm_l2_invalidate_locked(g);
589 }
590
591 nvgpu_mutex_release(&mm->l2_op_lock);
592
593hw_was_off:
594 gk20a_idle_nosuspend(g);
595}
596
597void gk20a_mm_cbc_clean(struct gk20a *g)
598{
599 struct mm_gk20a *mm = &g->mm;
600 u32 data;
601 struct nvgpu_timeout timeout;
602 u32 retries = 200;
603
604 nvgpu_log_fn(g, " ");
605
606 gk20a_busy_noresume(g);
607 if (!g->power_on) {
608 goto hw_was_off;
609 }
610
611 if (g->ops.mm.get_flush_retries) {
612 retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN);
613 }
614
615 nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
616
617 nvgpu_mutex_acquire(&mm->l2_op_lock);
618
619 /* Flush all dirty lines from the CBC to L2 */
620 gk20a_writel(g, flush_l2_clean_comptags_r(),
621 flush_l2_clean_comptags_pending_busy_f());
622
623 do {
624 data = gk20a_readl(g, flush_l2_clean_comptags_r());
625
626 if (flush_l2_clean_comptags_outstanding_v(data) ==
627 flush_l2_clean_comptags_outstanding_true_v() ||
628 flush_l2_clean_comptags_pending_v(data) ==
629 flush_l2_clean_comptags_pending_busy_v()) {
630 nvgpu_log_info(g, "l2_clean_comptags 0x%x", data);
631 nvgpu_udelay(5);
632 } else {
633 break;
634 }
635 } while (!nvgpu_timeout_expired_msg(&timeout,
636 "l2_clean_comptags too many retries"));
637
638 nvgpu_mutex_release(&mm->l2_op_lock);
639
640hw_was_off:
641 gk20a_idle_nosuspend(g);
642}
643
644u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
645{
646 return 34;
647}
648
649const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
650 u32 big_page_size)
651{
652 return (big_page_size == SZ_64K) ?
653 gk20a_mm_levels_64k : gk20a_mm_levels_128k;
654}
diff --git a/include/gk20a/mm_gk20a.h b/include/gk20a/mm_gk20a.h
new file mode 100644
index 0000000..76a1621
--- /dev/null
+++ b/include/gk20a/mm_gk20a.h
@@ -0,0 +1,155 @@
1/*
2 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef MM_GK20A_H
24#define MM_GK20A_H
25
26#include <nvgpu/nvgpu_mem.h>
27#include <nvgpu/allocator.h>
28#include <nvgpu/vm.h>
29#include <nvgpu/list.h>
30#include <nvgpu/rbtree.h>
31#include <nvgpu/kref.h>
32
33enum gk20a_mem_rw_flag;
34
35struct patch_desc {
36 struct nvgpu_mem mem;
37 u32 data_count;
38};
39
40struct zcull_ctx_desc {
41 u64 gpu_va;
42 u32 ctx_attr;
43 u32 ctx_sw_mode;
44};
45
46struct pm_ctx_desc {
47 struct nvgpu_mem mem;
48 u32 pm_mode;
49};
50
51struct compbit_store_desc {
52 struct nvgpu_mem mem;
53
54 /* The value that is written to the hardware. This depends on
55	 * the number of ltcs and is not an address. */
56 u64 base_hw;
57};
58
59struct gk20a_buffer_state {
60 struct nvgpu_list_node list;
61
62 /* The valid compbits and the fence must be changed atomically. */
63 struct nvgpu_mutex lock;
64
65 /* Offset of the surface within the dma-buf whose state is
66 * described by this struct (one dma-buf can contain multiple
67 * surfaces with different states). */
68 size_t offset;
69
70 /* A bitmask of valid sets of compbits (0 = uncompressed). */
71 u32 valid_compbits;
72
73 /* The ZBC color used on this buffer. */
74 u32 zbc_color;
75
76 /* This struct reflects the state of the buffer when this
77 * fence signals. */
78 struct gk20a_fence *fence;
79};
80
81static inline struct gk20a_buffer_state *
82gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
83{
84 return (struct gk20a_buffer_state *)
85 ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list));
86};
87
88struct gk20a;
89struct channel_gk20a;
90
91int gk20a_mm_fb_flush(struct gk20a *g);
92void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
93void gk20a_mm_cbc_clean(struct gk20a *g);
94void gk20a_mm_l2_invalidate(struct gk20a *g);
95
96#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
97
98void gk20a_mm_ltc_isr(struct gk20a *g);
99
100bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
101
102int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
103void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
104 u32 big_page_size);
105int gk20a_init_mm_setup_hw(struct gk20a *g);
106
107u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
108 u64 map_offset,
109 struct nvgpu_sgt *sgt,
110 u64 buffer_offset,
111 u64 size,
112 u32 pgsz_idx,
113 u8 kind_v,
114 u32 ctag_offset,
115 u32 flags,
116 enum gk20a_mem_rw_flag rw_flag,
117 bool clear_ctags,
118 bool sparse,
119 bool priv,
120 struct vm_gk20a_mapping_batch *batch,
121 enum nvgpu_aperture aperture);
122
123void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
124 u64 vaddr,
125 u64 size,
126 u32 pgsz_idx,
127 bool va_allocated,
128 enum gk20a_mem_rw_flag rw_flag,
129 bool sparse,
130 struct vm_gk20a_mapping_batch *batch);
131
132/* vm-as interface */
133struct nvgpu_as_alloc_space_args;
134struct nvgpu_as_free_space_args;
135int gk20a_vm_release_share(struct gk20a_as_share *as_share);
136int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);
137
138void pde_range_from_vaddr_range(struct vm_gk20a *vm,
139 u64 addr_lo, u64 addr_hi,
140 u32 *pde_lo, u32 *pde_hi);
141u32 gk20a_mm_get_iommu_bit(struct gk20a *g);
142
143const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
144 u32 big_page_size);
145void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
146 struct vm_gk20a *vm);
147
148extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
149extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
150
151u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
152 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
153u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
154 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
155#endif /* MM_GK20A_H */
diff --git a/include/gk20a/pmu_gk20a.c b/include/gk20a/pmu_gk20a.c
new file mode 100644
index 0000000..63a32f0
--- /dev/null
+++ b/include/gk20a/pmu_gk20a.c
@@ -0,0 +1,879 @@
1/*
2 * GK20A PMU (aka. gPMU outside gk20a context)
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/nvgpu_common.h>
26#include <nvgpu/timers.h>
27#include <nvgpu/kmem.h>
28#include <nvgpu/dma.h>
29#include <nvgpu/log.h>
30#include <nvgpu/bug.h>
31#include <nvgpu/firmware.h>
32#include <nvgpu/falcon.h>
33#include <nvgpu/mm.h>
34#include <nvgpu/io.h>
35#include <nvgpu/clk_arb.h>
36#include <nvgpu/utils.h>
37#include <nvgpu/unit.h>
38
39#include "gk20a.h"
40#include "gr_gk20a.h"
41#include "pmu_gk20a.h"
42
43#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_pwr_gk20a.h>
45#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
46
47#define gk20a_dbg_pmu(g, fmt, arg...) \
48 nvgpu_log(g, gpu_dbg_pmu, fmt, ##arg)
49
50bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos)
51{
52 u32 i = 0, j = strlen(strings);
53
54 for (; i < j; i++) {
55 if (strings[i] == '%') {
56 if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
57 *hex_pos = i;
58 return true;
59 }
60 }
61 }
62 *hex_pos = -1;
63 return false;
64}
65
66static void print_pmu_trace(struct nvgpu_pmu *pmu)
67{
68 struct gk20a *g = pmu->g;
69 u32 i = 0, j = 0, k, l, m, count;
70 char part_str[40], buf[0x40];
71 void *tracebuffer;
72 char *trace;
73 u32 *trace1;
74
75 /* allocate system memory to copy pmu trace buffer */
76 tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
77 if (tracebuffer == NULL) {
78 return;
79 }
80
81 /* read pmu traces into system memory buffer */
82 nvgpu_mem_rd_n(g, &pmu->trace_buf, 0, tracebuffer,
83 GK20A_PMU_TRACE_BUFSIZE);
84
85 trace = (char *)tracebuffer;
86 trace1 = (u32 *)tracebuffer;
87
88 nvgpu_err(g, "dump PMU trace buffer");
89 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
90 for (j = 0; j < 0x40; j++) {
91 if (trace1[(i / 4) + j]) {
92 break;
93 }
94 }
95 if (j == 0x40) {
96 break;
97 }
98 count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]);
99 l = 0;
100 m = 0;
101 while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
102 if (k >= 40) {
103 break;
104 }
105 strncpy(part_str, (trace+i+20+m), k);
106 part_str[k] = '\0';
107 count += scnprintf((buf + count), 0x40, "%s0x%x",
108 part_str, trace1[(i / 4) + 1 + l]);
109 l++;
110 m += k + 2;
111 }
112
113 scnprintf((buf + count), 0x40, "%s", (trace+i+20+m));
114 nvgpu_err(g, "%s", buf);
115 }
116
117 nvgpu_kfree(g, tracebuffer);
118}
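
nvgpu_find_hex_in_string() only reports where the first "%x"/"%X" sits in a PMU trace format string; print_pmu_trace() then splices the raw argument words in at that point. A small, hedged usage sketch (the format string is made up):

/* Hedged usage sketch; the trace string below is a made-up example. */
static void find_hex_sketch(struct gk20a *g)
{
	u32 pos = 0;

	if (nvgpu_find_hex_in_string("gr engine state %x", g, &pos)) {
		/* pos == 16: index of '%' in the string above */
	}
}
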
119
120u32 gk20a_pmu_get_irqdest(struct gk20a *g)
121{
122 u32 intr_dest;
123
124 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
125 intr_dest = pwr_falcon_irqdest_host_gptmr_f(0) |
126 pwr_falcon_irqdest_host_wdtmr_f(1) |
127 pwr_falcon_irqdest_host_mthd_f(0) |
128 pwr_falcon_irqdest_host_ctxsw_f(0) |
129 pwr_falcon_irqdest_host_halt_f(1) |
130 pwr_falcon_irqdest_host_exterr_f(0) |
131 pwr_falcon_irqdest_host_swgen0_f(1) |
132 pwr_falcon_irqdest_host_swgen1_f(0) |
133 pwr_falcon_irqdest_host_ext_f(0xff) |
134 pwr_falcon_irqdest_target_gptmr_f(1) |
135 pwr_falcon_irqdest_target_wdtmr_f(0) |
136 pwr_falcon_irqdest_target_mthd_f(0) |
137 pwr_falcon_irqdest_target_ctxsw_f(0) |
138 pwr_falcon_irqdest_target_halt_f(0) |
139 pwr_falcon_irqdest_target_exterr_f(0) |
140 pwr_falcon_irqdest_target_swgen0_f(0) |
141 pwr_falcon_irqdest_target_swgen1_f(0) |
142 pwr_falcon_irqdest_target_ext_f(0xff);
143
144 return intr_dest;
145}
146
147void gk20a_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable)
148{
149 struct gk20a *g = gk20a_from_pmu(pmu);
150 u32 intr_mask;
151 u32 intr_dest;
152
153 nvgpu_log_fn(g, " ");
154
155 g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_DISABLE, true,
156 mc_intr_mask_0_pmu_enabled_f());
157 g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_DISABLE, false,
158 mc_intr_mask_1_pmu_enabled_f());
159
160 nvgpu_flcn_set_irq(pmu->flcn, false, 0x0, 0x0);
161
162 if (enable) {
163 intr_dest = g->ops.pmu.get_irqdest(g);
164 /* 0=disable, 1=enable */
165 intr_mask = pwr_falcon_irqmset_gptmr_f(1) |
166 pwr_falcon_irqmset_wdtmr_f(1) |
167 pwr_falcon_irqmset_mthd_f(0) |
168 pwr_falcon_irqmset_ctxsw_f(0) |
169 pwr_falcon_irqmset_halt_f(1) |
170 pwr_falcon_irqmset_exterr_f(1) |
171 pwr_falcon_irqmset_swgen0_f(1) |
172 pwr_falcon_irqmset_swgen1_f(1);
173
174 nvgpu_flcn_set_irq(pmu->flcn, true, intr_mask, intr_dest);
175
176 g->ops.mc.intr_unit_config(g, MC_INTR_UNIT_ENABLE, true,
177 mc_intr_mask_0_pmu_enabled_f());
178 }
179
180 nvgpu_log_fn(g, "done");
181}
182
183
184
185int pmu_bootstrap(struct nvgpu_pmu *pmu)
186{
187 struct gk20a *g = gk20a_from_pmu(pmu);
188 struct mm_gk20a *mm = &g->mm;
189 struct pmu_ucode_desc *desc = pmu->desc;
190 u64 addr_code, addr_data, addr_load;
191 u32 i, blocks, addr_args;
192
193 nvgpu_log_fn(g, " ");
194
195 gk20a_writel(g, pwr_falcon_itfen_r(),
196 gk20a_readl(g, pwr_falcon_itfen_r()) |
197 pwr_falcon_itfen_ctxen_enable_f());
198 gk20a_writel(g, pwr_pmu_new_instblk_r(),
199 pwr_pmu_new_instblk_ptr_f(
200 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
201 pwr_pmu_new_instblk_valid_f(1) |
202 pwr_pmu_new_instblk_target_sys_coh_f());
203
204 /* TBD: load all other surfaces */
205 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
206 pmu, GK20A_PMU_TRACE_BUFSIZE);
207 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu);
208 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx(
209 pmu, GK20A_PMU_DMAIDX_VIRT);
210
211 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
212 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK));
213
214 addr_args = (pwr_falcon_hwcfg_dmem_size_v(
215 gk20a_readl(g, pwr_falcon_hwcfg_r()))
216 << GK20A_PMU_DMEM_BLKSIZE2) -
217 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
218
219 nvgpu_flcn_copy_to_dmem(pmu->flcn, addr_args,
220 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
221 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
222
223 gk20a_writel(g, pwr_falcon_dmemc_r(0),
224 pwr_falcon_dmemc_offs_f(0) |
225 pwr_falcon_dmemc_blk_f(0) |
226 pwr_falcon_dmemc_aincw_f(1));
227
228 addr_code = u64_lo32((pmu->ucode.gpu_va +
229 desc->app_start_offset +
230			desc->app_resident_code_offset) >> 8);
231 addr_data = u64_lo32((pmu->ucode.gpu_va +
232 desc->app_start_offset +
233 desc->app_resident_data_offset) >> 8);
234 addr_load = u64_lo32((pmu->ucode.gpu_va +
235 desc->bootloader_start_offset) >> 8);
236
237 gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
238 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
239 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
240 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
241 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
242 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
243 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
244 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
245 gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
246 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
247
248 g->ops.pmu.write_dmatrfbase(g,
249 addr_load - (desc->bootloader_imem_offset >> 8));
250
251 blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
252
253 for (i = 0; i < blocks; i++) {
254 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
255 desc->bootloader_imem_offset + (i << 8));
256 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
257 desc->bootloader_imem_offset + (i << 8));
258 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
259 pwr_falcon_dmatrfcmd_imem_f(1) |
260 pwr_falcon_dmatrfcmd_write_f(0) |
261 pwr_falcon_dmatrfcmd_size_f(6) |
262 pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
263 }
264
265 nvgpu_flcn_bootstrap(g->pmu.flcn, desc->bootloader_entry_point);
266
267 gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
268
269 return 0;
270}
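
The addr_args computation in pmu_bootstrap() places the command-line argument block at the very top of PMU DMEM: the size field of pwr_falcon_hwcfg_r() counts DMEM blocks, which are shifted up by GK20A_PMU_DMEM_BLKSIZE2 to get bytes, and the args size is subtracted from that. A worked example, assuming 256-byte blocks (GK20A_PMU_DMEM_BLKSIZE2 == 8) and hypothetical sizes:

/* Hedged arithmetic sketch for addr_args, assuming 256-byte DMEM blocks
 * (GK20A_PMU_DMEM_BLKSIZE2 == 8); both input values are hypothetical. */
static u32 pmu_args_offset_sketch(void)
{
	u32 dmem_blocks = 96;   /* pwr_falcon_hwcfg_dmem_size_v(): 96 * 256 B = 24 KB */
	u32 args_size   = 64;   /* get_pmu_cmdline_args_size(): 64 B */

	return (dmem_blocks << 8) - args_size;   /* 0x6000 - 0x40 = 0x5fc0 */
}
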
271
272void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id)
273{
274 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(pg_engine_id),
275 PMU_PG_IDLE_THRESHOLD);
276 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(pg_engine_id),
277 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
278}
279
280int gk20a_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token)
281{
282 struct gk20a *g = gk20a_from_pmu(pmu);
283 struct pmu_mutex *mutex;
284 u32 data, owner, max_retry;
285
286 if (!pmu->initialized) {
287 return -EINVAL;
288 }
289
290 BUG_ON(!token);
291 BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
292 BUG_ON(id > pmu->mutex_cnt);
293
294 mutex = &pmu->mutex[id];
295
296 owner = pwr_pmu_mutex_value_v(
297 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
298
299 if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
300 BUG_ON(mutex->ref_cnt == 0);
301 gk20a_dbg_pmu(g, "already acquired by owner : 0x%08x", *token);
302 mutex->ref_cnt++;
303 return 0;
304 }
305
306 max_retry = 40;
307 do {
308 data = pwr_pmu_mutex_id_value_v(
309 gk20a_readl(g, pwr_pmu_mutex_id_r()));
310 if (data == pwr_pmu_mutex_id_value_init_v() ||
311 data == pwr_pmu_mutex_id_value_not_avail_v()) {
312 nvgpu_warn(g,
313 "fail to generate mutex token: val 0x%08x",
314 owner);
315 nvgpu_usleep_range(20, 40);
316 continue;
317 }
318
319 owner = data;
320 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
321 pwr_pmu_mutex_value_f(owner));
322
323 data = pwr_pmu_mutex_value_v(
324 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
325
326 if (owner == data) {
327 mutex->ref_cnt = 1;
328 gk20a_dbg_pmu(g, "mutex acquired: id=%d, token=0x%x",
329 mutex->index, *token);
330 *token = owner;
331 return 0;
332 } else {
333 nvgpu_log_info(g, "fail to acquire mutex idx=0x%08x",
334 mutex->index);
335
336 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
337 data = set_field(data,
338 pwr_pmu_mutex_id_release_value_m(),
339 pwr_pmu_mutex_id_release_value_f(owner));
340 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
341
342 nvgpu_usleep_range(20, 40);
343 continue;
344 }
345 } while (max_retry-- > 0);
346
347 return -EBUSY;
348}
349
350int gk20a_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token)
351{
352 struct gk20a *g = gk20a_from_pmu(pmu);
353 struct pmu_mutex *mutex;
354 u32 owner, data;
355
356 if (!pmu->initialized) {
357 return -EINVAL;
358 }
359
360 BUG_ON(!token);
361 BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
362 BUG_ON(id > pmu->mutex_cnt);
363
364 mutex = &pmu->mutex[id];
365
366 owner = pwr_pmu_mutex_value_v(
367 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
368
369 if (*token != owner) {
370		nvgpu_err(g, "requester 0x%08x does NOT match owner 0x%08x",
371 *token, owner);
372 return -EINVAL;
373 }
374
375 if (--mutex->ref_cnt > 0) {
376 return -EBUSY;
377 }
378
379 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
380 pwr_pmu_mutex_value_initial_lock_f());
381
382 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
383 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
384 pwr_pmu_mutex_id_release_value_f(owner));
385 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
386
387 gk20a_dbg_pmu(g, "mutex released: id=%d, token=0x%x",
388 mutex->index, *token);
389
390 return 0;
391}
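
gk20a_pmu_mutex_acquire()/release() implement the PMU hardware mutex handshake: acquire obtains a token from pwr_pmu_mutex_id_r(), writes it into the mutex register and re-reads it to confirm ownership; release must be handed the same token back. A minimal usage sketch, where PMU_MUTEX_ID_FIFO stands in for whichever mutex id the caller actually holds:

/* Hedged usage sketch of the HW mutex pair above; the mutex id is only
 * a plausible example. */
static void pmu_mutex_usage_sketch(struct nvgpu_pmu *pmu)
{
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;

	if (gk20a_pmu_mutex_acquire(pmu, PMU_MUTEX_ID_FIFO, &token) == 0) {
		/* ... touch the resource shared with the PMU ucode ... */
		gk20a_pmu_mutex_release(pmu, PMU_MUTEX_ID_FIFO, &token);
	}
}
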
392
393int gk20a_pmu_queue_head(struct gk20a *g, struct nvgpu_falcon_queue *queue,
394 u32 *head, bool set)
395{
396 u32 queue_head_size = 0;
397
398 if (g->ops.pmu.pmu_get_queue_head_size) {
399 queue_head_size = g->ops.pmu.pmu_get_queue_head_size();
400 }
401
402 BUG_ON(!head || !queue_head_size);
403
404 if (PMU_IS_COMMAND_QUEUE(queue->id)) {
405
406 if (queue->index >= queue_head_size) {
407 return -EINVAL;
408 }
409
410 if (!set) {
411 *head = pwr_pmu_queue_head_address_v(
412 gk20a_readl(g,
413 g->ops.pmu.pmu_get_queue_head(queue->index)));
414 } else {
415 gk20a_writel(g,
416 g->ops.pmu.pmu_get_queue_head(queue->index),
417 pwr_pmu_queue_head_address_f(*head));
418 }
419 } else {
420 if (!set) {
421 *head = pwr_pmu_msgq_head_val_v(
422 gk20a_readl(g, pwr_pmu_msgq_head_r()));
423 } else {
424 gk20a_writel(g,
425 pwr_pmu_msgq_head_r(),
426 pwr_pmu_msgq_head_val_f(*head));
427 }
428 }
429
430 return 0;
431}
432
433int gk20a_pmu_queue_tail(struct gk20a *g, struct nvgpu_falcon_queue *queue,
434 u32 *tail, bool set)
435{
436 u32 queue_tail_size = 0;
437
438 if (g->ops.pmu.pmu_get_queue_tail_size) {
439 queue_tail_size = g->ops.pmu.pmu_get_queue_tail_size();
440 }
441
442 BUG_ON(!tail || !queue_tail_size);
443
444 if (PMU_IS_COMMAND_QUEUE(queue->id)) {
445
446 if (queue->index >= queue_tail_size) {
447 return -EINVAL;
448 }
449
450 if (!set) {
451 *tail = pwr_pmu_queue_tail_address_v(gk20a_readl(g,
452 g->ops.pmu.pmu_get_queue_tail(queue->index)));
453 } else {
454 gk20a_writel(g,
455 g->ops.pmu.pmu_get_queue_tail(queue->index),
456 pwr_pmu_queue_tail_address_f(*tail));
457 }
458
459 } else {
460 if (!set) {
461 *tail = pwr_pmu_msgq_tail_val_v(
462 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
463 } else {
464 gk20a_writel(g,
465 pwr_pmu_msgq_tail_r(),
466 pwr_pmu_msgq_tail_val_f(*tail));
467 }
468 }
469
470 return 0;
471}
472
473void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set)
474{
475 struct gk20a *g = gk20a_from_pmu(pmu);
476 u32 queue_tail_size = 0;
477
478 if (g->ops.pmu.pmu_get_queue_tail_size) {
479 queue_tail_size = g->ops.pmu.pmu_get_queue_tail_size();
480 }
481
482 BUG_ON(!tail || !queue_tail_size);
483
484 if (!set) {
485 *tail = pwr_pmu_msgq_tail_val_v(
486 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
487 } else {
488 gk20a_writel(g,
489 pwr_pmu_msgq_tail_r(),
490 pwr_pmu_msgq_tail_val_f(*tail));
491 }
492}
493
494void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr)
495{
496 gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr);
497}
498
499bool gk20a_pmu_is_engine_in_reset(struct gk20a *g)
500{
501 bool status = false;
502
503 status = g->ops.mc.is_enabled(g, NVGPU_UNIT_PWR);
504
505 return status;
506}
507
508int gk20a_pmu_engine_reset(struct gk20a *g, bool do_reset)
509{
510 u32 reset_mask = g->ops.mc.reset_mask(g, NVGPU_UNIT_PWR);
511
512 if (do_reset) {
513 g->ops.mc.enable(g, reset_mask);
514 } else {
515 g->ops.mc.disable(g, reset_mask);
516 }
517
518 return 0;
519}
520
521bool gk20a_is_pmu_supported(struct gk20a *g)
522{
523 return true;
524}
525
526u32 gk20a_pmu_pg_engines_list(struct gk20a *g)
527{
528 return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS);
529}
530
531u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id)
532{
533 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) {
534 return NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING;
535 }
536
537 return 0;
538}
539
540static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
541 void *param, u32 handle, u32 status)
542{
543 struct nvgpu_pmu *pmu = param;
544 gk20a_dbg_pmu(g, "reply ZBC_TABLE_UPDATE");
545 pmu->zbc_save_done = 1;
546}
547
548void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
549{
550 struct nvgpu_pmu *pmu = &g->pmu;
551 struct pmu_cmd cmd;
552 u32 seq;
553
554 if (!pmu->pmu_ready || !entries || !pmu->zbc_ready) {
555 return;
556 }
557
558 memset(&cmd, 0, sizeof(struct pmu_cmd));
559 cmd.hdr.unit_id = PMU_UNIT_PG;
560 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
561 cmd.cmd.zbc.cmd_type = g->pmu_ver_cmd_id_zbc_table_update;
562 cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
563
564 pmu->zbc_save_done = 0;
565
566 gk20a_dbg_pmu(g, "cmd post ZBC_TABLE_UPDATE");
567 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
568 pmu_handle_zbc_msg, pmu, &seq, ~0);
569 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
570 &pmu->zbc_save_done, 1);
571 if (!pmu->zbc_save_done) {
572 nvgpu_err(g, "ZBC save timeout");
573 }
574}
575
576int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
577 struct nv_pmu_therm_msg *msg)
578{
579 struct gk20a *g = gk20a_from_pmu(pmu);
580
581 nvgpu_log_fn(g, " ");
582
583 switch (msg->msg_type) {
584 case NV_PMU_THERM_MSG_ID_EVENT_HW_SLOWDOWN_NOTIFICATION:
585 if (msg->hw_slct_msg.mask == BIT(NV_PMU_THERM_EVENT_THERMAL_1)) {
586 nvgpu_clk_arb_send_thermal_alarm(pmu->g);
587 } else {
588 gk20a_dbg_pmu(g, "Unwanted/Unregistered thermal event received %d",
589 msg->hw_slct_msg.mask);
590 }
591 break;
592 default:
593		gk20a_dbg_pmu(g, "unknown therm event received %d", msg->msg_type);
594 break;
595 }
596
597 return 0;
598}
599
600void gk20a_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu)
601{
602 struct gk20a *g = gk20a_from_pmu(pmu);
603
604 gk20a_dbg_pmu(g, "pwr_pmu_idle_mask_supp_r(3): 0x%08x",
605 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
606 gk20a_dbg_pmu(g, "pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
607 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
608 gk20a_dbg_pmu(g, "pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
609 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
610 gk20a_dbg_pmu(g, "pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
611 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
612 gk20a_dbg_pmu(g, "pwr_pmu_pg_intren_r(0): 0x%08x",
613 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
614
615 gk20a_dbg_pmu(g, "pwr_pmu_idle_count_r(3): 0x%08x",
616 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
617 gk20a_dbg_pmu(g, "pwr_pmu_idle_count_r(4): 0x%08x",
618 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
619 gk20a_dbg_pmu(g, "pwr_pmu_idle_count_r(7): 0x%08x",
620 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
621}
622
623void gk20a_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu)
624{
625 struct gk20a *g = gk20a_from_pmu(pmu);
626 unsigned int i;
627
628 for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++) {
629 nvgpu_err(g, "pwr_pmu_mailbox_r(%d) : 0x%x",
630 i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
631 }
632
633 for (i = 0; i < pwr_pmu_debug__size_1_v(); i++) {
634 nvgpu_err(g, "pwr_pmu_debug_r(%d) : 0x%x",
635 i, gk20a_readl(g, pwr_pmu_debug_r(i)));
636 }
637
638 i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
639 nvgpu_err(g, "pwr_pmu_bar0_error_status_r : 0x%x", i);
640 if (i != 0) {
641 nvgpu_err(g, "pwr_pmu_bar0_addr_r : 0x%x",
642 gk20a_readl(g, pwr_pmu_bar0_addr_r()));
643 nvgpu_err(g, "pwr_pmu_bar0_data_r : 0x%x",
644 gk20a_readl(g, pwr_pmu_bar0_data_r()));
645 nvgpu_err(g, "pwr_pmu_bar0_timeout_r : 0x%x",
646 gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
647 nvgpu_err(g, "pwr_pmu_bar0_ctl_r : 0x%x",
648 gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
649 }
650
651 i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
652 nvgpu_err(g, "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
653
654 i = gk20a_readl(g, pwr_falcon_exterrstat_r());
655 nvgpu_err(g, "pwr_falcon_exterrstat_r : 0x%x", i);
656 if (pwr_falcon_exterrstat_valid_v(i) ==
657 pwr_falcon_exterrstat_valid_true_v()) {
658 nvgpu_err(g, "pwr_falcon_exterraddr_r : 0x%x",
659 gk20a_readl(g, pwr_falcon_exterraddr_r()));
660 }
661
662 /* Print PMU F/W debug prints */
663 print_pmu_trace(pmu);
664}
665
666bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu)
667{
668 struct gk20a *g = gk20a_from_pmu(pmu);
669 u32 servicedpmuint;
670
671 servicedpmuint = pwr_falcon_irqstat_halt_true_f() |
672 pwr_falcon_irqstat_exterr_true_f() |
673 pwr_falcon_irqstat_swgen0_true_f();
674
675 if (gk20a_readl(g, pwr_falcon_irqstat_r()) & servicedpmuint) {
676 return true;
677 }
678
679 return false;
680}
681
682void gk20a_pmu_isr(struct gk20a *g)
683{
684 struct nvgpu_pmu *pmu = &g->pmu;
685 struct nvgpu_falcon_queue *queue;
686 u32 intr, mask;
687 bool recheck = false;
688
689 nvgpu_log_fn(g, " ");
690
691 nvgpu_mutex_acquire(&pmu->isr_mutex);
692 if (!pmu->isr_enabled) {
693 nvgpu_mutex_release(&pmu->isr_mutex);
694 return;
695 }
696
697 mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
698 gk20a_readl(g, pwr_falcon_irqdest_r());
699
700 intr = gk20a_readl(g, pwr_falcon_irqstat_r());
701
702 gk20a_dbg_pmu(g, "received falcon interrupt: 0x%08x", intr);
703
704 intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
705 if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
706 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
707 nvgpu_mutex_release(&pmu->isr_mutex);
708 return;
709 }
710
711 if (intr & pwr_falcon_irqstat_halt_true_f()) {
712 nvgpu_err(g, "pmu halt intr not implemented");
713 nvgpu_pmu_dump_falcon_stats(pmu);
714 if (gk20a_readl(g, pwr_pmu_mailbox_r
715 (PMU_MODE_MISMATCH_STATUS_MAILBOX_R)) ==
716 PMU_MODE_MISMATCH_STATUS_VAL) {
717 if (g->ops.pmu.dump_secure_fuses) {
718 g->ops.pmu.dump_secure_fuses(g);
719 }
720 }
721 }
722 if (intr & pwr_falcon_irqstat_exterr_true_f()) {
723 nvgpu_err(g,
724 "pmu exterr intr not implemented. Clearing interrupt.");
725 nvgpu_pmu_dump_falcon_stats(pmu);
726
727 gk20a_writel(g, pwr_falcon_exterrstat_r(),
728 gk20a_readl(g, pwr_falcon_exterrstat_r()) &
729 ~pwr_falcon_exterrstat_valid_m());
730 }
731
732 if (g->ops.pmu.handle_ext_irq) {
733 g->ops.pmu.handle_ext_irq(g, intr);
734 }
735
736 if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
737 nvgpu_pmu_process_message(pmu);
738 recheck = true;
739 }
740
741 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
742
743 if (recheck) {
744 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
745 if (!nvgpu_flcn_queue_is_empty(pmu->flcn, queue)) {
746 gk20a_writel(g, pwr_falcon_irqsset_r(),
747 pwr_falcon_irqsset_swgen0_set_f());
748 }
749 }
750
751 nvgpu_mutex_release(&pmu->isr_mutex);
752}
753
754void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
755{
756 u32 data;
757
758 /* use counter #3 for GR && CE2 busy cycles */
759 gk20a_writel(g, pwr_pmu_idle_mask_r(3),
760 pwr_pmu_idle_mask_gr_enabled_f() |
761 pwr_pmu_idle_mask_ce_2_enabled_f());
762
763 /* assign same mask setting from GR ELPG to counter #3 */
764 data = gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(0));
765 gk20a_writel(g, pwr_pmu_idle_mask_1_r(3), data);
766
767 /* disable idle filtering for counters 3 and 6 */
768 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
769 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
770 pwr_pmu_idle_ctrl_filter_m(),
771 pwr_pmu_idle_ctrl_value_busy_f() |
772 pwr_pmu_idle_ctrl_filter_disabled_f());
773 gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
774
775 /* use counter #6 for total cycles */
776 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
777 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
778 pwr_pmu_idle_ctrl_filter_m(),
779 pwr_pmu_idle_ctrl_value_always_f() |
780 pwr_pmu_idle_ctrl_filter_disabled_f());
781 gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
782
783 /*
784 * We don't want to disturb counters #3 and #6, which are used by
785	 * perfmon, so we also wire up counters #1 and #2 to expose
786	 * raw counter readings.
787 */
788 gk20a_writel(g, pwr_pmu_idle_mask_r(1),
789 pwr_pmu_idle_mask_gr_enabled_f() |
790 pwr_pmu_idle_mask_ce_2_enabled_f());
791
792 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
793 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
794 pwr_pmu_idle_ctrl_filter_m(),
795 pwr_pmu_idle_ctrl_value_busy_f() |
796 pwr_pmu_idle_ctrl_filter_disabled_f());
797 gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
798
799 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
800 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
801 pwr_pmu_idle_ctrl_filter_m(),
802 pwr_pmu_idle_ctrl_value_always_f() |
803 pwr_pmu_idle_ctrl_filter_disabled_f());
804 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
805
806 /*
807 * use counters 4 and 0 for perfmon to log busy cycles and total cycles
808 * counter #0 overflow sets pmu idle intr status bit
809 */
810 gk20a_writel(g, pwr_pmu_idle_intr_r(),
811 pwr_pmu_idle_intr_en_f(0));
812
813 gk20a_writel(g, pwr_pmu_idle_threshold_r(0),
814 pwr_pmu_idle_threshold_value_f(0x7FFFFFFF));
815
816 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0));
817 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
818 pwr_pmu_idle_ctrl_filter_m(),
819 pwr_pmu_idle_ctrl_value_always_f() |
820 pwr_pmu_idle_ctrl_filter_disabled_f());
821 gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data);
822
823 gk20a_writel(g, pwr_pmu_idle_mask_r(4),
824 pwr_pmu_idle_mask_gr_enabled_f() |
825 pwr_pmu_idle_mask_ce_2_enabled_f());
826
827 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4));
828 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
829 pwr_pmu_idle_ctrl_filter_m(),
830 pwr_pmu_idle_ctrl_value_busy_f() |
831 pwr_pmu_idle_ctrl_filter_disabled_f());
832 gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data);
833
834 gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1));
835 gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1));
836 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
837 pwr_pmu_idle_intr_status_intr_f(1));
838}
839
840u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
841{
842 return pwr_pmu_idle_count_value_v(
843 gk20a_readl(g, pwr_pmu_idle_count_r(counter_id)));
844}
845
846void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
847{
848 gk20a_writel(g, pwr_pmu_idle_count_r(counter_id),
849 pwr_pmu_idle_count_reset_f(1));
850}
851
852u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g)
853{
854 return pwr_pmu_idle_intr_status_intr_v(
855 gk20a_readl(g, pwr_pmu_idle_intr_status_r()));
856}
857
858void gk20a_pmu_clear_idle_intr_status(struct gk20a *g)
859{
860 gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
861 pwr_pmu_idle_intr_status_intr_f(1));
862}
863
864void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
865 struct pmu_pg_stats_data *pg_stat_data)
866{
867 struct nvgpu_pmu *pmu = &g->pmu;
868 struct pmu_pg_stats stats;
869
870 nvgpu_flcn_copy_from_dmem(pmu->flcn,
871 pmu->stat_dmem_offset[pg_engine_id],
872 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
873
874 pg_stat_data->ingating_time = stats.pg_ingating_time_us;
875 pg_stat_data->ungating_time = stats.pg_ungating_time_us;
876 pg_stat_data->gating_cnt = stats.pg_gating_cnt;
877 pg_stat_data->avg_entry_latency_us = stats.pg_avg_entry_time_us;
878 pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us;
879}
diff --git a/include/gk20a/pmu_gk20a.h b/include/gk20a/pmu_gk20a.h
new file mode 100644
index 0000000..65ffd63
--- /dev/null
+++ b/include/gk20a/pmu_gk20a.h
@@ -0,0 +1,80 @@
1/*
2 * drivers/video/tegra/host/gk20a/pmu_gk20a.h
3 *
4 * GK20A PMU (aka. gPMU outside gk20a context)
5 *
6 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26#ifndef NVGPU_GK20A_PMU_GK20A_H
27#define NVGPU_GK20A_PMU_GK20A_H
28
29#include <nvgpu/flcnif_cmn.h>
30#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
31#include <nvgpu/pmu.h>
32
33struct nvgpu_firmware;
34
35#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe)
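
ZBC_MASK(i) builds a bitmask selecting ZBC table entries 1..i; the final & 0xfffe always drops entry 0. A quick worked expansion for i == 3, kept as a comment:

/*
 * Worked example (arithmetic only):
 *   ~(~0 << 4)          == 0x0000000f
 *   0x0000000f & 0xfffe == 0x0000000e   -> entries 1, 2 and 3 selected
 */
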
36
37bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu);
38void gk20a_pmu_isr(struct gk20a *g);
39
40u32 gk20a_pmu_pg_engines_list(struct gk20a *g);
41u32 gk20a_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id);
42
43void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
44
45void gk20a_pmu_init_perfmon_counter(struct gk20a *g);
46
47void gk20a_pmu_pg_idle_counter_config(struct gk20a *g, u32 pg_engine_id);
48
49int gk20a_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token);
50int gk20a_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token);
51
52int gk20a_pmu_queue_head(struct gk20a *g, struct nvgpu_falcon_queue *queue,
53 u32 *head, bool set);
54int gk20a_pmu_queue_tail(struct gk20a *g, struct nvgpu_falcon_queue *queue,
55 u32 *tail, bool set);
56void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
57
58u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
59void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
60
61u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g);
62void gk20a_pmu_clear_idle_intr_status(struct gk20a *g);
63
64void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr);
65bool gk20a_is_pmu_supported(struct gk20a *g);
66
67int pmu_bootstrap(struct nvgpu_pmu *pmu);
68
69void gk20a_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu);
70void gk20a_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu);
71
72void gk20a_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable);
73void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
74 void *param, u32 handle, u32 status);
75void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
76 struct pmu_pg_stats_data *pg_stat_data);
77bool gk20a_pmu_is_engine_in_reset(struct gk20a *g);
78int gk20a_pmu_engine_reset(struct gk20a *g, bool do_reset);
79u32 gk20a_pmu_get_irqdest(struct gk20a *g);
80#endif /*NVGPU_GK20A_PMU_GK20A_H*/
diff --git a/include/gk20a/regops_gk20a.c b/include/gk20a/regops_gk20a.c
new file mode 100644
index 0000000..0aec4f8
--- /dev/null
+++ b/include/gk20a/regops_gk20a.c
@@ -0,0 +1,472 @@
1/*
2 * Tegra GK20A GPU Debugger Driver Register Ops
3 *
4 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a.h"
26#include "gr_gk20a.h"
27#include "dbg_gpu_gk20a.h"
28#include "regops_gk20a.h"
29
30#include <nvgpu/log.h>
31#include <nvgpu/bsearch.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/io.h>
34
35static int regop_bsearch_range_cmp(const void *pkey, const void *pelem)
36{
37 u32 key = *(u32 *)pkey;
38 struct regop_offset_range *prange = (struct regop_offset_range *)pelem;
39 if (key < prange->base) {
40 return -1;
41 } else if (prange->base <= key && key < (prange->base +
42 (prange->count * 4U))) {
43 return 0;
44 }
45 return 1;
46}
47
48static inline bool linear_search(u32 offset, const u32 *list, int size)
49{
50 int i;
51 for (i = 0; i < size; i++) {
52 if (list[i] == offset) {
53 return true;
54 }
55 }
56 return false;
57}
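
regop_bsearch_range_cmp() treats each whitelist entry as the half-open range [base, base + count*4), which lets bsearch() answer the offset check in O(log n); linear_search() covers the small flat lists. A hedged illustration of the comparator on a made-up range:

/* Hedged sketch of regop_bsearch_range_cmp() semantics; the range and
 * offsets are made up for illustration. */
static void regop_range_cmp_sketch(void)
{
	struct regop_offset_range r = { .base = 0x00419000, .count = 8 };
	u32 below = 0x00418ffcU, inside = 0x00419010U, above = 0x00419020U;

	(void)regop_bsearch_range_cmp(&below, &r);   /* -1: key precedes the range */
	(void)regop_bsearch_range_cmp(&inside, &r);  /*  0: within base..base+count*4 */
	(void)regop_bsearch_range_cmp(&above, &r);   /*  1: past the last register */
}
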
58
59/*
60 * In order to perform a context relative op the context has
61 * to be created already... which would imply that the
62 * context switch mechanism has already been put in place.
63 * So by the time we perform such an operation it should always
64 * be possible to query for the appropriate context offsets, etc.
65 *
66 * But note: while the dbg_gpu bind requires a channel fd,
67 * it doesn't require an allocated gr/compute obj at that point...
68 */
69static bool gr_context_info_available(struct gr_gk20a *gr)
70{
71 int err;
72
73 nvgpu_mutex_acquire(&gr->ctx_mutex);
74 err = !gr->ctx_vars.golden_image_initialized;
75 nvgpu_mutex_release(&gr->ctx_mutex);
76 if (err) {
77 return false;
78 }
79
80 return true;
81
82}
83
84static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
85 u32 *ctx_rd_count, u32 *ctx_wr_count,
86 struct nvgpu_dbg_reg_op *ops,
87 u32 op_count);
88
89
90int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
91 struct nvgpu_dbg_reg_op *ops,
92 u64 num_ops,
93 bool *is_current_ctx)
94{
95 int err = 0;
96 unsigned int i;
97 struct channel_gk20a *ch = NULL;
98 struct gk20a *g = dbg_s->g;
99 /*struct gr_gk20a *gr = &g->gr;*/
100 u32 data32_lo = 0, data32_hi = 0;
101 u32 ctx_rd_count = 0, ctx_wr_count = 0;
102 bool skip_read_lo, skip_read_hi;
103 bool ok;
104
105 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
106
107 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
108
109 /* For vgpu, the regops routines need to be handled in the
110 * context of the server and support for that does not exist.
111 *
112 * The two users of the regops interface are the compute driver
113 * and tools. The compute driver will work without a functional
114 * regops implementation, so we return -ENOSYS. This will allow
115 * compute apps to run with vgpu. Tools will not work in this
116 * configuration and are not required to work at this time. */
117 if (g->is_virtual) {
118 return -ENOSYS;
119 }
120
121 ok = validate_reg_ops(dbg_s,
122 &ctx_rd_count, &ctx_wr_count,
123 ops, num_ops);
124 if (!ok) {
125 nvgpu_err(g, "invalid op(s)");
126 err = -EINVAL;
127 /* each op has its own err/status */
128 goto clean_up;
129 }
130
131 /* be sure that ctx info is in place if there are ctx ops */
132 if (ctx_wr_count | ctx_rd_count) {
133 if (!gr_context_info_available(&g->gr)) {
134 nvgpu_err(g, "gr context data not available");
135 return -ENODEV;
136 }
137 }
138
139 for (i = 0; i < num_ops; i++) {
140 /* if it isn't global then it is done in the ctx ops... */
141 if (ops[i].type != REGOP(TYPE_GLOBAL)) {
142 continue;
143 }
144
145 switch (ops[i].op) {
146
147 case REGOP(READ_32):
148 ops[i].value_hi = 0;
149 ops[i].value_lo = gk20a_readl(g, ops[i].offset);
150 nvgpu_log(g, gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
151 ops[i].value_lo, ops[i].offset);
152
153 break;
154
155 case REGOP(READ_64):
156 ops[i].value_lo = gk20a_readl(g, ops[i].offset);
157 ops[i].value_hi =
158 gk20a_readl(g, ops[i].offset + 4);
159
160 nvgpu_log(g, gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
161 ops[i].value_hi, ops[i].value_lo,
162 ops[i].offset);
163 break;
164
165 case REGOP(WRITE_32):
166 case REGOP(WRITE_64):
167 /* some of this appears wonky/unnecessary but
168 we've kept it for compat with existing
169 debugger code. just in case... */
170 skip_read_lo = skip_read_hi = false;
171 if (ops[i].and_n_mask_lo == ~(u32)0) {
172 data32_lo = ops[i].value_lo;
173 skip_read_lo = true;
174 }
175
176 if ((ops[i].op == REGOP(WRITE_64)) &&
177 (ops[i].and_n_mask_hi == ~(u32)0)) {
178 data32_hi = ops[i].value_hi;
179 skip_read_hi = true;
180 }
181
182 /* read first 32bits */
183 if (skip_read_lo == false) {
184 data32_lo = gk20a_readl(g, ops[i].offset);
185 data32_lo &= ~ops[i].and_n_mask_lo;
186 data32_lo |= ops[i].value_lo;
187 }
188
189 /* if desired, read second 32bits */
190 if ((ops[i].op == REGOP(WRITE_64)) &&
191 !skip_read_hi) {
192 data32_hi = gk20a_readl(g, ops[i].offset + 4);
193 data32_hi &= ~ops[i].and_n_mask_hi;
194 data32_hi |= ops[i].value_hi;
195 }
196
197 /* now update first 32bits */
198 gk20a_writel(g, ops[i].offset, data32_lo);
199 nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
200 data32_lo, ops[i].offset);
201 /* if desired, update second 32bits */
202 if (ops[i].op == REGOP(WRITE_64)) {
203 gk20a_writel(g, ops[i].offset + 4, data32_hi);
204 nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
205 data32_hi, ops[i].offset + 4);
206
207 }
208
209
210 break;
211
212 /* shouldn't happen as we've already screened */
213 default:
214 BUG();
215 err = -EINVAL;
216 goto clean_up;
217 break;
218 }
219 }
220
221 if (ctx_wr_count | ctx_rd_count) {
222 err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
223 ctx_wr_count, ctx_rd_count,
224 is_current_ctx);
225 if (err) {
226 nvgpu_warn(g, "failed to perform ctx ops\n");
227 goto clean_up;
228 }
229 }
230
231 clean_up:
232 nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err);
233 return err;
234
235}
236
237
238static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s,
239 struct nvgpu_dbg_reg_op *op)
240{
241 int err = 0;
242
243 op->status = REGOP(STATUS_SUCCESS);
244
245 switch (op->op) {
246 case REGOP(READ_32):
247 case REGOP(READ_64):
248 case REGOP(WRITE_32):
249 case REGOP(WRITE_64):
250 break;
251 default:
252 op->status |= REGOP(STATUS_UNSUPPORTED_OP);
253 err = -EINVAL;
254 break;
255 }
256
257 switch (op->type) {
258 case REGOP(TYPE_GLOBAL):
259 case REGOP(TYPE_GR_CTX):
260 case REGOP(TYPE_GR_CTX_TPC):
261 case REGOP(TYPE_GR_CTX_SM):
262 case REGOP(TYPE_GR_CTX_CROP):
263 case REGOP(TYPE_GR_CTX_ZROP):
264 case REGOP(TYPE_GR_CTX_QUAD):
265 break;
266 /*
267 case NVGPU_DBG_GPU_REG_OP_TYPE_FB:
268 */
269 default:
270 op->status |= REGOP(STATUS_INVALID_TYPE);
271 err = -EINVAL;
272 break;
273 }
274
275 return err;
276}
277
278static bool check_whitelists(struct dbg_session_gk20a *dbg_s,
279 struct nvgpu_dbg_reg_op *op, u32 offset)
280{
281 struct gk20a *g = dbg_s->g;
282 bool valid = false;
283 struct channel_gk20a *ch;
284
285 ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
286
287 if (op->type == REGOP(TYPE_GLOBAL)) {
288 /* search global list */
289 valid = g->ops.regops.get_global_whitelist_ranges &&
290 !!bsearch(&offset,
291 g->ops.regops.get_global_whitelist_ranges(),
292 g->ops.regops.get_global_whitelist_ranges_count(),
293 sizeof(*g->ops.regops.get_global_whitelist_ranges()),
294 regop_bsearch_range_cmp);
295
296 /* if debug session and channel is bound search context list */
297 if ((!valid) && (!dbg_s->is_profiler && ch)) {
298 /* binary search context list */
299 valid = g->ops.regops.get_context_whitelist_ranges &&
300 !!bsearch(&offset,
301 g->ops.regops.get_context_whitelist_ranges(),
302 g->ops.regops.get_context_whitelist_ranges_count(),
303 sizeof(*g->ops.regops.get_context_whitelist_ranges()),
304 regop_bsearch_range_cmp);
305 }
306
307 /* if debug session and channel is bound search runcontrol list */
308 if ((!valid) && (!dbg_s->is_profiler && ch)) {
309 valid = g->ops.regops.get_runcontrol_whitelist &&
310 linear_search(offset,
311 g->ops.regops.get_runcontrol_whitelist(),
312 g->ops.regops.get_runcontrol_whitelist_count());
313 }
314 } else if (op->type == REGOP(TYPE_GR_CTX)) {
315 /* it's a context-relative op */
316 if (!ch) {
317 nvgpu_err(dbg_s->g, "can't perform ctx regop unless bound");
318 op->status = REGOP(STATUS_UNSUPPORTED_OP);
319 return valid;
320 }
321
322 /* binary search context list */
323 valid = g->ops.regops.get_context_whitelist_ranges &&
324 !!bsearch(&offset,
325 g->ops.regops.get_context_whitelist_ranges(),
326 g->ops.regops.get_context_whitelist_ranges_count(),
327 sizeof(*g->ops.regops.get_context_whitelist_ranges()),
328 regop_bsearch_range_cmp);
329
330 /* if debug session and channel is bound search runcontrol list */
331 if ((!valid) && (!dbg_s->is_profiler && ch)) {
332 valid = g->ops.regops.get_runcontrol_whitelist &&
333 linear_search(offset,
334 g->ops.regops.get_runcontrol_whitelist(),
335 g->ops.regops.get_runcontrol_whitelist_count());
336 }
337
338 } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) {
339 valid = g->ops.regops.get_qctl_whitelist &&
340 linear_search(offset,
341 g->ops.regops.get_qctl_whitelist(),
342 g->ops.regops.get_qctl_whitelist_count());
343 }
344
345 return valid;
346}
347
348/* note: the op here has already been through validate_reg_op_info */
349static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
350 struct nvgpu_dbg_reg_op *op)
351{
352 int err;
353 u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
354 bool valid = false;
355
356 op->status = 0;
357 offset = op->offset;
358
359 /* support only 24-bit 4-byte aligned offsets */
360 if (offset & 0xFF000003) {
361 nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
362 op->status |= REGOP(STATUS_INVALID_OFFSET);
363 return -EINVAL;
364 }
365
366 valid = check_whitelists(dbg_s, op, offset);
367 if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
368 valid = check_whitelists(dbg_s, op, offset + 4);
369 }
370
371 if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
372 err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
373 op->offset,
374 1,
375 &buf_offset_lo,
376 &buf_offset_addr,
377 &num_offsets,
378 op->type == REGOP(TYPE_GR_CTX_QUAD),
379 op->quad);
380 if (err) {
381 err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g,
382 op->offset,
383 1,
384 &buf_offset_lo,
385 &buf_offset_addr,
386 &num_offsets);
387
388 if (err) {
389 op->status |= REGOP(STATUS_INVALID_OFFSET);
390 return -EINVAL;
391 }
392 }
393 if (!num_offsets) {
394 op->status |= REGOP(STATUS_INVALID_OFFSET);
395 return -EINVAL;
396 }
397 }
398
399 if (!valid) {
400 nvgpu_err(dbg_s->g, "invalid regop offset: 0x%x", offset);
401 op->status |= REGOP(STATUS_INVALID_OFFSET);
402 return -EINVAL;
403 }
404
405 return 0;
406}
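
The offset & 0xFF000003 test above folds both constraints into one mask: the offset has to fit in 24 bits and be 4-byte aligned. A few concrete values under that rule, as a comment (the offsets themselves are arbitrary examples):

/*
 * Examples for the 24-bit, 4-byte-aligned offset check:
 *   0x0041a084 & 0xFF000003 == 0x00000000  -> accepted
 *   0x0041a086 & 0xFF000003 == 0x00000002  -> rejected (misaligned)
 *   0x0141a084 & 0xFF000003 == 0x01000000  -> rejected (beyond 24 bits)
 */
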
407
408static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
409 u32 *ctx_rd_count, u32 *ctx_wr_count,
410 struct nvgpu_dbg_reg_op *ops,
411 u32 op_count)
412{
413 u32 i;
414 bool ok = true;
415 struct gk20a *g = dbg_s->g;
416
417 /* keep going until the end so every op can get
418 * a separate error code if needed */
419 for (i = 0; i < op_count; i++) {
420
421 if (validate_reg_op_info(dbg_s, &ops[i]) != 0) {
422 ok = false;
423 }
424
425 if (reg_op_is_gr_ctx(ops[i].type)) {
426 if (reg_op_is_read(ops[i].op)) {
427 (*ctx_rd_count)++;
428 } else {
429 (*ctx_wr_count)++;
430 }
431 }
432
433		/* if "allow_all" flag enabled, don't validate the offset */
434 if (!g->allow_all) {
435 if (validate_reg_op_offset(dbg_s, &ops[i]) != 0) {
436 ok = false;
437 }
438 }
439 }
440
441 nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
442 *ctx_wr_count, *ctx_rd_count);
443
444 return ok;
445}
446
447/* exported for tools like cyclestats, etc */
448bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset)
449{
450 bool valid = !!bsearch(&offset,
451 g->ops.regops.get_global_whitelist_ranges(),
452 g->ops.regops.get_global_whitelist_ranges_count(),
453 sizeof(*g->ops.regops.get_global_whitelist_ranges()),
454 regop_bsearch_range_cmp);
455 return valid;
456}
457
458bool reg_op_is_gr_ctx(u8 type)
459{
460 return type == REGOP(TYPE_GR_CTX) ||
461 type == REGOP(TYPE_GR_CTX_TPC) ||
462 type == REGOP(TYPE_GR_CTX_SM) ||
463 type == REGOP(TYPE_GR_CTX_CROP) ||
464 type == REGOP(TYPE_GR_CTX_ZROP) ||
465 type == REGOP(TYPE_GR_CTX_QUAD);
466}
467
468bool reg_op_is_read(u8 op)
469{
470 return op == REGOP(READ_32) ||
471 op == REGOP(READ_64);
472}
diff --git a/include/gk20a/regops_gk20a.h b/include/gk20a/regops_gk20a.h
new file mode 100644
index 0000000..9670587
--- /dev/null
+++ b/include/gk20a/regops_gk20a.h
@@ -0,0 +1,90 @@
1/*
2 * Tegra GK20A GPU Debugger Driver Register Ops
3 *
4 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef REGOPS_GK20A_H
25#define REGOPS_GK20A_H
26
27/*
28 * Register operations
29 * All operations target the first channel
30 * attached to the debug session
31 */
32/* valid op values */
33#define NVGPU_DBG_REG_OP_READ_32 (0x00000000)
34#define NVGPU_DBG_REG_OP_WRITE_32 (0x00000001)
35#define NVGPU_DBG_REG_OP_READ_64 (0x00000002)
36#define NVGPU_DBG_REG_OP_WRITE_64 (0x00000003)
37/* note: 8b ops are unsupported */
38#define NVGPU_DBG_REG_OP_READ_08 (0x00000004)
39#define NVGPU_DBG_REG_OP_WRITE_08 (0x00000005)
40
41/* valid type values */
42#define NVGPU_DBG_REG_OP_TYPE_GLOBAL (0x00000000)
43#define NVGPU_DBG_REG_OP_TYPE_GR_CTX (0x00000001)
44#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_TPC (0x00000002)
45#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_SM (0x00000004)
46#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_CROP (0x00000008)
47#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_ZROP (0x00000010)
48/*#define NVGPU_DBG_REG_OP_TYPE_FB (0x00000020)*/
49#define NVGPU_DBG_REG_OP_TYPE_GR_CTX_QUAD (0x00000040)
50
51/* valid status values */
52#define NVGPU_DBG_REG_OP_STATUS_SUCCESS (0x00000000)
53#define NVGPU_DBG_REG_OP_STATUS_INVALID_OP (0x00000001)
54#define NVGPU_DBG_REG_OP_STATUS_INVALID_TYPE (0x00000002)
55#define NVGPU_DBG_REG_OP_STATUS_INVALID_OFFSET (0x00000004)
56#define NVGPU_DBG_REG_OP_STATUS_UNSUPPORTED_OP (0x00000008)
57#define NVGPU_DBG_REG_OP_STATUS_INVALID_MASK (0x00000010)
58
59struct nvgpu_dbg_reg_op {
60 u8 op;
61 u8 type;
62 u8 status;
63 u8 quad;
64 u32 group_mask;
65 u32 sub_group_mask;
66 u32 offset;
67 u32 value_lo;
68 u32 value_hi;
69 u32 and_n_mask_lo;
70 u32 and_n_mask_hi;
71};
72
73struct regop_offset_range {
74 u32 base:24;
75 u32 count:8;
76};
77
78int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
79 struct nvgpu_dbg_reg_op *ops,
80 u64 num_ops,
81 bool *is_current_ctx);
82
83/* turn seriously unwieldy names -> something shorter */
84#define REGOP(x) NVGPU_DBG_REG_OP_##x
85
86bool reg_op_is_gr_ctx(u8 type);
87bool reg_op_is_read(u8 op);
88bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset);
89
90#endif /* REGOPS_GK20A_H */