author     Joshua Bakita <bakitajoshua@gmail.com>  2023-06-28 18:24:25 -0400
committer  Joshua Bakita <bakitajoshua@gmail.com>  2023-06-28 18:24:25 -0400
commit     01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch)
tree       4ef34501728a087be24f4ba0af90f91486bf780b /include/gk20a/gk20a.c
parent     306a03d18b305e4e573be3b2931978fa10679eb9 (diff)
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time being. Only a couple structs are required, so it should be fairly easy to remove this dependency at some point in the future.
Diffstat (limited to 'include/gk20a/gk20a.c')
-rw-r--r--  include/gk20a/gk20a.c | 590
1 file changed, 590 insertions(+), 0 deletions(-)
diff --git a/include/gk20a/gk20a.c b/include/gk20a/gk20a.c
new file mode 100644
index 0000000..c3068b7
--- /dev/null
+++ b/include/gk20a/gk20a.c
@@ -0,0 +1,590 @@
/*
 * GK20A Graphics
 *
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/timers.h>
#include <nvgpu/soc.h>
#include <nvgpu/enabled.h>
#include <nvgpu/pmu.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/ltc.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/mm.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/soc.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/therm.h>
#include <nvgpu/mc.h>
#include <nvgpu/channel_sync.h>

#include <trace/events/gk20a.h>

#include "gk20a.h"

#include "dbg_gpu_gk20a.h"
#include "pstate/pstate.h"

void __nvgpu_check_gpu_state(struct gk20a *g)
{
	u32 boot_0 = 0xffffffff;

	boot_0 = nvgpu_mc_boot_0(g, NULL, NULL, NULL);
	if (boot_0 == 0xffffffff) {
		nvgpu_err(g, "GPU has disappeared from bus!!");
		nvgpu_err(g, "Rebooting system!!");
		nvgpu_kernel_restart(NULL);
	}
}

void __gk20a_warn_on_no_regs(void)
{
	WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
}

static void gk20a_mask_interrupts(struct gk20a *g)
{
	if (g->ops.mc.intr_mask != NULL) {
		g->ops.mc.intr_mask(g);
	}

	if (g->ops.mc.log_pending_intrs != NULL) {
		g->ops.mc.log_pending_intrs(g);
	}
}

int gk20a_prepare_poweroff(struct gk20a *g)
{
	int ret = 0;

	nvgpu_log_fn(g, " ");

	if (g->ops.fifo.channel_suspend) {
		ret = g->ops.fifo.channel_suspend(g);
		if (ret) {
			return ret;
		}
	}

	/* disable elpg before gr or fifo suspend */
	if (g->ops.pmu.is_pmu_supported(g)) {
		ret |= nvgpu_pmu_destroy(g);
	}

	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
		ret |= nvgpu_sec2_destroy(g);
	}

	ret |= gk20a_gr_suspend(g);
	ret |= nvgpu_mm_suspend(g);
	ret |= gk20a_fifo_suspend(g);

	gk20a_ce_suspend(g);

	/* Disable GPCPLL */
	if (g->ops.clk.suspend_clk_support) {
		ret |= g->ops.clk.suspend_clk_support(g);
	}

	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		gk20a_deinit_pstate_support(g);
	}

	gk20a_mask_interrupts(g);

	g->power_on = false;

	return ret;
}

int gk20a_finalize_poweron(struct gk20a *g)
{
	int err = 0;
#if defined(CONFIG_TEGRA_GK20A_NVHOST)
	u32 nr_pages;
#endif

	u32 fuse_status;

	nvgpu_log_fn(g, " ");

	if (g->power_on) {
		return 0;
	}

	g->power_on = true;

	/*
	 * Before probing the GPU make sure the GPU's state is cleared. This is
	 * relevant for rebind operations.
	 */
	if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
		g->ops.xve.reset_gpu(g);
		g->gpu_reset_done = true;
	}

	if (g->ops.clock_gating.slcg_acb_load_gating_prod != NULL) {
		g->ops.clock_gating.slcg_acb_load_gating_prod(g, true);
	}

	/*
	 * Do this early so any early VMs that get made are capable of mapping
	 * buffers.
	 */
	err = nvgpu_pd_cache_init(g);
	if (err) {
		return err;
	}

	/* init interface layer support for PMU falcon */
	err = nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
	if (err != 0) {
		nvgpu_err(g, "failed to sw init FALCON_ID_PMU");
		goto done;
	}
	err = nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
	if (err != 0) {
		nvgpu_err(g, "failed to sw init FALCON_ID_SEC2");
		goto done;
	}
	err = nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);
	if (err != 0) {
		nvgpu_err(g, "failed to sw init FALCON_ID_NVDEC");
		goto done;
	}
	err = nvgpu_flcn_sw_init(g, FALCON_ID_GSPLITE);
	if (err != 0) {
		nvgpu_err(g, "failed to sw init FALCON_ID_GSPLITE");
		goto done;
	}

	if (g->ops.acr.acr_sw_init != NULL &&
		nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		g->ops.acr.acr_sw_init(g, &g->acr);
	}

	if (g->ops.bios.init) {
		err = g->ops.bios.init(g);
	}
	if (err) {
		goto done;
	}

	g->ops.bus.init_hw(g);

	if (g->ops.clk.disable_slowboot) {
		g->ops.clk.disable_slowboot(g);
	}

	g->ops.priv_ring.enable_priv_ring(g);

	/* TBD: move this after graphics init in which blcg/slcg is enabled.
	   This function removes SlowdownOnBoot which applies 32x divider
	   on gpcpll bypass path. The purpose of slowdown is to save power
	   during boot but it also significantly slows down gk20a init on
	   simulation and emulation. We should remove SOB after graphics power
	   saving features (blcg/slcg) are enabled. For now, do it here. */
	if (g->ops.clk.init_clk_support) {
		err = g->ops.clk.init_clk_support(g);
		if (err) {
			nvgpu_err(g, "failed to init gk20a clk");
			goto done;
		}
	}

	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
		err = g->ops.nvlink.init(g);
		if (err) {
			nvgpu_err(g, "failed to init nvlink");
			goto done;
		}
	}

	if (g->ops.fb.init_fbpa) {
		err = g->ops.fb.init_fbpa(g);
		if (err) {
			nvgpu_err(g, "failed to init fbpa");
			goto done;
		}
	}

	if (g->ops.fb.mem_unlock) {
		err = g->ops.fb.mem_unlock(g);
		if (err) {
			nvgpu_err(g, "failed to unlock memory");
			goto done;
		}
	}

	err = g->ops.fifo.reset_enable_hw(g);

	if (err) {
		nvgpu_err(g, "failed to reset gk20a fifo");
		goto done;
	}

	err = nvgpu_init_ltc_support(g);
	if (err) {
		nvgpu_err(g, "failed to init ltc");
		goto done;
	}

	err = nvgpu_init_mm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a mm");
		goto done;
	}

	err = gk20a_init_fifo_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a fifo");
		goto done;
	}

	if (g->ops.therm.elcg_init_idle_filters) {
		g->ops.therm.elcg_init_idle_filters(g);
	}

	g->ops.mc.intr_enable(g);

	/*
	 * Power gate the chip as per the TPC PG mask
	 * and the fuse_status register.
	 * If TPC PG mask is invalid halt the GPU poweron.
	 */
	g->can_tpc_powergate = false;
	fuse_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0);

	if (g->ops.tpc.tpc_powergate) {
		err = g->ops.tpc.tpc_powergate(g, fuse_status);
	}

	if (err) {
		nvgpu_err(g, "failed to power ON GPU");
		goto done;
	}

	nvgpu_mutex_acquire(&g->tpc_pg_lock);

	if (g->can_tpc_powergate) {
		if (g->ops.gr.powergate_tpc != NULL)
			g->ops.gr.powergate_tpc(g);
	}

	err = gk20a_enable_gr_hw(g);
	if (err) {
		nvgpu_err(g, "failed to enable gr");
		nvgpu_mutex_release(&g->tpc_pg_lock);
		goto done;
	}

	if (g->ops.pmu.is_pmu_supported(g)) {
		if (g->ops.pmu.prepare_ucode) {
			err = g->ops.pmu.prepare_ucode(g);
		}
		if (err) {
			nvgpu_err(g, "failed to init pmu ucode");
			nvgpu_mutex_release(&g->tpc_pg_lock);
			goto done;
		}
	}

	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		err = gk20a_init_pstate_support(g);
		if (err) {
			nvgpu_err(g, "failed to init pstates");
			nvgpu_mutex_release(&g->tpc_pg_lock);
			goto done;
		}
	}

	if (g->acr.bootstrap_hs_acr != NULL &&
		nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		err = g->acr.bootstrap_hs_acr(g, &g->acr, &g->acr.acr);
		if (err != 0) {
			nvgpu_err(g, "ACR bootstrap failed");
			nvgpu_mutex_release(&g->tpc_pg_lock);
			goto done;
		}
	}

	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
		err = nvgpu_init_sec2_support(g);
		if (err != 0) {
			nvgpu_err(g, "failed to init sec2");
			nvgpu_mutex_release(&g->tpc_pg_lock);
			goto done;
		}
	}

	if (g->ops.pmu.is_pmu_supported(g)) {
		err = nvgpu_init_pmu_support(g);
		if (err) {
			nvgpu_err(g, "failed to init gk20a pmu");
			nvgpu_mutex_release(&g->tpc_pg_lock);
			goto done;
		}
	}

	err = gk20a_init_gr_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gr");
		nvgpu_mutex_release(&g->tpc_pg_lock);
		goto done;
	}

	nvgpu_mutex_release(&g->tpc_pg_lock);

	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		err = gk20a_init_pstate_pmu_support(g);
		if (err) {
			nvgpu_err(g, "failed to init pstates");
			goto done;
		}
	}

	if (g->ops.pmu_ver.clk.clk_set_boot_clk && nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		g->ops.pmu_ver.clk.clk_set_boot_clk(g);
	} else {
		err = nvgpu_clk_arb_init_arbiter(g);
		if (err) {
			nvgpu_err(g, "failed to init clk arb");
			goto done;
		}
	}

	err = nvgpu_init_therm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a therm");
		goto done;
	}

	err = g->ops.chip_init_gpu_characteristics(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gpu characteristics");
		goto done;
	}

#ifdef CONFIG_GK20A_CTXSW_TRACE
	err = gk20a_ctxsw_trace_init(g);
	if (err)
		nvgpu_warn(g, "could not initialize ctxsw tracing");
#endif

	/* Restore the debug setting */
	g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);

	gk20a_init_ce_support(g);

	if (g->ops.xve.available_speeds) {
		u32 speed;

		if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && g->ops.xve.disable_aspm) {
			g->ops.xve.disable_aspm(g);
		}

		g->ops.xve.available_speeds(g, &speed);

		/* Set to max speed */
		speed = 1 << (fls(speed) - 1);
		err = g->ops.xve.set_speed(g, speed);
		if (err) {
			nvgpu_err(g, "Failed to set PCIe bus speed!");
			goto done;
		}
	}

#if defined(CONFIG_TEGRA_GK20A_NVHOST)
	if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) {
		if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
			nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
			__nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
					g->syncpt_unit_base, nr_pages);
		}
	}
#endif

	if (g->ops.fifo.channel_resume) {
		g->ops.fifo.channel_resume(g);
	}

done:
	if (err) {
		g->power_on = false;
	}

	return err;
}

int gk20a_wait_for_idle(struct gk20a *g)
{
	int wait_length = 150; /* 3 second overall max wait. */
	int target_usage_count = 0;

	if (!g) {
		return -ENODEV;
	}

	while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count)
			&& (wait_length-- >= 0)) {
		nvgpu_msleep(20);
	}

	if (wait_length < 0) {
		nvgpu_warn(g, "Timed out waiting for idle (%d)!\n",
			   nvgpu_atomic_read(&g->usage_count));
		return -ETIMEDOUT;
	}

	return 0;
}

int gk20a_init_gpu_characteristics(struct gk20a *g)
{
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);

	if (IS_ENABLED(CONFIG_SYNC)) {
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);
	}

	if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) {
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);
	}

	/*
	 * Fast submits are supported as long as the user doesn't request
	 * anything that depends on job tracking. (Here, fast means strictly no
	 * metadata, just the gpfifo contents are copied and gp_put updated).
	 */
	__nvgpu_set_enabled(g,
			NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
			true);

	/*
	 * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
	 * and other heavy stuff, which prevents deterministic submits. This is
	 * supported otherwise, provided that the user doesn't request anything
	 * that depends on deferred cleanup.
	 */
	if (!nvgpu_channel_sync_needs_os_fence_framework(g)) {
		__nvgpu_set_enabled(g,
				NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
				true);
	}

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

	if (g->ops.clk_arb.get_arbiter_clk_domains != NULL &&
			g->ops.clk.support_clk_freq_controller) {
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);
	}

	g->ops.gr.detect_sm_arch(g);

	if (g->ops.gr.init_cyclestats) {
		g->ops.gr.init_cyclestats(g);
	}

	g->ops.gr.get_rop_l2_en_mask(g);

	return 0;
}

/*
 * Free the gk20a struct.
 */
static void gk20a_free_cb(struct nvgpu_ref *refcount)
{
	struct gk20a *g = container_of(refcount,
		struct gk20a, refcount);

	nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!");

	gk20a_ce_destroy(g);

	if (g->remove_support) {
		g->remove_support(g);
	}

	if (g->free) {
		g->free(g);
	}
}

/**
 * gk20a_get() - Increment ref count on driver
 *
 * @g The driver to increment
 * This will fail if the driver is in the process of being released. In that
 * case it will return NULL. Otherwise a pointer to the driver passed in will
 * be returned.
 */
struct gk20a * __must_check gk20a_get(struct gk20a *g)
{
	int success;

	/*
	 * Handle the possibility we are still freeing the gk20a struct while
	 * gk20a_get() is called. Unlikely but plausible race condition. Ideally
	 * the code will never be in such a situation that this race is
	 * possible.
	 */
	success = nvgpu_ref_get_unless_zero(&g->refcount);

	nvgpu_log(g, gpu_dbg_shutdown, "GET: refs currently %d %s",
		nvgpu_atomic_read(&g->refcount.refcount),
		success ? "" : "(FAILED)");

	return success ? g : NULL;
}

/**
 * gk20a_put() - Decrement ref count on driver
 *
 * @g - The driver to decrement
 *
 * Decrement the driver ref-count. If necessary, also free the underlying
 * driver memory.
 */
void gk20a_put(struct gk20a *g)
{
	/*
	 * Note - this is racy; two instances of this could run before the
	 * actual kref_put() runs, so you could see something like:
	 *
	 *   ... PUT: refs currently 2
	 *   ... PUT: refs currently 2
	 *   ... Freeing GK20A struct!
	 */
	nvgpu_log(g, gpu_dbg_shutdown, "PUT: refs currently %d",
		nvgpu_atomic_read(&g->refcount.refcount));

	nvgpu_ref_put(&g->refcount, gk20a_free_cb);
}
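For reference, gk20a_get() and gk20a_put() above form the usual acquire/release pair around g->refcount: gk20a_get() returns NULL once teardown has begun, and the final gk20a_put() frees the struct through gk20a_free_cb(). A minimal caller sketch follows; it is not part of the file above, and the function name my_gpu_work() is hypothetical.

static int my_gpu_work(struct gk20a *g_ref)
{
	struct gk20a *g;

	/*
	 * Take a reference; a NULL return means the driver is already being
	 * released and must not be used.
	 */
	g = gk20a_get(g_ref);
	if (g == NULL)
		return -ENODEV;

	/* ... interact with the GPU while the reference is held ... */

	/* Drop the reference; the last put invokes gk20a_free_cb(). */
	gk20a_put(g);

	return 0;
}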