Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c  503
1 files changed, 503 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
new file mode 100644
index 00000000..5dc60917
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -0,0 +1,503 @@
/*
 * GK20A Graphics
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/reboot.h>

#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/timers.h>
#include <nvgpu/soc.h>
#include <nvgpu/enabled.h>
#include <nvgpu/pmu.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/ltc.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/mm.h>
#include <nvgpu/ctxsw_trace.h>

#include <trace/events/gk20a.h>

#include "gk20a.h"
#include "channel_sync_gk20a.h"

#include "dbg_gpu_gk20a.h"
#include "mc_gk20a.h"
#include "hal.h"
#include "vgpu/vgpu.h"
#include "bus_gk20a.h"
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
#include "pstate/pstate.h"
#endif

#ifdef CONFIG_TEGRA_19x_GPU
#include "nvgpu_gpuid_t19x.h"
#endif
void __nvgpu_check_gpu_state(struct gk20a *g)
{
        u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL);

        if (boot_0 == 0xffffffff) {
                pr_err("nvgpu: GPU has disappeared from bus!!\n");
                pr_err("nvgpu: Rebooting system!!\n");
                kernel_restart(NULL);
        }
}
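
/*
 * A read from a device that has dropped off the bus returns all ones, which
 * is why boot_0 == 0xffffffff above is treated as "GPU gone" rather than as
 * a real register value.
 */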

void __gk20a_warn_on_no_regs(void)
{
        WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
}

static int gk20a_detect_chip(struct gk20a *g)
{
        struct nvgpu_gpu_params *p = &g->params;

        if (p->gpu_arch)
                return 0;

        gk20a_mc_boot_0(g, &p->gpu_arch, &p->gpu_impl, &p->gpu_rev);

        gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
                        g->params.gpu_arch,
                        g->params.gpu_impl,
                        g->params.gpu_rev);

        return gpu_init_hal(g);
}
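
/*
 * The arch/impl/rev triple decoded from NV_PMC_BOOT_0 above identifies the
 * chip; gpu_init_hal() uses it to bind the per-chip g->ops function pointers
 * that the rest of this file calls through.
 */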

int gk20a_prepare_poweroff(struct gk20a *g)
{
        int ret = 0;

        gk20a_dbg_fn("");

        ret = gk20a_channel_suspend(g);
        if (ret)
                return ret;

        /* disable elpg before gr or fifo suspend */
        if (g->ops.pmu.is_pmu_supported(g))
                ret |= nvgpu_pmu_destroy(g);

        ret |= gk20a_gr_suspend(g);
        ret |= nvgpu_mm_suspend(g);
        ret |= gk20a_fifo_suspend(g);

        gk20a_ce_suspend(g);

        /* Disable GPCPLL */
        if (g->ops.clk.suspend_clk_support)
                ret |= g->ops.clk.suspend_clk_support(g);

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
        if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE))
                gk20a_deinit_pstate_support(g);
#endif
        g->power_on = false;

        return ret;
}
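
/*
 * Note on the bring-up order below: units come up roughly bottom-up. Clocks
 * and the priv ring are enabled before the memory subsystem (LTC/MM), memory
 * comes before the engines (FIFO/GR), and PMU/pstate power management is
 * initialized only once the units it governs have been reset.
 */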

int gk20a_finalize_poweron(struct gk20a *g)
{
        int err;
#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
        u32 nr_pages;
#endif

        gk20a_dbg_fn("");

        if (g->power_on)
                return 0;

        g->power_on = true;

        err = gk20a_detect_chip(g);
        if (err)
                goto done;

        /*
         * Before probing the GPU make sure the GPU's state is cleared. This
         * is relevant for rebind operations.
         */
        if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
                g->ops.xve.reset_gpu(g);
                g->gpu_reset_done = true;
        }

        /*
         * Do this early so any early VMs that get made are capable of mapping
         * buffers.
         */
        err = nvgpu_pd_cache_init(g);
        if (err)
                goto done; /* use done: so power_on is cleared on failure */

        /* init interface layer support for the PMU, SEC2 and NVDEC falcons */
        nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
        nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
        nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);

        if (g->ops.bios.init) {
                err = g->ops.bios.init(g);
                if (err)
                        goto done;
        }

        g->ops.bus.init_hw(g);

        if (g->ops.clk.disable_slowboot)
                g->ops.clk.disable_slowboot(g);

        gk20a_enable_priv_ring(g);

        /*
         * TBD: move this after graphics init, in which blcg/slcg is enabled.
         * This function removes SlowdownOnBoot, which applies a 32x divider
         * on the gpcpll bypass path. The purpose of slowdown is to save power
         * during boot, but it also significantly slows down gk20a init on
         * simulation and emulation. We should remove SOB after graphics power
         * saving features (blcg/slcg) are enabled. For now, do it here.
         */
        if (g->ops.clk.init_clk_support) {
                err = g->ops.clk.init_clk_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init gk20a clk");
                        goto done;
                }
        }

        if (g->ops.fb.mem_unlock) {
                err = g->ops.fb.mem_unlock(g);
                if (err) {
                        nvgpu_err(g, "failed to unlock memory");
                        goto done;
                }
        }

        err = g->ops.fifo.reset_enable_hw(g);
        if (err) {
                nvgpu_err(g, "failed to reset gk20a fifo");
                goto done;
        }

        err = nvgpu_init_ltc_support(g);
        if (err) {
                nvgpu_err(g, "failed to init ltc");
                goto done;
        }

        err = nvgpu_init_mm_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a mm");
                goto done;
        }

        err = gk20a_init_fifo_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a fifo");
                goto done;
        }

        if (g->ops.therm.elcg_init_idle_filters)
                g->ops.therm.elcg_init_idle_filters(g);

        g->ops.mc.intr_enable(g);

        err = gk20a_enable_gr_hw(g);
        if (err) {
                nvgpu_err(g, "failed to enable gr");
                goto done;
        }

        if (g->ops.pmu.is_pmu_supported(g)) {
                if (g->ops.pmu.prepare_ucode) {
                        err = g->ops.pmu.prepare_ucode(g);
                        if (err) {
                                nvgpu_err(g, "failed to init pmu ucode");
                                goto done;
                        }
                }
        }

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
        if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
                err = gk20a_init_pstate_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init pstates");
                        goto done;
                }
        }
#endif

        if (g->ops.pmu.is_pmu_supported(g)) {
                err = nvgpu_init_pmu_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init gk20a pmu");
                        goto done;
                }
        }

        err = gk20a_init_gr_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a gr");
                goto done;
        }

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
        if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
                err = gk20a_init_pstate_pmu_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init pstates");
                        goto done;
                }
        }

        err = nvgpu_clk_arb_init_arbiter(g);
        if (err) {
                nvgpu_err(g, "failed to init clk arb");
                goto done;
        }
#endif

        err = gk20a_init_therm_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a therm");
                goto done;
        }

        err = g->ops.chip_init_gpu_characteristics(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a gpu characteristics");
                goto done;
        }

#ifdef CONFIG_GK20A_CTXSW_TRACE
        err = gk20a_ctxsw_trace_init(g);
        if (err) {
                nvgpu_warn(g, "could not initialize ctxsw tracing");
                err = 0; /* ctxsw tracing is optional; don't fail poweron */
        }
#endif

        /* Restore the debug setting */
        g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);

        gk20a_channel_resume(g);

        gk20a_init_ce_support(g);

        nvgpu_init_mm_ce_context(g);

        if (g->ops.xve.available_speeds) {
                u32 speed;

                if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) &&
                    g->ops.xve.disable_aspm)
                        g->ops.xve.disable_aspm(g);

                g->ops.xve.available_speeds(g, &speed);

                /*
                 * speed is a bitmask of supported link speeds; keep only the
                 * highest set bit, i.e. the fastest supported speed.
                 */
                speed = 1 << (fls(speed) - 1);
                err = g->ops.xve.set_speed(g, speed);
                if (err) {
                        nvgpu_err(g, "Failed to set PCIe bus speed!");
                        goto done;
                }
        }

        nvgpu_vidmem_thread_unpause(&g->mm);

#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
        if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) {
                if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
                        nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
                        __nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
                                        g->syncpt_unit_base, nr_pages);
                }
        }
#endif

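        /*
         * Every failure path above jumps here with err set; dropping power_on
         * again lets a later gk20a_finalize_poweron() call retry the full
         * sequence.
         */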
done:
        if (err)
                g->power_on = false;

        return err;
}

/*
 * Check if the device can go busy. Basically, if the driver is currently in
 * the process of dying then do not let new code paths mark the driver busy.
 */
int gk20a_can_busy(struct gk20a *g)
{
        if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
                return 0;
        return 1;
}
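
/*
 * Illustrative call pattern (a sketch only; gk20a_busy()/gk20a_idle() live
 * in the platform/module layer, not in this file):
 *
 *      if (!gk20a_can_busy(g))
 *              return -ENODEV;
 *      err = gk20a_busy(g);    (takes a usage-count reference)
 *      if (err)
 *              return err;
 *      ... touch hardware ...
 *      gk20a_idle(g);
 */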

int gk20a_wait_for_idle(struct gk20a *g)
{
        int wait_length = 150; /* ~3 s max: 150 iterations of 20 ms sleeps */
        int target_usage_count = 0;

        if (!g)
                return -ENODEV;

        if (g->user_railgate_disabled)
                target_usage_count = 1;

        while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count)
                        && (wait_length-- >= 0))
                nvgpu_msleep(20);

        if (wait_length < 0) {
                pr_warn("%s: Timed out waiting for idle (%d)!\n",
                        __func__, nvgpu_atomic_read(&g->usage_count));
                return -ETIMEDOUT;
        }

        return 0;
}

int gk20a_init_gpu_characteristics(struct gk20a *g)
{
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);

        if (IS_ENABLED(CONFIG_SYNC))
                __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);

        if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g))
                __nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);

        if (gk20a_platform_has_syncpoints(g))
                __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, true);

        /*
         * Fast submits are supported as long as the user doesn't request
         * anything that depends on job tracking. (Here, "fast" means strictly
         * no metadata: just the gpfifo contents are copied and gp_put
         * updated.)
         */
        __nvgpu_set_enabled(g,
                        NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
                        true);

        /*
         * The sync framework requires deferred job cleanup, wrapping syncs in
         * FDs, and other heavy lifting, which prevents deterministic submits.
         * This is supported otherwise, provided that the user doesn't request
         * anything that depends on deferred cleanup.
         */
        if (!gk20a_channel_sync_needs_sync_framework(g))
                __nvgpu_set_enabled(g,
                                NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
                                true);

        __nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);

        __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

        if (g->ops.clk_arb.get_arbiter_clk_domains)
                __nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);

        g->ops.gr.detect_sm_arch(g);

        if (g->ops.gr.init_cyclestats)
                g->ops.gr.init_cyclestats(g);

        g->ops.gr.get_rop_l2_en_mask(g);

        return 0;
}
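
/*
 * The NVGPU_SUPPORT_* flags set above are what userspace later queries via
 * the gpu-characteristics interface; aside from the g->ops.gr.* probes at
 * the end, this function only records capabilities, it does not program the
 * hardware.
 */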

/*
 * Free the gk20a struct.
 */
static void gk20a_free_cb(struct nvgpu_ref *refcount)
{
        struct gk20a *g = container_of(refcount,
                        struct gk20a, refcount);

        gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!");

        gk20a_ce_destroy(g);

        if (g->remove_support)
                g->remove_support(g);

        if (g->free)
                g->free(g);
}

/**
 * gk20a_get() - Increment ref count on driver
 *
 * @g: The driver to increment
 *
 * This will fail if the driver is in the process of being released. In that
 * case it will return NULL. Otherwise a pointer to the driver passed in will
 * be returned.
 */
struct gk20a * __must_check gk20a_get(struct gk20a *g)
{
        int success;

        /*
         * Handle the possibility we are still freeing the gk20a struct while
         * gk20a_get() is called. Unlikely but plausible race condition.
         * Ideally the code will never be in such a situation that this race
         * is possible.
         */
        success = nvgpu_ref_get_unless_zero(&g->refcount);

        gk20a_dbg(gpu_dbg_shutdown, "GET: refs currently %d %s",
                nvgpu_atomic_read(&g->refcount.refcount),
                success ? "" : "(FAILED)");

        return success ? g : NULL;
}

/**
 * gk20a_put() - Decrement ref count on driver
 *
 * @g: The driver to decrement
 *
 * Decrement the driver ref-count. If necessary, also free the underlying
 * driver memory.
 */
void gk20a_put(struct gk20a *g)
{
        /*
         * Note - this is racy: two instances of this could run before the
         * actual kref_put() runs, so you could see something like:
         *
         *   ... PUT: refs currently 2
         *   ... PUT: refs currently 2
         *   ... Freeing GK20A struct!
         */
        gk20a_dbg(gpu_dbg_shutdown, "PUT: refs currently %d",
                nvgpu_atomic_read(&g->refcount.refcount));

        nvgpu_ref_put(&g->refcount, gk20a_free_cb);
}
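
/*
 * Typical usage of the refcount pair above (a sketch; the device-to-gk20a
 * lookup helper is illustrative, not something defined in this file):
 *
 *      struct gk20a *g = gk20a_get(get_gk20a(dev));
 *
 *      if (!g)
 *              return -ENODEV; (the driver is being torn down)
 *      ... safe to use g here ...
 *      gk20a_put(g);
 */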