diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c new file mode 100644 index 00000000..5dc60917 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -0,0 +1,503 @@ | |||
1 | /* | ||
2 | * GK20A Graphics | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/reboot.h> | ||
26 | |||
27 | #include <nvgpu/nvgpu_common.h> | ||
28 | #include <nvgpu/kmem.h> | ||
29 | #include <nvgpu/allocator.h> | ||
30 | #include <nvgpu/timers.h> | ||
31 | #include <nvgpu/soc.h> | ||
32 | #include <nvgpu/enabled.h> | ||
33 | #include <nvgpu/pmu.h> | ||
34 | #include <nvgpu/gmmu.h> | ||
35 | #include <nvgpu/ltc.h> | ||
36 | #include <nvgpu/vidmem.h> | ||
37 | #include <nvgpu/mm.h> | ||
38 | #include <nvgpu/ctxsw_trace.h> | ||
39 | |||
40 | #include <trace/events/gk20a.h> | ||
41 | |||
42 | #include "gk20a.h" | ||
43 | #include "channel_sync_gk20a.h" | ||
44 | |||
45 | #include "dbg_gpu_gk20a.h" | ||
46 | #include "mc_gk20a.h" | ||
47 | #include "hal.h" | ||
48 | #include "vgpu/vgpu.h" | ||
49 | #include "bus_gk20a.h" | ||
50 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
51 | #include "pstate/pstate.h" | ||
52 | #endif | ||
53 | |||
54 | #ifdef CONFIG_TEGRA_19x_GPU | ||
55 | #include "nvgpu_gpuid_t19x.h" | ||
56 | #endif | ||
57 | |||
58 | void __nvgpu_check_gpu_state(struct gk20a *g) | ||
59 | { | ||
60 | u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); | ||
61 | |||
62 | if (boot_0 == 0xffffffff) { | ||
63 | pr_err("nvgpu: GPU has disappeared from bus!!\n"); | ||
64 | pr_err("nvgpu: Rebooting system!!\n"); | ||
65 | kernel_restart(NULL); | ||
66 | } | ||
67 | } | ||
68 | |||
/*
 * Called from the register-access path when the BAR mappings have
 * already been torn down; emits a one-time warning with a backtrace so
 * the offending access can be tracked down.
 */
void __gk20a_warn_on_no_regs(void)
{
	WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
}
73 | |||
74 | static int gk20a_detect_chip(struct gk20a *g) | ||
75 | { | ||
76 | struct nvgpu_gpu_params *p = &g->params; | ||
77 | u32 val; | ||
78 | |||
79 | if (p->gpu_arch) | ||
80 | return 0; | ||
81 | |||
82 | val = gk20a_mc_boot_0(g, &p->gpu_arch, &p->gpu_impl, &p->gpu_rev); | ||
83 | |||
84 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", | ||
85 | g->params.gpu_arch, | ||
86 | g->params.gpu_impl, | ||
87 | g->params.gpu_rev); | ||
88 | |||
89 | return gpu_init_hal(g); | ||
90 | } | ||
91 | |||
/*
 * Quiesce the GPU in preparation for powering it off.
 *
 * Suspends channels first (bailing out early if that fails), then tears
 * down the PMU, GR, MM, FIFO and CE units, and finally disables the
 * clocks and pstate support. The ordering is deliberate — see the
 * inline comments below.
 *
 * NOTE(review): after the channel suspend, errors are accumulated with
 * bitwise OR, so a nonzero return signals "something failed" but is not
 * necessarily a meaningful errno value. Teardown continues past
 * individual failures on purpose.
 *
 * Returns 0 on success, nonzero if any suspend step failed.
 */
int gk20a_prepare_poweroff(struct gk20a *g)
{
	int ret = 0;

	gk20a_dbg_fn("");

	/* Channels must go idle first; abort power-off if they cannot. */
	ret = gk20a_channel_suspend(g);
	if (ret)
		return ret;

	/* disable elpg before gr or fifo suspend */
	if (g->ops.pmu.is_pmu_supported(g))
		ret |= nvgpu_pmu_destroy(g);

	ret |= gk20a_gr_suspend(g);
	ret |= nvgpu_mm_suspend(g);
	ret |= gk20a_fifo_suspend(g);

	gk20a_ce_suspend(g);

	/* Disable GPCPLL */
	if (g->ops.clk.suspend_clk_support)
		ret |= g->ops.clk.suspend_clk_support(g);

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE))
		gk20a_deinit_pstate_support(g);
#endif
	/* Mark powered off even if some step above reported an error. */
	g->power_on = false;

	return ret;
}
124 | |||
/*
 * Bring the GPU fully out of power-off.
 *
 * Detects the chip, optionally resets it, and then initializes every
 * unit — PD cache, falcons, BIOS, bus, clocks, FB, FIFO, LTC, MM,
 * therm-filters, interrupts, GR, PMU, pstates, clk arbiter, CE, and
 * (on PCIe parts) link speed — in dependency order. On failure the
 * power_on flag is cleared again before returning.
 *
 * NOTE(review): the call ordering below follows hardware init
 * dependencies; do not reorder.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int gk20a_finalize_poweron(struct gk20a *g)
{
	int err;
#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
	u32 nr_pages;
#endif

	gk20a_dbg_fn("");

	/* Idempotent: nothing to do if already powered on. */
	if (g->power_on)
		return 0;

	g->power_on = true;

	err = gk20a_detect_chip(g);
	if (err)
		goto done;

	/*
	 * Before probing the GPU make sure the GPU's state is cleared. This is
	 * relevant for rebind operations.
	 */
	if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
		g->ops.xve.reset_gpu(g);
		g->gpu_reset_done = true;
	}

	/*
	 * Do this early so any early VMs that get made are capable of mapping
	 * buffers.
	 */
	err = nvgpu_pd_cache_init(g);
	if (err)
		return err;
		/*
		 * NOTE(review): this early return bypasses the 'done:' label,
		 * leaving g->power_on set to true on failure — confirm whether
		 * that is intentional or should be 'goto done'.
		 */

	/* init interface layer support for PMU falcon */
	nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
	nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
	nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);

	if (g->ops.bios.init)
		err = g->ops.bios.init(g);
	if (err)
		goto done;

	g->ops.bus.init_hw(g);

	if (g->ops.clk.disable_slowboot)
		g->ops.clk.disable_slowboot(g);

	gk20a_enable_priv_ring(g);

	/* TBD: move this after graphics init in which blcg/slcg is enabled.
	   This function removes SlowdownOnBoot which applies 32x divider
	   on gpcpll bypass path. The purpose of slowdown is to save power
	   during boot but it also significantly slows down gk20a init on
	   simulation and emulation. We should remove SOB after graphics power
	   saving features (blcg/slcg) are enabled. For now, do it here. */
	if (g->ops.clk.init_clk_support) {
		err = g->ops.clk.init_clk_support(g);
		if (err) {
			nvgpu_err(g, "failed to init gk20a clk");
			goto done;
		}
	}

	if (g->ops.fb.mem_unlock) {
		err = g->ops.fb.mem_unlock(g);
		if (err) {
			nvgpu_err(g, "failed to unlock memory");
			goto done;
		}
	}

	err = g->ops.fifo.reset_enable_hw(g);

	if (err) {
		nvgpu_err(g, "failed to reset gk20a fifo");
		goto done;
	}

	err = nvgpu_init_ltc_support(g);
	if (err) {
		nvgpu_err(g, "failed to init ltc");
		goto done;
	}

	err = nvgpu_init_mm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a mm");
		goto done;
	}

	err = gk20a_init_fifo_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a fifo");
		goto done;
	}

	if (g->ops.therm.elcg_init_idle_filters)
		g->ops.therm.elcg_init_idle_filters(g);

	/* Interrupts can be enabled once the core units are up. */
	g->ops.mc.intr_enable(g);

	err = gk20a_enable_gr_hw(g);
	if (err) {
		nvgpu_err(g, "failed to enable gr");
		goto done;
	}

	/* Load PMU ucode before PMU (and pstate) support is initialized. */
	if (g->ops.pmu.is_pmu_supported(g)) {
		if (g->ops.pmu.prepare_ucode)
			err = g->ops.pmu.prepare_ucode(g);
		if (err) {
			nvgpu_err(g, "failed to init pmu ucode");
			goto done;
		}
	}

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		err = gk20a_init_pstate_support(g);
		if (err) {
			nvgpu_err(g, "failed to init pstates");
			goto done;
		}
	}
#endif

	if (g->ops.pmu.is_pmu_supported(g)) {
		err = nvgpu_init_pmu_support(g);
		if (err) {
			nvgpu_err(g, "failed to init gk20a pmu");
			goto done;
		}
	}

	err = gk20a_init_gr_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gr");
		goto done;
	}

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	/* Pstate PMU support needs both pstates and the PMU up. */
	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		err = gk20a_init_pstate_pmu_support(g);
		if (err) {
			nvgpu_err(g, "failed to init pstates");
			goto done;
		}
	}

	err = nvgpu_clk_arb_init_arbiter(g);
	if (err) {
		nvgpu_err(g, "failed to init clk arb");
		goto done;
	}
#endif

	err = gk20a_init_therm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a therm");
		goto done;
	}

	err = g->ops.chip_init_gpu_characteristics(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gpu characteristics");
		goto done;
	}

#ifdef CONFIG_GK20A_CTXSW_TRACE
	/* Tracing is best-effort: warn but do not fail the power-on. */
	err = gk20a_ctxsw_trace_init(g);
	if (err)
		nvgpu_warn(g, "could not initialize ctxsw tracing");
#endif

	/* Restore the debug setting */
	g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);

	gk20a_channel_resume(g);

	gk20a_init_ce_support(g);

	nvgpu_init_mm_ce_context(g);

	/* PCIe parts: disable ASPM if unsupported and train to max speed. */
	if (g->ops.xve.available_speeds) {
		u32 speed;

		if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && g->ops.xve.disable_aspm)
			g->ops.xve.disable_aspm(g);

		g->ops.xve.available_speeds(g, &speed);

		/* Set to max speed */
		speed = 1 << (fls(speed) - 1);
		err = g->ops.xve.set_speed(g, speed);
		if (err) {
			nvgpu_err(g, "Failed to set PCIe bus speed!");
			goto done;
		}
	}

	nvgpu_vidmem_thread_unpause(&g->mm);

#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
	/* Map the syncpoint unit aperture once, on first power-on. */
	if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) {
		if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
			nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
			__nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
					g->syncpt_unit_base, nr_pages);
		}
	}
#endif

done:
	/* Any failure above means we are not actually powered on. */
	if (err)
		g->power_on = false;

	return err;
}
346 | |||
347 | /* | ||
348 | * Check if the device can go busy. Basically if the driver is currently | ||
349 | * in the process of dying then do not let new places make the driver busy. | ||
350 | */ | ||
351 | int gk20a_can_busy(struct gk20a *g) | ||
352 | { | ||
353 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
354 | return 0; | ||
355 | return 1; | ||
356 | } | ||
357 | |||
358 | int gk20a_wait_for_idle(struct gk20a *g) | ||
359 | { | ||
360 | int wait_length = 150; /* 3 second overall max wait. */ | ||
361 | int target_usage_count = 0; | ||
362 | |||
363 | if (!g) | ||
364 | return -ENODEV; | ||
365 | |||
366 | if (g->user_railgate_disabled) | ||
367 | target_usage_count = 1; | ||
368 | |||
369 | while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count) | ||
370 | && (wait_length-- >= 0)) | ||
371 | nvgpu_msleep(20); | ||
372 | |||
373 | if (wait_length < 0) { | ||
374 | pr_warn("%s: Timed out waiting for idle (%d)!\n", | ||
375 | __func__, nvgpu_atomic_read(&g->usage_count)); | ||
376 | return -ETIMEDOUT; | ||
377 | } | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
/*
 * Populate the common (chip-independent) capability flags reported to
 * user-space, then query per-chip GR characteristics via the HAL.
 *
 * Flags that depend on optional HAL hooks or platform features are only
 * set when the corresponding hook/feature is present. Always returns 0.
 */
int gk20a_init_gpu_characteristics(struct gk20a *g)
{
	/* Capabilities supported by all chips using this common code. */
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);

	/* Sync-fence FDs need the kernel sync framework compiled in. */
	if (IS_ENABLED(CONFIG_SYNC))
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);

	if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g))
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);

	if (gk20a_platform_has_syncpoints(g))
		__nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, true);

	/*
	 * Fast submits are supported as long as the user doesn't request
	 * anything that depends on job tracking. (Here, fast means strictly no
	 * metadata, just the gpfifo contents are copied and gp_put updated).
	 */
	__nvgpu_set_enabled(g,
			NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
			true);

	/*
	 * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
	 * and other heavy stuff, which prevents deterministic submits. This is
	 * supported otherwise, provided that the user doesn't request anything
	 * that depends on deferred cleanup.
	 */
	if (!gk20a_channel_sync_needs_sync_framework(g))
		__nvgpu_set_enabled(g,
				NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
				true);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

	/* Clock controls only make sense where an arbiter exists. */
	if (g->ops.clk_arb.get_arbiter_clk_domains)
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);

	/* Query chip-specific GR properties through the HAL. */
	g->ops.gr.detect_sm_arch(g);

	if (g->ops.gr.init_cyclestats)
		g->ops.gr.init_cyclestats(g);

	g->ops.gr.get_rop_l2_en_mask(g);

	return 0;
}
434 | |||
435 | /* | ||
436 | * Free the gk20a struct. | ||
437 | */ | ||
438 | static void gk20a_free_cb(struct nvgpu_ref *refcount) | ||
439 | { | ||
440 | struct gk20a *g = container_of(refcount, | ||
441 | struct gk20a, refcount); | ||
442 | |||
443 | gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!"); | ||
444 | |||
445 | gk20a_ce_destroy(g); | ||
446 | |||
447 | if (g->remove_support) | ||
448 | g->remove_support(g); | ||
449 | |||
450 | if (g->free) | ||
451 | g->free(g); | ||
452 | } | ||
453 | |||
454 | /** | ||
455 | * gk20a_get() - Increment ref count on driver | ||
456 | * | ||
457 | * @g The driver to increment | ||
458 | * This will fail if the driver is in the process of being released. In that | ||
459 | * case it will return NULL. Otherwise a pointer to the driver passed in will | ||
460 | * be returned. | ||
461 | */ | ||
462 | struct gk20a * __must_check gk20a_get(struct gk20a *g) | ||
463 | { | ||
464 | int success; | ||
465 | |||
466 | /* | ||
467 | * Handle the possibility we are still freeing the gk20a struct while | ||
468 | * gk20a_get() is called. Unlikely but plausible race condition. Ideally | ||
469 | * the code will never be in such a situation that this race is | ||
470 | * possible. | ||
471 | */ | ||
472 | success = nvgpu_ref_get_unless_zero(&g->refcount); | ||
473 | |||
474 | gk20a_dbg(gpu_dbg_shutdown, "GET: refs currently %d %s", | ||
475 | nvgpu_atomic_read(&g->refcount.refcount), | ||
476 | success ? "" : "(FAILED)"); | ||
477 | |||
478 | return success ? g : NULL; | ||
479 | } | ||
480 | |||
481 | /** | ||
482 | * gk20a_put() - Decrement ref count on driver | ||
483 | * | ||
484 | * @g - The driver to decrement | ||
485 | * | ||
486 | * Decrement the driver ref-count. If neccesary also free the underlying driver | ||
487 | * memory | ||
488 | */ | ||
489 | void gk20a_put(struct gk20a *g) | ||
490 | { | ||
491 | /* | ||
492 | * Note - this is racy, two instances of this could run before the | ||
493 | * actual kref_put(0 runs, you could see something like: | ||
494 | * | ||
495 | * ... PUT: refs currently 2 | ||
496 | * ... PUT: refs currently 2 | ||
497 | * ... Freeing GK20A struct! | ||
498 | */ | ||
499 | gk20a_dbg(gpu_dbg_shutdown, "PUT: refs currently %d", | ||
500 | nvgpu_atomic_read(&g->refcount.refcount)); | ||
501 | |||
502 | nvgpu_ref_put(&g->refcount, gk20a_free_cb); | ||
503 | } | ||