-rw-r--r--	drivers/gpu/nvgpu/Makefile.nvgpu	5
-rw-r--r--	drivers/gpu/nvgpu/common/linux/driver_common.c (renamed from drivers/gpu/nvgpu/common/nvgpu_common.c)	16
-rw-r--r--	drivers/gpu/nvgpu/common/linux/module.c	1052
-rw-r--r--	drivers/gpu/nvgpu/common/linux/module.h	22
-rw-r--r--	drivers/gpu/nvgpu/common/linux/pci.c (renamed from drivers/gpu/nvgpu/pci.c)	1
-rw-r--r--	drivers/gpu/nvgpu/common/linux/pci.h (renamed from drivers/gpu/nvgpu/pci.h)	0
-rw-r--r--	drivers/gpu/nvgpu/gk20a/debug_gk20a.c	68
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a.c	1119
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a.h	10
-rw-r--r--	drivers/gpu/nvgpu/gk20a/sim_gk20a.c	9
10 files changed, 1191 insertions, 1111 deletions
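
Note: this patch moves the Linux driver lifecycle glue (probe/remove, runtime PM, railgating, module init/exit) out of gk20a/gk20a.c into the Linux-specific common/linux/module.c, splitting poweron/poweroff into OS-independent gk20a_finalize_poweron()/gk20a_prepare_poweroff() (left in gk20a.c) and the Linux wrappers gk20a_pm_finalize_poweron()/gk20a_pm_prepare_poweroff() (now in module.c). The gk20a_busy()/gk20a_idle() contract is unchanged. A minimal caller sketch, assuming only gk20a_busy() and gk20a_idle() as shown in this patch; submit_work() and do_gpu_work() are hypothetical placeholders, not functions from the patch:

	/* Hedged sketch of the busy/idle contract implemented below. */
	static int submit_work(struct gk20a *g) { return 0; }	/* stub for real GPU work */

	static int do_gpu_work(struct gk20a *g)
	{
		int err = gk20a_busy(g);	/* take a usage ref; powers on if needed */

		if (err)
			return err;		/* driver dying or poweron failed */

		err = submit_work(g);

		gk20a_idle(g);			/* drop the ref; may autosuspend/railgate */
		return err;
	}
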
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 658f8b7f..983df242 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -23,6 +23,7 @@ endif
 obj-$(CONFIG_GK20A) := nvgpu.o
 
 nvgpu-y := \
+	common/linux/module.o \
 	common/linux/kmem.o \
 	common/linux/timers.o \
 	common/linux/ioctl.o \
@@ -34,13 +35,13 @@ nvgpu-y := \
 	common/linux/nvgpu_mem.o \
 	common/linux/dma.o \
 	common/linux/soc.o \
+	common/linux/driver_common.o \
 	common/mm/nvgpu_allocator.o \
 	common/mm/bitmap_allocator.o \
 	common/mm/buddy_allocator.o \
 	common/mm/page_allocator.o \
 	common/mm/lockless_allocator.o \
 	common/pramin.o \
-	common/nvgpu_common.o \
 	common/semaphore.o \
 	common/as.o \
 	common/rbtree.o \
@@ -105,7 +106,7 @@ nvgpu-y := \
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += tegra/linux/platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
-nvgpu-$(CONFIG_GK20A_PCI) += pci.o
+nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o
 
 nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	gk20a/platform_vgpu_tegra.o \
diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 0c812d34..5c96b4e8 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -24,7 +24,7 @@
 
 #include "gk20a/gk20a_scale.h"
 #include "gk20a/gk20a.h"
-#include "gk20a/gr_gk20a.h"
+#include "module.h"
 
 #define EMC3D_DEFAULT_RATIO 750
 
@@ -124,6 +124,20 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
 	nvgpu_mutex_init(&g->mm.priv_lock);
 }
 
+static int gk20a_secure_page_alloc(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int err = 0;
+
+	if (platform->secure_page_alloc) {
+		err = platform->secure_page_alloc(dev);
+		if (!err)
+			platform->secure_alloc_ready = true;
+	}
+
+	return err;
+}
+
 int nvgpu_probe(struct gk20a *g,
 		const char *debugfs_symlink,
 		const char *interface_name,
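
gk20a_secure_page_alloc() lands here as a static helper: it invokes the optional platform secure_page_alloc hook and latches platform->secure_alloc_ready on success. A sketch of the platform side, assuming a hypothetical my_vpr_page_alloc() implementation (only the .secure_page_alloc field and the secure_alloc_ready flag come from this patch):

	/* Hypothetical platform wiring for the optional hook used above. */
	static int my_vpr_page_alloc(struct device *dev)
	{
		/* carve out secure (VPR) pages for this GPU here */
		return 0;
	}

	static struct gk20a_platform my_platform = {
		.secure_page_alloc = my_vpr_page_alloc,
	};
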
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
new file mode 100644
index 00000000..2cbf996b
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -0,0 +1,1052 @@
+/*
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/platform/tegra/common.h>
+
+#include <nvgpu/kmem.h>
+#include <nvgpu/nvgpu_common.h>
+#include <nvgpu/soc.h>
+
+#include "gk20a/gk20a.h"
+#include "vgpu/vgpu.h"
+#include "gk20a/gk20a_scale.h"
+#include "gk20a/ctxsw_trace_gk20a.h"
+#include "pci.h"
+#include "module.h"
+#ifdef CONFIG_TEGRA_19x_GPU
+#include "nvgpu_gpuid_t19x.h"
+#endif
+
+#define CLASS_NAME "nvidia-gpu"
+/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
+
+#define GK20A_WAIT_FOR_IDLE_MS 2000
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gk20a.h>
+
+void gk20a_busy_noresume(struct device *dev)
+{
+	pm_runtime_get_noresume(dev);
+}
+
+int gk20a_busy(struct gk20a *g)
+{
+	int ret = 0;
+	struct device *dev;
+
+	if (!g)
+		return -ENODEV;
+
+	atomic_inc(&g->usage_count);
+
+	down_read(&g->busy_lock);
+
+	if (!gk20a_can_busy(g)) {
+		ret = -ENODEV;
+		atomic_dec(&g->usage_count);
+		goto fail;
+	}
+
+	dev = g->dev;
+
+	if (pm_runtime_enabled(dev)) {
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0) {
+			pm_runtime_put_noidle(dev);
+			atomic_dec(&g->usage_count);
+			goto fail;
+		}
+	} else {
+		if (!g->power_on) {
+			ret = gk20a_gpu_is_virtual(dev) ?
+				vgpu_pm_finalize_poweron(dev)
+				: gk20a_pm_finalize_poweron(dev);
+			if (ret) {
+				atomic_dec(&g->usage_count);
+				goto fail;
+			}
+		}
+	}
+
+	gk20a_scale_notify_busy(dev);
+
+fail:
+	up_read(&g->busy_lock);
+
+	return ret < 0 ? ret : 0;
+}
+
+void gk20a_idle_nosuspend(struct device *dev)
+{
+	pm_runtime_put_noidle(dev);
+}
+
+void gk20a_idle(struct gk20a *g)
+{
+	struct device *dev;
+
+	atomic_dec(&g->usage_count);
+	down_read(&g->busy_lock);
+
+	dev = g->dev;
+
+	if (!(dev && gk20a_can_busy(g)))
+		goto fail;
+
+	if (pm_runtime_enabled(dev)) {
+#ifdef CONFIG_PM
+		if (atomic_read(&g->dev->power.usage_count) == 1)
+			gk20a_scale_notify_idle(dev);
+#endif
+
+		pm_runtime_mark_last_busy(dev);
+		pm_runtime_put_sync_autosuspend(dev);
+
+	} else {
+		gk20a_scale_notify_idle(dev);
+	}
+fail:
+	up_read(&g->busy_lock);
+}
+
+int gk20a_pm_finalize_poweron(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	int err, nice_value;
+
+	gk20a_dbg_fn("");
+
+	if (g->power_on)
+		return 0;
+
+	trace_gk20a_finalize_poweron(dev_name(dev));
+
+	/* Increment platform power refcount */
+	if (platform->busy) {
+		err = platform->busy(dev);
+		if (err < 0) {
+			nvgpu_err(g, "failed to poweron platform dependency");
+			return err;
+		}
+	}
+
+	err = gk20a_restore_registers(g);
+	if (err)
+		return err;
+
+	nice_value = task_nice(current);
+	set_user_nice(current, -20);
+
+	err = gk20a_finalize_poweron(g);
+	set_user_nice(current, nice_value);
+	if (err)
+		goto done;
+
+	trace_gk20a_finalize_poweron_done(dev_name(dev));
+
+	enable_irq(g->irq_stall);
+	if (g->irq_stall != g->irq_nonstall)
+		enable_irq(g->irq_nonstall);
+	g->irqs_enabled = 1;
+
+	gk20a_scale_resume(g->dev);
+
+	if (platform->has_cde)
+		gk20a_init_cde_support(g);
+
+done:
+	if (err)
+		g->power_on = false;
+
+	return err;
+}
+
+static int gk20a_pm_prepare_poweroff(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int ret = 0;
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+	gk20a_dbg_fn("");
+
+	nvgpu_mutex_acquire(&g->poweroff_lock);
+
+	if (!g->power_on)
+		goto done;
+
+	gk20a_scale_suspend(dev);
+
+	ret = gk20a_prepare_poweroff(g);
+	if (ret)
+		goto error;
+
+	/*
+	 * After this point, gk20a interrupts should not get
+	 * serviced.
+	 */
+	disable_irq(g->irq_stall);
+	if (g->irq_stall != g->irq_nonstall)
+		disable_irq(g->irq_nonstall);
+
+	/*
+	 * is_fmodel needs to be in gk20a struct for deferred teardown
+	 */
+	g->is_fmodel = platform->is_fmodel;
+
+	/* Decrement platform power refcount */
+	if (platform->idle)
+		platform->idle(dev);
+
+	/* Stop CPU from accessing the GPU registers. */
+	gk20a_lockout_registers(g);
+
+	nvgpu_mutex_release(&g->poweroff_lock);
+	return 0;
+
+error:
+	gk20a_scale_resume(dev);
+done:
+	nvgpu_mutex_release(&g->poweroff_lock);
+
+	return ret;
+}
+
+static struct of_device_id tegra_gk20a_of_match[] = {
+#ifdef CONFIG_TEGRA_GK20A
+	{ .compatible = "nvidia,tegra124-gk20a",
+		.data = &gk20a_tegra_platform },
+	{ .compatible = "nvidia,tegra210-gm20b",
+		.data = &gm20b_tegra_platform },
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+	{ .compatible = "nvidia,tegra186-gp10b",
+		.data = &gp10b_tegra_platform },
+#endif
+#ifdef CONFIG_TEGRA_19x_GPU
+	{ .compatible = TEGRA_19x_GPU_COMPAT_TEGRA,
+		.data = &t19x_gpu_tegra_platform },
+#endif
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,tegra124-gk20a-vgpu",
+		.data = &vgpu_tegra_platform },
+#endif
+#else
+	{ .compatible = "nvidia,tegra124-gk20a",
+		.data = &gk20a_generic_platform },
+	{ .compatible = "nvidia,tegra210-gm20b",
+		.data = &gk20a_generic_platform },
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+	{ .compatible = TEGRA_18x_GPU_COMPAT_TEGRA,
+		.data = &gk20a_generic_platform },
+#endif
+
+#endif
+	{ .compatible = "nvidia,generic-gk20a",
+		.data = &gk20a_generic_platform },
+	{ .compatible = "nvidia,generic-gm20b",
+		.data = &gk20a_generic_platform },
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+	{ .compatible = "nvidia,generic-gp10b",
+		.data = &gk20a_generic_platform },
+#endif
+	{ },
+};
+
+#ifdef CONFIG_PM
+/**
+ * __gk20a_do_idle() - force the GPU to idle and railgate
+ *
+ * In success, this call MUST be balanced by caller with __gk20a_do_unidle()
+ *
+ * Acquires two locks : &g->busy_lock and &platform->railgate_lock
+ * In success, we hold these locks and return
+ * In failure, we release these locks and return
+ */
+int __gk20a_do_idle(struct device *dev, bool force_reset)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct nvgpu_timeout timeout;
+	int ref_cnt;
+	int target_ref_cnt = 0;
+	bool is_railgated;
+	int err = 0;
+
+	/* acquire busy lock to block other busy() calls */
+	down_write(&g->busy_lock);
+
+	/* acquire railgate lock to prevent unrailgate in midst of do_idle() */
+	nvgpu_mutex_acquire(&platform->railgate_lock);
+
+	/* check if it is already railgated ? */
+	if (platform->is_railgated(dev))
+		return 0;
+
+	/*
+	 * release railgate_lock, prevent suspend by incrementing usage counter,
+	 * re-acquire railgate_lock
+	 */
+	nvgpu_mutex_release(&platform->railgate_lock);
+	pm_runtime_get_sync(dev);
+
+	/*
+	 * One refcount taken in this API
+	 * If User disables rail gating, we take one more
+	 * extra refcount
+	 */
+	if (platform->user_railgate_disabled)
+		target_ref_cnt = 2;
+	else
+		target_ref_cnt = 1;
+	nvgpu_mutex_acquire(&platform->railgate_lock);
+
+	nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
+			   NVGPU_TIMER_CPU_TIMER);
+
+	/* check and wait until GPU is idle (with a timeout) */
+	do {
+		nvgpu_msleep(1);
+		ref_cnt = atomic_read(&dev->power.usage_count);
+	} while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));
+
+	if (ref_cnt != target_ref_cnt) {
+		nvgpu_err(g, "failed to idle - refcount %d != 1",
+			ref_cnt);
+		goto fail_drop_usage_count;
+	}
+
+	/* check if global force_reset flag is set */
+	force_reset |= platform->force_reset_in_do_idle;
+
+	nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
+			   NVGPU_TIMER_CPU_TIMER);
+
+	if (platform->can_railgate && !force_reset) {
+		/*
+		 * Case 1 : GPU railgate is supported
+		 *
+		 * if GPU is now idle, we will have only one ref count,
+		 * drop this ref which will rail gate the GPU
+		 */
+		pm_runtime_put_sync(dev);
+
+		/* add sufficient delay to allow GPU to rail gate */
+		nvgpu_msleep(platform->railgate_delay);
+
+		/* check in loop if GPU is railgated or not */
+		do {
+			nvgpu_msleep(1);
+			is_railgated = platform->is_railgated(dev);
+		} while (!is_railgated && !nvgpu_timeout_expired(&timeout));
+
+		if (is_railgated) {
+			return 0;
+		} else {
+			nvgpu_err(g, "failed to idle in timeout");
+			goto fail_timeout;
+		}
+	} else {
+		/*
+		 * Case 2 : GPU railgate is not supported or we explicitly
+		 * do not want to depend on runtime PM
+		 *
+		 * if GPU is now idle, call prepare_poweroff() to save the
+		 * state and then do explicit railgate
+		 *
+		 * __gk20a_do_unidle() needs to unrailgate, call
+		 * finalize_poweron(), and then call pm_runtime_put_sync()
+		 * to balance the GPU usage counter
+		 */
+
+		/* Save the GPU state */
+		err = gk20a_pm_prepare_poweroff(dev);
+		if (err)
+			goto fail_drop_usage_count;
+
+		/* railgate GPU */
+		platform->railgate(dev);
+
+		nvgpu_udelay(10);
+
+		g->forced_reset = true;
+		return 0;
+	}
+
+fail_drop_usage_count:
+	pm_runtime_put_noidle(dev);
+fail_timeout:
+	nvgpu_mutex_release(&platform->railgate_lock);
+	up_write(&g->busy_lock);
+	return -EBUSY;
+}
+
+/**
+ * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called
+ * from outside of GPU driver
+ *
+ * In success, this call MUST be balanced by caller with gk20a_do_unidle()
+ */
+int gk20a_do_idle(void)
+{
+	struct device_node *node =
+			of_find_matching_node(NULL, tegra_gk20a_of_match);
+	struct platform_device *pdev = of_find_device_by_node(node);
+
+	int ret = __gk20a_do_idle(&pdev->dev, true);
+
+	of_node_put(node);
+
+	return ret;
+}
+
+/**
+ * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
+ */
+int __gk20a_do_unidle(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int err;
+
+	if (g->forced_reset) {
+		/*
+		 * If we did a forced-reset/railgate
+		 * then unrailgate the GPU here first
+		 */
+		platform->unrailgate(dev);
+
+		/* restore the GPU state */
+		err = gk20a_pm_finalize_poweron(dev);
+		if (err)
+			return err;
+
+		/* balance GPU usage counter */
+		pm_runtime_put_sync(dev);
+
+		g->forced_reset = false;
+	}
+
+	/* release the lock and open up all other busy() calls */
+	nvgpu_mutex_release(&platform->railgate_lock);
+	up_write(&g->busy_lock);
+
+	return 0;
+}
+
+/**
+ * gk20a_do_unidle() - wrap up for __gk20a_do_unidle()
+ */
+int gk20a_do_unidle(void)
+{
+	struct device_node *node =
+			of_find_matching_node(NULL, tegra_gk20a_of_match);
+	struct platform_device *pdev = of_find_device_by_node(node);
+
+	int ret = __gk20a_do_unidle(&pdev->dev);
+
+	of_node_put(node);
+
+	return ret;
+}
+#endif
+
+static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
+					    struct resource **out)
+{
+	struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
+
+	if (!r)
+		return NULL;
+	if (out)
+		*out = r;
+	return devm_ioremap_resource(&dev->dev, r);
+}
+
+static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
+{
+	struct gk20a *g = dev_id;
+
+	return g->ops.mc.isr_stall(g);
+}
+
+static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
+{
+	struct gk20a *g = dev_id;
+
+	return g->ops.mc.isr_nonstall(g);
+}
+
+static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
+{
+	struct gk20a *g = dev_id;
+
+	return g->ops.mc.isr_thread_stall(g);
+}
+
+void gk20a_remove_support(struct gk20a *g)
+{
+#ifdef CONFIG_TEGRA_COMMON
+	tegra_unregister_idle_unidle();
+#endif
+	nvgpu_kfree(g, g->dbg_regops_tmp_buf);
+
+	if (g->pmu.remove_support)
+		g->pmu.remove_support(&g->pmu);
+
+	if (g->gr.remove_support)
+		g->gr.remove_support(&g->gr);
+
+	if (g->mm.remove_ce_support)
+		g->mm.remove_ce_support(&g->mm);
+
+	if (g->fifo.remove_support)
+		g->fifo.remove_support(&g->fifo);
+
+	if (g->mm.remove_support)
+		g->mm.remove_support(&g->mm);
+
+	if (g->sim.remove_support)
+		g->sim.remove_support(&g->sim);
+
+	/* free mappings to registers, etc */
+
+	if (g->regs) {
+		iounmap(g->regs);
+		g->regs = NULL;
+	}
+	if (g->bar1) {
+		iounmap(g->bar1);
+		g->bar1 = NULL;
+	}
+}
+
+static int gk20a_init_support(struct platform_device *dev)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(&dev->dev);
+
+#ifdef CONFIG_TEGRA_COMMON
+	tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle);
+#endif
+
+	g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
+					 &g->reg_mem);
+	if (IS_ERR(g->regs)) {
+		nvgpu_err(g, "failed to remap gk20a registers");
+		err = PTR_ERR(g->regs);
+		goto fail;
+	}
+
+	g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
+					 &g->bar1_mem);
+	if (IS_ERR(g->bar1)) {
+		nvgpu_err(g, "failed to remap gk20a bar1");
+		err = PTR_ERR(g->bar1);
+		goto fail;
+	}
+
+	if (nvgpu_platform_is_simulation(g)) {
+		g->sim.g = g;
+		g->sim.regs = gk20a_ioremap_resource(dev,
+						     GK20A_SIM_IORESOURCE_MEM,
+						     &g->sim.reg_mem);
+		if (IS_ERR(g->sim.regs)) {
+			nvgpu_err(g, "failed to remap gk20a sim regs");
+			err = PTR_ERR(g->sim.regs);
+			goto fail;
+		}
+
+		err = gk20a_init_sim_support(dev);
+		if (err)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	return err;
+}
+
+static int gk20a_pm_railgate(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	struct gk20a *g = get_gk20a(dev);
+
+	g->pstats.last_rail_gate_start = jiffies;
+
+	if (g->pstats.railgating_cycle_count >= 1)
+		g->pstats.total_rail_ungate_time_ms =
+			g->pstats.total_rail_ungate_time_ms +
+			jiffies_to_msecs(g->pstats.last_rail_gate_start -
+				g->pstats.last_rail_ungate_complete);
+#endif
+
+	if (platform->railgate)
+		ret = platform->railgate(dev);
+
+#ifdef CONFIG_DEBUG_FS
+	g->pstats.last_rail_gate_complete = jiffies;
+#endif
+
+	return ret;
+}
+
+static int gk20a_pm_unrailgate(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret = 0;
+	struct gk20a *g = get_gk20a(dev);
+
+#ifdef CONFIG_DEBUG_FS
+	g->pstats.last_rail_ungate_start = jiffies;
+	if (g->pstats.railgating_cycle_count >= 1)
+		g->pstats.total_rail_gate_time_ms =
+			g->pstats.total_rail_gate_time_ms +
+			jiffies_to_msecs(g->pstats.last_rail_ungate_start -
+				g->pstats.last_rail_gate_complete);
+
+	g->pstats.railgating_cycle_count++;
+#endif
+
+	trace_gk20a_pm_unrailgate(dev_name(dev));
+
+	if (platform->unrailgate) {
+		nvgpu_mutex_acquire(&platform->railgate_lock);
+		ret = platform->unrailgate(dev);
+		nvgpu_mutex_release(&platform->railgate_lock);
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	g->pstats.last_rail_ungate_complete = jiffies;
+#endif
+
+	return ret;
+}
+
+static void gk20a_pm_shutdown(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = platform_get_drvdata(pdev);
+	struct gk20a *g = platform->g;
+	int err;
+
+	nvgpu_info(g, "shutting down");
+
+	/* vgpu has nothing to clean up currently */
+	if (gk20a_gpu_is_virtual(&pdev->dev))
+		return;
+
+	gk20a_driver_start_unload(g);
+
+	/* If GPU is already railgated,
+	 * just prevent more requests, and return */
+	if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
+		__pm_runtime_disable(&pdev->dev, false);
+		nvgpu_info(g, "already railgated, shut down complete");
+		return;
+	}
+
+	/* Prevent more requests by disabling Runtime PM */
+	__pm_runtime_disable(&pdev->dev, false);
+
+	err = gk20a_wait_for_idle(&pdev->dev);
+	if (err) {
+		nvgpu_err(g, "failed to idle GPU, err=%d", err);
+		goto finish;
+	}
+
+	err = gk20a_fifo_disable_all_engine_activity(g, true);
+	if (err) {
+		nvgpu_err(g, "failed to disable engine activity, err=%d",
+			err);
+		goto finish;
+	}
+
+	err = gk20a_fifo_wait_engine_idle(g);
+	if (err) {
+		nvgpu_err(g, "failed to idle engines, err=%d",
+			err);
+		goto finish;
+	}
+
+	if (gk20a_gpu_is_virtual(&pdev->dev))
+		err = vgpu_pm_prepare_poweroff(&pdev->dev);
+	else
+		err = gk20a_pm_prepare_poweroff(&pdev->dev);
+	if (err) {
+		nvgpu_err(g, "failed to prepare for poweroff, err=%d",
+			err);
+		goto finish;
+	}
+
+	err = gk20a_pm_railgate(&pdev->dev);
+	if (err)
+		nvgpu_err(g, "failed to railgate, err=%d", err);
+
+finish:
+	nvgpu_info(g, "shut down complete");
+}
+
+#ifdef CONFIG_PM
+static int gk20a_pm_runtime_resume(struct device *dev)
+{
+	int err = 0;
+
+	err = gk20a_pm_unrailgate(dev);
+	if (err)
+		goto fail;
+
+	err = gk20a_pm_finalize_poweron(dev);
+	if (err)
+		goto fail_poweron;
+
+	return 0;
+
+fail_poweron:
+	gk20a_pm_railgate(dev);
+fail:
+	return err;
+}
+
+static int gk20a_pm_runtime_suspend(struct device *dev)
+{
+	int err = 0;
+
+	err = gk20a_pm_prepare_poweroff(dev);
+	if (err)
+		goto fail;
+
+	err = gk20a_pm_railgate(dev);
+	if (err)
+		goto fail_railgate;
+
+	return 0;
+
+fail_railgate:
+	gk20a_pm_finalize_poweron(dev);
+fail:
+	pm_runtime_mark_last_busy(dev);
+	return err;
+}
+
+static int gk20a_pm_suspend(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a *g = get_gk20a(dev);
+	int ret = 0;
+
+	if (platform->user_railgate_disabled)
+		gk20a_idle_nosuspend(dev);
+
+	if (atomic_read(&dev->power.usage_count) > 1) {
+		ret = -EBUSY;
+		goto fail;
+	}
+
+	if (!g->power_on)
+		return 0;
+
+	ret = gk20a_pm_runtime_suspend(dev);
+	if (ret)
+		goto fail;
+
+	if (platform->suspend)
+		platform->suspend(dev);
+
+	g->suspended = true;
+
+	return 0;
+
+fail:
+	if (platform->user_railgate_disabled)
+		gk20a_busy_noresume(dev);
+
+	return ret;
+}
+
+static int gk20a_pm_resume(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (platform->user_railgate_disabled)
+		gk20a_busy_noresume(dev);
+
+	if (!g->suspended)
+		return 0;
+
+	ret = gk20a_pm_runtime_resume(dev);
+
+	g->suspended = false;
+
+	return ret;
+}
+
+static const struct dev_pm_ops gk20a_pm_ops = {
+	.runtime_resume = gk20a_pm_runtime_resume,
+	.runtime_suspend = gk20a_pm_runtime_suspend,
+	.resume = gk20a_pm_resume,
+	.suspend = gk20a_pm_suspend,
+};
+#endif
+
+int gk20a_pm_init(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int err = 0;
+
+	gk20a_dbg_fn("");
+
+	/* Initialise pm runtime */
+	if (platform->railgate_delay) {
+		pm_runtime_set_autosuspend_delay(dev,
+				 platform->railgate_delay);
+		pm_runtime_use_autosuspend(dev);
+	}
+
+	if (platform->can_railgate) {
+		pm_runtime_enable(dev);
+		if (!pm_runtime_enabled(dev))
+			gk20a_pm_unrailgate(dev);
+		else
+			gk20a_pm_railgate(dev);
+	} else {
+		__pm_runtime_disable(dev, false);
+		gk20a_pm_unrailgate(dev);
+	}
+
+	return err;
+}
+
+static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
+{
+	gk20a_get_platform(&pdev->dev)->g = gk20a;
+}
+
+static int gk20a_probe(struct platform_device *dev)
+{
+	struct gk20a *gk20a;
+	int err;
+	struct gk20a_platform *platform = NULL;
+
+	if (dev->dev.of_node) {
+		const struct of_device_id *match;
+
+		match = of_match_device(tegra_gk20a_of_match, &dev->dev);
+		if (match)
+			platform = (struct gk20a_platform *)match->data;
+	} else
+		platform = (struct gk20a_platform *)dev->dev.platform_data;
+
+	if (!platform) {
+		dev_err(&dev->dev, "no platform data\n");
+		return -ENODATA;
+	}
+
+	gk20a_dbg_fn("");
+
+	platform_set_drvdata(dev, platform);
+
+	if (gk20a_gpu_is_virtual(&dev->dev))
+		return vgpu_probe(dev);
+
+	gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
+	if (!gk20a) {
+		dev_err(&dev->dev, "couldn't allocate gk20a support");
+		return -ENOMEM;
+	}
+
+	set_gk20a(dev, gk20a);
+	gk20a->dev = &dev->dev;
+
+	if (nvgpu_platform_is_simulation(gk20a))
+		platform->is_fmodel = true;
+
+	nvgpu_kmem_init(gk20a);
+
+	gk20a->irq_stall = platform_get_irq(dev, 0);
+	gk20a->irq_nonstall = platform_get_irq(dev, 1);
+	if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
+		return -ENXIO;
+
+	err = devm_request_threaded_irq(&dev->dev,
+			gk20a->irq_stall,
+			gk20a_intr_isr_stall,
+			gk20a_intr_thread_stall,
+			0, "gk20a_stall", gk20a);
+	if (err) {
+		dev_err(&dev->dev,
+			"failed to request stall intr irq @ %d\n",
+			gk20a->irq_stall);
+		return err;
+	}
+	err = devm_request_irq(&dev->dev,
+			gk20a->irq_nonstall,
+			gk20a_intr_isr_nonstall,
+			0, "gk20a_nonstall", gk20a);
+	if (err) {
+		dev_err(&dev->dev,
+			"failed to request non-stall intr irq @ %d\n",
+			gk20a->irq_nonstall);
+		return err;
+	}
+	disable_irq(gk20a->irq_stall);
+	if (gk20a->irq_stall != gk20a->irq_nonstall)
+		disable_irq(gk20a->irq_nonstall);
+
+	err = gk20a_init_support(dev);
+	if (err)
+		return err;
+
+#ifdef CONFIG_RESET_CONTROLLER
+	platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
+	if (IS_ERR(platform->reset_control))
+		platform->reset_control = NULL;
+#endif
+
+	err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
+	if (err)
+		return err;
+
+	err = gk20a_pm_init(&dev->dev);
+	if (err) {
+		dev_err(&dev->dev, "pm init failed");
+		return err;
+	}
+
+	gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);
+
+	return 0;
+}
+
+static int __exit gk20a_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+	gk20a_dbg_fn("");
+
+	if (gk20a_gpu_is_virtual(dev))
+		return vgpu_remove(pdev);
+
+	if (platform->has_cde)
+		gk20a_cde_destroy(g);
+
+	gk20a_ctxsw_trace_cleanup(g);
+
+	gk20a_sched_ctrl_cleanup(g);
+
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
+		gk20a_scale_exit(dev);
+
+	if (g->remove_support)
+		g->remove_support(g);
+
+	gk20a_ce_destroy(g);
+
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+	nvgpu_clk_arb_cleanup_arbiter(g);
+#endif
+
+	gk20a_user_deinit(dev, &nvgpu_class);
+
+	debugfs_remove_recursive(platform->debugfs);
+	debugfs_remove_recursive(platform->debugfs_alias);
+
+	gk20a_remove_sysfs(dev);
+
+	if (platform->secure_buffer.destroy)
+		platform->secure_buffer.destroy(dev,
+			&platform->secure_buffer);
+
+	if (pm_runtime_enabled(dev))
+		pm_runtime_disable(dev);
+
+	if (platform->remove)
+		platform->remove(dev);
+
+	set_gk20a(pdev, NULL);
+	gk20a_put(g);
+
+	gk20a_dbg_fn("removed");
+
+	return 0;
+}
+
+static struct platform_driver gk20a_driver = {
+	.probe = gk20a_probe,
+	.remove = __exit_p(gk20a_remove),
+	.shutdown = gk20a_pm_shutdown,
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "gk20a",
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+#endif
+#ifdef CONFIG_OF
+		.of_match_table = tegra_gk20a_of_match,
+#endif
+#ifdef CONFIG_PM
+		.pm = &gk20a_pm_ops,
+#endif
+		.suppress_bind_attrs = true,
+	}
+};
+
+struct class nvgpu_class = {
+	.owner = THIS_MODULE,
+	.name = CLASS_NAME,
+};
+
+static int __init gk20a_init(void)
+{
+
+	int ret;
+
+	ret = class_register(&nvgpu_class);
+	if (ret)
+		return ret;
+
+	ret = nvgpu_pci_init();
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&gk20a_driver);
+}
+
+static void __exit gk20a_exit(void)
+{
+	nvgpu_pci_exit();
+	platform_driver_unregister(&gk20a_driver);
+	class_unregister(&nvgpu_class);
+}
+
+MODULE_LICENSE("GPL v2");
+module_init(gk20a_init);
+module_exit(gk20a_exit);
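
module.c now keeps the runtime-PM policy in one place: gk20a_pm_init() maps platform->railgate_delay onto the kernel's autosuspend machinery, so the pm_runtime_put_sync_autosuspend() in gk20a_idle() only railgates after the GPU has been idle for that long. A condensed sketch of that standard runtime-PM idiom, assuming a made-up 500 ms delay (nvgpu takes the real value from platform->railgate_delay):

	/* Runtime-PM autosuspend setup, as in gk20a_pm_init() above. */
	static void pm_setup_sketch(struct device *dev)
	{
		pm_runtime_set_autosuspend_delay(dev, 500);	/* ms, illustrative */
		pm_runtime_use_autosuspend(dev);
		pm_runtime_enable(dev);	/* idle puts now funnel into ->runtime_suspend */
	}
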
diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h
new file mode 100644
index 00000000..45fa2f5c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/module.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
+#define __NVGPU_COMMON_LINUX_MODULE_H__
+
+struct gk20a;
+struct device;
+
+int gk20a_pm_finalize_poweron(struct device *dev);
+void gk20a_remove_support(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index fb54ae18..f90b3a6e 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -24,6 +24,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
 #include "clk/clk.h"
+#include "module.h"
 
 #include "pci.h"
 
diff --git a/drivers/gpu/nvgpu/pci.h b/drivers/gpu/nvgpu/common/linux/pci.h
index cc6b77b1..cc6b77b1 100644
--- a/drivers/gpu/nvgpu/pci.h
+++ b/drivers/gpu/nvgpu/common/linux/pci.h
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 85b24f2e..1a9ffe77 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -228,6 +228,74 @@ void gk20a_init_debug_ops(struct gpu_ops *gops)
 	gops->debug.show_dump = gk20a_debug_show_dump;
 }
 
+#ifdef CONFIG_DEBUG_FS
+static int railgate_residency_show(struct seq_file *s, void *data)
+{
+	struct device *dev = s->private;
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long time_since_last_state_transition_ms;
+	unsigned long total_rail_gate_time_ms;
+	unsigned long total_rail_ungate_time_ms;
+
+	if (platform->is_railgated(dev)) {
+		time_since_last_state_transition_ms =
+				jiffies_to_msecs(jiffies -
+				g->pstats.last_rail_gate_complete);
+		total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
+		total_rail_gate_time_ms =
+					g->pstats.total_rail_gate_time_ms +
+					time_since_last_state_transition_ms;
+	} else {
+		time_since_last_state_transition_ms =
+				jiffies_to_msecs(jiffies -
+				g->pstats.last_rail_ungate_complete);
+		total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
+		total_rail_ungate_time_ms =
+					g->pstats.total_rail_ungate_time_ms +
+					time_since_last_state_transition_ms;
+	}
+
+	seq_printf(s, "Time with Rails Gated: %lu ms\n"
+			"Time with Rails UnGated: %lu ms\n"
+			"Total railgating cycles: %lu\n",
+			total_rail_gate_time_ms,
+			total_rail_ungate_time_ms,
+			g->pstats.railgating_cycle_count - 1);
+	return 0;
+
+}
+
+static int railgate_residency_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, railgate_residency_show, inode->i_private);
+}
+
+static const struct file_operations railgate_residency_fops = {
+	.open = railgate_residency_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int gk20a_railgating_debugfs_init(struct device *dev)
+{
+	struct dentry *d;
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+
+	if (!platform->can_railgate)
+		return 0;
+
+	d = debugfs_create_file(
+		"railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev,
+						&railgate_residency_fops);
+	if (!d)
+		return -ENOMEM;
+
+	return 0;
+}
+#endif
+
 void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
 {
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
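
The railgate_residency file computes residency lazily at read time: whichever state the rail is currently in, the time since the last completed transition (derived from jiffies) is added to that state's running total, so the totals stay current without any periodic timer. A worked illustration with made-up numbers: if total_rail_gate_time_ms is 10000 and the rail gated 2500 ms ago, a read reports "Time with Rails Gated: 12500 ms" while the ungated total is reported exactly as stored.
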
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 2a80157d..b3f4e5fe 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -16,25 +16,7 @@
16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */ 17 */
18 18
19#include <linux/module.h>
20#include <linux/string.h>
21#include <linux/interrupt.h>
22#include <linux/irq.h>
23#include <linux/export.h>
24#include <linux/of.h>
25#include <linux/of_device.h>
26#include <linux/of_platform.h>
27#include <linux/pm_runtime.h>
28#include <linux/thermal.h>
29#include <asm/cacheflush.h>
30#include <linux/debugfs.h>
31#include <linux/clk/tegra.h>
32#include <linux/kthread.h>
33#include <linux/platform/tegra/common.h>
34#include <linux/reset.h>
35#include <linux/reboot.h> 19#include <linux/reboot.h>
36#include <linux/sched.h>
37#include <linux/version.h>
38 20
39#include <nvgpu/nvgpu_common.h> 21#include <nvgpu/nvgpu_common.h>
40#include <nvgpu/kmem.h> 22#include <nvgpu/kmem.h>
@@ -42,26 +24,22 @@
42#include <nvgpu/timers.h> 24#include <nvgpu/timers.h>
43#include <nvgpu/soc.h> 25#include <nvgpu/soc.h>
44 26
27#include <trace/events/gk20a.h>
28
45#include "gk20a.h" 29#include "gk20a.h"
46#include "debug_gk20a.h"
47#include "channel_sync_gk20a.h" 30#include "channel_sync_gk20a.h"
48 31
49#include "gk20a_scale.h"
50#include "ctxsw_trace_gk20a.h" 32#include "ctxsw_trace_gk20a.h"
51#include "dbg_gpu_gk20a.h" 33#include "dbg_gpu_gk20a.h"
52#include "mc_gk20a.h" 34#include "mc_gk20a.h"
53#include "hal.h" 35#include "hal.h"
54#include "vgpu/vgpu.h" 36#include "vgpu/vgpu.h"
55#include "pci.h"
56#include "bus_gk20a.h" 37#include "bus_gk20a.h"
57#ifdef CONFIG_ARCH_TEGRA_18x_SOC 38#ifdef CONFIG_ARCH_TEGRA_18x_SOC
58#include "pstate/pstate.h" 39#include "pstate/pstate.h"
59#endif 40#endif
60 41
61 42
62#define CREATE_TRACE_POINTS
63#include <trace/events/gk20a.h>
64
65#ifdef CONFIG_TEGRA_19x_GPU 43#ifdef CONFIG_TEGRA_19x_GPU
66#include "nvgpu_gpuid_t19x.h" 44#include "nvgpu_gpuid_t19x.h"
67#endif 45#endif
@@ -70,93 +48,6 @@
70#include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> 48#include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
71#include <nvgpu/hw/gk20a/hw_fb_gk20a.h> 49#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
72 50
73
74#ifdef CONFIG_ARM64
75#define __cpuc_flush_dcache_area __flush_dcache_area
76#endif
77
78#define CLASS_NAME "nvidia-gpu"
79/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
80
81#define GK20A_NUM_CDEVS 7
82
83#define GK20A_WAIT_FOR_IDLE_MS 2000
84
85static int gk20a_pm_prepare_poweroff(struct device *dev);
86
87#ifdef CONFIG_DEBUG_FS
88static int railgate_residency_show(struct seq_file *s, void *data)
89{
90 struct device *dev = s->private;
91 struct gk20a_platform *platform = dev_get_drvdata(dev);
92 struct gk20a *g = get_gk20a(dev);
93 unsigned long time_since_last_state_transition_ms;
94 unsigned long total_rail_gate_time_ms;
95 unsigned long total_rail_ungate_time_ms;
96
97 if (platform->is_railgated(dev)) {
98 time_since_last_state_transition_ms =
99 jiffies_to_msecs(jiffies -
100 g->pstats.last_rail_gate_complete);
101 total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
102 total_rail_gate_time_ms =
103 g->pstats.total_rail_gate_time_ms +
104 time_since_last_state_transition_ms;
105 } else {
106 time_since_last_state_transition_ms =
107 jiffies_to_msecs(jiffies -
108 g->pstats.last_rail_ungate_complete);
109 total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
110 total_rail_ungate_time_ms =
111 g->pstats.total_rail_ungate_time_ms +
112 time_since_last_state_transition_ms;
113 }
114
115 seq_printf(s, "Time with Rails Gated: %lu ms\n"
116 "Time with Rails UnGated: %lu ms\n"
117 "Total railgating cycles: %lu\n",
118 total_rail_gate_time_ms,
119 total_rail_ungate_time_ms,
120 g->pstats.railgating_cycle_count - 1);
121 return 0;
122
123}
124
125static int railgate_residency_open(struct inode *inode, struct file *file)
126{
127 return single_open(file, railgate_residency_show, inode->i_private);
128}
129
130static const struct file_operations railgate_residency_fops = {
131 .open = railgate_residency_open,
132 .read = seq_read,
133 .llseek = seq_lseek,
134 .release = single_release,
135};
136
137int gk20a_railgating_debugfs_init(struct device *dev)
138{
139 struct dentry *d;
140 struct gk20a_platform *platform = dev_get_drvdata(dev);
141
142 if (!platform->can_railgate)
143 return 0;
144
145 d = debugfs_create_file(
146 "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev,
147 &railgate_residency_fops);
148 if (!d)
149 return -ENOMEM;
150
151 return 0;
152}
153#endif
154
155static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
156{
157 gk20a_get_platform(&pdev->dev)->g = gk20a;
158}
159
160void __nvgpu_check_gpu_state(struct gk20a *g) 51void __nvgpu_check_gpu_state(struct gk20a *g)
161{ 52{
162 u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); 53 u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL);
@@ -199,130 +90,32 @@ void __gk20a_warn_on_no_regs(void)
199 WARN_ONCE(1, "Attempted access to GPU regs after unmapping!"); 90 WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
200} 91}
201 92
202void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i, 93static int gk20a_detect_chip(struct gk20a *g)
203 struct resource **out)
204{
205 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
206 if (!r)
207 return NULL;
208 if (out)
209 *out = r;
210 return devm_ioremap_resource(&dev->dev, r);
211}
212
213static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
214{
215 struct gk20a *g = dev_id;
216
217 return g->ops.mc.isr_stall(g);
218}
219
220static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
221{
222 struct gk20a *g = dev_id;
223
224 return g->ops.mc.isr_nonstall(g);
225}
226
227static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
228{
229 struct gk20a *g = dev_id;
230 return g->ops.mc.isr_thread_stall(g);
231}
232
233void gk20a_remove_support(struct gk20a *g)
234{
235#ifdef CONFIG_TEGRA_COMMON
236 tegra_unregister_idle_unidle();
237#endif
238 if (g->dbg_regops_tmp_buf)
239 nvgpu_kfree(g, g->dbg_regops_tmp_buf);
240
241 if (g->pmu.remove_support)
242 g->pmu.remove_support(&g->pmu);
243
244 if (g->gr.remove_support)
245 g->gr.remove_support(&g->gr);
246
247 if (g->mm.remove_ce_support)
248 g->mm.remove_ce_support(&g->mm);
249
250 if (g->fifo.remove_support)
251 g->fifo.remove_support(&g->fifo);
252
253 if (g->mm.remove_support)
254 g->mm.remove_support(&g->mm);
255
256 if (g->sim.remove_support)
257 g->sim.remove_support(&g->sim);
258
259 /* free mappings to registers, etc */
260
261 if (g->regs) {
262 iounmap(g->regs);
263 g->regs = NULL;
264 }
265 if (g->bar1) {
266 iounmap(g->bar1);
267 g->bar1 = NULL;
268 }
269}
270
271static int gk20a_init_support(struct platform_device *dev)
272{ 94{
273 int err = 0; 95 struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
274 struct gk20a *g = get_gk20a(&dev->dev); 96 u32 val;
275
276#ifdef CONFIG_TEGRA_COMMON
277 tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle);
278#endif
279
280 g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
281 &g->reg_mem);
282 if (IS_ERR(g->regs)) {
283 nvgpu_err(g, "failed to remap gk20a registers\n");
284 err = PTR_ERR(g->regs);
285 goto fail;
286 }
287 97
288 g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, 98 if (gpu->arch)
289 &g->bar1_mem); 99 return 0;
290 if (IS_ERR(g->bar1)) {
291 nvgpu_err(g, "failed to remap gk20a bar1\n");
292 err = PTR_ERR(g->bar1);
293 goto fail;
294 }
295 100
296 if (nvgpu_platform_is_simulation(g)) { 101 val = gk20a_mc_boot_0(g, &gpu->arch, &gpu->impl, &gpu->rev);
297 err = gk20a_init_sim_support(dev);
298 if (err)
299 goto fail;
300 }
301 102
302 return 0; 103 gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
104 g->gpu_characteristics.arch,
105 g->gpu_characteristics.impl,
106 g->gpu_characteristics.rev);
303 107
304 fail: 108 return gpu_init_hal(g);
305 return err;
306} 109}
307 110
308static int gk20a_pm_prepare_poweroff(struct device *dev) 111int gk20a_prepare_poweroff(struct gk20a *g)
309{ 112{
310 struct gk20a *g = get_gk20a(dev);
311 int ret = 0; 113 int ret = 0;
312 struct gk20a_platform *platform = gk20a_get_platform(dev);
313 114
314 gk20a_dbg_fn(""); 115 gk20a_dbg_fn("");
315 116
316 nvgpu_mutex_acquire(&g->poweroff_lock); 117 if (gk20a_fifo_is_engine_busy(g))
317
318 if (!g->power_on)
319 goto done;
320
321 if (gk20a_fifo_is_engine_busy(g)) {
322 nvgpu_mutex_release(&g->poweroff_lock);
323 return -EBUSY; 118 return -EBUSY;
324 }
325 gk20a_scale_suspend(dev);
326 119
327 /* cancel any pending cde work */ 120 /* cancel any pending cde work */
328 gk20a_cde_suspend(g); 121 gk20a_cde_suspend(g);
@@ -331,18 +124,11 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
331 124
332 ret = gk20a_channel_suspend(g); 125 ret = gk20a_channel_suspend(g);
333 if (ret) 126 if (ret)
334 goto done; 127 return ret;
335 128
336 /* disable elpg before gr or fifo suspend */ 129 /* disable elpg before gr or fifo suspend */
337 if (g->ops.pmu.is_pmu_supported(g)) 130 if (g->ops.pmu.is_pmu_supported(g))
338 ret |= gk20a_pmu_destroy(g); 131 ret |= gk20a_pmu_destroy(g);
339 /*
340 * After this point, gk20a interrupts should not get
341 * serviced.
342 */
343 disable_irq(g->irq_stall);
344 if (g->irq_stall != g->irq_nonstall)
345 disable_irq(g->irq_nonstall);
346 132
347 ret |= gk20a_gr_suspend(g); 133 ret |= gk20a_gr_suspend(g);
348 ret |= gk20a_mm_suspend(g); 134 ret |= gk20a_mm_suspend(g);
@@ -361,67 +147,19 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
361#endif 147#endif
362 g->power_on = false; 148 g->power_on = false;
363 149
364 /* Decrement platform power refcount */
365 if (platform->idle)
366 platform->idle(dev);
367
368 /* Stop CPU from accessing the GPU registers. */
369 gk20a_lockout_registers(g);
370
371done:
372 nvgpu_mutex_release(&g->poweroff_lock);
373
374 return ret; 150 return ret;
375} 151}
376 152
377static int gk20a_detect_chip(struct gk20a *g) 153int gk20a_finalize_poweron(struct gk20a *g)
378{ 154{
379 struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; 155 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
380 u32 val; 156 int err;
381
382 if (gpu->arch)
383 return 0;
384
385 val = gk20a_mc_boot_0(g, &gpu->arch, &gpu->impl, &gpu->rev);
386
387 gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
388 g->gpu_characteristics.arch,
389 g->gpu_characteristics.impl,
390 g->gpu_characteristics.rev);
391
392 return gpu_init_hal(g);
393}
394
395int gk20a_pm_finalize_poweron(struct device *dev)
396{
397 struct gk20a *g = get_gk20a(dev);
398 struct gk20a_platform *platform = gk20a_get_platform(dev);
399 int err, nice_value;
400 157
401 gk20a_dbg_fn(""); 158 gk20a_dbg_fn("");
402 159
403 if (g->power_on) 160 if (g->power_on)
404 return 0; 161 return 0;
405 162
406 trace_gk20a_finalize_poweron(g->name);
407
408 /* Increment platform power refcount */
409 if (platform->busy) {
410 err = platform->busy(dev);
411 if (err < 0) {
412 nvgpu_err(g, "%s: failed to poweron platform dependency\n",
413 __func__);
414 goto done;
415 }
416 }
417
418 err = gk20a_restore_registers(g);
419 if (err)
420 return err;
421
422 nice_value = task_nice(current);
423 set_user_nice(current, -20);
424
425 g->power_on = true; 163 g->power_on = true;
426 164
427 err = gk20a_detect_chip(g); 165 err = gk20a_detect_chip(g);
@@ -586,31 +324,18 @@ int gk20a_pm_finalize_poweron(struct device *dev)
586 g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl); 324 g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);
587 325
588 gk20a_channel_resume(g); 326 gk20a_channel_resume(g);
589 set_user_nice(current, nice_value);
590
591 gk20a_scale_resume(dev);
592
593 trace_gk20a_finalize_poweron_done(g->name);
594
595 if (platform->has_cde)
596 gk20a_init_cde_support(g);
597 327
598 gk20a_init_ce_support(g); 328 gk20a_init_ce_support(g);
599 329
600 gk20a_init_mm_ce_context(g); 330 gk20a_init_mm_ce_context(g);
601 331
602 enable_irq(g->irq_stall);
603 if (g->irq_stall != g->irq_nonstall)
604 enable_irq(g->irq_nonstall);
605 g->irqs_enabled = 1;
606
607 if (g->ops.xve.available_speeds) { 332 if (g->ops.xve.available_speeds) {
608 u32 speed; 333 u32 speed;
609 334
610 if (platform->disable_aspm && g->ops.xve.disable_aspm) 335 if (platform->disable_aspm && g->ops.xve.disable_aspm)
611 g->ops.xve.disable_aspm(g); 336 g->ops.xve.disable_aspm(g);
612 337
613 g->ops.xve.sw_init(dev); 338 g->ops.xve.sw_init(g->dev);
614 g->ops.xve.available_speeds(g, &speed); 339 g->ops.xve.available_speeds(g, &speed);
615 340
616 /* Set to max speed */ 341 /* Set to max speed */
@@ -629,515 +354,15 @@ done:
629 return err; 354 return err;
630} 355}
631 356
632static struct of_device_id tegra_gk20a_of_match[] = { 357/*
633#ifdef CONFIG_TEGRA_GK20A 358 * Check if the device can go busy. Basically if the driver is currently
634 { .compatible = "nvidia,tegra124-gk20a", 359 * in the process of dying then do not let new places make the driver busy.
635 .data = &gk20a_tegra_platform }, 360 */
636 { .compatible = "nvidia,tegra210-gm20b", 361int gk20a_can_busy(struct gk20a *g)
637 .data = &gm20b_tegra_platform },
638#ifdef CONFIG_ARCH_TEGRA_18x_SOC
639 { .compatible = "nvidia,tegra186-gp10b",
640 .data = &gp10b_tegra_platform },
641#endif
642#ifdef CONFIG_TEGRA_19x_GPU
643 { .compatible = TEGRA_19x_GPU_COMPAT_TEGRA,
644 .data = &t19x_gpu_tegra_platform },
645#endif
646#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
647 { .compatible = "nvidia,tegra124-gk20a-vgpu",
648 .data = &vgpu_tegra_platform },
649#endif
650#else
651 { .compatible = "nvidia,tegra124-gk20a",
652 .data = &gk20a_generic_platform },
653 { .compatible = "nvidia,tegra210-gm20b",
654 .data = &gk20a_generic_platform },
655#ifdef CONFIG_ARCH_TEGRA_18x_SOC
656 { .compatible = TEGRA_18x_GPU_COMPAT_TEGRA,
657 .data = &gk20a_generic_platform },
658#endif
659
660#endif
661 { .compatible = "nvidia,generic-gk20a",
662 .data = &gk20a_generic_platform },
663 { .compatible = "nvidia,generic-gm20b",
664 .data = &gk20a_generic_platform },
665#ifdef CONFIG_ARCH_TEGRA_18x_SOC
666 { .compatible = "nvidia,generic-gp10b",
667 .data = &gk20a_generic_platform },
668#endif
669 { },
670};
671
672static int gk20a_pm_railgate(struct device *dev)
673{
674 struct gk20a_platform *platform = dev_get_drvdata(dev);
675 int ret = 0;
676#ifdef CONFIG_DEBUG_FS
677 struct gk20a *g = get_gk20a(dev);
678
679 g->pstats.last_rail_gate_start = jiffies;
680
681 if (g->pstats.railgating_cycle_count >= 1)
682 g->pstats.total_rail_ungate_time_ms =
683 g->pstats.total_rail_ungate_time_ms +
684 jiffies_to_msecs(g->pstats.last_rail_gate_start -
685 g->pstats.last_rail_ungate_complete);
686#endif
687
688 if (platform->railgate)
689 ret = platform->railgate(dev);
690
691#ifdef CONFIG_DEBUG_FS
692 g->pstats.last_rail_gate_complete = jiffies;
693#endif
694
695 return ret;
696}
697
698static int gk20a_pm_unrailgate(struct device *dev)
699{
700 struct gk20a_platform *platform = dev_get_drvdata(dev);
701 int ret = 0;
702 struct gk20a *g = get_gk20a(dev);
703
704#ifdef CONFIG_DEBUG_FS
705 g->pstats.last_rail_ungate_start = jiffies;
706 if (g->pstats.railgating_cycle_count >= 1)
707 g->pstats.total_rail_gate_time_ms =
708 g->pstats.total_rail_gate_time_ms +
709 jiffies_to_msecs(g->pstats.last_rail_ungate_start -
710 g->pstats.last_rail_gate_complete);
711
712 g->pstats.railgating_cycle_count++;
713#endif
714
715 trace_gk20a_pm_unrailgate(g->name);
716
717 if (platform->unrailgate) {
718 nvgpu_mutex_acquire(&platform->railgate_lock);
719 ret = platform->unrailgate(dev);
720 nvgpu_mutex_release(&platform->railgate_lock);
721 }
722
723#ifdef CONFIG_DEBUG_FS
724 g->pstats.last_rail_ungate_complete = jiffies;
725#endif
726
727 return ret;
728}
729
730static void gk20a_pm_shutdown(struct platform_device *pdev)
731{
732 struct gk20a_platform *platform = platform_get_drvdata(pdev);
733 struct gk20a *g = platform->g;
734 int err;
735
736 nvgpu_info(g, "shutting down");
737
738 /* vgpu has nothing to clean up currently */
739 if (gk20a_gpu_is_virtual(&pdev->dev))
740 return;
741
742 gk20a_driver_start_unload(g);
743
744 /* If GPU is already railgated,
745 * just prevent more requests, and return */
746 if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
747 __pm_runtime_disable(&pdev->dev, false);
748 nvgpu_info(g, "already railgated, shut down complete");
749 return;
750 }
751
752 /* Prevent more requests by disabling Runtime PM */
753 __pm_runtime_disable(&pdev->dev, false);
754
755 err = gk20a_wait_for_idle(&pdev->dev);
756 if (err) {
757 nvgpu_err(g, "failed to idle GPU, err=%d", err);
758 goto finish;
759 }
760
761 err = gk20a_fifo_disable_all_engine_activity(g, true);
762 if (err) {
763 nvgpu_err(g, "failed to disable engine activity, err=%d",
764 err);
765 goto finish;
766 }
767
768 err = gk20a_fifo_wait_engine_idle(g);
769 if (err) {
770 nvgpu_err(g, "failed to idle engines, err=%d",
771 err);
772 goto finish;
773 }
774
775 if (gk20a_gpu_is_virtual(&pdev->dev))
776 err = vgpu_pm_prepare_poweroff(&pdev->dev);
777 else
778 err = gk20a_pm_prepare_poweroff(&pdev->dev);
779 if (err) {
780 nvgpu_err(g, "failed to prepare for poweroff, err=%d",
781 err);
782 goto finish;
783 }
784
785 err = gk20a_pm_railgate(&pdev->dev);
786 if (err)
787 nvgpu_err(g, "failed to railgate, err=%d", err);
788
789finish:
790 nvgpu_info(g, "shut down complete\n");
791}
792
793#ifdef CONFIG_PM
794static int gk20a_pm_runtime_resume(struct device *dev)
795{
796 int err = 0;
797
798 err = gk20a_pm_unrailgate(dev);
799 if (err)
800 goto fail;
801
802 err = gk20a_pm_finalize_poweron(dev);
803 if (err)
804 goto fail_poweron;
805
806 return 0;
807
808fail_poweron:
809 gk20a_pm_railgate(dev);
810fail:
811 return err;
812}
813
814static int gk20a_pm_runtime_suspend(struct device *dev)
815{
816 int err = 0;
817
818 err = gk20a_pm_prepare_poweroff(dev);
819 if (err)
820 goto fail;
821
822 err = gk20a_pm_railgate(dev);
823 if (err)
824 goto fail_railgate;
825
826 return 0;
827
828fail_railgate:
829 gk20a_pm_finalize_poweron(dev);
830fail:
831 pm_runtime_mark_last_busy(dev);
832 return err;
833}
834
835static int gk20a_pm_suspend(struct device *dev)
836{
837 struct gk20a_platform *platform = dev_get_drvdata(dev);
838 struct gk20a *g = get_gk20a(dev);
839 int ret = 0;
840
841 if (platform->user_railgate_disabled)
842 gk20a_idle_nosuspend(dev);
843
844 if (atomic_read(&dev->power.usage_count) > 1) {
845 ret = -EBUSY;
846 goto fail;
847 }
848
849 if (!g->power_on)
850 return 0;
851
852 ret = gk20a_pm_runtime_suspend(dev);
853 if (ret)
854 goto fail;
855
856 if (platform->suspend)
857 platform->suspend(dev);
858
859 g->suspended = true;
860
861 return 0;
862
863fail:
864 if (platform->user_railgate_disabled)
865 gk20a_busy_noresume(dev);
866
867 return ret;
868}
869
870static int gk20a_pm_resume(struct device *dev)
871{ 362{
872 struct gk20a *g = get_gk20a(dev); 363 if (g->driver_is_dying)
873 struct gk20a_platform *platform = dev_get_drvdata(dev);
874 int ret = 0;
875
876 if (platform->user_railgate_disabled)
877 gk20a_busy_noresume(dev);
878
879 if (!g->suspended)
880 return 0; 364 return 0;
881 365 return 1;
882 ret = gk20a_pm_runtime_resume(dev);
883
884 g->suspended = false;
885
886 return ret;
887}
888
889static const struct dev_pm_ops gk20a_pm_ops = {
890 .runtime_resume = gk20a_pm_runtime_resume,
891 .runtime_suspend = gk20a_pm_runtime_suspend,
892 .resume = gk20a_pm_resume,
893 .suspend = gk20a_pm_suspend,
894};
895#endif
896
897int gk20a_pm_init(struct device *dev)
898{
899 struct gk20a_platform *platform = dev_get_drvdata(dev);
900 int err = 0;
901
902 gk20a_dbg_fn("");
903
904 /* Initialise pm runtime */
905 if (platform->railgate_delay) {
906 pm_runtime_set_autosuspend_delay(dev,
907 platform->railgate_delay);
908 pm_runtime_use_autosuspend(dev);
909 }
910
911 if (platform->can_railgate) {
912 pm_runtime_enable(dev);
913 if (!pm_runtime_enabled(dev))
914 gk20a_pm_unrailgate(dev);
915 else
916 gk20a_pm_railgate(dev);
917 } else {
918 __pm_runtime_disable(dev, false);
919 gk20a_pm_unrailgate(dev);
920 }
921
922 return err;
923}
924
925int gk20a_secure_page_alloc(struct device *dev)
926{
927 struct gk20a_platform *platform = dev_get_drvdata(dev);
928 int err = 0;
929
930 if (platform->secure_page_alloc) {
931 err = platform->secure_page_alloc(dev);
932 if (!err)
933 platform->secure_alloc_ready = true;
934 }
935
936 return err;
937}
938
939static int gk20a_probe(struct platform_device *dev)
940{
941 struct gk20a *gk20a;
942 int err;
943 struct gk20a_platform *platform = NULL;
944
945 if (dev->dev.of_node) {
946 const struct of_device_id *match;
947
948 match = of_match_device(tegra_gk20a_of_match, &dev->dev);
949 if (match)
950 platform = (struct gk20a_platform *)match->data;
951 } else
952 platform = (struct gk20a_platform *)dev->dev.platform_data;
953
954 if (!platform) {
955 dev_err(&dev->dev, "no platform data\n");
956 return -ENODATA;
957 }
958
959 gk20a_dbg_fn("");
960
961 platform_set_drvdata(dev, platform);
962
963 if (gk20a_gpu_is_virtual(&dev->dev))
964 return vgpu_probe(dev);
965
966 gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
967 if (!gk20a) {
968 dev_err(&dev->dev, "couldn't allocate gk20a support");
969 return -ENOMEM;
970 }
971
972 set_gk20a(dev, gk20a);
973 gk20a->dev = &dev->dev;
974
975 if (nvgpu_platform_is_simulation(gk20a))
976 platform->is_fmodel = true;
977
978 nvgpu_kmem_init(gk20a);
979
980 gk20a->irq_stall = platform_get_irq(dev, 0);
981 gk20a->irq_nonstall = platform_get_irq(dev, 1);
982 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
983 return -ENXIO;
984
985 err = devm_request_threaded_irq(&dev->dev,
986 gk20a->irq_stall,
987 gk20a_intr_isr_stall,
988 gk20a_intr_thread_stall,
989 0, "gk20a_stall", gk20a);
990 if (err) {
991 dev_err(&dev->dev,
992 "failed to request stall intr irq @ %d\n",
993 gk20a->irq_stall);
994 return err;
995 }
996 err = devm_request_irq(&dev->dev,
997 gk20a->irq_nonstall,
998 gk20a_intr_isr_nonstall,
999 0, "gk20a_nonstall", gk20a);
1000 if (err) {
1001 dev_err(&dev->dev,
1002 "failed to request non-stall intr irq @ %d\n",
1003 gk20a->irq_nonstall);
1004 return err;
1005 }
1006 disable_irq(gk20a->irq_stall);
1007 if (gk20a->irq_stall != gk20a->irq_nonstall)
1008 disable_irq(gk20a->irq_nonstall);
1009
1010 /*
1011 * is_fmodel needs to be in gk20a struct for deferred teardown
1012 */
1013 gk20a->is_fmodel = platform->is_fmodel;
1014
1015 err = gk20a_init_support(dev);
1016 if (err)
1017 return err;
1018
1019#ifdef CONFIG_RESET_CONTROLLER
1020 platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
1021 if (IS_ERR(platform->reset_control))
1022 platform->reset_control = NULL;
1023#endif
1024
1025 err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
1026 if (err)
1027 return err;
1028
1029 err = gk20a_pm_init(&dev->dev);
1030 if (err) {
1031 dev_err(&dev->dev, "pm init failed");
1032 return err;
1033 }
1034
1035 gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);
1036
1037 return 0;
1038}
1039
1040static int __exit gk20a_remove(struct platform_device *pdev)
1041{
1042 struct device *dev = &pdev->dev;
1043 struct gk20a *g = get_gk20a(dev);
1044 struct gk20a_platform *platform = gk20a_get_platform(dev);
1045
1046 gk20a_dbg_fn("");
1047
1048 if (gk20a_gpu_is_virtual(dev))
1049 return vgpu_remove(pdev);
1050
1051 if (platform->has_cde)
1052 gk20a_cde_destroy(g);
1053
1054 gk20a_ctxsw_trace_cleanup(g);
1055
1056 gk20a_sched_ctrl_cleanup(g);
1057
1058 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
1059 gk20a_scale_exit(dev);
1060
1061#ifdef CONFIG_ARCH_TEGRA_18x_SOC
1062 nvgpu_clk_arb_cleanup_arbiter(g);
1063#endif
1064
1065 gk20a_user_deinit(dev, &nvgpu_class);
1066
1067 debugfs_remove_recursive(platform->debugfs);
1068 debugfs_remove_recursive(platform->debugfs_alias);
1069
1070 gk20a_remove_sysfs(dev);
1071
1072 if (platform->secure_buffer.destroy)
1073 platform->secure_buffer.destroy(dev,
1074 &platform->secure_buffer);
1075
1076 if (pm_runtime_enabled(dev))
1077 pm_runtime_disable(dev);
1078
1079 if (platform->remove)
1080 platform->remove(dev);
1081
1082 set_gk20a(pdev, NULL);
1083 gk20a_put(g);
1084
1085 gk20a_dbg_fn("removed");
1086
1087 return 0;
1088}
1089
1090static struct platform_driver gk20a_driver = {
1091 .probe = gk20a_probe,
1092 .remove = __exit_p(gk20a_remove),
1093 .shutdown = gk20a_pm_shutdown,
1094 .driver = {
1095 .owner = THIS_MODULE,
1096 .name = "gk20a",
1097#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
1098 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
1099#endif
1100#ifdef CONFIG_OF
1101 .of_match_table = tegra_gk20a_of_match,
1102#endif
1103#ifdef CONFIG_PM
1104 .pm = &gk20a_pm_ops,
1105#endif
1106 .suppress_bind_attrs = true,
1107 }
1108};
1109
1110struct class nvgpu_class = {
1111 .owner = THIS_MODULE,
1112 .name = CLASS_NAME,
1113};
1114
1115static int __init gk20a_init(void)
1116{
1117 int ret;
1119
1120 ret = class_register(&nvgpu_class);
1121 if (ret)
1122 return ret;
1123
1124 ret = nvgpu_pci_init();
1125 if (ret)
1126 return ret;
1127
1128 return platform_driver_register(&gk20a_driver);
1129}
1130
1131static void __exit gk20a_exit(void)
1132{
1133 nvgpu_pci_exit();
1134 platform_driver_unregister(&gk20a_driver);
1135 class_unregister(&nvgpu_class);
1136}
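
/*
 * Illustrative sketch (not code from this change): as written above,
 * gk20a_init() leaves the class registered if nvgpu_pci_init() or
 * platform_driver_register() fails; a goto-based unwind keeps init
 * symmetric with gk20a_exit().
 */
static int __init example_init(void)
{
	int ret;

	ret = class_register(&nvgpu_class);
	if (ret)
		return ret;

	ret = nvgpu_pci_init();
	if (ret)
		goto fail_class;

	ret = platform_driver_register(&gk20a_driver);
	if (ret)
		goto fail_pci;

	return 0;

fail_pci:
	nvgpu_pci_exit();
fail_class:
	class_unregister(&nvgpu_class);
	return ret;
}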
1137
1138void gk20a_busy_noresume(struct device *dev)
1139{
1140 pm_runtime_get_noresume(dev);
1141} 366}
1142 367
1143/* 368/*
@@ -1193,292 +418,6 @@ int gk20a_wait_for_idle(struct device *dev)
1193 return 0; 418 return 0;
1194} 419}
1195 420
1196/*
1197 * Check if the device can go busy. If the driver is currently in the
1198 * process of dying, do not let new callers mark the driver busy.
1199 */
1200static int gk20a_can_busy(struct gk20a *g)
1201{
1202 if (g->driver_is_dying)
1203 return 0;
1204 return 1;
1205}
1206
1207int gk20a_busy(struct gk20a *g)
1208{
1209 int ret = 0;
1210 struct device *dev;
1211
1212 if (!g)
1213 return -ENODEV;
1214
1215 atomic_inc(&g->usage_count);
1216
1217 down_read(&g->busy_lock);
1218
1219 if (!gk20a_can_busy(g)) {
1220 ret = -ENODEV;
1221 atomic_dec(&g->usage_count);
1222 goto fail;
1223 }
1224
1225 dev = g->dev;
1226
1227 if (pm_runtime_enabled(dev)) {
1228 ret = pm_runtime_get_sync(dev);
1229 if (ret < 0) {
1230 pm_runtime_put_noidle(dev);
1231 atomic_dec(&g->usage_count);
1232 goto fail;
1233 }
1234 } else {
1235 if (!g->power_on) {
1236 ret = gk20a_gpu_is_virtual(dev) ?
1237 vgpu_pm_finalize_poweron(dev)
1238 : gk20a_pm_finalize_poweron(dev);
1239 if (ret) {
1240 atomic_dec(&g->usage_count);
1241 goto fail;
1242 }
1243 }
1244 }
1245
1246 gk20a_scale_notify_busy(dev);
1247
1248fail:
1249 up_read(&g->busy_lock);
1250
1251 return ret < 0 ? ret : 0;
1252}
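
/*
 * Illustrative sketch (hypothetical caller): every successful gk20a_busy()
 * must be balanced by gk20a_idle(), so a typical user keeps the GPU powered
 * only across the actual hardware access.
 */
static int example_touch_hw(struct gk20a *g)
{
	int err = gk20a_busy(g);	/* powers on/unrailgates if needed */

	if (err)
		return err;

	/* ... access GPU registers here ... */

	gk20a_idle(g);			/* may arm autosuspend/railgating */
	return 0;
}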
1253
1254void gk20a_idle_nosuspend(struct device *dev)
1255{
1256 pm_runtime_put_noidle(dev);
1257}
1258
1259void gk20a_idle(struct gk20a *g)
1260{
1261 struct device *dev;
1262
1263 atomic_dec(&g->usage_count);
1264 down_read(&g->busy_lock);
1265
1266 dev = g->dev;
1267
1268 if (!(dev && gk20a_can_busy(g)))
1269 goto fail;
1270
1271 if (pm_runtime_enabled(dev)) {
1272#ifdef CONFIG_PM
1273 if (atomic_read(&g->dev->power.usage_count) == 1)
1274 gk20a_scale_notify_idle(dev);
1275#endif
1276
1277 pm_runtime_mark_last_busy(dev);
1278 pm_runtime_put_sync_autosuspend(dev);
1279
1280 } else {
1281 gk20a_scale_notify_idle(dev);
1282 }
1283fail:
1284 up_read(&g->busy_lock);
1285}
1286
1287#ifdef CONFIG_PM
1288/**
1289 * __gk20a_do_idle() - force the GPU to idle and railgate
1290 *
1291 * On success, this call MUST be balanced by the caller with __gk20a_do_unidle()
1292 *
1293 * Acquires two locks: &g->busy_lock and &platform->railgate_lock
1294 * On success, we return with both locks held;
1295 * on failure, both locks are released before returning
1296 */
1297int __gk20a_do_idle(struct device *dev, bool force_reset)
1298{
1299 struct gk20a *g = get_gk20a(dev);
1300 struct gk20a_platform *platform = dev_get_drvdata(dev);
1301 struct nvgpu_timeout timeout;
1302 int ref_cnt;
1303 int target_ref_cnt = 0;
1304 bool is_railgated;
1305 int err = 0;
1306
1307 /* acquire busy lock to block other busy() calls */
1308 down_write(&g->busy_lock);
1309
1310 /* acquire railgate lock to prevent unrailgate in midst of do_idle() */
1311 nvgpu_mutex_acquire(&platform->railgate_lock);
1312
1313 /* check if it is already railgated */
1314 if (platform->is_railgated(dev))
1315 return 0;
1316
1317 /*
1318 * release railgate_lock, prevent suspend by incrementing usage counter,
1319 * re-acquire railgate_lock
1320 */
1321 nvgpu_mutex_release(&platform->railgate_lock);
1322 pm_runtime_get_sync(dev);
1323
1324 /*
1325 * One refcount is taken by this API itself; if the user
1326 * has disabled railgating, one extra refcount is held
1327 * on top of that
1328 */
1329 if (platform->user_railgate_disabled)
1330 target_ref_cnt = 2;
1331 else
1332 target_ref_cnt = 1;
1333 nvgpu_mutex_acquire(&platform->railgate_lock);
1334
1335 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
1336 NVGPU_TIMER_CPU_TIMER);
1337
1338 /* check and wait until GPU is idle (with a timeout) */
1339 do {
1340 nvgpu_msleep(1);
1341 ref_cnt = atomic_read(&dev->power.usage_count);
1342 } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));
1343
1344 if (ref_cnt != target_ref_cnt) {
1345 nvgpu_err(g, "failed to idle - refcount %d != target %d\n",
1346 ref_cnt, target_ref_cnt);
1347 goto fail_drop_usage_count;
1348 }
1349
1350 /* check if global force_reset flag is set */
1351 force_reset |= platform->force_reset_in_do_idle;
1352
1353 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
1354 NVGPU_TIMER_CPU_TIMER);
1355
1356 if (platform->can_railgate && !force_reset) {
1357 /*
1358 * Case 1: GPU railgate is supported
1359 *
1360 * if the GPU is now idle, only one refcount remains;
1361 * drop it to railgate the GPU
1362 */
1363 pm_runtime_put_sync(dev);
1364
1365 /* add sufficient delay to allow GPU to railgate */
1366 nvgpu_msleep(platform->railgate_delay);
1367
1369 /* poll until the GPU is railgated or we time out */
1369 do {
1370 nvgpu_msleep(1);
1371 is_railgated = platform->is_railgated(dev);
1372 } while (!is_railgated && !nvgpu_timeout_expired(&timeout));
1373
1374 if (is_railgated) {
1375 return 0;
1376 } else {
1377 nvgpu_err(g, "timed out waiting for the GPU to railgate\n");
1378 goto fail_timeout;
1379 }
1380 } else {
1381 /*
1382 * Case 2: GPU railgate is not supported or we explicitly
1383 * do not want to depend on runtime PM
1384 *
1385 * if GPU is now idle, call prepare_poweroff() to save the
1386 * state and then do explicit railgate
1387 *
1388 * __gk20a_do_unidle() needs to unrailgate, call
1389 * finalize_poweron(), and then call pm_runtime_put_sync()
1390 * to balance the GPU usage counter
1391 */
1392
1393 /* Save the GPU state */
1394 err = gk20a_pm_prepare_poweroff(dev);
1395 if (err)
1396 goto fail_drop_usage_count;
1397
1398 /* railgate GPU */
1399 platform->railgate(dev);
1400
1401 nvgpu_udelay(10);
1402
1403 g->forced_reset = true;
1404 return 0;
1405 }
1406
1407fail_drop_usage_count:
1408 pm_runtime_put_noidle(dev);
1409fail_timeout:
1410 nvgpu_mutex_release(&platform->railgate_lock);
1411 up_write(&g->busy_lock);
1412 return -EBUSY;
1413}
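
/*
 * Illustrative sketch (hypothetical caller): per the contract above,
 * a successful __gk20a_do_idle() returns with busy_lock and
 * railgate_lock held, so it must always be paired with
 * __gk20a_do_unidle(); a failure has already dropped both locks.
 */
static int example_reset_gpu(struct device *dev)
{
	int err = __gk20a_do_idle(dev, true);

	if (err)
		return err;	/* locks already released */

	/* GPU is idled/railgated here; safe to perform the reset */

	return __gk20a_do_unidle(dev);
}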
1414
1415/**
1416 * gk20a_do_idle() - wrapper for __gk20a_do_idle(), to be called
1417 * from outside the GPU driver
1418 *
1419 * On success, this call MUST be balanced by the caller with gk20a_do_unidle()
1420 */
1421int gk20a_do_idle(void)
1422{
1423 struct device_node *node =
1424 of_find_matching_node(NULL, tegra_gk20a_of_match);
1425 struct platform_device *pdev = of_find_device_by_node(node);
1426
1427 int ret = __gk20a_do_idle(&pdev->dev, true);
1428
1429 of_node_put(node);
1430
1431 return ret;
1432}
1433
1434/**
1435 * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
1436 */
1437int __gk20a_do_unidle(struct device *dev)
1438{
1439 struct gk20a *g = get_gk20a(dev);
1440 struct gk20a_platform *platform = dev_get_drvdata(dev);
1441
1442 if (g->forced_reset) {
1443 /*
1444 * If we did a forced-reset/railgate
1445 * then unrailgate the GPU here first
1446 */
1447 platform->unrailgate(dev);
1448
1449 /* restore the GPU state */
1450 gk20a_pm_finalize_poweron(dev);
1451
1452 /* balance GPU usage counter */
1453 pm_runtime_put_sync(dev);
1454
1455 g->forced_reset = false;
1456 }
1457
1458 /* release the lock and open up all other busy() calls */
1459 nvgpu_mutex_release(&platform->railgate_lock);
1460 up_write(&g->busy_lock);
1461
1462 return 0;
1463}
1464
1465/**
1466 * gk20a_do_unidle() - wrapper for __gk20a_do_unidle()
1467 */
1468int gk20a_do_unidle(void)
1469{
1470 struct device_node *node =
1471 of_find_matching_node(NULL, tegra_gk20a_of_match);
1472 struct platform_device *pdev = of_find_device_by_node(node);
1473
1474 int ret = __gk20a_do_unidle(&pdev->dev);
1475
1476 of_node_put(node);
1477
1478 return ret;
1479}
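
/*
 * Illustrative sketch (hypothetical helper): the DT lookup used by the
 * two wrappers above. Note that of_find_device_by_node() returns a
 * referenced struct platform_device, so a fuller version would also
 * check for NULL and eventually drop that reference.
 */
static struct platform_device *example_find_gpu(void)
{
	struct device_node *node =
		of_find_matching_node(NULL, tegra_gk20a_of_match);
	struct platform_device *pdev =
		node ? of_find_device_by_node(node) : NULL;

	of_node_put(node);	/* done with the node itself */
	return pdev;		/* caller: platform_device_put() when done */
}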
1480#endif
1481
1482int gk20a_init_gpu_characteristics(struct gk20a *g) 421int gk20a_init_gpu_characteristics(struct gk20a *g)
1483{ 422{
1484 struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; 423 struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
@@ -1651,7 +590,3 @@ void gk20a_put(struct gk20a *g)
1651 590
1652 kref_put(&g->refcount, gk20a_free_cb); 591 kref_put(&g->refcount, gk20a_free_cb);
1653} 592}
1654
1655MODULE_LICENSE("GPL v2");
1656module_init(gk20a_init);
1657module_exit(gk20a_exit);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 60d04b64..9860910c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1389,6 +1389,7 @@ int gk20a_do_unidle(void);
1389int __gk20a_do_idle(struct device *dev, bool force_reset); 1389int __gk20a_do_idle(struct device *dev, bool force_reset);
1390int __gk20a_do_unidle(struct device *dev); 1390int __gk20a_do_unidle(struct device *dev);
1391 1391
1392int gk20a_can_busy(struct gk20a *g);
1392void gk20a_driver_start_unload(struct gk20a *g); 1393void gk20a_driver_start_unload(struct gk20a *g);
1393int gk20a_wait_for_idle(struct device *dev); 1394int gk20a_wait_for_idle(struct device *dev);
1394 1395
@@ -1431,9 +1432,8 @@ extern struct class nvgpu_class;
1431 1432
1432#define INTERFACE_NAME "nvhost%s-gpu" 1433#define INTERFACE_NAME "nvhost%s-gpu"
1433 1434
1434int gk20a_pm_init(struct device *dev); 1435int gk20a_prepare_poweroff(struct gk20a *g);
1435int gk20a_pm_finalize_poweron(struct device *dev); 1436int gk20a_finalize_poweron(struct gk20a *g);
1436void gk20a_remove_support(struct gk20a *g);
1437 1437
1438static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch) 1438static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch)
1439{ 1439{
@@ -1476,8 +1476,4 @@ void gk20a_put(struct gk20a *g);
1476int gk20a_railgating_debugfs_init(struct device *dev); 1476int gk20a_railgating_debugfs_init(struct device *dev);
1477#endif 1477#endif
1478 1478
1479int gk20a_secure_page_alloc(struct device *dev);
1480void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
1481 struct resource **out);
1482
1483#endif /* GK20A_H */ 1479#endif /* GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/sim_gk20a.c b/drivers/gpu/nvgpu/gk20a/sim_gk20a.c
index 8951d5a4..5c11c118 100644
--- a/drivers/gpu/nvgpu/gk20a/sim_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sim_gk20a.c
@@ -103,15 +103,6 @@ int gk20a_init_sim_support(struct platform_device *pdev)
103 struct gk20a *g = get_gk20a(dev); 103 struct gk20a *g = get_gk20a(dev);
104 u64 phys; 104 u64 phys;
105 105
106 g->sim.g = g;
107 g->sim.regs = gk20a_ioremap_resource(pdev, GK20A_SIM_IORESOURCE_MEM,
108 &g->sim.reg_mem);
109 if (IS_ERR(g->sim.regs)) {
110 nvgpu_err(g, "failed to remap gk20a sim regs\n");
111 err = PTR_ERR(g->sim.regs);
112 goto fail;
113 }
114
115 /* allocate sim event/msg buffers */ 106 /* allocate sim event/msg buffers */
116 err = alloc_and_kmap_iopage(g, &g->sim.send_bfr.kvaddr, 107 err = alloc_and_kmap_iopage(g, &g->sim.send_bfr.kvaddr,
117 &g->sim.send_bfr.phys, 108 &g->sim.send_bfr.phys,