Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c | 1119
1 file changed, 27 insertions(+), 1092 deletions(-)
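
This patch strips the Linux platform-driver glue out of gk20a.c (probe/remove, IRQ wiring, the of_device_id table, runtime-PM callbacks, module init) and leaves chip-agnostic power sequencing behind: gk20a_pm_finalize_poweron(struct device *) and gk20a_pm_prepare_poweroff(struct device *) become gk20a_finalize_poweron(struct gk20a *) and gk20a_prepare_poweroff(struct gk20a *). A minimal sketch of the call shape this implies for whichever OS-specific layer picks up the runtime-PM callbacks; the example_* wrappers are hypothetical, not part of this patch:

/*
 * Sketch only: the example_* wrappers are hypothetical stand-ins for
 * the OS-specific code that hosts the runtime-PM callbacks after this
 * patch; only the gk20a_* entry points appear in the diff below.
 */
static int example_runtime_resume(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	/* Poweron is now keyed off the gk20a struct, not struct device. */
	return gk20a_finalize_poweron(g);
}

static int example_runtime_suspend(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	/* Fails early with -EBUSY while any engine is still busy. */
	return gk20a_prepare_poweroff(g);
}
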
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 2a80157d..b3f4e5fe 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -16,25 +16,7 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/export.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/pm_runtime.h>
-#include <linux/thermal.h>
-#include <asm/cacheflush.h>
-#include <linux/debugfs.h>
-#include <linux/clk/tegra.h>
-#include <linux/kthread.h>
-#include <linux/platform/tegra/common.h>
-#include <linux/reset.h>
 #include <linux/reboot.h>
-#include <linux/sched.h>
-#include <linux/version.h>
 
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
@@ -42,26 +24,22 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/soc.h>
 
+#include <trace/events/gk20a.h>
+
 #include "gk20a.h"
-#include "debug_gk20a.h"
 #include "channel_sync_gk20a.h"
 
-#include "gk20a_scale.h"
 #include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "mc_gk20a.h"
 #include "hal.h"
 #include "vgpu/vgpu.h"
-#include "pci.h"
 #include "bus_gk20a.h"
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 #include "pstate/pstate.h"
 #endif
 
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/gk20a.h>
-
 #ifdef CONFIG_TEGRA_19x_GPU
 #include "nvgpu_gpuid_t19x.h"
 #endif
@@ -70,93 +48,6 @@
 #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
 
-
-#ifdef CONFIG_ARM64
-#define __cpuc_flush_dcache_area __flush_dcache_area
-#endif
-
-#define CLASS_NAME "nvidia-gpu"
-/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
-
-#define GK20A_NUM_CDEVS 7
-
-#define GK20A_WAIT_FOR_IDLE_MS	2000
-
-static int gk20a_pm_prepare_poweroff(struct device *dev);
-
-#ifdef CONFIG_DEBUG_FS
-static int railgate_residency_show(struct seq_file *s, void *data)
-{
-	struct device *dev = s->private;
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-	unsigned long time_since_last_state_transition_ms;
-	unsigned long total_rail_gate_time_ms;
-	unsigned long total_rail_ungate_time_ms;
-
-	if (platform->is_railgated(dev)) {
-		time_since_last_state_transition_ms =
-			jiffies_to_msecs(jiffies -
-				g->pstats.last_rail_gate_complete);
-		total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
-		total_rail_gate_time_ms =
-			g->pstats.total_rail_gate_time_ms +
-			time_since_last_state_transition_ms;
-	} else {
-		time_since_last_state_transition_ms =
-			jiffies_to_msecs(jiffies -
-				g->pstats.last_rail_ungate_complete);
-		total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
-		total_rail_ungate_time_ms =
-			g->pstats.total_rail_ungate_time_ms +
-			time_since_last_state_transition_ms;
-	}
-
-	seq_printf(s, "Time with Rails Gated: %lu ms\n"
-		"Time with Rails UnGated: %lu ms\n"
-		"Total railgating cycles: %lu\n",
-		total_rail_gate_time_ms,
-		total_rail_ungate_time_ms,
-		g->pstats.railgating_cycle_count - 1);
-	return 0;
-
-}
-
-static int railgate_residency_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, railgate_residency_show, inode->i_private);
-}
-
-static const struct file_operations railgate_residency_fops = {
-	.open		= railgate_residency_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-int gk20a_railgating_debugfs_init(struct device *dev)
-{
-	struct dentry *d;
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-
-	if (!platform->can_railgate)
-		return 0;
-
-	d = debugfs_create_file(
-		"railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev,
-		&railgate_residency_fops);
-	if (!d)
-		return -ENOMEM;
-
-	return 0;
-}
-#endif
-
-static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
-{
-	gk20a_get_platform(&pdev->dev)->g = gk20a;
-}
-
 void __nvgpu_check_gpu_state(struct gk20a *g)
 {
	u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL);
@@ -199,130 +90,32 @@ void __gk20a_warn_on_no_regs(void)
 	WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
 }
 
-void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
-		struct resource **out)
-{
-	struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
-	if (!r)
-		return NULL;
-	if (out)
-		*out = r;
-	return devm_ioremap_resource(&dev->dev, r);
-}
-
-static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
-{
-	struct gk20a *g = dev_id;
-
-	return g->ops.mc.isr_stall(g);
-}
-
-static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
-{
-	struct gk20a *g = dev_id;
-
-	return g->ops.mc.isr_nonstall(g);
-}
-
-static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
-{
-	struct gk20a *g = dev_id;
-	return g->ops.mc.isr_thread_stall(g);
-}
-
-void gk20a_remove_support(struct gk20a *g)
-{
-#ifdef CONFIG_TEGRA_COMMON
-	tegra_unregister_idle_unidle();
-#endif
-	if (g->dbg_regops_tmp_buf)
-		nvgpu_kfree(g, g->dbg_regops_tmp_buf);
-
-	if (g->pmu.remove_support)
-		g->pmu.remove_support(&g->pmu);
-
-	if (g->gr.remove_support)
-		g->gr.remove_support(&g->gr);
-
-	if (g->mm.remove_ce_support)
-		g->mm.remove_ce_support(&g->mm);
-
-	if (g->fifo.remove_support)
-		g->fifo.remove_support(&g->fifo);
-
-	if (g->mm.remove_support)
-		g->mm.remove_support(&g->mm);
-
-	if (g->sim.remove_support)
-		g->sim.remove_support(&g->sim);
-
-	/* free mappings to registers, etc */
-
-	if (g->regs) {
-		iounmap(g->regs);
-		g->regs = NULL;
-	}
-	if (g->bar1) {
-		iounmap(g->bar1);
-		g->bar1 = NULL;
-	}
-}
-
-static int gk20a_init_support(struct platform_device *dev)
+static int gk20a_detect_chip(struct gk20a *g)
 {
-	int err = 0;
-	struct gk20a *g = get_gk20a(&dev->dev);
+	struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
+	u32 val;
 
-#ifdef CONFIG_TEGRA_COMMON
-	tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle);
-#endif
-
-	g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
-			&g->reg_mem);
-	if (IS_ERR(g->regs)) {
-		nvgpu_err(g, "failed to remap gk20a registers\n");
-		err = PTR_ERR(g->regs);
-		goto fail;
-	}
+	if (gpu->arch)
+		return 0;
 
-	g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
-			&g->bar1_mem);
-	if (IS_ERR(g->bar1)) {
-		nvgpu_err(g, "failed to remap gk20a bar1\n");
-		err = PTR_ERR(g->bar1);
-		goto fail;
-	}
+	val = gk20a_mc_boot_0(g, &gpu->arch, &gpu->impl, &gpu->rev);
 
-	if (nvgpu_platform_is_simulation(g)) {
-		err = gk20a_init_sim_support(dev);
-		if (err)
-			goto fail;
-	}
+	gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
+			g->gpu_characteristics.arch,
+			g->gpu_characteristics.impl,
+			g->gpu_characteristics.rev);
 
-	return 0;
-
- fail:
-	return err;
+	return gpu_init_hal(g);
 }
 
-static int gk20a_pm_prepare_poweroff(struct device *dev)
+int gk20a_prepare_poweroff(struct gk20a *g)
 {
-	struct gk20a *g = get_gk20a(dev);
 	int ret = 0;
-	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	gk20a_dbg_fn("");
 
-	nvgpu_mutex_acquire(&g->poweroff_lock);
-
-	if (!g->power_on)
-		goto done;
-
-	if (gk20a_fifo_is_engine_busy(g)) {
-		nvgpu_mutex_release(&g->poweroff_lock);
+	if (gk20a_fifo_is_engine_busy(g))
 		return -EBUSY;
-	}
-	gk20a_scale_suspend(dev);
 
 	/* cancel any pending cde work */
 	gk20a_cde_suspend(g);
@@ -331,18 +124,11 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 
 	ret = gk20a_channel_suspend(g);
 	if (ret)
-		goto done;
+		return ret;
 
 	/* disable elpg before gr or fifo suspend */
 	if (g->ops.pmu.is_pmu_supported(g))
 		ret |= gk20a_pmu_destroy(g);
-	/*
-	 * After this point, gk20a interrupts should not get
-	 * serviced.
-	 */
-	disable_irq(g->irq_stall);
-	if (g->irq_stall != g->irq_nonstall)
-		disable_irq(g->irq_nonstall);
 
 	ret |= gk20a_gr_suspend(g);
 	ret |= gk20a_mm_suspend(g);
@@ -361,67 +147,19 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 #endif
 	g->power_on = false;
 
-	/* Decrement platform power refcount */
-	if (platform->idle)
-		platform->idle(dev);
-
-	/* Stop CPU from accessing the GPU registers. */
-	gk20a_lockout_registers(g);
-
-done:
-	nvgpu_mutex_release(&g->poweroff_lock);
-
 	return ret;
 }
 
-static int gk20a_detect_chip(struct gk20a *g)
+int gk20a_finalize_poweron(struct gk20a *g)
 {
-	struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
-	u32 val;
-
-	if (gpu->arch)
-		return 0;
-
-	val = gk20a_mc_boot_0(g, &gpu->arch, &gpu->impl, &gpu->rev);
-
-	gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
-		g->gpu_characteristics.arch,
-		g->gpu_characteristics.impl,
-		g->gpu_characteristics.rev);
-
-	return gpu_init_hal(g);
-}
-
-int gk20a_pm_finalize_poweron(struct device *dev)
-{
-	struct gk20a *g = get_gk20a(dev);
-	struct gk20a_platform *platform = gk20a_get_platform(dev);
-	int err, nice_value;
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	int err;
 
 	gk20a_dbg_fn("");
 
 	if (g->power_on)
 		return 0;
 
-	trace_gk20a_finalize_poweron(g->name);
-
-	/* Increment platform power refcount */
-	if (platform->busy) {
-		err = platform->busy(dev);
-		if (err < 0) {
-			nvgpu_err(g, "%s: failed to poweron platform dependency\n",
-				__func__);
-			goto done;
-		}
-	}
-
-	err = gk20a_restore_registers(g);
-	if (err)
-		return err;
-
-	nice_value = task_nice(current);
-	set_user_nice(current, -20);
-
 	g->power_on = true;
 
 	err = gk20a_detect_chip(g);
@@ -586,31 +324,18 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 		g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);
 
 	gk20a_channel_resume(g);
-	set_user_nice(current, nice_value);
-
-	gk20a_scale_resume(dev);
-
-	trace_gk20a_finalize_poweron_done(g->name);
-
-	if (platform->has_cde)
-		gk20a_init_cde_support(g);
 
 	gk20a_init_ce_support(g);
 
 	gk20a_init_mm_ce_context(g);
 
-	enable_irq(g->irq_stall);
-	if (g->irq_stall != g->irq_nonstall)
-		enable_irq(g->irq_nonstall);
-	g->irqs_enabled = 1;
-
 	if (g->ops.xve.available_speeds) {
 		u32 speed;
 
 		if (platform->disable_aspm && g->ops.xve.disable_aspm)
			g->ops.xve.disable_aspm(g);
 
-		g->ops.xve.sw_init(dev);
+		g->ops.xve.sw_init(g->dev);
 		g->ops.xve.available_speeds(g, &speed);
 
 		/* Set to max speed */
@@ -629,515 +354,15 @@ done:
 	return err;
 }
 
-static struct of_device_id tegra_gk20a_of_match[] = {
-#ifdef CONFIG_TEGRA_GK20A
-	{ .compatible = "nvidia,tegra124-gk20a",
-		.data = &gk20a_tegra_platform },
-	{ .compatible = "nvidia,tegra210-gm20b",
-		.data = &gm20b_tegra_platform },
-#ifdef CONFIG_ARCH_TEGRA_18x_SOC
-	{ .compatible = "nvidia,tegra186-gp10b",
-		.data = &gp10b_tegra_platform },
-#endif
-#ifdef CONFIG_TEGRA_19x_GPU
-	{ .compatible = TEGRA_19x_GPU_COMPAT_TEGRA,
-		.data = &t19x_gpu_tegra_platform },
-#endif
-#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-	{ .compatible = "nvidia,tegra124-gk20a-vgpu",
-		.data = &vgpu_tegra_platform },
-#endif
-#else
-	{ .compatible = "nvidia,tegra124-gk20a",
-		.data = &gk20a_generic_platform },
-	{ .compatible = "nvidia,tegra210-gm20b",
-		.data = &gk20a_generic_platform },
-#ifdef CONFIG_ARCH_TEGRA_18x_SOC
-	{ .compatible = TEGRA_18x_GPU_COMPAT_TEGRA,
-		.data = &gk20a_generic_platform },
-#endif
-
-#endif
-	{ .compatible = "nvidia,generic-gk20a",
-		.data = &gk20a_generic_platform },
-	{ .compatible = "nvidia,generic-gm20b",
-		.data = &gk20a_generic_platform },
-#ifdef CONFIG_ARCH_TEGRA_18x_SOC
-	{ .compatible = "nvidia,generic-gp10b",
-		.data = &gk20a_generic_platform },
-#endif
-	{ },
-};
-
-static int gk20a_pm_railgate(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	int ret = 0;
-#ifdef CONFIG_DEBUG_FS
-	struct gk20a *g = get_gk20a(dev);
-
-	g->pstats.last_rail_gate_start = jiffies;
-
-	if (g->pstats.railgating_cycle_count >= 1)
-		g->pstats.total_rail_ungate_time_ms =
-			g->pstats.total_rail_ungate_time_ms +
-			jiffies_to_msecs(jiffies -
-					g->pstats.last_rail_ungate_complete);
-#endif
-
-	if (platform->railgate)
-		ret = platform->railgate(dev);
-
-#ifdef CONFIG_DEBUG_FS
-	g->pstats.last_rail_gate_complete = jiffies;
-#endif
-
-	return ret;
-}
-
-static int gk20a_pm_unrailgate(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	int ret = 0;
-	struct gk20a *g = get_gk20a(dev);
-
-#ifdef CONFIG_DEBUG_FS
-	g->pstats.last_rail_ungate_start = jiffies;
-	if (g->pstats.railgating_cycle_count >= 1)
-		g->pstats.total_rail_gate_time_ms =
-			g->pstats.total_rail_gate_time_ms +
-			jiffies_to_msecs(jiffies -
-				g->pstats.last_rail_gate_complete);
-
-	g->pstats.railgating_cycle_count++;
-#endif
-
-	trace_gk20a_pm_unrailgate(g->name);
-
-	if (platform->unrailgate) {
-		nvgpu_mutex_acquire(&platform->railgate_lock);
-		ret = platform->unrailgate(dev);
-		nvgpu_mutex_release(&platform->railgate_lock);
-	}
-
-#ifdef CONFIG_DEBUG_FS
-	g->pstats.last_rail_ungate_complete = jiffies;
-#endif
-
-	return ret;
-}
-
-static void gk20a_pm_shutdown(struct platform_device *pdev)
-{
-	struct gk20a_platform *platform = platform_get_drvdata(pdev);
-	struct gk20a *g = platform->g;
-	int err;
-
-	nvgpu_info(g, "shutting down");
-
-	/* vgpu has nothing to clean up currently */
-	if (gk20a_gpu_is_virtual(&pdev->dev))
-		return;
-
-	gk20a_driver_start_unload(g);
-
-	/* If GPU is already railgated,
-	 * just prevent more requests, and return */
-	if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
-		__pm_runtime_disable(&pdev->dev, false);
-		nvgpu_info(g, "already railgated, shut down complete");
-		return;
-	}
-
-	/* Prevent more requests by disabling Runtime PM */
-	__pm_runtime_disable(&pdev->dev, false);
-
-	err = gk20a_wait_for_idle(&pdev->dev);
-	if (err) {
-		nvgpu_err(g, "failed to idle GPU, err=%d", err);
-		goto finish;
-	}
-
-	err = gk20a_fifo_disable_all_engine_activity(g, true);
-	if (err) {
-		nvgpu_err(g, "failed to disable engine activity, err=%d",
-			err);
-		goto finish;
-	}
-
-	err = gk20a_fifo_wait_engine_idle(g);
-	if (err) {
-		nvgpu_err(g, "failed to idle engines, err=%d",
-			err);
-		goto finish;
-	}
-
-	if (gk20a_gpu_is_virtual(&pdev->dev))
-		err = vgpu_pm_prepare_poweroff(&pdev->dev);
-	else
-		err = gk20a_pm_prepare_poweroff(&pdev->dev);
-	if (err) {
-		nvgpu_err(g, "failed to prepare for poweroff, err=%d",
-			err);
-		goto finish;
-	}
-
-	err = gk20a_pm_railgate(&pdev->dev);
-	if (err)
-		nvgpu_err(g, "failed to railgate, err=%d", err);
-
-finish:
-	nvgpu_info(g, "shut down complete\n");
-}
-
-#ifdef CONFIG_PM
-static int gk20a_pm_runtime_resume(struct device *dev)
-{
-	int err = 0;
-
-	err = gk20a_pm_unrailgate(dev);
-	if (err)
-		goto fail;
-
-	err = gk20a_pm_finalize_poweron(dev);
-	if (err)
-		goto fail_poweron;
-
-	return 0;
-
-fail_poweron:
-	gk20a_pm_railgate(dev);
-fail:
-	return err;
-}
-
-static int gk20a_pm_runtime_suspend(struct device *dev)
-{
-	int err = 0;
-
-	err = gk20a_pm_prepare_poweroff(dev);
-	if (err)
-		goto fail;
-
-	err = gk20a_pm_railgate(dev);
-	if (err)
-		goto fail_railgate;
-
-	return 0;
-
-fail_railgate:
-	gk20a_pm_finalize_poweron(dev);
-fail:
-	pm_runtime_mark_last_busy(dev);
-	return err;
-}
-
-static int gk20a_pm_suspend(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-	int ret = 0;
-
-	if (platform->user_railgate_disabled)
-		gk20a_idle_nosuspend(dev);
-
-	if (atomic_read(&dev->power.usage_count) > 1) {
-		ret = -EBUSY;
-		goto fail;
-	}
-
-	if (!g->power_on)
-		return 0;
-
-	ret = gk20a_pm_runtime_suspend(dev);
-	if (ret)
-		goto fail;
-
-	if (platform->suspend)
-		platform->suspend(dev);
-
-	g->suspended = true;
-
-	return 0;
-
-fail:
-	if (platform->user_railgate_disabled)
-		gk20a_busy_noresume(dev);
-
-	return ret;
-}
-
-static int gk20a_pm_resume(struct device *dev)
+/*
+ * Check if the device can go busy. Basically if the driver is currently
+ * in the process of dying then do not let new places make the driver busy.
+ */
+int gk20a_can_busy(struct gk20a *g)
 {
-	struct gk20a *g = get_gk20a(dev);
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	int ret = 0;
-
-	if (platform->user_railgate_disabled)
-		gk20a_busy_noresume(dev);
-
-	if (!g->suspended)
+	if (g->driver_is_dying)
 		return 0;
-
-	ret = gk20a_pm_runtime_resume(dev);
-
-	g->suspended = false;
-
-	return ret;
-}
-
-static const struct dev_pm_ops gk20a_pm_ops = {
-	.runtime_resume = gk20a_pm_runtime_resume,
-	.runtime_suspend = gk20a_pm_runtime_suspend,
-	.resume = gk20a_pm_resume,
-	.suspend = gk20a_pm_suspend,
-};
-#endif
-
-int gk20a_pm_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	int err = 0;
-
-	gk20a_dbg_fn("");
-
-	/* Initialise pm runtime */
-	if (platform->railgate_delay) {
-		pm_runtime_set_autosuspend_delay(dev,
-			 platform->railgate_delay);
-		pm_runtime_use_autosuspend(dev);
-	}
-
-	if (platform->can_railgate) {
-		pm_runtime_enable(dev);
-		if (!pm_runtime_enabled(dev))
-			gk20a_pm_unrailgate(dev);
-		else
-			gk20a_pm_railgate(dev);
-	} else {
-		__pm_runtime_disable(dev, false);
-		gk20a_pm_unrailgate(dev);
-	}
-
-	return err;
-}
-
-int gk20a_secure_page_alloc(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	int err = 0;
-
-	if (platform->secure_page_alloc) {
-		err = platform->secure_page_alloc(dev);
-		if (!err)
-			platform->secure_alloc_ready = true;
-	}
-
-	return err;
-}
-
-static int gk20a_probe(struct platform_device *dev)
-{
-	struct gk20a *gk20a;
-	int err;
-	struct gk20a_platform *platform = NULL;
-
-	if (dev->dev.of_node) {
-		const struct of_device_id *match;
-
-		match = of_match_device(tegra_gk20a_of_match, &dev->dev);
-		if (match)
-			platform = (struct gk20a_platform *)match->data;
-	} else
-		platform = (struct gk20a_platform *)dev->dev.platform_data;
-
-	if (!platform) {
-		dev_err(&dev->dev, "no platform data\n");
-		return -ENODATA;
-	}
-
-	gk20a_dbg_fn("");
-
-	platform_set_drvdata(dev, platform);
-
-	if (gk20a_gpu_is_virtual(&dev->dev))
-		return vgpu_probe(dev);
-
-	gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
-	if (!gk20a) {
-		dev_err(&dev->dev, "couldn't allocate gk20a support");
-		return -ENOMEM;
-	}
-
-	set_gk20a(dev, gk20a);
-	gk20a->dev = &dev->dev;
-
-	if (nvgpu_platform_is_simulation(gk20a))
-		platform->is_fmodel = true;
-
-	nvgpu_kmem_init(gk20a);
-
-	gk20a->irq_stall = platform_get_irq(dev, 0);
-	gk20a->irq_nonstall = platform_get_irq(dev, 1);
-	if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
-		return -ENXIO;
-
-	err = devm_request_threaded_irq(&dev->dev,
-			gk20a->irq_stall,
-			gk20a_intr_isr_stall,
-			gk20a_intr_thread_stall,
-			0, "gk20a_stall", gk20a);
-	if (err) {
-		dev_err(&dev->dev,
-			"failed to request stall intr irq @ %d\n",
-			gk20a->irq_stall);
-		return err;
-	}
-	err = devm_request_irq(&dev->dev,
-			gk20a->irq_nonstall,
-			gk20a_intr_isr_nonstall,
-			0, "gk20a_nonstall", gk20a);
-	if (err) {
-		dev_err(&dev->dev,
-			"failed to request non-stall intr irq @ %d\n",
-			gk20a->irq_nonstall);
-		return err;
-	}
-	disable_irq(gk20a->irq_stall);
-	if (gk20a->irq_stall != gk20a->irq_nonstall)
-		disable_irq(gk20a->irq_nonstall);
-
-	/*
-	 * is_fmodel needs to be in gk20a struct for deferred teardown
-	 */
-	gk20a->is_fmodel = platform->is_fmodel;
-
-	err = gk20a_init_support(dev);
-	if (err)
-		return err;
-
-#ifdef CONFIG_RESET_CONTROLLER
-	platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
-	if (IS_ERR(platform->reset_control))
-		platform->reset_control = NULL;
-#endif
-
-	err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
-	if (err)
-		return err;
-
-	err = gk20a_pm_init(&dev->dev);
-	if (err) {
-		dev_err(&dev->dev, "pm init failed");
-		return err;
-	}
-
-	gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);
-
-	return 0;
-}
-
-static int __exit gk20a_remove(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct gk20a *g = get_gk20a(dev);
-	struct gk20a_platform *platform = gk20a_get_platform(dev);
-
-	gk20a_dbg_fn("");
-
-	if (gk20a_gpu_is_virtual(dev))
-		return vgpu_remove(pdev);
-
-	if (platform->has_cde)
-		gk20a_cde_destroy(g);
-
-	gk20a_ctxsw_trace_cleanup(g);
-
-	gk20a_sched_ctrl_cleanup(g);
-
-	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
-		gk20a_scale_exit(dev);
-
-#ifdef CONFIG_ARCH_TEGRA_18x_SOC
-	nvgpu_clk_arb_cleanup_arbiter(g);
-#endif
-
-	gk20a_user_deinit(dev, &nvgpu_class);
-
-	debugfs_remove_recursive(platform->debugfs);
-	debugfs_remove_recursive(platform->debugfs_alias);
-
-	gk20a_remove_sysfs(dev);
-
-	if (platform->secure_buffer.destroy)
-		platform->secure_buffer.destroy(dev,
-			&platform->secure_buffer);
-
-	if (pm_runtime_enabled(dev))
-		pm_runtime_disable(dev);
-
-	if (platform->remove)
-		platform->remove(dev);
-
-	set_gk20a(pdev, NULL);
-	gk20a_put(g);
-
-	gk20a_dbg_fn("removed");
-
-	return 0;
-}
-
-static struct platform_driver gk20a_driver = {
-	.probe = gk20a_probe,
-	.remove = __exit_p(gk20a_remove),
-	.shutdown = gk20a_pm_shutdown,
-	.driver = {
-		.owner = THIS_MODULE,
-		.name = "gk20a",
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
-		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-#endif
-#ifdef CONFIG_OF
-		.of_match_table = tegra_gk20a_of_match,
-#endif
-#ifdef CONFIG_PM
-		.pm = &gk20a_pm_ops,
-#endif
-		.suppress_bind_attrs = true,
-	}
-};
-
-struct class nvgpu_class = {
-	.owner = THIS_MODULE,
-	.name = CLASS_NAME,
-};
-
-static int __init gk20a_init(void)
-{
-
-	int ret;
-
-	ret = class_register(&nvgpu_class);
-	if (ret)
-		return ret;
-
-	ret = nvgpu_pci_init();
-	if (ret)
-		return ret;
-
-	return platform_driver_register(&gk20a_driver);
-}
-
-static void __exit gk20a_exit(void)
-{
-	nvgpu_pci_exit();
-	platform_driver_unregister(&gk20a_driver);
-	class_unregister(&nvgpu_class);
-}
-
-void gk20a_busy_noresume(struct device *dev)
-{
-	pm_runtime_get_noresume(dev);
+	return 1;
 }
 
 /*
@@ -1193,292 +418,6 @@ int gk20a_wait_for_idle(struct device *dev)
 	return 0;
 }
 
-/*
- * Check if the device can go busy. Basically if the driver is currently
- * in the process of dying then do not let new places make the driver busy.
- */
-static int gk20a_can_busy(struct gk20a *g)
-{
-	if (g->driver_is_dying)
-		return 0;
-	return 1;
-}
-
-int gk20a_busy(struct gk20a *g)
-{
-	int ret = 0;
-	struct device *dev;
-
-	if (!g)
-		return -ENODEV;
-
-	atomic_inc(&g->usage_count);
-
-	down_read(&g->busy_lock);
-
-	if (!gk20a_can_busy(g)) {
-		ret = -ENODEV;
-		atomic_dec(&g->usage_count);
-		goto fail;
-	}
-
-	dev = g->dev;
-
-	if (pm_runtime_enabled(dev)) {
-		ret = pm_runtime_get_sync(dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(dev);
-			atomic_dec(&g->usage_count);
-			goto fail;
-		}
-	} else {
-		if (!g->power_on) {
-			ret = gk20a_gpu_is_virtual(dev) ?
-				vgpu_pm_finalize_poweron(dev)
-				: gk20a_pm_finalize_poweron(dev);
-			if (ret) {
-				atomic_dec(&g->usage_count);
-				goto fail;
-			}
-		}
-	}
-
-	gk20a_scale_notify_busy(dev);
-
-fail:
-	up_read(&g->busy_lock);
-
-	return ret < 0 ? ret : 0;
-}
-
-void gk20a_idle_nosuspend(struct device *dev)
-{
-	pm_runtime_put_noidle(dev);
-}
-
-void gk20a_idle(struct gk20a *g)
-{
-	struct device *dev;
-
-	atomic_dec(&g->usage_count);
-	down_read(&g->busy_lock);
-
-	dev = g->dev;
-
-	if (!(dev && gk20a_can_busy(g)))
-		goto fail;
-
-	if (pm_runtime_enabled(dev)) {
-#ifdef CONFIG_PM
-		if (atomic_read(&g->dev->power.usage_count) == 1)
-			gk20a_scale_notify_idle(dev);
-#endif
-
-		pm_runtime_mark_last_busy(dev);
-		pm_runtime_put_sync_autosuspend(dev);
-
-	} else {
-		gk20a_scale_notify_idle(dev);
-	}
-fail:
-	up_read(&g->busy_lock);
-}
-
-#ifdef CONFIG_PM
-/**
- * __gk20a_do_idle() - force the GPU to idle and railgate
- *
- * In success, this call MUST be balanced by caller with __gk20a_do_unidle()
- *
- * Acquires two locks : &g->busy_lock and &platform->railgate_lock
- * In success, we hold these locks and return
- * In failure, we release these locks and return
- */
-int __gk20a_do_idle(struct device *dev, bool force_reset)
-{
-	struct gk20a *g = get_gk20a(dev);
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct nvgpu_timeout timeout;
-	int ref_cnt;
-	int target_ref_cnt = 0;
-	bool is_railgated;
-	int err = 0;
-
-	/* acquire busy lock to block other busy() calls */
-	down_write(&g->busy_lock);
-
-	/* acquire railgate lock to prevent unrailgate in midst of do_idle() */
-	nvgpu_mutex_acquire(&platform->railgate_lock);
-
-	/* check if it is already railgated ? */
-	if (platform->is_railgated(dev))
-		return 0;
-
-	/*
-	 * release railgate_lock, prevent suspend by incrementing usage counter,
-	 * re-acquire railgate_lock
-	 */
-	nvgpu_mutex_release(&platform->railgate_lock);
-	pm_runtime_get_sync(dev);
-
-	/*
-	 * One refcount taken in this API
-	 * If User disables rail gating, we take one more
-	 * extra refcount
-	 */
-	if (platform->user_railgate_disabled)
-		target_ref_cnt = 2;
-	else
-		target_ref_cnt = 1;
-	nvgpu_mutex_acquire(&platform->railgate_lock);
-
-	nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
-			   NVGPU_TIMER_CPU_TIMER);
-
-	/* check and wait until GPU is idle (with a timeout) */
-	do {
-		nvgpu_msleep(1);
-		ref_cnt = atomic_read(&dev->power.usage_count);
-	} while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));
-
-	if (ref_cnt != target_ref_cnt) {
-		nvgpu_err(g, "failed to idle - refcount %d != 1\n",
-			ref_cnt);
-		goto fail_drop_usage_count;
-	}
-
-	/* check if global force_reset flag is set */
-	force_reset |= platform->force_reset_in_do_idle;
-
-	nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
-			   NVGPU_TIMER_CPU_TIMER);
-
-	if (platform->can_railgate && !force_reset) {
-		/*
-		 * Case 1 : GPU railgate is supported
-		 *
-		 * if GPU is now idle, we will have only one ref count,
-		 * drop this ref which will rail gate the GPU
-		 */
-		pm_runtime_put_sync(dev);
-
-		/* add sufficient delay to allow GPU to rail gate */
-		nvgpu_msleep(platform->railgate_delay);
-
-		/* check in loop if GPU is railgated or not */
-		do {
-			nvgpu_msleep(1);
-			is_railgated = platform->is_railgated(dev);
-		} while (!is_railgated && !nvgpu_timeout_expired(&timeout));
-
-		if (is_railgated) {
-			return 0;
-		} else {
-			nvgpu_err(g, "failed to idle in timeout\n");
-			goto fail_timeout;
-		}
-	} else {
-		/*
-		 * Case 2 : GPU railgate is not supported or we explicitly
-		 * do not want to depend on runtime PM
-		 *
-		 * if GPU is now idle, call prepare_poweroff() to save the
-		 * state and then do explicit railgate
-		 *
-		 * __gk20a_do_unidle() needs to unrailgate, call
-		 * finalize_poweron(), and then call pm_runtime_put_sync()
-		 * to balance the GPU usage counter
-		 */
-
-		/* Save the GPU state */
-		err = gk20a_pm_prepare_poweroff(dev);
-		if (err)
-			goto fail_drop_usage_count;
-
-		/* railgate GPU */
-		platform->railgate(dev);
-
-		nvgpu_udelay(10);
-
-		g->forced_reset = true;
-		return 0;
-	}
-
-fail_drop_usage_count:
-	pm_runtime_put_noidle(dev);
-fail_timeout:
-	nvgpu_mutex_release(&platform->railgate_lock);
-	up_write(&g->busy_lock);
-	return -EBUSY;
-}
-
-/**
- * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called
- * from outside of GPU driver
- *
- * In success, this call MUST be balanced by caller with gk20a_do_unidle()
- */
-int gk20a_do_idle(void)
-{
-	struct device_node *node =
-			of_find_matching_node(NULL, tegra_gk20a_of_match);
-	struct platform_device *pdev = of_find_device_by_node(node);
-
-	int ret = __gk20a_do_idle(&pdev->dev, true);
-
-	of_node_put(node);
-
-	return ret;
-}
-
-/**
- * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
- */
-int __gk20a_do_unidle(struct device *dev)
-{
-	struct gk20a *g = get_gk20a(dev);
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-
-	if (g->forced_reset) {
-		/*
-		 * If we did a forced-reset/railgate
-		 * then unrailgate the GPU here first
-		 */
-		platform->unrailgate(dev);
-
-		/* restore the GPU state */
-		gk20a_pm_finalize_poweron(dev);
-
-		/* balance GPU usage counter */
-		pm_runtime_put_sync(dev);
-
-		g->forced_reset = false;
-	}
-
-	/* release the lock and open up all other busy() calls */
-	nvgpu_mutex_release(&platform->railgate_lock);
-	up_write(&g->busy_lock);
-
-	return 0;
-}
-
-/**
- * gk20a_do_unidle() - wrap up for __gk20a_do_unidle()
- */
-int gk20a_do_unidle(void)
-{
-	struct device_node *node =
-			of_find_matching_node(NULL, tegra_gk20a_of_match);
-	struct platform_device *pdev = of_find_device_by_node(node);
-
-	int ret = __gk20a_do_unidle(&pdev->dev);
-
-	of_node_put(node);
-
-	return ret;
-}
-#endif
-
 int gk20a_init_gpu_characteristics(struct gk20a *g)
 {
 	struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
@@ -1651,7 +590,3 @@ void gk20a_put(struct gk20a *g)
 
 	kref_put(&g->refcount, gk20a_free_cb);
 }
-
-MODULE_LICENSE("GPL v2");
-module_init(gk20a_init);
-module_exit(gk20a_exit);
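
The busy/idle refcounting deleted in the last hunks is meant to move out of gk20a.c rather than disappear: gk20a_can_busy() is kept and made non-static so the relocated code can still gate new work while the driver is dying. A sketch of that caller side, condensed from the gk20a_busy() body removed above (where this code lands is an assumption; only the exported helper is visible in this diff):

/*
 * Sketch only: condensed from the gk20a_busy() body removed above.
 * The file that hosts it after this patch is not part of this diff.
 */
#include <linux/pm_runtime.h>

int gk20a_busy(struct gk20a *g)
{
	int ret = 0;

	if (!g)
		return -ENODEV;

	atomic_inc(&g->usage_count);
	down_read(&g->busy_lock);

	/* gk20a_can_busy() is the helper this patch exports. */
	if (!gk20a_can_busy(g)) {
		ret = -ENODEV;
		atomic_dec(&g->usage_count);
		goto fail;
	}

	if (pm_runtime_enabled(g->dev)) {
		/* Resume via runtime PM; drop the reference on failure. */
		ret = pm_runtime_get_sync(g->dev);
		if (ret < 0) {
			pm_runtime_put_noidle(g->dev);
			atomic_dec(&g->usage_count);
		}
	}

fail:
	up_read(&g->busy_lock);
	return ret < 0 ? ret : 0;
}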