diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 68 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 1119 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sim_gk20a.c | 9 |
4 files changed, 98 insertions, 1108 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index 85b24f2e..1a9ffe77 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c | |||
@@ -228,6 +228,74 @@ void gk20a_init_debug_ops(struct gpu_ops *gops) | |||
228 | gops->debug.show_dump = gk20a_debug_show_dump; | 228 | gops->debug.show_dump = gk20a_debug_show_dump; |
229 | } | 229 | } |
230 | 230 | ||
231 | #ifdef CONFIG_DEBUG_FS | ||
232 | static int railgate_residency_show(struct seq_file *s, void *data) | ||
233 | { | ||
234 | struct device *dev = s->private; | ||
235 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
236 | struct gk20a *g = get_gk20a(dev); | ||
237 | unsigned long time_since_last_state_transition_ms; | ||
238 | unsigned long total_rail_gate_time_ms; | ||
239 | unsigned long total_rail_ungate_time_ms; | ||
240 | |||
241 | if (platform->is_railgated(dev)) { | ||
242 | time_since_last_state_transition_ms = | ||
243 | jiffies_to_msecs(jiffies - | ||
244 | g->pstats.last_rail_gate_complete); | ||
245 | total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; | ||
246 | total_rail_gate_time_ms = | ||
247 | g->pstats.total_rail_gate_time_ms + | ||
248 | time_since_last_state_transition_ms; | ||
249 | } else { | ||
250 | time_since_last_state_transition_ms = | ||
251 | jiffies_to_msecs(jiffies - | ||
252 | g->pstats.last_rail_ungate_complete); | ||
253 | total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; | ||
254 | total_rail_ungate_time_ms = | ||
255 | g->pstats.total_rail_ungate_time_ms + | ||
256 | time_since_last_state_transition_ms; | ||
257 | } | ||
258 | |||
259 | seq_printf(s, "Time with Rails Gated: %lu ms\n" | ||
260 | "Time with Rails UnGated: %lu ms\n" | ||
261 | "Total railgating cycles: %lu\n", | ||
262 | total_rail_gate_time_ms, | ||
263 | total_rail_ungate_time_ms, | ||
264 | g->pstats.railgating_cycle_count - 1); | ||
265 | return 0; | ||
266 | |||
267 | } | ||
268 | |||
269 | static int railgate_residency_open(struct inode *inode, struct file *file) | ||
270 | { | ||
271 | return single_open(file, railgate_residency_show, inode->i_private); | ||
272 | } | ||
273 | |||
274 | static const struct file_operations railgate_residency_fops = { | ||
275 | .open = railgate_residency_open, | ||
276 | .read = seq_read, | ||
277 | .llseek = seq_lseek, | ||
278 | .release = single_release, | ||
279 | }; | ||
280 | |||
281 | int gk20a_railgating_debugfs_init(struct device *dev) | ||
282 | { | ||
283 | struct dentry *d; | ||
284 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
285 | |||
286 | if (!platform->can_railgate) | ||
287 | return 0; | ||
288 | |||
289 | d = debugfs_create_file( | ||
290 | "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev, | ||
291 | &railgate_residency_fops); | ||
292 | if (!d) | ||
293 | return -ENOMEM; | ||
294 | |||
295 | return 0; | ||
296 | } | ||
297 | #endif | ||
298 | |||
231 | void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) | 299 | void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) |
232 | { | 300 | { |
233 | struct gk20a_platform *platform = dev_get_drvdata(dev); | 301 | struct gk20a_platform *platform = dev_get_drvdata(dev); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 2a80157d..b3f4e5fe 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -16,25 +16,7 @@ | |||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/irq.h> | ||
23 | #include <linux/export.h> | ||
24 | #include <linux/of.h> | ||
25 | #include <linux/of_device.h> | ||
26 | #include <linux/of_platform.h> | ||
27 | #include <linux/pm_runtime.h> | ||
28 | #include <linux/thermal.h> | ||
29 | #include <asm/cacheflush.h> | ||
30 | #include <linux/debugfs.h> | ||
31 | #include <linux/clk/tegra.h> | ||
32 | #include <linux/kthread.h> | ||
33 | #include <linux/platform/tegra/common.h> | ||
34 | #include <linux/reset.h> | ||
35 | #include <linux/reboot.h> | 19 | #include <linux/reboot.h> |
36 | #include <linux/sched.h> | ||
37 | #include <linux/version.h> | ||
38 | 20 | ||
39 | #include <nvgpu/nvgpu_common.h> | 21 | #include <nvgpu/nvgpu_common.h> |
40 | #include <nvgpu/kmem.h> | 22 | #include <nvgpu/kmem.h> |
@@ -42,26 +24,22 @@ | |||
42 | #include <nvgpu/timers.h> | 24 | #include <nvgpu/timers.h> |
43 | #include <nvgpu/soc.h> | 25 | #include <nvgpu/soc.h> |
44 | 26 | ||
27 | #include <trace/events/gk20a.h> | ||
28 | |||
45 | #include "gk20a.h" | 29 | #include "gk20a.h" |
46 | #include "debug_gk20a.h" | ||
47 | #include "channel_sync_gk20a.h" | 30 | #include "channel_sync_gk20a.h" |
48 | 31 | ||
49 | #include "gk20a_scale.h" | ||
50 | #include "ctxsw_trace_gk20a.h" | 32 | #include "ctxsw_trace_gk20a.h" |
51 | #include "dbg_gpu_gk20a.h" | 33 | #include "dbg_gpu_gk20a.h" |
52 | #include "mc_gk20a.h" | 34 | #include "mc_gk20a.h" |
53 | #include "hal.h" | 35 | #include "hal.h" |
54 | #include "vgpu/vgpu.h" | 36 | #include "vgpu/vgpu.h" |
55 | #include "pci.h" | ||
56 | #include "bus_gk20a.h" | 37 | #include "bus_gk20a.h" |
57 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | 38 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC |
58 | #include "pstate/pstate.h" | 39 | #include "pstate/pstate.h" |
59 | #endif | 40 | #endif |
60 | 41 | ||
61 | 42 | ||
62 | #define CREATE_TRACE_POINTS | ||
63 | #include <trace/events/gk20a.h> | ||
64 | |||
65 | #ifdef CONFIG_TEGRA_19x_GPU | 43 | #ifdef CONFIG_TEGRA_19x_GPU |
66 | #include "nvgpu_gpuid_t19x.h" | 44 | #include "nvgpu_gpuid_t19x.h" |
67 | #endif | 45 | #endif |
@@ -70,93 +48,6 @@ | |||
70 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> | 48 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> |
71 | #include <nvgpu/hw/gk20a/hw_fb_gk20a.h> | 49 | #include <nvgpu/hw/gk20a/hw_fb_gk20a.h> |
72 | 50 | ||
73 | |||
74 | #ifdef CONFIG_ARM64 | ||
75 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
76 | #endif | ||
77 | |||
78 | #define CLASS_NAME "nvidia-gpu" | ||
79 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ | ||
80 | |||
81 | #define GK20A_NUM_CDEVS 7 | ||
82 | |||
83 | #define GK20A_WAIT_FOR_IDLE_MS 2000 | ||
84 | |||
85 | static int gk20a_pm_prepare_poweroff(struct device *dev); | ||
86 | |||
87 | #ifdef CONFIG_DEBUG_FS | ||
88 | static int railgate_residency_show(struct seq_file *s, void *data) | ||
89 | { | ||
90 | struct device *dev = s->private; | ||
91 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
92 | struct gk20a *g = get_gk20a(dev); | ||
93 | unsigned long time_since_last_state_transition_ms; | ||
94 | unsigned long total_rail_gate_time_ms; | ||
95 | unsigned long total_rail_ungate_time_ms; | ||
96 | |||
97 | if (platform->is_railgated(dev)) { | ||
98 | time_since_last_state_transition_ms = | ||
99 | jiffies_to_msecs(jiffies - | ||
100 | g->pstats.last_rail_gate_complete); | ||
101 | total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; | ||
102 | total_rail_gate_time_ms = | ||
103 | g->pstats.total_rail_gate_time_ms + | ||
104 | time_since_last_state_transition_ms; | ||
105 | } else { | ||
106 | time_since_last_state_transition_ms = | ||
107 | jiffies_to_msecs(jiffies - | ||
108 | g->pstats.last_rail_ungate_complete); | ||
109 | total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; | ||
110 | total_rail_ungate_time_ms = | ||
111 | g->pstats.total_rail_ungate_time_ms + | ||
112 | time_since_last_state_transition_ms; | ||
113 | } | ||
114 | |||
115 | seq_printf(s, "Time with Rails Gated: %lu ms\n" | ||
116 | "Time with Rails UnGated: %lu ms\n" | ||
117 | "Total railgating cycles: %lu\n", | ||
118 | total_rail_gate_time_ms, | ||
119 | total_rail_ungate_time_ms, | ||
120 | g->pstats.railgating_cycle_count - 1); | ||
121 | return 0; | ||
122 | |||
123 | } | ||
124 | |||
125 | static int railgate_residency_open(struct inode *inode, struct file *file) | ||
126 | { | ||
127 | return single_open(file, railgate_residency_show, inode->i_private); | ||
128 | } | ||
129 | |||
130 | static const struct file_operations railgate_residency_fops = { | ||
131 | .open = railgate_residency_open, | ||
132 | .read = seq_read, | ||
133 | .llseek = seq_lseek, | ||
134 | .release = single_release, | ||
135 | }; | ||
136 | |||
137 | int gk20a_railgating_debugfs_init(struct device *dev) | ||
138 | { | ||
139 | struct dentry *d; | ||
140 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
141 | |||
142 | if (!platform->can_railgate) | ||
143 | return 0; | ||
144 | |||
145 | d = debugfs_create_file( | ||
146 | "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev, | ||
147 | &railgate_residency_fops); | ||
148 | if (!d) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) | ||
156 | { | ||
157 | gk20a_get_platform(&pdev->dev)->g = gk20a; | ||
158 | } | ||
159 | |||
160 | void __nvgpu_check_gpu_state(struct gk20a *g) | 51 | void __nvgpu_check_gpu_state(struct gk20a *g) |
161 | { | 52 | { |
162 | u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); | 53 | u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); |
@@ -199,130 +90,32 @@ void __gk20a_warn_on_no_regs(void) | |||
199 | WARN_ONCE(1, "Attempted access to GPU regs after unmapping!"); | 90 | WARN_ONCE(1, "Attempted access to GPU regs after unmapping!"); |
200 | } | 91 | } |
201 | 92 | ||
202 | void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i, | 93 | static int gk20a_detect_chip(struct gk20a *g) |
203 | struct resource **out) | ||
204 | { | ||
205 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
206 | if (!r) | ||
207 | return NULL; | ||
208 | if (out) | ||
209 | *out = r; | ||
210 | return devm_ioremap_resource(&dev->dev, r); | ||
211 | } | ||
212 | |||
213 | static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) | ||
214 | { | ||
215 | struct gk20a *g = dev_id; | ||
216 | |||
217 | return g->ops.mc.isr_stall(g); | ||
218 | } | ||
219 | |||
220 | static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) | ||
221 | { | ||
222 | struct gk20a *g = dev_id; | ||
223 | |||
224 | return g->ops.mc.isr_nonstall(g); | ||
225 | } | ||
226 | |||
227 | static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) | ||
228 | { | ||
229 | struct gk20a *g = dev_id; | ||
230 | return g->ops.mc.isr_thread_stall(g); | ||
231 | } | ||
232 | |||
233 | void gk20a_remove_support(struct gk20a *g) | ||
234 | { | ||
235 | #ifdef CONFIG_TEGRA_COMMON | ||
236 | tegra_unregister_idle_unidle(); | ||
237 | #endif | ||
238 | if (g->dbg_regops_tmp_buf) | ||
239 | nvgpu_kfree(g, g->dbg_regops_tmp_buf); | ||
240 | |||
241 | if (g->pmu.remove_support) | ||
242 | g->pmu.remove_support(&g->pmu); | ||
243 | |||
244 | if (g->gr.remove_support) | ||
245 | g->gr.remove_support(&g->gr); | ||
246 | |||
247 | if (g->mm.remove_ce_support) | ||
248 | g->mm.remove_ce_support(&g->mm); | ||
249 | |||
250 | if (g->fifo.remove_support) | ||
251 | g->fifo.remove_support(&g->fifo); | ||
252 | |||
253 | if (g->mm.remove_support) | ||
254 | g->mm.remove_support(&g->mm); | ||
255 | |||
256 | if (g->sim.remove_support) | ||
257 | g->sim.remove_support(&g->sim); | ||
258 | |||
259 | /* free mappings to registers, etc */ | ||
260 | |||
261 | if (g->regs) { | ||
262 | iounmap(g->regs); | ||
263 | g->regs = NULL; | ||
264 | } | ||
265 | if (g->bar1) { | ||
266 | iounmap(g->bar1); | ||
267 | g->bar1 = NULL; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | static int gk20a_init_support(struct platform_device *dev) | ||
272 | { | 94 | { |
273 | int err = 0; | 95 | struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; |
274 | struct gk20a *g = get_gk20a(&dev->dev); | 96 | u32 val; |
275 | |||
276 | #ifdef CONFIG_TEGRA_COMMON | ||
277 | tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle); | ||
278 | #endif | ||
279 | |||
280 | g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM, | ||
281 | &g->reg_mem); | ||
282 | if (IS_ERR(g->regs)) { | ||
283 | nvgpu_err(g, "failed to remap gk20a registers\n"); | ||
284 | err = PTR_ERR(g->regs); | ||
285 | goto fail; | ||
286 | } | ||
287 | 97 | ||
288 | g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, | 98 | if (gpu->arch) |
289 | &g->bar1_mem); | 99 | return 0; |
290 | if (IS_ERR(g->bar1)) { | ||
291 | nvgpu_err(g, "failed to remap gk20a bar1\n"); | ||
292 | err = PTR_ERR(g->bar1); | ||
293 | goto fail; | ||
294 | } | ||
295 | 100 | ||
296 | if (nvgpu_platform_is_simulation(g)) { | 101 | val = gk20a_mc_boot_0(g, &gpu->arch, &gpu->impl, &gpu->rev); |
297 | err = gk20a_init_sim_support(dev); | ||
298 | if (err) | ||
299 | goto fail; | ||
300 | } | ||
301 | 102 | ||
302 | return 0; | 103 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", |
104 | g->gpu_characteristics.arch, | ||
105 | g->gpu_characteristics.impl, | ||
106 | g->gpu_characteristics.rev); | ||
303 | 107 | ||
304 | fail: | 108 | return gpu_init_hal(g); |
305 | return err; | ||
306 | } | 109 | } |
307 | 110 | ||
308 | static int gk20a_pm_prepare_poweroff(struct device *dev) | 111 | int gk20a_prepare_poweroff(struct gk20a *g) |
309 | { | 112 | { |
310 | struct gk20a *g = get_gk20a(dev); | ||
311 | int ret = 0; | 113 | int ret = 0; |
312 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
313 | 114 | ||
314 | gk20a_dbg_fn(""); | 115 | gk20a_dbg_fn(""); |
315 | 116 | ||
316 | nvgpu_mutex_acquire(&g->poweroff_lock); | 117 | if (gk20a_fifo_is_engine_busy(g)) |
317 | |||
318 | if (!g->power_on) | ||
319 | goto done; | ||
320 | |||
321 | if (gk20a_fifo_is_engine_busy(g)) { | ||
322 | nvgpu_mutex_release(&g->poweroff_lock); | ||
323 | return -EBUSY; | 118 | return -EBUSY; |
324 | } | ||
325 | gk20a_scale_suspend(dev); | ||
326 | 119 | ||
327 | /* cancel any pending cde work */ | 120 | /* cancel any pending cde work */ |
328 | gk20a_cde_suspend(g); | 121 | gk20a_cde_suspend(g); |
@@ -331,18 +124,11 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
331 | 124 | ||
332 | ret = gk20a_channel_suspend(g); | 125 | ret = gk20a_channel_suspend(g); |
333 | if (ret) | 126 | if (ret) |
334 | goto done; | 127 | return ret; |
335 | 128 | ||
336 | /* disable elpg before gr or fifo suspend */ | 129 | /* disable elpg before gr or fifo suspend */ |
337 | if (g->ops.pmu.is_pmu_supported(g)) | 130 | if (g->ops.pmu.is_pmu_supported(g)) |
338 | ret |= gk20a_pmu_destroy(g); | 131 | ret |= gk20a_pmu_destroy(g); |
339 | /* | ||
340 | * After this point, gk20a interrupts should not get | ||
341 | * serviced. | ||
342 | */ | ||
343 | disable_irq(g->irq_stall); | ||
344 | if (g->irq_stall != g->irq_nonstall) | ||
345 | disable_irq(g->irq_nonstall); | ||
346 | 132 | ||
347 | ret |= gk20a_gr_suspend(g); | 133 | ret |= gk20a_gr_suspend(g); |
348 | ret |= gk20a_mm_suspend(g); | 134 | ret |= gk20a_mm_suspend(g); |
@@ -361,67 +147,19 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
361 | #endif | 147 | #endif |
362 | g->power_on = false; | 148 | g->power_on = false; |
363 | 149 | ||
364 | /* Decrement platform power refcount */ | ||
365 | if (platform->idle) | ||
366 | platform->idle(dev); | ||
367 | |||
368 | /* Stop CPU from accessing the GPU registers. */ | ||
369 | gk20a_lockout_registers(g); | ||
370 | |||
371 | done: | ||
372 | nvgpu_mutex_release(&g->poweroff_lock); | ||
373 | |||
374 | return ret; | 150 | return ret; |
375 | } | 151 | } |
376 | 152 | ||
377 | static int gk20a_detect_chip(struct gk20a *g) | 153 | int gk20a_finalize_poweron(struct gk20a *g) |
378 | { | 154 | { |
379 | struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; | 155 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); |
380 | u32 val; | 156 | int err; |
381 | |||
382 | if (gpu->arch) | ||
383 | return 0; | ||
384 | |||
385 | val = gk20a_mc_boot_0(g, &gpu->arch, &gpu->impl, &gpu->rev); | ||
386 | |||
387 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", | ||
388 | g->gpu_characteristics.arch, | ||
389 | g->gpu_characteristics.impl, | ||
390 | g->gpu_characteristics.rev); | ||
391 | |||
392 | return gpu_init_hal(g); | ||
393 | } | ||
394 | |||
395 | int gk20a_pm_finalize_poweron(struct device *dev) | ||
396 | { | ||
397 | struct gk20a *g = get_gk20a(dev); | ||
398 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
399 | int err, nice_value; | ||
400 | 157 | ||
401 | gk20a_dbg_fn(""); | 158 | gk20a_dbg_fn(""); |
402 | 159 | ||
403 | if (g->power_on) | 160 | if (g->power_on) |
404 | return 0; | 161 | return 0; |
405 | 162 | ||
406 | trace_gk20a_finalize_poweron(g->name); | ||
407 | |||
408 | /* Increment platform power refcount */ | ||
409 | if (platform->busy) { | ||
410 | err = platform->busy(dev); | ||
411 | if (err < 0) { | ||
412 | nvgpu_err(g, "%s: failed to poweron platform dependency\n", | ||
413 | __func__); | ||
414 | goto done; | ||
415 | } | ||
416 | } | ||
417 | |||
418 | err = gk20a_restore_registers(g); | ||
419 | if (err) | ||
420 | return err; | ||
421 | |||
422 | nice_value = task_nice(current); | ||
423 | set_user_nice(current, -20); | ||
424 | |||
425 | g->power_on = true; | 163 | g->power_on = true; |
426 | 164 | ||
427 | err = gk20a_detect_chip(g); | 165 | err = gk20a_detect_chip(g); |
@@ -586,31 +324,18 @@ int gk20a_pm_finalize_poweron(struct device *dev) | |||
586 | g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl); | 324 | g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl); |
587 | 325 | ||
588 | gk20a_channel_resume(g); | 326 | gk20a_channel_resume(g); |
589 | set_user_nice(current, nice_value); | ||
590 | |||
591 | gk20a_scale_resume(dev); | ||
592 | |||
593 | trace_gk20a_finalize_poweron_done(g->name); | ||
594 | |||
595 | if (platform->has_cde) | ||
596 | gk20a_init_cde_support(g); | ||
597 | 327 | ||
598 | gk20a_init_ce_support(g); | 328 | gk20a_init_ce_support(g); |
599 | 329 | ||
600 | gk20a_init_mm_ce_context(g); | 330 | gk20a_init_mm_ce_context(g); |
601 | 331 | ||
602 | enable_irq(g->irq_stall); | ||
603 | if (g->irq_stall != g->irq_nonstall) | ||
604 | enable_irq(g->irq_nonstall); | ||
605 | g->irqs_enabled = 1; | ||
606 | |||
607 | if (g->ops.xve.available_speeds) { | 332 | if (g->ops.xve.available_speeds) { |
608 | u32 speed; | 333 | u32 speed; |
609 | 334 | ||
610 | if (platform->disable_aspm && g->ops.xve.disable_aspm) | 335 | if (platform->disable_aspm && g->ops.xve.disable_aspm) |
611 | g->ops.xve.disable_aspm(g); | 336 | g->ops.xve.disable_aspm(g); |
612 | 337 | ||
613 | g->ops.xve.sw_init(dev); | 338 | g->ops.xve.sw_init(g->dev); |
614 | g->ops.xve.available_speeds(g, &speed); | 339 | g->ops.xve.available_speeds(g, &speed); |
615 | 340 | ||
616 | /* Set to max speed */ | 341 | /* Set to max speed */ |
@@ -629,515 +354,15 @@ done: | |||
629 | return err; | 354 | return err; |
630 | } | 355 | } |
631 | 356 | ||
632 | static struct of_device_id tegra_gk20a_of_match[] = { | 357 | /* |
633 | #ifdef CONFIG_TEGRA_GK20A | 358 | * Check if the device can go busy. Basically if the driver is currently |
634 | { .compatible = "nvidia,tegra124-gk20a", | 359 | * in the process of dying then do not let new places make the driver busy. |
635 | .data = &gk20a_tegra_platform }, | 360 | */ |
636 | { .compatible = "nvidia,tegra210-gm20b", | 361 | int gk20a_can_busy(struct gk20a *g) |
637 | .data = &gm20b_tegra_platform }, | ||
638 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
639 | { .compatible = "nvidia,tegra186-gp10b", | ||
640 | .data = &gp10b_tegra_platform }, | ||
641 | #endif | ||
642 | #ifdef CONFIG_TEGRA_19x_GPU | ||
643 | { .compatible = TEGRA_19x_GPU_COMPAT_TEGRA, | ||
644 | .data = &t19x_gpu_tegra_platform }, | ||
645 | #endif | ||
646 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
647 | { .compatible = "nvidia,tegra124-gk20a-vgpu", | ||
648 | .data = &vgpu_tegra_platform }, | ||
649 | #endif | ||
650 | #else | ||
651 | { .compatible = "nvidia,tegra124-gk20a", | ||
652 | .data = &gk20a_generic_platform }, | ||
653 | { .compatible = "nvidia,tegra210-gm20b", | ||
654 | .data = &gk20a_generic_platform }, | ||
655 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
656 | { .compatible = TEGRA_18x_GPU_COMPAT_TEGRA, | ||
657 | .data = &gk20a_generic_platform }, | ||
658 | #endif | ||
659 | |||
660 | #endif | ||
661 | { .compatible = "nvidia,generic-gk20a", | ||
662 | .data = &gk20a_generic_platform }, | ||
663 | { .compatible = "nvidia,generic-gm20b", | ||
664 | .data = &gk20a_generic_platform }, | ||
665 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
666 | { .compatible = "nvidia,generic-gp10b", | ||
667 | .data = &gk20a_generic_platform }, | ||
668 | #endif | ||
669 | { }, | ||
670 | }; | ||
671 | |||
672 | static int gk20a_pm_railgate(struct device *dev) | ||
673 | { | ||
674 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
675 | int ret = 0; | ||
676 | #ifdef CONFIG_DEBUG_FS | ||
677 | struct gk20a *g = get_gk20a(dev); | ||
678 | |||
679 | g->pstats.last_rail_gate_start = jiffies; | ||
680 | |||
681 | if (g->pstats.railgating_cycle_count >= 1) | ||
682 | g->pstats.total_rail_ungate_time_ms = | ||
683 | g->pstats.total_rail_ungate_time_ms + | ||
684 | jiffies_to_msecs(g->pstats.last_rail_gate_start - | ||
685 | g->pstats.last_rail_ungate_complete); | ||
686 | #endif | ||
687 | |||
688 | if (platform->railgate) | ||
689 | ret = platform->railgate(dev); | ||
690 | |||
691 | #ifdef CONFIG_DEBUG_FS | ||
692 | g->pstats.last_rail_gate_complete = jiffies; | ||
693 | #endif | ||
694 | |||
695 | return ret; | ||
696 | } | ||
697 | |||
698 | static int gk20a_pm_unrailgate(struct device *dev) | ||
699 | { | ||
700 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
701 | int ret = 0; | ||
702 | struct gk20a *g = get_gk20a(dev); | ||
703 | |||
704 | #ifdef CONFIG_DEBUG_FS | ||
705 | g->pstats.last_rail_ungate_start = jiffies; | ||
706 | if (g->pstats.railgating_cycle_count >= 1) | ||
707 | g->pstats.total_rail_gate_time_ms = | ||
708 | g->pstats.total_rail_gate_time_ms + | ||
709 | jiffies_to_msecs(g->pstats.last_rail_ungate_start - | ||
710 | g->pstats.last_rail_gate_complete); | ||
711 | |||
712 | g->pstats.railgating_cycle_count++; | ||
713 | #endif | ||
714 | |||
715 | trace_gk20a_pm_unrailgate(g->name); | ||
716 | |||
717 | if (platform->unrailgate) { | ||
718 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
719 | ret = platform->unrailgate(dev); | ||
720 | nvgpu_mutex_release(&platform->railgate_lock); | ||
721 | } | ||
722 | |||
723 | #ifdef CONFIG_DEBUG_FS | ||
724 | g->pstats.last_rail_ungate_complete = jiffies; | ||
725 | #endif | ||
726 | |||
727 | return ret; | ||
728 | } | ||
729 | |||
730 | static void gk20a_pm_shutdown(struct platform_device *pdev) | ||
731 | { | ||
732 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
733 | struct gk20a *g = platform->g; | ||
734 | int err; | ||
735 | |||
736 | nvgpu_info(g, "shutting down"); | ||
737 | |||
738 | /* vgpu has nothing to clean up currently */ | ||
739 | if (gk20a_gpu_is_virtual(&pdev->dev)) | ||
740 | return; | ||
741 | |||
742 | gk20a_driver_start_unload(g); | ||
743 | |||
744 | /* If GPU is already railgated, | ||
745 | * just prevent more requests, and return */ | ||
746 | if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { | ||
747 | __pm_runtime_disable(&pdev->dev, false); | ||
748 | nvgpu_info(g, "already railgated, shut down complete"); | ||
749 | return; | ||
750 | } | ||
751 | |||
752 | /* Prevent more requests by disabling Runtime PM */ | ||
753 | __pm_runtime_disable(&pdev->dev, false); | ||
754 | |||
755 | err = gk20a_wait_for_idle(&pdev->dev); | ||
756 | if (err) { | ||
757 | nvgpu_err(g, "failed to idle GPU, err=%d", err); | ||
758 | goto finish; | ||
759 | } | ||
760 | |||
761 | err = gk20a_fifo_disable_all_engine_activity(g, true); | ||
762 | if (err) { | ||
763 | nvgpu_err(g, "failed to disable engine activity, err=%d", | ||
764 | err); | ||
765 | goto finish; | ||
766 | } | ||
767 | |||
768 | err = gk20a_fifo_wait_engine_idle(g); | ||
769 | if (err) { | ||
770 | nvgpu_err(g, "failed to idle engines, err=%d", | ||
771 | err); | ||
772 | goto finish; | ||
773 | } | ||
774 | |||
775 | if (gk20a_gpu_is_virtual(&pdev->dev)) | ||
776 | err = vgpu_pm_prepare_poweroff(&pdev->dev); | ||
777 | else | ||
778 | err = gk20a_pm_prepare_poweroff(&pdev->dev); | ||
779 | if (err) { | ||
780 | nvgpu_err(g, "failed to prepare for poweroff, err=%d", | ||
781 | err); | ||
782 | goto finish; | ||
783 | } | ||
784 | |||
785 | err = gk20a_pm_railgate(&pdev->dev); | ||
786 | if (err) | ||
787 | nvgpu_err(g, "failed to railgate, err=%d", err); | ||
788 | |||
789 | finish: | ||
790 | nvgpu_info(g, "shut down complete\n"); | ||
791 | } | ||
792 | |||
793 | #ifdef CONFIG_PM | ||
794 | static int gk20a_pm_runtime_resume(struct device *dev) | ||
795 | { | ||
796 | int err = 0; | ||
797 | |||
798 | err = gk20a_pm_unrailgate(dev); | ||
799 | if (err) | ||
800 | goto fail; | ||
801 | |||
802 | err = gk20a_pm_finalize_poweron(dev); | ||
803 | if (err) | ||
804 | goto fail_poweron; | ||
805 | |||
806 | return 0; | ||
807 | |||
808 | fail_poweron: | ||
809 | gk20a_pm_railgate(dev); | ||
810 | fail: | ||
811 | return err; | ||
812 | } | ||
813 | |||
814 | static int gk20a_pm_runtime_suspend(struct device *dev) | ||
815 | { | ||
816 | int err = 0; | ||
817 | |||
818 | err = gk20a_pm_prepare_poweroff(dev); | ||
819 | if (err) | ||
820 | goto fail; | ||
821 | |||
822 | err = gk20a_pm_railgate(dev); | ||
823 | if (err) | ||
824 | goto fail_railgate; | ||
825 | |||
826 | return 0; | ||
827 | |||
828 | fail_railgate: | ||
829 | gk20a_pm_finalize_poweron(dev); | ||
830 | fail: | ||
831 | pm_runtime_mark_last_busy(dev); | ||
832 | return err; | ||
833 | } | ||
834 | |||
835 | static int gk20a_pm_suspend(struct device *dev) | ||
836 | { | ||
837 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
838 | struct gk20a *g = get_gk20a(dev); | ||
839 | int ret = 0; | ||
840 | |||
841 | if (platform->user_railgate_disabled) | ||
842 | gk20a_idle_nosuspend(dev); | ||
843 | |||
844 | if (atomic_read(&dev->power.usage_count) > 1) { | ||
845 | ret = -EBUSY; | ||
846 | goto fail; | ||
847 | } | ||
848 | |||
849 | if (!g->power_on) | ||
850 | return 0; | ||
851 | |||
852 | ret = gk20a_pm_runtime_suspend(dev); | ||
853 | if (ret) | ||
854 | goto fail; | ||
855 | |||
856 | if (platform->suspend) | ||
857 | platform->suspend(dev); | ||
858 | |||
859 | g->suspended = true; | ||
860 | |||
861 | return 0; | ||
862 | |||
863 | fail: | ||
864 | if (platform->user_railgate_disabled) | ||
865 | gk20a_busy_noresume(dev); | ||
866 | |||
867 | return ret; | ||
868 | } | ||
869 | |||
870 | static int gk20a_pm_resume(struct device *dev) | ||
871 | { | 362 | { |
872 | struct gk20a *g = get_gk20a(dev); | 363 | if (g->driver_is_dying) |
873 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
874 | int ret = 0; | ||
875 | |||
876 | if (platform->user_railgate_disabled) | ||
877 | gk20a_busy_noresume(dev); | ||
878 | |||
879 | if (!g->suspended) | ||
880 | return 0; | 364 | return 0; |
881 | 365 | return 1; | |
882 | ret = gk20a_pm_runtime_resume(dev); | ||
883 | |||
884 | g->suspended = false; | ||
885 | |||
886 | return ret; | ||
887 | } | ||
888 | |||
889 | static const struct dev_pm_ops gk20a_pm_ops = { | ||
890 | .runtime_resume = gk20a_pm_runtime_resume, | ||
891 | .runtime_suspend = gk20a_pm_runtime_suspend, | ||
892 | .resume = gk20a_pm_resume, | ||
893 | .suspend = gk20a_pm_suspend, | ||
894 | }; | ||
895 | #endif | ||
896 | |||
897 | int gk20a_pm_init(struct device *dev) | ||
898 | { | ||
899 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
900 | int err = 0; | ||
901 | |||
902 | gk20a_dbg_fn(""); | ||
903 | |||
904 | /* Initialise pm runtime */ | ||
905 | if (platform->railgate_delay) { | ||
906 | pm_runtime_set_autosuspend_delay(dev, | ||
907 | platform->railgate_delay); | ||
908 | pm_runtime_use_autosuspend(dev); | ||
909 | } | ||
910 | |||
911 | if (platform->can_railgate) { | ||
912 | pm_runtime_enable(dev); | ||
913 | if (!pm_runtime_enabled(dev)) | ||
914 | gk20a_pm_unrailgate(dev); | ||
915 | else | ||
916 | gk20a_pm_railgate(dev); | ||
917 | } else { | ||
918 | __pm_runtime_disable(dev, false); | ||
919 | gk20a_pm_unrailgate(dev); | ||
920 | } | ||
921 | |||
922 | return err; | ||
923 | } | ||
924 | |||
925 | int gk20a_secure_page_alloc(struct device *dev) | ||
926 | { | ||
927 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
928 | int err = 0; | ||
929 | |||
930 | if (platform->secure_page_alloc) { | ||
931 | err = platform->secure_page_alloc(dev); | ||
932 | if (!err) | ||
933 | platform->secure_alloc_ready = true; | ||
934 | } | ||
935 | |||
936 | return err; | ||
937 | } | ||
938 | |||
939 | static int gk20a_probe(struct platform_device *dev) | ||
940 | { | ||
941 | struct gk20a *gk20a; | ||
942 | int err; | ||
943 | struct gk20a_platform *platform = NULL; | ||
944 | |||
945 | if (dev->dev.of_node) { | ||
946 | const struct of_device_id *match; | ||
947 | |||
948 | match = of_match_device(tegra_gk20a_of_match, &dev->dev); | ||
949 | if (match) | ||
950 | platform = (struct gk20a_platform *)match->data; | ||
951 | } else | ||
952 | platform = (struct gk20a_platform *)dev->dev.platform_data; | ||
953 | |||
954 | if (!platform) { | ||
955 | dev_err(&dev->dev, "no platform data\n"); | ||
956 | return -ENODATA; | ||
957 | } | ||
958 | |||
959 | gk20a_dbg_fn(""); | ||
960 | |||
961 | platform_set_drvdata(dev, platform); | ||
962 | |||
963 | if (gk20a_gpu_is_virtual(&dev->dev)) | ||
964 | return vgpu_probe(dev); | ||
965 | |||
966 | gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); | ||
967 | if (!gk20a) { | ||
968 | dev_err(&dev->dev, "couldn't allocate gk20a support"); | ||
969 | return -ENOMEM; | ||
970 | } | ||
971 | |||
972 | set_gk20a(dev, gk20a); | ||
973 | gk20a->dev = &dev->dev; | ||
974 | |||
975 | if (nvgpu_platform_is_simulation(gk20a)) | ||
976 | platform->is_fmodel = true; | ||
977 | |||
978 | nvgpu_kmem_init(gk20a); | ||
979 | |||
980 | gk20a->irq_stall = platform_get_irq(dev, 0); | ||
981 | gk20a->irq_nonstall = platform_get_irq(dev, 1); | ||
982 | if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) | ||
983 | return -ENXIO; | ||
984 | |||
985 | err = devm_request_threaded_irq(&dev->dev, | ||
986 | gk20a->irq_stall, | ||
987 | gk20a_intr_isr_stall, | ||
988 | gk20a_intr_thread_stall, | ||
989 | 0, "gk20a_stall", gk20a); | ||
990 | if (err) { | ||
991 | dev_err(&dev->dev, | ||
992 | "failed to request stall intr irq @ %d\n", | ||
993 | gk20a->irq_stall); | ||
994 | return err; | ||
995 | } | ||
996 | err = devm_request_irq(&dev->dev, | ||
997 | gk20a->irq_nonstall, | ||
998 | gk20a_intr_isr_nonstall, | ||
999 | 0, "gk20a_nonstall", gk20a); | ||
1000 | if (err) { | ||
1001 | dev_err(&dev->dev, | ||
1002 | "failed to request non-stall intr irq @ %d\n", | ||
1003 | gk20a->irq_nonstall); | ||
1004 | return err; | ||
1005 | } | ||
1006 | disable_irq(gk20a->irq_stall); | ||
1007 | if (gk20a->irq_stall != gk20a->irq_nonstall) | ||
1008 | disable_irq(gk20a->irq_nonstall); | ||
1009 | |||
1010 | /* | ||
1011 | * is_fmodel needs to be in gk20a struct for deferred teardown | ||
1012 | */ | ||
1013 | gk20a->is_fmodel = platform->is_fmodel; | ||
1014 | |||
1015 | err = gk20a_init_support(dev); | ||
1016 | if (err) | ||
1017 | return err; | ||
1018 | |||
1019 | #ifdef CONFIG_RESET_CONTROLLER | ||
1020 | platform->reset_control = devm_reset_control_get(&dev->dev, NULL); | ||
1021 | if (IS_ERR(platform->reset_control)) | ||
1022 | platform->reset_control = NULL; | ||
1023 | #endif | ||
1024 | |||
1025 | err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); | ||
1026 | if (err) | ||
1027 | return err; | ||
1028 | |||
1029 | err = gk20a_pm_init(&dev->dev); | ||
1030 | if (err) { | ||
1031 | dev_err(&dev->dev, "pm init failed"); | ||
1032 | return err; | ||
1033 | } | ||
1034 | |||
1035 | gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); | ||
1036 | |||
1037 | return 0; | ||
1038 | } | ||
1039 | |||
1040 | static int __exit gk20a_remove(struct platform_device *pdev) | ||
1041 | { | ||
1042 | struct device *dev = &pdev->dev; | ||
1043 | struct gk20a *g = get_gk20a(dev); | ||
1044 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
1045 | |||
1046 | gk20a_dbg_fn(""); | ||
1047 | |||
1048 | if (gk20a_gpu_is_virtual(dev)) | ||
1049 | return vgpu_remove(pdev); | ||
1050 | |||
1051 | if (platform->has_cde) | ||
1052 | gk20a_cde_destroy(g); | ||
1053 | |||
1054 | gk20a_ctxsw_trace_cleanup(g); | ||
1055 | |||
1056 | gk20a_sched_ctrl_cleanup(g); | ||
1057 | |||
1058 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
1059 | gk20a_scale_exit(dev); | ||
1060 | |||
1061 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
1062 | nvgpu_clk_arb_cleanup_arbiter(g); | ||
1063 | #endif | ||
1064 | |||
1065 | gk20a_user_deinit(dev, &nvgpu_class); | ||
1066 | |||
1067 | debugfs_remove_recursive(platform->debugfs); | ||
1068 | debugfs_remove_recursive(platform->debugfs_alias); | ||
1069 | |||
1070 | gk20a_remove_sysfs(dev); | ||
1071 | |||
1072 | if (platform->secure_buffer.destroy) | ||
1073 | platform->secure_buffer.destroy(dev, | ||
1074 | &platform->secure_buffer); | ||
1075 | |||
1076 | if (pm_runtime_enabled(dev)) | ||
1077 | pm_runtime_disable(dev); | ||
1078 | |||
1079 | if (platform->remove) | ||
1080 | platform->remove(dev); | ||
1081 | |||
1082 | set_gk20a(pdev, NULL); | ||
1083 | gk20a_put(g); | ||
1084 | |||
1085 | gk20a_dbg_fn("removed"); | ||
1086 | |||
1087 | return 0; | ||
1088 | } | ||
1089 | |||
1090 | static struct platform_driver gk20a_driver = { | ||
1091 | .probe = gk20a_probe, | ||
1092 | .remove = __exit_p(gk20a_remove), | ||
1093 | .shutdown = gk20a_pm_shutdown, | ||
1094 | .driver = { | ||
1095 | .owner = THIS_MODULE, | ||
1096 | .name = "gk20a", | ||
1097 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) | ||
1098 | .probe_type = PROBE_PREFER_ASYNCHRONOUS, | ||
1099 | #endif | ||
1100 | #ifdef CONFIG_OF | ||
1101 | .of_match_table = tegra_gk20a_of_match, | ||
1102 | #endif | ||
1103 | #ifdef CONFIG_PM | ||
1104 | .pm = &gk20a_pm_ops, | ||
1105 | #endif | ||
1106 | .suppress_bind_attrs = true, | ||
1107 | } | ||
1108 | }; | ||
1109 | |||
1110 | struct class nvgpu_class = { | ||
1111 | .owner = THIS_MODULE, | ||
1112 | .name = CLASS_NAME, | ||
1113 | }; | ||
1114 | |||
1115 | static int __init gk20a_init(void) | ||
1116 | { | ||
1117 | |||
1118 | int ret; | ||
1119 | |||
1120 | ret = class_register(&nvgpu_class); | ||
1121 | if (ret) | ||
1122 | return ret; | ||
1123 | |||
1124 | ret = nvgpu_pci_init(); | ||
1125 | if (ret) | ||
1126 | return ret; | ||
1127 | |||
1128 | return platform_driver_register(&gk20a_driver); | ||
1129 | } | ||
1130 | |||
1131 | static void __exit gk20a_exit(void) | ||
1132 | { | ||
1133 | nvgpu_pci_exit(); | ||
1134 | platform_driver_unregister(&gk20a_driver); | ||
1135 | class_unregister(&nvgpu_class); | ||
1136 | } | ||
1137 | |||
1138 | void gk20a_busy_noresume(struct device *dev) | ||
1139 | { | ||
1140 | pm_runtime_get_noresume(dev); | ||
1141 | } | 366 | } |
1142 | 367 | ||
1143 | /* | 368 | /* |
@@ -1193,292 +418,6 @@ int gk20a_wait_for_idle(struct device *dev) | |||
1193 | return 0; | 418 | return 0; |
1194 | } | 419 | } |
1195 | 420 | ||
1196 | /* | ||
1197 | * Check if the device can go busy. Basically if the driver is currently | ||
1198 | * in the process of dying then do not let new places make the driver busy. | ||
1199 | */ | ||
1200 | static int gk20a_can_busy(struct gk20a *g) | ||
1201 | { | ||
1202 | if (g->driver_is_dying) | ||
1203 | return 0; | ||
1204 | return 1; | ||
1205 | } | ||
1206 | |||
1207 | int gk20a_busy(struct gk20a *g) | ||
1208 | { | ||
1209 | int ret = 0; | ||
1210 | struct device *dev; | ||
1211 | |||
1212 | if (!g) | ||
1213 | return -ENODEV; | ||
1214 | |||
1215 | atomic_inc(&g->usage_count); | ||
1216 | |||
1217 | down_read(&g->busy_lock); | ||
1218 | |||
1219 | if (!gk20a_can_busy(g)) { | ||
1220 | ret = -ENODEV; | ||
1221 | atomic_dec(&g->usage_count); | ||
1222 | goto fail; | ||
1223 | } | ||
1224 | |||
1225 | dev = g->dev; | ||
1226 | |||
1227 | if (pm_runtime_enabled(dev)) { | ||
1228 | ret = pm_runtime_get_sync(dev); | ||
1229 | if (ret < 0) { | ||
1230 | pm_runtime_put_noidle(dev); | ||
1231 | atomic_dec(&g->usage_count); | ||
1232 | goto fail; | ||
1233 | } | ||
1234 | } else { | ||
1235 | if (!g->power_on) { | ||
1236 | ret = gk20a_gpu_is_virtual(dev) ? | ||
1237 | vgpu_pm_finalize_poweron(dev) | ||
1238 | : gk20a_pm_finalize_poweron(dev); | ||
1239 | if (ret) { | ||
1240 | atomic_dec(&g->usage_count); | ||
1241 | goto fail; | ||
1242 | } | ||
1243 | } | ||
1244 | } | ||
1245 | |||
1246 | gk20a_scale_notify_busy(dev); | ||
1247 | |||
1248 | fail: | ||
1249 | up_read(&g->busy_lock); | ||
1250 | |||
1251 | return ret < 0 ? ret : 0; | ||
1252 | } | ||
1253 | |||
/* Drop a runtime-PM usage reference on @dev without running idle handling. */
void gk20a_idle_nosuspend(struct device *dev)
{
	pm_runtime_put_noidle(dev);
}
1258 | |||
1259 | void gk20a_idle(struct gk20a *g) | ||
1260 | { | ||
1261 | struct device *dev; | ||
1262 | |||
1263 | atomic_dec(&g->usage_count); | ||
1264 | down_read(&g->busy_lock); | ||
1265 | |||
1266 | dev = g->dev; | ||
1267 | |||
1268 | if (!(dev && gk20a_can_busy(g))) | ||
1269 | goto fail; | ||
1270 | |||
1271 | if (pm_runtime_enabled(dev)) { | ||
1272 | #ifdef CONFIG_PM | ||
1273 | if (atomic_read(&g->dev->power.usage_count) == 1) | ||
1274 | gk20a_scale_notify_idle(dev); | ||
1275 | #endif | ||
1276 | |||
1277 | pm_runtime_mark_last_busy(dev); | ||
1278 | pm_runtime_put_sync_autosuspend(dev); | ||
1279 | |||
1280 | } else { | ||
1281 | gk20a_scale_notify_idle(dev); | ||
1282 | } | ||
1283 | fail: | ||
1284 | up_read(&g->busy_lock); | ||
1285 | } | ||
1286 | |||
1287 | #ifdef CONFIG_PM | ||
1288 | /** | ||
1289 | * __gk20a_do_idle() - force the GPU to idle and railgate | ||
1290 | * | ||
1291 | * In success, this call MUST be balanced by caller with __gk20a_do_unidle() | ||
1292 | * | ||
1293 | * Acquires two locks : &g->busy_lock and &platform->railgate_lock | ||
1294 | * In success, we hold these locks and return | ||
1295 | * In failure, we release these locks and return | ||
1296 | */ | ||
1297 | int __gk20a_do_idle(struct device *dev, bool force_reset) | ||
1298 | { | ||
1299 | struct gk20a *g = get_gk20a(dev); | ||
1300 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
1301 | struct nvgpu_timeout timeout; | ||
1302 | int ref_cnt; | ||
1303 | int target_ref_cnt = 0; | ||
1304 | bool is_railgated; | ||
1305 | int err = 0; | ||
1306 | |||
1307 | /* acquire busy lock to block other busy() calls */ | ||
1308 | down_write(&g->busy_lock); | ||
1309 | |||
1310 | /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ | ||
1311 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
1312 | |||
1313 | /* check if it is already railgated ? */ | ||
1314 | if (platform->is_railgated(dev)) | ||
1315 | return 0; | ||
1316 | |||
1317 | /* | ||
1318 | * release railgate_lock, prevent suspend by incrementing usage counter, | ||
1319 | * re-acquire railgate_lock | ||
1320 | */ | ||
1321 | nvgpu_mutex_release(&platform->railgate_lock); | ||
1322 | pm_runtime_get_sync(dev); | ||
1323 | |||
1324 | /* | ||
1325 | * One refcount taken in this API | ||
1326 | * If User disables rail gating, we take one more | ||
1327 | * extra refcount | ||
1328 | */ | ||
1329 | if (platform->user_railgate_disabled) | ||
1330 | target_ref_cnt = 2; | ||
1331 | else | ||
1332 | target_ref_cnt = 1; | ||
1333 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
1334 | |||
1335 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
1336 | NVGPU_TIMER_CPU_TIMER); | ||
1337 | |||
1338 | /* check and wait until GPU is idle (with a timeout) */ | ||
1339 | do { | ||
1340 | nvgpu_msleep(1); | ||
1341 | ref_cnt = atomic_read(&dev->power.usage_count); | ||
1342 | } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); | ||
1343 | |||
1344 | if (ref_cnt != target_ref_cnt) { | ||
1345 | nvgpu_err(g, "failed to idle - refcount %d != 1\n", | ||
1346 | ref_cnt); | ||
1347 | goto fail_drop_usage_count; | ||
1348 | } | ||
1349 | |||
1350 | /* check if global force_reset flag is set */ | ||
1351 | force_reset |= platform->force_reset_in_do_idle; | ||
1352 | |||
1353 | nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, | ||
1354 | NVGPU_TIMER_CPU_TIMER); | ||
1355 | |||
1356 | if (platform->can_railgate && !force_reset) { | ||
1357 | /* | ||
1358 | * Case 1 : GPU railgate is supported | ||
1359 | * | ||
1360 | * if GPU is now idle, we will have only one ref count, | ||
1361 | * drop this ref which will rail gate the GPU | ||
1362 | */ | ||
1363 | pm_runtime_put_sync(dev); | ||
1364 | |||
1365 | /* add sufficient delay to allow GPU to rail gate */ | ||
1366 | nvgpu_msleep(platform->railgate_delay); | ||
1367 | |||
1368 | /* check in loop if GPU is railgated or not */ | ||
1369 | do { | ||
1370 | nvgpu_msleep(1); | ||
1371 | is_railgated = platform->is_railgated(dev); | ||
1372 | } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); | ||
1373 | |||
1374 | if (is_railgated) { | ||
1375 | return 0; | ||
1376 | } else { | ||
1377 | nvgpu_err(g, "failed to idle in timeout\n"); | ||
1378 | goto fail_timeout; | ||
1379 | } | ||
1380 | } else { | ||
1381 | /* | ||
1382 | * Case 2 : GPU railgate is not supported or we explicitly | ||
1383 | * do not want to depend on runtime PM | ||
1384 | * | ||
1385 | * if GPU is now idle, call prepare_poweroff() to save the | ||
1386 | * state and then do explicit railgate | ||
1387 | * | ||
1388 | * __gk20a_do_unidle() needs to unrailgate, call | ||
1389 | * finalize_poweron(), and then call pm_runtime_put_sync() | ||
1390 | * to balance the GPU usage counter | ||
1391 | */ | ||
1392 | |||
1393 | /* Save the GPU state */ | ||
1394 | err = gk20a_pm_prepare_poweroff(dev); | ||
1395 | if (err) | ||
1396 | goto fail_drop_usage_count; | ||
1397 | |||
1398 | /* railgate GPU */ | ||
1399 | platform->railgate(dev); | ||
1400 | |||
1401 | nvgpu_udelay(10); | ||
1402 | |||
1403 | g->forced_reset = true; | ||
1404 | return 0; | ||
1405 | } | ||
1406 | |||
1407 | fail_drop_usage_count: | ||
1408 | pm_runtime_put_noidle(dev); | ||
1409 | fail_timeout: | ||
1410 | nvgpu_mutex_release(&platform->railgate_lock); | ||
1411 | up_write(&g->busy_lock); | ||
1412 | return -EBUSY; | ||
1413 | } | ||
1414 | |||
1415 | /** | ||
1416 | * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called | ||
1417 | * from outside of GPU driver | ||
1418 | * | ||
1419 | * In success, this call MUST be balanced by caller with gk20a_do_unidle() | ||
1420 | */ | ||
1421 | int gk20a_do_idle(void) | ||
1422 | { | ||
1423 | struct device_node *node = | ||
1424 | of_find_matching_node(NULL, tegra_gk20a_of_match); | ||
1425 | struct platform_device *pdev = of_find_device_by_node(node); | ||
1426 | |||
1427 | int ret = __gk20a_do_idle(&pdev->dev, true); | ||
1428 | |||
1429 | of_node_put(node); | ||
1430 | |||
1431 | return ret; | ||
1432 | } | ||
1433 | |||
1434 | /** | ||
1435 | * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() | ||
1436 | */ | ||
1437 | int __gk20a_do_unidle(struct device *dev) | ||
1438 | { | ||
1439 | struct gk20a *g = get_gk20a(dev); | ||
1440 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
1441 | |||
1442 | if (g->forced_reset) { | ||
1443 | /* | ||
1444 | * If we did a forced-reset/railgate | ||
1445 | * then unrailgate the GPU here first | ||
1446 | */ | ||
1447 | platform->unrailgate(dev); | ||
1448 | |||
1449 | /* restore the GPU state */ | ||
1450 | gk20a_pm_finalize_poweron(dev); | ||
1451 | |||
1452 | /* balance GPU usage counter */ | ||
1453 | pm_runtime_put_sync(dev); | ||
1454 | |||
1455 | g->forced_reset = false; | ||
1456 | } | ||
1457 | |||
1458 | /* release the lock and open up all other busy() calls */ | ||
1459 | nvgpu_mutex_release(&platform->railgate_lock); | ||
1460 | up_write(&g->busy_lock); | ||
1461 | |||
1462 | return 0; | ||
1463 | } | ||
1464 | |||
1465 | /** | ||
1466 | * gk20a_do_unidle() - wrap up for __gk20a_do_unidle() | ||
1467 | */ | ||
1468 | int gk20a_do_unidle(void) | ||
1469 | { | ||
1470 | struct device_node *node = | ||
1471 | of_find_matching_node(NULL, tegra_gk20a_of_match); | ||
1472 | struct platform_device *pdev = of_find_device_by_node(node); | ||
1473 | |||
1474 | int ret = __gk20a_do_unidle(&pdev->dev); | ||
1475 | |||
1476 | of_node_put(node); | ||
1477 | |||
1478 | return ret; | ||
1479 | } | ||
1480 | #endif | ||
1481 | |||
1482 | int gk20a_init_gpu_characteristics(struct gk20a *g) | 421 | int gk20a_init_gpu_characteristics(struct gk20a *g) |
1483 | { | 422 | { |
1484 | struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; | 423 | struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; |
@@ -1651,7 +590,3 @@ void gk20a_put(struct gk20a *g) | |||
1651 | 590 | ||
1652 | kref_put(&g->refcount, gk20a_free_cb); | 591 | kref_put(&g->refcount, gk20a_free_cb); |
1653 | } | 592 | } |
1654 | |||
1655 | MODULE_LICENSE("GPL v2"); | ||
1656 | module_init(gk20a_init); | ||
1657 | module_exit(gk20a_exit); | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 60d04b64..9860910c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1389,6 +1389,7 @@ int gk20a_do_unidle(void); | |||
1389 | int __gk20a_do_idle(struct device *dev, bool force_reset); | 1389 | int __gk20a_do_idle(struct device *dev, bool force_reset); |
1390 | int __gk20a_do_unidle(struct device *dev); | 1390 | int __gk20a_do_unidle(struct device *dev); |
1391 | 1391 | ||
1392 | int gk20a_can_busy(struct gk20a *g); | ||
1392 | void gk20a_driver_start_unload(struct gk20a *g); | 1393 | void gk20a_driver_start_unload(struct gk20a *g); |
1393 | int gk20a_wait_for_idle(struct device *dev); | 1394 | int gk20a_wait_for_idle(struct device *dev); |
1394 | 1395 | ||
@@ -1431,9 +1432,8 @@ extern struct class nvgpu_class; | |||
1431 | 1432 | ||
1432 | #define INTERFACE_NAME "nvhost%s-gpu" | 1433 | #define INTERFACE_NAME "nvhost%s-gpu" |
1433 | 1434 | ||
1434 | int gk20a_pm_init(struct device *dev); | 1435 | int gk20a_prepare_poweroff(struct gk20a *g); |
1435 | int gk20a_pm_finalize_poweron(struct device *dev); | 1436 | int gk20a_finalize_poweron(struct gk20a *g); |
1436 | void gk20a_remove_support(struct gk20a *g); | ||
1437 | 1437 | ||
1438 | static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch) | 1438 | static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch) |
1439 | { | 1439 | { |
@@ -1476,8 +1476,4 @@ void gk20a_put(struct gk20a *g); | |||
1476 | int gk20a_railgating_debugfs_init(struct device *dev); | 1476 | int gk20a_railgating_debugfs_init(struct device *dev); |
1477 | #endif | 1477 | #endif |
1478 | 1478 | ||
1479 | int gk20a_secure_page_alloc(struct device *dev); | ||
1480 | void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i, | ||
1481 | struct resource **out); | ||
1482 | |||
1483 | #endif /* GK20A_H */ | 1479 | #endif /* GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/sim_gk20a.c b/drivers/gpu/nvgpu/gk20a/sim_gk20a.c index 8951d5a4..5c11c118 100644 --- a/drivers/gpu/nvgpu/gk20a/sim_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sim_gk20a.c | |||
@@ -103,15 +103,6 @@ int gk20a_init_sim_support(struct platform_device *pdev) | |||
103 | struct gk20a *g = get_gk20a(dev); | 103 | struct gk20a *g = get_gk20a(dev); |
104 | u64 phys; | 104 | u64 phys; |
105 | 105 | ||
106 | g->sim.g = g; | ||
107 | g->sim.regs = gk20a_ioremap_resource(pdev, GK20A_SIM_IORESOURCE_MEM, | ||
108 | &g->sim.reg_mem); | ||
109 | if (IS_ERR(g->sim.regs)) { | ||
110 | nvgpu_err(g, "failed to remap gk20a sim regs\n"); | ||
111 | err = PTR_ERR(g->sim.regs); | ||
112 | goto fail; | ||
113 | } | ||
114 | |||
115 | /* allocate sim event/msg buffers */ | 106 | /* allocate sim event/msg buffers */ |
116 | err = alloc_and_kmap_iopage(g, &g->sim.send_bfr.kvaddr, | 107 | err = alloc_and_kmap_iopage(g, &g->sim.send_bfr.kvaddr, |
117 | &g->sim.send_bfr.phys, | 108 | &g->sim.send_bfr.phys, |