Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c | 1681
1 file changed, 1681 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
new file mode 100644
index 00000000..4cc500de
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -0,0 +1,1681 @@
/*
 * drivers/video/tegra/host/gk20a/gk20a.c
 *
 * GK20A Graphics
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h>

#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/export.h>
#include <linux/file.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/pm_runtime.h>
#include <linux/thermal.h>
#include <asm/cacheflush.h>
#include <linux/debugfs.h>
#include <linux/spinlock.h>
#include <linux/tegra-powergate.h>

#include <linux/sched.h>
#include <linux/input-cfboost.h>

#include <mach/pm_domains.h>

#include "gk20a.h"
#include "debug_gk20a.h"
#include "ctrl_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_timer_gk20a.h"
#include "hw_bus_gk20a.h"
#include "hw_sim_gk20a.h"
#include "hw_top_gk20a.h"
#include "hw_ltc_gk20a.h"
#include "gk20a_scale.h"
#include "dbg_gpu_gk20a.h"
#include "hal.h"

#ifdef CONFIG_ARM64
#define __cpuc_flush_dcache_area __flush_dcache_area
#endif

#define CLASS_NAME "nvidia-gpu"
/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
#define INTERFACE_NAME "nvhost%s-gpu"

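/* One character device per interface: channel (""), "-as", "-ctrl",
 * "-dbg" and "-prof"; see gk20a_user_init() below. */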
#define GK20A_NUM_CDEVS 5

#if defined(GK20A_DEBUG)
u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
u32 gk20a_dbg_ftrace;
#endif

static int gk20a_pm_finalize_poweron(struct device *dev);
static int gk20a_pm_prepare_poweroff(struct device *dev);

static inline void set_gk20a(struct platform_device *dev, struct gk20a *gk20a)
{
	gk20a_get_platform(dev)->g = gk20a;
}

static const struct file_operations gk20a_channel_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_channel_release,
	.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_channel_ioctl,
#endif
	.unlocked_ioctl = gk20a_channel_ioctl,
};

static const struct file_operations gk20a_ctrl_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctrl_dev_release,
	.open = gk20a_ctrl_dev_open,
	.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};

static const struct file_operations gk20a_dbg_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_dbg_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static const struct file_operations gk20a_as_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_as_dev_release,
	.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_as_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_as_dev_ioctl,
};

/*
 * Note: We use a different 'open' to trigger handling of the profiler
 * session. Most of the code is shared between them... though, at some
 * point, if the code gets too tangled trying to handle each in the same
 * path, we can separate them cleanly.
 */
static const struct file_operations gk20a_prof_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_prof_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	/* .mmap = gk20a_prof_gpu_dev_mmap, */
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
{
	writel(v, g->sim.regs + r);
}

static inline u32 sim_readl(struct gk20a *g, u32 r)
{
	return readl(g->sim.regs + r);
}

static void kunmap_and_free_iopage(void **kvaddr, struct page **page)
{
	if (*kvaddr) {
		kunmap(*page);	/* kunmap() takes the page, not the kvaddr */
		*kvaddr = NULL;
	}
	if (*page) {
		__free_page(*page);
		*page = NULL;
	}
}

static void gk20a_free_sim_support(struct gk20a *g)
{
	/* free sim mappings, bfrs */
	kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr,
			       &g->sim.send_bfr.page);

	kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr,
			       &g->sim.recv_bfr.page);

	kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr,
			       &g->sim.msg_bfr.page);
}

static void gk20a_remove_sim_support(struct sim_gk20a *s)
{
	struct gk20a *g = s->g;

	if (g->sim.regs)
		sim_writel(g, sim_config_r(), sim_config_mode_disabled_v());
	gk20a_free_sim_support(g);
}

static int alloc_and_kmap_iopage(struct device *d,
				 void **kvaddr,
				 phys_addr_t *phys,
				 struct page **page)
{
	int err = 0;

	*page = alloc_page(GFP_KERNEL);
	if (!*page) {
		err = -ENOMEM;
		dev_err(d, "couldn't allocate io page\n");
		goto fail;
	}

	*kvaddr = kmap(*page);
	if (!*kvaddr) {
		err = -ENOMEM;
		dev_err(d, "couldn't kmap io page\n");
		goto fail;
	}
	*phys = page_to_phys(*page);
	return 0;

fail:
	kunmap_and_free_iopage(kvaddr, page);
	return err;
}

static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
					    struct resource **out)
{
	struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);

	if (!r)
		return NULL;
	if (out)
		*out = r;
	return devm_request_and_ioremap(&dev->dev, r);
}

/* TBD: strip from released */
static int gk20a_init_sim_support(struct platform_device *dev)
{
	int err = 0;
	struct gk20a *g = get_gk20a(dev);
	struct device *d = &dev->dev;
	phys_addr_t phys;

	g->sim.g = g;
	g->sim.regs = gk20a_ioremap_resource(dev, GK20A_SIM_IORESOURCE_MEM,
					     &g->sim.reg_mem);
	if (!g->sim.regs) {
		dev_err(d, "failed to remap gk20a sim regs\n");
		err = -ENXIO;
		goto fail;
	}

	/* allocate sim event/msg buffers */
	err = alloc_and_kmap_iopage(d, &g->sim.send_bfr.kvaddr,
				    &g->sim.send_bfr.phys,
				    &g->sim.send_bfr.page);

	err = err || alloc_and_kmap_iopage(d, &g->sim.recv_bfr.kvaddr,
					   &g->sim.recv_bfr.phys,
					   &g->sim.recv_bfr.page);

	err = err || alloc_and_kmap_iopage(d, &g->sim.msg_bfr.kvaddr,
					   &g->sim.msg_bfr.phys,
					   &g->sim.msg_bfr.page);

	if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr &&
	      g->sim.msg_bfr.kvaddr)) {
		dev_err(d, "couldn't allocate all sim buffers\n");
		err = -ENOMEM;	/* don't return the 0/1 value of the || chain */
		goto fail;
	}

	/* mark send ring invalid */
	sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f());

	/* read the get pointer and set put equal to it */
	g->sim.send_ring_put = sim_readl(g, sim_send_get_r());
	sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);

	/* write send ring address and make it valid */
	/* TBD: work for >32b physmem */
	phys = g->sim.send_bfr.phys;
	sim_writel(g, sim_send_ring_hi_r(), 0);
	sim_writel(g, sim_send_ring_r(),
		   sim_send_ring_status_valid_f() |
		   sim_send_ring_target_phys_pci_coherent_f() |
		   sim_send_ring_size_4kb_f() |
		   sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT));

	/* repeat for recv ring (but swap put,get as roles are opposite) */
	sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f());

	/* read the put pointer and set get equal to it */
	g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r());
	sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);

	/* write recv ring address and make it valid */
	/* TBD: work for >32b physmem */
	phys = g->sim.recv_bfr.phys;
	sim_writel(g, sim_recv_ring_hi_r(), 0);
	sim_writel(g, sim_recv_ring_r(),
		   sim_recv_ring_status_valid_f() |
		   sim_recv_ring_target_phys_pci_coherent_f() |
		   sim_recv_ring_size_4kb_f() |
		   sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT));

	g->sim.remove_support = gk20a_remove_sim_support;
	return 0;

fail:
	gk20a_free_sim_support(g);
	return err;
}

static inline u32 sim_msg_header_size(void)
{
	return 24;	/* TBD: fix the header to get this from NV_VGPU_MSG_HEADER */
}

static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset)
{
	return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset);
}

static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset)
{
	return sim_msg_bfr(g, byte_offset); /* starts at 0 */
}

static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset)
{
	/* starts after msg header/cmn */
	return sim_msg_bfr(g, byte_offset + sim_msg_header_size());
}

static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
{
	*sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
	*sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
	*sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v();
	*sim_msg_hdr(g, sim_msg_function_r()) = func;
	*sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size();
}

static inline u32 sim_escape_read_hdr_size(void)
{
	return 12; /* TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER */
}

static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset)
{
	return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset);
}

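/*
 * Send one RPC to the simulator: each send ring entry is two u32 words
 * (DMA address lo/hi) describing the 4kB message page; writing the new
 * put pointer at the end is what traps into the host.
 */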
static int rpc_send_message(struct gk20a *g)
{
	/* calculations done in units of u32s */
	u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2;
	u32 dma_offset = send_base + sim_dma_r()/sizeof(u32);
	u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32);

	*sim_send_ring_bfr(g, dma_offset*sizeof(u32)) =
		sim_dma_target_phys_pci_coherent_f() |
		sim_dma_status_valid_f() |
		sim_dma_size_4kb_f() |
		sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT);

	*sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = 0; /* TBD >32b phys */

	*sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++;

	g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) %
		PAGE_SIZE;

	__cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
	__cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
	__cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);

	/* Update the put pointer. This will trap into the host. */
	sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);

	return 0;
}

static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset)
{
	return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset);
}

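/*
 * Wait for the simulator to answer: spin until the recv ring put pointer
 * moves, then consume every reply. Each reply must point back at our own
 * message page; anything else is treated as a protocol error.
 */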
static int rpc_recv_poll(struct gk20a *g)
{
	phys_addr_t recv_phys_addr;

	/* XXX This read is not required (?) */
	/* pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET); */

	/* Poll the recv ring put pointer in an infinite loop */
	do {
		g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
	} while (g->sim.recv_ring_put == g->sim.recv_ring_get);

	/* process all replies */
	while (g->sim.recv_ring_put != g->sim.recv_ring_get) {
		/* these are in u32 offsets */
		u32 dma_lo_offset =
			sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0;
		/* u32 dma_hi_offset = dma_lo_offset + 1; */
		u32 recv_phys_addr_lo = sim_dma_addr_lo_v(
				*sim_recv_ring_bfr(g, dma_lo_offset*4));

		/* u32 recv_phys_addr_hi = sim_dma_hi_addr_v(
			(phys_addr_t)sim_recv_ring_bfr(g, dma_hi_offset*4)); */

		/* TBD >32b phys addr */
		recv_phys_addr = recv_phys_addr_lo << PAGE_SHIFT;

		if (recv_phys_addr != g->sim.msg_bfr.phys) {
			dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n",
				__func__);
			return -1;
		}

		/* Update GET pointer */
		g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) %
			PAGE_SIZE;

		__cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
		__cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
		__cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);

		sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);

		g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
	}

	return 0;
}

static int issue_rpc_and_wait(struct gk20a *g)
{
	int err;

	err = rpc_send_message(g);
	if (err) {
		dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n",
			__func__);
		return err;
	}

	err = rpc_recv_poll(g);
	if (err) {
		dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n",
			__func__);
		return err;
	}

	/* Now check if the RPC really succeeded */
	if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) {
		dev_err(dev_from_gk20a(g), "%s received failed status!\n",
			__func__);
		return -(*sim_msg_hdr(g, sim_msg_result_r()));
	}
	return 0;
}

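/*
 * Escape reads marshal three u32 parameters (index, count and the reply
 * data offset) followed by the NUL-terminated path string at offset 0xc;
 * the reply payload comes back at the u32-aligned offset computed below.
 */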
int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index,
		       u32 count, u32 *data)
{
	int err;
	size_t pathlen = strlen(path);
	u32 data_offset;

	sim_write_hdr(g, sim_msg_function_sim_escape_read_v(),
		      sim_escape_read_hdr_size());
	*sim_msg_param(g, 0) = index;
	*sim_msg_param(g, 4) = count;
	data_offset = roundup(0xc + pathlen + 1, sizeof(u32));
	*sim_msg_param(g, 8) = data_offset;
	strcpy((char *)sim_msg_param(g, 0xc), path);

	err = issue_rpc_and_wait(g);

	if (!err)
		memcpy(data, sim_msg_param(g, data_offset), count);
	return err;
}

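/*
 * Interrupt handling is split in two halves: the hard IRQ handlers below
 * only mask further interrupts and wake the threaded handlers, which do
 * the per-unit dispatch and then re-enable interrupt delivery.
 */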
static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_0;

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
	if (unlikely(!mc_intr_0))
		return IRQ_NONE;

	gk20a_writel(g, mc_intr_en_0_r(),
		     mc_intr_en_0_inta_disabled_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_0_r());

	return IRQ_WAKE_THREAD;
}

static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_1;

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
	if (unlikely(!mc_intr_1))
		return IRQ_NONE;

	gk20a_writel(g, mc_intr_en_1_r(),
		     mc_intr_en_1_inta_disabled_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_1_r());

	return IRQ_WAKE_THREAD;
}

static void gk20a_pbus_isr(struct gk20a *g)
{
	u32 val;

	val = gk20a_readl(g, bus_intr_0_r());
	if (val & (bus_intr_0_pri_squash_m() |
		   bus_intr_0_pri_fecserr_m() |
		   bus_intr_0_pri_timeout_m())) {
		gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
			  gk20a_readl(g, top_fs_status_r()));
		gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
			  gk20a_readl(g, mc_enable_r()));
		gk20a_err(&g->dev->dev,
			  "NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n",
			  gk20a_readl(g, timer_pri_timeout_save_0_r()));
		gk20a_err(&g->dev->dev,
			  "NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n",
			  gk20a_readl(g, timer_pri_timeout_save_1_r()));
		gk20a_err(&g->dev->dev,
			  "NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n",
			  gk20a_readl(g, timer_pri_timeout_fecs_errcode_r()));
	}

	if (val)
		gk20a_err(&g->dev->dev,
			  "Unhandled pending pbus interrupt\n");

	gk20a_writel(g, bus_intr_0_r(), val);
}

static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_0;

	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");

	mc_intr_0 = gk20a_readl(g, mc_intr_0_r());

	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);

	if (mc_intr_0 & mc_intr_0_pgraph_pending_f())
		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
	if (mc_intr_0 & mc_intr_0_pfifo_pending_f())
		gk20a_fifo_isr(g);
	if (mc_intr_0 & mc_intr_0_pmu_pending_f())
		gk20a_pmu_isr(g);
	if (mc_intr_0 & mc_intr_0_priv_ring_pending_f())
		gk20a_priv_ring_isr(g);
	if (mc_intr_0 & mc_intr_0_ltc_pending_f())
		gk20a_mm_ltc_isr(g);
	if (mc_intr_0 & mc_intr_0_pbus_pending_f())
		gk20a_pbus_isr(g);

	gk20a_writel(g, mc_intr_en_0_r(),
		     mc_intr_en_0_inta_hardware_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_0_r());

	return IRQ_HANDLED;
}

static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_1;

	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");

	mc_intr_1 = gk20a_readl(g, mc_intr_1_r());

	gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);

	if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
		gk20a_fifo_nonstall_isr(g);
	if (mc_intr_1 & mc_intr_0_pgraph_pending_f())
		gk20a_gr_nonstall_isr(g);

	gk20a_writel(g, mc_intr_en_1_r(),
		     mc_intr_en_1_inta_hardware_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_1_r());

	return IRQ_HANDLED;
}

static void gk20a_remove_support(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	/* pmu support should already be removed when the driver turns off
	   the gpu power rail in prepare_poweroff */
	if (g->gk20a_cdev.gk20a_cooling_dev)
		thermal_cooling_device_unregister(g->gk20a_cdev.gk20a_cooling_dev);

	if (g->gr.remove_support)
		g->gr.remove_support(&g->gr);

	if (g->fifo.remove_support)
		g->fifo.remove_support(&g->fifo);

	if (g->mm.remove_support)
		g->mm.remove_support(&g->mm);

	if (g->sim.remove_support)
		g->sim.remove_support(&g->sim);

	release_firmware(g->pmu_fw);

	if (g->irq_requested) {
		free_irq(g->irq_stall, g);
		free_irq(g->irq_nonstall, g);
		g->irq_requested = false;
	}

	/* free mappings to registers, etc. */
	if (g->regs) {
		iounmap(g->regs);
		g->regs = NULL;
	}
	if (g->bar1) {
		iounmap(g->bar1);
		g->bar1 = NULL;
	}
}

static int gk20a_init_support(struct platform_device *dev)
{
	int err = 0;
	struct gk20a *g = get_gk20a(dev);

	g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
					 &g->reg_mem);
	if (!g->regs) {
		dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n");
		err = -ENXIO;
		goto fail;
	}

	g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
					 &g->bar1_mem);
	if (!g->bar1) {
		dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
		err = -ENXIO;
		goto fail;
	}

	/* Get interrupt numbers */
	g->irq_stall = platform_get_irq(dev, 0);
	g->irq_nonstall = platform_get_irq(dev, 1);
	if (g->irq_stall < 0 || g->irq_nonstall < 0) {
		err = -ENXIO;
		goto fail;
	}

	if (tegra_cpu_is_asim()) {
		err = gk20a_init_sim_support(dev);
		if (err)
			goto fail;
	}

	mutex_init(&g->dbg_sessions_lock);
	mutex_init(&g->client_lock);

	g->remove_support = gk20a_remove_support;
	return 0;

fail:
	gk20a_remove_support(dev);
	return err;
}

static int gk20a_init_client(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	int err;

	gk20a_dbg_fn("");

#ifndef CONFIG_PM_RUNTIME
	gk20a_pm_finalize_poweron(&dev->dev);
#endif

	err = gk20a_init_mm_setup_sw(g);
	if (err)
		return err;

	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
		gk20a_scale_hw_init(dev);
	return 0;
}

static void gk20a_deinit_client(struct platform_device *dev)
{
	gk20a_dbg_fn("");
#ifndef CONFIG_PM_RUNTIME
	gk20a_pm_prepare_poweroff(&dev->dev);
#endif
}

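/*
 * Clients are refcounted under client_lock: the first gk20a_get_client()
 * call initializes the client state (and, without runtime PM, powers the
 * GPU on), and the last gk20a_put_client() tears it down again.
 */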
int gk20a_get_client(struct gk20a *g)
{
	int err = 0;

	mutex_lock(&g->client_lock);
	if (g->client_refcount == 0)
		err = gk20a_init_client(g->dev);
	if (!err)
		g->client_refcount++;
	mutex_unlock(&g->client_lock);
	return err;
}

void gk20a_put_client(struct gk20a *g)
{
	mutex_lock(&g->client_lock);
	if (g->client_refcount == 1)
		gk20a_deinit_client(g->dev);
	g->client_refcount--;
	mutex_unlock(&g->client_lock);
	WARN_ON(g->client_refcount < 0);
}

static int gk20a_pm_prepare_poweroff(struct device *_dev)
{
	struct platform_device *dev = to_platform_device(_dev);
	struct gk20a *g = get_gk20a(dev);
	int ret = 0;

	gk20a_dbg_fn("");

	if (!g->power_on)
		return 0;

	ret |= gk20a_channel_suspend(g);

	/* disable elpg before gr or fifo suspend */
	ret |= gk20a_pmu_destroy(g);
	ret |= gk20a_gr_suspend(g);
	ret |= gk20a_mm_suspend(g);
	ret |= gk20a_fifo_suspend(g);

	/*
	 * After this point, gk20a interrupts should not get
	 * serviced.
	 */
	if (g->irq_requested) {
		free_irq(g->irq_stall, g);
		free_irq(g->irq_nonstall, g);
		g->irq_requested = false;
	}

	/* Disable GPCPLL */
	ret |= gk20a_suspend_clk_support(g);
	g->power_on = false;

	return ret;
}

static void gk20a_detect_chip(struct gk20a *g)
{
	struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
	u32 mc_boot_0_value = gk20a_readl(g, mc_boot_0_r());

	gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
		NVHOST_GPU_ARCHITECTURE_SHIFT;
	gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
	gpu->rev =
		(mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
		mc_boot_0_minor_revision_v(mc_boot_0_value);

	gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
		       g->gpu_characteristics.arch,
		       g->gpu_characteristics.impl,
		       g->gpu_characteristics.rev);
}

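/*
 * Power-on sequence: request the stall/non-stall IRQs, unmask the
 * per-unit interrupt sources, then bring up clocks and the individual
 * units (fifo, mm, pmu, gr, therm) in dependency order. The thread's
 * priority is boosted for the duration to shorten boot time.
 */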
static int gk20a_pm_finalize_poweron(struct device *_dev)
{
	struct platform_device *dev = to_platform_device(_dev);
	struct gk20a *g = get_gk20a(dev);
	int err, nice_value;

	gk20a_dbg_fn("");

	if (g->power_on)
		return 0;

	nice_value = task_nice(current);
	set_user_nice(current, -20);

	if (!g->irq_requested) {
		err = request_threaded_irq(g->irq_stall,
					   gk20a_intr_isr_stall,
					   gk20a_intr_thread_stall,
					   0, "gk20a_stall", g);
		if (err) {
			dev_err(dev_from_gk20a(g),
				"failed to request stall intr irq @ %d\n",
				g->irq_stall);
			goto done;
		}
		err = request_threaded_irq(g->irq_nonstall,
					   gk20a_intr_isr_nonstall,
					   gk20a_intr_thread_nonstall,
					   0, "gk20a_nonstall", g);
		if (err) {
			dev_err(dev_from_gk20a(g),
				"failed to request non-stall intr irq @ %d\n",
				g->irq_nonstall);
			goto done;
		}
		g->irq_requested = true;
	}

	g->power_on = true;

	gk20a_writel(g, mc_intr_mask_1_r(),
		     mc_intr_0_pfifo_pending_f()
		     | mc_intr_0_pgraph_pending_f());
	gk20a_writel(g, mc_intr_en_1_r(),
		     mc_intr_en_1_inta_hardware_f());

	gk20a_writel(g, mc_intr_mask_0_r(),
		     mc_intr_0_pgraph_pending_f()
		     | mc_intr_0_pfifo_pending_f()
		     | mc_intr_0_priv_ring_pending_f()
		     | mc_intr_0_ltc_pending_f()
		     | mc_intr_0_pbus_pending_f());
	gk20a_writel(g, mc_intr_en_0_r(),
		     mc_intr_en_0_inta_hardware_f());

	if (!tegra_platform_is_silicon())
		gk20a_writel(g, bus_intr_en_0_r(), 0x0);
	else
		gk20a_writel(g, bus_intr_en_0_r(),
			     bus_intr_en_0_pri_squash_m() |
			     bus_intr_en_0_pri_fecserr_m() |
			     bus_intr_en_0_pri_timeout_m());
	gk20a_reset_priv_ring(g);

	gk20a_detect_chip(g);
	err = gpu_init_hal(g);
	if (err)
		goto done;

	/* TBD: move this after graphics init in which blcg/slcg is enabled.
	   This function removes SlowdownOnBoot, which applies a 32x divider
	   on the gpcpll bypass path. The purpose of the slowdown is to save
	   power during boot, but it also significantly slows down gk20a init
	   on simulation and emulation. We should remove SOB after graphics
	   power saving features (blcg/slcg) are enabled. For now, do it
	   here. */
	err = gk20a_init_clk_support(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a clk");
		goto done;
	}

	/* enable pri timeout only on silicon */
	if (tegra_platform_is_silicon()) {
		gk20a_writel(g,
			     timer_pri_timeout_r(),
			     timer_pri_timeout_period_f(0x186A0) |
			     timer_pri_timeout_en_en_enabled_f());
	} else {
		gk20a_writel(g,
			     timer_pri_timeout_r(),
			     timer_pri_timeout_period_f(0x186A0) |
			     timer_pri_timeout_en_en_disabled_f());
	}

	err = gk20a_init_fifo_reset_enable_hw(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to reset gk20a fifo");
		goto done;
	}

	err = gk20a_init_mm_support(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a mm");
		goto done;
	}

	err = gk20a_init_pmu_support(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a pmu");
		goto done;
	}

	err = gk20a_init_fifo_support(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a fifo");
		goto done;
	}

	err = gk20a_init_gr_support(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a gr");
		goto done;
	}

	err = gk20a_init_pmu_setup_hw2(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a pmu_hw2");
		goto done;
	}

	err = gk20a_init_therm_support(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a therm");
		goto done;
	}

	err = gk20a_init_gpu_characteristics(g);
	if (err) {
		gk20a_err(&dev->dev, "failed to init gk20a gpu characteristics");
		goto done;
	}

	gk20a_channel_resume(g);
	set_user_nice(current, nice_value);

done:
	return err;
}

static const struct of_device_id tegra_gk20a_of_match[] = {
#ifdef CONFIG_TEGRA_GK20A
	{ .compatible = "nvidia,tegra124-gk20a",
	  .data = &gk20a_tegra_platform },
#endif
	{ .compatible = "nvidia,generic-gk20a",
	  .data = &gk20a_generic_platform },
	{ },
};

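/*
 * Thermal cooling device hooks: the thermal framework throttles the GPU
 * by selecting a cooling state, which is mapped to a clock rate through
 * the platform gpufreq table.
 */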
int tegra_gpu_get_max_state(struct thermal_cooling_device *cdev,
			    unsigned long *max_state)
{
	struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;

	*max_state = gk20a_gpufreq_device->gk20a_freq_table_size - 1;
	return 0;
}

int tegra_gpu_get_cur_state(struct thermal_cooling_device *cdev,
			    unsigned long *cur_state)
{
	struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;

	*cur_state = gk20a_gpufreq_device->gk20a_freq_state;
	return 0;
}

int tegra_gpu_set_cur_state(struct thermal_cooling_device *c_dev,
			    unsigned long cur_state)
{
	u32 target_freq;
	struct gk20a *g;
	struct gpufreq_table_data *gpu_cooling_table;
	struct cooling_device_gk20a *gk20a_gpufreq_device = c_dev->devdata;

	BUG_ON(cur_state >= gk20a_gpufreq_device->gk20a_freq_table_size);

	g = container_of(gk20a_gpufreq_device, struct gk20a, gk20a_cdev);

	gpu_cooling_table = tegra_gpufreq_table_get();
	target_freq = gpu_cooling_table[cur_state].frequency;

	/* ensure a query for state will get the proper value */
	gk20a_gpufreq_device->gk20a_freq_state = cur_state;

	gk20a_clk_set_rate(g, target_freq);

	return 0;
}

static struct thermal_cooling_device_ops tegra_gpu_cooling_ops = {
	.get_max_state = tegra_gpu_get_max_state,
	.get_cur_state = tegra_gpu_get_cur_state,
	.set_cur_state = tegra_gpu_set_cur_state,
};

static int gk20a_create_device(
	struct platform_device *pdev, int devno, const char *cdev_name,
	struct cdev *cdev, struct device **out,
	const struct file_operations *ops)
{
	struct device *dev;
	int err;
	struct gk20a *g = get_gk20a(pdev);

	gk20a_dbg_fn("");

	cdev_init(cdev, ops);
	cdev->owner = THIS_MODULE;

	err = cdev_add(cdev, devno, 1);
	if (err) {
		dev_err(&pdev->dev,
			"failed to add %s cdev\n", cdev_name);
		return err;
	}

	dev = device_create(g->class, NULL, devno, NULL,
			    (pdev->id <= 0) ? INTERFACE_NAME : INTERFACE_NAME ".%d",
			    cdev_name, pdev->id);

	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		cdev_del(cdev);
		dev_err(&pdev->dev,
			"failed to create %s device for %s\n",
			cdev_name, pdev->name);
		return err;
	}

	*out = dev;
	return 0;
}

static void gk20a_user_deinit(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	if (g->channel.node) {
		device_destroy(g->class, g->channel.cdev.dev);
		cdev_del(&g->channel.cdev);
	}

	if (g->as.node) {
		device_destroy(g->class, g->as.cdev.dev);
		cdev_del(&g->as.cdev);
	}

	if (g->ctrl.node) {
		device_destroy(g->class, g->ctrl.cdev.dev);
		cdev_del(&g->ctrl.cdev);
	}

	if (g->dbg.node) {
		device_destroy(g->class, g->dbg.cdev.dev);
		cdev_del(&g->dbg.cdev);
	}

	if (g->prof.node) {
		device_destroy(g->class, g->prof.cdev.dev);
		cdev_del(&g->prof.cdev);
	}

	if (g->cdev_region)
		unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);

	if (g->class)
		class_destroy(g->class);
}

static int gk20a_user_init(struct platform_device *dev)
{
	int err;
	dev_t devno;
	struct gk20a *g = get_gk20a(dev);

	g->class = class_create(THIS_MODULE, CLASS_NAME);
	if (IS_ERR(g->class)) {
		err = PTR_ERR(g->class);
		g->class = NULL;
		dev_err(&dev->dev,
			"failed to create " CLASS_NAME " class\n");
		goto fail;
	}

	err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, CLASS_NAME);
	if (err) {
		dev_err(&dev->dev, "failed to allocate devno\n");
		goto fail;
	}
	g->cdev_region = devno;

	err = gk20a_create_device(dev, devno++, "",
				  &g->channel.cdev, &g->channel.node,
				  &gk20a_channel_ops);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, "-as",
				  &g->as.cdev, &g->as.node,
				  &gk20a_as_ops);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, "-ctrl",
				  &g->ctrl.cdev, &g->ctrl.node,
				  &gk20a_ctrl_ops);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, "-dbg",
				  &g->dbg.cdev, &g->dbg.node,
				  &gk20a_dbg_ops);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, "-prof",
				  &g->prof.cdev, &g->prof.node,
				  &gk20a_prof_ops);
	if (err)
		goto fail;

	return 0;
fail:
	gk20a_user_deinit(dev);
	return err;
}

struct channel_gk20a *gk20a_get_channel_from_file(int fd)
{
	struct channel_gk20a *ch;
	struct file *f = fget(fd);

	if (!f)
		return NULL;

	if (f->f_op != &gk20a_channel_ops) {
		fput(f);
		return NULL;
	}

	ch = (struct channel_gk20a *)f->private_data;
	fput(f);
	return ch;
}

static int gk20a_pm_enable_clk(struct device *dev)
{
	int index = 0;
	struct gk20a_platform *platform;

	platform = dev_get_drvdata(dev);
	if (!platform)
		return -EINVAL;

	for (index = 0; index < platform->num_clks; index++) {
		int err = clk_prepare_enable(platform->clk[index]);

		if (err)
			return err;
	}

	return 0;
}

static int gk20a_pm_disable_clk(struct device *dev)
{
	int index = 0;
	struct gk20a_platform *platform;

	platform = dev_get_drvdata(dev);
	if (!platform)
		return -EINVAL;

	for (index = 0; index < platform->num_clks; index++)
		clk_disable_unprepare(platform->clk[index]);

	return 0;
}

#ifdef CONFIG_PM
const struct dev_pm_ops gk20a_pm_ops = {
#if defined(CONFIG_PM_RUNTIME) && !defined(CONFIG_PM_GENERIC_DOMAINS)
	.runtime_resume = gk20a_pm_enable_clk,
	.runtime_suspend = gk20a_pm_disable_clk,
#endif
};
#endif

static int gk20a_pm_railgate(struct generic_pm_domain *domain)
{
	struct gk20a *g = container_of(domain, struct gk20a, pd);
	struct gk20a_platform *platform = platform_get_drvdata(g->dev);
	int ret = 0;

	if (platform->railgate)
		ret = platform->railgate(platform->g->dev);

	return ret;
}

static int gk20a_pm_unrailgate(struct generic_pm_domain *domain)
{
	struct gk20a *g = container_of(domain, struct gk20a, pd);
	struct gk20a_platform *platform = platform_get_drvdata(g->dev);
	int ret = 0;

	if (platform->unrailgate)
		ret = platform->unrailgate(platform->g->dev);

	return ret;
}

static int gk20a_pm_suspend(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int ret = 0;

	if (atomic_read(&dev->power.usage_count) > 1)
		return -EBUSY;

	ret = gk20a_pm_prepare_poweroff(dev);
	if (ret)
		return ret;

	gk20a_scale_suspend(to_platform_device(dev));

	if (platform->suspend)
		platform->suspend(dev);

	return 0;
}

static int gk20a_pm_resume(struct device *dev)
{
	int ret = 0;

	ret = gk20a_pm_finalize_poweron(dev);
	if (ret)
		return ret;

	gk20a_scale_resume(to_platform_device(dev));

	return 0;
}

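/*
 * Wire the GPU into a generic power domain so that railgating, clock
 * start/stop and state save/restore are driven by the genpd core instead
 * of open-coded runtime PM callbacks.
 */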
static int gk20a_pm_initialise_domain(struct platform_device *pdev)
{
	struct gk20a_platform *platform = platform_get_drvdata(pdev);
	struct dev_power_governor *pm_domain_gov = NULL;
	struct generic_pm_domain *domain = &platform->g->pd;
	int ret = 0;

	domain->name = kstrdup(pdev->name, GFP_KERNEL);

	if (!platform->can_railgate)
		pm_domain_gov = &pm_domain_always_on_gov;

	pm_genpd_init(domain, pm_domain_gov, true);

	domain->power_off = gk20a_pm_railgate;
	domain->power_on = gk20a_pm_unrailgate;
	domain->dev_ops.start = gk20a_pm_enable_clk;
	domain->dev_ops.stop = gk20a_pm_disable_clk;
	domain->dev_ops.save_state = gk20a_pm_prepare_poweroff;
	domain->dev_ops.restore_state = gk20a_pm_finalize_poweron;
	domain->dev_ops.suspend = gk20a_pm_suspend;
	domain->dev_ops.resume = gk20a_pm_resume;

	device_set_wakeup_capable(&pdev->dev, 0);
	ret = pm_genpd_add_device(domain, &pdev->dev);

	if (platform->railgate_delay)
		pm_genpd_set_poweroff_delay(domain, platform->railgate_delay);

	return ret;
}

static int gk20a_pm_init(struct platform_device *dev)
{
	struct gk20a_platform *platform = platform_get_drvdata(dev);
	int err = 0;

	/* Initialise pm runtime */
	if (platform->clockgate_delay) {
		pm_runtime_set_autosuspend_delay(&dev->dev,
						 platform->clockgate_delay);
		pm_runtime_use_autosuspend(&dev->dev);
	}

	pm_runtime_enable(&dev->dev);
	if (!pm_runtime_enabled(&dev->dev))
		gk20a_pm_enable_clk(&dev->dev);

	/* Enable runtime railgating if possible. If not,
	 * turn on the rail now. */
	if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
		platform->railgate(dev);
	else
		platform->unrailgate(dev);

	/* genpd will take care of runtime power management if it is enabled */
	if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
		err = gk20a_pm_initialise_domain(dev);

	return err;
}

static int gk20a_probe(struct platform_device *dev)
{
	struct gk20a *gk20a;
	int err;
	struct gk20a_platform *platform = NULL;
	struct cooling_device_gk20a *gpu_cdev = NULL;

	if (dev->dev.of_node) {
		const struct of_device_id *match;

		match = of_match_device(tegra_gk20a_of_match, &dev->dev);
		if (match)
			platform = (struct gk20a_platform *)match->data;
	} else
		platform = (struct gk20a_platform *)dev->dev.platform_data;

	if (!platform) {
		dev_err(&dev->dev, "no platform data\n");
		return -ENODATA;
	}

	gk20a_dbg_fn("");

	platform_set_drvdata(dev, platform);

	gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
	if (!gk20a) {
		dev_err(&dev->dev, "couldn't allocate gk20a support");
		return -ENOMEM;
	}

	set_gk20a(dev, gk20a);
	gk20a->dev = dev;

	err = gk20a_user_init(dev);
	if (err)
		return err;

	err = gk20a_init_support(dev);
	if (err)
		return err;

	spin_lock_init(&gk20a->mc_enable_lock);

	/* Initialize the platform interface. */
	err = platform->probe(dev);
	if (err) {
		dev_err(&dev->dev, "platform probe failed");
		return err;
	}

	err = gk20a_pm_init(dev);
	if (err) {
		dev_err(&dev->dev, "pm init failed");
		return err;
	}

	/* Initialise scaling */
	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
		gk20a_scale_init(dev);

	if (platform->late_probe) {
		err = platform->late_probe(dev);
		if (err) {
			dev_err(&dev->dev, "late probe failed");
			return err;
		}
	}

	gk20a_debug_init(dev);

	/* Set DMA parameters to allow larger sgt lists */
	dev->dev.dma_parms = &gk20a->dma_parms;
	dma_set_max_seg_size(&dev->dev, UINT_MAX);

	gpu_cdev = &gk20a->gk20a_cdev;
	gpu_cdev->gk20a_freq_table_size = tegra_gpufreq_table_size_get();
	gpu_cdev->gk20a_freq_state = 0;
	gpu_cdev->g = gk20a;
	gpu_cdev->gk20a_cooling_dev =
		thermal_cooling_device_register("gk20a_cdev", gpu_cdev,
						&tegra_gpu_cooling_ops);

	gk20a->gr_idle_timeout_default =
		CONFIG_GK20A_DEFAULT_TIMEOUT;
	gk20a->timeouts_enabled = true;

	/* Set up initial clock gating settings */
	if (tegra_platform_is_silicon()) {
		gk20a->slcg_enabled = true;
		gk20a->blcg_enabled = true;
		gk20a->elcg_enabled = true;
		gk20a->elpg_enabled = true;
		gk20a->aelpg_enabled = true;
	}

	gk20a_create_sysfs(dev);

#ifdef CONFIG_DEBUG_FS
	clk_gk20a_debugfs_init(dev);

	spin_lock_init(&gk20a->debugfs_lock);
	gk20a->mm.ltc_enabled = true;
	gk20a->mm.ltc_enabled_debug = true;
	gk20a->debugfs_ltc_enabled =
		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
				    platform->debugfs,
				    &gk20a->mm.ltc_enabled_debug);
	gk20a->debugfs_gr_idle_timeout_default =
		debugfs_create_u32("gr_idle_timeout_default_us",
				   S_IRUGO|S_IWUSR, platform->debugfs,
				   &gk20a->gr_idle_timeout_default);
	gk20a->debugfs_timeouts_enabled =
		debugfs_create_bool("timeouts_enabled",
				    S_IRUGO|S_IWUSR,
				    platform->debugfs,
				    &gk20a->timeouts_enabled);
	gk20a_pmu_debugfs_init(dev);
#endif

#ifdef CONFIG_INPUT_CFBOOST
	cfb_add_device(&dev->dev);
#endif

	return 0;
}

static int __exit gk20a_remove(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	gk20a_dbg_fn("");

#ifdef CONFIG_INPUT_CFBOOST
	cfb_remove_device(&dev->dev);
#endif

	if (g->remove_support)
		g->remove_support(dev);

	gk20a_user_deinit(dev);

	set_gk20a(dev, NULL);
#ifdef CONFIG_DEBUG_FS
	debugfs_remove(g->debugfs_ltc_enabled);
	debugfs_remove(g->debugfs_gr_idle_timeout_default);
	debugfs_remove(g->debugfs_timeouts_enabled);
#endif

	kfree(g);

#ifdef CONFIG_PM_RUNTIME
	pm_runtime_put(&dev->dev);
	pm_runtime_disable(&dev->dev);
#else
	nvhost_module_disable_clk(&dev->dev);
#endif

	return 0;
}

static struct platform_driver gk20a_driver = {
	.probe = gk20a_probe,
	.remove = __exit_p(gk20a_remove),
	.driver = {
		.owner = THIS_MODULE,
		.name = "gk20a",
#ifdef CONFIG_OF
		.of_match_table = tegra_gk20a_of_match,
#endif
#ifdef CONFIG_PM
		.pm = &gk20a_pm_ops,
#endif
	}
};

static int __init gk20a_init(void)
{
	return platform_driver_register(&gk20a_driver);
}

static void __exit gk20a_exit(void)
{
	platform_driver_unregister(&gk20a_driver);
}

bool is_gk20a_module(struct platform_device *dev)
{
	return &gk20a_driver.driver == dev->dev.driver;
}

void gk20a_busy_noresume(struct platform_device *pdev)
{
	pm_runtime_get_noresume(&pdev->dev);
}

int gk20a_channel_busy(struct platform_device *pdev)
{
	int ret = 0;

	ret = gk20a_platform_channel_busy(pdev);
	if (ret)
		return ret;

	ret = gk20a_busy(pdev);
	if (ret)
		gk20a_platform_channel_idle(pdev);

	return ret;
}

void gk20a_channel_idle(struct platform_device *pdev)
{
	gk20a_idle(pdev);
	gk20a_platform_channel_idle(pdev);
}

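/*
 * gk20a_busy()/gk20a_idle() bracket GPU work: busy takes a runtime PM
 * reference (powering the GPU up if needed) and notifies the scaling
 * governor; idle drops the reference so autosuspend can gate the GPU.
 */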
int gk20a_busy(struct platform_device *pdev)
{
	int ret = 0;

#ifdef CONFIG_PM_RUNTIME
	ret = pm_runtime_get_sync(&pdev->dev);
#endif
	gk20a_scale_notify_busy(pdev);

	return ret < 0 ? ret : 0;
}

void gk20a_idle(struct platform_device *pdev)
{
#ifdef CONFIG_PM_RUNTIME
	if (atomic_read(&pdev->dev.power.usage_count) == 1)
		gk20a_scale_notify_idle(pdev);
	pm_runtime_mark_last_busy(&pdev->dev);
	pm_runtime_put_sync_autosuspend(&pdev->dev);
#else
	gk20a_scale_notify_idle(pdev);
#endif
}

void gk20a_disable(struct gk20a *g, u32 units)
{
	u32 pmc;

	gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units);

	spin_lock(&g->mc_enable_lock);
	pmc = gk20a_readl(g, mc_enable_r());
	pmc &= ~units;
	gk20a_writel(g, mc_enable_r(), pmc);
	spin_unlock(&g->mc_enable_lock);
}

void gk20a_enable(struct gk20a *g, u32 units)
{
	u32 pmc;

	gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units);

	spin_lock(&g->mc_enable_lock);
	pmc = gk20a_readl(g, mc_enable_r());
	pmc |= units;
	gk20a_writel(g, mc_enable_r(), pmc);
	spin_unlock(&g->mc_enable_lock);
	gk20a_readl(g, mc_enable_r());

	udelay(20);
}

void gk20a_reset(struct gk20a *g, u32 units)
{
	gk20a_disable(g, units);
	udelay(20);
	gk20a_enable(g, units);
}

int gk20a_init_gpu_characteristics(struct gk20a *g)
{
	struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;

	gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
	gpu->on_board_video_memory_size = 0; /* integrated GPU */

	gpu->num_gpc = g->gr.gpc_count;
	gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;

	gpu->bus_type = NVHOST_GPU_BUS_TYPE_AXI; /* always AXI for now */

	gpu->big_page_size = g->mm.big_page_size;
	gpu->compression_page_size = g->mm.compression_page_size;

	return 0;
}

int nvhost_vpr_info_fetch(void)
{
	struct gk20a *g = get_gk20a(to_platform_device(
				bus_find_device_by_name(&platform_bus_type,
							NULL, "gk20a.0")));

	if (!g) {
		pr_info("gk20a isn't ready yet\n");
		return 0;
	}

	return gk20a_mm_mmu_vpr_info_fetch(g);
}

static const struct firmware *
do_request_firmware(struct device *dev, const char *prefix, const char *fw_name)
{
	const struct firmware *fw;
	char *fw_path = NULL;
	int path_len, err;

	if (prefix) {
		path_len = strlen(prefix) + strlen(fw_name);
		path_len += 2; /* for the path separator and zero terminator */

		fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL);
		if (!fw_path)
			return NULL;

		sprintf(fw_path, "%s/%s", prefix, fw_name);
		fw_name = fw_path;
	}

	err = request_firmware(&fw, fw_name, dev);
	kfree(fw_path);
	if (err)
		return NULL;
	return fw;
}

/* This is a simple wrapper around request_firmware that takes 'fw_name' and
 * applies an IP-specific relative path prefix to it. The caller is
 * responsible for calling release_firmware later. */
const struct firmware *
gk20a_request_firmware(struct gk20a *g, const char *fw_name)
{
	struct device *dev = &g->dev->dev;
	const struct firmware *fw;

	/* current->fs is NULL when called from SYS_EXIT.
	   Check here to prevent a crash in request_firmware. */
	if (!current->fs || !fw_name)
		return NULL;

	BUG_ON(!g->ops.name);
	fw = do_request_firmware(dev, g->ops.name, fw_name);

#ifdef CONFIG_TEGRA_GK20A
	/* TO BE REMOVED - Support loading from legacy SOC specific path. */
	if (!fw)
		fw = nvhost_client_request_firmware(g->dev, fw_name);
#endif

	if (!fw) {
		dev_err(dev, "failed to get firmware\n");
		return NULL;
	}

	return fw;
}

module_init(gk20a_init);
module_exit(gk20a_exit);