summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c561
1 files changed, 561 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
new file mode 100644
index 00000000..35658f31
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -0,0 +1,561 @@
1/*
2 * drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c
3 *
4 * GK20A Tegra Platform Interface
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <linux/debugfs.h>
19#include <linux/tegra-powergate.h>
20#include <linux/platform_data/tegra_edp.h>
21#include <linux/nvhost_ioctl.h>
22#include <linux/dma-buf.h>
23#include <linux/nvmap.h>
24#include <mach/irqs.h>
25#include <mach/pm_domains.h>
26
27#include "../../../arch/arm/mach-tegra/iomap.h"
28
29#include "gk20a.h"
30#include "hal_gk20a.h"
31#include "platform_gk20a.h"
32#include "gk20a_scale.h"
33
34#define TEGRA_GK20A_INTR INT_GPU
35#define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL
36
37#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
38#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
39
40extern struct device tegra_vpr_dev;
41struct gk20a_platform t132_gk20a_tegra_platform;
42
/*
 * Coefficients of the GPU-rate -> EMC-rate mapping
 *
 *   Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
 *
 * All rate-like members are stored as 20.12 fixed-point MHz values; they
 * are filled in by gk20a_tegra_calibrate_emc() and consumed by
 * gk20a_tegra_get_emc_rate().
 */
struct gk20a_emc_params {
	long emc_slope;		/* S:  slope of the linear term */
	long emc_offset;	/* O:  offset of the linear term */
	long emc_dip_slope;	/* Sd: coefficient of the quadratic dip */
	long emc_dip_offset;	/* Od: offset of the quadratic dip */
	long emc_xmid;		/* Rm: middle GPU rate, (max + min) / 2 */
	bool linear;		/* not read or written anywhere in this file */
};
51
52/*
53 * 20.12 fixed point arithmetic
54 */
55
/*
 * Fixed-point configuration.
 *
 * FXFRAC is the number of fractional bits and FX_HALF is the fixed-point
 * representation of 0.5 (used for round-to-nearest).  Both are enum
 * constants so that FX_HALF can be derived from FXFRAC instead of
 * repeating the literal 12.
 */
enum {
	FXFRAC = 12,
	FX_HALF = (1 << FXFRAC) / 2,
};

/* Convert between plain integers and fixed-point values. */
#define INT_TO_FX(x) ((x) << FXFRAC)
#define FX_TO_INT(x) ((x) >> FXFRAC)

/* Convert between MHz and Hz (integer arithmetic, HZ_TO_MHZ truncates). */
#define MHZ_TO_HZ(x) ((x) * 1000000)
#define HZ_TO_MHZ(x) ((x) / 1000000)
64
65int FXMUL(int x, int y)
66{
67 return ((long long) x * (long long) y) >> FXFRAC;
68}
69
70int FXDIV(int x, int y)
71{
72 /* long long div operation not supported, must shift manually. This
73 * would have been
74 *
75 * return (((long long) x) << FXFRAC) / (long long) y;
76 */
77 int pos, t;
78 if (x == 0)
79 return 0;
80
81 /* find largest allowable right shift to numerator, limit to FXFRAC */
82 t = x < 0 ? -x : x;
83 pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
84 if (pos > FXFRAC)
85 pos = FXFRAC;
86
87 y >>= FXFRAC - pos;
88 if (y == 0)
89 return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
90
91 return (x << pos) / y;
92}
93
94static int gk20a_tegra_channel_busy(struct platform_device *dev)
95{
96 int ret = 0;
97
98 /* Explicitly turn on the host1x clocks
99 * - This is needed as host1x driver sets ignore_children = true
100 * to cater the use case of display clock ON but host1x clock OFF
101 * in OS-Idle-Display-ON case
102 * - This was easily done in ACM as it only checked the ref count
103 * of host1x (or any device for that matter) to be zero before
104 * turning off its clock
105 * - However, runtime PM checks to see if *ANY* child of device is
106 * in ACTIVE state and if yes, it doesn't suspend the parent. As a
107 * result of this, display && host1x clocks remains ON during
108 * OS-Idle-Display-ON case
109 * - The code below fixes this use-case
110 */
111 if (to_platform_device(dev->dev.parent))
112 ret = nvhost_module_busy_ext(
113 to_platform_device(dev->dev.parent));
114
115 return ret;
116}
117
118static void gk20a_tegra_channel_idle(struct platform_device *dev)
119{
120 /* Explicitly turn off the host1x clocks */
121 if (to_platform_device(dev->dev.parent))
122 nvhost_module_idle_ext(to_platform_device(dev->dev.parent));
123}
124
125static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
126 struct gr_ctx_buffer_desc *desc)
127{
128 gk20a_free_sgtable(&desc->sgt);
129 dma_free_attrs(&tegra_vpr_dev, desc->size,
130 (void *)(uintptr_t)&desc->iova,
131 desc->iova, &desc->attrs);
132}
133
134static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
135 struct gr_ctx_buffer_desc *desc,
136 size_t size)
137{
138 struct device *dev = &pdev->dev;
139 DEFINE_DMA_ATTRS(attrs);
140 dma_addr_t iova;
141 struct sg_table *sgt;
142 struct page *page;
143 int err = 0;
144
145 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
146
147 (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
148 GFP_KERNEL, &attrs);
149 if (dma_mapping_error(&tegra_vpr_dev, iova))
150 return -ENOMEM;
151
152 desc->iova = iova;
153 desc->size = size;
154 desc->attrs = attrs;
155 desc->destroy = gk20a_tegra_secure_destroy;
156
157 sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
158 if (!sgt) {
159 gk20a_err(dev, "failed to allocate memory\n");
160 goto fail;
161 }
162 err = sg_alloc_table(sgt, 1, GFP_KERNEL);
163 if (err) {
164 gk20a_err(dev, "failed to allocate sg_table\n");
165 goto fail_sgt;
166 }
167 page = phys_to_page(iova);
168 sg_set_page(sgt->sgl, page, size, 0);
169 sg_dma_address(sgt->sgl) = iova;
170
171 desc->sgt = sgt;
172
173 return err;
174
175fail_sgt:
176 kfree(sgt);
177fail:
178 dma_free_attrs(&tegra_vpr_dev, desc->size,
179 (void *)(uintptr_t)&desc->iova,
180 desc->iova, &desc->attrs);
181 return err;
182}
183
184/*
185 * gk20a_tegra_get_emc_rate()
186 *
187 * This function returns the minimum emc clock based on gpu frequency
188 */
189
190long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
191{
192 long hz;
193
194 freq = INT_TO_FX(HZ_TO_MHZ(freq));
195 hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
196
197 hz -= FXMUL(emc_params->emc_dip_slope,
198 FXMUL(freq - emc_params->emc_xmid,
199 freq - emc_params->emc_xmid)) +
200 emc_params->emc_dip_offset;
201
202 hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
203 hz = (hz < 0) ? 0 : hz;
204
205 return hz;
206}
207
/*
 * gk20a_tegra_postscale(pdev, freq)
 *
 * This function sets emc frequency based on current gpu frequency
 */
213
214static void gk20a_tegra_postscale(struct platform_device *pdev,
215 unsigned long freq)
216{
217 struct gk20a_platform *platform = platform_get_drvdata(pdev);
218 struct gk20a_scale_profile *profile = platform->g->scale_profile;
219 struct gk20a_emc_params *emc_params = profile->private_data;
220 struct gk20a *g = get_gk20a(pdev);
221
222 long after = gk20a_clk_get_rate(g);
223 long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
224
225 clk_set_rate(platform->clk[2], emc_target);
226}
227
/*
 * gk20a_tegra_prescale(pdev)
 *
 * This function informs EDP about changed constraints.
 */
233
234static void gk20a_tegra_prescale(struct platform_device *pdev)
235{
236 struct gk20a *g = get_gk20a(pdev);
237 u32 avg = 0;
238
239 gk20a_pmu_load_norm(g, &avg);
240 tegra_edp_notify_gpu_load(avg);
241}
242
243/*
244 * gk20a_tegra_calibrate_emc()
245 *
246 * Compute emc scaling parameters
247 *
248 * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
249 *
250 * Remc - 3d.emc rate
251 * R3d - 3d.cbus rate
252 * Rm - 3d.cbus 'middle' rate = (max + min)/2
253 * S - emc_slope
254 * O - emc_offset
255 * Sd - emc_dip_slope
256 * Od - emc_dip_offset
257 *
258 * this superposes a quadratic dip centered around the middle 3d
259 * frequency over a linear correlation of 3d.emc to 3d clock
260 * rates.
261 *
262 * S, O are chosen so that the maximum 3d rate produces the
263 * maximum 3d.emc rate exactly, and the minimum 3d rate produces
264 * at least the minimum 3d.emc rate.
265 *
266 * Sd and Od are chosen to produce the largest dip that will
267 * keep 3d.emc frequencies monotonously decreasing with 3d
268 * frequencies. To achieve this, the first derivative of Remc
269 * with respect to R3d should be zero for the minimal 3d rate:
270 *
271 * R'emc = S - 2 * Sd * (R3d - Rm)
272 * R'emc(R3d-min) = 0
273 * S = 2 * Sd * (R3d-min - Rm)
274 * = 2 * Sd * (R3d-min - R3d-max) / 2
275 *
276 * +------------------------------+
277 * | Sd = S / (R3d-min - R3d-max) |
278 * +------------------------------+
279 *
280 * dip = Sd * (R3d - Rm)^2 + Od
281 *
282 * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
283 *
284 * Sd * (R3d-min - Rm)^2 + Od = 0
285 * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
286 * = -Sd * ((R3d-min - R3d-max)^2) / 4
287 *
288 * +------------------------------+
289 * | Od = (emc-max - emc-min) / 4 |
290 * +------------------------------+
291 *
292 */
293
294void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
295 struct clk *clk_3d, struct clk *clk_3d_emc)
296{
297 long correction;
298 unsigned long max_emc;
299 unsigned long min_emc;
300 unsigned long min_rate_3d;
301 unsigned long max_rate_3d;
302
303 max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
304 max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
305
306 min_emc = clk_round_rate(clk_3d_emc, 0);
307 min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
308
309 max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
310 max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
311
312 min_rate_3d = clk_round_rate(clk_3d, 0);
313 min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
314
315 emc_params->emc_slope =
316 FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
317 emc_params->emc_offset = max_emc -
318 FXMUL(emc_params->emc_slope, max_rate_3d);
319 /* Guarantee max 3d rate maps to max emc rate */
320 emc_params->emc_offset += max_emc -
321 (FXMUL(emc_params->emc_slope, max_rate_3d) +
322 emc_params->emc_offset);
323
324 emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
325 emc_params->emc_dip_slope =
326 -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
327 emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
328 correction =
329 emc_params->emc_dip_offset +
330 FXMUL(emc_params->emc_dip_slope,
331 FXMUL(max_rate_3d - emc_params->emc_xmid,
332 max_rate_3d - emc_params->emc_xmid));
333 emc_params->emc_dip_offset -= correction;
334}
335
336/*
337 * gk20a_tegra_railgate()
338 *
339 * Gate (disable) gk20a power rail
340 */
341
342static int gk20a_tegra_railgate(struct platform_device *pdev)
343{
344 if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
345 tegra_powergate_partition(TEGRA_POWERGATE_GPU);
346 return 0;
347}
348
349/*
350 * gk20a_tegra_unrailgate()
351 *
352 * Ungate (enable) gk20a power rail
353 */
354
355static int gk20a_tegra_unrailgate(struct platform_device *pdev)
356{
357 tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
358 return 0;
359}
360
361struct {
362 char *name;
363 unsigned long default_rate;
364} tegra_gk20a_clocks[] = {
365 {"PLLG_ref", UINT_MAX},
366 {"pwr", 204000000},
367 {"emc", UINT_MAX} };
368
369/*
370 * gk20a_tegra_get_clocks()
371 *
372 * This function finds clocks in tegra platform and populates
373 * the clock information to gk20a platform data.
374 */
375
376static int gk20a_tegra_get_clocks(struct platform_device *pdev)
377{
378 struct gk20a_platform *platform = platform_get_drvdata(pdev);
379 char devname[16];
380 int i;
381 int ret = 0;
382
383 snprintf(devname, sizeof(devname),
384 (pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d\n",
385 pdev->name, pdev->id);
386
387 platform->num_clks = 0;
388 for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
389 long rate = tegra_gk20a_clocks[i].default_rate;
390 struct clk *c;
391
392 c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
393 if (IS_ERR(c)) {
394 ret = PTR_ERR(c);
395 goto err_get_clock;
396 }
397 rate = clk_round_rate(c, rate);
398 clk_set_rate(c, rate);
399 platform->clk[i] = c;
400 }
401 platform->num_clks = i;
402
403 return 0;
404
405err_get_clock:
406
407 while (i--)
408 clk_put(platform->clk[i]);
409 return ret;
410}
411
412static void gk20a_tegra_scale_init(struct platform_device *pdev)
413{
414 struct gk20a_platform *platform = gk20a_get_platform(pdev);
415 struct gk20a_scale_profile *profile = platform->g->scale_profile;
416 struct gk20a_emc_params *emc_params;
417
418 if (!profile)
419 return;
420
421 emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
422 if (!emc_params)
423 return;
424
425 gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
426 platform->clk[2]);
427
428 profile->private_data = emc_params;
429}
430
431static void gk20a_tegra_debug_dump(struct platform_device *pdev)
432{
433 struct gk20a_platform *platform = gk20a_get_platform(pdev);
434 struct gk20a *g = platform->g;
435 nvhost_debug_dump_device(g->dev);
436}
437
438static int gk20a_tegra_probe(struct platform_device *dev)
439{
440 struct gk20a_platform *platform = gk20a_get_platform(dev);
441
442 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) {
443 t132_gk20a_tegra_platform.g = platform->g;
444 *platform = t132_gk20a_tegra_platform;
445 }
446
447 gk20a_tegra_get_clocks(dev);
448
449 return 0;
450}
451
452static int gk20a_tegra_late_probe(struct platform_device *dev)
453{
454 struct gk20a_platform *platform = gk20a_get_platform(dev);
455
456 /* Make gk20a power domain a subdomain of mc */
457 tegra_pd_add_sd(&platform->g->pd);
458
459 /* Initialise tegra specific scaling quirks */
460 gk20a_tegra_scale_init(dev);
461
462 return 0;
463}
464
/* Suspend hook: report a GPU load of 0 to EDP.  Always returns 0. */
static int gk20a_tegra_suspend(struct device *dev)
{
	const int idle_load = 0;

	tegra_edp_notify_gpu_load(idle_load);

	return 0;
}
470
471static struct resource gk20a_tegra_resources[] = {
472 {
473 .start = TEGRA_GK20A_BAR0_BASE,
474 .end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
475 .flags = IORESOURCE_MEM,
476 },
477 {
478 .start = TEGRA_GK20A_BAR1_BASE,
479 .end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
480 .flags = IORESOURCE_MEM,
481 },
482 { /* Used on ASIM only */
483 .start = TEGRA_GK20A_SIM_BASE,
484 .end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
485 .flags = IORESOURCE_MEM,
486 },
487 {
488 .start = TEGRA_GK20A_INTR,
489 .end = TEGRA_GK20A_INTR,
490 .flags = IORESOURCE_IRQ,
491 },
492 {
493 .start = TEGRA_GK20A_INTR_NONSTALL,
494 .end = TEGRA_GK20A_INTR_NONSTALL,
495 .flags = IORESOURCE_IRQ,
496 },
497};
498
499struct gk20a_platform t132_gk20a_tegra_platform = {
500 .has_syncpoints = true,
501
502 /* power management configuration */
503 .railgate_delay = 500,
504 .clockgate_delay = 50,
505
506 .probe = gk20a_tegra_probe,
507 .late_probe = gk20a_tegra_late_probe,
508
509 /* power management callbacks */
510 .suspend = gk20a_tegra_suspend,
511 .railgate = gk20a_tegra_railgate,
512 .unrailgate = gk20a_tegra_unrailgate,
513
514 /* frequency scaling configuration */
515 .prescale = gk20a_tegra_prescale,
516 .postscale = gk20a_tegra_postscale,
517 .devfreq_governor = "nvhost_podgov",
518 .qos_id = PM_QOS_GPU_FREQ_MIN,
519
520 .channel_busy = gk20a_tegra_channel_busy,
521 .channel_idle = gk20a_tegra_channel_idle,
522 .secure_alloc = gk20a_tegra_secure_alloc,
523 .dump_platform_dependencies = gk20a_tegra_debug_dump,
524};
525
526struct gk20a_platform gk20a_tegra_platform = {
527 .has_syncpoints = true,
528
529 /* power management configuration */
530 .railgate_delay = 500,
531 .clockgate_delay = 50,
532 .can_railgate = true,
533
534 .probe = gk20a_tegra_probe,
535 .late_probe = gk20a_tegra_late_probe,
536
537 /* power management callbacks */
538 .suspend = gk20a_tegra_suspend,
539 .railgate = gk20a_tegra_railgate,
540 .unrailgate = gk20a_tegra_unrailgate,
541
542 /* frequency scaling configuration */
543 .prescale = gk20a_tegra_prescale,
544 .postscale = gk20a_tegra_postscale,
545 .devfreq_governor = "nvhost_podgov",
546 .qos_id = PM_QOS_GPU_FREQ_MIN,
547
548 .channel_busy = gk20a_tegra_channel_busy,
549 .channel_idle = gk20a_tegra_channel_idle,
550 .secure_alloc = gk20a_tegra_secure_alloc,
551 .dump_platform_dependencies = gk20a_tegra_debug_dump,
552};
553
554struct platform_device tegra_gk20a_device = {
555 .name = "gk20a",
556 .resource = gk20a_tegra_resources,
557 .num_resources = ARRAY_SIZE(gk20a_tegra_resources),
558 .dev = {
559 .platform_data = &gk20a_tegra_platform,
560 },
561};