aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/video/tegra/host/gr3d/scale3d.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/video/tegra/host/gr3d/scale3d.c')
-rw-r--r--drivers/video/tegra/host/gr3d/scale3d.c661
1 files changed, 661 insertions, 0 deletions
diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c
new file mode 100644
index 00000000000..8a267a127ea
--- /dev/null
+++ b/drivers/video/tegra/host/gr3d/scale3d.c
@@ -0,0 +1,661 @@
1/*
2 * drivers/video/tegra/host/t20/scale3d.c
3 *
4 * Tegra Graphics Host 3D clock scaling
5 *
6 * Copyright (c) 2010-2012, NVIDIA Corporation.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
/*
 * 3d clock scaling
 *
 * nvhost_scale3d_notify_busy() is called upon submit,
 * nvhost_scale3d_notify_idle() is called when all outstanding submits are
 * completed. Idle times are measured over a fixed time period
 * (scale3d.p_period). If the 3d module idle time percentage goes over the
 * limit (set in scale3d.p_idle_max), 3d clocks are scaled down. If the
 * percentage goes under the minimum limit (set in scale3d.p_idle_min), 3d
 * clocks are scaled up. An additional test is made over the time frame given
 * in scale3d.p_fast_response for clocking up quickly in response to load
 * peaks.
 *
 * 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
 * bezier-like factor added to pull 3d.emc rate a bit lower.
 */
36
37#include <linux/debugfs.h>
38#include <linux/types.h>
39#include <linux/clk.h>
40#include <mach/clk.h>
41#include <mach/hardware.h>
42#include "scale3d.h"
43#include "dev.h"
44
/* Enable-state helpers; both are defined further down in this file. */
static int scale3d_is_enabled(void);
static void scale3d_enable(int enable);

/*
 * Square of x. The argument is expanded twice, so only pass expressions
 * without side effects.
 */
#define POW2(x) ((x) * (x))
49
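/*
 * debugfs parameters to control 3d clock scaling test
 *
 * period - time period (in microseconds) for clock rate evaluation
 * fast_response - time period (in microseconds) for evaluation of 'busy'
 *	spikes
 * idle_min - if less than [idle_min] percent idle over [fast_response]
 *	microseconds, clock up.
 * idle_max - if over [idle_max] percent idle over [period] microseconds,
 *	clock down.
 * adjust - if nonzero, period, fast_response, idle_min and idle_max are
 *	adapted at run time from the scale up / scale down hint counts
 * scale_emc - if nonzero, the 3d.emc clock is scaled along with the 3d clock
 * emc_dip - if nonzero, the quadratic dip is subtracted from the 3d.emc rate
 * max_scale - limits rate changes to no less than (100 - max_scale)% or
 *	(100 + 2 * max_scale)% of current clock rate
 *	(note: no debugfs file for max_scale is created in this file)
 * verbosity - set to 5 or above for debug printouts, 10 or above to also
 *	print the adjustment statistics
 */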
63
64struct scale3d_info_rec {
65 struct mutex lock; /* lock for timestamps etc */
66 int enable;
67 int init;
68 ktime_t idle_frame;
69 ktime_t fast_frame;
70 ktime_t last_idle;
71 ktime_t last_short_term_idle;
72 int is_idle;
73 ktime_t last_tweak;
74 ktime_t last_down;
75 int fast_up_count;
76 int slow_down_count;
77 int is_scaled;
78 int fast_responses;
79 unsigned long idle_total;
80 unsigned long idle_short_term_total;
81 unsigned long max_rate_3d;
82 long emc_slope;
83 long emc_offset;
84 long emc_dip_slope;
85 long emc_dip_offset;
86 long emc_xmid;
87 unsigned long min_rate_3d;
88 struct work_struct work;
89 struct delayed_work idle_timer;
90 unsigned int scale;
91 unsigned int p_period;
92 unsigned int period;
93 unsigned int p_idle_min;
94 unsigned int idle_min;
95 unsigned int p_idle_max;
96 unsigned int idle_max;
97 unsigned int p_fast_response;
98 unsigned int fast_response;
99 unsigned int p_adjust;
100 unsigned int p_scale_emc;
101 unsigned int p_emc_dip;
102 unsigned int p_verbosity;
103 struct clk *clk_3d;
104 struct clk *clk_3d2;
105 struct clk *clk_3d_emc;
106};
107
108static struct scale3d_info_rec scale3d;
109
110static void scale3d_clocks(unsigned long percent)
111{
112 unsigned long hz, curr;
113
114 if (!tegra_is_clk_enabled(scale3d.clk_3d))
115 return;
116
117 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
118 if (!tegra_is_clk_enabled(scale3d.clk_3d2))
119 return;
120
121 curr = clk_get_rate(scale3d.clk_3d);
122 hz = percent * (curr / 100);
123
124 if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) {
125 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
126 clk_set_rate(scale3d.clk_3d2, 0);
127 clk_set_rate(scale3d.clk_3d, hz);
128
129 if (scale3d.p_scale_emc) {
130 long after = (long) clk_get_rate(scale3d.clk_3d);
131 hz = after * scale3d.emc_slope + scale3d.emc_offset;
132 if (scale3d.p_emc_dip)
133 hz -=
134 (scale3d.emc_dip_slope *
135 POW2(after / 1000 - scale3d.emc_xmid) +
136 scale3d.emc_dip_offset);
137 clk_set_rate(scale3d.clk_3d_emc, hz);
138 }
139 }
140}
141
142static void scale3d_clocks_handler(struct work_struct *work)
143{
144 unsigned int scale;
145
146 mutex_lock(&scale3d.lock);
147 scale = scale3d.scale;
148 mutex_unlock(&scale3d.lock);
149
150 if (scale != 0)
151 scale3d_clocks(scale);
152}
153
154void nvhost_scale3d_suspend(struct nvhost_device *dev)
155{
156 if (!scale3d.enable)
157 return;
158
159 cancel_work_sync(&scale3d.work);
160 cancel_delayed_work(&scale3d.idle_timer);
161}
162
163/* set 3d clocks to max */
164static void reset_3d_clocks(void)
165{
166 if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) {
167 clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
168 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
169 clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d);
170 if (scale3d.p_scale_emc)
171 clk_set_rate(scale3d.clk_3d_emc,
172 clk_round_rate(scale3d.clk_3d_emc, UINT_MAX));
173 }
174}
175
176static int scale3d_is_enabled(void)
177{
178 int enable;
179
180 if (!scale3d.enable)
181 return 0;
182
183 mutex_lock(&scale3d.lock);
184 enable = scale3d.enable;
185 mutex_unlock(&scale3d.lock);
186
187 return enable;
188}
189
190static void scale3d_enable(int enable)
191{
192 int disable = 0;
193
194 mutex_lock(&scale3d.lock);
195
196 if (enable) {
197 if (scale3d.max_rate_3d != scale3d.min_rate_3d)
198 scale3d.enable = 1;
199 } else {
200 scale3d.enable = 0;
201 disable = 1;
202 }
203
204 mutex_unlock(&scale3d.lock);
205
206 if (disable)
207 reset_3d_clocks();
208}
209
210static void reset_scaling_counters(ktime_t time)
211{
212 scale3d.idle_total = 0;
213 scale3d.idle_short_term_total = 0;
214 scale3d.last_idle = time;
215 scale3d.last_short_term_idle = time;
216 scale3d.idle_frame = time;
217}
218
219/* scaling_adjust - use scale up / scale down hint counts to adjust scaling
220 * parameters.
221 *
222 * hint_ratio is 100 x the ratio of scale up to scale down hints. Three cases
223 * are distinguished:
224 *
225 * hint_ratio < HINT_RATIO_MIN - set parameters to maximize scaling effect
226 * hint_ratio > HINT_RATIO_MAX - set parameters to minimize scaling effect
227 * hint_ratio between limits - scale parameters linearly
228 *
229 * the parameters adjusted are
230 *
231 * * fast_response time
232 * * period - time for scaling down estimate
233 * * idle_min percentage
234 * * idle_max percentage
235 */
236#define SCALING_ADJUST_PERIOD 1000000
237#define HINT_RATIO_MAX 400
238#define HINT_RATIO_MIN 100
239#define HINT_RATIO_MID ((HINT_RATIO_MAX + HINT_RATIO_MIN) / 2)
240#define HINT_RATIO_DIFF (HINT_RATIO_MAX - HINT_RATIO_MIN)
241
242static void scaling_adjust(ktime_t time)
243{
244 long hint_ratio;
245 long fast_response_adjustment;
246 long period_adjustment;
247 int idle_min_adjustment;
248 int idle_max_adjustment;
249 unsigned long dt;
250
251 dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak);
252 if (dt < SCALING_ADJUST_PERIOD)
253 return;
254
255 hint_ratio = (100 * (scale3d.fast_up_count + 1)) /
256 (scale3d.slow_down_count + 1);
257
258 if (hint_ratio > HINT_RATIO_MAX) {
259 fast_response_adjustment = -((int) scale3d.p_fast_response) / 4;
260 period_adjustment = scale3d.p_period / 2;
261 idle_min_adjustment = scale3d.p_idle_min;
262 idle_max_adjustment = scale3d.p_idle_max;
263 } else if (hint_ratio < HINT_RATIO_MIN) {
264 fast_response_adjustment = scale3d.p_fast_response / 2;
265 period_adjustment = -((int) scale3d.p_period) / 4;
266 idle_min_adjustment = -((int) scale3d.p_idle_min) / 2;
267 idle_max_adjustment = -((int) scale3d.p_idle_max) / 2;
268 } else {
269 int diff;
270 int factor;
271
272 diff = HINT_RATIO_MID - hint_ratio;
273 if (diff < 0)
274 factor = -diff * 2;
275 else {
276 factor = -diff;
277 diff *= 2;
278 }
279
280 fast_response_adjustment = diff *
281 (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2));
282 period_adjustment =
283 diff * (scale3d.p_period / HINT_RATIO_DIFF);
284 idle_min_adjustment =
285 (factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF;
286 idle_max_adjustment =
287 (factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF;
288 }
289
290 scale3d.fast_response =
291 scale3d.p_fast_response + fast_response_adjustment;
292 scale3d.period = scale3d.p_period + period_adjustment;
293 scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
294 scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment;
295
296 if (scale3d.p_verbosity >= 10)
297 pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n",
298 scale3d.fast_up_count, scale3d.slow_down_count,
299 scale3d.fast_responses, scale3d.fast_response,
300 scale3d.period, scale3d.idle_min, scale3d.idle_max);
301
302 scale3d.fast_up_count = 0;
303 scale3d.slow_down_count = 0;
304 scale3d.fast_responses = 0;
305 scale3d.last_down = time;
306 scale3d.last_tweak = time;
307}
308
309#undef SCALING_ADJUST_PERIOD
310#undef HINT_RATIO_MAX
311#undef HINT_RATIO_MIN
312#undef HINT_RATIO_MID
313#undef HINT_RATIO_DIFF
314
315static void scaling_state_check(ktime_t time)
316{
317 unsigned long dt;
318
319 /* adjustment: set scale parameters (fast_response, period) +/- 25%
320 * based on ratio of scale up to scale down hints
321 */
322 if (scale3d.p_adjust)
323 scaling_adjust(time);
324 else {
325 scale3d.fast_response = scale3d.p_fast_response;
326 scale3d.period = scale3d.p_period;
327 scale3d.idle_min = scale3d.p_idle_min;
328 scale3d.idle_max = scale3d.p_idle_max;
329 }
330
331 /* check for load peaks */
332 dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame);
333 if (dt > scale3d.fast_response) {
334 unsigned long idleness =
335 (scale3d.idle_short_term_total * 100) / dt;
336 scale3d.fast_responses++;
337 scale3d.fast_frame = time;
338 /* if too busy, scale up */
339 if (idleness < scale3d.idle_min) {
340 scale3d.is_scaled = 0;
341 scale3d.fast_up_count++;
342 if (scale3d.p_verbosity >= 5)
343 pr_info("scale3d: %ld%% busy\n",
344 100 - idleness);
345
346 reset_3d_clocks();
347 reset_scaling_counters(time);
348 return;
349 }
350 scale3d.idle_short_term_total = 0;
351 scale3d.last_short_term_idle = time;
352 }
353
354 dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame);
355 if (dt > scale3d.period) {
356 unsigned long idleness = (scale3d.idle_total * 100) / dt;
357
358 if (scale3d.p_verbosity >= 5)
359 pr_info("scale3d: idle %lu, ~%lu%%\n",
360 scale3d.idle_total, idleness);
361
362 if (idleness > scale3d.idle_max) {
363 if (!scale3d.is_scaled) {
364 scale3d.is_scaled = 1;
365 scale3d.last_down = time;
366 }
367 scale3d.slow_down_count++;
368 /* if idle time is high, clock down */
369 scale3d.scale = 100 - (idleness - scale3d.idle_min);
370 schedule_work(&scale3d.work);
371 }
372
373 reset_scaling_counters(time);
374 }
375}
376
377void nvhost_scale3d_notify_idle(struct nvhost_device *dev)
378{
379 ktime_t t;
380 unsigned long dt;
381
382 if (!scale3d.enable)
383 return;
384
385 mutex_lock(&scale3d.lock);
386
387 t = ktime_get();
388
389 if (scale3d.is_idle) {
390 dt = ktime_us_delta(t, scale3d.last_idle);
391 scale3d.idle_total += dt;
392 dt = ktime_us_delta(t, scale3d.last_short_term_idle);
393 scale3d.idle_short_term_total += dt;
394 } else
395 scale3d.is_idle = 1;
396
397 scale3d.last_idle = t;
398 scale3d.last_short_term_idle = t;
399
400 scaling_state_check(scale3d.last_idle);
401
402 /* delay idle_max % of 2 * fast_response time (given in microseconds) */
403 schedule_delayed_work(&scale3d.idle_timer,
404 msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response)
405 / 50000));
406
407 mutex_unlock(&scale3d.lock);
408}
409
410void nvhost_scale3d_notify_busy(struct nvhost_device *dev)
411{
412 unsigned long idle;
413 unsigned long short_term_idle;
414 ktime_t t;
415
416 if (!scale3d.enable)
417 return;
418
419 mutex_lock(&scale3d.lock);
420
421 cancel_delayed_work(&scale3d.idle_timer);
422
423 t = ktime_get();
424
425 if (scale3d.is_idle) {
426 idle = (unsigned long)
427 ktime_us_delta(t, scale3d.last_idle);
428 scale3d.idle_total += idle;
429 short_term_idle =
430 ktime_us_delta(t, scale3d.last_short_term_idle);
431 scale3d.idle_short_term_total += short_term_idle;
432 scale3d.is_idle = 0;
433 }
434
435 scaling_state_check(t);
436
437 mutex_unlock(&scale3d.lock);
438}
439
440static void scale3d_idle_handler(struct work_struct *work)
441{
442 int notify_idle = 0;
443
444 if (!scale3d.enable)
445 return;
446
447 mutex_lock(&scale3d.lock);
448
449 if (scale3d.is_idle && tegra_is_clk_enabled(scale3d.clk_3d)) {
450 unsigned long curr = clk_get_rate(scale3d.clk_3d);
451 if (curr > scale3d.min_rate_3d)
452 notify_idle = 1;
453 }
454
455 mutex_unlock(&scale3d.lock);
456
457 if (notify_idle)
458 nvhost_scale3d_notify_idle(NULL);
459}
460
461void nvhost_scale3d_reset()
462{
463 ktime_t t;
464
465 if (!scale3d.enable)
466 return;
467
468 t = ktime_get();
469 mutex_lock(&scale3d.lock);
470 reset_scaling_counters(t);
471 mutex_unlock(&scale3d.lock);
472}
473
474/*
475 * debugfs parameters to control 3d clock scaling
476 */
477
478void nvhost_scale3d_debug_init(struct dentry *de)
479{
480 struct dentry *d, *f;
481
482 d = debugfs_create_dir("scaling", de);
483 if (!d) {
484 pr_err("scale3d: can\'t create debugfs directory\n");
485 return;
486 }
487
488#define CREATE_SCALE3D_FILE(fname) \
489 do {\
490 f = debugfs_create_u32(#fname, S_IRUGO | S_IWUSR, d,\
491 &scale3d.p_##fname);\
492 if (NULL == f) {\
493 pr_err("scale3d: can\'t create file " #fname "\n");\
494 return;\
495 } \
496 } while (0)
497
498 CREATE_SCALE3D_FILE(fast_response);
499 CREATE_SCALE3D_FILE(idle_min);
500 CREATE_SCALE3D_FILE(idle_max);
501 CREATE_SCALE3D_FILE(period);
502 CREATE_SCALE3D_FILE(adjust);
503 CREATE_SCALE3D_FILE(scale_emc);
504 CREATE_SCALE3D_FILE(emc_dip);
505 CREATE_SCALE3D_FILE(verbosity);
506#undef CREATE_SCALE3D_FILE
507}
508
509static ssize_t enable_3d_scaling_show(struct device *device,
510 struct device_attribute *attr, char *buf)
511{
512 ssize_t res;
513
514 res = snprintf(buf, PAGE_SIZE, "%d\n", scale3d_is_enabled());
515
516 return res;
517}
518
519static ssize_t enable_3d_scaling_store(struct device *dev,
520 struct device_attribute *attr, const char *buf, size_t count)
521{
522 unsigned long val = 0;
523
524 if (strict_strtoul(buf, 10, &val) < 0)
525 return -EINVAL;
526
527 scale3d_enable(val);
528
529 return count;
530}
531
532static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR,
533 enable_3d_scaling_show, enable_3d_scaling_store);
534
535void nvhost_scale3d_init(struct nvhost_device *d)
536{
537 if (!scale3d.init) {
538 int error;
539 unsigned long max_emc, min_emc;
540 long correction;
541 mutex_init(&scale3d.lock);
542
543 scale3d.clk_3d = d->clk[0];
544 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) {
545 scale3d.clk_3d2 = d->clk[1];
546 scale3d.clk_3d_emc = d->clk[2];
547 } else
548 scale3d.clk_3d_emc = d->clk[1];
549
550 scale3d.max_rate_3d = clk_round_rate(scale3d.clk_3d, UINT_MAX);
551 scale3d.min_rate_3d = clk_round_rate(scale3d.clk_3d, 0);
552
553 if (scale3d.max_rate_3d == scale3d.min_rate_3d) {
554 pr_warn("scale3d: 3d max rate = min rate (%lu), "
555 "disabling\n", scale3d.max_rate_3d);
556 scale3d.enable = 0;
557 return;
558 }
559
560 /* emc scaling:
561 *
562 * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
563 *
564 * Remc - 3d.emc rate
565 * R3d - 3d.cbus rate
566 * Rm - 3d.cbus 'middle' rate = (max + min)/2
567 * S - emc_slope
568 * O - emc_offset
569 * Sd - emc_dip_slope
570 * Od - emc_dip_offset
571 *
572 * this superposes a quadratic dip centered around the middle 3d
573 * frequency over a linear correlation of 3d.emc to 3d clock
574 * rates.
575 *
576 * S, O are chosen so that the maximum 3d rate produces the
577 * maximum 3d.emc rate exactly, and the minimum 3d rate produces
578 * at least the minimum 3d.emc rate.
579 *
580 * Sd and Od are chosen to produce the largest dip that will
581 * keep 3d.emc frequencies monotonously decreasing with 3d
582 * frequencies. To achieve this, the first derivative of Remc
583 * with respect to R3d should be zero for the minimal 3d rate:
584 *
585 * R'emc = S - 2 * Sd * (R3d - Rm)
586 * R'emc(R3d-min) = 0
587 * S = 2 * Sd * (R3d-min - Rm)
588 * = 2 * Sd * (R3d-min - R3d-max) / 2
589 * Sd = S / (R3d-min - R3d-max)
590 *
591 * +---------------------------------------------------+
592 * | Sd = -(emc-max - emc-min) / (R3d-min - R3d-max)^2 |
593 * +---------------------------------------------------+
594 *
595 * dip = Sd * (R3d - Rm)^2 + Od
596 *
597 * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
598 *
599 * Sd * (R3d-min - Rm)^2 + Od = 0
600 * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
601 * = -Sd * ((R3d-min - R3d-max)^2) / 4
602 *
603 * +------------------------------+
604 * | Od = (emc-max - emc-min) / 4 |
605 * +------------------------------+
606 */
607
608 max_emc = clk_round_rate(scale3d.clk_3d_emc, UINT_MAX);
609 min_emc = clk_round_rate(scale3d.clk_3d_emc, 0);
610
611 scale3d.emc_slope = (max_emc - min_emc) /
612 (scale3d.max_rate_3d - scale3d.min_rate_3d);
613 scale3d.emc_offset = max_emc -
614 scale3d.emc_slope * scale3d.max_rate_3d;
615 /* guarantee max 3d rate maps to max emc rate */
616 scale3d.emc_offset += max_emc -
617 (scale3d.emc_slope * scale3d.max_rate_3d +
618 scale3d.emc_offset);
619
620 scale3d.emc_dip_offset = (max_emc - min_emc) / 4;
621 scale3d.emc_dip_slope =
622 -4 * (scale3d.emc_dip_offset /
623 (POW2(scale3d.max_rate_3d - scale3d.min_rate_3d)));
624 scale3d.emc_xmid =
625 (scale3d.max_rate_3d + scale3d.min_rate_3d) / 2;
626 correction =
627 scale3d.emc_dip_offset +
628 scale3d.emc_dip_slope *
629 POW2(scale3d.max_rate_3d - scale3d.emc_xmid);
630 scale3d.emc_dip_offset -= correction;
631
632 INIT_WORK(&scale3d.work, scale3d_clocks_handler);
633 INIT_DELAYED_WORK(&scale3d.idle_timer, scale3d_idle_handler);
634
635 /* set scaling parameter defaults */
636 scale3d.enable = 1;
637 scale3d.period = scale3d.p_period = 100000;
638 scale3d.idle_min = scale3d.p_idle_min = 10;
639 scale3d.idle_max = scale3d.p_idle_max = 15;
640 scale3d.fast_response = scale3d.p_fast_response = 7000;
641 scale3d.p_scale_emc = 1;
642 scale3d.p_emc_dip = 1;
643 scale3d.p_verbosity = 0;
644 scale3d.p_adjust = 1;
645
646 error = device_create_file(&d->dev,
647 &dev_attr_enable_3d_scaling);
648 if (error)
649 dev_err(&d->dev, "failed to create sysfs attributes");
650
651 scale3d.init = 1;
652 }
653
654 nvhost_scale3d_reset();
655}
656
657void nvhost_scale3d_deinit(struct nvhost_device *dev)
658{
659 device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling);
660 scale3d.init = 0;
661}