diff options
author | Paul Walmsley <paul@pwsan.com> | 2008-07-03 05:24:46 -0400 |
---|---|---|
committer | Tony Lindgren <tony@atomide.com> | 2008-07-03 05:24:46 -0400 |
commit | 88b8ba90570067178d32c654ad95786041e86e86 (patch) | |
tree | c2ce719334f9fbbde2500b990bc1cc295226334a /arch/arm/mach-omap2/clock.c | |
parent | 542313cc98e72d026d2df86f515699dfaface460 (diff) |
ARM: OMAP2: Clock: New OMAP2/3 DPLL rate rounding algorithm
This patch adds a new rate rounding algorithm for DPLL clocks on the
OMAP2/3 architecture.
For a desired DPLL target rate, there may be several
multiplier/divider (M, N) values which will generate a sufficiently
close rate. Lower N values result in greater power economy. However,
lower N values can cause the difference between the rounded rate and
the target rate ("rate error") to be larger than it would be with a
higher N. This can cause downstream devices to run more slowly than
they otherwise would.
This DPLL rate rounding algorithm:
- attempts to find the lowest possible N (DPLL divider) to reach the
target_rate (since, according to Richard Woodruff <r-woodruff@ti.com>,
lower N values save more power than higher N values).
- allows developers to set an upper bound on the error between the
rounded rate and the desired target rate ("rate tolerance"), so an
appropriate balance between rate fidelity and power savings can be
set. This maximum rate error tolerance is set via
omap2_dpll_set_rate_tolerance().
- never returns a rounded rate higher than the target rate.
The rate rounding algorithm caches the last rounded M, N, and rate
computation to avoid rounding the rate twice for each clk_set_rate()
call. (This patch does not yet implement set_rate for DPLLs; that
follows in a future patch.)
The algorithm trades execution speed for rate accuracy. It will find
the (M, N) set that results in the least rate error, within a
specified rate tolerance. It does this by evaluating each divider
setting - on OMAP3, this involves 128 steps. Another approach to DPLL
rate rounding would be to bail out as soon as a valid rate is found
within the rate tolerance, which would trade rate accuracy for
execution speed. Alternate implementations welcome.
This code is not yet used by the OMAP24XX DPLL clock, since it
is currently defined as a composite clock, fusing the DPLL M,N and the
M2 output divider. This patch also renames the existing OMAP24xx DPLL
programming functions to highlight that they program both the DPLL and
the DPLL's output multiplier.
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Diffstat (limited to 'arch/arm/mach-omap2/clock.c')
-rw-r--r-- | arch/arm/mach-omap2/clock.c | 198 |
1 files changed, 197 insertions, 1 deletions
diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 0243480e8bfe..15675bce8012 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c | |||
@@ -41,6 +41,24 @@ | |||
41 | 41 | ||
42 | #define MAX_CLOCK_ENABLE_WAIT 100000 | 42 | #define MAX_CLOCK_ENABLE_WAIT 100000 |
43 | 43 | ||
/* Smallest multiplier (m) and divider (n) considered by DPLL rate rounding */
#define DPLL_MIN_MULTIPLIER		1
#define DPLL_MIN_DIVIDER		1

/* Flag bits that _dpll_test_mult() may return */
#define DPLL_MULT_UNDERFLOW		(1 << 0)

/*
 * Fixed-point scaling used during DPLL rate rounding to reduce roundoff
 * error.  A larger DPLL_SCALE_FACTOR brings the rounded rate closer to the
 * target rate, at the price of a greater risk of arithmetic overflow.
 * DPLL_SCALE_FACTOR must be a power of DPLL_SCALE_BASE.
 * DPLL_ROUNDING_VAL is the scaled remainder at or above which an unscaled
 * multiplier is rounded up.
 */
#define DPLL_SCALE_BASE			2
#define DPLL_SCALE_FACTOR		64
#define DPLL_ROUNDING_VAL		((DPLL_SCALE_BASE / 2) * \
					 (DPLL_SCALE_FACTOR / DPLL_SCALE_BASE))
61 | |||
44 | u8 cpu_mask; | 62 | u8 cpu_mask; |
45 | 63 | ||
46 | /*------------------------------------------------------------------------- | 64 | /*------------------------------------------------------------------------- |
@@ -95,7 +113,7 @@ u32 omap2_get_dpll_rate(struct clk *clk) | |||
95 | { | 113 | { |
96 | long long dpll_clk; | 114 | long long dpll_clk; |
97 | u32 dpll_mult, dpll_div, dpll; | 115 | u32 dpll_mult, dpll_div, dpll; |
98 | const struct dpll_data *dd; | 116 | struct dpll_data *dd; |
99 | 117 | ||
100 | dd = clk->dpll_data; | 118 | dd = clk->dpll_data; |
101 | /* REVISIT: What do we return on error? */ | 119 | /* REVISIT: What do we return on error? */ |
@@ -724,6 +742,184 @@ int omap2_clk_set_parent(struct clk *clk, struct clk *new_parent) | |||
724 | return 0; | 742 | return 0; |
725 | } | 743 | } |
726 | 744 | ||
745 | /* DPLL rate rounding code */ | ||
746 | |||
747 | /** | ||
748 | * omap2_dpll_set_rate_tolerance: set the error tolerance during rate rounding | ||
749 | * @clk: struct clk * of the DPLL | ||
750 | * @tolerance: maximum rate error tolerance | ||
751 | * | ||
752 | * Set the maximum DPLL rate error tolerance for the rate rounding | ||
753 | * algorithm. The rate tolerance is an attempt to balance DPLL power | ||
754 | * saving (the least divider value "n") vs. rate fidelity (the least | ||
755 | * difference between the desired DPLL target rate and the rounded | ||
756 | * rate out of the algorithm). So, increasing the tolerance is likely | ||
757 | * to decrease DPLL power consumption and increase DPLL rate error. | ||
758 | * Returns -EINVAL if provided a null clock ptr or a clk that is not a | ||
759 | * DPLL; or 0 upon success. | ||
760 | */ | ||
761 | int omap2_dpll_set_rate_tolerance(struct clk *clk, unsigned int tolerance) | ||
762 | { | ||
763 | if (!clk || !clk->dpll_data) | ||
764 | return -EINVAL; | ||
765 | |||
766 | clk->dpll_data->rate_tolerance = tolerance; | ||
767 | |||
768 | return 0; | ||
769 | } | ||
770 | |||
/*
 * _dpll_compute_new_rate - compute the rate produced by an (m, n) pair
 * @parent_rate: the DPLL's parent clock rate
 * @m: DPLL multiplier
 * @n: DPLL divider
 *
 * Returns parent_rate * m / n.  The product is formed in a 64-bit
 * variable and divided with do_div(); the quotient is then returned as
 * an unsigned long.
 */
static unsigned long _dpll_compute_new_rate(unsigned long parent_rate,
					    unsigned int m, unsigned int n)
{
	unsigned long long rate = parent_rate;

	rate *= m;
	do_div(rate, n);

	return rate;
}
779 | |||
780 | /* | ||
781 | * _dpll_test_mult - test a DPLL multiplier value | ||
782 | * @m: pointer to the DPLL m (multiplier) value under test | ||
783 | * @n: current DPLL n (divider) value under test | ||
784 | * @new_rate: pointer to storage for the resulting rounded rate | ||
785 | * @target_rate: the desired DPLL rate | ||
786 | * @parent_rate: the DPLL's parent clock rate | ||
787 | * | ||
788 | * This code tests a DPLL multiplier value, ensuring that the | ||
789 | * resulting rate will not be higher than the target_rate, and that | ||
790 | * the multiplier value itself is valid for the DPLL. Initially, the | ||
791 | * integer pointed to by the m argument should be prescaled by | ||
792 | * multiplying by DPLL_SCALE_FACTOR. The code will replace this with | ||
793 | * a non-scaled m upon return. This non-scaled m will result in a | ||
794 | * new_rate as close as possible to target_rate (but not greater than | ||
795 | * target_rate) given the current (parent_rate, n, prescaled m) | ||
796 | * triple. Returns DPLL_MULT_UNDERFLOW in the event that the | ||
797 | * non-scaled m attempted to underflow, which can allow the calling | ||
798 | * function to bail out early; or 0 upon success. | ||
799 | */ | ||
800 | static int _dpll_test_mult(int *m, int n, unsigned long *new_rate, | ||
801 | unsigned long target_rate, | ||
802 | unsigned long parent_rate) | ||
803 | { | ||
804 | int flags = 0, carry = 0; | ||
805 | |||
806 | /* Unscale m and round if necessary */ | ||
807 | if (*m % DPLL_SCALE_FACTOR >= DPLL_ROUNDING_VAL) | ||
808 | carry = 1; | ||
809 | *m = (*m / DPLL_SCALE_FACTOR) + carry; | ||
810 | |||
811 | /* | ||
812 | * The new rate must be <= the target rate to avoid programming | ||
813 | * a rate that is impossible for the hardware to handle | ||
814 | */ | ||
815 | *new_rate = _dpll_compute_new_rate(parent_rate, *m, n); | ||
816 | if (*new_rate > target_rate) { | ||
817 | (*m)--; | ||
818 | *new_rate = 0; | ||
819 | } | ||
820 | |||
821 | /* Guard against m underflow */ | ||
822 | if (*m < DPLL_MIN_MULTIPLIER) { | ||
823 | *m = DPLL_MIN_MULTIPLIER; | ||
824 | *new_rate = 0; | ||
825 | flags = DPLL_MULT_UNDERFLOW; | ||
826 | } | ||
827 | |||
828 | if (*new_rate == 0) | ||
829 | *new_rate = _dpll_compute_new_rate(parent_rate, *m, n); | ||
830 | |||
831 | return flags; | ||
832 | } | ||
833 | |||
834 | /** | ||
835 | * omap2_dpll_round_rate - round a target rate for an OMAP DPLL | ||
836 | * @clk: struct clk * for a DPLL | ||
837 | * @target_rate: desired DPLL clock rate | ||
838 | * | ||
839 | * Given a DPLL, a desired target rate, and a rate tolerance, round | ||
840 | * the target rate to a possible, programmable rate for this DPLL. | ||
841 | * Rate tolerance is assumed to be set by the caller before this | ||
842 | * function is called. Attempts to select the minimum possible n | ||
843 | * within the tolerance to reduce power consumption. Stores the | ||
844 | * computed (m, n) in the DPLL's dpll_data structure so set_rate() | ||
845 | * will not need to call this (expensive) function again. Returns ~0 | ||
846 | * if the target rate cannot be rounded, either because the rate is | ||
847 | * too low or because the rate tolerance is set too tightly; or the | ||
848 | * rounded rate upon success. | ||
849 | */ | ||
850 | long omap2_dpll_round_rate(struct clk *clk, unsigned long target_rate) | ||
851 | { | ||
852 | int m, n, r, e, scaled_max_m; | ||
853 | unsigned long scaled_rt_rp, new_rate; | ||
854 | int min_e = -1, min_e_m = -1, min_e_n = -1; | ||
855 | |||
856 | if (!clk || !clk->dpll_data) | ||
857 | return ~0; | ||
858 | |||
859 | pr_debug("clock: starting DPLL round_rate for clock %s, target rate " | ||
860 | "%ld\n", clk->name, target_rate); | ||
861 | |||
862 | scaled_rt_rp = target_rate / (clk->parent->rate / DPLL_SCALE_FACTOR); | ||
863 | scaled_max_m = clk->dpll_data->max_multiplier * DPLL_SCALE_FACTOR; | ||
864 | |||
865 | clk->dpll_data->last_rounded_rate = 0; | ||
866 | |||
867 | for (n = clk->dpll_data->max_divider; n >= DPLL_MIN_DIVIDER; n--) { | ||
868 | |||
869 | /* Compute the scaled DPLL multiplier, based on the divider */ | ||
870 | m = scaled_rt_rp * n; | ||
871 | |||
872 | /* | ||
873 | * Since we're counting n down, a m overflow means we can | ||
874 | * can immediately skip to the next n | ||
875 | */ | ||
876 | if (m > scaled_max_m) | ||
877 | continue; | ||
878 | |||
879 | r = _dpll_test_mult(&m, n, &new_rate, target_rate, | ||
880 | clk->parent->rate); | ||
881 | |||
882 | e = target_rate - new_rate; | ||
883 | pr_debug("clock: n = %d: m = %d: rate error is %d " | ||
884 | "(new_rate = %ld)\n", n, m, e, new_rate); | ||
885 | |||
886 | if (min_e == -1 || | ||
887 | min_e >= (int)(abs(e) - clk->dpll_data->rate_tolerance)) { | ||
888 | min_e = e; | ||
889 | min_e_m = m; | ||
890 | min_e_n = n; | ||
891 | |||
892 | pr_debug("clock: found new least error %d\n", min_e); | ||
893 | } | ||
894 | |||
895 | /* | ||
896 | * Since we're counting n down, a m underflow means we | ||
897 | * can bail out completely (since as n decreases in | ||
898 | * the next iteration, there's no way that m can | ||
899 | * increase beyond the current m) | ||
900 | */ | ||
901 | if (r & DPLL_MULT_UNDERFLOW) | ||
902 | break; | ||
903 | } | ||
904 | |||
905 | if (min_e < 0) { | ||
906 | pr_debug("clock: error: target rate or tolerance too low\n"); | ||
907 | return ~0; | ||
908 | } | ||
909 | |||
910 | clk->dpll_data->last_rounded_m = min_e_m; | ||
911 | clk->dpll_data->last_rounded_n = min_e_n; | ||
912 | clk->dpll_data->last_rounded_rate = | ||
913 | _dpll_compute_new_rate(clk->parent->rate, min_e_m, min_e_n); | ||
914 | |||
915 | pr_debug("clock: final least error: e = %d, m = %d, n = %d\n", | ||
916 | min_e, min_e_m, min_e_n); | ||
917 | pr_debug("clock: final rate: %ld (target rate: %ld)\n", | ||
918 | clk->dpll_data->last_rounded_rate, target_rate); | ||
919 | |||
920 | return clk->dpll_data->last_rounded_rate; | ||
921 | } | ||
922 | |||
727 | /*------------------------------------------------------------------------- | 923 | /*------------------------------------------------------------------------- |
728 | * Omap2 clock reset and init functions | 924 | * Omap2 clock reset and init functions |
729 | *-------------------------------------------------------------------------*/ | 925 | *-------------------------------------------------------------------------*/ |