ARM: OMAP2: Clock: New OMAP2/3 DPLL rate rounding algorithm

This patch adds a new rate rounding algorithm for DPLL clocks on the OMAP2/3 architecture.

For a desired DPLL target rate, there may be several multiplier/divider (M, N) values which will generate a sufficiently close rate. Lower N values result in greater power economy. However, lower N values can cause the difference between the rounded rate and the target rate ("rate error") to be larger than it would be with a higher N. This can cause downstream devices to run more slowly than they otherwise would.

This DPLL rate rounding algorithm:

- attempts to find the lowest possible N (DPLL divider) to reach the target_rate (since, according to Richard Woodruff <r-woodruff@ti.com>, lower N values save more power than higher N values).

- allows developers to set an upper bound on the error between the rounded rate and the desired target rate ("rate tolerance"), so an appropriate balance between rate fidelity and power savings can be set. This maximum rate error tolerance is set via omap2_dpll_set_rate_tolerance().

- never returns a rounded rate higher than the target rate.

The rate rounding algorithm caches the last rounded M, N, and rate computation to avoid rounding the rate twice for each clk_set_rate() call. (This patch does not yet implement set_rate for DPLLs; that follows in a future patch.)

The algorithm trades execution speed for rate accuracy. It will find the (M, N) set that results in the least rate error, within a specified rate tolerance. It does this by evaluating each divider setting - on OMAP3, this involves 128 steps. Another approach to DPLL rate rounding would be to bail out as soon as a valid rate is found within the rate tolerance, which would trade rate accuracy for execution speed. Alternate implementations welcome.

This code is not yet used by the OMAP24XX DPLL clock, since it is currently defined as a composite clock, fusing the DPLL M,N and the M2 output divider.

This patch also renames the existing OMAP24xx DPLL programming functions to highlight that they program both the DPLL and the DPLL's output multiplier.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
author: Paul Walmsley <paul@pwsan.com> 2008-07-03 05:24:46 -0400
committer: Tony Lindgren <tony@atomide.com> 2008-07-03 05:24:46 -0400
commit: 88b8ba90570067178d32c654ad95786041e86e86 (patch)
tree: c2ce719334f9fbbde2500b990bc1cc295226334a /arch/arm/mach-omap2/clock.c
parent: 542313cc98e72d026d2df86f515699dfaface460 (diff)
1 files changed, 197 insertions, 1 deletions
diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 0243480e8bfe..15675bce8012 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -41,6 +41,24 @@
 #define MAX_CLOCK_ENABLE_WAIT           100000
+/*
+ * DPLL rate rounding: minimum DPLL multiplier, divider values.
+ * DPLL_MIN_MULTIPLIER is the floor that _dpll_test_mult() clamps m to;
+ * DPLL_MIN_DIVIDER is the last n that omap2_dpll_round_rate() tries as
+ * it counts the divider down.
+ */
+#define DPLL_MIN_MULTIPLIER             1
+#define DPLL_MIN_DIVIDER                1
+/*
+ * Possible error results from _dpll_test_mult.  These are bit flags:
+ * the caller tests the return value with '&'.
+ */
+#define DPLL_MULT_UNDERFLOW             (1 << 0)
+/*
+ * Scale factor to mitigate roundoff errors in DPLL rate rounding.
+ * The higher the scale factor, the greater the risk of arithmetic overflow,
+ * but the closer the rounded rate to the target rate.  DPLL_SCALE_FACTOR
+ * must be a power of DPLL_SCALE_BASE.
+ *
+ * omap2_dpll_round_rate() divides the parent rate by DPLL_SCALE_FACTOR
+ * before forming the target/parent ratio, so the multiplier it passes to
+ * _dpll_test_mult() is pre-multiplied by this factor.
+ *
+ * DPLL_ROUNDING_VAL is the rounding threshold used when that scaled
+ * multiplier is unscaled again: a remainder of at least DPLL_ROUNDING_VAL
+ * rounds m up.  With the values below it evaluates to
+ * (2 / 2) * (64 / 2) = 32, i.e. half of DPLL_SCALE_FACTOR.
+ */
+#define DPLL_SCALE_FACTOR               64
+#define DPLL_SCALE_BASE                 2
+#define DPLL_ROUNDING_VAL               ((DPLL_SCALE_BASE / 2) * \
+                                         (DPLL_SCALE_FACTOR / DPLL_SCALE_BASE))
 u8 cpu_mask;
 /*-------------------------------------------------------------------------
@@ -95,7 +113,7 @@ u32 omap2_get_dpll_rate(struct clk *clk)
 {
        long long dpll_clk;
        u32 dpll_mult, dpll_div, dpll;
-        const struct dpll_data *dd;
+        struct dpll_data *dd;
        dd = clk->dpll_data;
        /* REVISIT: What do we return on error? */
@@ -724,6 +742,184 @@ int omap2_clk_set_parent(struct clk *clk, struct clk *new_parent)
        return 0;
 }
+/* DPLL rate rounding code */
+/**
+ * omap2_dpll_set_rate_tolerance - set the error tolerance during rate rounding
+ * @clk: struct clk * of the DPLL
+ * @tolerance: maximum rate error tolerance, expressed in the same units
+ *             as the clock rates passed to omap2_dpll_round_rate() (it is
+ *             subtracted directly from the target/rounded rate difference
+ *             there)
+ *
+ * Set the maximum DPLL rate error tolerance for the rate rounding
+ * algorithm.  The rate tolerance is an attempt to balance DPLL power
+ * saving (the least divider value "n") vs. rate fidelity (the least
+ * difference between the desired DPLL target rate and the rounded
+ * rate out of the algorithm).  So, increasing the tolerance is likely
+ * to decrease DPLL power consumption and increase DPLL rate error.
+ *
+ * This call only stores the value in clk->dpll_data->rate_tolerance; it
+ * does not round or reprogram anything itself.
+ *
+ * Returns -EINVAL if provided a null clock ptr or a clk that is not a
+ * DPLL (that is, one whose dpll_data pointer is NULL); or 0 upon success.
+ */
+int omap2_dpll_set_rate_tolerance(struct clk *clk, unsigned int tolerance)
+{
+        if (!clk || !clk->dpll_data)
+                return -EINVAL;
+        clk->dpll_data->rate_tolerance = tolerance;
+        return 0;
+}
+static unsigned long _dpll_compute_new_rate(unsigned long parent_rate, unsigned int m, unsigned int n)
+{       /* Compute the rate parent_rate * m / n for multiplier m and divider n */
+        unsigned long long num;         /* wide accumulator for the product */
+        num = (unsigned long long)parent_rate * m;      /* widen first, so the multiply is done in unsigned long long */
+        do_div(num, n);         /* expected to leave the quotient in num (TODO confirm against do_div()); n == 0 is not checked here */
+        return num;             /* converted back to unsigned long on return */
+}
+/*
+ * _dpll_test_mult - test a DPLL multiplier value
+ * @m: pointer to the DPLL m (multiplier) value under test
+ * @n: current DPLL n (divider) value under test
+ * @new_rate: pointer to storage for the resulting rounded rate
+ * @target_rate: the desired DPLL rate
+ * @parent_rate: the DPLL's parent clock rate
+ *
+ * This code tests a DPLL multiplier value, ensuring that the
+ * resulting rate will not be higher than the target_rate, and that
+ * the multiplier value itself is valid for the DPLL.  Initially, the
+ * integer pointed to by the m argument should be prescaled by
+ * multiplying by DPLL_SCALE_FACTOR.  The code will replace this with
+ * a non-scaled m upon return.  This non-scaled m will result in a
+ * new_rate as close as possible to target_rate (but not greater than
+ * target_rate) given the current (parent_rate, n, prescaled m)
+ * triple. Returns DPLL_MULT_UNDERFLOW in the event that the
+ * non-scaled m attempted to underflow, which can allow the calling
+ * function to bail out early; or 0 upon success.
+ *
+ * NOTE(review): if the rounded m overshoots target_rate, m is lowered
+ * by exactly one and the rate is recomputed without being compared to
+ * target_rate again; this presumably relies on a single step always
+ * being enough - confirm.
+ *
+ * NOTE(review): when the underflow clamp fires, m is forced up to
+ * DPLL_MIN_MULTIPLIER and the rate is likewise recomputed without a
+ * second comparison, so in that case *new_rate is not guaranteed to be
+ * <= target_rate.
+ */
+static int _dpll_test_mult(int *m, int n, unsigned long *new_rate,
+                           unsigned long target_rate,
+                           unsigned long parent_rate)
+{
+        int flags = 0, carry = 0;
+        /*
+         * Unscale m and round if necessary: a scaled remainder of at
+         * least DPLL_ROUNDING_VAL rounds the unscaled m up by one
+         */
+        if (*m % DPLL_SCALE_FACTOR >= DPLL_ROUNDING_VAL)
+                carry = 1;
+        *m = (*m / DPLL_SCALE_FACTOR) + carry;
+        /*
+         * The new rate must be <= the target rate to avoid programming
+         * a rate that is impossible for the hardware to handle
+         */
+        *new_rate = _dpll_compute_new_rate(parent_rate, *m, n);
+        if (*new_rate > target_rate) {
+                (*m)--;
+                *new_rate = 0;  /* 0 marks the rate as stale; it is recomputed below */
+        }
+        /*
+         * Guard against m underflow: clamp m to the smallest valid
+         * multiplier, mark the rate stale and report the condition to
+         * the caller through the return value
+         */
+        if (*m < DPLL_MIN_MULTIPLIER) {
+                *m = DPLL_MIN_MULTIPLIER;
+                *new_rate = 0;
+                flags = DPLL_MULT_UNDERFLOW;
+        }
+        if (*new_rate == 0)     /* m was adjusted above (or the computed rate really was 0) */
+                *new_rate = _dpll_compute_new_rate(parent_rate, *m, n);
+        return flags;
+}
+/**
+ * omap2_dpll_round_rate - round a target rate for an OMAP DPLL
+ * @clk: struct clk * for a DPLL
+ * @target_rate: desired DPLL clock rate
+ *
+ * Given a DPLL, a desired target rate, and a rate tolerance, round
+ * the target rate to a possible, programmable rate for this DPLL.
+ * Rate tolerance is assumed to be set by the caller before this
+ * function is called.  Attempts to select the minimum possible n
+ * within the tolerance to reduce power consumption.  Stores the
+ * computed (m, n) in the DPLL's dpll_data structure so set_rate()
+ * will not need to call this (expensive) function again.  Returns ~0
+ * if the target rate cannot be rounded, either because the rate is
+ * too low or because the rate tolerance is set too tightly; or the
+ * rounded rate upon success.
+ *
+ * Because the return type is long, the ~0 failure value reaches the
+ * caller as -1.  ~0 is also returned when @clk is NULL or has no
+ * dpll_data.
+ *
+ * Side effects: last_rounded_rate is cleared to 0 before the search
+ * starts and stays 0 on failure; last_rounded_m and last_rounded_n are
+ * written only on success.
+ *
+ * NOTE(review): clk->parent is dereferenced without a NULL check, and
+ * (parent rate / DPLL_SCALE_FACTOR) is used as a divisor, so a parent
+ * rate below DPLL_SCALE_FACTOR would presumably divide by zero - confirm
+ * that callers guarantee a sane parent clock.
+ */
+long omap2_dpll_round_rate(struct clk *clk, unsigned long target_rate)
+{
+        int m, n, r, e, scaled_max_m;
+        unsigned long scaled_rt_rp, new_rate;
+        int min_e = -1, min_e_m = -1, min_e_n = -1;     /* -1 means "no candidate recorded yet" */
+        if (!clk || !clk->dpll_data)
+                return ~0;
+        pr_debug("clock: starting DPLL round_rate for clock %s, target rate "
+                 "%ld\n", clk->name, target_rate);
+        scaled_rt_rp = target_rate / (clk->parent->rate / DPLL_SCALE_FACTOR);  /* target/parent ratio, scaled up by roughly DPLL_SCALE_FACTOR */
+        scaled_max_m = clk->dpll_data->max_multiplier * DPLL_SCALE_FACTOR;     /* largest multiplier, in the same scaled units */
+        clk->dpll_data->last_rounded_rate = 0;  /* drop any previously cached result */
+        for (n = clk->dpll_data->max_divider; n >= DPLL_MIN_DIVIDER; n--) {
+                /*
+                 * Compute the scaled DPLL multiplier, based on the divider.
+                 * NOTE(review): the unsigned long product is stored into
+                 * an int; this assumes the value fits - confirm.
+                 */
+                m = scaled_rt_rp * n;
+                /*
+                 * Since we're counting n down, a m overflow means we
+                 * can immediately skip to the next n
+                 */
+                if (m > scaled_max_m)
+                        continue;
+                r = _dpll_test_mult(&m, n, &new_rate, target_rate,
+                                    clk->parent->rate);
+                e = target_rate - new_rate;     /* rate error; negative only if new_rate came back above target_rate */
+                pr_debug("clock: n = %d: m = %d: rate error is %d "
+                         "(new_rate = %ld)\n", n, m, e, new_rate);
+                if (min_e == -1 ||
+                    min_e >= (int)(abs(e) - clk->dpll_data->rate_tolerance)) { /* first candidate, or the recorded error is no smaller than this error less the tolerance */
+                        min_e = e;
+                        min_e_m = m;
+                        min_e_n = n;
+                        pr_debug("clock: found new least error %d\n", min_e);
+                }
+                /*
+                 * Since we're counting n down, a m underflow means we
+                 * can bail out completely (since as n decreases in
+                 * the next iteration, there's no way that m can
+                 * increase beyond the current m)
+                 */
+                if (r & DPLL_MULT_UNDERFLOW)
+                        break;
+        }
+        if (min_e < 0) {        /* still the -1 sentinel, or a negative error was recorded */
+                pr_debug("clock: error: target rate or tolerance too low\n");
+                return ~0;
+        }
+        clk->dpll_data->last_rounded_m = min_e_m;
+        clk->dpll_data->last_rounded_n = min_e_n;
+        clk->dpll_data->last_rounded_rate =
+                _dpll_compute_new_rate(clk->parent->rate, min_e_m,  min_e_n);
+        pr_debug("clock: final least error: e = %d, m = %d, n = %d\n",
+                 min_e, min_e_m, min_e_n);
+        pr_debug("clock: final rate: %ld  (target rate: %ld)\n",
+                 clk->dpll_data->last_rounded_rate, target_rate);
+        return clk->dpll_data->last_rounded_rate;
+}
 /*-------------------------------------------------------------------------
 * Omap2 clock reset and init functions
 *-------------------------------------------------------------------------*/
author	Paul Walmsley <paul@pwsan.com>	2008-07-03 05:24:46 -0400
committer	Tony Lindgren <tony@atomide.com>	2008-07-03 05:24:46 -0400
commit	88b8ba90570067178d32c654ad95786041e86e86 (patch)
tree	c2ce719334f9fbbde2500b990bc1cc295226334a /arch/arm/mach-omap2/clock.c
parent	542313cc98e72d026d2df86f515699dfaface460 (diff)

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 0243480e8bfe..15675bce8012 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -41,6 +41,24 @@
41		41
42	#define MAX_CLOCK_ENABLE_WAIT 100000	42	#define MAX_CLOCK_ENABLE_WAIT 100000
43		43
		44	/* DPLL rate rounding: minimum DPLL multiplier, divider values */
		45	#define DPLL_MIN_MULTIPLIER 1
		46	#define DPLL_MIN_DIVIDER 1
		47
		48	/* Possible error results from _dpll_test_mult */
		49	#define DPLL_MULT_UNDERFLOW (1 << 0)
		50
		51	/*
		52	* Scale factor to mitigate roundoff errors in DPLL rate rounding.
		53	* The higher the scale factor, the greater the risk of arithmetic overflow,
		54	* but the closer the rounded rate to the target rate. DPLL_SCALE_FACTOR
		55	* must be a power of DPLL_SCALE_BASE.
		56	*/
		57	#define DPLL_SCALE_FACTOR 64
		58	#define DPLL_SCALE_BASE 2
		59	#define DPLL_ROUNDING_VAL ((DPLL_SCALE_BASE / 2) * \
		60	(DPLL_SCALE_FACTOR / DPLL_SCALE_BASE))
		61
44	u8 cpu_mask;	62	u8 cpu_mask;
45		63
46	/*-------------------------------------------------------------------------	64	/*-------------------------------------------------------------------------
@@ -95,7 +113,7 @@ u32 omap2_get_dpll_rate(struct clk *clk)
95	{	113	{
96	long long dpll_clk;	114	long long dpll_clk;
97	u32 dpll_mult, dpll_div, dpll;	115	u32 dpll_mult, dpll_div, dpll;
98	const struct dpll_data *dd;	116	struct dpll_data *dd;
99		117
100	dd = clk->dpll_data;	118	dd = clk->dpll_data;
101	/* REVISIT: What do we return on error? */	119	/* REVISIT: What do we return on error? */
@@ -724,6 +742,184 @@ int omap2_clk_set_parent(struct clk *clk, struct clk *new_parent)
724	return 0;	742	return 0;
725	}	743	}
726		744
		745	/* DPLL rate rounding code */
		746
		747	/**
		748	* omap2_dpll_set_rate_tolerance: set the error tolerance during rate rounding
		749	* @clk: struct clk * of the DPLL
		750	* @tolerance: maximum rate error tolerance
		751	*
		752	* Set the maximum DPLL rate error tolerance for the rate rounding
		753	* algorithm. The rate tolerance is an attempt to balance DPLL power
		754	* saving (the least divider value "n") vs. rate fidelity (the least
		755	* difference between the desired DPLL target rate and the rounded
		756	* rate out of the algorithm). So, increasing the tolerance is likely
		757	* to decrease DPLL power consumption and increase DPLL rate error.
		758	* Returns -EINVAL if provided a null clock ptr or a clk that is not a
		759	* DPLL; or 0 upon success.
		760	*/
		761	int omap2_dpll_set_rate_tolerance(struct clk *clk, unsigned int tolerance)
		762	{
		763	if (!clk || !clk->dpll_data)
		764	return -EINVAL;
		765
		766	clk->dpll_data->rate_tolerance = tolerance;
		767
		768	return 0;
		769	}
		770
		771	static unsigned long _dpll_compute_new_rate(unsigned long parent_rate, unsigned int m, unsigned int n)
		772	{
		773	unsigned long long num;
		774
		775	num = (unsigned long long)parent_rate * m;
		776	do_div(num, n);
		777	return num;
		778	}
		779
		780	/*
		781	* _dpll_test_mult - test a DPLL multiplier value
		782	* @m: pointer to the DPLL m (multiplier) value under test
		783	* @n: current DPLL n (divider) value under test
		784	* @new_rate: pointer to storage for the resulting rounded rate
		785	* @target_rate: the desired DPLL rate
		786	* @parent_rate: the DPLL's parent clock rate
		787	*
		788	* This code tests a DPLL multiplier value, ensuring that the
		789	* resulting rate will not be higher than the target_rate, and that
		790	* the multiplier value itself is valid for the DPLL. Initially, the
		791	* integer pointed to by the m argument should be prescaled by
		792	* multiplying by DPLL_SCALE_FACTOR. The code will replace this with
		793	* a non-scaled m upon return. This non-scaled m will result in a
		794	* new_rate as close as possible to target_rate (but not greater than
		795	* target_rate) given the current (parent_rate, n, prescaled m)
		796	* triple. Returns DPLL_MULT_UNDERFLOW in the event that the
		797	* non-scaled m attempted to underflow, which can allow the calling
		798	* function to bail out early; or 0 upon success.
		799	*/
		800	static int _dpll_test_mult(int *m, int n, unsigned long *new_rate,
		801	unsigned long target_rate,
		802	unsigned long parent_rate)
		803	{
		804	int flags = 0, carry = 0;
		805
		806	/* Unscale m and round if necessary */
		807	if (*m % DPLL_SCALE_FACTOR >= DPLL_ROUNDING_VAL)
		808	carry = 1;
		809	*m = (*m / DPLL_SCALE_FACTOR) + carry;
		810
		811	/*
		812	* The new rate must be <= the target rate to avoid programming
		813	* a rate that is impossible for the hardware to handle
		814	*/
		815	*new_rate = _dpll_compute_new_rate(parent_rate, *m, n);
		816	if (*new_rate > target_rate) {
		817	(*m)--;
		818	*new_rate = 0;
		819	}
		820
		821	/* Guard against m underflow */
		822	if (*m < DPLL_MIN_MULTIPLIER) {
		823	*m = DPLL_MIN_MULTIPLIER;
		824	*new_rate = 0;
		825	flags = DPLL_MULT_UNDERFLOW;
		826	}
		827
		828	if (*new_rate == 0)
		829	*new_rate = _dpll_compute_new_rate(parent_rate, *m, n);
		830
		831	return flags;
		832	}
		833
		834	/**
		835	* omap2_dpll_round_rate - round a target rate for an OMAP DPLL
		836	* @clk: struct clk * for a DPLL
		837	* @target_rate: desired DPLL clock rate
		838	*
		839	* Given a DPLL, a desired target rate, and a rate tolerance, round
		840	* the target rate to a possible, programmable rate for this DPLL.
		841	* Rate tolerance is assumed to be set by the caller before this
		842	* function is called. Attempts to select the minimum possible n
		843	* within the tolerance to reduce power consumption. Stores the
		844	* computed (m, n) in the DPLL's dpll_data structure so set_rate()
		845	* will not need to call this (expensive) function again. Returns ~0
		846	* if the target rate cannot be rounded, either because the rate is
		847	* too low or because the rate tolerance is set too tightly; or the
		848	* rounded rate upon success.
		849	*/
		850	long omap2_dpll_round_rate(struct clk *clk, unsigned long target_rate)
		851	{
		852	int m, n, r, e, scaled_max_m;
		853	unsigned long scaled_rt_rp, new_rate;
		854	int min_e = -1, min_e_m = -1, min_e_n = -1;
		855
		856	if (!clk || !clk->dpll_data)
		857	return ~0;
		858
		859	pr_debug("clock: starting DPLL round_rate for clock %s, target rate "
		860	"%ld\n", clk->name, target_rate);
		861
		862	scaled_rt_rp = target_rate / (clk->parent->rate / DPLL_SCALE_FACTOR);
		863	scaled_max_m = clk->dpll_data->max_multiplier * DPLL_SCALE_FACTOR;
		864
		865	clk->dpll_data->last_rounded_rate = 0;
		866
		867	for (n = clk->dpll_data->max_divider; n >= DPLL_MIN_DIVIDER; n--) {
		868
		869	/* Compute the scaled DPLL multiplier, based on the divider */
		870	m = scaled_rt_rp * n;
		871
		872	/*
		873	* Since we're counting n down, a m overflow means we can
		874	* can immediately skip to the next n
		875	*/
		876	if (m > scaled_max_m)
		877	continue;
		878
		879	r = _dpll_test_mult(&m, n, &new_rate, target_rate,
		880	clk->parent->rate);
		881
		882	e = target_rate - new_rate;
		883	pr_debug("clock: n = %d: m = %d: rate error is %d "
		884	"(new_rate = %ld)\n", n, m, e, new_rate);
		885
		886	if (min_e == -1 ||
		887	min_e >= (int)(abs(e) - clk->dpll_data->rate_tolerance)) {
		888	min_e = e;
		889	min_e_m = m;
		890	min_e_n = n;
		891
		892	pr_debug("clock: found new least error %d\n", min_e);
		893	}
		894
		895	/*
		896	* Since we're counting n down, a m underflow means we
		897	* can bail out completely (since as n decreases in
		898	* the next iteration, there's no way that m can
		899	* increase beyond the current m)
		900	*/
		901	if (r & DPLL_MULT_UNDERFLOW)
		902	break;
		903	}
		904
		905	if (min_e < 0) {
		906	pr_debug("clock: error: target rate or tolerance too low\n");
		907	return ~0;
		908	}
		909
		910	clk->dpll_data->last_rounded_m = min_e_m;
		911	clk->dpll_data->last_rounded_n = min_e_n;
		912	clk->dpll_data->last_rounded_rate =
		913	_dpll_compute_new_rate(clk->parent->rate, min_e_m, min_e_n);
		914
		915	pr_debug("clock: final least error: e = %d, m = %d, n = %d\n",
		916	min_e, min_e_m, min_e_n);
		917	pr_debug("clock: final rate: %ld (target rate: %ld)\n",
		918	clk->dpll_data->last_rounded_rate, target_rate);
		919
		920	return clk->dpll_data->last_rounded_rate;
		921	}
		922
727	/*-------------------------------------------------------------------------	923	/*-------------------------------------------------------------------------
728	* Omap2 clock reset and init functions	924	* Omap2 clock reset and init functions
729	*-------------------------------------------------------------------------*/	925	*-------------------------------------------------------------------------*/