sched: Add a comment to effective_load() since it's a pain

Every time I have to stare at this function I need to completely reverse engineer its workings, about time I write a comment explaining the thing.

Collected bits and pieces from previous changelogs, mostly:

  4be9daaa1b33701f011f4117f22dc1e45a3e6e34
  83378269a5fad98f562ebc0f09c349575e6cbfe1

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1318518057.27731.2.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2011-10-13 10:52:28 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-11-14 06:50:32 -0500
commit: cf5f0acf3935c91379e709a71ecf68805d366659 (patch)
tree: 66bbd968ec8031c33e7134b6c7f9387c796d6873 /kernel
parent: 7f80850d3f9fd8fda23a317044aef3a6bafab06b (diff)
1 files changed, 95 insertions, 18 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7c..aba20f495188 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
                list_del_leaf_cfs_rq(cfs_rq);
 }
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+        long tg_weight;
+        /*
+         * Use this CPU's actual weight instead of the last load_contribution
+         * to gain a more accurate current total weight. See
+         * update_cfs_rq_load_contribution().
+         */
+        tg_weight = atomic_read(&tg->load_weight);
+        tg_weight -= cfs_rq->load_contribution;
+        tg_weight += cfs_rq->load.weight;
+        return tg_weight;
+}
 static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-        long load_weight, load, shares;
+        long tg_weight, load, shares;
+        tg_weight = calc_tg_weight(tg, cfs_rq);
        load = cfs_rq->load.weight;
-        load_weight = atomic_read(&tg->load_weight);
-        load_weight += load;
-        load_weight -= cfs_rq->load_contribution;
        shares = (tg->shares * load);
-        if (load_weight)
+        if (tg_weight)
-                shares /= load_weight;
+                shares /= tg_weight;
        if (shares < MIN_SHARES)
                shares = MIN_SHARES;
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
 * Adding load to a group doesn't make a group heavier, but can cause movement
 * of group shares between cpus. Assuming the shares were perfectly aligned one
 * can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ *   s_i = rw_i / \Sum rw_j                                             (1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ *   rw_i = {   2,   4,   1,   0 }
+ *   s_i  = { 2/7, 4/7, 1/7,   0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wg we can compute the new shares distribution (s'_i) using:
+ *
+ *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)                            (2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ *   rw'_i = {   3,   4,   1,   0 }
+ *   s'_i  = { 3/8, 4/8, 1/8,   0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ *   dw_i = S * (s'_i - s_i)                                            (3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
 */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
        struct sched_entity *se = tg->se[cpu];
-        if (!tg->parent)
+        if (!tg->parent)        /* the trivial, non-cgroup case */
                return wl;
        for_each_sched_entity(se) {
-                long lw, w;
+                long w, W;
                tg = se->my_q->tg;
-                w = se->my_q->load.weight;
-                /* use this cpu's instantaneous contribution */
+                /*
-                lw = atomic_read(&tg->load_weight);
+                 * W = @wg + \Sum rw_j
-                lw -= se->my_q->load_contribution;
+                 */
-                lw += w + wg;
+                W = wg + calc_tg_weight(tg, se->my_q);
-                wl += w;
+                /*
+                 * w = rw_i + @wl
+                 */
+                w = se->my_q->load.weight + wl;
-                if (lw > 0 && wl < lw)
+                /*
-                        wl = (wl * tg->shares) / lw;
+                 * wl = S * s'_i; see (2)
+                 */
+                if (W > 0 && w < W)
+                        wl = (w * tg->shares) / W;
                else
                        wl = tg->shares;
-                /* zero point is MIN_SHARES */
+                /*
+                 * Per the above, wl is the new se->load.weight value; since
+                 * those are clipped to [MIN_SHARES, ...) do so now. See
+                 * calc_cfs_shares().
+                 */
                if (wl < MIN_SHARES)
                        wl = MIN_SHARES;
+                /*
+                 * wl = dw_i = S * (s'_i - s_i); see (3)
+                 */
                wl -= se->load.weight;
+                /*
+                 * Recursively apply this logic to all parent groups to compute
+                 * the final effective load change on the root group. Since
+                 * only the @tg group gets extra weight, all parent groups can
+                 * only redistribute existing shares. @wl is the shift in shares
+                 * resulting from this level per the above.
+                 */
                wg = 0;
        }
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-10-13 10:52:28 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-11-14 06:50:32 -0500
commit	cf5f0acf3935c91379e709a71ecf68805d366659 (patch)
tree	66bbd968ec8031c33e7134b6c7f9387c796d6873 /kernel
parent	7f80850d3f9fd8fda23a317044aef3a6bafab06b (diff)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7c..aba20f495188 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
772	list_del_leaf_cfs_rq(cfs_rq);	772	list_del_leaf_cfs_rq(cfs_rq);
773	}	773	}
774		774
		775	static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
		776	{
		777	long tg_weight;
		778
		779	/*
		780	* Use this CPU's actual weight instead of the last load_contribution
		781	* to gain a more accurate current total weight. See
		782	* update_cfs_rq_load_contribution().
		783	*/
		784	tg_weight = atomic_read(&tg->load_weight);
		785	tg_weight -= cfs_rq->load_contribution;
		786	tg_weight += cfs_rq->load.weight;
		787
		788	return tg_weight;
		789	}
		790
775	static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)	791	static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
776	{	792	{
777	long load_weight, load, shares;	793	long tg_weight, load, shares;
778		794
		795	tg_weight = calc_tg_weight(tg, cfs_rq);
779	load = cfs_rq->load.weight;	796	load = cfs_rq->load.weight;
780		797
781	load_weight = atomic_read(&tg->load_weight);
782	load_weight += load;
783	load_weight -= cfs_rq->load_contribution;
784
785	shares = (tg->shares * load);	798	shares = (tg->shares * load);
786	if (load_weight)	799	if (tg_weight)
787	shares /= load_weight;	800	shares /= tg_weight;
788		801
789	if (shares < MIN_SHARES)	802	if (shares < MIN_SHARES)
790	shares = MIN_SHARES;	803	shares = MIN_SHARES;
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
2036	* Adding load to a group doesn't make a group heavier, but can cause movement	2049	* Adding load to a group doesn't make a group heavier, but can cause movement
2037	* of group shares between cpus. Assuming the shares were perfectly aligned one	2050	* of group shares between cpus. Assuming the shares were perfectly aligned one
2038	* can calculate the shift in shares.	2051	* can calculate the shift in shares.
		2052	*
		2053	* Calculate the effective load difference if @wl is added (subtracted) to @tg
		2054	* on this @cpu and results in a total addition (subtraction) of @wg to the
		2055	* total group weight.
		2056	*
		2057	* Given a runqueue weight distribution (rw_i) we can compute a shares
		2058	* distribution (s_i) using:
		2059	*
		2060	* s_i = rw_i / \Sum rw_j (1)
		2061	*
		2062	* Suppose we have 4 CPUs and our @tg is a direct child of the root group and
		2063	* has 7 equal weight tasks, distributed as below (rw_i), with the resulting
		2064	* shares distribution (s_i):
		2065	*
		2066	* rw_i = { 2, 4, 1, 0 }
		2067	* s_i = { 2/7, 4/7, 1/7, 0 }
		2068	*
		2069	* As per wake_affine() we're interested in the load of two CPUs (the CPU the
		2070	* task used to run on and the CPU the waker is running on), we need to
		2071	* compute the effect of waking a task on either CPU and, in case of a sync
		2072	* wakeup, compute the effect of the current task going to sleep.
		2073	*
		2074	* So for a change of @wl to the local @cpu with an overall group weight change
		2075	* of @wg we can compute the new shares distribution (s'_i) using:
		2076	*
		2077	* s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2)
		2078	*
		2079	* Suppose we're interested in CPUs 0 and 1, and want to compute the load
		2080	* differences in waking a task to CPU 0. The additional task changes the
		2081	* weight and shares distributions like:
		2082	*
		2083	* rw'_i = { 3, 4, 1, 0 }
		2084	* s'_i = { 3/8, 4/8, 1/8, 0 }
		2085	*
		2086	* We can then compute the difference in effective weight by using:
		2087	*
		2088	* dw_i = S * (s'_i - s_i) (3)
		2089	*
		2090	* Where 'S' is the group weight as seen by its parent.
		2091	*
		2092	* Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
		2093	* times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
		2094	* 4/7) times the weight of the group.
2039	*/	2095	*/
2040	static long effective_load(struct task_group *tg, int cpu, long wl, long wg)	2096	static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
2041	{	2097	{
2042	struct sched_entity *se = tg->se[cpu];	2098	struct sched_entity *se = tg->se[cpu];
2043		2099
2044	if (!tg->parent)	2100	if (!tg->parent) /* the trivial, non-cgroup case */
2045	return wl;	2101	return wl;
2046		2102
2047	for_each_sched_entity(se) {	2103	for_each_sched_entity(se) {
2048	long lw, w;	2104	long w, W;
2049		2105
2050	tg = se->my_q->tg;	2106	tg = se->my_q->tg;
2051	w = se->my_q->load.weight;
2052		2107
2053	/* use this cpu's instantaneous contribution */	2108	/*
2054	lw = atomic_read(&tg->load_weight);	2109	* W = @wg + \Sum rw_j
2055	lw -= se->my_q->load_contribution;	2110	*/
2056	lw += w + wg;	2111	W = wg + calc_tg_weight(tg, se->my_q);
2057		2112
2058	wl += w;	2113	/*
		2114	* w = rw_i + @wl
		2115	*/
		2116	w = se->my_q->load.weight + wl;
2059		2117
2060	if (lw > 0 && wl < lw)	2118	/*
2061	wl = (wl * tg->shares) / lw;	2119	* wl = S * s'_i; see (2)
		2120	*/
		2121	if (W > 0 && w < W)
		2122	wl = (w * tg->shares) / W;
2062	else	2123	else
2063	wl = tg->shares;	2124	wl = tg->shares;
2064		2125
2065	/* zero point is MIN_SHARES */	2126	/*
		2127	* Per the above, wl is the new se->load.weight value; since
		2128	* those are clipped to [MIN_SHARES, ...) do so now. See
		2129	* calc_cfs_shares().
		2130	*/
2066	if (wl < MIN_SHARES)	2131	if (wl < MIN_SHARES)
2067	wl = MIN_SHARES;	2132	wl = MIN_SHARES;
		2133
		2134	/*
		2135	* wl = dw_i = S * (s'_i - s_i); see (3)
		2136	*/
2068	wl -= se->load.weight;	2137	wl -= se->load.weight;
		2138
		2139	/*
		2140	* Recursively apply this logic to all parent groups to compute
		2141	* the final effective load change on the root group. Since
		2142	* only the @tg group gets extra weight, all parent groups can
		2143	* only redistribute existing shares. @wl is the shift in shares
		2144	* resulting from this level per the above.
		2145	*/
2069	wg = 0;	2146	wg = 0;
2070	}	2147	}
2071		2148