aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched_fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- kernel/sched_fair.c 159
1 files changed, 126 insertions, 33 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7c..8a39fa3e3c6c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
772 list_del_leaf_cfs_rq(cfs_rq); 772 list_del_leaf_cfs_rq(cfs_rq);
773} 773}
774 774
775static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
776{
777 long tg_weight;
778
779 /*
780 * Use this CPU's actual weight instead of the last load_contribution
781 * to gain a more accurate current total weight. See
782 * update_cfs_rq_load_contribution().
783 */
784 tg_weight = atomic_read(&tg->load_weight);
785 tg_weight -= cfs_rq->load_contribution;
786 tg_weight += cfs_rq->load.weight;
787
788 return tg_weight;
789}
790
775static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) 791static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
776{ 792{
777 long load_weight, load, shares; 793 long tg_weight, load, shares;
778 794
795 tg_weight = calc_tg_weight(tg, cfs_rq);
779 load = cfs_rq->load.weight; 796 load = cfs_rq->load.weight;
780 797
781 load_weight = atomic_read(&tg->load_weight);
782 load_weight += load;
783 load_weight -= cfs_rq->load_contribution;
784
785 shares = (tg->shares * load); 798 shares = (tg->shares * load);
786 if (load_weight) 799 if (tg_weight)
787 shares /= load_weight; 800 shares /= tg_weight;
788 801
789 if (shares < MIN_SHARES) 802 if (shares < MIN_SHARES)
790 shares = MIN_SHARES; 803 shares = MIN_SHARES;
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
1743 1756
1744static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) 1757static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
1745{ 1758{
1746 if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) 1759 if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
1747 return; 1760 return;
1748 1761
1749 __return_cfs_rq_runtime(cfs_rq); 1762 __return_cfs_rq_runtime(cfs_rq);
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
2036 * Adding load to a group doesn't make a group heavier, but can cause movement 2049 * Adding load to a group doesn't make a group heavier, but can cause movement
2037 * of group shares between cpus. Assuming the shares were perfectly aligned one 2050 * of group shares between cpus. Assuming the shares were perfectly aligned one
2038 * can calculate the shift in shares. 2051 * can calculate the shift in shares.
2052 *
2053 * Calculate the effective load difference if @wl is added (subtracted) to @tg
2054 * on this @cpu and results in a total addition (subtraction) of @wg to the
2055 * total group weight.
2056 *
2057 * Given a runqueue weight distribution (rw_i) we can compute a shares
2058 * distribution (s_i) using:
2059 *
2060 * s_i = rw_i / \Sum rw_j (1)
2061 *
2062 * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
2063 * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
2064 * shares distribution (s_i):
2065 *
2066 * rw_i = { 2, 4, 1, 0 }
2067 * s_i = { 2/7, 4/7, 1/7, 0 }
2068 *
2069 * As per wake_affine() we're interested in the load of two CPUs (the CPU the
2070 * task used to run on and the CPU the waker is running on), we need to
2071 * compute the effect of waking a task on either CPU and, in case of a sync
2072 * wakeup, compute the effect of the current task going to sleep.
2073 *
2074 * So for a change of @wl to the local @cpu with an overall group weight change
2075 * of @wg we can compute the new shares distribution (s'_i) using:
2076 *
2077 * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2)
2078 *
2079 * Suppose we're interested in CPUs 0 and 1, and want to compute the load
2080 * differences in waking a task to CPU 0. The additional task changes the
2081 * weight and shares distributions like:
2082 *
2083 * rw'_i = { 3, 4, 1, 0 }
2084 * s'_i = { 3/8, 4/8, 1/8, 0 }
2085 *
2086 * We can then compute the difference in effective weight by using:
2087 *
2088 * dw_i = S * (s'_i - s_i) (3)
2089 *
2090 * Where 'S' is the group weight as seen by its parent.
2091 *
2092 * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
2093 * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
2094 * 4/7) times the weight of the group.
2039 */ 2095 */
2040static long effective_load(struct task_group *tg, int cpu, long wl, long wg) 2096static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
2041{ 2097{
2042 struct sched_entity *se = tg->se[cpu]; 2098 struct sched_entity *se = tg->se[cpu];
2043 2099
2044 if (!tg->parent) 2100 if (!tg->parent) /* the trivial, non-cgroup case */
2045 return wl; 2101 return wl;
2046 2102
2047 for_each_sched_entity(se) { 2103 for_each_sched_entity(se) {
2048 long lw, w; 2104 long w, W;
2049 2105
2050 tg = se->my_q->tg; 2106 tg = se->my_q->tg;
2051 w = se->my_q->load.weight;
2052 2107
2053 /* use this cpu's instantaneous contribution */ 2108 /*
2054 lw = atomic_read(&tg->load_weight); 2109 * W = @wg + \Sum rw_j
2055 lw -= se->my_q->load_contribution; 2110 */
2056 lw += w + wg; 2111 W = wg + calc_tg_weight(tg, se->my_q);
2057 2112
2058 wl += w; 2113 /*
2114 * w = rw_i + @wl
2115 */
2116 w = se->my_q->load.weight + wl;
2059 2117
2060 if (lw > 0 && wl < lw) 2118 /*
2061 wl = (wl * tg->shares) / lw; 2119 * wl = S * s'_i; see (2)
2120 */
2121 if (W > 0 && w < W)
2122 wl = (w * tg->shares) / W;
2062 else 2123 else
2063 wl = tg->shares; 2124 wl = tg->shares;
2064 2125
2065 /* zero point is MIN_SHARES */ 2126 /*
2127 * Per the above, wl is the new se->load.weight value; since
2128 * those are clipped to [MIN_SHARES, ...) do so now. See
2129 * calc_cfs_shares().
2130 */
2066 if (wl < MIN_SHARES) 2131 if (wl < MIN_SHARES)
2067 wl = MIN_SHARES; 2132 wl = MIN_SHARES;
2133
2134 /*
2135 * wl = dw_i = S * (s'_i - s_i); see (3)
2136 */
2068 wl -= se->load.weight; 2137 wl -= se->load.weight;
2138
2139 /*
2140 * Recursively apply this logic to all parent groups to compute
2141 * the final effective load change on the root group. Since
2142 * only the @tg group gets extra weight, all parent groups can
2143 * only redistribute existing shares. @wl is the shift in shares
2144 * resulting from this level per the above.
2145 */
2069 wg = 0; 2146 wg = 0;
2070 } 2147 }
2071 2148
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
2249 int cpu = smp_processor_id(); 2326 int cpu = smp_processor_id();
2250 int prev_cpu = task_cpu(p); 2327 int prev_cpu = task_cpu(p);
2251 struct sched_domain *sd; 2328 struct sched_domain *sd;
2252 int i; 2329 struct sched_group *sg;
2330 int i, smt = 0;
2253 2331
2254 /* 2332 /*
2255 * If the task is going to be woken-up on this cpu and if it is 2333 * If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,40 @@ static int select_idle_sibling(struct task_struct *p, int target)
2269 * Otherwise, iterate the domains and find an eligible idle cpu. 2347 * Otherwise, iterate the domains and find an eligible idle cpu.
2270 */ 2348 */
2271 rcu_read_lock(); 2349 rcu_read_lock();
2350again:
2272 for_each_domain(target, sd) { 2351 for_each_domain(target, sd) {
2352 if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
2353 continue;
2354
2355 if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
2356 break;
2357
2273 if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) 2358 if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
2274 break; 2359 break;
2275 2360
2276 for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { 2361 sg = sd->groups;
2277 if (idle_cpu(i)) { 2362 do {
2278 target = i; 2363 if (!cpumask_intersects(sched_group_cpus(sg),
2279 break; 2364 tsk_cpus_allowed(p)))
2365 goto next;
2366
2367 for_each_cpu(i, sched_group_cpus(sg)) {
2368 if (!idle_cpu(i))
2369 goto next;
2280 } 2370 }
2281 }
2282 2371
2283 /* 2372 target = cpumask_first_and(sched_group_cpus(sg),
2284 * Lets stop looking for an idle sibling when we reached 2373 tsk_cpus_allowed(p));
2285 * the domain that spans the current cpu and prev_cpu. 2374 goto done;
2286 */ 2375next:
2287 if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && 2376 sg = sg->next;
2288 cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) 2377 } while (sg != sd->groups);
2289 break; 2378 }
2379 if (!smt) {
2380 smt = 1;
2381 goto again;
2290 } 2382 }
2383done:
2291 rcu_read_unlock(); 2384 rcu_read_unlock();
2292 2385
2293 return target; 2386 return target;
@@ -3511,7 +3604,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
3511} 3604}
3512 3605
3513/** 3606/**
3514 * update_sd_lb_stats - Update sched_group's statistics for load balancing. 3607 * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
3515 * @sd: sched_domain whose statistics are to be updated. 3608 * @sd: sched_domain whose statistics are to be updated.
3516 * @this_cpu: Cpu for which load balance is currently performed. 3609 * @this_cpu: Cpu for which load balance is currently performed.
3517 * @idle: Idle status of this_cpu 3610 * @idle: Idle status of this_cpu