Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 159
1 file changed, 126 insertions(+), 33 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7c..8a39fa3e3c6c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
                 list_del_leaf_cfs_rq(cfs_rq);
 }
 
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+        long tg_weight;
+
+        /*
+         * Use this CPU's actual weight instead of the last load_contribution
+         * to gain a more accurate current total weight. See
+         * update_cfs_rq_load_contribution().
+         */
+        tg_weight = atomic_read(&tg->load_weight);
+        tg_weight -= cfs_rq->load_contribution;
+        tg_weight += cfs_rq->load.weight;
+
+        return tg_weight;
+}
+
 static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-        long load_weight, load, shares;
+        long tg_weight, load, shares;
 
+        tg_weight = calc_tg_weight(tg, cfs_rq);
         load = cfs_rq->load.weight;
 
-        load_weight = atomic_read(&tg->load_weight);
-        load_weight += load;
-        load_weight -= cfs_rq->load_contribution;
-
         shares = (tg->shares * load);
-        if (load_weight)
-                shares /= load_weight;
+        if (tg_weight)
+                shares /= tg_weight;
 
         if (shares < MIN_SHARES)
                 shares = MIN_SHARES;
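This first hunk factors the group-weight estimate out of calc_cfs_shares() into the new calc_tg_weight() helper, which swaps this cfs_rq's last published load_contribution for its current load.weight before the division. As a rough illustration, the standalone program below (not kernel code; every value is a made-up stand-in for the corresponding kernel field) runs through that arithmetic once:

#include <stdio.h>

#define MIN_SHARES 2

int main(void)
{
        /* hypothetical inputs, standing in for the kernel's per-group fields */
        long tg_load_weight    = 3072;  /* atomic_read(&tg->load_weight)      */
        long load_contribution = 1024;  /* this cfs_rq's last published value */
        long load              = 2048;  /* this cfs_rq's current load.weight  */
        long tg_shares         = 1024;  /* tg->shares                         */

        /* calc_tg_weight(): replace the stale contribution with the live weight */
        long tg_weight = tg_load_weight - load_contribution + load;

        /* calc_cfs_shares(): this CPU's proportional slice of tg->shares */
        long shares = tg_shares * load;
        if (tg_weight)
                shares /= tg_weight;
        if (shares < MIN_SHARES)
                shares = MIN_SHARES;

        printf("tg_weight=%ld shares=%ld\n", tg_weight, shares); /* 4096, 512 */
        return 0;
}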
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-        if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+        if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
                 return;
 
         __return_cfs_rq_runtime(cfs_rq);
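The second hunk drops a stray `!`: unused CFS bandwidth should be returned to the global pool only once the queue has drained, not while tasks are still queued. A minimal restatement of the corrected guard, as an illustrative standalone helper rather than the kernel function itself:

#include <stdbool.h>

/* true only when bandwidth control is active and the queue has emptied */
bool should_return_runtime(bool runtime_enabled, unsigned int nr_running)
{
        if (!runtime_enabled || nr_running)
                return false;   /* no bandwidth limit, or tasks still queued */
        return true;            /* queue drained: hand unused runtime back */
}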
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ *   s_i = rw_i / \Sum rw_j                                             (1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ *   rw_i = {   2,   4,   1,   0 }
+ *   s_i  = { 2/7, 4/7, 1/7,   0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wl we can compute the new shares distribution (s'_i) using:
+ *
+ *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)                            (2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ *   rw'_i = {   3,   4,   1,   0 }
+ *   s'_i  = { 3/8, 4/8, 1/8,   0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ *   dw_i = S * (s'_i - s_i)                                            (3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
  */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
         struct sched_entity *se = tg->se[cpu];
 
-        if (!tg->parent)
+        if (!tg->parent)        /* the trivial, non-cgroup case */
                 return wl;
 
         for_each_sched_entity(se) {
-                long lw, w;
+                long w, W;
 
                 tg = se->my_q->tg;
-                w = se->my_q->load.weight;
 
-                /* use this cpu's instantaneous contribution */
-                lw = atomic_read(&tg->load_weight);
-                lw -= se->my_q->load_contribution;
-                lw += w + wg;
+                /*
+                 * W = @wg + \Sum rw_j
+                 */
+                W = wg + calc_tg_weight(tg, se->my_q);
 
-                wl += w;
+                /*
+                 * w = rw_i + @wl
+                 */
+                w = se->my_q->load.weight + wl;
 
-                if (lw > 0 && wl < lw)
-                        wl = (wl * tg->shares) / lw;
+                /*
+                 * wl = S * s'_i; see (2)
+                 */
+                if (W > 0 && w < W)
+                        wl = (w * tg->shares) / W;
                 else
                         wl = tg->shares;
 
-                /* zero point is MIN_SHARES */
+                /*
+                 * Per the above, wl is the new se->load.weight value; since
+                 * those are clipped to [MIN_SHARES, ...) do so now. See
+                 * calc_cfs_shares().
+                 */
                 if (wl < MIN_SHARES)
                         wl = MIN_SHARES;
+
+                /*
+                 * wl = dw_i = S * (s'_i - s_i); see (3)
+                 */
                 wl -= se->load.weight;
+
+                /*
+                 * Recursively apply this logic to all parent groups to compute
+                 * the final effective load change on the root group. Since
+                 * only the @tg group gets extra weight, all parent groups can
+                 * only redistribute existing shares. @wl is the shift in shares
+                 * resulting from this level per the above.
+                 */
                 wg = 0;
         }
 
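The comment block added above ends with a worked example; the short standalone program below (illustrative only, not kernel code) reproduces that arithmetic for CPUs 0 and 1 and confirms the quoted 5/56 and -4/56 shifts:

#include <stdio.h>

int main(void)
{
        double rw[4] = { 2, 4, 1, 0 };  /* per-CPU runqueue weights (rw_i)     */
        double sum   = 2 + 4 + 1 + 0;   /* \Sum rw_j = 7                       */
        double wl = 1, wg = 1;          /* one extra task of weight 1 on CPU 0 */
        double S  = 56;                 /* group weight; 56 keeps dw integral  */

        for (int i = 0; i < 2; i++) {
                double s  = rw[i] / sum;                              /* (1) */
                double sp = (rw[i] + (i == 0 ? wl : 0)) / (sum + wg); /* (2) */
                printf("cpu%d: dw = %g\n", i, S * (sp - s));          /* (3) */
        }
        /* prints dw = 5 for CPU 0 and dw = -4 for CPU 1: 5/56 and -4/56 of S */
        return 0;
}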
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
         int cpu = smp_processor_id();
         int prev_cpu = task_cpu(p);
         struct sched_domain *sd;
-        int i;
+        struct sched_group *sg;
+        int i, smt = 0;
 
         /*
          * If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,40 @@ static int select_idle_sibling(struct task_struct *p, int target)
          * Otherwise, iterate the domains and find an elegible idle cpu.
          */
         rcu_read_lock();
+again:
         for_each_domain(target, sd) {
+                if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+                        continue;
+
+                if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
+                        break;
+
                 if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
                         break;
 
-                for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
-                        if (idle_cpu(i)) {
-                                target = i;
-                                break;
+                sg = sd->groups;
+                do {
+                        if (!cpumask_intersects(sched_group_cpus(sg),
+                                                tsk_cpus_allowed(p)))
+                                goto next;
+
+                        for_each_cpu(i, sched_group_cpus(sg)) {
+                                if (!idle_cpu(i))
+                                        goto next;
                         }
-                }
 
-                /*
-                 * Lets stop looking for an idle sibling when we reached
-                 * the domain that spans the current cpu and prev_cpu.
-                 */
-                if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
-                    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
-                        break;
+                        target = cpumask_first_and(sched_group_cpus(sg),
+                                        tsk_cpus_allowed(p));
+                        goto done;
+next:
+                        sg = sg->next;
+                } while (sg != sd->groups);
+        }
+        if (!smt) {
+                smt = 1;
+                goto again;
         }
+done:
         rcu_read_unlock();
 
         return target;
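The rewritten select_idle_sibling() walks the sched_groups of each cache-sharing domain and, on the first pass, skips SMT-level domains so that a core whose threads are all idle is preferred; only if no such core exists does it retry and accept any idle sibling thread. The toy model below is a simplified sketch of that two-pass search order, with a made-up 4-core/2-thread topology and idle map; it is not the kernel implementation and ignores domain spans and the task's allowed-CPU mask:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* hypothetical topology: 4 cores with 2 SMT threads each */
static const int core_of[NR_CPUS] = { 0, 0, 1, 1, 2, 2, 3, 3 };
/* made-up idle map: cpu 1 is idle, core 2 (cpus 4 and 5) is fully idle */
static const bool idle[NR_CPUS]   = { false, true, false, false,
                                      true,  true, false, false };

static int find_idle_target(int target)
{
        /* pass 1: look for a whole idle core (every SMT thread idle) */
        for (int core = 0; core < 4; core++) {
                int first = -1;
                bool all_idle = true;

                for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                        if (core_of[cpu] != core)
                                continue;
                        if (first < 0)
                                first = cpu;
                        if (!idle[cpu])
                                all_idle = false;
                }
                if (all_idle)
                        return first;
        }

        /* pass 2 ("smt"): settle for any idle sibling thread */
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (idle[cpu])
                        return cpu;
        }

        return target;  /* nothing idle: stick with the previous choice */
}

int main(void)
{
        /* picks cpu 4: core 2 is fully idle, even though cpu 1 is idle too */
        printf("picked cpu %d\n", find_idle_target(0));
        return 0;
}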
@@ -3511,7 +3604,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
 }
 
 /**
- * update_sd_lb_stats - Update sched_group's statistics for load balancing.
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu