Diffstat (limited to 'kernel/sched/core.c')
| -rw-r--r-- | kernel/sched/core.c | 525 |
1 file changed, 403 insertions, 122 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..468bdd44c1ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features = | |||
| 142 | #define SCHED_FEAT(name, enabled) \ | 142 | #define SCHED_FEAT(name, enabled) \ |
| 143 | #name , | 143 | #name , |
| 144 | 144 | ||
| 145 | static __read_mostly char *sched_feat_names[] = { | 145 | static const char * const sched_feat_names[] = { |
| 146 | #include "features.h" | 146 | #include "features.h" |
| 147 | NULL | ||
| 148 | }; | 147 | }; |
| 149 | 148 | ||
| 150 | #undef SCHED_FEAT | 149 | #undef SCHED_FEAT |
| @@ -2082,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 2082 | #endif | 2081 | #endif |
| 2083 | 2082 | ||
| 2084 | /* Here we just switch the register state and the stack. */ | 2083 | /* Here we just switch the register state and the stack. */ |
| 2085 | rcu_switch_from(prev); | ||
| 2086 | switch_to(prev, next, prev); | 2084 | switch_to(prev, next, prev); |
| 2087 | 2085 | ||
| 2088 | barrier(); | 2086 | barrier(); |
| @@ -2162,11 +2160,73 @@ unsigned long this_cpu_load(void) | |||
| 2162 | } | 2160 | } |
| 2163 | 2161 | ||
| 2164 | 2162 | ||
| 2163 | /* | ||
| 2164 | * Global load-average calculations | ||
| 2165 | * | ||
| 2166 | * We take a distributed and async approach to calculating the global load-avg | ||
| 2167 | * in order to minimize overhead. | ||
| 2168 | * | ||
| 2169 | * The global load average is an exponentially decaying average of nr_running + | ||
| 2170 | * nr_uninterruptible. | ||
| 2171 | * | ||
| 2172 | * Once every LOAD_FREQ: | ||
| 2173 | * | ||
| 2174 | * nr_active = 0; | ||
| 2175 | * for_each_possible_cpu(cpu) | ||
| 2176 | * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible; | ||
| 2177 | * | ||
| 2178 | * avenrun[n] = avenrun[n] * exp_n + nr_active * (1 - exp_n) | ||
| 2179 | * | ||
| 2180 | * Due to a number of reasons the above turns into the mess below: | ||
| 2181 | * | ||
| 2182 | * - for_each_possible_cpu() is prohibitively expensive on machines with | ||
| 2183 | * serious number of cpus, therefore we need to take a distributed approach | ||
| 2184 | * to calculating nr_active. | ||
| 2185 | * | ||
| 2186 | * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 | ||
| 2187 | * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } | ||
| 2188 | * | ||
| 2189 | * So assuming nr_active := 0 when we start out -- true per definition, we | ||
| 2190 | * can simply take per-cpu deltas and fold those into a global accumulate | ||
| 2191 | * to obtain the same result. See calc_load_fold_active(). | ||
| 2192 | * | ||
| 2193 | * Furthermore, in order to avoid synchronizing all per-cpu delta folding | ||
| 2194 | * across the machine, we assume 10 ticks is sufficient time for every | ||
| 2195 | * cpu to have completed this task. | ||
| 2196 | * | ||
| 2197 | * This places an upper bound on the IRQ-off latency of the machine. Then | ||
| 2198 | * again, being late doesn't lose the delta, just wrecks the sample. | ||
| 2199 | * | ||
| 2200 | * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because | ||
| 2201 | * this would add another cross-cpu cacheline miss and atomic operation | ||
| 2202 | * to the wakeup path. Instead we increment on whatever cpu the task ran | ||
| 2203 | * when it went into uninterruptible state and decrement on whatever cpu | ||
| 2204 | * did the wakeup. This means that only the sum of nr_uninterruptible over | ||
| 2205 | * all cpus yields the correct result. | ||
| 2206 | * | ||
| 2207 | * This covers the NO_HZ=n code, for extra head-aches, see the comment below. | ||
| 2208 | */ | ||
| 2209 | |||
| 2165 | /* Variables and functions for calc_load */ | 2210 | /* Variables and functions for calc_load */ |
| 2166 | static atomic_long_t calc_load_tasks; | 2211 | static atomic_long_t calc_load_tasks; |
| 2167 | static unsigned long calc_load_update; | 2212 | static unsigned long calc_load_update; |
| 2168 | unsigned long avenrun[3]; | 2213 | unsigned long avenrun[3]; |
| 2169 | EXPORT_SYMBOL(avenrun); | 2214 | EXPORT_SYMBOL(avenrun); /* should be removed */ |
| 2215 | |||
| 2216 | /** | ||
| 2217 | * get_avenrun - get the load average array | ||
| 2218 | * @loads: pointer to dest load array | ||
| 2219 | * @offset: offset to add | ||
| 2220 | * @shift: shift count to shift the result left | ||
| 2221 | * | ||
| 2222 | * These values are estimates at best, so no need for locking. | ||
| 2223 | */ | ||
| 2224 | void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | ||
| 2225 | { | ||
| 2226 | loads[0] = (avenrun[0] + offset) << shift; | ||
| 2227 | loads[1] = (avenrun[1] + offset) << shift; | ||
| 2228 | loads[2] = (avenrun[2] + offset) << shift; | ||
| 2229 | } | ||
| 2170 | 2230 | ||
| 2171 | static long calc_load_fold_active(struct rq *this_rq) | 2231 | static long calc_load_fold_active(struct rq *this_rq) |
| 2172 | { | 2232 | { |
| @@ -2183,6 +2243,9 @@ static long calc_load_fold_active(struct rq *this_rq) | |||
| 2183 | return delta; | 2243 | return delta; |
| 2184 | } | 2244 | } |
| 2185 | 2245 | ||
| 2246 | /* | ||
| 2247 | * a1 = a0 * e + a * (1 - e) | ||
| 2248 | */ | ||
| 2186 | static unsigned long | 2249 | static unsigned long |
| 2187 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | 2250 | calc_load(unsigned long load, unsigned long exp, unsigned long active) |
| 2188 | { | 2251 | { |
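The "a1 = a0 * e + a * (1 - e)" note above is the whole trick behind avenrun[]: a fixed-point exponentially decaying average, advanced once per LOAD_FREQ window. As a rough stand-alone illustration (constants copied from include/linux/sched.h; demo_calc_load() is a user-space sketch, not the kernel function), the update step behaves like this:

	#include <stdio.h>

	#define FSHIFT   11                  /* bits of fractional precision */
	#define FIXED_1  (1UL << FSHIFT)     /* 1.0 in fixed point */
	#define EXP_1    1884                /* 1/exp(5s/1min), fixed point */

	/* One step of a1 = a0*e + active*(1 - e), all in FSHIFT fixed point. */
	static unsigned long demo_calc_load(unsigned long load, unsigned long exp,
					    unsigned long active)
	{
		load *= exp;
		load += active * (FIXED_1 - exp);
		return load >> FSHIFT;
	}

	int main(void)
	{
		unsigned long avg = 0;
		int i;

		/* Feed a constant "2 runnable tasks" sample, one per 5s window. */
		for (i = 0; i < 24; i++)
			avg = demo_calc_load(avg, EXP_1, 2 * FIXED_1);

		/* Same integer/fraction split /proc/loadavg uses for display. */
		printf("1-min load: %lu.%02lu\n", avg >> FSHIFT,
		       ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);
		return 0;
	}

With EXP_5 or EXP_15 in place of EXP_1 the very same step produces the 5- and 15-minute figures, which is all calc_global_load() does three times in a row.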
| @@ -2194,30 +2257,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) | |||
| 2194 | 2257 | ||
| 2195 | #ifdef CONFIG_NO_HZ | 2258 | #ifdef CONFIG_NO_HZ |
| 2196 | /* | 2259 | /* |
| 2197 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. | 2260 | * Handle NO_HZ for the global load-average. |
| 2261 | * | ||
| 2262 | * Since the above described distributed algorithm to compute the global | ||
| 2263 | * load-average relies on per-cpu sampling from the tick, it is affected by | ||
| 2264 | * NO_HZ. | ||
| 2265 | * | ||
| 2266 | * The basic idea is to fold the nr_active delta into a global idle-delta upon | ||
| 2267 | * entering NO_HZ state such that we can include this as an 'extra' cpu delta | ||
| 2268 | * when we read the global state. | ||
| 2269 | * | ||
| 2270 | * Obviously reality has to ruin such a delightfully simple scheme: | ||
| 2271 | * | ||
| 2272 | * - When we go NO_HZ idle during the window, we can negate our sample | ||
| 2273 | * contribution, causing under-accounting. | ||
| 2274 | * | ||
| 2275 | * We avoid this by keeping two idle-delta counters and flipping them | ||
| 2276 | * when the window starts, thus separating old and new NO_HZ load. | ||
| 2277 | * | ||
| 2278 | * The only trick is the slight shift in index flip for read vs write. | ||
| 2279 | * | ||
| 2280 | * 0s 5s 10s 15s | ||
| 2281 | * +10 +10 +10 +10 | ||
| 2282 | * |-|-----------|-|-----------|-|-----------|-| | ||
| 2283 | * r:0 0 1 1 0 0 1 1 0 | ||
| 2284 | * w:0 1 1 0 0 1 1 0 0 | ||
| 2285 | * | ||
| 2286 | * This ensures we'll fold the old idle contribution in this window while | ||
| 2287 | * accumulating the new one. | ||
| 2288 | * | ||
| 2289 | * - When we wake up from NO_HZ idle during the window, we push up our | ||
| 2290 | * contribution, since we effectively move our sample point to a known | ||
| 2291 | * busy state. | ||
| 2292 | * | ||
| 2293 | * This is solved by pushing the window forward, and thus skipping the | ||
| 2294 | * sample, for this cpu (effectively using the idle-delta for this cpu which | ||
| 2295 | * was in effect at the time the window opened). This also solves the issue | ||
| 2296 | * of having to deal with a cpu having been in NOHZ idle for multiple | ||
| 2297 | * LOAD_FREQ intervals. | ||
| 2198 | * | 2298 | * |
| 2199 | * When making the ILB scale, we should try to pull this in as well. | 2299 | * When making the ILB scale, we should try to pull this in as well. |
| 2200 | */ | 2300 | */ |
| 2201 | static atomic_long_t calc_load_tasks_idle; | 2301 | static atomic_long_t calc_load_idle[2]; |
| 2302 | static int calc_load_idx; | ||
| 2303 | |||
| 2304 | static inline int calc_load_write_idx(void) | ||
| 2305 | { | ||
| 2306 | int idx = calc_load_idx; | ||
| 2307 | |||
| 2308 | /* | ||
| 2309 | * See calc_global_nohz(), if we observe the new index, we also | ||
| 2310 | * need to observe the new update time. | ||
| 2311 | */ | ||
| 2312 | smp_rmb(); | ||
| 2313 | |||
| 2314 | /* | ||
| 2315 | * If the folding window started, make sure we start writing in the | ||
| 2316 | * next idle-delta. | ||
| 2317 | */ | ||
| 2318 | if (!time_before(jiffies, calc_load_update)) | ||
| 2319 | idx++; | ||
| 2202 | 2320 | ||
| 2203 | void calc_load_account_idle(struct rq *this_rq) | 2321 | return idx & 1; |
| 2322 | } | ||
| 2323 | |||
| 2324 | static inline int calc_load_read_idx(void) | ||
| 2204 | { | 2325 | { |
| 2326 | return calc_load_idx & 1; | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | void calc_load_enter_idle(void) | ||
| 2330 | { | ||
| 2331 | struct rq *this_rq = this_rq(); | ||
| 2205 | long delta; | 2332 | long delta; |
| 2206 | 2333 | ||
| 2334 | /* | ||
| 2335 | * We're going into NOHZ mode, if there's any pending delta, fold it | ||
| 2336 | * into the pending idle delta. | ||
| 2337 | */ | ||
| 2207 | delta = calc_load_fold_active(this_rq); | 2338 | delta = calc_load_fold_active(this_rq); |
| 2208 | if (delta) | 2339 | if (delta) { |
| 2209 | atomic_long_add(delta, &calc_load_tasks_idle); | 2340 | int idx = calc_load_write_idx(); |
| 2341 | atomic_long_add(delta, &calc_load_idle[idx]); | ||
| 2342 | } | ||
| 2210 | } | 2343 | } |
| 2211 | 2344 | ||
| 2212 | static long calc_load_fold_idle(void) | 2345 | void calc_load_exit_idle(void) |
| 2213 | { | 2346 | { |
| 2214 | long delta = 0; | 2347 | struct rq *this_rq = this_rq(); |
| 2215 | 2348 | ||
| 2216 | /* | 2349 | /* |
| 2217 | * Its got a race, we don't care... | 2350 | * If we're still before the sample window, we're done. |
| 2218 | */ | 2351 | */ |
| 2219 | if (atomic_long_read(&calc_load_tasks_idle)) | 2352 | if (time_before(jiffies, this_rq->calc_load_update)) |
| 2220 | delta = atomic_long_xchg(&calc_load_tasks_idle, 0); | 2353 | return; |
| 2354 | |||
| 2355 | /* | ||
| 2356 | * We woke inside or after the sample window, this means we're already | ||
| 2357 | * accounted through the nohz accounting, so skip the entire deal and | ||
| 2358 | * sync up for the next window. | ||
| 2359 | */ | ||
| 2360 | this_rq->calc_load_update = calc_load_update; | ||
| 2361 | if (time_before(jiffies, this_rq->calc_load_update + 10)) | ||
| 2362 | this_rq->calc_load_update += LOAD_FREQ; | ||
| 2363 | } | ||
| 2364 | |||
| 2365 | static long calc_load_fold_idle(void) | ||
| 2366 | { | ||
| 2367 | int idx = calc_load_read_idx(); | ||
| 2368 | long delta = 0; | ||
| 2369 | |||
| 2370 | if (atomic_long_read(&calc_load_idle[idx])) | ||
| 2371 | delta = atomic_long_xchg(&calc_load_idle[idx], 0); | ||
| 2221 | 2372 | ||
| 2222 | return delta; | 2373 | return delta; |
| 2223 | } | 2374 | } |
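The read/write index shift described above boils down to a small double-buffering scheme: NO_HZ cpus fold their delta into one slot, the LOAD_FREQ update drains the other, and the index flips once per window. A user-space toy model of just that idea (all names here are invented for the illustration, none of them kernel API):

	#include <stdio.h>

	/*
	 * Two idle-delta slots, as in calc_load_idle[2].  The kernel uses
	 * atomic_long_t; plain longs are fine for a single-threaded demo.
	 */
	static long idle_delta[2];
	static int idle_idx;        /* flipped once per LOAD_FREQ window */
	static int window_open;     /* models !time_before(jiffies, calc_load_update) */

	static void cpu_goes_idle(long delta)
	{
		/* Writers move on to the next slot once the window has opened. */
		idle_delta[(idle_idx + window_open) & 1] += delta;
	}

	static long fold_idle(void)
	{
		/* The reader drains the slot belonging to the open window. */
		long d = idle_delta[idle_idx & 1];

		idle_delta[idle_idx & 1] = 0;
		return d;
	}

	static void close_window(void)
	{
		window_open = 0;
		idle_idx++;             /* new window: swap reader and writer slots */
	}

	int main(void)
	{
		cpu_goes_idle(3);       /* went idle before this window opened */
		window_open = 1;        /* the 10-tick fold window starts */
		cpu_goes_idle(2);       /* goes idle during the window: next slot */
		printf("folded this window: %ld\n", fold_idle());   /* 3 */
		close_window();
		printf("folded next window: %ld\n", fold_idle());   /* 2 */
		return 0;
	}

The only subtlety the toy leaves out is the smp_wmb()/smp_rmb() pairing above, which guarantees that a writer observing the new index also observes the new calc_load_update.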
| @@ -2303,66 +2454,39 @@ static void calc_global_nohz(void) | |||
| 2303 | { | 2454 | { |
| 2304 | long delta, active, n; | 2455 | long delta, active, n; |
| 2305 | 2456 | ||
| 2306 | /* | 2457 | if (!time_before(jiffies, calc_load_update + 10)) { |
| 2307 | * If we crossed a calc_load_update boundary, make sure to fold | 2458 | /* |
| 2308 | * any pending idle changes, the respective CPUs might have | 2459 | * Catch-up, fold however many we are behind still |
| 2309 | * missed the tick driven calc_load_account_active() update | 2460 | */ |
| 2310 | * due to NO_HZ. | 2461 | delta = jiffies - calc_load_update - 10; |
| 2311 | */ | 2462 | n = 1 + (delta / LOAD_FREQ); |
| 2312 | delta = calc_load_fold_idle(); | ||
| 2313 | if (delta) | ||
| 2314 | atomic_long_add(delta, &calc_load_tasks); | ||
| 2315 | |||
| 2316 | /* | ||
| 2317 | * It could be the one fold was all it took, we done! | ||
| 2318 | */ | ||
| 2319 | if (time_before(jiffies, calc_load_update + 10)) | ||
| 2320 | return; | ||
| 2321 | |||
| 2322 | /* | ||
| 2323 | * Catch-up, fold however many we are behind still | ||
| 2324 | */ | ||
| 2325 | delta = jiffies - calc_load_update - 10; | ||
| 2326 | n = 1 + (delta / LOAD_FREQ); | ||
| 2327 | 2463 | ||
| 2328 | active = atomic_long_read(&calc_load_tasks); | 2464 | active = atomic_long_read(&calc_load_tasks); |
| 2329 | active = active > 0 ? active * FIXED_1 : 0; | 2465 | active = active > 0 ? active * FIXED_1 : 0; |
| 2330 | 2466 | ||
| 2331 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | 2467 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); |
| 2332 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | 2468 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); |
| 2333 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | 2469 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); |
| 2334 | 2470 | ||
| 2335 | calc_load_update += n * LOAD_FREQ; | 2471 | calc_load_update += n * LOAD_FREQ; |
| 2336 | } | 2472 | } |
| 2337 | #else | ||
| 2338 | void calc_load_account_idle(struct rq *this_rq) | ||
| 2339 | { | ||
| 2340 | } | ||
| 2341 | 2473 | ||
| 2342 | static inline long calc_load_fold_idle(void) | 2474 | /* |
| 2343 | { | 2475 | * Flip the idle index... |
| 2344 | return 0; | 2476 | * |
| 2477 | * Make sure we first write the new time then flip the index, so that | ||
| 2478 | * calc_load_write_idx() will see the new time when it reads the new | ||
| 2479 | * index; this avoids a double flip messing things up. | ||
| 2480 | */ | ||
| 2481 | smp_wmb(); | ||
| 2482 | calc_load_idx++; | ||
| 2345 | } | 2483 | } |
| 2484 | #else /* !CONFIG_NO_HZ */ | ||
| 2346 | 2485 | ||
| 2347 | static void calc_global_nohz(void) | 2486 | static inline long calc_load_fold_idle(void) { return 0; } |
| 2348 | { | 2487 | static inline void calc_global_nohz(void) { } |
| 2349 | } | ||
| 2350 | #endif | ||
| 2351 | 2488 | ||
| 2352 | /** | 2489 | #endif /* CONFIG_NO_HZ */ |
| 2353 | * get_avenrun - get the load average array | ||
| 2354 | * @loads: pointer to dest load array | ||
| 2355 | * @offset: offset to add | ||
| 2356 | * @shift: shift count to shift the result left | ||
| 2357 | * | ||
| 2358 | * These values are estimates at best, so no need for locking. | ||
| 2359 | */ | ||
| 2360 | void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | ||
| 2361 | { | ||
| 2362 | loads[0] = (avenrun[0] + offset) << shift; | ||
| 2363 | loads[1] = (avenrun[1] + offset) << shift; | ||
| 2364 | loads[2] = (avenrun[2] + offset) << shift; | ||
| 2365 | } | ||
| 2366 | 2490 | ||
| 2367 | /* | 2491 | /* |
| 2368 | * calc_load - update the avenrun load estimates 10 ticks after the | 2492 | * calc_load - update the avenrun load estimates 10 ticks after the |
| @@ -2370,11 +2494,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
| 2370 | */ | 2494 | */ |
| 2371 | void calc_global_load(unsigned long ticks) | 2495 | void calc_global_load(unsigned long ticks) |
| 2372 | { | 2496 | { |
| 2373 | long active; | 2497 | long active, delta; |
| 2374 | 2498 | ||
| 2375 | if (time_before(jiffies, calc_load_update + 10)) | 2499 | if (time_before(jiffies, calc_load_update + 10)) |
| 2376 | return; | 2500 | return; |
| 2377 | 2501 | ||
| 2502 | /* | ||
| 2503 | * Fold the 'old' idle-delta to include all NO_HZ cpus. | ||
| 2504 | */ | ||
| 2505 | delta = calc_load_fold_idle(); | ||
| 2506 | if (delta) | ||
| 2507 | atomic_long_add(delta, &calc_load_tasks); | ||
| 2508 | |||
| 2378 | active = atomic_long_read(&calc_load_tasks); | 2509 | active = atomic_long_read(&calc_load_tasks); |
| 2379 | active = active > 0 ? active * FIXED_1 : 0; | 2510 | active = active > 0 ? active * FIXED_1 : 0; |
| 2380 | 2511 | ||
| @@ -2385,12 +2516,7 @@ void calc_global_load(unsigned long ticks) | |||
| 2385 | calc_load_update += LOAD_FREQ; | 2516 | calc_load_update += LOAD_FREQ; |
| 2386 | 2517 | ||
| 2387 | /* | 2518 | /* |
| 2388 | * Account one period with whatever state we found before | 2519 | * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. |
| 2389 | * folding in the nohz state and ageing the entire idle period. | ||
| 2390 | * | ||
| 2391 | * This avoids loosing a sample when we go idle between | ||
| 2392 | * calc_load_account_active() (10 ticks ago) and now and thus | ||
| 2393 | * under-accounting. | ||
| 2394 | */ | 2520 | */ |
| 2395 | calc_global_nohz(); | 2521 | calc_global_nohz(); |
| 2396 | } | 2522 | } |
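calc_load_n() itself sits outside the visible hunks; conceptually, catching up after n quiet LOAD_FREQ windows is a single EMA step with the decay factor raised to the n-th power. A rough fixed-point sketch of that idea (fixed_pow() and demo_calc_load_n() are illustrative helpers, not the kernel's implementation):

	#include <stdio.h>

	#define FSHIFT  11
	#define FIXED_1 (1UL << FSHIFT)
	#define EXP_1   1884            /* 1/exp(5s/1min) in FSHIFT fixed point */

	/* x^n in FSHIFT fixed point, by square-and-multiply. */
	static unsigned long fixed_pow(unsigned long x, unsigned int n)
	{
		unsigned long r = FIXED_1;

		while (n) {
			if (n & 1)
				r = (r * x) >> FSHIFT;
			x = (x * x) >> FSHIFT;
			n >>= 1;
		}
		return r;
	}

	/* a_n = a_0 * e^n + active * (1 - e^n): n windows folded in one go. */
	static unsigned long demo_calc_load_n(unsigned long load, unsigned long exp,
					      unsigned long active, unsigned int n)
	{
		unsigned long exp_n = fixed_pow(exp, n);

		return (load * exp_n + active * (FIXED_1 - exp_n)) >> FSHIFT;
	}

	int main(void)
	{
		/* A 1-min load of 1.0 decaying across 12 missed windows (~1 idle minute). */
		unsigned long avg = demo_calc_load_n(FIXED_1, EXP_1, 0, 12);

		printf("after an idle minute: %lu.%02lu\n", avg >> FSHIFT,
		       ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);
		return 0;
	}

calc_global_nohz() above performs exactly this kind of bulk catch-up once it notices the update window was crossed more than once.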
| @@ -2407,7 +2533,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
| 2407 | return; | 2533 | return; |
| 2408 | 2534 | ||
| 2409 | delta = calc_load_fold_active(this_rq); | 2535 | delta = calc_load_fold_active(this_rq); |
| 2410 | delta += calc_load_fold_idle(); | ||
| 2411 | if (delta) | 2536 | if (delta) |
| 2412 | atomic_long_add(delta, &calc_load_tasks); | 2537 | atomic_long_add(delta, &calc_load_tasks); |
| 2413 | 2538 | ||
| @@ -2415,6 +2540,10 @@ static void calc_load_account_active(struct rq *this_rq) | |||
| 2415 | } | 2540 | } |
| 2416 | 2541 | ||
| 2417 | /* | 2542 | /* |
| 2543 | * End of global load-average stuff | ||
| 2544 | */ | ||
| 2545 | |||
| 2546 | /* | ||
| 2418 | * The exact cpuload at various idx values, calculated at every tick would be | 2547 | * The exact cpuload at various idx values, calculated at every tick would be |
| 2419 | * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load | 2548 | * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load |
| 2420 | * | 2549 | * |
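The per-index formula quoted just above is a plain geometric decay with a different time constant per index: cpu_load[0] is always the instantaneous load and higher indexes react ever more slowly. A small stand-alone rendering of that formula (not the kernel's __update_cpu_load(), just the arithmetic from the comment):

	#include <stdio.h>

	#define CPU_LOAD_IDX_MAX 5

	/* One tick of: load = (2^idx - 1)/2^idx * load + 1/2^idx * cur_load */
	static unsigned long tick_load(unsigned long old_load,
				       unsigned long cur_load, int idx)
	{
		return (old_load * ((1UL << idx) - 1) + cur_load) >> idx;
	}

	int main(void)
	{
		unsigned long cpu_load[CPU_LOAD_IDX_MAX] = { 0 };
		int idx, t;

		/* Ten busy ticks at load 1024, then ten idle ticks at load 0. */
		for (t = 0; t < 20; t++) {
			unsigned long cur = (t < 10) ? 1024 : 0;

			for (idx = 0; idx < CPU_LOAD_IDX_MAX; idx++)
				cpu_load[idx] = tick_load(cpu_load[idx], cur, idx);
		}

		for (idx = 0; idx < CPU_LOAD_IDX_MAX; idx++)
			printf("cpu_load[%d] = %lu\n", idx, cpu_load[idx]);
		return 0;
	}

After the idle ticks cpu_load[0] is back to zero while cpu_load[4] still retains part of the earlier busy period; the NO_HZ paths added below have to approximate those missing per-tick updates when the tick never fired.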
| @@ -2517,25 +2646,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, | |||
| 2517 | sched_avg_update(this_rq); | 2646 | sched_avg_update(this_rq); |
| 2518 | } | 2647 | } |
| 2519 | 2648 | ||
| 2649 | #ifdef CONFIG_NO_HZ | ||
| 2650 | /* | ||
| 2651 | * There is no sane way to deal with nohz on smp when using jiffies because the | ||
| 2652 | * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading | ||
| 2653 | * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. | ||
| 2654 | * | ||
| 2655 | * Therefore we cannot use the delta approach from the regular tick since that | ||
| 2656 | * would seriously skew the load calculation. However we'll make do for those | ||
| 2657 | * updates happening while idle (nohz_idle_balance) or coming out of idle | ||
| 2658 | * (tick_nohz_idle_exit). | ||
| 2659 | * | ||
| 2660 | * This means we might still be one tick off for nohz periods. | ||
| 2661 | */ | ||
| 2662 | |||
| 2520 | /* | 2663 | /* |
| 2521 | * Called from nohz_idle_balance() to update the load ratings before doing the | 2664 | * Called from nohz_idle_balance() to update the load ratings before doing the |
| 2522 | * idle balance. | 2665 | * idle balance. |
| 2523 | */ | 2666 | */ |
| 2524 | void update_idle_cpu_load(struct rq *this_rq) | 2667 | void update_idle_cpu_load(struct rq *this_rq) |
| 2525 | { | 2668 | { |
| 2526 | unsigned long curr_jiffies = jiffies; | 2669 | unsigned long curr_jiffies = ACCESS_ONCE(jiffies); |
| 2527 | unsigned long load = this_rq->load.weight; | 2670 | unsigned long load = this_rq->load.weight; |
| 2528 | unsigned long pending_updates; | 2671 | unsigned long pending_updates; |
| 2529 | 2672 | ||
| 2530 | /* | 2673 | /* |
| 2531 | * Bloody broken means of dealing with nohz, but better than nothing.. | 2674 | * bail if there's load or we're actually up-to-date. |
| 2532 | * jiffies is updated by one cpu, another cpu can drift wrt the jiffy | ||
| 2533 | * update and see 0 difference the one time and 2 the next, even though | ||
| 2534 | * we ticked at roughtly the same rate. | ||
| 2535 | * | ||
| 2536 | * Hence we only use this from nohz_idle_balance() and skip this | ||
| 2537 | * nonsense when called from the scheduler_tick() since that's | ||
| 2538 | * guaranteed a stable rate. | ||
| 2539 | */ | 2675 | */ |
| 2540 | if (load || curr_jiffies == this_rq->last_load_update_tick) | 2676 | if (load || curr_jiffies == this_rq->last_load_update_tick) |
| 2541 | return; | 2677 | return; |
| @@ -2547,12 +2683,38 @@ void update_idle_cpu_load(struct rq *this_rq) | |||
| 2547 | } | 2683 | } |
| 2548 | 2684 | ||
| 2549 | /* | 2685 | /* |
| 2686 | * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. | ||
| 2687 | */ | ||
| 2688 | void update_cpu_load_nohz(void) | ||
| 2689 | { | ||
| 2690 | struct rq *this_rq = this_rq(); | ||
| 2691 | unsigned long curr_jiffies = ACCESS_ONCE(jiffies); | ||
| 2692 | unsigned long pending_updates; | ||
| 2693 | |||
| 2694 | if (curr_jiffies == this_rq->last_load_update_tick) | ||
| 2695 | return; | ||
| 2696 | |||
| 2697 | raw_spin_lock(&this_rq->lock); | ||
| 2698 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
| 2699 | if (pending_updates) { | ||
| 2700 | this_rq->last_load_update_tick = curr_jiffies; | ||
| 2701 | /* | ||
| 2702 | * We were idle, this means load 0, the current load might be | ||
| 2703 | * !0 due to remote wakeups and the sort. | ||
| 2704 | */ | ||
| 2705 | __update_cpu_load(this_rq, 0, pending_updates); | ||
| 2706 | } | ||
| 2707 | raw_spin_unlock(&this_rq->lock); | ||
| 2708 | } | ||
| 2709 | #endif /* CONFIG_NO_HZ */ | ||
| 2710 | |||
| 2711 | /* | ||
| 2550 | * Called from scheduler_tick() | 2712 | * Called from scheduler_tick() |
| 2551 | */ | 2713 | */ |
| 2552 | static void update_cpu_load_active(struct rq *this_rq) | 2714 | static void update_cpu_load_active(struct rq *this_rq) |
| 2553 | { | 2715 | { |
| 2554 | /* | 2716 | /* |
| 2555 | * See the mess in update_idle_cpu_load(). | 2717 | * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). |
| 2556 | */ | 2718 | */ |
| 2557 | this_rq->last_load_update_tick = jiffies; | 2719 | this_rq->last_load_update_tick = jiffies; |
| 2558 | __update_cpu_load(this_rq, this_rq->load.weight, 1); | 2720 | __update_cpu_load(this_rq, this_rq->load.weight, 1); |
| @@ -4982,7 +5144,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |||
| 4982 | p->sched_class->set_cpus_allowed(p, new_mask); | 5144 | p->sched_class->set_cpus_allowed(p, new_mask); |
| 4983 | 5145 | ||
| 4984 | cpumask_copy(&p->cpus_allowed, new_mask); | 5146 | cpumask_copy(&p->cpus_allowed, new_mask); |
| 4985 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); | 5147 | p->nr_cpus_allowed = cpumask_weight(new_mask); |
| 4986 | } | 5148 | } |
| 4987 | 5149 | ||
| 4988 | /* | 5150 | /* |
| @@ -5524,15 +5686,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ | |||
| 5524 | 5686 | ||
| 5525 | #ifdef CONFIG_SCHED_DEBUG | 5687 | #ifdef CONFIG_SCHED_DEBUG |
| 5526 | 5688 | ||
| 5527 | static __read_mostly int sched_domain_debug_enabled; | 5689 | static __read_mostly int sched_debug_enabled; |
| 5528 | 5690 | ||
| 5529 | static int __init sched_domain_debug_setup(char *str) | 5691 | static int __init sched_debug_setup(char *str) |
| 5530 | { | 5692 | { |
| 5531 | sched_domain_debug_enabled = 1; | 5693 | sched_debug_enabled = 1; |
| 5532 | 5694 | ||
| 5533 | return 0; | 5695 | return 0; |
| 5534 | } | 5696 | } |
| 5535 | early_param("sched_debug", sched_domain_debug_setup); | 5697 | early_param("sched_debug", sched_debug_setup); |
| 5698 | |||
| 5699 | static inline bool sched_debug(void) | ||
| 5700 | { | ||
| 5701 | return sched_debug_enabled; | ||
| 5702 | } | ||
| 5536 | 5703 | ||
| 5537 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 5704 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
| 5538 | struct cpumask *groupmask) | 5705 | struct cpumask *groupmask) |
| @@ -5572,7 +5739,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 5572 | break; | 5739 | break; |
| 5573 | } | 5740 | } |
| 5574 | 5741 | ||
| 5575 | if (!group->sgp->power) { | 5742 | /* |
| 5743 | * Even though we initialize ->power to something semi-sane, | ||
| 5744 | * we leave power_orig unset. This allows us to detect if | ||
| 5745 | * domain iteration is still funny without causing /0 traps. | ||
| 5746 | */ | ||
| 5747 | if (!group->sgp->power_orig) { | ||
| 5576 | printk(KERN_CONT "\n"); | 5748 | printk(KERN_CONT "\n"); |
| 5577 | printk(KERN_ERR "ERROR: domain->cpu_power not " | 5749 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
| 5578 | "set\n"); | 5750 | "set\n"); |
| @@ -5620,7 +5792,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
| 5620 | { | 5792 | { |
| 5621 | int level = 0; | 5793 | int level = 0; |
| 5622 | 5794 | ||
| 5623 | if (!sched_domain_debug_enabled) | 5795 | if (!sched_debug_enabled) |
| 5624 | return; | 5796 | return; |
| 5625 | 5797 | ||
| 5626 | if (!sd) { | 5798 | if (!sd) { |
| @@ -5641,6 +5813,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
| 5641 | } | 5813 | } |
| 5642 | #else /* !CONFIG_SCHED_DEBUG */ | 5814 | #else /* !CONFIG_SCHED_DEBUG */ |
| 5643 | # define sched_domain_debug(sd, cpu) do { } while (0) | 5815 | # define sched_domain_debug(sd, cpu) do { } while (0) |
| 5816 | static inline bool sched_debug(void) | ||
| 5817 | { | ||
| 5818 | return false; | ||
| 5819 | } | ||
| 5644 | #endif /* CONFIG_SCHED_DEBUG */ | 5820 | #endif /* CONFIG_SCHED_DEBUG */ |
| 5645 | 5821 | ||
| 5646 | static int sd_degenerate(struct sched_domain *sd) | 5822 | static int sd_degenerate(struct sched_domain *sd) |
| @@ -5962,6 +6138,44 @@ struct sched_domain_topology_level { | |||
| 5962 | struct sd_data data; | 6138 | struct sd_data data; |
| 5963 | }; | 6139 | }; |
| 5964 | 6140 | ||
| 6141 | /* | ||
| 6142 | * Build an iteration mask that can exclude certain CPUs from the upwards | ||
| 6143 | * domain traversal. | ||
| 6144 | * | ||
| 6145 | * Asymmetric node setups can result in situations where the domain tree is of | ||
| 6146 | * unequal depth; make sure to skip domains that already cover the entire | ||
| 6147 | * range. | ||
| 6148 | * | ||
| 6149 | * In that case build_sched_domains() will have terminated the iteration early | ||
| 6150 | * and our sibling sd spans will be empty. Domains should always include the | ||
| 6151 | * cpu they're built on, so check that. | ||
| 6152 | * | ||
| 6153 | */ | ||
| 6154 | static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) | ||
| 6155 | { | ||
| 6156 | const struct cpumask *span = sched_domain_span(sd); | ||
| 6157 | struct sd_data *sdd = sd->private; | ||
| 6158 | struct sched_domain *sibling; | ||
| 6159 | int i; | ||
| 6160 | |||
| 6161 | for_each_cpu(i, span) { | ||
| 6162 | sibling = *per_cpu_ptr(sdd->sd, i); | ||
| 6163 | if (!cpumask_test_cpu(i, sched_domain_span(sibling))) | ||
| 6164 | continue; | ||
| 6165 | |||
| 6166 | cpumask_set_cpu(i, sched_group_mask(sg)); | ||
| 6167 | } | ||
| 6168 | } | ||
| 6169 | |||
| 6170 | /* | ||
| 6171 | * Return the canonical balance cpu for this group, this is the first cpu | ||
| 6172 | * of this group that's also in the iteration mask. | ||
| 6173 | */ | ||
| 6174 | int group_balance_cpu(struct sched_group *sg) | ||
| 6175 | { | ||
| 6176 | return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg)); | ||
| 6177 | } | ||
| 6178 | |||
| 5965 | static int | 6179 | static int |
| 5966 | build_overlap_sched_groups(struct sched_domain *sd, int cpu) | 6180 | build_overlap_sched_groups(struct sched_domain *sd, int cpu) |
| 5967 | { | 6181 | { |
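group_balance_cpu(), added above, is simply "the first cpu that is both in the group and in the group's iteration mask". A toy version with plain bitmasks standing in for the cpumask helpers (the names are invented for the illustration):

	#include <stdio.h>
	#include <stdint.h>

	/* Stand-ins for sched_group_cpus() / sched_group_mask(). */
	struct toy_group {
		uint64_t cpus;          /* CPUs spanned by the group */
		uint64_t balance_mask;  /* CPUs allowed to act as balance cpu */
	};

	/* First CPU set in both masks, i.e. the canonical balance cpu. */
	static int toy_group_balance_cpu(const struct toy_group *sg)
	{
		uint64_t both = sg->cpus & sg->balance_mask;

		return both ? __builtin_ctzll(both) : -1;
	}

	int main(void)
	{
		/* Group spans CPUs 0-3, but only 2-3 may act as balance cpu here. */
		struct toy_group sg = { .cpus = 0xf, .balance_mask = 0xc };

		printf("balance cpu: %d\n", toy_group_balance_cpu(&sg));  /* 2 */
		return 0;
	}

Pinning balancing to one canonical cpu per group is what keeps the overlapping groups built below from being balanced twice.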
| @@ -5980,6 +6194,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
| 5980 | if (cpumask_test_cpu(i, covered)) | 6194 | if (cpumask_test_cpu(i, covered)) |
| 5981 | continue; | 6195 | continue; |
| 5982 | 6196 | ||
| 6197 | child = *per_cpu_ptr(sdd->sd, i); | ||
| 6198 | |||
| 6199 | /* See the comment near build_group_mask(). */ | ||
| 6200 | if (!cpumask_test_cpu(i, sched_domain_span(child))) | ||
| 6201 | continue; | ||
| 6202 | |||
| 5983 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | 6203 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), |
| 5984 | GFP_KERNEL, cpu_to_node(cpu)); | 6204 | GFP_KERNEL, cpu_to_node(cpu)); |
| 5985 | 6205 | ||
| @@ -5987,8 +6207,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
| 5987 | goto fail; | 6207 | goto fail; |
| 5988 | 6208 | ||
| 5989 | sg_span = sched_group_cpus(sg); | 6209 | sg_span = sched_group_cpus(sg); |
| 5990 | |||
| 5991 | child = *per_cpu_ptr(sdd->sd, i); | ||
| 5992 | if (child->child) { | 6210 | if (child->child) { |
| 5993 | child = child->child; | 6211 | child = child->child; |
| 5994 | cpumask_copy(sg_span, sched_domain_span(child)); | 6212 | cpumask_copy(sg_span, sched_domain_span(child)); |
| @@ -5997,10 +6215,24 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
| 5997 | 6215 | ||
| 5998 | cpumask_or(covered, covered, sg_span); | 6216 | cpumask_or(covered, covered, sg_span); |
| 5999 | 6217 | ||
| 6000 | sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); | 6218 | sg->sgp = *per_cpu_ptr(sdd->sgp, i); |
| 6001 | atomic_inc(&sg->sgp->ref); | 6219 | if (atomic_inc_return(&sg->sgp->ref) == 1) |
| 6220 | build_group_mask(sd, sg); | ||
| 6002 | 6221 | ||
| 6003 | if (cpumask_test_cpu(cpu, sg_span)) | 6222 | /* |
| 6223 | * Initialize sgp->power such that even if we mess up the | ||
| 6224 | * domains and no possible iteration will get us here, we won't | ||
| 6225 | * die on a /0 trap. | ||
| 6226 | */ | ||
| 6227 | sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span); | ||
| 6228 | |||
| 6229 | /* | ||
| 6230 | * Make sure the first group of this domain contains the | ||
| 6231 | * canonical balance cpu. Otherwise the sched_domain iteration | ||
| 6232 | * breaks. See update_sg_lb_stats(). | ||
| 6233 | */ | ||
| 6234 | if ((!groups && cpumask_test_cpu(cpu, sg_span)) || | ||
| 6235 | group_balance_cpu(sg) == cpu) | ||
| 6004 | groups = sg; | 6236 | groups = sg; |
| 6005 | 6237 | ||
| 6006 | if (!first) | 6238 | if (!first) |
| @@ -6074,6 +6306,7 @@ build_sched_groups(struct sched_domain *sd, int cpu) | |||
| 6074 | 6306 | ||
| 6075 | cpumask_clear(sched_group_cpus(sg)); | 6307 | cpumask_clear(sched_group_cpus(sg)); |
| 6076 | sg->sgp->power = 0; | 6308 | sg->sgp->power = 0; |
| 6309 | cpumask_setall(sched_group_mask(sg)); | ||
| 6077 | 6310 | ||
| 6078 | for_each_cpu(j, span) { | 6311 | for_each_cpu(j, span) { |
| 6079 | if (get_group(j, sdd, NULL) != group) | 6312 | if (get_group(j, sdd, NULL) != group) |
| @@ -6115,7 +6348,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
| 6115 | sg = sg->next; | 6348 | sg = sg->next; |
| 6116 | } while (sg != sd->groups); | 6349 | } while (sg != sd->groups); |
| 6117 | 6350 | ||
| 6118 | if (cpu != group_first_cpu(sg)) | 6351 | if (cpu != group_balance_cpu(sg)) |
| 6119 | return; | 6352 | return; |
| 6120 | 6353 | ||
| 6121 | update_group_power(sd, cpu); | 6354 | update_group_power(sd, cpu); |
| @@ -6165,11 +6398,8 @@ int sched_domain_level_max; | |||
| 6165 | 6398 | ||
| 6166 | static int __init setup_relax_domain_level(char *str) | 6399 | static int __init setup_relax_domain_level(char *str) |
| 6167 | { | 6400 | { |
| 6168 | unsigned long val; | 6401 | if (kstrtoint(str, 0, &default_relax_domain_level)) |
| 6169 | 6402 | pr_warn("Unable to set relax_domain_level\n"); | |
| 6170 | val = simple_strtoul(str, NULL, 0); | ||
| 6171 | if (val < sched_domain_level_max) | ||
| 6172 | default_relax_domain_level = val; | ||
| 6173 | 6403 | ||
| 6174 | return 1; | 6404 | return 1; |
| 6175 | } | 6405 | } |
| @@ -6279,14 +6509,13 @@ static struct sched_domain_topology_level *sched_domain_topology = default_topol | |||
| 6279 | #ifdef CONFIG_NUMA | 6509 | #ifdef CONFIG_NUMA |
| 6280 | 6510 | ||
| 6281 | static int sched_domains_numa_levels; | 6511 | static int sched_domains_numa_levels; |
| 6282 | static int sched_domains_numa_scale; | ||
| 6283 | static int *sched_domains_numa_distance; | 6512 | static int *sched_domains_numa_distance; |
| 6284 | static struct cpumask ***sched_domains_numa_masks; | 6513 | static struct cpumask ***sched_domains_numa_masks; |
| 6285 | static int sched_domains_curr_level; | 6514 | static int sched_domains_curr_level; |
| 6286 | 6515 | ||
| 6287 | static inline int sd_local_flags(int level) | 6516 | static inline int sd_local_flags(int level) |
| 6288 | { | 6517 | { |
| 6289 | if (sched_domains_numa_distance[level] > REMOTE_DISTANCE) | 6518 | if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE) |
| 6290 | return 0; | 6519 | return 0; |
| 6291 | 6520 | ||
| 6292 | return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; | 6521 | return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; |
| @@ -6344,6 +6573,42 @@ static const struct cpumask *sd_numa_mask(int cpu) | |||
| 6344 | return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; | 6573 | return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; |
| 6345 | } | 6574 | } |
| 6346 | 6575 | ||
| 6576 | static void sched_numa_warn(const char *str) | ||
| 6577 | { | ||
| 6578 | static int done = false; | ||
| 6579 | int i,j; | ||
| 6580 | |||
| 6581 | if (done) | ||
| 6582 | return; | ||
| 6583 | |||
| 6584 | done = true; | ||
| 6585 | |||
| 6586 | printk(KERN_WARNING "ERROR: %s\n\n", str); | ||
| 6587 | |||
| 6588 | for (i = 0; i < nr_node_ids; i++) { | ||
| 6589 | printk(KERN_WARNING " "); | ||
| 6590 | for (j = 0; j < nr_node_ids; j++) | ||
| 6591 | printk(KERN_CONT "%02d ", node_distance(i,j)); | ||
| 6592 | printk(KERN_CONT "\n"); | ||
| 6593 | } | ||
| 6594 | printk(KERN_WARNING "\n"); | ||
| 6595 | } | ||
| 6596 | |||
| 6597 | static bool find_numa_distance(int distance) | ||
| 6598 | { | ||
| 6599 | int i; | ||
| 6600 | |||
| 6601 | if (distance == node_distance(0, 0)) | ||
| 6602 | return true; | ||
| 6603 | |||
| 6604 | for (i = 0; i < sched_domains_numa_levels; i++) { | ||
| 6605 | if (sched_domains_numa_distance[i] == distance) | ||
| 6606 | return true; | ||
| 6607 | } | ||
| 6608 | |||
| 6609 | return false; | ||
| 6610 | } | ||
| 6611 | |||
| 6347 | static void sched_init_numa(void) | 6612 | static void sched_init_numa(void) |
| 6348 | { | 6613 | { |
| 6349 | int next_distance, curr_distance = node_distance(0, 0); | 6614 | int next_distance, curr_distance = node_distance(0, 0); |
| @@ -6351,7 +6616,6 @@ static void sched_init_numa(void) | |||
| 6351 | int level = 0; | 6616 | int level = 0; |
| 6352 | int i, j, k; | 6617 | int i, j, k; |
| 6353 | 6618 | ||
| 6354 | sched_domains_numa_scale = curr_distance; | ||
| 6355 | sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); | 6619 | sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); |
| 6356 | if (!sched_domains_numa_distance) | 6620 | if (!sched_domains_numa_distance) |
| 6357 | return; | 6621 | return; |
| @@ -6362,23 +6626,41 @@ static void sched_init_numa(void) | |||
| 6362 | * | 6626 | * |
| 6363 | * Assumes node_distance(0,j) includes all distances in | 6627 | * Assumes node_distance(0,j) includes all distances in |
| 6364 | * node_distance(i,j) in order to avoid cubic time. | 6628 | * node_distance(i,j) in order to avoid cubic time. |
| 6365 | * | ||
| 6366 | * XXX: could be optimized to O(n log n) by using sort() | ||
| 6367 | */ | 6629 | */ |
| 6368 | next_distance = curr_distance; | 6630 | next_distance = curr_distance; |
| 6369 | for (i = 0; i < nr_node_ids; i++) { | 6631 | for (i = 0; i < nr_node_ids; i++) { |
| 6370 | for (j = 0; j < nr_node_ids; j++) { | 6632 | for (j = 0; j < nr_node_ids; j++) { |
| 6371 | int distance = node_distance(0, j); | 6633 | for (k = 0; k < nr_node_ids; k++) { |
| 6372 | if (distance > curr_distance && | 6634 | int distance = node_distance(i, k); |
| 6373 | (distance < next_distance || | 6635 | |
| 6374 | next_distance == curr_distance)) | 6636 | if (distance > curr_distance && |
| 6375 | next_distance = distance; | 6637 | (distance < next_distance || |
| 6638 | next_distance == curr_distance)) | ||
| 6639 | next_distance = distance; | ||
| 6640 | |||
| 6641 | /* | ||
| 6642 | * While not a strong assumption, it would be nice to know | ||
| 6643 | * about cases where node A is connected to B but B is not | ||
| 6644 | * equally connected to A. | ||
| 6645 | */ | ||
| 6646 | if (sched_debug() && node_distance(k, i) != distance) | ||
| 6647 | sched_numa_warn("Node-distance not symmetric"); | ||
| 6648 | |||
| 6649 | if (sched_debug() && i && !find_numa_distance(distance)) | ||
| 6650 | sched_numa_warn("Node-0 not representative"); | ||
| 6651 | } | ||
| 6652 | if (next_distance != curr_distance) { | ||
| 6653 | sched_domains_numa_distance[level++] = next_distance; | ||
| 6654 | sched_domains_numa_levels = level; | ||
| 6655 | curr_distance = next_distance; | ||
| 6656 | } else break; | ||
| 6376 | } | 6657 | } |
| 6377 | if (next_distance != curr_distance) { | 6658 | |
| 6378 | sched_domains_numa_distance[level++] = next_distance; | 6659 | /* |
| 6379 | sched_domains_numa_levels = level; | 6660 | * In case of sched_debug() we verify the above assumption. |
| 6380 | curr_distance = next_distance; | 6661 | */ |
| 6381 | } else break; | 6662 | if (!sched_debug()) |
| 6663 | break; | ||
| 6382 | } | 6664 | } |
| 6383 | /* | 6665 | /* |
| 6384 | * 'level' contains the number of unique distances, excluding the | 6666 | * 'level' contains the number of unique distances, excluding the |
| @@ -6403,7 +6685,7 @@ static void sched_init_numa(void) | |||
| 6403 | return; | 6685 | return; |
| 6404 | 6686 | ||
| 6405 | for (j = 0; j < nr_node_ids; j++) { | 6687 | for (j = 0; j < nr_node_ids; j++) { |
| 6406 | struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); | 6688 | struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); |
| 6407 | if (!mask) | 6689 | if (!mask) |
| 6408 | return; | 6690 | return; |
| 6409 | 6691 | ||
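For a concrete picture of what sched_init_numa() extracts, here is the same "next unique distance" scan run over a small made-up SLIT-style table (4 nodes, distances invented for the example; the loop structure only mirrors the idea of the kernel's scan, not its exact control flow):

	#include <stdio.h>

	#define NR_NODES 4

	/* Made-up distance table: 10 = local, larger = further away. */
	static const int dist[NR_NODES][NR_NODES] = {
		{ 10, 20, 20, 30 },
		{ 20, 10, 30, 20 },
		{ 20, 30, 10, 20 },
		{ 30, 20, 20, 10 },
	};

	int main(void)
	{
		int levels[NR_NODES];
		int nlevels = 0;
		int curr = dist[0][0];          /* local distance */
		int i, j;

		/* Repeatedly pick the smallest distance strictly above 'curr'. */
		for (;;) {
			int next = curr;

			for (i = 0; i < NR_NODES; i++) {
				for (j = 0; j < NR_NODES; j++) {
					int d = dist[i][j];

					if (d > curr && (d < next || next == curr))
						next = d;
				}
			}

			if (next == curr)
				break;
			levels[nlevels++] = next;
			curr = next;
		}

		printf("unique remote distances (NUMA levels): ");
		for (i = 0; i < nlevels; i++)
			printf("%d ", levels[i]);
		printf("\n");                   /* prints: 20 30 */
		return 0;
	}

Each distance found this way becomes one extra sched_domain_topology_level, with sched_domains_numa_masks[] holding, per level and node, the cpus within that distance.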
| @@ -6490,7 +6772,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
| 6490 | 6772 | ||
| 6491 | *per_cpu_ptr(sdd->sg, j) = sg; | 6773 | *per_cpu_ptr(sdd->sg, j) = sg; |
| 6492 | 6774 | ||
| 6493 | sgp = kzalloc_node(sizeof(struct sched_group_power), | 6775 | sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), |
| 6494 | GFP_KERNEL, cpu_to_node(j)); | 6776 | GFP_KERNEL, cpu_to_node(j)); |
| 6495 | if (!sgp) | 6777 | if (!sgp) |
| 6496 | return -ENOMEM; | 6778 | return -ENOMEM; |
| @@ -6543,7 +6825,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, | |||
| 6543 | if (!sd) | 6825 | if (!sd) |
| 6544 | return child; | 6826 | return child; |
| 6545 | 6827 | ||
| 6546 | set_domain_attribute(sd, attr); | ||
| 6547 | cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); | 6828 | cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); |
| 6548 | if (child) { | 6829 | if (child) { |
| 6549 | sd->level = child->level + 1; | 6830 | sd->level = child->level + 1; |
| @@ -6551,6 +6832,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, | |||
| 6551 | child->parent = sd; | 6832 | child->parent = sd; |
| 6552 | } | 6833 | } |
| 6553 | sd->child = child; | 6834 | sd->child = child; |
| 6835 | set_domain_attribute(sd, attr); | ||
| 6554 | 6836 | ||
| 6555 | return sd; | 6837 | return sd; |
| 6556 | } | 6838 | } |
| @@ -6691,7 +6973,6 @@ static int init_sched_domains(const struct cpumask *cpu_map) | |||
| 6691 | if (!doms_cur) | 6973 | if (!doms_cur) |
| 6692 | doms_cur = &fallback_doms; | 6974 | doms_cur = &fallback_doms; |
| 6693 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); | 6975 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
| 6694 | dattr_cur = NULL; | ||
| 6695 | err = build_sched_domains(doms_cur[0], NULL); | 6976 | err = build_sched_domains(doms_cur[0], NULL); |
| 6696 | register_sched_domain_sysctl(); | 6977 | register_sched_domain_sysctl(); |
| 6697 | 6978 | ||
