Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  511
1 file changed, 303 insertions(+), 208 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index f385eff4682d..8a0afb97af71 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -225,8 +225,10 @@ struct rq { | |||
225 | unsigned long nr_uninterruptible; | 225 | unsigned long nr_uninterruptible; |
226 | 226 | ||
227 | unsigned long expired_timestamp; | 227 | unsigned long expired_timestamp; |
228 | unsigned long long timestamp_last_tick; | 228 | /* Cached timestamp set by update_cpu_clock() */ |
229 | unsigned long long most_recent_timestamp; | ||
229 | struct task_struct *curr, *idle; | 230 | struct task_struct *curr, *idle; |
231 | unsigned long next_balance; | ||
230 | struct mm_struct *prev_mm; | 232 | struct mm_struct *prev_mm; |
231 | struct prio_array *active, *expired, arrays[2]; | 233 | struct prio_array *active, *expired, arrays[2]; |
232 | int best_expired_prio; | 234 | int best_expired_prio; |
@@ -426,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | |||
426 | * bump this up when changing the output format or the meaning of an existing | 428 | * bump this up when changing the output format or the meaning of an existing |
427 | * format, so that tools can adapt (or abort) | 429 | * format, so that tools can adapt (or abort) |
428 | */ | 430 | */ |
429 | #define SCHEDSTAT_VERSION 12 | 431 | #define SCHEDSTAT_VERSION 14 |
430 | 432 | ||
431 | static int show_schedstat(struct seq_file *seq, void *v) | 433 | static int show_schedstat(struct seq_file *seq, void *v) |
432 | { | 434 | { |
@@ -464,7 +466,8 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
464 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | 466 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); |
465 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; | 467 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; |
466 | itype++) { | 468 | itype++) { |
467 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", | 469 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " |
470 | "%lu", | ||
468 | sd->lb_cnt[itype], | 471 | sd->lb_cnt[itype], |
469 | sd->lb_balanced[itype], | 472 | sd->lb_balanced[itype], |
470 | sd->lb_failed[itype], | 473 | sd->lb_failed[itype], |
@@ -474,11 +477,13 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
474 | sd->lb_nobusyq[itype], | 477 | sd->lb_nobusyq[itype], |
475 | sd->lb_nobusyg[itype]); | 478 | sd->lb_nobusyg[itype]); |
476 | } | 479 | } |
477 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | 480 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" |
481 | " %lu %lu %lu\n", | ||
478 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | 482 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, |
479 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | 483 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, |
480 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | 484 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, |
481 | sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); | 485 | sd->ttwu_wake_remote, sd->ttwu_move_affine, |
486 | sd->ttwu_move_balance); | ||
482 | } | 487 | } |
483 | preempt_enable(); | 488 | preempt_enable(); |
484 | #endif | 489 | #endif |
@@ -547,7 +552,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | |||
547 | #endif | 552 | #endif |
548 | 553 | ||
549 | /* | 554 | /* |
550 | * rq_lock - lock a given runqueue and disable interrupts. | 555 | * this_rq_lock - lock this runqueue and disable interrupts. |
551 | */ | 556 | */ |
552 | static inline struct rq *this_rq_lock(void) | 557 | static inline struct rq *this_rq_lock(void) |
553 | __acquires(rq->lock) | 558 | __acquires(rq->lock) |
@@ -938,13 +943,16 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
938 | { | 943 | { |
939 | unsigned long long now; | 944 | unsigned long long now; |
940 | 945 | ||
946 | if (rt_task(p)) | ||
947 | goto out; | ||
948 | |||
941 | now = sched_clock(); | 949 | now = sched_clock(); |
942 | #ifdef CONFIG_SMP | 950 | #ifdef CONFIG_SMP |
943 | if (!local) { | 951 | if (!local) { |
944 | /* Compensate for drifting sched_clock */ | 952 | /* Compensate for drifting sched_clock */ |
945 | struct rq *this_rq = this_rq(); | 953 | struct rq *this_rq = this_rq(); |
946 | now = (now - this_rq->timestamp_last_tick) | 954 | now = (now - this_rq->most_recent_timestamp) |
947 | + rq->timestamp_last_tick; | 955 | + rq->most_recent_timestamp; |
948 | } | 956 | } |
949 | #endif | 957 | #endif |
950 | 958 | ||
@@ -959,8 +967,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
959 | (now - p->timestamp) >> 20); | 967 | (now - p->timestamp) >> 20); |
960 | } | 968 | } |
961 | 969 | ||
962 | if (!rt_task(p)) | 970 | p->prio = recalc_task_prio(p, now); |
963 | p->prio = recalc_task_prio(p, now); | ||
964 | 971 | ||
965 | /* | 972 | /* |
966 | * This checks to make sure it's not an uninterruptible task | 973 | * This checks to make sure it's not an uninterruptible task |
@@ -985,7 +992,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
985 | } | 992 | } |
986 | } | 993 | } |
987 | p->timestamp = now; | 994 | p->timestamp = now; |
988 | 995 | out: | |
989 | __activate_task(p, rq); | 996 | __activate_task(p, rq); |
990 | } | 997 | } |
991 | 998 | ||
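The hunk above adds an early exit so real-time tasks skip the interactivity bonus and priority recalculation, and it rebases `now` into the remote runqueue's clock domain using the renamed most_recent_timestamp fields. A minimal userspace sketch of that rebasing arithmetic follows; struct rq_stub and the numbers are made up, only the formula mirrors the diff (this is not kernel code):

#include <stdio.h>

struct rq_stub {
	unsigned long long most_recent_timestamp;	/* last update_cpu_clock() value */
};

static unsigned long long rebase(unsigned long long now_local,
				 const struct rq_stub *this_rq,
				 const struct rq_stub *remote_rq)
{
	/* cancel the local clock's offset, then apply the remote one */
	return (now_local - this_rq->most_recent_timestamp)
		+ remote_rq->most_recent_timestamp;
}

int main(void)
{
	struct rq_stub this_rq = { 1000 }, remote_rq = { 1300 };

	printf("%llu\n", rebase(1040, &this_rq, &remote_rq));	/* prints 1340 */
	return 0;
}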
@@ -1450,7 +1457,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1450 | 1457 | ||
1451 | if (this_sd->flags & SD_WAKE_AFFINE) { | 1458 | if (this_sd->flags & SD_WAKE_AFFINE) { |
1452 | unsigned long tl = this_load; | 1459 | unsigned long tl = this_load; |
1453 | unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu); | 1460 | unsigned long tl_per_task; |
1461 | |||
1462 | tl_per_task = cpu_avg_load_per_task(this_cpu); | ||
1454 | 1463 | ||
1455 | /* | 1464 | /* |
1456 | * If sync wakeup then subtract the (maximum possible) | 1465 | * If sync wakeup then subtract the (maximum possible) |
@@ -1688,8 +1697,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
1688 | * Not the local CPU - must adjust timestamp. This should | 1697 | * Not the local CPU - must adjust timestamp. This should |
1689 | * get optimised away in the !CONFIG_SMP case. | 1698 | * get optimised away in the !CONFIG_SMP case. |
1690 | */ | 1699 | */ |
1691 | p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) | 1700 | p->timestamp = (p->timestamp - this_rq->most_recent_timestamp) |
1692 | + rq->timestamp_last_tick; | 1701 | + rq->most_recent_timestamp; |
1693 | __activate_task(p, rq); | 1702 | __activate_task(p, rq); |
1694 | if (TASK_PREEMPTS_CURR(p, rq)) | 1703 | if (TASK_PREEMPTS_CURR(p, rq)) |
1695 | resched_task(rq->curr); | 1704 | resched_task(rq->curr); |
@@ -1952,6 +1961,7 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) | |||
1952 | __acquires(rq1->lock) | 1961 | __acquires(rq1->lock) |
1953 | __acquires(rq2->lock) | 1962 | __acquires(rq2->lock) |
1954 | { | 1963 | { |
1964 | BUG_ON(!irqs_disabled()); | ||
1955 | if (rq1 == rq2) { | 1965 | if (rq1 == rq2) { |
1956 | spin_lock(&rq1->lock); | 1966 | spin_lock(&rq1->lock); |
1957 | __acquire(rq2->lock); /* Fake it out ;) */ | 1967 | __acquire(rq2->lock); /* Fake it out ;) */ |
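The new BUG_ON(!irqs_disabled()) documents that callers must now disable interrupts themselves, since balancing is no longer driven directly from the timer interrupt. The address comparison below it is the usual deadlock-avoidance rule: both CPUs take the lower-addressed lock first. An illustrative pthread sketch of that rule, with mutexes standing in for the kernel's runqueue spinlocks:

#include <pthread.h>

static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);		/* same queue: one lock */
	} else if (a < b) {
		pthread_mutex_lock(a);		/* lower address first */
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}

int main(void)
{
	pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

	double_lock(&a, &b);
	double_unlock(&a, &b);
	return 0;
}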
@@ -1991,6 +2001,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1991 | __acquires(busiest->lock) | 2001 | __acquires(busiest->lock) |
1992 | __acquires(this_rq->lock) | 2002 | __acquires(this_rq->lock) |
1993 | { | 2003 | { |
2004 | if (unlikely(!irqs_disabled())) { | ||
2005 | /* printk() doesn't work good under rq->lock */ | ||
2006 | spin_unlock(&this_rq->lock); | ||
2007 | BUG_ON(1); | ||
2008 | } | ||
1994 | if (unlikely(!spin_trylock(&busiest->lock))) { | 2009 | if (unlikely(!spin_trylock(&busiest->lock))) { |
1995 | if (busiest < this_rq) { | 2010 | if (busiest < this_rq) { |
1996 | spin_unlock(&this_rq->lock); | 2011 | spin_unlock(&this_rq->lock); |
@@ -2061,8 +2076,8 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array, | |||
2061 | set_task_cpu(p, this_cpu); | 2076 | set_task_cpu(p, this_cpu); |
2062 | inc_nr_running(p, this_rq); | 2077 | inc_nr_running(p, this_rq); |
2063 | enqueue_task(p, this_array); | 2078 | enqueue_task(p, this_array); |
2064 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 2079 | p->timestamp = (p->timestamp - src_rq->most_recent_timestamp) |
2065 | + this_rq->timestamp_last_tick; | 2080 | + this_rq->most_recent_timestamp; |
2066 | /* | 2081 | /* |
2067 | * Note that idle threads have a prio of MAX_PRIO, for this test | 2082 | * Note that idle threads have a prio of MAX_PRIO, for this test |
2068 | * to be always true for them. | 2083 | * to be always true for them. |
@@ -2098,10 +2113,15 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
2098 | * 2) too many balance attempts have failed. | 2113 | * 2) too many balance attempts have failed. |
2099 | */ | 2114 | */ |
2100 | 2115 | ||
2101 | if (sd->nr_balance_failed > sd->cache_nice_tries) | 2116 | if (sd->nr_balance_failed > sd->cache_nice_tries) { |
2117 | #ifdef CONFIG_SCHEDSTATS | ||
2118 | if (task_hot(p, rq->most_recent_timestamp, sd)) | ||
2119 | schedstat_inc(sd, lb_hot_gained[idle]); | ||
2120 | #endif | ||
2102 | return 1; | 2121 | return 1; |
2122 | } | ||
2103 | 2123 | ||
2104 | if (task_hot(p, rq->timestamp_last_tick, sd)) | 2124 | if (task_hot(p, rq->most_recent_timestamp, sd)) |
2105 | return 0; | 2125 | return 0; |
2106 | return 1; | 2126 | return 1; |
2107 | } | 2127 | } |
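With this hunk the lb_hot_gained schedstat is bumped at the point where a cache-hot task is actually allowed to migrate, namely once enough balance attempts have failed, instead of later in move_tasks() (removed in the next hunk). A simplified sketch of the resulting decision order; task_is_hot and hot_gained are stand-ins rather than the kernel's task_hot() macro and schedstat field:

#include <stdio.h>

static int can_migrate_sketch(int nr_balance_failed, int cache_nice_tries,
			      int task_is_hot, unsigned long *hot_gained)
{
	if (nr_balance_failed > cache_nice_tries) {
		if (task_is_hot)
			(*hot_gained)++;	/* migrated despite being hot */
		return 1;
	}
	return !task_is_hot;			/* otherwise only cold tasks move */
}

int main(void)
{
	unsigned long hot_gained = 0;

	printf("%d %lu\n", can_migrate_sketch(3, 2, 1, &hot_gained), hot_gained);
	return 0;
}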
@@ -2199,11 +2219,6 @@ skip_queue: | |||
2199 | goto skip_bitmap; | 2219 | goto skip_bitmap; |
2200 | } | 2220 | } |
2201 | 2221 | ||
2202 | #ifdef CONFIG_SCHEDSTATS | ||
2203 | if (task_hot(tmp, busiest->timestamp_last_tick, sd)) | ||
2204 | schedstat_inc(sd, lb_hot_gained[idle]); | ||
2205 | #endif | ||
2206 | |||
2207 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); | 2222 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); |
2208 | pulled++; | 2223 | pulled++; |
2209 | rem_load_move -= tmp->load_weight; | 2224 | rem_load_move -= tmp->load_weight; |
@@ -2241,7 +2256,7 @@ out: | |||
2241 | static struct sched_group * | 2256 | static struct sched_group * |
2242 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 2257 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
2243 | unsigned long *imbalance, enum idle_type idle, int *sd_idle, | 2258 | unsigned long *imbalance, enum idle_type idle, int *sd_idle, |
2244 | cpumask_t *cpus) | 2259 | cpumask_t *cpus, int *balance) |
2245 | { | 2260 | { |
2246 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 2261 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
2247 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 2262 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
@@ -2270,10 +2285,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2270 | unsigned long load, group_capacity; | 2285 | unsigned long load, group_capacity; |
2271 | int local_group; | 2286 | int local_group; |
2272 | int i; | 2287 | int i; |
2288 | unsigned int balance_cpu = -1, first_idle_cpu = 0; | ||
2273 | unsigned long sum_nr_running, sum_weighted_load; | 2289 | unsigned long sum_nr_running, sum_weighted_load; |
2274 | 2290 | ||
2275 | local_group = cpu_isset(this_cpu, group->cpumask); | 2291 | local_group = cpu_isset(this_cpu, group->cpumask); |
2276 | 2292 | ||
2293 | if (local_group) | ||
2294 | balance_cpu = first_cpu(group->cpumask); | ||
2295 | |||
2277 | /* Tally up the load of all CPUs in the group */ | 2296 | /* Tally up the load of all CPUs in the group */ |
2278 | sum_weighted_load = sum_nr_running = avg_load = 0; | 2297 | sum_weighted_load = sum_nr_running = avg_load = 0; |
2279 | 2298 | ||
@@ -2289,9 +2308,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2289 | *sd_idle = 0; | 2308 | *sd_idle = 0; |
2290 | 2309 | ||
2291 | /* Bias balancing toward cpus of our domain */ | 2310 | /* Bias balancing toward cpus of our domain */ |
2292 | if (local_group) | 2311 | if (local_group) { |
2312 | if (idle_cpu(i) && !first_idle_cpu) { | ||
2313 | first_idle_cpu = 1; | ||
2314 | balance_cpu = i; | ||
2315 | } | ||
2316 | |||
2293 | load = target_load(i, load_idx); | 2317 | load = target_load(i, load_idx); |
2294 | else | 2318 | } else |
2295 | load = source_load(i, load_idx); | 2319 | load = source_load(i, load_idx); |
2296 | 2320 | ||
2297 | avg_load += load; | 2321 | avg_load += load; |
@@ -2299,6 +2323,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2299 | sum_weighted_load += rq->raw_weighted_load; | 2323 | sum_weighted_load += rq->raw_weighted_load; |
2300 | } | 2324 | } |
2301 | 2325 | ||
2326 | /* | ||
2327 | * First idle cpu or the first cpu(busiest) in this sched group | ||
2328 | * is eligible for doing load balancing at this and above | ||
2329 | * domains. | ||
2330 | */ | ||
2331 | if (local_group && balance_cpu != this_cpu && balance) { | ||
2332 | *balance = 0; | ||
2333 | goto ret; | ||
2334 | } | ||
2335 | |||
2302 | total_load += avg_load; | 2336 | total_load += avg_load; |
2303 | total_pwr += group->cpu_power; | 2337 | total_pwr += group->cpu_power; |
2304 | 2338 | ||
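find_busiest_group() now elects a single balance_cpu per local group: the first idle CPU if there is one, otherwise the group's first CPU; every other CPU reports *balance = 0 and skips balancing at this and higher domains. A small sketch of that election, with made-up arrays standing in for the group cpumask and per-CPU idle state:

#include <stdio.h>

static int pick_balance_cpu(const int *cpus, const int *idle, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (idle[i])
			return cpus[i];	/* first idle CPU wins */
	return cpus[0];			/* otherwise the group's first CPU */
}

int main(void)
{
	int cpus[] = { 4, 5, 6, 7 };
	int idle[] = { 0, 0, 1, 1 };

	printf("balance_cpu=%d\n", pick_balance_cpu(cpus, idle, 4));	/* 6 */
	return 0;
}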
@@ -2458,18 +2492,21 @@ small_imbalance: | |||
2458 | pwr_now /= SCHED_LOAD_SCALE; | 2492 | pwr_now /= SCHED_LOAD_SCALE; |
2459 | 2493 | ||
2460 | /* Amount of load we'd subtract */ | 2494 | /* Amount of load we'd subtract */ |
2461 | tmp = busiest_load_per_task*SCHED_LOAD_SCALE/busiest->cpu_power; | 2495 | tmp = busiest_load_per_task * SCHED_LOAD_SCALE / |
2496 | busiest->cpu_power; | ||
2462 | if (max_load > tmp) | 2497 | if (max_load > tmp) |
2463 | pwr_move += busiest->cpu_power * | 2498 | pwr_move += busiest->cpu_power * |
2464 | min(busiest_load_per_task, max_load - tmp); | 2499 | min(busiest_load_per_task, max_load - tmp); |
2465 | 2500 | ||
2466 | /* Amount of load we'd add */ | 2501 | /* Amount of load we'd add */ |
2467 | if (max_load*busiest->cpu_power < | 2502 | if (max_load * busiest->cpu_power < |
2468 | busiest_load_per_task*SCHED_LOAD_SCALE) | 2503 | busiest_load_per_task * SCHED_LOAD_SCALE) |
2469 | tmp = max_load*busiest->cpu_power/this->cpu_power; | 2504 | tmp = max_load * busiest->cpu_power / this->cpu_power; |
2470 | else | 2505 | else |
2471 | tmp = busiest_load_per_task*SCHED_LOAD_SCALE/this->cpu_power; | 2506 | tmp = busiest_load_per_task * SCHED_LOAD_SCALE / |
2472 | pwr_move += this->cpu_power*min(this_load_per_task, this_load + tmp); | 2507 | this->cpu_power; |
2508 | pwr_move += this->cpu_power * | ||
2509 | min(this_load_per_task, this_load + tmp); | ||
2473 | pwr_move /= SCHED_LOAD_SCALE; | 2510 | pwr_move /= SCHED_LOAD_SCALE; |
2474 | 2511 | ||
2475 | /* Move if we gain throughput */ | 2512 | /* Move if we gain throughput */ |
@@ -2490,8 +2527,8 @@ out_balanced: | |||
2490 | *imbalance = min_load_per_task; | 2527 | *imbalance = min_load_per_task; |
2491 | return group_min; | 2528 | return group_min; |
2492 | } | 2529 | } |
2493 | ret: | ||
2494 | #endif | 2530 | #endif |
2531 | ret: | ||
2495 | *imbalance = 0; | 2532 | *imbalance = 0; |
2496 | return NULL; | 2533 | return NULL; |
2497 | } | 2534 | } |
@@ -2540,17 +2577,17 @@ static inline unsigned long minus_1_or_zero(unsigned long n) | |||
2540 | /* | 2577 | /* |
2541 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2578 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
2542 | * tasks if there is an imbalance. | 2579 | * tasks if there is an imbalance. |
2543 | * | ||
2544 | * Called with this_rq unlocked. | ||
2545 | */ | 2580 | */ |
2546 | static int load_balance(int this_cpu, struct rq *this_rq, | 2581 | static int load_balance(int this_cpu, struct rq *this_rq, |
2547 | struct sched_domain *sd, enum idle_type idle) | 2582 | struct sched_domain *sd, enum idle_type idle, |
2583 | int *balance) | ||
2548 | { | 2584 | { |
2549 | int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 2585 | int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; |
2550 | struct sched_group *group; | 2586 | struct sched_group *group; |
2551 | unsigned long imbalance; | 2587 | unsigned long imbalance; |
2552 | struct rq *busiest; | 2588 | struct rq *busiest; |
2553 | cpumask_t cpus = CPU_MASK_ALL; | 2589 | cpumask_t cpus = CPU_MASK_ALL; |
2590 | unsigned long flags; | ||
2554 | 2591 | ||
2555 | /* | 2592 | /* |
2556 | * When power savings policy is enabled for the parent domain, idle | 2593 | * When power savings policy is enabled for the parent domain, idle |
@@ -2566,7 +2603,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
2566 | 2603 | ||
2567 | redo: | 2604 | redo: |
2568 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 2605 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
2569 | &cpus); | 2606 | &cpus, balance); |
2607 | |||
2608 | if (*balance == 0) | ||
2609 | goto out_balanced; | ||
2610 | |||
2570 | if (!group) { | 2611 | if (!group) { |
2571 | schedstat_inc(sd, lb_nobusyg[idle]); | 2612 | schedstat_inc(sd, lb_nobusyg[idle]); |
2572 | goto out_balanced; | 2613 | goto out_balanced; |
@@ -2590,11 +2631,13 @@ redo: | |||
2590 | * still unbalanced. nr_moved simply stays zero, so it is | 2631 | * still unbalanced. nr_moved simply stays zero, so it is |
2591 | * correctly treated as an imbalance. | 2632 | * correctly treated as an imbalance. |
2592 | */ | 2633 | */ |
2634 | local_irq_save(flags); | ||
2593 | double_rq_lock(this_rq, busiest); | 2635 | double_rq_lock(this_rq, busiest); |
2594 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2636 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
2595 | minus_1_or_zero(busiest->nr_running), | 2637 | minus_1_or_zero(busiest->nr_running), |
2596 | imbalance, sd, idle, &all_pinned); | 2638 | imbalance, sd, idle, &all_pinned); |
2597 | double_rq_unlock(this_rq, busiest); | 2639 | double_rq_unlock(this_rq, busiest); |
2640 | local_irq_restore(flags); | ||
2598 | 2641 | ||
2599 | /* All tasks on this runqueue were pinned by CPU affinity */ | 2642 | /* All tasks on this runqueue were pinned by CPU affinity */ |
2600 | if (unlikely(all_pinned)) { | 2643 | if (unlikely(all_pinned)) { |
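Because load_balance() now runs from softirq context rather than from the timer interrupt, hard interrupts may be enabled on entry, so this hunk brackets the double runqueue lock with local_irq_save()/local_irq_restore(), and the later hunks switch busiest->lock to the irqsave variants. A loose userspace analogue of that bracketing, with POSIX signal blocking standing in for interrupt disabling (purely illustrative):

#include <signal.h>
#include <stdio.h>

int main(void)
{
	sigset_t all, old;

	sigfillset(&all);
	sigprocmask(SIG_BLOCK, &all, &old);	/* local_irq_save(flags)    */
	puts("double_rq_lock(); move_tasks(); double_rq_unlock();");
	sigprocmask(SIG_SETMASK, &old, NULL);	/* local_irq_restore(flags) */

	return 0;
}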
@@ -2611,13 +2654,13 @@ redo: | |||
2611 | 2654 | ||
2612 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 2655 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
2613 | 2656 | ||
2614 | spin_lock(&busiest->lock); | 2657 | spin_lock_irqsave(&busiest->lock, flags); |
2615 | 2658 | ||
2616 | /* don't kick the migration_thread, if the curr | 2659 | /* don't kick the migration_thread, if the curr |
2617 | * task on busiest cpu can't be moved to this_cpu | 2660 | * task on busiest cpu can't be moved to this_cpu |
2618 | */ | 2661 | */ |
2619 | if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { | 2662 | if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { |
2620 | spin_unlock(&busiest->lock); | 2663 | spin_unlock_irqrestore(&busiest->lock, flags); |
2621 | all_pinned = 1; | 2664 | all_pinned = 1; |
2622 | goto out_one_pinned; | 2665 | goto out_one_pinned; |
2623 | } | 2666 | } |
@@ -2627,7 +2670,7 @@ redo: | |||
2627 | busiest->push_cpu = this_cpu; | 2670 | busiest->push_cpu = this_cpu; |
2628 | active_balance = 1; | 2671 | active_balance = 1; |
2629 | } | 2672 | } |
2630 | spin_unlock(&busiest->lock); | 2673 | spin_unlock_irqrestore(&busiest->lock, flags); |
2631 | if (active_balance) | 2674 | if (active_balance) |
2632 | wake_up_process(busiest->migration_thread); | 2675 | wake_up_process(busiest->migration_thread); |
2633 | 2676 | ||
@@ -2706,7 +2749,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
2706 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2749 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
2707 | redo: | 2750 | redo: |
2708 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, | 2751 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, |
2709 | &sd_idle, &cpus); | 2752 | &sd_idle, &cpus, NULL); |
2710 | if (!group) { | 2753 | if (!group) { |
2711 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2754 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); |
2712 | goto out_balanced; | 2755 | goto out_balanced; |
@@ -2766,14 +2809,28 @@ out_balanced: | |||
2766 | static void idle_balance(int this_cpu, struct rq *this_rq) | 2809 | static void idle_balance(int this_cpu, struct rq *this_rq) |
2767 | { | 2810 | { |
2768 | struct sched_domain *sd; | 2811 | struct sched_domain *sd; |
2812 | int pulled_task = 0; | ||
2813 | unsigned long next_balance = jiffies + 60 * HZ; | ||
2769 | 2814 | ||
2770 | for_each_domain(this_cpu, sd) { | 2815 | for_each_domain(this_cpu, sd) { |
2771 | if (sd->flags & SD_BALANCE_NEWIDLE) { | 2816 | if (sd->flags & SD_BALANCE_NEWIDLE) { |
2772 | /* If we've pulled tasks over stop searching: */ | 2817 | /* If we've pulled tasks over stop searching: */ |
2773 | if (load_balance_newidle(this_cpu, this_rq, sd)) | 2818 | pulled_task = load_balance_newidle(this_cpu, |
2819 | this_rq, sd); | ||
2820 | if (time_after(next_balance, | ||
2821 | sd->last_balance + sd->balance_interval)) | ||
2822 | next_balance = sd->last_balance | ||
2823 | + sd->balance_interval; | ||
2824 | if (pulled_task) | ||
2774 | break; | 2825 | break; |
2775 | } | 2826 | } |
2776 | } | 2827 | } |
2828 | if (!pulled_task) | ||
2829 | /* | ||
2830 | * We are going idle. next_balance may be set based on | ||
2831 | * a busy processor. So reset next_balance. | ||
2832 | */ | ||
2833 | this_rq->next_balance = next_balance; | ||
2777 | } | 2834 | } |
2778 | 2835 | ||
2779 | /* | 2836 | /* |
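idle_balance() now remembers the earliest pending balance deadline across the domains it walked and stores it in rq->next_balance, so a CPU going idle is not woken again by a deadline inherited from its busy period. The accumulation is wrap-safe thanks to time_after(); a sketch with a userspace stand-in for that macro and invented jiffies values:

#include <stdio.h>

#define time_after(a, b)  ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long jiffies = 1000, HZ = 250;
	unsigned long next_balance = jiffies + 60 * HZ;	/* far-future default */
	unsigned long domain_due[] = { 1010, 1400, 1005 };
	int i;

	for (i = 0; i < 3; i++)
		if (time_after(next_balance, domain_due[i]))
			next_balance = domain_due[i];	/* keep the earliest */

	printf("next_balance=%lu\n", next_balance);	/* 1005 */
	return 0;
}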
@@ -2826,26 +2883,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
2826 | spin_unlock(&target_rq->lock); | 2883 | spin_unlock(&target_rq->lock); |
2827 | } | 2884 | } |
2828 | 2885 | ||
2829 | /* | 2886 | static void update_load(struct rq *this_rq) |
2830 | * rebalance_tick will get called every timer tick, on every CPU. | ||
2831 | * | ||
2832 | * It checks each scheduling domain to see if it is due to be balanced, | ||
2833 | * and initiates a balancing operation if so. | ||
2834 | * | ||
2835 | * Balancing parameters are set up in arch_init_sched_domains. | ||
2836 | */ | ||
2837 | |||
2838 | /* Don't have all balancing operations going off at once: */ | ||
2839 | static inline unsigned long cpu_offset(int cpu) | ||
2840 | { | 2887 | { |
2841 | return jiffies + cpu * HZ / NR_CPUS; | 2888 | unsigned long this_load; |
2842 | } | ||
2843 | |||
2844 | static void | ||
2845 | rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | ||
2846 | { | ||
2847 | unsigned long this_load, interval, j = cpu_offset(this_cpu); | ||
2848 | struct sched_domain *sd; | ||
2849 | int i, scale; | 2889 | int i, scale; |
2850 | 2890 | ||
2851 | this_load = this_rq->raw_weighted_load; | 2891 | this_load = this_rq->raw_weighted_load; |
@@ -2865,6 +2905,32 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | |||
2865 | new_load += scale-1; | 2905 | new_load += scale-1; |
2866 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; | 2906 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; |
2867 | } | 2907 | } |
2908 | } | ||
2909 | |||
2910 | /* | ||
2911 | * run_rebalance_domains is triggered when needed from the scheduler tick. | ||
2912 | * | ||
2913 | * It checks each scheduling domain to see if it is due to be balanced, | ||
2914 | * and initiates a balancing operation if so. | ||
2915 | * | ||
2916 | * Balancing parameters are set up in arch_init_sched_domains. | ||
2917 | */ | ||
2918 | static DEFINE_SPINLOCK(balancing); | ||
2919 | |||
2920 | static void run_rebalance_domains(struct softirq_action *h) | ||
2921 | { | ||
2922 | int this_cpu = smp_processor_id(), balance = 1; | ||
2923 | struct rq *this_rq = cpu_rq(this_cpu); | ||
2924 | unsigned long interval; | ||
2925 | struct sched_domain *sd; | ||
2926 | /* | ||
2927 | * We are idle if there are no processes running. This | ||
2928 | * is valid even if we are the idle process (SMT). | ||
2929 | */ | ||
2930 | enum idle_type idle = !this_rq->nr_running ? | ||
2931 | SCHED_IDLE : NOT_IDLE; | ||
2932 | /* Earliest time when we have to call run_rebalance_domains again */ | ||
2933 | unsigned long next_balance = jiffies + 60*HZ; | ||
2868 | 2934 | ||
2869 | for_each_domain(this_cpu, sd) { | 2935 | for_each_domain(this_cpu, sd) { |
2870 | if (!(sd->flags & SD_LOAD_BALANCE)) | 2936 | if (!(sd->flags & SD_LOAD_BALANCE)) |
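What used to be the head of rebalance_tick() survives as update_load(), still called every tick: each cpu_load[i] is an exponentially decayed load average with weight 2^i, and rounding is biased upward when the load rises so short spikes are not lost to truncation. A runnable sketch of that recurrence with invented sample values (only the update formula mirrors the diff):

#include <stdio.h>

int main(void)
{
	unsigned long cpu_load[3] = { 0, 0, 0 };
	unsigned long samples[] = { 0, 2048, 2048, 0, 0 };
	int idx, scale, s;

	for (s = 0; s < 5; s++) {
		unsigned long this_load = samples[s];

		for (idx = 0, scale = 1; idx < 3; idx++, scale += scale) {
			unsigned long old_load = cpu_load[idx];
			unsigned long new_load = this_load;

			if (new_load > old_load)
				new_load += scale - 1;	/* round up on increase */
			cpu_load[idx] = (old_load * (scale - 1) + new_load) / scale;
		}
		printf("t=%d load=%lu -> [%lu %lu %lu]\n", s, this_load,
		       cpu_load[0], cpu_load[1], cpu_load[2]);
	}
	return 0;
}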
@@ -2879,8 +2945,13 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | |||
2879 | if (unlikely(!interval)) | 2945 | if (unlikely(!interval)) |
2880 | interval = 1; | 2946 | interval = 1; |
2881 | 2947 | ||
2882 | if (j - sd->last_balance >= interval) { | 2948 | if (sd->flags & SD_SERIALIZE) { |
2883 | if (load_balance(this_cpu, this_rq, sd, idle)) { | 2949 | if (!spin_trylock(&balancing)) |
2950 | goto out; | ||
2951 | } | ||
2952 | |||
2953 | if (time_after_eq(jiffies, sd->last_balance + interval)) { | ||
2954 | if (load_balance(this_cpu, this_rq, sd, idle, &balance)) { | ||
2884 | /* | 2955 | /* |
2885 | * We've pulled tasks over so either we're no | 2956 | * We've pulled tasks over so either we're no |
2886 | * longer idle, or one of our SMT siblings is | 2957 | * longer idle, or one of our SMT siblings is |
@@ -2888,39 +2959,48 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | |||
2888 | */ | 2959 | */ |
2889 | idle = NOT_IDLE; | 2960 | idle = NOT_IDLE; |
2890 | } | 2961 | } |
2891 | sd->last_balance += interval; | 2962 | sd->last_balance = jiffies; |
2892 | } | 2963 | } |
2964 | if (sd->flags & SD_SERIALIZE) | ||
2965 | spin_unlock(&balancing); | ||
2966 | out: | ||
2967 | if (time_after(next_balance, sd->last_balance + interval)) | ||
2968 | next_balance = sd->last_balance + interval; | ||
2969 | |||
2970 | /* | ||
2971 | * Stop the load balance at this level. There is another | ||
2972 | * CPU in our sched group which is doing load balancing more | ||
2973 | * actively. | ||
2974 | */ | ||
2975 | if (!balance) | ||
2976 | break; | ||
2893 | } | 2977 | } |
2978 | this_rq->next_balance = next_balance; | ||
2894 | } | 2979 | } |
2895 | #else | 2980 | #else |
2896 | /* | 2981 | /* |
2897 | * on UP we do not need to balance between CPUs: | 2982 | * on UP we do not need to balance between CPUs: |
2898 | */ | 2983 | */ |
2899 | static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle) | ||
2900 | { | ||
2901 | } | ||
2902 | static inline void idle_balance(int cpu, struct rq *rq) | 2984 | static inline void idle_balance(int cpu, struct rq *rq) |
2903 | { | 2985 | { |
2904 | } | 2986 | } |
2905 | #endif | 2987 | #endif |
2906 | 2988 | ||
2907 | static inline int wake_priority_sleeper(struct rq *rq) | 2989 | static inline void wake_priority_sleeper(struct rq *rq) |
2908 | { | 2990 | { |
2909 | int ret = 0; | ||
2910 | |||
2911 | #ifdef CONFIG_SCHED_SMT | 2991 | #ifdef CONFIG_SCHED_SMT |
2992 | if (!rq->nr_running) | ||
2993 | return; | ||
2994 | |||
2912 | spin_lock(&rq->lock); | 2995 | spin_lock(&rq->lock); |
2913 | /* | 2996 | /* |
2914 | * If an SMT sibling task has been put to sleep for priority | 2997 | * If an SMT sibling task has been put to sleep for priority |
2915 | * reasons reschedule the idle task to see if it can now run. | 2998 | * reasons reschedule the idle task to see if it can now run. |
2916 | */ | 2999 | */ |
2917 | if (rq->nr_running) { | 3000 | if (rq->nr_running) |
2918 | resched_task(rq->idle); | 3001 | resched_task(rq->idle); |
2919 | ret = 1; | ||
2920 | } | ||
2921 | spin_unlock(&rq->lock); | 3002 | spin_unlock(&rq->lock); |
2922 | #endif | 3003 | #endif |
2923 | return ret; | ||
2924 | } | 3004 | } |
2925 | 3005 | ||
2926 | DEFINE_PER_CPU(struct kernel_stat, kstat); | 3006 | DEFINE_PER_CPU(struct kernel_stat, kstat); |
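Domain balancing itself moves into run_rebalance_domains(), a softirq handler: domains flagged SD_SERIALIZE are balanced by at most one CPU at a time via a global trylock, a zero `balance` result stops redundant balancing at higher levels, and the loop records the earliest next deadline in rq->next_balance. A pthread sketch of the trylock-and-skip idea (an analogue, not the kernel's spinlock API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t balancing = PTHREAD_MUTEX_INITIALIZER;

static void balance_domain(int serialize)
{
	if (serialize && pthread_mutex_trylock(&balancing) != 0) {
		puts("another CPU is already balancing, skip this round");
		return;
	}
	puts("balancing this domain");
	if (serialize)
		pthread_mutex_unlock(&balancing);
}

int main(void)
{
	balance_domain(1);
	return 0;
}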
@@ -2934,7 +3014,8 @@ EXPORT_PER_CPU_SYMBOL(kstat); | |||
2934 | static inline void | 3014 | static inline void |
2935 | update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) | 3015 | update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) |
2936 | { | 3016 | { |
2937 | p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); | 3017 | p->sched_time += now - p->last_ran; |
3018 | p->last_ran = rq->most_recent_timestamp = now; | ||
2938 | } | 3019 | } |
2939 | 3020 | ||
2940 | /* | 3021 | /* |
@@ -2947,8 +3028,7 @@ unsigned long long current_sched_time(const struct task_struct *p) | |||
2947 | unsigned long flags; | 3028 | unsigned long flags; |
2948 | 3029 | ||
2949 | local_irq_save(flags); | 3030 | local_irq_save(flags); |
2950 | ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); | 3031 | ns = p->sched_time + sched_clock() - p->last_ran; |
2951 | ns = p->sched_time + sched_clock() - ns; | ||
2952 | local_irq_restore(flags); | 3032 | local_irq_restore(flags); |
2953 | 3033 | ||
2954 | return ns; | 3034 | return ns; |
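Task CPU-time accounting now revolves around a per-task last_ran stamp: update_cpu_clock() charges the interval since last_ran and caches the reading in rq->most_recent_timestamp, and current_sched_time() simply adds the time since the last charge. A compact sketch of that bookkeeping with invented timestamps (field names mirror the diff, but this is not kernel code):

#include <stdio.h>

struct task_clock {
	unsigned long long sched_time;	/* accumulated ns */
	unsigned long long last_ran;	/* when it was last charged */
};

/* charge the task at a tick, as update_cpu_clock() now does */
static void tick(struct task_clock *t, unsigned long long now)
{
	t->sched_time += now - t->last_ran;
	t->last_ran = now;
}

/* live reading, as current_sched_time() now does */
static unsigned long long read_clock(const struct task_clock *t,
				     unsigned long long now)
{
	return t->sched_time + (now - t->last_ran);
}

int main(void)
{
	struct task_clock t = { 0, 100 };

	tick(&t, 150);
	tick(&t, 220);
	printf("%llu\n", read_clock(&t, 250));	/* 150 ns charged so far */
	return 0;
}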
@@ -3048,35 +3128,12 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
3048 | cpustat->steal = cputime64_add(cpustat->steal, tmp); | 3128 | cpustat->steal = cputime64_add(cpustat->steal, tmp); |
3049 | } | 3129 | } |
3050 | 3130 | ||
3051 | /* | 3131 | static void task_running_tick(struct rq *rq, struct task_struct *p) |
3052 | * This function gets called by the timer code, with HZ frequency. | ||
3053 | * We call it with interrupts disabled. | ||
3054 | * | ||
3055 | * It also gets called by the fork code, when changing the parent's | ||
3056 | * timeslices. | ||
3057 | */ | ||
3058 | void scheduler_tick(void) | ||
3059 | { | 3132 | { |
3060 | unsigned long long now = sched_clock(); | ||
3061 | struct task_struct *p = current; | ||
3062 | int cpu = smp_processor_id(); | ||
3063 | struct rq *rq = cpu_rq(cpu); | ||
3064 | |||
3065 | update_cpu_clock(p, rq, now); | ||
3066 | |||
3067 | rq->timestamp_last_tick = now; | ||
3068 | |||
3069 | if (p == rq->idle) { | ||
3070 | if (wake_priority_sleeper(rq)) | ||
3071 | goto out; | ||
3072 | rebalance_tick(cpu, rq, SCHED_IDLE); | ||
3073 | return; | ||
3074 | } | ||
3075 | |||
3076 | /* Task might have expired already, but not scheduled off yet */ | ||
3077 | if (p->array != rq->active) { | 3133 | if (p->array != rq->active) { |
3134 | /* Task has expired but was not scheduled yet */ | ||
3078 | set_tsk_need_resched(p); | 3135 | set_tsk_need_resched(p); |
3079 | goto out; | 3136 | return; |
3080 | } | 3137 | } |
3081 | spin_lock(&rq->lock); | 3138 | spin_lock(&rq->lock); |
3082 | /* | 3139 | /* |
@@ -3144,8 +3201,34 @@ void scheduler_tick(void) | |||
3144 | } | 3201 | } |
3145 | out_unlock: | 3202 | out_unlock: |
3146 | spin_unlock(&rq->lock); | 3203 | spin_unlock(&rq->lock); |
3147 | out: | 3204 | } |
3148 | rebalance_tick(cpu, rq, NOT_IDLE); | 3205 | |
3206 | /* | ||
3207 | * This function gets called by the timer code, with HZ frequency. | ||
3208 | * We call it with interrupts disabled. | ||
3209 | * | ||
3210 | * It also gets called by the fork code, when changing the parent's | ||
3211 | * timeslices. | ||
3212 | */ | ||
3213 | void scheduler_tick(void) | ||
3214 | { | ||
3215 | unsigned long long now = sched_clock(); | ||
3216 | struct task_struct *p = current; | ||
3217 | int cpu = smp_processor_id(); | ||
3218 | struct rq *rq = cpu_rq(cpu); | ||
3219 | |||
3220 | update_cpu_clock(p, rq, now); | ||
3221 | |||
3222 | if (p == rq->idle) | ||
3223 | /* Task on the idle queue */ | ||
3224 | wake_priority_sleeper(rq); | ||
3225 | else | ||
3226 | task_running_tick(rq, p); | ||
3227 | #ifdef CONFIG_SMP | ||
3228 | update_load(rq); | ||
3229 | if (time_after_eq(jiffies, rq->next_balance)) | ||
3230 | raise_softirq(SCHED_SOFTIRQ); | ||
3231 | #endif | ||
3149 | } | 3232 | } |
3150 | 3233 | ||
3151 | #ifdef CONFIG_SCHED_SMT | 3234 | #ifdef CONFIG_SCHED_SMT |
@@ -3291,7 +3374,8 @@ void fastcall add_preempt_count(int val) | |||
3291 | /* | 3374 | /* |
3292 | * Spinlock count overflowing soon? | 3375 | * Spinlock count overflowing soon? |
3293 | */ | 3376 | */ |
3294 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); | 3377 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= |
3378 | PREEMPT_MASK - 10); | ||
3295 | } | 3379 | } |
3296 | EXPORT_SYMBOL(add_preempt_count); | 3380 | EXPORT_SYMBOL(add_preempt_count); |
3297 | 3381 | ||
@@ -4990,8 +5074,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
4990 | * afterwards, and pretending it was a local activate. | 5074 | * afterwards, and pretending it was a local activate. |
4991 | * This way is cleaner and logically correct. | 5075 | * This way is cleaner and logically correct. |
4992 | */ | 5076 | */ |
4993 | p->timestamp = p->timestamp - rq_src->timestamp_last_tick | 5077 | p->timestamp = p->timestamp - rq_src->most_recent_timestamp |
4994 | + rq_dest->timestamp_last_tick; | 5078 | + rq_dest->most_recent_timestamp; |
4995 | deactivate_task(p, rq_src); | 5079 | deactivate_task(p, rq_src); |
4996 | __activate_task(p, rq_dest); | 5080 | __activate_task(p, rq_dest); |
4997 | if (TASK_PREEMPTS_CURR(p, rq_dest)) | 5081 | if (TASK_PREEMPTS_CURR(p, rq_dest)) |
@@ -5067,7 +5151,10 @@ wait_to_die: | |||
5067 | } | 5151 | } |
5068 | 5152 | ||
5069 | #ifdef CONFIG_HOTPLUG_CPU | 5153 | #ifdef CONFIG_HOTPLUG_CPU |
5070 | /* Figure out where task on dead CPU should go, use force if neccessary. */ | 5154 | /* |
5155 | * Figure out where task on dead CPU should go, use force if neccessary. | ||
5156 | * NOTE: interrupts should be disabled by the caller | ||
5157 | */ | ||
5071 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 5158 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) |
5072 | { | 5159 | { |
5073 | unsigned long flags; | 5160 | unsigned long flags; |
@@ -5187,6 +5274,7 @@ void idle_task_exit(void) | |||
5187 | mmdrop(mm); | 5274 | mmdrop(mm); |
5188 | } | 5275 | } |
5189 | 5276 | ||
5277 | /* called under rq->lock with disabled interrupts */ | ||
5190 | static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | 5278 | static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) |
5191 | { | 5279 | { |
5192 | struct rq *rq = cpu_rq(dead_cpu); | 5280 | struct rq *rq = cpu_rq(dead_cpu); |
@@ -5203,10 +5291,11 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
5203 | * Drop lock around migration; if someone else moves it, | 5291 | * Drop lock around migration; if someone else moves it, |
5204 | * that's OK. No task can be added to this CPU, so iteration is | 5292 | * that's OK. No task can be added to this CPU, so iteration is |
5205 | * fine. | 5293 | * fine. |
5294 | * NOTE: interrupts should be left disabled --dev@ | ||
5206 | */ | 5295 | */ |
5207 | spin_unlock_irq(&rq->lock); | 5296 | spin_unlock(&rq->lock); |
5208 | move_task_off_dead_cpu(dead_cpu, p); | 5297 | move_task_off_dead_cpu(dead_cpu, p); |
5209 | spin_lock_irq(&rq->lock); | 5298 | spin_lock(&rq->lock); |
5210 | 5299 | ||
5211 | put_task_struct(p); | 5300 | put_task_struct(p); |
5212 | } | 5301 | } |
@@ -5359,16 +5448,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
5359 | if (!(sd->flags & SD_LOAD_BALANCE)) { | 5448 | if (!(sd->flags & SD_LOAD_BALANCE)) { |
5360 | printk("does not load-balance\n"); | 5449 | printk("does not load-balance\n"); |
5361 | if (sd->parent) | 5450 | if (sd->parent) |
5362 | printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); | 5451 | printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" |
5452 | " has parent"); | ||
5363 | break; | 5453 | break; |
5364 | } | 5454 | } |
5365 | 5455 | ||
5366 | printk("span %s\n", str); | 5456 | printk("span %s\n", str); |
5367 | 5457 | ||
5368 | if (!cpu_isset(cpu, sd->span)) | 5458 | if (!cpu_isset(cpu, sd->span)) |
5369 | printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); | 5459 | printk(KERN_ERR "ERROR: domain->span does not contain " |
5460 | "CPU%d\n", cpu); | ||
5370 | if (!cpu_isset(cpu, group->cpumask)) | 5461 | if (!cpu_isset(cpu, group->cpumask)) |
5371 | printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); | 5462 | printk(KERN_ERR "ERROR: domain->groups does not contain" |
5463 | " CPU%d\n", cpu); | ||
5372 | 5464 | ||
5373 | printk(KERN_DEBUG); | 5465 | printk(KERN_DEBUG); |
5374 | for (i = 0; i < level + 2; i++) | 5466 | for (i = 0; i < level + 2; i++) |
@@ -5383,7 +5475,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
5383 | 5475 | ||
5384 | if (!group->cpu_power) { | 5476 | if (!group->cpu_power) { |
5385 | printk("\n"); | 5477 | printk("\n"); |
5386 | printk(KERN_ERR "ERROR: domain->cpu_power not set\n"); | 5478 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
5479 | "set\n"); | ||
5387 | } | 5480 | } |
5388 | 5481 | ||
5389 | if (!cpus_weight(group->cpumask)) { | 5482 | if (!cpus_weight(group->cpumask)) { |
@@ -5406,15 +5499,17 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
5406 | printk("\n"); | 5499 | printk("\n"); |
5407 | 5500 | ||
5408 | if (!cpus_equal(sd->span, groupmask)) | 5501 | if (!cpus_equal(sd->span, groupmask)) |
5409 | printk(KERN_ERR "ERROR: groups don't span domain->span\n"); | 5502 | printk(KERN_ERR "ERROR: groups don't span " |
5503 | "domain->span\n"); | ||
5410 | 5504 | ||
5411 | level++; | 5505 | level++; |
5412 | sd = sd->parent; | 5506 | sd = sd->parent; |
5507 | if (!sd) | ||
5508 | continue; | ||
5413 | 5509 | ||
5414 | if (sd) { | 5510 | if (!cpus_subset(groupmask, sd->span)) |
5415 | if (!cpus_subset(groupmask, sd->span)) | 5511 | printk(KERN_ERR "ERROR: parent span is not a superset " |
5416 | printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); | 5512 | "of domain->span\n"); |
5417 | } | ||
5418 | 5513 | ||
5419 | } while (sd); | 5514 | } while (sd); |
5420 | } | 5515 | } |
@@ -5528,28 +5623,27 @@ static int __init isolated_cpu_setup(char *str) | |||
5528 | __setup ("isolcpus=", isolated_cpu_setup); | 5623 | __setup ("isolcpus=", isolated_cpu_setup); |
5529 | 5624 | ||
5530 | /* | 5625 | /* |
5531 | * init_sched_build_groups takes an array of groups, the cpumask we wish | 5626 | * init_sched_build_groups takes the cpumask we wish to span, and a pointer |
5532 | * to span, and a pointer to a function which identifies what group a CPU | 5627 | * to a function which identifies what group(along with sched group) a CPU |
5533 | * belongs to. The return value of group_fn must be a valid index into the | 5628 | * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS |
5534 | * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we | 5629 | * (due to the fact that we keep track of groups covered with a cpumask_t). |
5535 | * keep track of groups covered with a cpumask_t). | ||
5536 | * | 5630 | * |
5537 | * init_sched_build_groups will build a circular linked list of the groups | 5631 | * init_sched_build_groups will build a circular linked list of the groups |
5538 | * covered by the given span, and will set each group's ->cpumask correctly, | 5632 | * covered by the given span, and will set each group's ->cpumask correctly, |
5539 | * and ->cpu_power to 0. | 5633 | * and ->cpu_power to 0. |
5540 | */ | 5634 | */ |
5541 | static void | 5635 | static void |
5542 | init_sched_build_groups(struct sched_group groups[], cpumask_t span, | 5636 | init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, |
5543 | const cpumask_t *cpu_map, | 5637 | int (*group_fn)(int cpu, const cpumask_t *cpu_map, |
5544 | int (*group_fn)(int cpu, const cpumask_t *cpu_map)) | 5638 | struct sched_group **sg)) |
5545 | { | 5639 | { |
5546 | struct sched_group *first = NULL, *last = NULL; | 5640 | struct sched_group *first = NULL, *last = NULL; |
5547 | cpumask_t covered = CPU_MASK_NONE; | 5641 | cpumask_t covered = CPU_MASK_NONE; |
5548 | int i; | 5642 | int i; |
5549 | 5643 | ||
5550 | for_each_cpu_mask(i, span) { | 5644 | for_each_cpu_mask(i, span) { |
5551 | int group = group_fn(i, cpu_map); | 5645 | struct sched_group *sg; |
5552 | struct sched_group *sg = &groups[group]; | 5646 | int group = group_fn(i, cpu_map, &sg); |
5553 | int j; | 5647 | int j; |
5554 | 5648 | ||
5555 | if (cpu_isset(i, covered)) | 5649 | if (cpu_isset(i, covered)) |
@@ -5559,7 +5653,7 @@ init_sched_build_groups(struct sched_group groups[], cpumask_t span, | |||
5559 | sg->cpu_power = 0; | 5653 | sg->cpu_power = 0; |
5560 | 5654 | ||
5561 | for_each_cpu_mask(j, span) { | 5655 | for_each_cpu_mask(j, span) { |
5562 | if (group_fn(j, cpu_map) != group) | 5656 | if (group_fn(j, cpu_map, NULL) != group) |
5563 | continue; | 5657 | continue; |
5564 | 5658 | ||
5565 | cpu_set(j, covered); | 5659 | cpu_set(j, covered); |
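The group_fn callbacks now return the group number and, when given a non-NULL out pointer, also hand back the per-CPU struct sched_group, which is why init_sched_build_groups() loses its groups[] parameter and the second call above can pass NULL. A hypothetical callback following that contract (the names and the sibling mapping are made up for illustration):

#include <stddef.h>

struct sched_group_stub { int id; };

static struct sched_group_stub group_objs[4];

static int cpu_to_group(int cpu, struct sched_group_stub **sg)
{
	int group = cpu / 2;		/* pretend two siblings share a group */

	if (sg)
		*sg = &group_objs[group];
	return group;
}

int main(void)
{
	struct sched_group_stub *sg;
	int g = cpu_to_group(5, &sg);

	/* index-only query (NULL out pointer) must agree with the full one */
	return (g == cpu_to_group(4, NULL) && sg == &group_objs[g]) ? 0 : 1;
}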
@@ -5733,8 +5827,9 @@ __setup("max_cache_size=", setup_max_cache_size); | |||
5733 | */ | 5827 | */ |
5734 | static void touch_cache(void *__cache, unsigned long __size) | 5828 | static void touch_cache(void *__cache, unsigned long __size) |
5735 | { | 5829 | { |
5736 | unsigned long size = __size/sizeof(long), chunk1 = size/3, | 5830 | unsigned long size = __size / sizeof(long); |
5737 | chunk2 = 2*size/3; | 5831 | unsigned long chunk1 = size / 3; |
5832 | unsigned long chunk2 = 2 * size / 3; | ||
5738 | unsigned long *cache = __cache; | 5833 | unsigned long *cache = __cache; |
5739 | int i; | 5834 | int i; |
5740 | 5835 | ||
@@ -5843,11 +5938,11 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) | |||
5843 | */ | 5938 | */ |
5844 | measure_one(cache, size, cpu1, cpu2); | 5939 | measure_one(cache, size, cpu1, cpu2); |
5845 | for (i = 0; i < ITERATIONS; i++) | 5940 | for (i = 0; i < ITERATIONS; i++) |
5846 | cost1 += measure_one(cache, size - i*1024, cpu1, cpu2); | 5941 | cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2); |
5847 | 5942 | ||
5848 | measure_one(cache, size, cpu2, cpu1); | 5943 | measure_one(cache, size, cpu2, cpu1); |
5849 | for (i = 0; i < ITERATIONS; i++) | 5944 | for (i = 0; i < ITERATIONS; i++) |
5850 | cost1 += measure_one(cache, size - i*1024, cpu2, cpu1); | 5945 | cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1); |
5851 | 5946 | ||
5852 | /* | 5947 | /* |
5853 | * (We measure the non-migrating [cached] cost on both | 5948 | * (We measure the non-migrating [cached] cost on both |
@@ -5857,17 +5952,17 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) | |||
5857 | 5952 | ||
5858 | measure_one(cache, size, cpu1, cpu1); | 5953 | measure_one(cache, size, cpu1, cpu1); |
5859 | for (i = 0; i < ITERATIONS; i++) | 5954 | for (i = 0; i < ITERATIONS; i++) |
5860 | cost2 += measure_one(cache, size - i*1024, cpu1, cpu1); | 5955 | cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1); |
5861 | 5956 | ||
5862 | measure_one(cache, size, cpu2, cpu2); | 5957 | measure_one(cache, size, cpu2, cpu2); |
5863 | for (i = 0; i < ITERATIONS; i++) | 5958 | for (i = 0; i < ITERATIONS; i++) |
5864 | cost2 += measure_one(cache, size - i*1024, cpu2, cpu2); | 5959 | cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2); |
5865 | 5960 | ||
5866 | /* | 5961 | /* |
5867 | * Get the per-iteration migration cost: | 5962 | * Get the per-iteration migration cost: |
5868 | */ | 5963 | */ |
5869 | do_div(cost1, 2*ITERATIONS); | 5964 | do_div(cost1, 2 * ITERATIONS); |
5870 | do_div(cost2, 2*ITERATIONS); | 5965 | do_div(cost2, 2 * ITERATIONS); |
5871 | 5966 | ||
5872 | return cost1 - cost2; | 5967 | return cost1 - cost2; |
5873 | } | 5968 | } |
@@ -5905,7 +6000,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) | |||
5905 | */ | 6000 | */ |
5906 | cache = vmalloc(max_size); | 6001 | cache = vmalloc(max_size); |
5907 | if (!cache) { | 6002 | if (!cache) { |
5908 | printk("could not vmalloc %d bytes for cache!\n", 2*max_size); | 6003 | printk("could not vmalloc %d bytes for cache!\n", 2 * max_size); |
5909 | return 1000000; /* return 1 msec on very small boxen */ | 6004 | return 1000000; /* return 1 msec on very small boxen */ |
5910 | } | 6005 | } |
5911 | 6006 | ||
@@ -5930,7 +6025,8 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) | |||
5930 | avg_fluct = (avg_fluct + fluct)/2; | 6025 | avg_fluct = (avg_fluct + fluct)/2; |
5931 | 6026 | ||
5932 | if (migration_debug) | 6027 | if (migration_debug) |
5933 | printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): (%8Ld %8Ld)\n", | 6028 | printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): " |
6029 | "(%8Ld %8Ld)\n", | ||
5934 | cpu1, cpu2, size, | 6030 | cpu1, cpu2, size, |
5935 | (long)cost / 1000000, | 6031 | (long)cost / 1000000, |
5936 | ((long)cost / 100000) % 10, | 6032 | ((long)cost / 100000) % 10, |
@@ -6025,20 +6121,18 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map) | |||
6025 | -1 | 6121 | -1 |
6026 | #endif | 6122 | #endif |
6027 | ); | 6123 | ); |
6028 | if (system_state == SYSTEM_BOOTING) { | 6124 | if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) { |
6029 | if (num_online_cpus() > 1) { | 6125 | printk("migration_cost="); |
6030 | printk("migration_cost="); | 6126 | for (distance = 0; distance <= max_distance; distance++) { |
6031 | for (distance = 0; distance <= max_distance; distance++) { | 6127 | if (distance) |
6032 | if (distance) | 6128 | printk(","); |
6033 | printk(","); | 6129 | printk("%ld", (long)migration_cost[distance] / 1000); |
6034 | printk("%ld", (long)migration_cost[distance] / 1000); | ||
6035 | } | ||
6036 | printk("\n"); | ||
6037 | } | 6130 | } |
6131 | printk("\n"); | ||
6038 | } | 6132 | } |
6039 | j1 = jiffies; | 6133 | j1 = jiffies; |
6040 | if (migration_debug) | 6134 | if (migration_debug) |
6041 | printk("migration: %ld seconds\n", (j1-j0)/HZ); | 6135 | printk("migration: %ld seconds\n", (j1-j0) / HZ); |
6042 | 6136 | ||
6043 | /* | 6137 | /* |
6044 | * Move back to the original CPU. NUMA-Q gets confused | 6138 | * Move back to the original CPU. NUMA-Q gets confused |
@@ -6135,10 +6229,13 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
6135 | */ | 6229 | */ |
6136 | #ifdef CONFIG_SCHED_SMT | 6230 | #ifdef CONFIG_SCHED_SMT |
6137 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | 6231 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); |
6138 | static struct sched_group sched_group_cpus[NR_CPUS]; | 6232 | static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); |
6139 | 6233 | ||
6140 | static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) | 6234 | static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, |
6235 | struct sched_group **sg) | ||
6141 | { | 6236 | { |
6237 | if (sg) | ||
6238 | *sg = &per_cpu(sched_group_cpus, cpu); | ||
6142 | return cpu; | 6239 | return cpu; |
6143 | } | 6240 | } |
6144 | #endif | 6241 | #endif |
@@ -6148,39 +6245,52 @@ static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) | |||
6148 | */ | 6245 | */ |
6149 | #ifdef CONFIG_SCHED_MC | 6246 | #ifdef CONFIG_SCHED_MC |
6150 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | 6247 | static DEFINE_PER_CPU(struct sched_domain, core_domains); |
6151 | static struct sched_group sched_group_core[NR_CPUS]; | 6248 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); |
6152 | #endif | 6249 | #endif |
6153 | 6250 | ||
6154 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6251 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6155 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) | 6252 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, |
6253 | struct sched_group **sg) | ||
6156 | { | 6254 | { |
6255 | int group; | ||
6157 | cpumask_t mask = cpu_sibling_map[cpu]; | 6256 | cpumask_t mask = cpu_sibling_map[cpu]; |
6158 | cpus_and(mask, mask, *cpu_map); | 6257 | cpus_and(mask, mask, *cpu_map); |
6159 | return first_cpu(mask); | 6258 | group = first_cpu(mask); |
6259 | if (sg) | ||
6260 | *sg = &per_cpu(sched_group_core, group); | ||
6261 | return group; | ||
6160 | } | 6262 | } |
6161 | #elif defined(CONFIG_SCHED_MC) | 6263 | #elif defined(CONFIG_SCHED_MC) |
6162 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) | 6264 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, |
6265 | struct sched_group **sg) | ||
6163 | { | 6266 | { |
6267 | if (sg) | ||
6268 | *sg = &per_cpu(sched_group_core, cpu); | ||
6164 | return cpu; | 6269 | return cpu; |
6165 | } | 6270 | } |
6166 | #endif | 6271 | #endif |
6167 | 6272 | ||
6168 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 6273 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
6169 | static struct sched_group sched_group_phys[NR_CPUS]; | 6274 | static DEFINE_PER_CPU(struct sched_group, sched_group_phys); |
6170 | 6275 | ||
6171 | static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map) | 6276 | static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, |
6277 | struct sched_group **sg) | ||
6172 | { | 6278 | { |
6279 | int group; | ||
6173 | #ifdef CONFIG_SCHED_MC | 6280 | #ifdef CONFIG_SCHED_MC |
6174 | cpumask_t mask = cpu_coregroup_map(cpu); | 6281 | cpumask_t mask = cpu_coregroup_map(cpu); |
6175 | cpus_and(mask, mask, *cpu_map); | 6282 | cpus_and(mask, mask, *cpu_map); |
6176 | return first_cpu(mask); | 6283 | group = first_cpu(mask); |
6177 | #elif defined(CONFIG_SCHED_SMT) | 6284 | #elif defined(CONFIG_SCHED_SMT) |
6178 | cpumask_t mask = cpu_sibling_map[cpu]; | 6285 | cpumask_t mask = cpu_sibling_map[cpu]; |
6179 | cpus_and(mask, mask, *cpu_map); | 6286 | cpus_and(mask, mask, *cpu_map); |
6180 | return first_cpu(mask); | 6287 | group = first_cpu(mask); |
6181 | #else | 6288 | #else |
6182 | return cpu; | 6289 | group = cpu; |
6183 | #endif | 6290 | #endif |
6291 | if (sg) | ||
6292 | *sg = &per_cpu(sched_group_phys, group); | ||
6293 | return group; | ||
6184 | } | 6294 | } |
6185 | 6295 | ||
6186 | #ifdef CONFIG_NUMA | 6296 | #ifdef CONFIG_NUMA |
@@ -6193,12 +6303,22 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); | |||
6193 | static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; | 6303 | static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; |
6194 | 6304 | ||
6195 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); | 6305 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); |
6196 | static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; | 6306 | static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); |
6197 | 6307 | ||
6198 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map) | 6308 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, |
6309 | struct sched_group **sg) | ||
6199 | { | 6310 | { |
6200 | return cpu_to_node(cpu); | 6311 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu)); |
6312 | int group; | ||
6313 | |||
6314 | cpus_and(nodemask, nodemask, *cpu_map); | ||
6315 | group = first_cpu(nodemask); | ||
6316 | |||
6317 | if (sg) | ||
6318 | *sg = &per_cpu(sched_group_allnodes, group); | ||
6319 | return group; | ||
6201 | } | 6320 | } |
6321 | |||
6202 | static void init_numa_sched_groups_power(struct sched_group *group_head) | 6322 | static void init_numa_sched_groups_power(struct sched_group *group_head) |
6203 | { | 6323 | { |
6204 | struct sched_group *sg = group_head; | 6324 | struct sched_group *sg = group_head; |
@@ -6234,16 +6354,9 @@ static void free_sched_groups(const cpumask_t *cpu_map) | |||
6234 | int cpu, i; | 6354 | int cpu, i; |
6235 | 6355 | ||
6236 | for_each_cpu_mask(cpu, *cpu_map) { | 6356 | for_each_cpu_mask(cpu, *cpu_map) { |
6237 | struct sched_group *sched_group_allnodes | ||
6238 | = sched_group_allnodes_bycpu[cpu]; | ||
6239 | struct sched_group **sched_group_nodes | 6357 | struct sched_group **sched_group_nodes |
6240 | = sched_group_nodes_bycpu[cpu]; | 6358 | = sched_group_nodes_bycpu[cpu]; |
6241 | 6359 | ||
6242 | if (sched_group_allnodes) { | ||
6243 | kfree(sched_group_allnodes); | ||
6244 | sched_group_allnodes_bycpu[cpu] = NULL; | ||
6245 | } | ||
6246 | |||
6247 | if (!sched_group_nodes) | 6360 | if (!sched_group_nodes) |
6248 | continue; | 6361 | continue; |
6249 | 6362 | ||
@@ -6337,7 +6450,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6337 | struct sched_domain *sd; | 6450 | struct sched_domain *sd; |
6338 | #ifdef CONFIG_NUMA | 6451 | #ifdef CONFIG_NUMA |
6339 | struct sched_group **sched_group_nodes = NULL; | 6452 | struct sched_group **sched_group_nodes = NULL; |
6340 | struct sched_group *sched_group_allnodes = NULL; | 6453 | int sd_allnodes = 0; |
6341 | 6454 | ||
6342 | /* | 6455 | /* |
6343 | * Allocate the per-node list of sched groups | 6456 | * Allocate the per-node list of sched groups |
@@ -6355,7 +6468,6 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6355 | * Set up domains for cpus specified by the cpu_map. | 6468 | * Set up domains for cpus specified by the cpu_map. |
6356 | */ | 6469 | */ |
6357 | for_each_cpu_mask(i, *cpu_map) { | 6470 | for_each_cpu_mask(i, *cpu_map) { |
6358 | int group; | ||
6359 | struct sched_domain *sd = NULL, *p; | 6471 | struct sched_domain *sd = NULL, *p; |
6360 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); | 6472 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); |
6361 | 6473 | ||
@@ -6364,26 +6476,12 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6364 | #ifdef CONFIG_NUMA | 6476 | #ifdef CONFIG_NUMA |
6365 | if (cpus_weight(*cpu_map) | 6477 | if (cpus_weight(*cpu_map) |
6366 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | 6478 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { |
6367 | if (!sched_group_allnodes) { | ||
6368 | sched_group_allnodes | ||
6369 | = kmalloc_node(sizeof(struct sched_group) | ||
6370 | * MAX_NUMNODES, | ||
6371 | GFP_KERNEL, | ||
6372 | cpu_to_node(i)); | ||
6373 | if (!sched_group_allnodes) { | ||
6374 | printk(KERN_WARNING | ||
6375 | "Can not alloc allnodes sched group\n"); | ||
6376 | goto error; | ||
6377 | } | ||
6378 | sched_group_allnodes_bycpu[i] | ||
6379 | = sched_group_allnodes; | ||
6380 | } | ||
6381 | sd = &per_cpu(allnodes_domains, i); | 6479 | sd = &per_cpu(allnodes_domains, i); |
6382 | *sd = SD_ALLNODES_INIT; | 6480 | *sd = SD_ALLNODES_INIT; |
6383 | sd->span = *cpu_map; | 6481 | sd->span = *cpu_map; |
6384 | group = cpu_to_allnodes_group(i, cpu_map); | 6482 | cpu_to_allnodes_group(i, cpu_map, &sd->groups); |
6385 | sd->groups = &sched_group_allnodes[group]; | ||
6386 | p = sd; | 6483 | p = sd; |
6484 | sd_allnodes = 1; | ||
6387 | } else | 6485 | } else |
6388 | p = NULL; | 6486 | p = NULL; |
6389 | 6487 | ||
@@ -6398,36 +6496,33 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6398 | 6496 | ||
6399 | p = sd; | 6497 | p = sd; |
6400 | sd = &per_cpu(phys_domains, i); | 6498 | sd = &per_cpu(phys_domains, i); |
6401 | group = cpu_to_phys_group(i, cpu_map); | ||
6402 | *sd = SD_CPU_INIT; | 6499 | *sd = SD_CPU_INIT; |
6403 | sd->span = nodemask; | 6500 | sd->span = nodemask; |
6404 | sd->parent = p; | 6501 | sd->parent = p; |
6405 | if (p) | 6502 | if (p) |
6406 | p->child = sd; | 6503 | p->child = sd; |
6407 | sd->groups = &sched_group_phys[group]; | 6504 | cpu_to_phys_group(i, cpu_map, &sd->groups); |
6408 | 6505 | ||
6409 | #ifdef CONFIG_SCHED_MC | 6506 | #ifdef CONFIG_SCHED_MC |
6410 | p = sd; | 6507 | p = sd; |
6411 | sd = &per_cpu(core_domains, i); | 6508 | sd = &per_cpu(core_domains, i); |
6412 | group = cpu_to_core_group(i, cpu_map); | ||
6413 | *sd = SD_MC_INIT; | 6509 | *sd = SD_MC_INIT; |
6414 | sd->span = cpu_coregroup_map(i); | 6510 | sd->span = cpu_coregroup_map(i); |
6415 | cpus_and(sd->span, sd->span, *cpu_map); | 6511 | cpus_and(sd->span, sd->span, *cpu_map); |
6416 | sd->parent = p; | 6512 | sd->parent = p; |
6417 | p->child = sd; | 6513 | p->child = sd; |
6418 | sd->groups = &sched_group_core[group]; | 6514 | cpu_to_core_group(i, cpu_map, &sd->groups); |
6419 | #endif | 6515 | #endif |
6420 | 6516 | ||
6421 | #ifdef CONFIG_SCHED_SMT | 6517 | #ifdef CONFIG_SCHED_SMT |
6422 | p = sd; | 6518 | p = sd; |
6423 | sd = &per_cpu(cpu_domains, i); | 6519 | sd = &per_cpu(cpu_domains, i); |
6424 | group = cpu_to_cpu_group(i, cpu_map); | ||
6425 | *sd = SD_SIBLING_INIT; | 6520 | *sd = SD_SIBLING_INIT; |
6426 | sd->span = cpu_sibling_map[i]; | 6521 | sd->span = cpu_sibling_map[i]; |
6427 | cpus_and(sd->span, sd->span, *cpu_map); | 6522 | cpus_and(sd->span, sd->span, *cpu_map); |
6428 | sd->parent = p; | 6523 | sd->parent = p; |
6429 | p->child = sd; | 6524 | p->child = sd; |
6430 | sd->groups = &sched_group_cpus[group]; | 6525 | cpu_to_cpu_group(i, cpu_map, &sd->groups); |
6431 | #endif | 6526 | #endif |
6432 | } | 6527 | } |
6433 | 6528 | ||
@@ -6439,8 +6534,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6439 | if (i != first_cpu(this_sibling_map)) | 6534 | if (i != first_cpu(this_sibling_map)) |
6440 | continue; | 6535 | continue; |
6441 | 6536 | ||
6442 | init_sched_build_groups(sched_group_cpus, this_sibling_map, | 6537 | init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group); |
6443 | cpu_map, &cpu_to_cpu_group); | ||
6444 | } | 6538 | } |
6445 | #endif | 6539 | #endif |
6446 | 6540 | ||
@@ -6451,8 +6545,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6451 | cpus_and(this_core_map, this_core_map, *cpu_map); | 6545 | cpus_and(this_core_map, this_core_map, *cpu_map); |
6452 | if (i != first_cpu(this_core_map)) | 6546 | if (i != first_cpu(this_core_map)) |
6453 | continue; | 6547 | continue; |
6454 | init_sched_build_groups(sched_group_core, this_core_map, | 6548 | init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group); |
6455 | cpu_map, &cpu_to_core_group); | ||
6456 | } | 6549 | } |
6457 | #endif | 6550 | #endif |
6458 | 6551 | ||
@@ -6465,15 +6558,13 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6465 | if (cpus_empty(nodemask)) | 6558 | if (cpus_empty(nodemask)) |
6466 | continue; | 6559 | continue; |
6467 | 6560 | ||
6468 | init_sched_build_groups(sched_group_phys, nodemask, | 6561 | init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group); |
6469 | cpu_map, &cpu_to_phys_group); | ||
6470 | } | 6562 | } |
6471 | 6563 | ||
6472 | #ifdef CONFIG_NUMA | 6564 | #ifdef CONFIG_NUMA |
6473 | /* Set up node groups */ | 6565 | /* Set up node groups */ |
6474 | if (sched_group_allnodes) | 6566 | if (sd_allnodes) |
6475 | init_sched_build_groups(sched_group_allnodes, *cpu_map, | 6567 | init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group); |
6476 | cpu_map, &cpu_to_allnodes_group); | ||
6477 | 6568 | ||
6478 | for (i = 0; i < MAX_NUMNODES; i++) { | 6569 | for (i = 0; i < MAX_NUMNODES; i++) { |
6479 | /* Set up node groups */ | 6570 | /* Set up node groups */ |
@@ -6565,10 +6656,10 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6565 | for (i = 0; i < MAX_NUMNODES; i++) | 6656 | for (i = 0; i < MAX_NUMNODES; i++) |
6566 | init_numa_sched_groups_power(sched_group_nodes[i]); | 6657 | init_numa_sched_groups_power(sched_group_nodes[i]); |
6567 | 6658 | ||
6568 | if (sched_group_allnodes) { | 6659 | if (sd_allnodes) { |
6569 | int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map); | 6660 | struct sched_group *sg; |
6570 | struct sched_group *sg = &sched_group_allnodes[group]; | ||
6571 | 6661 | ||
6662 | cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg); | ||
6572 | init_numa_sched_groups_power(sg); | 6663 | init_numa_sched_groups_power(sg); |
6573 | } | 6664 | } |
6574 | #endif | 6665 | #endif |
@@ -6847,6 +6938,10 @@ void __init sched_init(void) | |||
6847 | 6938 | ||
6848 | set_load_weight(&init_task); | 6939 | set_load_weight(&init_task); |
6849 | 6940 | ||
6941 | #ifdef CONFIG_SMP | ||
6942 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); | ||
6943 | #endif | ||
6944 | |||
6850 | #ifdef CONFIG_RT_MUTEXES | 6945 | #ifdef CONFIG_RT_MUTEXES |
6851 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); | 6946 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); |
6852 | #endif | 6947 | #endif |