Diffstat (limited to 'kernel/sched.c')

 -rw-r--r--  kernel/sched.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++------------

 1 file changed, 47 insertions(+), 12 deletions(-)
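For context: each "domainN" line that show_schedstat() emits carries one block of load-balance counters per idle type. This patch appends a ninth per-idle-type field, lb_stopbalance, after lb_nobusyg, which is why SCHEDSTAT_VERSION moves from 12 to 13. A rough sketch of the new layout (field names stand in for the counter values, only fields visible in this diff are named, and this is not verbatim /proc/schedstat output):

    domain<N> <cpumask>
        lb_cnt lb_balanced lb_failed ... lb_nobusyq lb_nobusyg lb_stopbalance   (repeated per idle type)
        alb_cnt alb_failed alb_pushed ...                                       (trailing fields unchanged)

Per the comment above SCHEDSTAT_VERSION, tools parsing /proc/schedstat should check the version line and adapt (or abort) before trusting field positions.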
diff --git a/kernel/sched.c b/kernel/sched.c
index 15ce772a471a..4e453431c61a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -428,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 12
+#define SCHEDSTAT_VERSION 13
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -466,7 +466,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
 			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
 					itype++) {
-				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
+				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu",
 					sd->lb_cnt[itype],
 					sd->lb_balanced[itype],
 					sd->lb_failed[itype],
@@ -474,7 +474,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
 					sd->lb_gained[itype],
 					sd->lb_hot_gained[itype],
 					sd->lb_nobusyq[itype],
-					sd->lb_nobusyg[itype]);
+					sd->lb_nobusyg[itype],
+					sd->lb_stopbalance[itype]);
 			}
 			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
 			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
@@ -2249,7 +2250,7 @@ out:
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
 		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
-		   cpumask_t *cpus)
+		   cpumask_t *cpus, int *balance)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2278,10 +2279,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		unsigned long load, group_capacity;
 		int local_group;
 		int i;
+		unsigned int balance_cpu = -1, first_idle_cpu = 0;
 		unsigned long sum_nr_running, sum_weighted_load;
 
 		local_group = cpu_isset(this_cpu, group->cpumask);
 
+		if (local_group)
+			balance_cpu = first_cpu(group->cpumask);
+
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
 
@@ -2297,9 +2302,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 				*sd_idle = 0;
 
 			/* Bias balancing toward cpus of our domain */
-			if (local_group)
+			if (local_group) {
+				if (idle_cpu(i) && !first_idle_cpu) {
+					first_idle_cpu = 1;
+					balance_cpu = i;
+				}
+
 				load = target_load(i, load_idx);
-			else
+			} else
 				load = source_load(i, load_idx);
 
 			avg_load += load;
@@ -2307,6 +2317,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 			sum_weighted_load += rq->raw_weighted_load;
 		}
 
+		/*
+		 * First idle cpu or the first cpu(busiest) in this sched group
+		 * is eligible for doing load balancing at this and above
+		 * domains.
+		 */
+		if (local_group && balance_cpu != this_cpu && balance) {
+			*balance = 0;
+			goto ret;
+		}
+
 		total_load += avg_load;
 		total_pwr += group->cpu_power;
 
@@ -2498,8 +2518,8 @@ out_balanced:
 		*imbalance = min_load_per_task;
 		return group_min;
 	}
-ret:
 #endif
+ret:
 	*imbalance = 0;
 	return NULL;
 }
@@ -2550,7 +2570,8 @@ static inline unsigned long minus_1_or_zero(unsigned long n)
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
-			struct sched_domain *sd, enum idle_type idle)
+			struct sched_domain *sd, enum idle_type idle,
+			int *balance)
 {
 	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
@@ -2573,7 +2594,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 redo:
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
-				   &cpus);
+				   &cpus, balance);
+
+	if (*balance == 0) {
+		schedstat_inc(sd, lb_stopbalance[idle]);
+		goto out_balanced;
+	}
+
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
 		goto out_balanced;
@@ -2715,7 +2742,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
 redo:
 	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
-				   &sd_idle, &cpus);
+				   &sd_idle, &cpus, NULL);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
 		goto out_balanced;
@@ -2885,7 +2912,7 @@ static DEFINE_SPINLOCK(balancing);
 
 static void run_rebalance_domains(struct softirq_action *h)
 {
-	int this_cpu = smp_processor_id();
+	int this_cpu = smp_processor_id(), balance = 1;
 	struct rq *this_rq = cpu_rq(this_cpu);
 	unsigned long interval;
 	struct sched_domain *sd;
@@ -2917,7 +2944,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(this_cpu, this_rq, sd, idle)) {
+			if (load_balance(this_cpu, this_rq, sd, idle, &balance)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -2932,6 +2959,14 @@ static void run_rebalance_domains(struct softirq_action *h)
 out:
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
+
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!balance)
+			break;
 	}
 	this_rq->next_balance = next_balance;
 }
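How the election works in practice: every CPU in the local group scans the same group and (modulo races in idle state) converges on one balancer, the first idle CPU seen during the load scan, or failing that the first CPU in the group's cpumask. Every other CPU finds balance_cpu != this_cpu, sets *balance = 0, and load_balance() bails out through the new lb_stopbalance counter. Below is a minimal user-space sketch (not kernel code) of that election rule; the group layout and the cpu_is_idle array are hypothetical stand-ins for group->cpumask and idle_cpu():

/*
 * Sketch of the balance_cpu election added to find_busiest_group():
 * within the local group, the first idle CPU -- or, failing that, the
 * first CPU in the group -- is the only one that proceeds to balance
 * at this domain level.
 */
#include <stdio.h>

#define GROUP_SIZE 4

/* Hypothetical idle state for each slot in one sched group. */
static const int cpu_is_idle[GROUP_SIZE] = { 0, 0, 1, 0 };

static int elect_balance_cpu(const int *group, int nr)
{
	int balance_cpu = group[0];	/* mirrors first_cpu(group->cpumask) */
	int first_idle_cpu = 0;
	int i;

	for (i = 0; i < nr; i++) {
		/* the first idle CPU seen wins; later idle CPUs do not override */
		if (cpu_is_idle[i] && !first_idle_cpu) {
			first_idle_cpu = 1;
			balance_cpu = group[i];
		}
	}
	return balance_cpu;
}

int main(void)
{
	const int group[GROUP_SIZE] = { 4, 5, 6, 7 };	/* CPU ids in one group */
	int balance_cpu = elect_balance_cpu(group, GROUP_SIZE);
	int this_cpu;

	/* Each CPU runs the same scan and reaches the same conclusion. */
	for (this_cpu = 4; this_cpu <= 7; this_cpu++) {
		if (this_cpu == balance_cpu)
			printf("cpu%d: runs load_balance() for the group\n",
			       this_cpu);
		else
			/* mirrors *balance = 0 -> schedstat lb_stopbalance++ */
			printf("cpu%d: skips, cpu%d balances this level\n",
			       this_cpu, balance_cpu);
	}
	return 0;
}

Note that load_balance_newidle() passes NULL for the new balance argument, so a newly idle CPU is never turned away; the election only gates the periodic path, where run_rebalance_domains() also stops walking higher domains once balance drops to 0.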
