 include/linux/sched.h |  1 +
 kernel/sched.c        | 59 +++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 48 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea92e5c89089..72d6927d29ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -707,6 +707,7 @@ struct sched_domain {
         unsigned long lb_hot_gained[MAX_IDLE_TYPES];
         unsigned long lb_nobusyg[MAX_IDLE_TYPES];
         unsigned long lb_nobusyq[MAX_IDLE_TYPES];
+        unsigned long lb_stopbalance[MAX_IDLE_TYPES];
 
         /* Active load balancing */
         unsigned long alb_cnt;
diff --git a/kernel/sched.c b/kernel/sched.c
index 15ce772a471a..4e453431c61a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -428,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 12
+#define SCHEDSTAT_VERSION 13
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -466,7 +466,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
                         seq_printf(seq, "domain%d %s", dcnt++, mask_str);
                         for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
                                         itype++) {
-                                seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
+                                seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                                         sd->lb_cnt[itype],
                                         sd->lb_balanced[itype],
                                         sd->lb_failed[itype],
@@ -474,7 +474,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
                                         sd->lb_gained[itype],
                                         sd->lb_hot_gained[itype],
                                         sd->lb_nobusyq[itype],
-                                        sd->lb_nobusyg[itype]);
+                                        sd->lb_nobusyg[itype],
+                                        sd->lb_stopbalance[itype]);
                         }
                         seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
                                 sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
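With SCHEDSTAT_VERSION bumped to 13, each domain<N> line of /proc/schedstat gains a ninth per-idle-type counter, the new lb_stopbalance, appended after lb_nobusyg; the twelve domain-wide fields of the second seq_printf() follow unchanged. A minimal sketch of a userspace reader, assuming the three idle types of this era (SCHED_IDLE, NOT_IDLE, NEWLY_IDLE); the program and its names are illustrative, only the field order comes from the code above:

    #include <stdio.h>

    #define IDLE_TYPES 3    /* assumed: SCHED_IDLE, NOT_IDLE, NEWLY_IDLE */

    int main(void)
    {
        char line[2048];
        FILE *fp = fopen("/proc/schedstat", "r");

        if (!fp)
            return 1;
        while (fgets(line, sizeof(line), fp)) {
            char mask[64];
            int dom, pos;

            /* per-domain lines look like: domain0 <cpumask> <counters...> */
            if (sscanf(line, "domain%d %63s%n", &dom, mask, &pos) < 2)
                continue;
            for (int t = 0; t < IDLE_TYPES; t++) {
                unsigned long f[9];     /* nine fields per idle type in v13 */
                int used;

                if (sscanf(line + pos,
                           " %lu %lu %lu %lu %lu %lu %lu %lu %lu%n",
                           &f[0], &f[1], &f[2], &f[3], &f[4],
                           &f[5], &f[6], &f[7], &f[8], &used) < 9)
                    break;
                pos += used;
                /* f[8] is the new lb_stopbalance[itype] counter */
                printf("domain%d type%d stopbalance=%lu\n", dom, t, f[8]);
            }
        }
        fclose(fp);
        return 0;
    }

The twelve trailing domain-wide fields (alb_cnt and friends) are left unparsed here; a real tool would also check the "version 13" header line, per the comment above SCHEDSTAT_VERSION.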
@@ -2249,7 +2250,7 @@ out:
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
                    unsigned long *imbalance, enum idle_type idle, int *sd_idle,
-                   cpumask_t *cpus)
+                   cpumask_t *cpus, int *balance)
 {
         struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
         unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2278,10 +2279,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 unsigned long load, group_capacity;
                 int local_group;
                 int i;
+                unsigned int balance_cpu = -1, first_idle_cpu = 0;
                 unsigned long sum_nr_running, sum_weighted_load;
 
                 local_group = cpu_isset(this_cpu, group->cpumask);
 
+                if (local_group)
+                        balance_cpu = first_cpu(group->cpumask);
+
                 /* Tally up the load of all CPUs in the group */
                 sum_weighted_load = sum_nr_running = avg_load = 0;
 
@@ -2297,9 +2302,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                                 *sd_idle = 0;
 
                         /* Bias balancing toward cpus of our domain */
-                        if (local_group)
+                        if (local_group) {
+                                if (idle_cpu(i) && !first_idle_cpu) {
+                                        first_idle_cpu = 1;
+                                        balance_cpu = i;
+                                }
+
                                 load = target_load(i, load_idx);
-                        else
+                        } else
                                 load = source_load(i, load_idx);
 
                         avg_load += load;
@@ -2307,6 +2317,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                         sum_weighted_load += rq->raw_weighted_load;
                 }
 
+                /*
+                 * First idle cpu or the first cpu(busiest) in this sched group
+                 * is eligible for doing load balancing at this and above
+                 * domains.
+                 */
+                if (local_group && balance_cpu != this_cpu && balance) {
+                        *balance = 0;
+                        goto ret;
+                }
+
                 total_load += avg_load;
                 total_pwr += group->cpu_power;
 
@@ -2498,8 +2518,8 @@ out_balanced:
                 *imbalance = min_load_per_task;
                 return group_min;
         }
-ret:
 #endif
+ret:
         *imbalance = 0;
         return NULL;
 }
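All of the find_busiest_group() changes serve one election: while the local group's statistics are tallied, the first idle cpu seen becomes balance_cpu (the group's first cpu is the fallback, set up front), and any cpu that was not elected gives up through the relocated ret: label before computing any imbalance. A standalone sketch of that rule, where cpus[] and is_idle[] stand in for the group cpumask and idle_cpu(), both assumptions for illustration:

    #include <stdio.h>

    /* Elect the one cpu in a group that should run load balancing:
     * the first idle cpu wins, otherwise the group's first cpu. */
    static int elect_balance_cpu(const int *cpus, const int *is_idle, int n)
    {
        int balance_cpu = cpus[0];      /* default: first cpu in the group */

        for (int i = 0; i < n; i++) {
            if (is_idle[i]) {           /* first idle cpu wins */
                balance_cpu = cpus[i];
                break;
            }
        }
        return balance_cpu;
    }

    int main(void)
    {
        int cpus[] = { 4, 5, 6, 7 };    /* one sched group */
        int idle[] = { 0, 0, 1, 0 };    /* cpu 6 is idle */

        /* prints 6: cpus 4, 5 and 7 would set *balance = 0 and bail out */
        printf("%d\n", elect_balance_cpu(cpus, idle, 4));
        return 0;
    }

Note that an unelected cpu jumps straight to ret: and never examines the remaining groups; it returns NULL with *imbalance = 0.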
@@ -2550,7 +2570,8 @@ static inline unsigned long minus_1_or_zero(unsigned long n)
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
-                        struct sched_domain *sd, enum idle_type idle)
+                        struct sched_domain *sd, enum idle_type idle,
+                        int *balance)
 {
         int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
@@ -2573,7 +2594,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 redo:
         group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
-                                   &cpus);
+                                   &cpus, balance);
+
+        if (*balance == 0) {
+                schedstat_inc(sd, lb_stopbalance[idle]);
+                goto out_balanced;
+        }
+
         if (!group) {
                 schedstat_inc(sd, lb_nobusyg[idle]);
                 goto out_balanced;
@@ -2715,7 +2742,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
         schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
 redo:
         group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
-                                   &sd_idle, &cpus);
+                                   &sd_idle, &cpus, NULL);
         if (!group) {
                 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
                 goto out_balanced;
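Two details of the calling contract are worth noting: find_busiest_group() tests the balance pointer before writing through it, which is what lets the NEWLY_IDLE call above pass NULL (the idea being that a cpu that has just gone idle should always try to pull work and so sits out the election), whereas load_balance() dereferences *balance unconditionally and therefore needs a real int, initialized to 1, from its caller. A compact mock of that contract; the *_mock functions are illustrative stand-ins, not kernel code:

    #include <stdio.h>
    #include <stddef.h>

    /* Writes through 'balance' only when it is non-NULL, as the patch does. */
    static void find_busiest_group_mock(int elected, int *balance)
    {
        if (!elected && balance)
            *balance = 0;
    }

    /* Reads *balance unconditionally, so NULL is not allowed here. */
    static int load_balance_mock(int elected, int *balance)
    {
        find_busiest_group_mock(elected, balance);
        if (*balance == 0)
            return 0;       /* counted as lb_stopbalance in the patch */
        return 1;           /* pretend tasks were moved */
    }

    int main(void)
    {
        int balance = 1;

        printf("%d\n", load_balance_mock(0, &balance)); /* 0 */
        printf("%d\n", balance);                        /* 0: stop walking up */
        find_busiest_group_mock(0, NULL);               /* newly-idle: safe */
        return 0;
    }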
@@ -2885,7 +2912,7 @@ static DEFINE_SPINLOCK(balancing);
 
 static void run_rebalance_domains(struct softirq_action *h)
 {
-        int this_cpu = smp_processor_id();
+        int this_cpu = smp_processor_id(), balance = 1;
         struct rq *this_rq = cpu_rq(this_cpu);
         unsigned long interval;
         struct sched_domain *sd;
@@ -2917,7 +2944,7 @@ static void run_rebalance_domains(struct softirq_action *h)
                 }
 
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                        if (load_balance(this_cpu, this_rq, sd, idle)) {
+                        if (load_balance(this_cpu, this_rq, sd, idle, &balance)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@ -2932,6 +2959,14 @@ static void run_rebalance_domains(struct softirq_action *h)
 out:
                 if (time_after(next_balance, sd->last_balance + interval))
                         next_balance = sd->last_balance + interval;
+
+                /*
+                 * Stop the load balance at this level. There is another
+                 * CPU in our sched group which is doing load balancing more
+                 * actively.
+                 */
+                if (!balance)
+                        break;
         }
         this_rq->next_balance = next_balance;
 }
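The net effect in run_rebalance_domains(): each cpu still walks its domains bottom-up on a rebalance tick, but the walk now stops at the first level where the cpu lost the election, so every group contributes exactly one balancer to its own level and the levels above it. A toy run over an assumed two-level topology, 4 cpus in SMT pairs {0,1} and {2,3} with cpu 1 idle; the precomputed elected[][] table is hand-derived for this example:

    #include <stdio.h>

    #define NCPUS   4
    #define NLEVELS 2

    /* elected balancer per (level, cpu): first idle cpu in the group,
     * otherwise the group's first cpu -- precomputed for cpu 1 idle */
    static const int elected[NLEVELS][NCPUS] = {
        { 1, 1, 2, 2 },     /* SMT level: groups {0,1} and {2,3} */
        { 1, 1, 1, 1 },     /* package level: one group {0,1,2,3} */
    };

    int main(void)
    {
        for (int cpu = 0; cpu < NCPUS; cpu++) {
            int balance = 1;            /* as in run_rebalance_domains() */

            for (int lvl = 0; lvl < NLEVELS && balance; lvl++) {
                if (elected[lvl][cpu] == cpu) {
                    printf("cpu%d balances level %d\n", cpu, lvl);
                } else {
                    balance = 0;        /* lb_stopbalance++, then break */
                    printf("cpu%d stops at level %d\n", cpu, lvl);
                }
            }
        }
        return 0;
    }

Only cpu 1 reaches the package level here, which is the point of the change: the larger, more expensive domains get balanced by one cpu per group per interval instead of by every cpu in the group.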