 kernel/sched_fair.c | 76
 1 file changed, 43 insertions(+), 33 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ff7692ccda89..3e1fd96c6cf9 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2097,6 +2097,7 @@ struct sd_lb_stats {
 	unsigned long max_load;
 	unsigned long busiest_load_per_task;
 	unsigned long busiest_nr_running;
+	unsigned long busiest_group_capacity;
 
 	int group_imb; /* Is there imbalance in this sd */
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2416,14 +2417,12 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	unsigned long load, max_cpu_load, min_cpu_load;
 	int i;
 	unsigned int balance_cpu = -1, first_idle_cpu = 0;
-	unsigned long sum_avg_load_per_task;
-	unsigned long avg_load_per_task;
+	unsigned long avg_load_per_task = 0;
 
 	if (local_group)
 		balance_cpu = group_first_cpu(group);
 
 	/* Tally up the load of all CPUs in the group */
-	sum_avg_load_per_task = avg_load_per_task = 0;
 	max_cpu_load = 0;
 	min_cpu_load = ~0UL;
 
@@ -2453,7 +2452,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 		sgs->sum_nr_running += rq->nr_running;
 		sgs->sum_weighted_load += weighted_cpuload(i);
 
-		sum_avg_load_per_task += cpu_avg_load_per_task(i);
 	}
 
 	/*
@@ -2473,7 +2471,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	/* Adjust by relative CPU power of the group */
 	sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
 
-
 	/*
 	 * Consider the group unbalanced when the imbalance is larger
 	 * than the average weight of two tasks.
@@ -2483,8 +2480,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	 * normalized nr_running number somewhere that negates
 	 * the hierarchy?
 	 */
-	avg_load_per_task = (sum_avg_load_per_task * SCHED_LOAD_SCALE) /
-		group->cpu_power;
+	if (sgs->sum_nr_running)
+		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
 	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
 		sgs->group_imb = 1;
@@ -2553,6 +2550,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 			sds->max_load = sgs.avg_load;
 			sds->busiest = group;
 			sds->busiest_nr_running = sgs.sum_nr_running;
+			sds->busiest_group_capacity = sgs.group_capacity;
 			sds->busiest_load_per_task = sgs.sum_weighted_load;
 			sds->group_imb = sgs.group_imb;
 		}
@@ -2575,6 +2573,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 {
 	unsigned long tmp, pwr_now = 0, pwr_move = 0;
 	unsigned int imbn = 2;
+	unsigned long scaled_busy_load_per_task;
 
 	if (sds->this_nr_running) {
 		sds->this_load_per_task /= sds->this_nr_running;
@@ -2585,8 +2584,12 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 		sds->this_load_per_task =
 			cpu_avg_load_per_task(this_cpu);
 
-	if (sds->max_load - sds->this_load + sds->busiest_load_per_task >=
-			sds->busiest_load_per_task * imbn) {
+	scaled_busy_load_per_task = sds->busiest_load_per_task
+						 * SCHED_LOAD_SCALE;
+	scaled_busy_load_per_task /= sds->busiest->cpu_power;
+
+	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
+			(scaled_busy_load_per_task * imbn)) {
 		*imbalance = sds->busiest_load_per_task;
 		return;
 	}
@@ -2637,7 +2640,14 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 		unsigned long *imbalance)
 {
-	unsigned long max_pull;
+	unsigned long max_pull, load_above_capacity = ~0UL;
+
+	sds->busiest_load_per_task /= sds->busiest_nr_running;
+	if (sds->group_imb) {
+		sds->busiest_load_per_task =
+			min(sds->busiest_load_per_task, sds->avg_load);
+	}
+
 	/*
 	 * In the presence of smp nice balancing, certain scenarios can have
 	 * max load less than avg load(as we skip the groups at or below
@@ -2648,9 +2658,29 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 		return fix_small_imbalance(sds, this_cpu, imbalance);
 	}
 
-	/* Don't want to pull so many tasks that a group would go idle */
-	max_pull = min(sds->max_load - sds->avg_load,
-			sds->max_load - sds->busiest_load_per_task);
+	if (!sds->group_imb) {
+		/*
+		 * Don't want to pull so many tasks that a group would go idle.
+		 */
+		load_above_capacity = (sds->busiest_nr_running -
+						sds->busiest_group_capacity);
+
+		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_LOAD_SCALE);
+
+		load_above_capacity /= sds->busiest->cpu_power;
+	}
+
+	/*
+	 * We're trying to get all the cpus to the average_load, so we don't
+	 * want to push ourselves above the average load, nor do we wish to
+	 * reduce the max loaded cpu below the average load. At the same time,
+	 * we also don't want to reduce the group load below the group capacity
+	 * (so that we can implement power-savings policies etc). Thus we look
+	 * for the minimum possible imbalance.
+	 * Be careful of negative numbers as they'll appear as very large values
+	 * with unsigned longs.
+	 */
+	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
 	/* How much load to actually move to equalise the imbalance */
 	*imbalance = min(max_pull * sds->busiest->cpu_power,
@@ -2718,7 +2748,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	 * 4) This group is more busy than the avg busieness at this
 	 *    sched_domain.
 	 * 5) The imbalance is within the specified limit.
-	 * 6) Any rebalance would lead to ping-pong
 	 */
 	if (!(*balance))
 		goto ret;
@@ -2737,25 +2766,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
 		goto out_balanced;
 
-	sds.busiest_load_per_task /= sds.busiest_nr_running;
-	if (sds.group_imb)
-		sds.busiest_load_per_task =
-			min(sds.busiest_load_per_task, sds.avg_load);
-
-	/*
-	 * We're trying to get all the cpus to the average_load, so we don't
-	 * want to push ourselves above the average load, nor do we wish to
-	 * reduce the max loaded cpu below the average load, as either of these
-	 * actions would just result in more rebalancing later, and ping-pong
-	 * tasks around. Thus we look for the minimum possible imbalance.
-	 * Negative imbalances (*we* are more loaded than anyone else) will
-	 * be counted as no imbalance for these purposes -- we can't fix that
-	 * by pulling tasks to us. Be careful of negative numbers as they'll
-	 * appear as very large values with unsigned longs.
-	 */
-	if (sds.max_load <= sds.busiest_load_per_task)
-		goto out_balanced;
-
 	/* Looks like there is an imbalance. Compute it */
 	calculate_imbalance(&sds, this_cpu, imbalance);
 	return sds.busiest;
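
Editor's note: for readers tracing the new arithmetic, the standalone sketch below replays, outside the kernel, the two scalings this patch introduces: the cpu_power-normalised busiest_load_per_task used by fix_small_imbalance(), and the load_above_capacity / max_pull computation in calculate_imbalance(). Everything in it is illustrative rather than kernel code. The struct, the min_ul() helper and the numbers (a hypothetical two-thread group whose cpu_power of 1178 rounds to a capacity of one task) are assumptions for the example, and the final conversion omits the second clamp the real calculate_imbalance() applies against the pulling group's own headroom below the average.

/*
 * Illustrative only, not kernel code. SCHED_LOAD_SCALE is 1024 in this
 * era of the scheduler; the struct mirrors just the sd_lb_stats fields
 * the calculation touches, with made-up values.
 */
#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

struct lb_example {
	unsigned long max_load;			/* busiest group's avg_load */
	unsigned long avg_load;			/* domain-wide average load */
	unsigned long busiest_load_per_task;	/* already divided by nr_running */
	unsigned long busiest_nr_running;
	unsigned long busiest_group_capacity;	/* in tasks */
	unsigned long busiest_cpu_power;	/* group->cpu_power */
};

int main(void)
{
	/* Hypothetical two-thread group: cpu_power 1178, capacity 1. */
	struct lb_example s = {
		.max_load		= 2048,
		.avg_load		= 1024,
		.busiest_load_per_task	= 1024,
		.busiest_nr_running	= 2,
		.busiest_group_capacity	= 1,
		.busiest_cpu_power	= 1178,
	};

	/* fix_small_imbalance(): compare against a load-per-task value
	 * normalised by the group's cpu_power, not the raw value. */
	unsigned long scaled_busy_load_per_task =
		s.busiest_load_per_task * SCHED_LOAD_SCALE /
		s.busiest_cpu_power;

	/* calculate_imbalance(): load sitting above the group's capacity,
	 * expressed back in normalised (cpu_power-scaled) units. */
	unsigned long load_above_capacity =
		s.busiest_nr_running - s.busiest_group_capacity;
	load_above_capacity *= SCHED_LOAD_SCALE * SCHED_LOAD_SCALE;
	load_above_capacity /= s.busiest_cpu_power;

	/* Pull no more than what brings the busiest group down to the
	 * domain average, and no more than its load above capacity. */
	unsigned long max_pull = min_ul(s.max_load - s.avg_load,
					load_above_capacity);

	/* Convert back to "real" load to move; the kernel additionally
	 * clamps this by the destination group's gap below the average. */
	unsigned long imbalance = max_pull * s.busiest_cpu_power /
				  SCHED_LOAD_SCALE;

	printf("scaled_busy_load_per_task = %lu\n", scaled_busy_load_per_task);
	printf("load_above_capacity      = %lu\n", load_above_capacity);
	printf("max_pull                 = %lu\n", max_pull);
	printf("imbalance                = %lu\n", imbalance);
	return 0;
}

With these example numbers the pull is limited by load_above_capacity (890) rather than by the gap to the domain average (1024), which is exactly the behaviour the new clamp is meant to add.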
