diff options
-rw-r--r-- | kernel/sched_fair.c | 135 |
1 file changed, 13 insertions, 122 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 10408323794e..de5ab1239e04 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -3010,125 +3010,6 @@ out: | |||
3010 | } | 3010 | } |
3011 | 3011 | ||
3012 | /* | 3012 | /* |
3013 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | ||
3014 | * tasks if there is an imbalance. | ||
3015 | * | ||
3016 | * Called from schedule when this_rq is about to become idle (CPU_NEWLY_IDLE). | ||
3017 | * this_rq is locked. | ||
3018 | */ | ||
3019 | static int | ||
3020 | load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | ||
3021 | { | ||
3022 | struct sched_group *group; | ||
3023 | struct rq *busiest = NULL; | ||
3024 | unsigned long imbalance; | ||
3025 | int ld_moved = 0; | ||
3026 | int sd_idle = 0; | ||
3027 | int all_pinned = 0; | ||
3028 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | ||
3029 | |||
3030 | cpumask_copy(cpus, cpu_active_mask); | ||
3031 | |||
3032 | /* | ||
3033 | * When power savings policy is enabled for the parent domain, idle | ||
3034 | * sibling can pick up load irrespective of busy siblings. In this case, | ||
3035 | * let the state of idle sibling percolate up as IDLE, instead of | ||
3036 | * portraying it as CPU_NOT_IDLE. | ||
3037 | */ | ||
3038 | if (sd->flags & SD_SHARE_CPUPOWER && | ||
3039 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
3040 | sd_idle = 1; | ||
3041 | |||
3042 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); | ||
3043 | redo: | ||
3044 | update_shares_locked(this_rq, sd); | ||
3045 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, | ||
3046 | &sd_idle, cpus, NULL); | ||
3047 | if (!group) { | ||
3048 | schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]); | ||
3049 | goto out_balanced; | ||
3050 | } | ||
3051 | |||
3052 | busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance, cpus); | ||
3053 | if (!busiest) { | ||
3054 | schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]); | ||
3055 | goto out_balanced; | ||
3056 | } | ||
3057 | |||
3058 | BUG_ON(busiest == this_rq); | ||
3059 | |||
3060 | schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); | ||
3061 | |||
3062 | ld_moved = 0; | ||
3063 | if (busiest->nr_running > 1) { | ||
3064 | /* Attempt to move tasks */ | ||
3065 | double_lock_balance(this_rq, busiest); | ||
3066 | /* this_rq->clock is already updated */ | ||
3067 | update_rq_clock(busiest); | ||
3068 | ld_moved = move_tasks(this_rq, this_cpu, busiest, | ||
3069 | imbalance, sd, CPU_NEWLY_IDLE, | ||
3070 | &all_pinned); | ||
3071 | double_unlock_balance(this_rq, busiest); | ||
3072 | |||
3073 | if (unlikely(all_pinned)) { | ||
3074 | cpumask_clear_cpu(cpu_of(busiest), cpus); | ||
3075 | if (!cpumask_empty(cpus)) | ||
3076 | goto redo; | ||
3077 | } | ||
3078 | } | ||
3079 | |||
3080 | if (!ld_moved) { | ||
3081 | int active_balance = 0; | ||
3082 | |||
3083 | schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); | ||
3084 | sd->nr_balance_failed++; | ||
3085 | |||
3086 | if (need_active_balance(sd, sd_idle, CPU_NEWLY_IDLE)) { | ||
3087 | double_lock_balance(this_rq, busiest); | ||
3088 | |||
3089 | /* | ||
3090 | * don't kick the migration_thread, if the curr | ||
3091 | * task on busiest cpu can't be moved to this_cpu | ||
3092 | */ | ||
3093 | if (!cpumask_test_cpu(this_cpu, | ||
3094 | &busiest->curr->cpus_allowed)) { | ||
3095 | double_unlock_balance(this_rq, busiest); | ||
3096 | all_pinned = 1; | ||
3097 | return ld_moved; | ||
3098 | } | ||
3099 | |||
3100 | if (!busiest->active_balance) { | ||
3101 | busiest->active_balance = 1; | ||
3102 | busiest->push_cpu = this_cpu; | ||
3103 | active_balance = 1; | ||
3104 | } | ||
3105 | |||
3106 | double_unlock_balance(this_rq, busiest); | ||
3107 | /* | ||
3108 | * Should not call ttwu while holding a rq->lock | ||
3109 | */ | ||
3110 | raw_spin_unlock(&this_rq->lock); | ||
3111 | if (active_balance) | ||
3112 | wake_up_process(busiest->migration_thread); | ||
3113 | raw_spin_lock(&this_rq->lock); | ||
3114 | } | ||
3115 | } else | ||
3116 | sd->nr_balance_failed = 0; | ||
3117 | |||
3118 | update_shares_locked(this_rq, sd); | ||
3119 | return ld_moved; | ||
3120 | |||
3121 | out_balanced: | ||
3122 | schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); | ||
3123 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | ||
3124 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
3125 | return -1; | ||
3126 | sd->nr_balance_failed = 0; | ||
3127 | |||
3128 | return 0; | ||
3129 | } | ||
3130 | |||
3131 | /* | ||
3132 | * idle_balance is called by schedule() if this_cpu is about to become | 3013 | * idle_balance is called by schedule() if this_cpu is about to become |
3133 | * idle. Attempts to pull tasks from other CPUs. | 3014 | * idle. Attempts to pull tasks from other CPUs. |
3134 | */ | 3015 | */ |
@@ -3143,16 +3024,23 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3143 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | 3024 | if (this_rq->avg_idle < sysctl_sched_migration_cost) |
3144 | return; | 3025 | return; |
3145 | 3026 | ||
3027 | /* | ||
3028 | * Drop the rq->lock, but keep IRQ/preempt disabled. | ||
3029 | */ | ||
3030 | raw_spin_unlock(&this_rq->lock); | ||
3031 | |||
3146 | for_each_domain(this_cpu, sd) { | 3032 | for_each_domain(this_cpu, sd) { |
3147 | unsigned long interval; | 3033 | unsigned long interval; |
3034 | int balance = 1; | ||
3148 | 3035 | ||
3149 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3036 | if (!(sd->flags & SD_LOAD_BALANCE)) |
3150 | continue; | 3037 | continue; |
3151 | 3038 | ||
3152 | if (sd->flags & SD_BALANCE_NEWIDLE) | 3039 | if (sd->flags & SD_BALANCE_NEWIDLE) { |
3153 | /* If we've pulled tasks over stop searching: */ | 3040 | /* If we've pulled tasks over stop searching: */ |
3154 | pulled_task = load_balance_newidle(this_cpu, this_rq, | 3041 | pulled_task = load_balance(this_cpu, this_rq, |
3155 | sd); | 3042 | sd, CPU_NEWLY_IDLE, &balance); |
3043 | } | ||
3156 | 3044 | ||
3157 | interval = msecs_to_jiffies(sd->balance_interval); | 3045 | interval = msecs_to_jiffies(sd->balance_interval); |
3158 | if (time_after(next_balance, sd->last_balance + interval)) | 3046 | if (time_after(next_balance, sd->last_balance + interval)) |
@@ -3162,6 +3050,9 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3162 | break; | 3050 | break; |
3163 | } | 3051 | } |
3164 | } | 3052 | } |
3053 | |||
3054 | raw_spin_lock(&this_rq->lock); | ||
3055 | |||
3165 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 3056 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
3166 | /* | 3057 | /* |
3167 | * We are going idle. next_balance may be set based on | 3058 | * We are going idle. next_balance may be set based on |