Diffstat (limited to 'kernel/sched_fair.c')

 kernel/sched_fair.c | 468 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 298 insertions(+), 170 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index aa7f84121016..4e777b47eeda 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 
 #ifdef CONFIG_SCHED_DEBUG
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
 
+		trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
 		cpuacct_charge(curtask, delta_exec);
 		account_group_exec_runtime(curtask, delta_exec);
 	}
@@ -709,24 +710,28 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	if (initial && sched_feat(START_DEBIT))
 		vruntime += sched_vslice(cfs_rq, se);
 
-	if (!initial) {
-		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS)) {
-			unsigned long thresh = sysctl_sched_latency;
+	/* sleeps up to a single latency don't count. */
+	if (!initial && sched_feat(FAIR_SLEEPERS)) {
+		unsigned long thresh = sysctl_sched_latency;
 
-			/*
-			 * Convert the sleeper threshold into virtual time.
-			 * SCHED_IDLE is a special sub-class. We care about
-			 * fairness only relative to other SCHED_IDLE tasks,
-			 * all of which have the same weight.
-			 */
-			if (sched_feat(NORMALIZED_SLEEPER) &&
-					(!entity_is_task(se) ||
-					 task_of(se)->policy != SCHED_IDLE))
-				thresh = calc_delta_fair(thresh, se);
+		/*
+		 * Convert the sleeper threshold into virtual time.
+		 * SCHED_IDLE is a special sub-class. We care about
+		 * fairness only relative to other SCHED_IDLE tasks,
+		 * all of which have the same weight.
+		 */
+		if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) ||
+				 task_of(se)->policy != SCHED_IDLE))
+			thresh = calc_delta_fair(thresh, se);
 
-			vruntime -= thresh;
-		}
+		/*
+		 * Halve their sleep time's effect, to allow
+		 * for a gentler effect of sleepers:
+		 */
+		if (sched_feat(GENTLE_FAIR_SLEEPERS))
+			thresh >>= 1;
+
+		vruntime -= thresh;
 	}
 
 	/* ensure we never gain time by being placed backwards. */
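The placement arithmetic above is easier to follow with concrete numbers. The sketch below is an editorial userspace model of the new code path, not the kernel function itself: the 6 ms latency is an assumed example value and the NORMALIZED_SLEEPER weight scaling is left out.

/*
 * Editorial userspace model of the sleeper placement above; not the kernel
 * function.  The 6 ms latency is an assumed example value.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_MSEC 1000000ULL

static uint64_t place_sleeper(uint64_t min_vruntime, int gentle)
{
	uint64_t vruntime = min_vruntime;
	uint64_t thresh = 6 * NSEC_PER_MSEC;	/* assumed sysctl_sched_latency */

	/* GENTLE_FAIR_SLEEPERS: only credit half of the latency window */
	if (gentle)
		thresh >>= 1;

	/* sleeps up to a single latency don't count */
	vruntime -= thresh;

	/* the real code then clamps: never gain time by being placed backwards */
	return vruntime;
}

int main(void)
{
	uint64_t min_vr = 100 * NSEC_PER_MSEC;

	printf("plain sleeper credit:  %llu ms\n",
	       (unsigned long long)((min_vr - place_sleeper(min_vr, 0)) / NSEC_PER_MSEC));
	printf("gentle sleeper credit: %llu ms\n",
	       (unsigned long long)((min_vr - place_sleeper(min_vr, 1)) / NSEC_PER_MSEC));
	return 0;
}

With FAIR_SLEEPERS alone a waking task is placed a full latency (6 ms here) behind min_vruntime; GENTLE_FAIR_SLEEPERS halves that credit to 3 ms, so sleepers still get a boost but cannot monopolize the CPU for as long after waking.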
@@ -757,10 +762,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 
 static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->last == se)
+	if (!se || cfs_rq->last == se)
 		cfs_rq->last = NULL;
 
-	if (cfs_rq->next == se)
+	if (!se || cfs_rq->next == se)
 		cfs_rq->next = NULL;
 }
 
@@ -1062,83 +1067,6 @@ static void yield_task_fair(struct rq *rq)
 	se->vruntime = rightmost->vruntime + 1;
 }
 
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (rq->rd->online)
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-
-#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
-
-static int wake_idle(int cpu, struct task_struct *p)
-{
-	struct sched_domain *sd;
-	int i;
-	unsigned int chosen_wakeup_cpu;
-	int this_cpu;
-	struct rq *task_rq = task_rq(p);
-
-	/*
-	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
-	 * are idle and this is not a kernel thread and this task's affinity
-	 * allows it to be moved to preferred cpu, then just move!
-	 */
-
-	this_cpu = smp_processor_id();
-	chosen_wakeup_cpu =
-		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
-
-	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
-		idle_cpu(cpu) && idle_cpu(this_cpu) &&
-		p->mm && !(p->flags & PF_KTHREAD) &&
-		cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
-		return chosen_wakeup_cpu;
-
-	/*
-	 * If it is idle, then it is the best cpu to run this task.
-	 *
-	 * This cpu is also the best, if it has more than one task already.
-	 * Siblings must be also busy(in most cases) as they didn't already
-	 * pickup the extra load from this cpu and hence we need not check
-	 * sibling runqueue info. This will avoid the checks and cache miss
-	 * penalities associated with that.
-	 */
-	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
-		return cpu;
-
-	for_each_domain(cpu, sd) {
-		if ((sd->flags & SD_WAKE_IDLE)
-		    || ((sd->flags & SD_WAKE_IDLE_FAR)
-			&& !task_hot(p, task_rq->clock, sd))) {
-			for_each_cpu_and(i, sched_domain_span(sd),
-					 &p->cpus_allowed) {
-				if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
-					if (i != task_cpu(p)) {
-						schedstat_inc(p,
-						       se.nr_wakeups_idle);
-					}
-					return i;
-				}
-			}
-		} else {
-			break;
-		}
-	}
-	return cpu;
-}
-#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
-	return cpu;
-}
-#endif
-
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1225,25 +1153,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
-static int
-wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
-	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
-	    int idx, unsigned long load, unsigned long this_load,
-	    unsigned int imbalance)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = this_rq->curr;
-	struct task_group *tg;
-	unsigned long tl = this_load;
+	struct task_struct *curr = current;
+	unsigned long this_load, load;
+	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
+	unsigned int imbalance;
+	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
-	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
-		return 0;
+	idx = sd->wake_idx;
+	this_cpu = smp_processor_id();
+	prev_cpu = task_cpu(p);
+	load = source_load(prev_cpu, idx);
+	this_load = target_load(this_cpu, idx);
 
-	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
-			p->se.avg_overlap > sysctl_sched_migration_cost))
-		sync = 0;
+	if (sync) {
+		if (sched_feat(SYNC_LESS) &&
+		    (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+		     p->se.avg_overlap > sysctl_sched_migration_cost))
+			sync = 0;
+	} else {
+		if (sched_feat(SYNC_MORE) &&
+		    (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+		     p->se.avg_overlap < sysctl_sched_migration_cost))
+			sync = 1;
+	}
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
@@ -1254,24 +1191,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 		tg = task_group(current);
 		weight = current->se.load.weight;
 
-		tl += effective_load(tg, this_cpu, -weight, -weight);
+		this_load += effective_load(tg, this_cpu, -weight, -weight);
 		load += effective_load(tg, prev_cpu, 0, -weight);
 	}
 
 	tg = task_group(p);
 	weight = p->se.load.weight;
 
+	imbalance = 100 + (sd->imbalance_pct - 100) / 2;
+
 	/*
 	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped tl to 0, we'll always have
-	 * an imbalance, but there's really nothing you can do about that, so
-	 * that's good too.
+	 * due to the sync cause above having dropped this_load to 0, we'll
+	 * always have an imbalance, but there's really nothing you can do
+	 * about that, so that's good too.
 	 *
 	 * Otherwise check if either cpus are near enough in load to allow this
 	 * task to be woken on this_cpu.
 	 */
-	balanced = !tl ||
-		100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+	balanced = !this_load ||
+		100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
 		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
 
 	/*
@@ -1285,14 +1224,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
-			tl_per_task)) {
+	if (balanced ||
+	    (this_load <= load &&
+	     this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
 		 * there is no bad imbalance.
 		 */
-		schedstat_inc(this_sd, ttwu_move_affine);
+		schedstat_inc(sd, ttwu_move_affine);
 		schedstat_inc(p, se.nr_wakeups_affine);
 
 		return 1;
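For reference, the reworked wake_affine() boils down to one weighted comparison. The following is a rough standalone model of that balance test under !CONFIG_FAIR_GROUP_SCHED, where effective_load() degenerates to plain weight arithmetic; the load figures, weights and imbalance_pct value are invented for illustration.

/*
 * Rough standalone model of the wake_affine() balance test above; not the
 * kernel code.  Assumes !CONFIG_FAIR_GROUP_SCHED and made-up load numbers.
 */
#include <stdio.h>

static int wake_affine_balanced(unsigned long this_load, unsigned long prev_load,
				unsigned long curr_weight, unsigned long p_weight,
				unsigned int imbalance_pct, int sync)
{
	/* a sync wakeup assumes the waker is about to sleep: discount its weight */
	if (sync)
		this_load = this_load > curr_weight ? this_load - curr_weight : 0;

	/* e.g. imbalance_pct = 125 -> imbalance = 112, i.e. roughly 12% slack */
	unsigned int imbalance = 100 + (imbalance_pct - 100) / 2;

	return !this_load ||
	       100 * (this_load + p_weight) <= imbalance * prev_load;
}

int main(void)
{
	/* waking CPU and prev CPU both carry 2048 load units, nice-0 weight 1024 */
	printf("non-sync wakeup: %s\n",
	       wake_affine_balanced(2048, 2048, 1024, 1024, 125, 0) ?
	       "pull to waking CPU" : "leave on prev_cpu");
	printf("sync wakeup:     %s\n",
	       wake_affine_balanced(2048, 2048, 1024, 1024, 125, 1) ?
	       "pull to waking CPU" : "leave on prev_cpu");
	return 0;
}

With imbalance_pct = 125 the waking CPU may only be about 12% busier than prev_cpu once the wakee's weight is added; a sync wakeup first subtracts the waker's own weight, which is what flips the second case to an affine move.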
@@ -1300,65 +1240,216 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	return 0;
 }
 
-static int select_task_rq_fair(struct task_struct *p, int sync)
+/*
+ * find_idlest_group finds and returns the least busy CPU group within the
+ * domain.
+ */
+static struct sched_group *
+find_idlest_group(struct sched_domain *sd, struct task_struct *p,
+		  int this_cpu, int load_idx)
 {
-	struct sched_domain *sd, *this_sd = NULL;
-	int prev_cpu, this_cpu, new_cpu;
-	unsigned long load, this_load;
-	struct rq *this_rq;
-	unsigned int imbalance;
-	int idx;
+	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
+	unsigned long min_load = ULONG_MAX, this_load = 0;
+	int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
-	prev_cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
-	this_rq = cpu_rq(this_cpu);
-	new_cpu = prev_cpu;
+	do {
+		unsigned long load, avg_load;
+		int local_group;
+		int i;
 
-	/*
-	 * 'this_sd' is the first domain that both
-	 * this_cpu and prev_cpu are present in:
-	 */
-	for_each_domain(this_cpu, sd) {
-		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
-			this_sd = sd;
-			break;
+		/* Skip over this group if it has no CPUs allowed */
+		if (!cpumask_intersects(sched_group_cpus(group),
+					&p->cpus_allowed))
+			continue;
+
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
+
+		/* Tally up the load of all CPUs in the group */
+		avg_load = 0;
+
+		for_each_cpu(i, sched_group_cpus(group)) {
+			/* Bias balancing toward cpus of our domain */
+			if (local_group)
+				load = source_load(i, load_idx);
+			else
+				load = target_load(i, load_idx);
+
+			avg_load += load;
+		}
+
+		/* Adjust by relative CPU power of the group */
+		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+		if (local_group) {
+			this_load = avg_load;
+			this = group;
+		} else if (avg_load < min_load) {
+			min_load = avg_load;
+			idlest = group;
+		}
+	} while (group = group->next, group != sd->groups);
+
+	if (!idlest || 100*this_load < imbalance*min_load)
+		return NULL;
+	return idlest;
+}
+
+/*
+ * find_idlest_cpu - find the idlest cpu among the cpus in group.
+ */
+static int
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+{
+	unsigned long load, min_load = ULONG_MAX;
+	int idlest = -1;
+	int i;
+
+	/* Traverse only the allowed CPUs */
+	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+		load = weighted_cpuload(i);
+
+		if (load < min_load || (load == min_load && i == this_cpu)) {
+			min_load = load;
+			idlest = i;
 		}
 	}
 
-	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
-		goto out;
+	return idlest;
+}
 
-	/*
-	 * Check for affine wakeup and passive balancing possibilities.
-	 */
-	if (!this_sd)
+/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+ *
+ * Balance, ie. select the least loaded group.
+ *
+ * Returns the target CPU number, or the same CPU if no balancing is needed.
+ *
+ * preempt must be disabled.
+ */
+static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+{
+	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int new_cpu = cpu;
+	int want_affine = 0;
+	int want_sd = 1;
+	int sync = wake_flags & WF_SYNC;
+
+	if (sd_flag & SD_BALANCE_WAKE) {
+		if (sched_feat(AFFINE_WAKEUPS) &&
+		    cpumask_test_cpu(cpu, &p->cpus_allowed))
+			want_affine = 1;
+		new_cpu = prev_cpu;
+	}
+
+	rcu_read_lock();
+	for_each_domain(cpu, tmp) {
+		/*
+		 * If power savings logic is enabled for a domain, see if we
+		 * are not overloaded, if so, don't balance wider.
+		 */
+		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
+			unsigned long power = 0;
+			unsigned long nr_running = 0;
+			unsigned long capacity;
+			int i;
+
+			for_each_cpu(i, sched_domain_span(tmp)) {
+				power += power_of(i);
+				nr_running += cpu_rq(i)->cfs.nr_running;
+			}
+
+			capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
+
+			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
+				nr_running /= 2;
+
+			if (nr_running < capacity)
+				want_sd = 0;
+		}
+
+		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+
+			affine_sd = tmp;
+			want_affine = 0;
+		}
+
+		if (!want_sd && !want_affine)
+			break;
+
+		if (!(tmp->flags & sd_flag))
+			continue;
+
+		if (want_sd)
+			sd = tmp;
+	}
+
+	if (sched_feat(LB_SHARES_UPDATE)) {
+		/*
+		 * Pick the largest domain to update shares over
+		 */
+		tmp = sd;
+		if (affine_sd && (!tmp ||
+				  cpumask_weight(sched_domain_span(affine_sd)) >
+				  cpumask_weight(sched_domain_span(sd))))
+			tmp = affine_sd;
+
+		if (tmp)
+			update_shares(tmp);
+	}
+
+	if (affine_sd && wake_affine(affine_sd, p, sync)) {
+		new_cpu = cpu;
 		goto out;
+	}
 
-	idx = this_sd->wake_idx;
+	while (sd) {
+		int load_idx = sd->forkexec_idx;
+		struct sched_group *group;
+		int weight;
 
-	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+		if (!(sd->flags & sd_flag)) {
+			sd = sd->child;
+			continue;
+		}
 
-	load = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+		if (sd_flag & SD_BALANCE_WAKE)
+			load_idx = sd->wake_idx;
 
-	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
-				load, this_load, imbalance))
-		return this_cpu;
+		group = find_idlest_group(sd, p, cpu, load_idx);
+		if (!group) {
+			sd = sd->child;
+			continue;
+		}
 
-	/*
-	 * Start passive balancing when half the imbalance_pct
-	 * limit is reached.
-	 */
-	if (this_sd->flags & SD_WAKE_BALANCE) {
-		if (imbalance*this_load <= 100*load) {
-			schedstat_inc(this_sd, ttwu_move_balance);
-			schedstat_inc(p, se.nr_wakeups_passive);
-			return this_cpu;
+		new_cpu = find_idlest_cpu(group, p, cpu);
+		if (new_cpu == -1 || new_cpu == cpu) {
+			/* Now try balancing at a lower domain level of cpu */
+			sd = sd->child;
+			continue;
+		}
+
+		/* Now try balancing at a lower domain level of new_cpu */
+		cpu = new_cpu;
+		weight = cpumask_weight(sched_domain_span(sd));
+		sd = NULL;
+		for_each_domain(cpu, tmp) {
+			if (weight <= cpumask_weight(sched_domain_span(tmp)))
+				break;
+			if (tmp->flags & sd_flag)
+				sd = tmp;
 		}
+		/* while loop will break here if sd == NULL */
 	}
 
 out:
-	return wake_idle(new_cpu, p);
+	rcu_read_unlock();
+	return new_cpu;
 }
 #endif /* CONFIG_SMP */
 
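The new select_task_rq_fair() path leans on find_idlest_group(), whose decision is a per-group average normalized by cpu_power. Below is a toy standalone model of that comparison; the group loads, cpu_power and imbalance_pct values are made up, and SCHED_LOAD_SCALE is taken as 1024, its value in this kernel series.

/*
 * Toy model of the find_idlest_group() decision above; not kernel code.
 * Two groups with invented per-CPU loads and cpu_power values.
 */
#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL

struct group_model {
	const char *name;
	unsigned long loads[2];		/* per-CPU load contributions */
	unsigned long cpu_power;	/* aggregate capacity of the group */
	int local;			/* does it contain this_cpu? */
};

int main(void)
{
	struct group_model groups[] = {
		{ "local",  { 1800, 600 }, 2048, 1 },
		{ "remote", {  400, 300 }, 2048, 0 },
	};
	unsigned long this_load = 0, min_load = ~0UL;
	const char *idlest = NULL;
	unsigned int imbalance_pct = 125;
	unsigned int imbalance = 100 + (imbalance_pct - 100) / 2;

	for (unsigned int i = 0; i < 2; i++) {
		unsigned long avg = groups[i].loads[0] + groups[i].loads[1];

		/* normalize by the group's relative CPU power */
		avg = avg * SCHED_LOAD_SCALE / groups[i].cpu_power;

		if (groups[i].local) {
			this_load = avg;
		} else if (avg < min_load) {
			min_load = avg;
			idlest = groups[i].name;
		}
	}

	/* only leave the local group if it is clearly busier */
	if (!idlest || 100 * this_load < imbalance * min_load)
		printf("stay within the local group (this_load=%lu, min_load=%lu)\n",
		       this_load, min_load);
	else
		printf("move toward the %s group (this_load=%lu, min_load=%lu)\n",
		       idlest, this_load, min_load);
	return 0;
}

Here 100*this_load (120000) is well above imbalance*min_load (39200), so the search would move toward the remote group, where find_idlest_cpu() then picks the least loaded allowed CPU; otherwise select_task_rq_fair() descends to sd->child and repeats the test at a narrower domain level.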
@@ -1471,11 +1562,12 @@ static void set_next_buddy(struct sched_entity *se)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
 	struct task_struct *curr = rq->curr;
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+	int sync = wake_flags & WF_SYNC;
 
 	update_curr(cfs_rq);
 
@@ -1501,7 +1593,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 	 */
 	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
 		set_last_buddy(se);
-	set_next_buddy(pse);
+	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
+		set_next_buddy(pse);
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1523,16 +1616,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	if (!sched_feat(WAKEUP_PREEMPT))
-		return;
-
-	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
-			(se->avg_overlap < sysctl_sched_migration_cost &&
-			 pse->avg_overlap < sysctl_sched_migration_cost))) {
+	if ((sched_feat(WAKEUP_SYNC) && sync) ||
+	    (sched_feat(WAKEUP_OVERLAP) &&
+	     (se->avg_overlap < sysctl_sched_migration_cost &&
+	      pse->avg_overlap < sysctl_sched_migration_cost))) {
 		resched_task(curr);
 		return;
 	}
 
+	if (sched_feat(WAKEUP_RUNNING)) {
+		if (pse->avg_running < se->avg_running) {
+			set_next_buddy(pse);
+			resched_task(curr);
+			return;
+		}
+	}
+
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
+
 	find_matching_se(&se, &pse);
 
 	BUG_ON(!pse);
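Condensed, the reordered checks above amount to a few tests on the tasks' average overlap and runtime before the generic granularity check runs. The sketch below is a loose userspace condensation, assuming the WAKEUP_SYNC, WAKEUP_OVERLAP and WAKEUP_RUNNING features are all enabled; the 0.5 ms migration cost and the sample values are invented.

/*
 * Loose userspace condensation of the wakeup-preemption checks above; not
 * the kernel code.  Assumes the relevant feature bits are all enabled.
 */
#include <stdio.h>

#define MIGRATION_COST_NS 500000ULL	/* stand-in for sysctl_sched_migration_cost */

/* 1 = preempt the running task right away,
 * 0 = fall through to the granularity-based wakeup check */
static int preempt_now(int sync,
		       unsigned long long curr_overlap_ns,
		       unsigned long long wakee_overlap_ns,
		       unsigned long long curr_running_ns,
		       unsigned long long wakee_running_ns)
{
	/* WAKEUP_SYNC / WAKEUP_OVERLAP: sync or tightly coupled pairs preempt */
	if (sync || (curr_overlap_ns < MIGRATION_COST_NS &&
		     wakee_overlap_ns < MIGRATION_COST_NS))
		return 1;

	/* WAKEUP_RUNNING: favour the task with the shorter average runtime */
	if (wakee_running_ns < curr_running_ns)
		return 1;

	return 0;
}

int main(void)
{
	/* pipe-like pair: tiny overlaps on both sides -> immediate preemption */
	printf("pipe-like pair: %d\n",
	       preempt_now(0, 100000, 120000, 2000000, 2000000));
	/* two batch tasks: long overlaps, equal runtimes -> defer */
	printf("batch vs batch: %d\n",
	       preempt_now(0, 5000000, 7000000, 4000000, 4000000));
	return 0;
}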
@@ -1555,8 +1657,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 		/*
 		 * If se was a buddy, clear it so that it will have to earn
 		 * the favour again.
+		 *
+		 * If se was not a buddy, clear the buddies because neither
+		 * was elegible to run, let them earn it again.
+		 *
+		 * IOW. unconditionally clear buddies.
 		 */
-		__clear_buddies(cfs_rq, se);
+		__clear_buddies(cfs_rq, NULL);
 		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
@@ -1832,6 +1939,25 @@ static void moved_group_fair(struct task_struct *p)
 }
 #endif
 
+unsigned int get_rr_interval_fair(struct task_struct *task)
+{
+	struct sched_entity *se = &task->se;
+	unsigned long flags;
+	struct rq *rq;
+	unsigned int rr_interval = 0;
+
+	/*
+	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+	 * idle runqueue:
+	 */
+	rq = task_rq_lock(task, &flags);
+	if (rq->cfs.load.weight)
+		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+	task_rq_unlock(rq, &flags);
+
+	return rr_interval;
+}
+
 /*
  * All the scheduling class methods:
  */
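get_rr_interval_fair() reports a CFS task's current slice in jiffies via sched_slice(). The arithmetic is easy to sanity-check in isolation; the sketch below assumes a 20 ms scheduling period split between four equal-weight tasks and HZ=250, all of which are illustrative values rather than anything the kernel guarantees.

/*
 * Back-of-the-envelope model of get_rr_interval_fair() above; not kernel
 * code.  The 20 ms period, equal weights and HZ=250 are assumed values.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL
#define HZ 250ULL				/* assumed CONFIG_HZ */

static uint64_t ns_to_jiffies(uint64_t ns)
{
	return ns / (NSEC_PER_SEC / HZ);	/* 1 jiffy = 4 ms at HZ=250 */
}

int main(void)
{
	uint64_t period_ns = 20000000ULL;	/* assumed scheduling period: 20 ms */
	unsigned int nr_running = 4;		/* equal-weight tasks on the cfs_rq */

	/* sched_slice() hands each entity a weight-proportional share of the period */
	uint64_t slice_ns = period_ns / nr_running;

	printf("slice = %llu ms -> reported RR interval = %llu jiffies\n",
	       (unsigned long long)(slice_ns / 1000000ULL),
	       (unsigned long long)ns_to_jiffies(slice_ns));
	return 0;
}

A 5 ms slice therefore comes back as a single jiffy here; on an otherwise idle runqueue (rq->cfs.load.weight == 0) the function returns 0 instead.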
@@ -1860,6 +1986,8 @@ static const struct sched_class fair_sched_class = {
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
 
+	.get_rr_interval	= get_rr_interval_fair,
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	.moved_group		= moved_group_fair,
 #endif