diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 233 |
1 files changed, 63 insertions, 170 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f2eb5b934715..09d19f77eb3a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1062,83 +1062,6 @@ static void yield_task_fair(struct rq *rq) | |||
1062 | se->vruntime = rightmost->vruntime + 1; | 1062 | se->vruntime = rightmost->vruntime + 1; |
1063 | } | 1063 | } |
1064 | 1064 | ||
1065 | /* | ||
1066 | * wake_idle() will wake a task on an idle cpu if task->cpu is | ||
1067 | * not idle and an idle cpu is available. The span of cpus to | ||
1068 | * search starts with the closest cpus, then moves further out as needed, | ||
1069 | * so we always favor a closer, idle cpu. | ||
1070 | * Domains may include CPUs that are not usable for migration, | ||
1071 | * hence we need to mask them out (rq->rd->online) | ||
1072 | * | ||
1073 | * Returns the CPU we should wake onto. | ||
1074 | */ | ||
1075 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | ||
1076 | |||
1077 | #define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online) | ||
1078 | |||
1079 | static int wake_idle(int cpu, struct task_struct *p) | ||
1080 | { | ||
1081 | struct sched_domain *sd; | ||
1082 | int i; | ||
1083 | unsigned int chosen_wakeup_cpu; | ||
1084 | int this_cpu; | ||
1085 | struct rq *task_rq = task_rq(p); | ||
1086 | |||
1087 | /* | ||
1088 | * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu | ||
1089 | * are idle and this is not a kernel thread and this task's affinity | ||
1090 | * allows it to be moved to preferred cpu, then just move! | ||
1091 | */ | ||
1092 | |||
1093 | this_cpu = smp_processor_id(); | ||
1094 | chosen_wakeup_cpu = | ||
1095 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; | ||
1096 | |||
1097 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && | ||
1098 | idle_cpu(cpu) && idle_cpu(this_cpu) && | ||
1099 | p->mm && !(p->flags & PF_KTHREAD) && | ||
1100 | cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) | ||
1101 | return chosen_wakeup_cpu; | ||
1102 | |||
1103 | /* | ||
1104 | * If it is idle, then it is the best cpu to run this task. | ||
1105 | * | ||
1106 | * This cpu is also the best, if it has more than one task already. | ||
1107 | * Siblings must also be busy (in most cases) as they didn't already | ||
1108 | * pick up the extra load from this cpu and hence we need not check | ||
1109 | * sibling runqueue info. This will avoid the checks and cache miss | ||
1110 | * penalties associated with that. | ||
1111 | */ | ||
1112 | if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1) | ||
1113 | return cpu; | ||
1114 | |||
1115 | for_each_domain(cpu, sd) { | ||
1116 | if ((sd->flags & SD_WAKE_IDLE) | ||
1117 | || ((sd->flags & SD_WAKE_IDLE_FAR) | ||
1118 | && !task_hot(p, task_rq->clock, sd))) { | ||
1119 | for_each_cpu_and(i, sched_domain_span(sd), | ||
1120 | &p->cpus_allowed) { | ||
1121 | if (cpu_rd_active(i, task_rq) && idle_cpu(i)) { | ||
1122 | if (i != task_cpu(p)) { | ||
1123 | schedstat_inc(p, | ||
1124 | se.nr_wakeups_idle); | ||
1125 | } | ||
1126 | return i; | ||
1127 | } | ||
1128 | } | ||
1129 | } else { | ||
1130 | break; | ||
1131 | } | ||
1132 | } | ||
1133 | return cpu; | ||
1134 | } | ||
1135 | #else /* !ARCH_HAS_SCHED_WAKE_IDLE*/ | ||
1136 | static inline int wake_idle(int cpu, struct task_struct *p) | ||
1137 | { | ||
1138 | return cpu; | ||
1139 | } | ||
1140 | #endif | ||
1141 | |||
1142 | #ifdef CONFIG_SMP | 1065 | #ifdef CONFIG_SMP |
1143 | 1066 | ||
1144 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1067 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -1225,21 +1148,22 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
1225 | 1148 | ||
1226 | #endif | 1149 | #endif |
1227 | 1150 | ||
1228 | static int | 1151 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
1229 | wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | ||
1230 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, | ||
1231 | int idx, unsigned long load, unsigned long this_load, | ||
1232 | unsigned int imbalance) | ||
1233 | { | 1152 | { |
1234 | struct task_struct *curr = this_rq->curr; | 1153 | struct task_struct *curr = current; |
1235 | struct task_group *tg; | 1154 | unsigned long this_load, load; |
1236 | unsigned long tl = this_load; | 1155 | int idx, this_cpu, prev_cpu; |
1237 | unsigned long tl_per_task; | 1156 | unsigned long tl_per_task; |
1157 | unsigned int imbalance; | ||
1158 | struct task_group *tg; | ||
1238 | unsigned long weight; | 1159 | unsigned long weight; |
1239 | int balanced; | 1160 | int balanced; |
1240 | 1161 | ||
1241 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1162 | idx = sd->wake_idx; |
1242 | return 0; | 1163 | this_cpu = smp_processor_id(); |
1164 | prev_cpu = task_cpu(p); | ||
1165 | load = source_load(prev_cpu, idx); | ||
1166 | this_load = target_load(this_cpu, idx); | ||
1243 | 1167 | ||
1244 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || | 1168 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || |
1245 | p->se.avg_overlap > sysctl_sched_migration_cost)) | 1169 | p->se.avg_overlap > sysctl_sched_migration_cost)) |
@@ -1254,24 +1178,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1254 | tg = task_group(current); | 1178 | tg = task_group(current); |
1255 | weight = current->se.load.weight; | 1179 | weight = current->se.load.weight; |
1256 | 1180 | ||
1257 | tl += effective_load(tg, this_cpu, -weight, -weight); | 1181 | this_load += effective_load(tg, this_cpu, -weight, -weight); |
1258 | load += effective_load(tg, prev_cpu, 0, -weight); | 1182 | load += effective_load(tg, prev_cpu, 0, -weight); |
1259 | } | 1183 | } |
1260 | 1184 | ||
1261 | tg = task_group(p); | 1185 | tg = task_group(p); |
1262 | weight = p->se.load.weight; | 1186 | weight = p->se.load.weight; |
1263 | 1187 | ||
1188 | imbalance = 100 + (sd->imbalance_pct - 100) / 2; | ||
1189 | |||
1264 | /* | 1190 | /* |
1265 | * In low-load situations, where prev_cpu is idle and this_cpu is idle | 1191 | * In low-load situations, where prev_cpu is idle and this_cpu is idle |
1266 | * due to the sync cause above having dropped tl to 0, we'll always have | 1192 | * due to the sync cause above having dropped this_load to 0, we'll |
1267 | * an imbalance, but there's really nothing you can do about that, so | 1193 | * always have an imbalance, but there's really nothing you can do |
1268 | * that's good too. | 1194 | * about that, so that's good too. |
1269 | * | 1195 | * |
1270 | * Otherwise check if either cpus are near enough in load to allow this | 1196 | * Otherwise check if either cpus are near enough in load to allow this |
1271 | * task to be woken on this_cpu. | 1197 | * task to be woken on this_cpu. |
1272 | */ | 1198 | */ |
1273 | balanced = !tl || | 1199 | balanced = !this_load || |
1274 | 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= | 1200 | 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= |
1275 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); | 1201 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); |
1276 | 1202 | ||
1277 | /* | 1203 | /* |
@@ -1285,14 +1211,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1285 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1211 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
1286 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1212 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
1287 | 1213 | ||
1288 | if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= | 1214 | if (balanced || |
1289 | tl_per_task)) { | 1215 | (this_load <= load && |
1216 | this_load + target_load(prev_cpu, idx) <= tl_per_task)) { | ||
1290 | /* | 1217 | /* |
1291 | * This domain has SD_WAKE_AFFINE and | 1218 | * This domain has SD_WAKE_AFFINE and |
1292 | * p is cache cold in this domain, and | 1219 | * p is cache cold in this domain, and |
1293 | * there is no bad imbalance. | 1220 | * there is no bad imbalance. |
1294 | */ | 1221 | */ |
1295 | schedstat_inc(this_sd, ttwu_move_affine); | 1222 | schedstat_inc(sd, ttwu_move_affine); |
1296 | schedstat_inc(p, se.nr_wakeups_affine); | 1223 | schedstat_inc(p, se.nr_wakeups_affine); |
1297 | 1224 | ||
1298 | return 1; | 1225 | return 1; |
@@ -1300,72 +1227,6 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1300 | return 0; | 1227 | return 0; |
1301 | } | 1228 | } |
1302 | 1229 | ||
1303 | static int sched_balance_self(int cpu, int flag); | ||
1304 | |||
1305 | static int select_task_rq_fair(struct task_struct *p, int flag, int sync) | ||
1306 | { | ||
1307 | struct sched_domain *sd, *this_sd = NULL; | ||
1308 | int prev_cpu, this_cpu, new_cpu; | ||
1309 | unsigned long load, this_load; | ||
1310 | struct rq *this_rq; | ||
1311 | unsigned int imbalance; | ||
1312 | int idx; | ||
1313 | |||
1314 | prev_cpu = task_cpu(p); | ||
1315 | this_cpu = smp_processor_id(); | ||
1316 | this_rq = cpu_rq(this_cpu); | ||
1317 | new_cpu = prev_cpu; | ||
1318 | |||
1319 | if (flag != SD_BALANCE_WAKE) | ||
1320 | return sched_balance_self(this_cpu, flag); | ||
1321 | |||
1322 | /* | ||
1323 | * 'this_sd' is the first domain that both | ||
1324 | * this_cpu and prev_cpu are present in: | ||
1325 | */ | ||
1326 | for_each_domain(this_cpu, sd) { | ||
1327 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { | ||
1328 | this_sd = sd; | ||
1329 | break; | ||
1330 | } | ||
1331 | } | ||
1332 | |||
1333 | if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) | ||
1334 | goto out; | ||
1335 | |||
1336 | /* | ||
1337 | * Check for affine wakeup and passive balancing possibilities. | ||
1338 | */ | ||
1339 | if (!this_sd) | ||
1340 | goto out; | ||
1341 | |||
1342 | idx = this_sd->wake_idx; | ||
1343 | |||
1344 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | ||
1345 | |||
1346 | load = source_load(prev_cpu, idx); | ||
1347 | this_load = target_load(this_cpu, idx); | ||
1348 | |||
1349 | if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, | ||
1350 | load, this_load, imbalance)) | ||
1351 | return this_cpu; | ||
1352 | |||
1353 | /* | ||
1354 | * Start passive balancing when half the imbalance_pct | ||
1355 | * limit is reached. | ||
1356 | */ | ||
1357 | if (this_sd->flags & SD_WAKE_BALANCE) { | ||
1358 | if (imbalance*this_load <= 100*load) { | ||
1359 | schedstat_inc(this_sd, ttwu_move_balance); | ||
1360 | schedstat_inc(p, se.nr_wakeups_passive); | ||
1361 | return this_cpu; | ||
1362 | } | ||
1363 | } | ||
1364 | |||
1365 | out: | ||
1366 | return wake_idle(new_cpu, p); | ||
1367 | } | ||
1368 | |||
1369 | /* | 1230 | /* |
1370 | * find_idlest_group finds and returns the least busy CPU group within the | 1231 | * find_idlest_group finds and returns the least busy CPU group within the |
1371 | * domain. | 1232 | * domain. |
@@ -1455,10 +1316,20 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
1455 | * | 1316 | * |
1456 | * preempt must be disabled. | 1317 | * preempt must be disabled. |
1457 | */ | 1318 | */ |
1458 | static int sched_balance_self(int cpu, int flag) | 1319 | static int select_task_rq_fair(struct task_struct *p, int flag, int sync) |
1459 | { | 1320 | { |
1460 | struct task_struct *t = current; | 1321 | struct task_struct *t = current; |
1461 | struct sched_domain *tmp, *sd = NULL; | 1322 | struct sched_domain *tmp, *sd = NULL; |
1323 | int cpu = smp_processor_id(); | ||
1324 | int prev_cpu = task_cpu(p); | ||
1325 | int new_cpu = cpu; | ||
1326 | int want_affine = 0; | ||
1327 | |||
1328 | if (flag & SD_BALANCE_WAKE) { | ||
1329 | if (sched_feat(AFFINE_WAKEUPS)) | ||
1330 | want_affine = 1; | ||
1331 | new_cpu = prev_cpu; | ||
1332 | } | ||
1462 | 1333 | ||
1463 | for_each_domain(cpu, tmp) { | 1334 | for_each_domain(cpu, tmp) { |
1464 | /* | 1335 | /* |
@@ -1466,16 +1337,38 @@ static int sched_balance_self(int cpu, int flag) | |||
1466 | */ | 1337 | */ |
1467 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | 1338 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) |
1468 | break; | 1339 | break; |
1469 | if (tmp->flags & flag) | ||
1470 | sd = tmp; | ||
1471 | } | ||
1472 | 1340 | ||
1473 | if (sd) | 1341 | switch (flag) { |
1474 | update_shares(sd); | 1342 | case SD_BALANCE_WAKE: |
1343 | if (!sched_feat(LB_WAKEUP_UPDATE)) | ||
1344 | break; | ||
1345 | case SD_BALANCE_FORK: | ||
1346 | case SD_BALANCE_EXEC: | ||
1347 | if (root_task_group_empty()) | ||
1348 | break; | ||
1349 | update_shares(tmp); | ||
1350 | default: | ||
1351 | break; | ||
1352 | } | ||
1353 | |||
1354 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | ||
1355 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | ||
1356 | |||
1357 | if (wake_affine(tmp, p, sync)) | ||
1358 | return cpu; | ||
1359 | |||
1360 | want_affine = 0; | ||
1361 | } | ||
1362 | |||
1363 | if (!(tmp->flags & flag)) | ||
1364 | continue; | ||
1365 | |||
1366 | sd = tmp; | ||
1367 | } | ||
1475 | 1368 | ||
1476 | while (sd) { | 1369 | while (sd) { |
1477 | struct sched_group *group; | 1370 | struct sched_group *group; |
1478 | int new_cpu, weight; | 1371 | int weight; |
1479 | 1372 | ||
1480 | if (!(sd->flags & flag)) { | 1373 | if (!(sd->flags & flag)) { |
1481 | sd = sd->child; | 1374 | sd = sd->child; |
@@ -1508,7 +1401,7 @@ static int sched_balance_self(int cpu, int flag) | |||
1508 | /* while loop will break here if sd == NULL */ | 1401 | /* while loop will break here if sd == NULL */ |
1509 | } | 1402 | } |
1510 | 1403 | ||
1511 | return cpu; | 1404 | return new_cpu; |
1512 | } | 1405 | } |
1513 | #endif /* CONFIG_SMP */ | 1406 | #endif /* CONFIG_SMP */ |
1514 | 1407 | ||