path: root/kernel/sched_fair.c
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	233
1 file changed, 63 insertions, 170 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2eb5b934715..09d19f77eb3a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1062,83 +1062,6 @@ static void yield_task_fair(struct rq *rq)
 	se->vruntime = rightmost->vruntime + 1;
 }
 
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (rq->rd->online)
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-
-#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
-
-static int wake_idle(int cpu, struct task_struct *p)
-{
-	struct sched_domain *sd;
-	int i;
-	unsigned int chosen_wakeup_cpu;
-	int this_cpu;
-	struct rq *task_rq = task_rq(p);
-
-	/*
-	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
-	 * are idle and this is not a kernel thread and this task's affinity
-	 * allows it to be moved to preferred cpu, then just move!
-	 */
-
-	this_cpu = smp_processor_id();
-	chosen_wakeup_cpu =
-		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
-
-	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
-		idle_cpu(cpu) && idle_cpu(this_cpu) &&
-		p->mm && !(p->flags & PF_KTHREAD) &&
-		cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
-		return chosen_wakeup_cpu;
-
-	/*
-	 * If it is idle, then it is the best cpu to run this task.
-	 *
-	 * This cpu is also the best, if it has more than one task already.
-	 * Siblings must be also busy(in most cases) as they didn't already
-	 * pickup the extra load from this cpu and hence we need not check
-	 * sibling runqueue info. This will avoid the checks and cache miss
-	 * penalities associated with that.
-	 */
-	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
-		return cpu;
-
-	for_each_domain(cpu, sd) {
-		if ((sd->flags & SD_WAKE_IDLE)
-		    || ((sd->flags & SD_WAKE_IDLE_FAR)
-			&& !task_hot(p, task_rq->clock, sd))) {
-			for_each_cpu_and(i, sched_domain_span(sd),
-					 &p->cpus_allowed) {
-				if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
-					if (i != task_cpu(p)) {
-						schedstat_inc(p,
-						       se.nr_wakeups_idle);
-					}
-					return i;
-				}
-			}
-		} else {
-			break;
-		}
-	}
-	return cpu;
-}
-#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
-	return cpu;
-}
-#endif
-
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1225,21 +1148,22 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
-static int
-wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
-	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
-	    int idx, unsigned long load, unsigned long this_load,
-	    unsigned int imbalance)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = this_rq->curr;
-	struct task_group *tg;
-	unsigned long tl = this_load;
+	struct task_struct *curr = current;
+	unsigned long this_load, load;
+	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
+	unsigned int imbalance;
+	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
-	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
-		return 0;
+	idx = sd->wake_idx;
+	this_cpu = smp_processor_id();
+	prev_cpu = task_cpu(p);
+	load = source_load(prev_cpu, idx);
+	this_load = target_load(this_cpu, idx);
 
 	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
 			p->se.avg_overlap > sysctl_sched_migration_cost))
@@ -1254,24 +1178,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 		tg = task_group(current);
 		weight = current->se.load.weight;
 
-		tl += effective_load(tg, this_cpu, -weight, -weight);
+		this_load += effective_load(tg, this_cpu, -weight, -weight);
 		load += effective_load(tg, prev_cpu, 0, -weight);
 	}
 
 	tg = task_group(p);
 	weight = p->se.load.weight;
 
+	imbalance = 100 + (sd->imbalance_pct - 100) / 2;
+
 	/*
 	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped tl to 0, we'll always have
-	 * an imbalance, but there's really nothing you can do about that, so
-	 * that's good too.
+	 * due to the sync cause above having dropped this_load to 0, we'll
+	 * always have an imbalance, but there's really nothing you can do
+	 * about that, so that's good too.
 	 *
 	 * Otherwise check if either cpus are near enough in load to allow this
 	 * task to be woken on this_cpu.
 	 */
-	balanced = !tl ||
-		100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+	balanced = !this_load ||
+		100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
 		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
 
 	/*
@@ -1285,14 +1211,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
-			tl_per_task)) {
+	if (balanced ||
+	    (this_load <= load &&
+	     this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
 		 * there is no bad imbalance.
 		 */
-		schedstat_inc(this_sd, ttwu_move_affine);
+		schedstat_inc(sd, ttwu_move_affine);
 		schedstat_inc(p, se.nr_wakeups_affine);
 
 		return 1;
@@ -1300,72 +1227,6 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	return 0;
 }
 
-static int sched_balance_self(int cpu, int flag);
-
-static int select_task_rq_fair(struct task_struct *p, int flag, int sync)
-{
-	struct sched_domain *sd, *this_sd = NULL;
-	int prev_cpu, this_cpu, new_cpu;
-	unsigned long load, this_load;
-	struct rq *this_rq;
-	unsigned int imbalance;
-	int idx;
-
-	prev_cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
-	this_rq = cpu_rq(this_cpu);
-	new_cpu = prev_cpu;
-
-	if (flag != SD_BALANCE_WAKE)
-		return sched_balance_self(this_cpu, flag);
-
-	/*
-	 * 'this_sd' is the first domain that both
-	 * this_cpu and prev_cpu are present in:
-	 */
-	for_each_domain(this_cpu, sd) {
-		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
-			this_sd = sd;
-			break;
-		}
-	}
-
-	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
-		goto out;
-
-	/*
-	 * Check for affine wakeup and passive balancing possibilities.
-	 */
-	if (!this_sd)
-		goto out;
-
-	idx = this_sd->wake_idx;
-
-	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
-	load = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
-
-	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
-			load, this_load, imbalance))
-		return this_cpu;
-
-	/*
-	 * Start passive balancing when half the imbalance_pct
-	 * limit is reached.
-	 */
-	if (this_sd->flags & SD_WAKE_BALANCE) {
-		if (imbalance*this_load <= 100*load) {
-			schedstat_inc(this_sd, ttwu_move_balance);
-			schedstat_inc(p, se.nr_wakeups_passive);
-			return this_cpu;
-		}
-	}
-
-out:
-	return wake_idle(new_cpu, p);
-}
-
 /*
  * find_idlest_group finds and returns the least busy CPU group within the
  * domain.
@@ -1455,10 +1316,20 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  *
  * preempt must be disabled.
  */
-static int sched_balance_self(int cpu, int flag)
+static int select_task_rq_fair(struct task_struct *p, int flag, int sync)
 {
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int new_cpu = cpu;
+	int want_affine = 0;
+
+	if (flag & SD_BALANCE_WAKE) {
+		if (sched_feat(AFFINE_WAKEUPS))
+			want_affine = 1;
+		new_cpu = prev_cpu;
+	}
 
 	for_each_domain(cpu, tmp) {
 		/*
@@ -1466,16 +1337,38 @@ static int sched_balance_self(int cpu, int flag)
 		 */
 		if (tmp->flags & SD_POWERSAVINGS_BALANCE)
 			break;
-		if (tmp->flags & flag)
-			sd = tmp;
-	}
 
-	if (sd)
-		update_shares(sd);
+		switch (flag) {
+		case SD_BALANCE_WAKE:
+			if (!sched_feat(LB_WAKEUP_UPDATE))
+				break;
+		case SD_BALANCE_FORK:
+		case SD_BALANCE_EXEC:
+			if (root_task_group_empty())
+				break;
+			update_shares(tmp);
+		default:
+			break;
+		}
+
+		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+
+			if (wake_affine(tmp, p, sync))
+				return cpu;
+
+			want_affine = 0;
+		}
+
+		if (!(tmp->flags & flag))
+			continue;
+
+		sd = tmp;
+	}
 
 	while (sd) {
 		struct sched_group *group;
-		int new_cpu, weight;
+		int weight;
 
 		if (!(sd->flags & flag)) {
 			sd = sd->child;
@@ -1508,7 +1401,7 @@ static int sched_balance_self(int cpu, int flag)
 		/* while loop will break here if sd == NULL */
 	}
 
-	return cpu;
+	return new_cpu;
 }
 #endif /* CONFIG_SMP */
 
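
The core of the change is that wake_affine() now derives idx, the two CPUs and both load figures itself, and folds the old imbalance computation in as well. The following is a minimal userspace sketch of that balance check, assuming group scheduling is disabled (so effective_load() reduces to the task weight on the waking CPU and to zero on prev_cpu) and using made-up load values and imbalance_pct; it is an illustration, not kernel code.

/*
 * Minimal sketch of the check wake_affine() performs after this patch.
 * Not kernel code: the load figures and imbalance_pct are invented,
 * and group scheduling is assumed off.
 */
#include <stdio.h>

static int balanced(unsigned long this_load, unsigned long load,
		    unsigned long weight, unsigned int imbalance_pct)
{
	/* Same halved imbalance threshold as in the patched function. */
	unsigned int imbalance = 100 + (imbalance_pct - 100) / 2;

	/* An idle waking CPU (this_load == 0) is always acceptable. */
	if (!this_load)
		return 1;

	/*
	 * Allow the affine wakeup only if pulling the task here keeps
	 * this CPU within "imbalance" percent of prev_cpu's load.
	 */
	return 100 * (this_load + weight) <= imbalance * load;
}

int main(void)
{
	/* Hypothetical numbers: this CPU 1024, prev CPU 2048, nice-0 task. */
	printf("balanced: %d\n", balanced(1024, 2048, 1024, 125));
	return 0;
}

With these numbers the check passes (it prints "balanced: 1"), so the wakeup would be allowed to land on the waking CPU.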
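The other half of the patch folds sched_balance_self() and the old select_task_rq_fair() into a single walk over the domain hierarchy. The sketch below models only the shape of that walk; the domain names, flag values and the wake_affine_ok() helper are invented stand-ins, not kernel APIs.

/*
 * Rough userspace model of the merged select_task_rq_fair() walk:
 * try the affine shortcut once, in the smallest domain that spans
 * prev_cpu, otherwise remember the widest domain that still carries
 * the requested balance flag.
 */
#include <stdio.h>

#define SD_WAKE_AFFINE	0x01
#define SD_BALANCE_WAKE	0x02

struct domain {
	const char *name;
	unsigned int flags;
	int spans_prev_cpu;	/* stand-in for cpumask_test_cpu() */
};

static int wake_affine_ok(const struct domain *d)
{
	(void)d;		/* stand-in for the wake_affine() load check */
	return 1;
}

int main(void)
{
	struct domain domains[] = {	/* ordered smallest to largest */
		{ "SMT",  SD_WAKE_AFFINE | SD_BALANCE_WAKE, 0 },
		{ "MC",   SD_WAKE_AFFINE | SD_BALANCE_WAKE, 1 },
		{ "NODE", SD_BALANCE_WAKE,                  1 },
	};
	int this_cpu = 0, prev_cpu = 3, new_cpu = prev_cpu;
	int want_affine = 1;	/* SD_BALANCE_WAKE with AFFINE_WAKEUPS on */
	unsigned int flag = SD_BALANCE_WAKE;
	const struct domain *sd = NULL;
	size_t i;

	for (i = 0; i < sizeof(domains) / sizeof(domains[0]); i++) {
		const struct domain *tmp = &domains[i];

		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
		    tmp->spans_prev_cpu) {
			if (wake_affine_ok(tmp)) {
				printf("affine wakeup on cpu %d (%s)\n",
				       this_cpu, tmp->name);
				return 0;
			}
			want_affine = 0;	/* only one affine attempt */
		}

		if (!(tmp->flags & flag))
			continue;

		sd = tmp;	/* widest domain with the flag so far */
	}

	/* The kernel would now run find_idlest_group()/cpu() over sd. */
	printf("balance in %s, fall back to cpu %d\n",
	       sd ? sd->name : "none", new_cpu);
	return 0;
}

Run as-is it takes the affine shortcut at the MC level; clearing want_affine or the SD_WAKE_AFFINE flag makes it fall through to the find_idlest_group()-style balancing that the while (sd) loop in the patch performs.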