diff options
author | Nick Piggin <nickpiggin@yahoo.com.au> | 2005-06-25 17:57:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-25 19:24:44 -0400 |
commit | 476d139c218e44e045e4bc6d4cc02b010b343939 (patch) | |
tree | 82a6537b829b2b35156fba5a312f4e44273a4356 /kernel | |
parent | 674311d5b411e9042df4fdf7aef0b3c8217b6240 (diff) |
[PATCH] sched: consolidate sbe sbf
Consolidate balance-on-exec with balance-on-fork. This is made easy by the
sched-domains RCU patches.
Besides the general benefit of reducing code, this allows balance-on-fork to
run with the runqueues unlocked.
schedstats is a problem: the consolidated path no longer updates the separate
sbe_* and sbf_* counters. Maybe just have a single balance-on-event statistic
instead of distinguishing fork and exec?
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 21 | ||||
-rw-r--r-- | kernel/sched.c | 174 |
2 files changed, 80 insertions, 115 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index a28d11e10877..2c7806873bfd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1003,9 +1003,6 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1003 | p->pdeath_signal = 0; | 1003 | p->pdeath_signal = 0; |
1004 | p->exit_state = 0; | 1004 | p->exit_state = 0; |
1005 | 1005 | ||
1006 | /* Perform scheduler related setup */ | ||
1007 | sched_fork(p); | ||
1008 | |||
1009 | /* | 1006 | /* |
1010 | * Ok, make it visible to the rest of the system. | 1007 | * Ok, make it visible to the rest of the system. |
1011 | * We dont wake it up yet. | 1008 | * We dont wake it up yet. |
@@ -1014,18 +1011,24 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1014 | INIT_LIST_HEAD(&p->ptrace_children); | 1011 | INIT_LIST_HEAD(&p->ptrace_children); |
1015 | INIT_LIST_HEAD(&p->ptrace_list); | 1012 | INIT_LIST_HEAD(&p->ptrace_list); |
1016 | 1013 | ||
1014 | /* Perform scheduler related setup. Assign this task to a CPU. */ | ||
1015 | sched_fork(p, clone_flags); | ||
1016 | |||
1017 | /* Need tasklist lock for parent etc handling! */ | 1017 | /* Need tasklist lock for parent etc handling! */ |
1018 | write_lock_irq(&tasklist_lock); | 1018 | write_lock_irq(&tasklist_lock); |
1019 | 1019 | ||
1020 | /* | 1020 | /* |
1021 | * The task hasn't been attached yet, so cpus_allowed mask cannot | 1021 | * The task hasn't been attached yet, so its cpus_allowed mask will |
1022 | * have changed. The cpus_allowed mask of the parent may have | 1022 | * not be changed, nor will its assigned CPU. |
1023 | * changed after it was copied first time, and it may then move to | 1023 | * |
1024 | * another CPU - so we re-copy it here and set the child's CPU to | 1024 | * The cpus_allowed mask of the parent may have changed after it was |
1025 | * the parent's CPU. This avoids alot of nasty races. | 1025 | * copied first time - so re-copy it here, then check the child's CPU |
1026 | * to ensure it is on a valid CPU (and if not, just force it back to | ||
1027 | * parent's CPU). This avoids alot of nasty races. | ||
1026 | */ | 1028 | */ |
1027 | p->cpus_allowed = current->cpus_allowed; | 1029 | p->cpus_allowed = current->cpus_allowed; |
1028 | set_task_cpu(p, smp_processor_id()); | 1030 | if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed))) |
1031 | set_task_cpu(p, smp_processor_id()); | ||
1029 | 1032 | ||
1030 | /* | 1033 | /* |
1031 | * Check for pending SIGKILL! The new thread should not be allowed | 1034 | * Check for pending SIGKILL! The new thread should not be allowed |
diff --git a/kernel/sched.c b/kernel/sched.c
index 54ce787b6207..579da278e72f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1021,8 +1021,59 @@ static int find_idlest_cpu(struct sched_group *group, int this_cpu) | |||
1021 | return idlest; | 1021 | return idlest; |
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | /* | ||
1025 | * sched_balance_self: balance the current task (running on cpu) in domains | ||
1026 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | ||
1027 | * SD_BALANCE_EXEC. | ||
1028 | * | ||
1029 | * Balance, ie. select the least loaded group. | ||
1030 | * | ||
1031 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
1032 | * | ||
1033 | * preempt must be disabled. | ||
1034 | */ | ||
1035 | static int sched_balance_self(int cpu, int flag) | ||
1036 | { | ||
1037 | struct task_struct *t = current; | ||
1038 | struct sched_domain *tmp, *sd = NULL; | ||
1024 | 1039 | ||
1025 | #endif | 1040 | for_each_domain(cpu, tmp) |
1041 | if (tmp->flags & flag) | ||
1042 | sd = tmp; | ||
1043 | |||
1044 | while (sd) { | ||
1045 | cpumask_t span; | ||
1046 | struct sched_group *group; | ||
1047 | int new_cpu; | ||
1048 | int weight; | ||
1049 | |||
1050 | span = sd->span; | ||
1051 | group = find_idlest_group(sd, t, cpu); | ||
1052 | if (!group) | ||
1053 | goto nextlevel; | ||
1054 | |||
1055 | new_cpu = find_idlest_cpu(group, cpu); | ||
1056 | if (new_cpu == -1 || new_cpu == cpu) | ||
1057 | goto nextlevel; | ||
1058 | |||
1059 | /* Now try balancing at a lower domain level */ | ||
1060 | cpu = new_cpu; | ||
1061 | nextlevel: | ||
1062 | sd = NULL; | ||
1063 | weight = cpus_weight(span); | ||
1064 | for_each_domain(cpu, tmp) { | ||
1065 | if (weight <= cpus_weight(tmp->span)) | ||
1066 | break; | ||
1067 | if (tmp->flags & flag) | ||
1068 | sd = tmp; | ||
1069 | } | ||
1070 | /* while loop will break here if sd == NULL */ | ||
1071 | } | ||
1072 | |||
1073 | return cpu; | ||
1074 | } | ||
1075 | |||
1076 | #endif /* CONFIG_SMP */ | ||
1026 | 1077 | ||
1027 | /* | 1078 | /* |
1028 | * wake_idle() will wake a task on an idle cpu if task->cpu is | 1079 | * wake_idle() will wake a task on an idle cpu if task->cpu is |
@@ -1240,8 +1291,15 @@ int fastcall wake_up_state(task_t *p, unsigned int state) | |||
1240 | * Perform scheduler related setup for a newly forked process p. | 1291 | * Perform scheduler related setup for a newly forked process p. |
1241 | * p is forked by current. | 1292 | * p is forked by current. |
1242 | */ | 1293 | */ |
1243 | void fastcall sched_fork(task_t *p) | 1294 | void fastcall sched_fork(task_t *p, int clone_flags) |
1244 | { | 1295 | { |
1296 | int cpu = get_cpu(); | ||
1297 | |||
1298 | #ifdef CONFIG_SMP | ||
1299 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | ||
1300 | #endif | ||
1301 | set_task_cpu(p, cpu); | ||
1302 | |||
1245 | /* | 1303 | /* |
1246 | * We mark the process as running here, but have not actually | 1304 | * We mark the process as running here, but have not actually |
1247 | * inserted it onto the runqueue yet. This guarantees that | 1305 | * inserted it onto the runqueue yet. This guarantees that |
@@ -1282,12 +1340,10 @@ void fastcall sched_fork(task_t *p) | |||
1282 | * runqueue lock is not a problem. | 1340 | * runqueue lock is not a problem. |
1283 | */ | 1341 | */ |
1284 | current->time_slice = 1; | 1342 | current->time_slice = 1; |
1285 | preempt_disable(); | ||
1286 | scheduler_tick(); | 1343 | scheduler_tick(); |
1287 | local_irq_enable(); | 1344 | } |
1288 | preempt_enable(); | 1345 | local_irq_enable(); |
1289 | } else | 1346 | put_cpu(); |
1290 | local_irq_enable(); | ||
1291 | } | 1347 | } |
1292 | 1348 | ||
1293 | /* | 1349 | /* |
@@ -1302,64 +1358,12 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) | |||
1302 | unsigned long flags; | 1358 | unsigned long flags; |
1303 | int this_cpu, cpu; | 1359 | int this_cpu, cpu; |
1304 | runqueue_t *rq, *this_rq; | 1360 | runqueue_t *rq, *this_rq; |
1305 | #ifdef CONFIG_SMP | ||
1306 | struct sched_domain *tmp, *sd = NULL; | ||
1307 | #endif | ||
1308 | 1361 | ||
1309 | rq = task_rq_lock(p, &flags); | 1362 | rq = task_rq_lock(p, &flags); |
1310 | BUG_ON(p->state != TASK_RUNNING); | 1363 | BUG_ON(p->state != TASK_RUNNING); |
1311 | this_cpu = smp_processor_id(); | 1364 | this_cpu = smp_processor_id(); |
1312 | cpu = task_cpu(p); | 1365 | cpu = task_cpu(p); |
1313 | 1366 | ||
1314 | #ifdef CONFIG_SMP | ||
1315 | for_each_domain(cpu, tmp) | ||
1316 | if (tmp->flags & SD_BALANCE_FORK) | ||
1317 | sd = tmp; | ||
1318 | |||
1319 | if (sd) { | ||
1320 | cpumask_t span; | ||
1321 | int new_cpu; | ||
1322 | struct sched_group *group; | ||
1323 | |||
1324 | again: | ||
1325 | schedstat_inc(sd, sbf_cnt); | ||
1326 | span = sd->span; | ||
1327 | cpu = task_cpu(p); | ||
1328 | group = find_idlest_group(sd, p, cpu); | ||
1329 | if (!group) { | ||
1330 | schedstat_inc(sd, sbf_balanced); | ||
1331 | goto nextlevel; | ||
1332 | } | ||
1333 | |||
1334 | new_cpu = find_idlest_cpu(group, cpu); | ||
1335 | if (new_cpu == -1 || new_cpu == cpu) { | ||
1336 | schedstat_inc(sd, sbf_balanced); | ||
1337 | goto nextlevel; | ||
1338 | } | ||
1339 | |||
1340 | if (cpu_isset(new_cpu, p->cpus_allowed)) { | ||
1341 | schedstat_inc(sd, sbf_pushed); | ||
1342 | set_task_cpu(p, new_cpu); | ||
1343 | task_rq_unlock(rq, &flags); | ||
1344 | rq = task_rq_lock(p, &flags); | ||
1345 | cpu = task_cpu(p); | ||
1346 | } | ||
1347 | |||
1348 | /* Now try balancing at a lower domain level */ | ||
1349 | nextlevel: | ||
1350 | sd = NULL; | ||
1351 | for_each_domain(cpu, tmp) { | ||
1352 | if (cpus_subset(span, tmp->span)) | ||
1353 | break; | ||
1354 | if (tmp->flags & SD_BALANCE_FORK) | ||
1355 | sd = tmp; | ||
1356 | } | ||
1357 | |||
1358 | if (sd) | ||
1359 | goto again; | ||
1360 | } | ||
1361 | |||
1362 | #endif | ||
1363 | /* | 1367 | /* |
1364 | * We decrease the sleep average of forking parents | 1368 | * We decrease the sleep average of forking parents |
1365 | * and children as well, to keep max-interactive tasks | 1369 | * and children as well, to keep max-interactive tasks |
@@ -1708,58 +1712,16 @@ out: | |||
1708 | } | 1712 | } |
1709 | 1713 | ||
1710 | /* | 1714 | /* |
1711 | * sched_exec(): find the highest-level, exec-balance-capable | 1715 | * sched_exec - execve() is a valuable balancing opportunity, because at |
1712 | * domain and try to migrate the task to the least loaded CPU. | 1716 | * this point the task has the smallest effective memory and cache footprint. |
1713 | * | ||
1714 | * execve() is a valuable balancing opportunity, because at this point | ||
1715 | * the task has the smallest effective memory and cache footprint. | ||
1716 | */ | 1717 | */ |
1717 | void sched_exec(void) | 1718 | void sched_exec(void) |
1718 | { | 1719 | { |
1719 | struct sched_domain *tmp, *sd = NULL; | ||
1720 | int new_cpu, this_cpu = get_cpu(); | 1720 | int new_cpu, this_cpu = get_cpu(); |
1721 | 1721 | new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); | |
1722 | for_each_domain(this_cpu, tmp) | ||
1723 | if (tmp->flags & SD_BALANCE_EXEC) | ||
1724 | sd = tmp; | ||
1725 | |||
1726 | if (sd) { | ||
1727 | cpumask_t span; | ||
1728 | struct sched_group *group; | ||
1729 | again: | ||
1730 | schedstat_inc(sd, sbe_cnt); | ||
1731 | span = sd->span; | ||
1732 | group = find_idlest_group(sd, current, this_cpu); | ||
1733 | if (!group) { | ||
1734 | schedstat_inc(sd, sbe_balanced); | ||
1735 | goto nextlevel; | ||
1736 | } | ||
1737 | new_cpu = find_idlest_cpu(group, this_cpu); | ||
1738 | if (new_cpu == -1 || new_cpu == this_cpu) { | ||
1739 | schedstat_inc(sd, sbe_balanced); | ||
1740 | goto nextlevel; | ||
1741 | } | ||
1742 | |||
1743 | schedstat_inc(sd, sbe_pushed); | ||
1744 | put_cpu(); | ||
1745 | sched_migrate_task(current, new_cpu); | ||
1746 | |||
1747 | /* Now try balancing at a lower domain level */ | ||
1748 | this_cpu = get_cpu(); | ||
1749 | nextlevel: | ||
1750 | sd = NULL; | ||
1751 | for_each_domain(this_cpu, tmp) { | ||
1752 | if (cpus_subset(span, tmp->span)) | ||
1753 | break; | ||
1754 | if (tmp->flags & SD_BALANCE_EXEC) | ||
1755 | sd = tmp; | ||
1756 | } | ||
1757 | |||
1758 | if (sd) | ||
1759 | goto again; | ||
1760 | } | ||
1761 | |||
1762 | put_cpu(); | 1722 | put_cpu(); |
1723 | if (new_cpu != this_cpu) | ||
1724 | sched_migrate_task(current, new_cpu); | ||
1763 | } | 1725 | } |
1764 | 1726 | ||
1765 | /* | 1727 | /* |