aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched.h2
-rw-r--r--kernel/fork.c21
-rw-r--r--kernel/sched.c174
3 files changed, 81 insertions, 116 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d27be9337425..edb2c69a8873 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -930,7 +930,7 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
930#else 930#else
931 static inline void kick_process(struct task_struct *tsk) { } 931 static inline void kick_process(struct task_struct *tsk) { }
932#endif 932#endif
933extern void FASTCALL(sched_fork(task_t * p)); 933extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
934extern void FASTCALL(sched_exit(task_t * p)); 934extern void FASTCALL(sched_exit(task_t * p));
935 935
936extern int in_group_p(gid_t); 936extern int in_group_p(gid_t);
diff --git a/kernel/fork.c b/kernel/fork.c
index a28d11e10877..2c7806873bfd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1003,9 +1003,6 @@ static task_t *copy_process(unsigned long clone_flags,
1003 p->pdeath_signal = 0; 1003 p->pdeath_signal = 0;
1004 p->exit_state = 0; 1004 p->exit_state = 0;
1005 1005
1006 /* Perform scheduler related setup */
1007 sched_fork(p);
1008
1009 /* 1006 /*
1010 * Ok, make it visible to the rest of the system. 1007 * Ok, make it visible to the rest of the system.
1011 * We don't wake it up yet. 1008 * We don't wake it up yet.
@@ -1014,18 +1011,24 @@ static task_t *copy_process(unsigned long clone_flags,
1014 INIT_LIST_HEAD(&p->ptrace_children); 1011 INIT_LIST_HEAD(&p->ptrace_children);
1015 INIT_LIST_HEAD(&p->ptrace_list); 1012 INIT_LIST_HEAD(&p->ptrace_list);
1016 1013
1014 /* Perform scheduler related setup. Assign this task to a CPU. */
1015 sched_fork(p, clone_flags);
1016
1017 /* Need tasklist lock for parent etc handling! */ 1017 /* Need tasklist lock for parent etc handling! */
1018 write_lock_irq(&tasklist_lock); 1018 write_lock_irq(&tasklist_lock);
1019 1019
1020 /* 1020 /*
1021 * The task hasn't been attached yet, so cpus_allowed mask cannot 1021 * The task hasn't been attached yet, so its cpus_allowed mask will
1022 * have changed. The cpus_allowed mask of the parent may have 1022 * not be changed, nor will its assigned CPU.
1023 * changed after it was copied first time, and it may then move to 1023 *
1024 * another CPU - so we re-copy it here and set the child's CPU to 1024 * The cpus_allowed mask of the parent may have changed after it was
1025 * the parent's CPU. This avoids alot of nasty races. 1025 * copied first time - so re-copy it here, then check the child's CPU
1026 * to ensure it is on a valid CPU (and if not, just force it back to
1027 * parent's CPU). This avoids a lot of nasty races.
1026 */ 1028 */
1027 p->cpus_allowed = current->cpus_allowed; 1029 p->cpus_allowed = current->cpus_allowed;
1028 set_task_cpu(p, smp_processor_id()); 1030 if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed)))
1031 set_task_cpu(p, smp_processor_id());
1029 1032
1030 /* 1033 /*
1031 * Check for pending SIGKILL! The new thread should not be allowed 1034 * Check for pending SIGKILL! The new thread should not be allowed
diff --git a/kernel/sched.c b/kernel/sched.c
index 54ce787b6207..579da278e72f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1021,8 +1021,59 @@ static int find_idlest_cpu(struct sched_group *group, int this_cpu)
1021 return idlest; 1021 return idlest;
1022} 1022}
1023 1023
1024/*
1025 * sched_balance_self: balance the current task (running on cpu) in domains
1026 * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
1027 * SD_BALANCE_EXEC.
1028 *
1029 * Balance, i.e. select the least loaded group.
1030 *
1031 * Returns the target CPU number, or the same CPU if no balancing is needed.
1032 *
1033 * preempt must be disabled.
1034 */
1035static int sched_balance_self(int cpu, int flag)
1036{
1037 struct task_struct *t = current;
1038 struct sched_domain *tmp, *sd = NULL;
1024 1039
1025#endif 1040 for_each_domain(cpu, tmp)
1041 if (tmp->flags & flag)
1042 sd = tmp;
1043
1044 while (sd) {
1045 cpumask_t span;
1046 struct sched_group *group;
1047 int new_cpu;
1048 int weight;
1049
1050 span = sd->span;
1051 group = find_idlest_group(sd, t, cpu);
1052 if (!group)
1053 goto nextlevel;
1054
1055 new_cpu = find_idlest_cpu(group, cpu);
1056 if (new_cpu == -1 || new_cpu == cpu)
1057 goto nextlevel;
1058
1059 /* Now try balancing at a lower domain level */
1060 cpu = new_cpu;
1061nextlevel:
1062 sd = NULL;
1063 weight = cpus_weight(span);
1064 for_each_domain(cpu, tmp) {
1065 if (weight <= cpus_weight(tmp->span))
1066 break;
1067 if (tmp->flags & flag)
1068 sd = tmp;
1069 }
1070 /* while loop will break here if sd == NULL */
1071 }
1072
1073 return cpu;
1074}
1075
1076#endif /* CONFIG_SMP */
1026 1077
1027/* 1078/*
1028 * wake_idle() will wake a task on an idle cpu if task->cpu is 1079 * wake_idle() will wake a task on an idle cpu if task->cpu is
@@ -1240,8 +1291,15 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
1240 * Perform scheduler related setup for a newly forked process p. 1291 * Perform scheduler related setup for a newly forked process p.
1241 * p is forked by current. 1292 * p is forked by current.
1242 */ 1293 */
1243void fastcall sched_fork(task_t *p) 1294void fastcall sched_fork(task_t *p, int clone_flags)
1244{ 1295{
1296 int cpu = get_cpu();
1297
1298#ifdef CONFIG_SMP
1299 cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
1300#endif
1301 set_task_cpu(p, cpu);
1302
1245 /* 1303 /*
1246 * We mark the process as running here, but have not actually 1304 * We mark the process as running here, but have not actually
1247 * inserted it onto the runqueue yet. This guarantees that 1305 * inserted it onto the runqueue yet. This guarantees that
@@ -1282,12 +1340,10 @@ void fastcall sched_fork(task_t *p)
1282 * runqueue lock is not a problem. 1340 * runqueue lock is not a problem.
1283 */ 1341 */
1284 current->time_slice = 1; 1342 current->time_slice = 1;
1285 preempt_disable();
1286 scheduler_tick(); 1343 scheduler_tick();
1287 local_irq_enable(); 1344 }
1288 preempt_enable(); 1345 local_irq_enable();
1289 } else 1346 put_cpu();
1290 local_irq_enable();
1291} 1347}
1292 1348
1293/* 1349/*
@@ -1302,64 +1358,12 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
1302 unsigned long flags; 1358 unsigned long flags;
1303 int this_cpu, cpu; 1359 int this_cpu, cpu;
1304 runqueue_t *rq, *this_rq; 1360 runqueue_t *rq, *this_rq;
1305#ifdef CONFIG_SMP
1306 struct sched_domain *tmp, *sd = NULL;
1307#endif
1308 1361
1309 rq = task_rq_lock(p, &flags); 1362 rq = task_rq_lock(p, &flags);
1310 BUG_ON(p->state != TASK_RUNNING); 1363 BUG_ON(p->state != TASK_RUNNING);
1311 this_cpu = smp_processor_id(); 1364 this_cpu = smp_processor_id();
1312 cpu = task_cpu(p); 1365 cpu = task_cpu(p);
1313 1366
1314#ifdef CONFIG_SMP
1315 for_each_domain(cpu, tmp)
1316 if (tmp->flags & SD_BALANCE_FORK)
1317 sd = tmp;
1318
1319 if (sd) {
1320 cpumask_t span;
1321 int new_cpu;
1322 struct sched_group *group;
1323
1324again:
1325 schedstat_inc(sd, sbf_cnt);
1326 span = sd->span;
1327 cpu = task_cpu(p);
1328 group = find_idlest_group(sd, p, cpu);
1329 if (!group) {
1330 schedstat_inc(sd, sbf_balanced);
1331 goto nextlevel;
1332 }
1333
1334 new_cpu = find_idlest_cpu(group, cpu);
1335 if (new_cpu == -1 || new_cpu == cpu) {
1336 schedstat_inc(sd, sbf_balanced);
1337 goto nextlevel;
1338 }
1339
1340 if (cpu_isset(new_cpu, p->cpus_allowed)) {
1341 schedstat_inc(sd, sbf_pushed);
1342 set_task_cpu(p, new_cpu);
1343 task_rq_unlock(rq, &flags);
1344 rq = task_rq_lock(p, &flags);
1345 cpu = task_cpu(p);
1346 }
1347
1348 /* Now try balancing at a lower domain level */
1349nextlevel:
1350 sd = NULL;
1351 for_each_domain(cpu, tmp) {
1352 if (cpus_subset(span, tmp->span))
1353 break;
1354 if (tmp->flags & SD_BALANCE_FORK)
1355 sd = tmp;
1356 }
1357
1358 if (sd)
1359 goto again;
1360 }
1361
1362#endif
1363 /* 1367 /*
1364 * We decrease the sleep average of forking parents 1368 * We decrease the sleep average of forking parents
1365 * and children as well, to keep max-interactive tasks 1369 * and children as well, to keep max-interactive tasks
@@ -1708,58 +1712,16 @@ out:
1708} 1712}
1709 1713
1710/* 1714/*
1711 * sched_exec(): find the highest-level, exec-balance-capable 1715 * sched_exec - execve() is a valuable balancing opportunity, because at
1712 * domain and try to migrate the task to the least loaded CPU. 1716 * this point the task has the smallest effective memory and cache footprint.
1713 *
1714 * execve() is a valuable balancing opportunity, because at this point
1715 * the task has the smallest effective memory and cache footprint.
1716 */ 1717 */
1717void sched_exec(void) 1718void sched_exec(void)
1718{ 1719{
1719 struct sched_domain *tmp, *sd = NULL;
1720 int new_cpu, this_cpu = get_cpu(); 1720 int new_cpu, this_cpu = get_cpu();
1721 1721 new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
1722 for_each_domain(this_cpu, tmp)
1723 if (tmp->flags & SD_BALANCE_EXEC)
1724 sd = tmp;
1725
1726 if (sd) {
1727 cpumask_t span;
1728 struct sched_group *group;
1729again:
1730 schedstat_inc(sd, sbe_cnt);
1731 span = sd->span;
1732 group = find_idlest_group(sd, current, this_cpu);
1733 if (!group) {
1734 schedstat_inc(sd, sbe_balanced);
1735 goto nextlevel;
1736 }
1737 new_cpu = find_idlest_cpu(group, this_cpu);
1738 if (new_cpu == -1 || new_cpu == this_cpu) {
1739 schedstat_inc(sd, sbe_balanced);
1740 goto nextlevel;
1741 }
1742
1743 schedstat_inc(sd, sbe_pushed);
1744 put_cpu();
1745 sched_migrate_task(current, new_cpu);
1746
1747 /* Now try balancing at a lower domain level */
1748 this_cpu = get_cpu();
1749nextlevel:
1750 sd = NULL;
1751 for_each_domain(this_cpu, tmp) {
1752 if (cpus_subset(span, tmp->span))
1753 break;
1754 if (tmp->flags & SD_BALANCE_EXEC)
1755 sd = tmp;
1756 }
1757
1758 if (sd)
1759 goto again;
1760 }
1761
1762 put_cpu(); 1722 put_cpu();
1723 if (new_cpu != this_cpu)
1724 sched_migrate_task(current, new_cpu);
1763} 1725}
1764 1726
1765/* 1727/*