-rw-r--r--  include/asm-x86_64/topology.h |   2
-rw-r--r--  include/linux/sched.h         |  10
-rw-r--r--  include/linux/topology.h      |   2
-rw-r--r--  kernel/sched.c                | 164
4 files changed, 119 insertions, 59 deletions
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 9cb7459ce722..802d09b9c99f 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -44,9 +44,11 @@ extern int __node_distance(int, int);
 	.idle_idx		= 2,			\
 	.newidle_idx		= 1,			\
 	.wake_idx		= 1,			\
+	.forkexec_idx		= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 664981ac1fb6..613491d3a875 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -460,10 +460,11 @@ enum idle_type
 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
 #define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
 #define SD_BALANCE_EXEC		4	/* Balance on exec */
-#define SD_WAKE_IDLE		8	/* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE		16	/* Wake task to waking CPU */
-#define SD_WAKE_BALANCE		32	/* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER	64	/* Domain members share cpu power */
+#define SD_BALANCE_FORK		8	/* Balance on fork, clone */
+#define SD_WAKE_IDLE		16	/* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
+#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
@@ -492,6 +493,7 @@ struct sched_domain {
 	unsigned int idle_idx;
 	unsigned int newidle_idx;
 	unsigned int wake_idx;
+	unsigned int forkexec_idx;
 	int flags;			/* See SD_* */
 
 	/* Runtime fields. */
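
Side note on the renumbering above: the SD_* values are single power-of-two bits, so inserting SD_BALANCE_FORK at bit 3 pushes every later flag up by one. A minimal userspace sketch (illustrative only, not kernel code; main() and the printf are just for demonstration) of how a domain's flags field composes and tests these bits:

#include <stdio.h>

#define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
#define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
#define SD_BALANCE_EXEC		4	/* Balance on exec */
#define SD_BALANCE_FORK		8	/* Balance on fork, clone */
#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */

int main(void)
{
	/* A domain opts into balance passes by OR-ing the bits it wants. */
	int flags = SD_LOAD_BALANCE | SD_BALANCE_NEWIDLE
		  | SD_BALANCE_FORK | SD_BALANCE_EXEC | SD_WAKE_BALANCE;

	/* The fork-time pass only runs in domains that set SD_BALANCE_FORK. */
	if (flags & SD_BALANCE_FORK)
		printf("balance on fork enabled (flags=0x%x)\n", flags);
	return 0;
}
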
diff --git a/include/linux/topology.h b/include/linux/topology.h
index b23ec64df7f1..665597207def 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -93,6 +93,7 @@
 	.idle_idx		= 0,			\
 	.newidle_idx		= 0,			\
 	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
@@ -123,6 +124,7 @@
 	.idle_idx		= 0,			\
 	.newidle_idx		= 1,			\
 	.wake_idx		= 1,			\
+	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
diff --git a/kernel/sched.c b/kernel/sched.c
index 396724a2519f..7ecc237e2aab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -893,6 +893,79 @@ static inline unsigned long target_load(int cpu, int type)
 	return max(rq->cpu_load[type-1], load_now);
 }
 
+/*
+ * find_idlest_group finds and returns the least busy CPU group within the
+ * domain.
+ */
+static struct sched_group *
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+{
+	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
+	unsigned long min_load = ULONG_MAX, this_load = 0;
+	int load_idx = sd->forkexec_idx;
+	int imbalance = 100 + (sd->imbalance_pct-100)/2;
+
+	do {
+		unsigned long load, avg_load;
+		int local_group;
+		int i;
+
+		local_group = cpu_isset(this_cpu, group->cpumask);
+		/* XXX: put a cpus allowed check */
+
+		/* Tally up the load of all CPUs in the group */
+		avg_load = 0;
+
+		for_each_cpu_mask(i, group->cpumask) {
+			/* Bias balancing toward cpus of our domain */
+			if (local_group)
+				load = source_load(i, load_idx);
+			else
+				load = target_load(i, load_idx);
+
+			avg_load += load;
+		}
+
+		/* Adjust by relative CPU power of the group */
+		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+		if (local_group) {
+			this_load = avg_load;
+			this = group;
+		} else if (avg_load < min_load) {
+			min_load = avg_load;
+			idlest = group;
+		}
+		group = group->next;
+	} while (group != sd->groups);
+
+	if (!idlest || 100*this_load < imbalance*min_load)
+		return NULL;
+	return idlest;
+}
+
+/*
+ * find_idlest_cpu - find the idlest runqueue among the cpus in group.
+ */
+static int find_idlest_cpu(struct sched_group *group, int this_cpu)
+{
+	unsigned long load, min_load = ULONG_MAX;
+	int idlest = -1;
+	int i;
+
+	for_each_cpu_mask(i, group->cpumask) {
+		load = source_load(i, 0);
+
+		if (load < min_load || (load == min_load && i == this_cpu)) {
+			min_load = load;
+			idlest = i;
+		}
+	}
+
+	return idlest;
+}
+
+
 #endif
 
 /*
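
To see how the two new helpers fit together, here is a self-contained userspace model (not the kernel code: pick_group()/pick_cpu(), the struct group layout, the example loads and the imbalance_pct of 125 are all made-up, and the cpu_power scaling is left out). It mirrors the two-step placement the patch introduces: choose the least-loaded group, then the least-loaded CPU inside it, staying local unless the local group is busier than the idlest remote one by the halved imbalance threshold, 100 + (125-100)/2 = 112, i.e. roughly 12%:

#include <stdio.h>
#include <limits.h>

struct group { const char *name; int ncpus; unsigned long load[4]; };

/* Return the index of the group a new task should go to (like
 * find_idlest_group, but over plain arrays and without cpu_power). */
static int pick_group(const struct group *g, int ngroups, int local,
		      int imbalance_pct)
{
	unsigned long min_load = ULONG_MAX, this_load = 0;
	int imbalance = 100 + (imbalance_pct - 100) / 2;
	int idlest = -1;

	for (int gi = 0; gi < ngroups; gi++) {
		unsigned long sum = 0;

		for (int i = 0; i < g[gi].ncpus; i++)
			sum += g[gi].load[i];
		if (gi == local)
			this_load = sum;
		else if (sum < min_load) {
			min_load = sum;
			idlest = gi;
		}
	}
	/* Stay local unless the idlest remote group is clearly lighter. */
	if (idlest < 0 || 100 * this_load < (unsigned long)imbalance * min_load)
		return local;
	return idlest;
}

/* Least-loaded CPU inside the chosen group (like find_idlest_cpu). */
static int pick_cpu(const struct group *g)
{
	unsigned long min_load = ULONG_MAX;
	int idlest = -1;

	for (int i = 0; i < g->ncpus; i++)
		if (g->load[i] < min_load) {
			min_load = g->load[i];
			idlest = i;
		}
	return idlest;
}

int main(void)
{
	struct group node[2] = {
		{ "node0", 2, { 300, 250 } },	/* local node, fairly busy  */
		{ "node1", 2, { 100,  50 } },	/* remote node, mostly idle */
	};
	int gi = pick_group(node, 2, 0, 125);

	printf("place the new task on %s, cpu %d\n",
	       node[gi].name, pick_cpu(&node[gi]));
	return 0;
}
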
@@ -1107,11 +1180,6 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
 	return try_to_wake_up(p, state, 0);
 }
 
-#ifdef CONFIG_SMP
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
-			   struct sched_domain *sd);
-#endif
-
 /*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
@@ -1181,12 +1249,38 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
 	unsigned long flags;
 	int this_cpu, cpu;
 	runqueue_t *rq, *this_rq;
+#ifdef CONFIG_SMP
+	struct sched_domain *tmp, *sd = NULL;
+#endif
 
 	rq = task_rq_lock(p, &flags);
-	cpu = task_cpu(p);
+	BUG_ON(p->state != TASK_RUNNING);
 	this_cpu = smp_processor_id();
+	cpu = task_cpu(p);
 
-	BUG_ON(p->state != TASK_RUNNING);
+#ifdef CONFIG_SMP
+	for_each_domain(cpu, tmp)
+		if (tmp->flags & SD_BALANCE_FORK)
+			sd = tmp;
+
+	if (sd) {
+		struct sched_group *group;
+
+		cpu = task_cpu(p);
+		group = find_idlest_group(sd, p, cpu);
+		if (group) {
+			int new_cpu;
+			new_cpu = find_idlest_cpu(group, cpu);
+			if (new_cpu != -1 && new_cpu != cpu &&
+					cpu_isset(new_cpu, p->cpus_allowed)) {
+				set_task_cpu(p, new_cpu);
+				task_rq_unlock(rq, &flags);
+				rq = task_rq_lock(p, &flags);
+				cpu = task_cpu(p);
+			}
+		}
+	}
+#endif
 
 	/*
 	 * We decrease the sleep average of forking parents
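
The for_each_domain() walk in the hunk above keeps overwriting sd, so it ends up with the widest (outermost) domain that has SD_BALANCE_FORK set, and that span is what the new task may be spread across. A small sketch of that selection rule, with made-up domain names and an illustrative two-level hierarchy (not kernel code):

#include <stdio.h>

#define SD_BALANCE_FORK	8	/* same bit value as in sched.h */

struct domain { const char *name; int flags; };

int main(void)
{
	/* Innermost (per-CPU) domain first, outermost (NUMA node) last,
	 * the order in which for_each_domain() walks the hierarchy. */
	struct domain hierarchy[] = {
		{ "cpu",  0 },			/* fork balancing off here   */
		{ "node", SD_BALANCE_FORK },	/* enabled at the node level */
	};
	const struct domain *sd = NULL;

	/* Keep overwriting sd: the last (widest) match wins. */
	for (int i = 0; i < 2; i++)
		if (hierarchy[i].flags & SD_BALANCE_FORK)
			sd = &hierarchy[i];

	printf("fork balancing searches the '%s' domain\n",
	       sd ? sd->name : "(none)");
	return 0;
}
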
@@ -1481,51 +1575,6 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 }
 
 /*
- * find_idlest_cpu - find the least busy runqueue.
- */
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
-			   struct sched_domain *sd)
-{
-	unsigned long load, min_load, this_load;
-	int i, min_cpu;
-	cpumask_t mask;
-
-	min_cpu = UINT_MAX;
-	min_load = ULONG_MAX;
-
-	cpus_and(mask, sd->span, p->cpus_allowed);
-
-	for_each_cpu_mask(i, mask) {
-		load = target_load(i, sd->wake_idx);
-
-		if (load < min_load) {
-			min_cpu = i;
-			min_load = load;
-
-			/* break out early on an idle CPU: */
-			if (!min_load)
-				break;
-		}
-	}
-
-	/* add +1 to account for the new task */
-	this_load = source_load(this_cpu, sd->wake_idx) + SCHED_LOAD_SCALE;
-
-	/*
-	 * Would with the addition of the new task to the
-	 * current CPU there be an imbalance between this
-	 * CPU and the idlest CPU?
-	 *
-	 * Use half of the balancing threshold - new-context is
-	 * a good opportunity to balance.
-	 */
-	if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100)
-		return min_cpu;
-
-	return this_cpu;
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -1578,8 +1627,15 @@ void sched_exec(void)
 			sd = tmp;
 
 	if (sd) {
+		struct sched_group *group;
 		schedstat_inc(sd, sbe_attempts);
-		new_cpu = find_idlest_cpu(current, this_cpu, sd);
+		group = find_idlest_group(sd, current, this_cpu);
+		if (!group)
+			goto out;
+		new_cpu = find_idlest_cpu(group, this_cpu);
+		if (new_cpu == -1)
+			goto out;
+
 		if (new_cpu != this_cpu) {
 			schedstat_inc(sd, sbe_pushed);
 			put_cpu();
@@ -1792,12 +1848,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		if (local_group) {
 			this_load = avg_load;
 			this = group;
-			goto nextgroup;
 		} else if (avg_load > max_load) {
 			max_load = avg_load;
 			busiest = group;
 		}
-nextgroup:
 		group = group->next;
 	} while (group != sd->groups);
 