 include/asm-x86_64/topology.h |   2
 include/linux/sched.h         |  10
 include/linux/topology.h      |   2
 kernel/sched.c                | 164
 4 files changed, 119 insertions(+), 59 deletions(-)
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 9cb7459ce722..802d09b9c99f 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -44,9 +44,11 @@ extern int __node_distance(int, int);
         .idle_idx = 2, \
         .newidle_idx = 1, \
         .wake_idx = 1, \
+        .forkexec_idx = 1, \
         .per_cpu_gain = 100, \
         .flags = SD_LOAD_BALANCE \
                 | SD_BALANCE_NEWIDLE \
+                | SD_BALANCE_FORK \
                 | SD_BALANCE_EXEC \
                 | SD_WAKE_BALANCE, \
         .last_balance = jiffies, \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 664981ac1fb6..613491d3a875 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -460,10 +460,11 @@ enum idle_type
 #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */
 #define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */
 #define SD_BALANCE_EXEC 4 /* Balance on exec */
-#define SD_WAKE_IDLE 8 /* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE 16 /* Wake task to waking CPU */
-#define SD_WAKE_BALANCE 32 /* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER 64 /* Domain members share cpu power */
+#define SD_BALANCE_FORK 8 /* Balance on fork, clone */
+#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */
+#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */
 
 struct sched_group {
         struct sched_group *next; /* Must be a circular list */
@@ -492,6 +493,7 @@ struct sched_domain {
         unsigned int idle_idx;
         unsigned int newidle_idx;
         unsigned int wake_idx;
+        unsigned int forkexec_idx;
         int flags; /* See SD_* */
 
         /* Runtime fields. */
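The SD_* constants are single-bit flags, so inserting SD_BALANCE_FORK at value 8 pushes every later flag up by one bit. A domain's policy is simply the bitwise OR of the flags it enables, and a feature test is a single AND, which is how the for_each_domain() loops in wake_up_new_task() and sched_exec() further down pick the widest domain with SD_BALANCE_FORK or SD_BALANCE_EXEC set. A minimal user-space sketch of that pattern (the flag values come from the hunk above; everything else is illustrative):

    #include <stdio.h>

    /* Flag values as defined in the patched sched.h (only the ones used here). */
    #define SD_LOAD_BALANCE    1
    #define SD_BALANCE_NEWIDLE 2
    #define SD_BALANCE_EXEC    4
    #define SD_BALANCE_FORK    8   /* new: balance on fork, clone */
    #define SD_WAKE_BALANCE    64

    int main(void)
    {
            /* A domain's policy is the OR of the flags it enables... */
            int flags = SD_LOAD_BALANCE | SD_BALANCE_NEWIDLE | SD_BALANCE_FORK
                      | SD_BALANCE_EXEC | SD_WAKE_BALANCE;

            /* ...and a feature test is one AND, as in "tmp->flags & SD_BALANCE_FORK". */
            if (flags & SD_BALANCE_FORK)
                    printf("fork-time balancing is enabled in this domain\n");
            return 0;
    }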
diff --git a/include/linux/topology.h b/include/linux/topology.h
index b23ec64df7f1..665597207def 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -93,6 +93,7 @@
         .idle_idx = 0, \
         .newidle_idx = 0, \
         .wake_idx = 0, \
+        .forkexec_idx = 0, \
         .flags = SD_LOAD_BALANCE \
                 | SD_BALANCE_NEWIDLE \
                 | SD_BALANCE_EXEC \
@@ -123,6 +124,7 @@
         .idle_idx = 0, \
         .newidle_idx = 1, \
         .wake_idx = 1, \
+        .forkexec_idx = 0, \
         .flags = SD_LOAD_BALANCE \
                 | SD_BALANCE_NEWIDLE \
                 | SD_BALANCE_EXEC \
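The new forkexec_idx field works like the existing wake_idx and idle_idx fields: it is an index into the per-runqueue cpu_load[] history that source_load()/target_load() consult (see the target_load() context at the top of the kernel/sched.c hunk below, and load_idx = sd->forkexec_idx in find_idlest_group()). Index 0 reads the instantaneous load, so the two generic initializers patched here keep fork placement on the raw figure, while the x86-64 NUMA domain's forkexec_idx = 1 reads a smoothed figure and is less easily fooled by momentary spikes. A rough user-space sketch of that selection (the struct, the scale value, and the min() bias are assumptions for illustration, not the exact kernel helpers):

    #include <stdio.h>

    #define SCHED_LOAD_SCALE 128UL          /* assumed scale, for illustration only */

    struct rq_sketch {
            unsigned long nr_running;
            unsigned long cpu_load[3];      /* decaying load averages, per history index */
    };

    /*
     * Roughly what an index such as forkexec_idx selects: 0 means the
     * instantaneous load, a higher index means one of the smoothed
     * cpu_load[] figures (biased low here, the way a "source" CPU's load
     * is read conservatively).
     */
    static unsigned long load_at(const struct rq_sketch *rq, int idx)
    {
            unsigned long now = rq->nr_running * SCHED_LOAD_SCALE;

            if (idx == 0)
                    return now;
            return now < rq->cpu_load[idx - 1] ? now : rq->cpu_load[idx - 1];
    }

    int main(void)
    {
            struct rq_sketch rq = { .nr_running = 3, .cpu_load = { 192, 160, 150 } };

            printf("idx 0 -> %lu, idx 1 -> %lu\n", load_at(&rq, 0), load_at(&rq, 1));
            return 0;
    }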
diff --git a/kernel/sched.c b/kernel/sched.c
index 396724a2519f..7ecc237e2aab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -893,6 +893,79 @@ static inline unsigned long target_load(int cpu, int type)
         return max(rq->cpu_load[type-1], load_now);
 }
 
+/*
+ * find_idlest_group finds and returns the least busy CPU group within the
+ * domain.
+ */
+static struct sched_group *
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+{
+        struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
+        unsigned long min_load = ULONG_MAX, this_load = 0;
+        int load_idx = sd->forkexec_idx;
+        int imbalance = 100 + (sd->imbalance_pct-100)/2;
+
+        do {
+                unsigned long load, avg_load;
+                int local_group;
+                int i;
+
+                local_group = cpu_isset(this_cpu, group->cpumask);
+                /* XXX: put a cpus allowed check */
+
+                /* Tally up the load of all CPUs in the group */
+                avg_load = 0;
+
+                for_each_cpu_mask(i, group->cpumask) {
+                        /* Bias balancing toward cpus of our domain */
+                        if (local_group)
+                                load = source_load(i, load_idx);
+                        else
+                                load = target_load(i, load_idx);
+
+                        avg_load += load;
+                }
+
+                /* Adjust by relative CPU power of the group */
+                avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+                if (local_group) {
+                        this_load = avg_load;
+                        this = group;
+                } else if (avg_load < min_load) {
+                        min_load = avg_load;
+                        idlest = group;
+                }
+                group = group->next;
+        } while (group != sd->groups);
+
+        if (!idlest || 100*this_load < imbalance*min_load)
+                return NULL;
+        return idlest;
+}
+
+/*
+ * find_idlest_cpu - find the idlest runqueue among the cpus in group.
+ */
+static int find_idlest_cpu(struct sched_group *group, int this_cpu)
+{
+        unsigned long load, min_load = ULONG_MAX;
+        int idlest = -1;
+        int i;
+
+        for_each_cpu_mask(i, group->cpumask) {
+                load = source_load(i, 0);
+
+                if (load < min_load || (load == min_load && i == this_cpu)) {
+                        min_load = load;
+                        idlest = i;
+                }
+        }
+
+        return idlest;
+}
+
+
 #endif
 
 /*
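find_idlest_group() only hands back a remote group when it is clearly lighter than the local one: the threshold imbalance = 100 + (imbalance_pct - 100)/2 halves the domain's normal balancing threshold, and the function returns NULL (stay local) whenever 100*this_load < imbalance*min_load. Assuming a typical imbalance_pct of 125 (a common setting, not shown in this patch), imbalance works out to 112, so the idlest remote group must be roughly 11% lighter than the local group before a fork or exec is placed there. A small worked example of that comparison:

    #include <stdio.h>

    int main(void)
    {
            unsigned long imbalance_pct = 125;                          /* assumed typical value */
            unsigned long imbalance = 100 + (imbalance_pct - 100) / 2;  /* = 112 */
            unsigned long this_load = 384;  /* local group's scaled average load */
            unsigned long min_load  = 330;  /* lightest remote group's load */

            /* Same comparison as find_idlest_group(): stay local unless the
             * remote group is clearly lighter. */
            if (100 * this_load < imbalance * min_load)
                    printf("stay local (%lu < %lu)\n", 100 * this_load, imbalance * min_load);
            else
                    printf("use the remote group (%lu >= %lu)\n", 100 * this_load, imbalance * min_load);
            return 0;
    }

find_idlest_cpu() then picks the least-loaded CPU inside the chosen group, breaking ties in favour of this_cpu so an equally idle local CPU is never abandoned for a remote one.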
@@ -1107,11 +1180,6 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
         return try_to_wake_up(p, state, 0);
 }
 
-#ifdef CONFIG_SMP
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
-                           struct sched_domain *sd);
-#endif
-
 /*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
@@ -1181,12 +1249,38 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
         unsigned long flags;
         int this_cpu, cpu;
         runqueue_t *rq, *this_rq;
+#ifdef CONFIG_SMP
+        struct sched_domain *tmp, *sd = NULL;
+#endif
 
         rq = task_rq_lock(p, &flags);
-        cpu = task_cpu(p);
+        BUG_ON(p->state != TASK_RUNNING);
         this_cpu = smp_processor_id();
+        cpu = task_cpu(p);
 
-        BUG_ON(p->state != TASK_RUNNING);
+#ifdef CONFIG_SMP
+        for_each_domain(cpu, tmp)
+                if (tmp->flags & SD_BALANCE_FORK)
+                        sd = tmp;
+
+        if (sd) {
+                struct sched_group *group;
+
+                cpu = task_cpu(p);
+                group = find_idlest_group(sd, p, cpu);
+                if (group) {
+                        int new_cpu;
+                        new_cpu = find_idlest_cpu(group, cpu);
+                        if (new_cpu != -1 && new_cpu != cpu &&
+                                        cpu_isset(new_cpu, p->cpus_allowed)) {
+                                set_task_cpu(p, new_cpu);
+                                task_rq_unlock(rq, &flags);
+                                rq = task_rq_lock(p, &flags);
+                                cpu = task_cpu(p);
+                        }
+                }
+        }
+#endif
 
         /*
          * We decrease the sleep average of forking parents
@@ -1481,51 +1575,6 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 }
 
 /*
- * find_idlest_cpu - find the least busy runqueue.
- */
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
-                           struct sched_domain *sd)
-{
-        unsigned long load, min_load, this_load;
-        int i, min_cpu;
-        cpumask_t mask;
-
-        min_cpu = UINT_MAX;
-        min_load = ULONG_MAX;
-
-        cpus_and(mask, sd->span, p->cpus_allowed);
-
-        for_each_cpu_mask(i, mask) {
-                load = target_load(i, sd->wake_idx);
-
-                if (load < min_load) {
-                        min_cpu = i;
-                        min_load = load;
-
-                        /* break out early on an idle CPU: */
-                        if (!min_load)
-                                break;
-                }
-        }
-
-        /* add +1 to account for the new task */
-        this_load = source_load(this_cpu, sd->wake_idx) + SCHED_LOAD_SCALE;
-
-        /*
-         * Would with the addition of the new task to the
-         * current CPU there be an imbalance between this
-         * CPU and the idlest CPU?
-         *
-         * Use half of the balancing threshold - new-context is
-         * a good opportunity to balance.
-         */
-        if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100)
-                return min_cpu;
-
-        return this_cpu;
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -1578,8 +1627,15 @@ void sched_exec(void)
                         sd = tmp;
 
         if (sd) {
+                struct sched_group *group;
                 schedstat_inc(sd, sbe_attempts);
-                new_cpu = find_idlest_cpu(current, this_cpu, sd);
+                group = find_idlest_group(sd, current, this_cpu);
+                if (!group)
+                        goto out;
+                new_cpu = find_idlest_cpu(group, this_cpu);
+                if (new_cpu == -1)
+                        goto out;
+
                 if (new_cpu != this_cpu) {
                         schedstat_inc(sd, sbe_pushed);
                         put_cpu();
@@ -1792,12 +1848,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 if (local_group) {
                         this_load = avg_load;
                         this = group;
-                        goto nextgroup;
                 } else if (avg_load > max_load) {
                         max_load = avg_load;
                         busiest = group;
                 }
-nextgroup:
                 group = group->next;
         } while (group != sd->groups);
 
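The last hunk is a small cleanup rather than a behavioural change: once the local-group case becomes the first arm of an if/else-if chain, a group can no longer fall through into the busiest-group bookkeeping, so the goto nextgroup escape is redundant and both branches simply continue to group = group->next. A toy loop showing the else-if form (a sketch, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long loads[] = { 300, 512, 128, 512 };
            int local = 1;                  /* pretend group 1 is the local group */
            unsigned long max_load = 0;
            int busiest = -1;

            for (int i = 0; i < 4; i++) {
                    if (i == local) {
                            /* local-group bookkeeping only; never competes for busiest */
                    } else if (loads[i] > max_load) {
                            max_load = loads[i];
                            busiest = i;
                    }
                    /* both branches fall through here, where "nextgroup:" used to be */
            }
            printf("busiest group %d, load %lu\n", busiest, max_load);
            return 0;
    }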