Diffstat (limited to 'kernel/sched.c')
-rw-r--r--   kernel/sched.c   233
1 file changed, 170 insertions, 63 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 18b95520a2e2..dbd4490afec1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -875,7 +875,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
  * smp_call_function() if an IPI is sent by the same process we are
  * waiting to become inactive.
  */
-void wait_task_inactive(task_t * p)
+void wait_task_inactive(task_t *p)
 {
         unsigned long flags;
         runqueue_t *rq;
@@ -966,8 +966,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
                 int local_group;
                 int i;

+                /* Skip over this group if it has no CPUs allowed */
+                if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+                        goto nextgroup;
+
                 local_group = cpu_isset(this_cpu, group->cpumask);
-                /* XXX: put a cpus allowed check */

                 /* Tally up the load of all CPUs in the group */
                 avg_load = 0;
@@ -992,6 +995,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
                         min_load = avg_load;
                         idlest = group;
                 }
+nextgroup:
                 group = group->next;
         } while (group != sd->groups);

@@ -1003,13 +1007,18 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 /*
  * find_idlest_queue - find the idlest runqueue among the cpus in group.
  */
-static int find_idlest_cpu(struct sched_group *group, int this_cpu)
+static int
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 {
+        cpumask_t tmp;
         unsigned long load, min_load = ULONG_MAX;
         int idlest = -1;
         int i;

-        for_each_cpu_mask(i, group->cpumask) {
+        /* Traverse only the allowed CPUs */
+        cpus_and(tmp, group->cpumask, p->cpus_allowed);
+
+        for_each_cpu_mask(i, tmp) {
                 load = source_load(i, 0);

                 if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -1052,7 +1061,7 @@ static int sched_balance_self(int cpu, int flag)
                 if (!group)
                         goto nextlevel;

-                new_cpu = find_idlest_cpu(group, cpu);
+                new_cpu = find_idlest_cpu(group, t, cpu);
                 if (new_cpu == -1 || new_cpu == cpu)
                         goto nextlevel;

@@ -1127,7 +1136,7 @@ static inline int wake_idle(int cpu, task_t *p)
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync)
+static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 {
         int cpu, this_cpu, success = 0;
         unsigned long flags;
@@ -1252,6 +1261,16 @@ out_activate:
         }

         /*
+         * Tasks that have marked their sleep as noninteractive get
+         * woken up without updating their sleep average. (i.e. their
+         * sleep is handled in a priority-neutral manner, no priority
+         * boost and no penalty.)
+         */
+        if (old_state & TASK_NONINTERACTIVE)
+                __activate_task(p, rq);
+        else
+                activate_task(p, rq, cpu == this_cpu);
+        /*
          * Sync wakeups (i.e. those types of wakeups where the waker
          * has indicated that it will leave the CPU in short order)
          * don't trigger a preemption, if the woken up task will run on
@@ -1259,7 +1278,6 @@ out_activate:
          * the waker guarantees that the freshly woken up task is going
          * to be considered on this CPU.)
          */
-        activate_task(p, rq, cpu == this_cpu);
         if (!sync || cpu != this_cpu) {
                 if (TASK_PREEMPTS_CURR(p, rq))
                         resched_task(rq->curr);
@@ -1274,7 +1292,7 @@ out:
         return success;
 }

-int fastcall wake_up_process(task_t * p)
+int fastcall wake_up_process(task_t *p)
 {
         return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
                                  TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
@@ -1353,7 +1371,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
  * that must be done for every newly created context, then puts the task
  * on the runqueue and wakes it.
  */
-void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
+void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 {
         unsigned long flags;
         int this_cpu, cpu;
@@ -1436,7 +1454,7 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
  * artificially, because any timeslice recovered here
  * was given away by the parent in the first place.)
  */
-void fastcall sched_exit(task_t * p)
+void fastcall sched_exit(task_t *p)
 {
         unsigned long flags;
         runqueue_t *rq;
@@ -1511,6 +1529,10 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
          *              Manfred Spraul <manfred@colorfullife.com>
          */
         prev_task_flags = prev->flags;
+#ifdef CONFIG_DEBUG_SPINLOCK
+        /* this is a valid case when another task releases the spinlock */
+        rq->lock.owner = current;
+#endif
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
         if (mm)
@@ -1753,7 +1775,8 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
  */
 static inline
 int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
-                     struct sched_domain *sd, enum idle_type idle, int *all_pinned)
+                     struct sched_domain *sd, enum idle_type idle,
+                     int *all_pinned)
 {
         /*
          * We do not migrate tasks that are:
@@ -1883,10 +1906,11 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-                   unsigned long *imbalance, enum idle_type idle)
+                   unsigned long *imbalance, enum idle_type idle, int *sd_idle)
 {
         struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
         unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+        unsigned long max_pull;
         int load_idx;

         max_load = this_load = total_load = total_pwr = 0;
@@ -1908,6 +1932,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 avg_load = 0;

                 for_each_cpu_mask(i, group->cpumask) {
+                        if (*sd_idle && !idle_cpu(i))
+                                *sd_idle = 0;
+
                         /* Bias balancing toward cpus of our domain */
                         if (local_group)
                                 load = target_load(i, load_idx);
@@ -1933,7 +1960,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 group = group->next;
         } while (group != sd->groups);

-        if (!busiest || this_load >= max_load)
+        if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE)
                 goto out_balanced;

         avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
@@ -1953,8 +1980,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
          * by pulling tasks to us. Be careful of negative numbers as they'll
          * appear as very large values with unsigned longs.
          */
+
+        /* Don't want to pull so many tasks that a group would go idle */
+        max_pull = min(max_load - avg_load, max_load - SCHED_LOAD_SCALE);
+
         /* How much load to actually move to equalise the imbalance */
-        *imbalance = min((max_load - avg_load) * busiest->cpu_power,
+        *imbalance = min(max_pull * busiest->cpu_power,
                                 (avg_load - this_load) * this->cpu_power)
                         / SCHED_LOAD_SCALE;

@@ -2051,11 +2082,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
         unsigned long imbalance;
         int nr_moved, all_pinned = 0;
         int active_balance = 0;
+        int sd_idle = 0;
+
+        if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
+                sd_idle = 1;

-        spin_lock(&this_rq->lock);
         schedstat_inc(sd, lb_cnt[idle]);

-        group = find_busiest_group(sd, this_cpu, &imbalance, idle);
+        group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
         if (!group) {
                 schedstat_inc(sd, lb_nobusyg[idle]);
                 goto out_balanced;
@@ -2079,19 +2113,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
                  * still unbalanced. nr_moved simply stays zero, so it is
                  * correctly treated as an imbalance.
                  */
-                double_lock_balance(this_rq, busiest);
+                double_rq_lock(this_rq, busiest);
                 nr_moved = move_tasks(this_rq, this_cpu, busiest,
-                                        imbalance, sd, idle,
-                                        &all_pinned);
-                spin_unlock(&busiest->lock);
+                                        imbalance, sd, idle, &all_pinned);
+                double_rq_unlock(this_rq, busiest);

                 /* All tasks on this runqueue were pinned by CPU affinity */
                 if (unlikely(all_pinned))
                         goto out_balanced;
         }

-        spin_unlock(&this_rq->lock);
-
         if (!nr_moved) {
                 schedstat_inc(sd, lb_failed[idle]);
                 sd->nr_balance_failed++;
@@ -2099,6 +2130,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
         if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {

                 spin_lock(&busiest->lock);
+
+                /* don't kick the migration_thread, if the curr
+                 * task on busiest cpu can't be moved to this_cpu
+                 */
+                if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+                        spin_unlock(&busiest->lock);
+                        all_pinned = 1;
+                        goto out_one_pinned;
+                }
+
                 if (!busiest->active_balance) {
                         busiest->active_balance = 1;
                         busiest->push_cpu = this_cpu;
@@ -2131,19 +2172,23 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
                         sd->balance_interval *= 2;
         }

+        if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+                return -1;
         return nr_moved;

 out_balanced:
-        spin_unlock(&this_rq->lock);
-
         schedstat_inc(sd, lb_balanced[idle]);

         sd->nr_balance_failed = 0;
+
+out_one_pinned:
         /* tune up the balancing interval */
         if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
                         (sd->balance_interval < sd->max_interval))
                 sd->balance_interval *= 2;

+        if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+                return -1;
         return 0;
 }

@@ -2161,9 +2206,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
         runqueue_t *busiest = NULL;
         unsigned long imbalance;
         int nr_moved = 0;
+        int sd_idle = 0;
+
+        if (sd->flags & SD_SHARE_CPUPOWER)
+                sd_idle = 1;

         schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-        group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+        group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
         if (!group) {
                 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
                 goto out_balanced;
@@ -2177,22 +2226,30 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,

         BUG_ON(busiest == this_rq);

-        /* Attempt to move tasks */
-        double_lock_balance(this_rq, busiest);
-
         schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
-        nr_moved = move_tasks(this_rq, this_cpu, busiest,
+
+        nr_moved = 0;
+        if (busiest->nr_running > 1) {
+                /* Attempt to move tasks */
+                double_lock_balance(this_rq, busiest);
+                nr_moved = move_tasks(this_rq, this_cpu, busiest,
                                         imbalance, sd, NEWLY_IDLE, NULL);
-        if (!nr_moved)
+                spin_unlock(&busiest->lock);
+        }
+
+        if (!nr_moved) {
                 schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-        else
+                if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+                        return -1;
+        } else
                 sd->nr_balance_failed = 0;

-        spin_unlock(&busiest->lock);
         return nr_moved;

 out_balanced:
         schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+        if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+                return -1;
         sd->nr_balance_failed = 0;
         return 0;
 }
@@ -2317,7 +2374,11 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,

                 if (j - sd->last_balance >= interval) {
                         if (load_balance(this_cpu, this_rq, sd, idle)) {
-                                /* We've pulled tasks over so no longer idle */
+                                /*
+                                 * We've pulled tasks over so either we're no
+                                 * longer idle, or one of our SMT siblings is
+                                 * not idle.
+                                 */
                                 idle = NOT_IDLE;
                         }
                         sd->last_balance += interval;
@@ -2576,6 +2637,13 @@ out:
 }

 #ifdef CONFIG_SCHED_SMT
+static inline void wakeup_busy_runqueue(runqueue_t *rq)
+{
+        /* If an SMT runqueue is sleeping due to priority reasons wake it up */
+        if (rq->curr == rq->idle && rq->nr_running)
+                resched_task(rq->idle);
+}
+
 static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
 {
         struct sched_domain *tmp, *sd = NULL;
@@ -2609,12 +2677,7 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
         for_each_cpu_mask(i, sibling_map) {
                 runqueue_t *smt_rq = cpu_rq(i);

-                /*
-                 * If an SMT sibling task is sleeping due to priority
-                 * reasons wake it up now.
-                 */
-                if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running)
-                        resched_task(smt_rq->idle);
+                wakeup_busy_runqueue(smt_rq);
         }

         for_each_cpu_mask(i, sibling_map)
@@ -2625,6 +2688,16 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
          */
 }

+/*
+ * number of 'lost' timeslices this task wont be able to fully
+ * utilize, if another task runs on a sibling. This models the
+ * slowdown effect of other tasks running on siblings:
+ */
+static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
+{
+        return p->time_slice * (100 - sd->per_cpu_gain) / 100;
+}
+
 static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 {
         struct sched_domain *tmp, *sd = NULL;
@@ -2668,6 +2741,10 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
                 runqueue_t *smt_rq = cpu_rq(i);
                 task_t *smt_curr = smt_rq->curr;

+                /* Kernel threads do not participate in dependent sleeping */
+                if (!p->mm || !smt_curr->mm || rt_task(p))
+                        goto check_smt_task;
+
                 /*
                  * If a user task with lower static priority than the
                  * running task on the SMT sibling is trying to schedule,
@@ -2676,21 +2753,45 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
                  * task from using an unfair proportion of the
                  * physical cpu's resources. -ck
                  */
-                if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) >
-                        task_timeslice(p) || rt_task(smt_curr)) &&
-                        p->mm && smt_curr->mm && !rt_task(p))
-                                ret = 1;
+                if (rt_task(smt_curr)) {
+                        /*
+                         * With real time tasks we run non-rt tasks only
+                         * per_cpu_gain% of the time.
+                         */
+                        if ((jiffies % DEF_TIMESLICE) >
+                                (sd->per_cpu_gain * DEF_TIMESLICE / 100))
+                                        ret = 1;
+                } else
+                        if (smt_curr->static_prio < p->static_prio &&
+                                !TASK_PREEMPTS_CURR(p, smt_rq) &&
+                                smt_slice(smt_curr, sd) > task_timeslice(p))
+                                        ret = 1;
+
+check_smt_task:
+                if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
+                        rt_task(smt_curr))
+                                continue;
+                if (!p->mm) {
+                        wakeup_busy_runqueue(smt_rq);
+                        continue;
+                }

                 /*
-                 * Reschedule a lower priority task on the SMT sibling,
-                 * or wake it up if it has been put to sleep for priority
-                 * reasons.
+                 * Reschedule a lower priority task on the SMT sibling for
+                 * it to be put to sleep, or wake it up if it has been put to
+                 * sleep for priority reasons to see if it should run now.
                  */
-                if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) >
-                        task_timeslice(smt_curr) || rt_task(p)) &&
-                        smt_curr->mm && p->mm && !rt_task(smt_curr)) ||
-                        (smt_curr == smt_rq->idle && smt_rq->nr_running))
-                        resched_task(smt_curr);
+                if (rt_task(p)) {
+                        if ((jiffies % DEF_TIMESLICE) >
+                                (sd->per_cpu_gain * DEF_TIMESLICE / 100))
+                                        resched_task(smt_curr);
+                } else {
+                        if (TASK_PREEMPTS_CURR(p, smt_rq) &&
+                                smt_slice(p, sd) > task_timeslice(smt_curr))
+                                        resched_task(smt_curr);
+                        else
+                                wakeup_busy_runqueue(smt_rq);
+                }
         }
 out_unlock:
         for_each_cpu_mask(i, sibling_map)
@@ -2888,6 +2989,7 @@ switch_tasks:
         if (next == rq->idle)
                 schedstat_inc(rq, sched_goidle);
         prefetch(next);
+        prefetch_stack(next);
         clear_tsk_need_resched(prev);
         rcu_qsctr_inc(task_cpu(prev));

@@ -3015,7 +3117,8 @@ need_resched:

 #endif /* CONFIG_PREEMPT */

-int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key)
+int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
+                          void *key)
 {
         task_t *p = curr->private;
         return try_to_wake_up(p, mode, sync);
@@ -3057,7 +3160,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @key: is directly passed to the wakeup function
  */
 void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
-                                int nr_exclusive, void *key)
+                        int nr_exclusive, void *key)
 {
         unsigned long flags;

@@ -3089,7 +3192,8 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
  *
  * On UP it can prevent extra preemption.
  */
-void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void fastcall
+__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 {
         unsigned long flags;
         int sync = 1;
@@ -3280,7 +3384,8 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)

 EXPORT_SYMBOL(interruptible_sleep_on);

-long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
+long fastcall __sched
+interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
 {
         SLEEP_ON_VAR

@@ -3499,7 +3604,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  */
-int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param)
+int sched_setscheduler(struct task_struct *p, int policy,
+                       struct sched_param *param)
 {
         int retval;
         int oldprio, oldpolicy = -1;
@@ -3519,7 +3625,7 @@ recheck:
          * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
          */
         if (param->sched_priority < 0 ||
-           (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
+            (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
             (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
                 return -EINVAL;
         if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
@@ -3582,7 +3688,8 @@ recheck:
 }
 EXPORT_SYMBOL_GPL(sched_setscheduler);

-static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+static int
+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
         int retval;
         struct sched_param lparam;
@@ -3849,7 +3956,7 @@ asmlinkage long sys_sched_yield(void)
         if (rt_task(current))
                 target = rq->active;

-        if (current->array->nr_active == 1) {
+        if (array->nr_active == 1) {
                 schedstat_inc(rq, yld_act_empty);
                 if (!rq->expired->nr_active)
                         schedstat_inc(rq, yld_both_empty);
@@ -3913,7 +4020,7 @@ EXPORT_SYMBOL(cond_resched);
  * operations here to prevent schedule() from being called twice (once via
  * spin_unlock(), once by hand).
  */
-int cond_resched_lock(spinlock_t * lock)
+int cond_resched_lock(spinlock_t *lock)
 {
         int ret = 0;

@@ -4096,7 +4203,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
         return list_entry(p->sibling.next,struct task_struct,sibling);
 }

-static void show_task(task_t * p)
+static void show_task(task_t *p)
 {
         task_t *relative;
         unsigned state;
@@ -4122,7 +4229,7 @@ static void show_task(task_t * p)
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
         {
-                unsigned long * n = (unsigned long *) (p->thread_info+1);
+                unsigned long *n = (unsigned long *) (p->thread_info+1);
                 while (!*n)
                         n++;
                 free = (unsigned long) n - (unsigned long)(p->thread_info+1);
@@ -4331,7 +4438,7 @@ out:
  * thread migration by bumping thread off CPU then 'pushing' onto
  * another runqueue.
  */
-static int migration_thread(void * data)
+static int migration_thread(void *data)
 {
         runqueue_t *rq;
         int cpu = (long)data;
