 include/linux/sched.h |   2 -
 kernel/sched.c        |   2 +-
 kernel/sched_debug.c  |   2 +-
 kernel/sched_fair.c   | 148 ++++++++++++++++++++++++-------------------------
 kernel/sysctl.c       |   7 -
 5 files changed, 90 insertions(+), 71 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0542774914d4..4e9fad271c30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1942,8 +1942,6 @@ int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
-extern unsigned int sysctl_sched_compat_yield;
-
 #ifdef CONFIG_SCHED_AUTOGROUP
 extern unsigned int sysctl_sched_autogroup_enabled;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 477e1bcc63f9..ae5e1a19b9d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
 	 * 'curr' points to currently running entity on this cfs_rq.
 	 * It is set to NULL otherwise (i.e when none are currently running).
 	 */
-	struct sched_entity *curr, *next, *last;
+	struct sched_entity *curr, *next, *last, *skip;
 
 	unsigned int nr_spread_over;
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd075..7bacd83a4158 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
 	if (cfs_rq->rb_leftmost)
-		MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
+		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
 	last = __pick_last_entity(cfs_rq);
 	if (last)
 		max_vruntime = last->vruntime;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a785e08202cf..c0fbeb992833 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8;
 unsigned int sysctl_sched_child_runs_first __read_mostly;
 
 /*
- * sys_sched_yield() compat mode
- *
- * This option switches the agressive yield implementation of the
- * old scheduler back on.
- */
-unsigned int __read_mostly sysctl_sched_compat_yield;
-
-/*
  * SCHED_OTHER wake-up granularity.
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *left = cfs_rq->rb_leftmost;
 
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 	return rb_entry(left, struct sched_entity, run_node);
 }
 
+static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+{
+	struct rb_node *next = rb_next(&se->run_node);
+
+	if (!next)
+		return NULL;
+
+	return rb_entry(next, struct sched_entity, run_node);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
 static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
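The new __pick_next_entity() steps from a given entity to its in-order successor via rb_next(), while the renamed __pick_first_entity() keeps returning the cached leftmost (smallest-vruntime) node. For illustration, a minimal stand-alone sketch of those two operations on a plain binary search tree; this does not use the kernel's rbtree API, and all names here are invented:

	#include <stdio.h>
	#include <stdlib.h>

	/* Toy stand-in for a sched_entity keyed by vruntime. */
	struct node {
		unsigned long long vruntime;
		struct node *left, *right, *parent;
	};

	static struct node *insert(struct node *root, struct node *n)
	{
		struct node **link = &root, *parent = NULL;

		while (*link) {
			parent = *link;
			link = (n->vruntime < parent->vruntime) ?
				&parent->left : &parent->right;
		}
		n->parent = parent;
		*link = n;
		return root;
	}

	/* Analogue of __pick_first_entity(): leftmost == smallest vruntime. */
	static struct node *pick_first(struct node *root)
	{
		if (!root)
			return NULL;
		while (root->left)
			root = root->left;
		return root;
	}

	/* Analogue of __pick_next_entity(): in-order successor, or NULL. */
	static struct node *pick_next(struct node *n)
	{
		if (n->right)
			return pick_first(n->right);
		while (n->parent && n == n->parent->right)
			n = n->parent;
		return n->parent;
	}

	int main(void)
	{
		struct node *root = NULL;
		unsigned long long keys[] = { 300, 100, 200 };

		for (size_t i = 0; i < 3; i++) {
			struct node *n = calloc(1, sizeof(*n));
			n->vruntime = keys[i];
			root = insert(root, n);
		}

		struct node *first = pick_first(root);	/* 100 */
		struct node *second = pick_next(first);	/* 200 */
		printf("first=%llu second=%llu\n",
		       first->vruntime, second->vruntime);
		return 0;
	}

The kernel version gets pick_first in O(1) because cfs_rq caches rb_leftmost; pick_next costs O(log n) in the worst case but is only consulted when the leftmost entity is the skip buddy.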
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  * Scheduling class statistics methods:
  */
 
-#ifdef CONFIG_SCHED_DEBUG
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
@@ -1017,6 +1019,17 @@ static void __clear_buddies_next(struct sched_entity *se)
 	}
 }
 
+static void __clear_buddies_skip(struct sched_entity *se)
+{
+	for_each_sched_entity(se) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		if (cfs_rq->skip == se)
+			cfs_rq->skip = NULL;
+		else
+			break;
+	}
+}
+
 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->last == se)
@@ -1024,6 +1037,9 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 	if (cfs_rq->next == se)
 		__clear_buddies_next(se);
+
+	if (cfs_rq->skip == se)
+		__clear_buddies_skip(se);
 }
 
 static void
@@ -1099,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		return;
 
 	if (cfs_rq->nr_running > 1) {
-		struct sched_entity *se = __pick_next_entity(cfs_rq);
+		struct sched_entity *se = __pick_first_entity(cfs_rq);
 		s64 delta = curr->vruntime - se->vruntime;
 
 		if (delta < 0)
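The tail of this hunk is the tick-time preemption test: once the running entity's vruntime leads the leftmost entity's by more than its ideal slice, it gets rescheduled. A worked numeric sketch of just that comparison, with all values invented for illustration:

	#include <stdio.h>

	int main(void)
	{
		/* All values in nanoseconds; the numbers are made up. */
		long long ideal_runtime = 4000000;	/* curr's slice this period */
		long long curr_vruntime = 12000000;
		long long left_vruntime = 7000000;	/* smallest in the tree */

		long long delta = curr_vruntime - left_vruntime;

		/*
		 * Mirrors the visible tail of check_preempt_tick(): a negative
		 * delta means curr is still the most under-served task, so it
		 * keeps running; once it leads the leftmost entity by more
		 * than its ideal slice, it is preempted.
		 */
		if (delta < 0)
			printf("curr trails the leftmost entity: keep running\n");
		else if (delta > ideal_runtime)
			printf("delta %lld > ideal %lld: resched curr\n",
			       delta, ideal_runtime);
		else
			printf("within slice: keep running\n");
		return 0;
	}

With the numbers above, delta = 5000000 > 4000000, so curr would be preempted on the next tick.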
@@ -1143,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static int
 wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 
+/*
+ * Pick the next process, keeping these things in mind, in this order:
+ * 1) keep things fair between processes/task groups
+ * 2) pick the "next" process, since someone really wants that to run
+ * 3) pick the "last" process, for cache locality
+ * 4) do not run the "skip" process, if something else is available
+ */
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *se = __pick_next_entity(cfs_rq);
+	struct sched_entity *se = __pick_first_entity(cfs_rq);
 	struct sched_entity *left = se;
 
-	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
-		se = cfs_rq->next;
+	/*
+	 * Avoid running the skip buddy, if running something else can
+	 * be done without getting too unfair.
+	 */
+	if (cfs_rq->skip == se) {
+		struct sched_entity *second = __pick_next_entity(se);
+		if (second && wakeup_preempt_entity(second, left) < 1)
+			se = second;
+	}
 
 	/*
 	 * Prefer last buddy, try to return the CPU to a preempted task.
@@ -1157,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
 		se = cfs_rq->last;
 
+	/*
+	 * Someone really wants this to run. If it's not unfair, run it.
+	 */
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+		se = cfs_rq->next;
+
 	clear_buddies(cfs_rq, se);
 
 	return se;
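The rewritten pick_next_entity() applies the buddy hints in increasing priority: first dodge the skip buddy if the runner-up is close enough, then prefer last, then next, each time only if that choice is not too unfair to the leftmost entity. A user-space sketch of that decision chain, with wakeup_preempt_entity() reduced to a simple vruntime-gap test; the threshold and all names are invented:

	#include <stdio.h>
	#include <stddef.h>

	struct entity {
		const char *name;
		long long vruntime;
		struct entity *tree_next;	/* in-order successor in the toy tree */
	};

	struct toy_cfs_rq {
		struct entity *leftmost;	/* smallest vruntime */
		struct entity *next, *last, *skip;
	};

	static const long long GRAN = 1000000;	/* invented granularity, ns */

	/* Running 'cand' instead of the leftmost is acceptable as long as
	 * cand has not pulled too far ahead in vruntime (stand-in for the
	 * wakeup_preempt_entity() < 1 test). */
	static int not_too_unfair(struct entity *cand, struct entity *left)
	{
		return cand->vruntime - left->vruntime < GRAN;
	}

	static struct entity *pick(struct toy_cfs_rq *rq)
	{
		struct entity *left = rq->leftmost;
		struct entity *se = left;

		/* 1. Avoid the skip buddy if the runner-up is close enough. */
		if (rq->skip == se) {
			struct entity *second = se->tree_next;
			if (second && not_too_unfair(second, left))
				se = second;
		}
		/* 2. Prefer the last buddy, for cache locality. */
		if (rq->last && not_too_unfair(rq->last, left))
			se = rq->last;
		/* 3. The next buddy wins over everything, fairness permitting. */
		if (rq->next && not_too_unfair(rq->next, left))
			se = rq->next;
		return se;
	}

	int main(void)
	{
		struct entity b = { "B", 400000, NULL };
		struct entity a = { "A", 100000, &b };	/* leftmost, marked skip */
		struct toy_cfs_rq rq = { .leftmost = &a, .skip = &a };

		printf("picked %s\n", pick(&rq)->name);	/* B: skip buddy avoided */
		return 0;
	}

Note that the ordering of the if-statements encodes the priority list from the comment block above: a later check overrides an earlier one, so "next" beats "last" beats skip-avoidance, and fairness against the leftmost entity gates all three.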
@@ -1333,52 +1369,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	hrtick_update(rq);
 }
 
-/*
- * sched_yield() support is very simple - we dequeue and enqueue.
- *
- * If compat_yield is turned on then we requeue to the end of the tree.
- */
-static void yield_task_fair(struct rq *rq)
-{
-	struct task_struct *curr = rq->curr;
-	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-	struct sched_entity *rightmost, *se = &curr->se;
-
-	/*
-	 * Are we the only task in the tree?
-	 */
-	if (unlikely(rq->nr_running == 1))
-		return;
-
-	clear_buddies(cfs_rq, se);
-
-	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
-		update_rq_clock(rq);
-		/*
-		 * Update run-time statistics of the 'current'.
-		 */
-		update_curr(cfs_rq);
-
-		return;
-	}
-	/*
-	 * Find the rightmost entry in the rbtree:
-	 */
-	rightmost = __pick_last_entity(cfs_rq);
-	/*
-	 * Already in the rightmost position?
-	 */
-	if (unlikely(!rightmost || entity_before(rightmost, se)))
-		return;
-
-	/*
-	 * Minimally necessary key value to be last in the tree:
-	 * Upon rescheduling, sched_class::put_prev_task() will place
-	 * 'current' within the tree based on its new key value.
-	 */
-	se->vruntime = rightmost->vruntime + 1;
-}
-
 #ifdef CONFIG_SMP
 
 static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1849,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
 	}
 }
 
+static void set_skip_buddy(struct sched_entity *se)
+{
+	if (likely(task_of(se)->policy != SCHED_IDLE)) {
+		for_each_sched_entity(se)
+			cfs_rq_of(se)->skip = se;
+	}
+}
+
 /*
  * Preempt the current task with a newly woken task if needed:
  */
@@ -1947,6 +1945,36 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
 	}
 }
 
+/*
+ * sched_yield() is very simple
+ *
+ * The magic of dealing with the ->skip buddy is in pick_next_entity.
+ */
+static void yield_task_fair(struct rq *rq)
+{
+	struct task_struct *curr = rq->curr;
+	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+	struct sched_entity *se = &curr->se;
+
+	/*
+	 * Are we the only task in the tree?
+	 */
+	if (unlikely(rq->nr_running == 1))
+		return;
+
+	clear_buddies(cfs_rq, se);
+
+	if (curr->policy != SCHED_BATCH) {
+		update_rq_clock(rq);
+		/*
+		 * Update run-time statistics of the 'current'.
+		 */
+		update_curr(cfs_rq);
+	}
+
+	set_skip_buddy(se);
+}
+
 #ifdef CONFIG_SMP
 /**************************************************
  * Fair scheduling class load-balancing methods:
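After this change, sched_yield() no longer forfeits vruntime by requeueing the caller at the rightmost position; the caller is merely marked as the skip buddy, and pick_next_entity() runs something else only when that stays fair. A small harness to exercise the syscall from user space; how much CPU a yielding busy-loop still gets next to a non-yielding competitor depends on the scheduler in the running kernel, so treat the output as an observation, not a spec:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <time.h>

	/* Busy-loop for ~1s of wall time, optionally yielding on every
	 * iteration, and report how many iterations we managed. */
	static unsigned long spin(int yield)
	{
		struct timespec start, now;
		unsigned long iters = 0;

		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			if (yield)
				sched_yield();
			iters++;
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (now.tv_sec - start.tv_sec < 1);
		return iters;
	}

	int main(void)
	{
		/* Run both variants back to back; pin the process to one CPU
		 * and start a competing busy task to see the skip-buddy
		 * effect rather than just syscall overhead. */
		printf("no yield: %lu iters\n", spin(0));
		printf("yield:    %lu iters\n", spin(1));
		return 0;
	}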
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bc86bb32e126..cbfda7e2416e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -360,13 +360,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rt_handler,
 	},
-	{
-		.procname	= "sched_compat_yield",
-		.data		= &sysctl_sched_compat_yield,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",