aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2007-09-19 17:34:46 -0400
committerIngo Molnar <mingo@elte.hu>2007-09-19 17:34:46 -0400
commit1799e35d5baab6e06168b46cc78b968e728ea3d1 (patch)
treecccf64a62fa9106aa18253371b675925c0582bab
parenta88a8eff1e6e32d3288986a9d36c6a449c032d3a (diff)
sched: add /proc/sys/kernel/sched_compat_yield
add /proc/sys/kernel/sched_compat_yield to make sys_sched_yield()
more aggressive, by moving the yielding task to the last position
in the rbtree.

with sched_compat_yield=0:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  2539 mingo     20   0  1576  252  204 R   50  0.0   0:02.03 loop_yield
  2541 mingo     20   0  1576  244  196 R   50  0.0   0:02.05 loop

with sched_compat_yield=1:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  2584 mingo     20   0  1576  248  196 R   99  0.0   0:52.45 loop
  2582 mingo     20   0  1576  256  204 R    0  0.0   0:00.00 loop_yield

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/sched.c5
-rw-r--r--kernel/sched_fair.c63
-rw-r--r--kernel/sysctl.c8
4 files changed, 67 insertions, 10 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5445eaec6908..3de79016f2a6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1406,6 +1406,7 @@ extern unsigned int sysctl_sched_wakeup_granularity;
1406extern unsigned int sysctl_sched_batch_wakeup_granularity; 1406extern unsigned int sysctl_sched_batch_wakeup_granularity;
1407extern unsigned int sysctl_sched_stat_granularity; 1407extern unsigned int sysctl_sched_stat_granularity;
1408extern unsigned int sysctl_sched_runtime_limit; 1408extern unsigned int sysctl_sched_runtime_limit;
1409extern unsigned int sysctl_sched_compat_yield;
1409extern unsigned int sysctl_sched_child_runs_first; 1410extern unsigned int sysctl_sched_child_runs_first;
1410extern unsigned int sysctl_sched_features; 1411extern unsigned int sysctl_sched_features;
1411 1412
diff --git a/kernel/sched.c b/kernel/sched.c
index deeb1f8e0c30..63e0971c8fbb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4550,10 +4550,7 @@ asmlinkage long sys_sched_yield(void)
4550 struct rq *rq = this_rq_lock(); 4550 struct rq *rq = this_rq_lock();
4551 4551
4552 schedstat_inc(rq, yld_cnt); 4552 schedstat_inc(rq, yld_cnt);
4553 if (unlikely(rq->nr_running == 1)) 4553 current->sched_class->yield_task(rq, current);
4554 schedstat_inc(rq, yld_act_empty);
4555 else
4556 current->sched_class->yield_task(rq, current);
4557 4554
4558 /* 4555 /*
4559 * Since we are going to call schedule() anyway, there's 4556 * Since we are going to call schedule() anyway, there's
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 892616bf2c77..c9fbe8e73a45 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -43,6 +43,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
43unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL; 43unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
44 44
45/* 45/*
46 * sys_sched_yield() compat mode
47 *
48 * This option switches the aggressive yield implementation of the
49 * old scheduler back on.
50 */
51unsigned int __read_mostly sysctl_sched_compat_yield;
52
53/*
46 * SCHED_BATCH wake-up granularity. 54 * SCHED_BATCH wake-up granularity.
47 * (default: 25 msec, units: nanoseconds) 55 * (default: 25 msec, units: nanoseconds)
48 * 56 *
@@ -897,19 +905,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
897} 905}
898 906
899/* 907/*
900 * sched_yield() support is very simple - we dequeue and enqueue 908 * sched_yield() support is very simple - we dequeue and enqueue.
909 *
910 * If compat_yield is turned on then we requeue to the end of the tree.
901 */ 911 */
902static void yield_task_fair(struct rq *rq, struct task_struct *p) 912static void yield_task_fair(struct rq *rq, struct task_struct *p)
903{ 913{
904 struct cfs_rq *cfs_rq = task_cfs_rq(p); 914 struct cfs_rq *cfs_rq = task_cfs_rq(p);
915 struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
916 struct sched_entity *rightmost, *se = &p->se;
917 struct rb_node *parent;
905 918
906 __update_rq_clock(rq);
907 /* 919 /*
908 * Dequeue and enqueue the task to update its 920 * Are we the only task in the tree?
909 * position within the tree: 921 */
922 if (unlikely(cfs_rq->nr_running == 1))
923 return;
924
925 if (likely(!sysctl_sched_compat_yield)) {
926 __update_rq_clock(rq);
927 /*
928 * Dequeue and enqueue the task to update its
929 * position within the tree:
930 */
931 dequeue_entity(cfs_rq, &p->se, 0);
932 enqueue_entity(cfs_rq, &p->se, 0);
933
934 return;
935 }
936 /*
937 * Find the rightmost entry in the rbtree:
910 */ 938 */
911 dequeue_entity(cfs_rq, &p->se, 0); 939 do {
912 enqueue_entity(cfs_rq, &p->se, 0); 940 parent = *link;
941 link = &parent->rb_right;
942 } while (*link);
943
944 rightmost = rb_entry(parent, struct sched_entity, run_node);
945 /*
946 * Already in the rightmost position?
947 */
948 if (unlikely(rightmost == se))
949 return;
950
951 /*
952 * Minimally necessary key value to be last in the tree:
953 */
954 se->fair_key = rightmost->fair_key + 1;
955
956 if (cfs_rq->rb_leftmost == &se->run_node)
957 cfs_rq->rb_leftmost = rb_next(&se->run_node);
958 /*
959 * Relink the task to the rightmost position:
960 */
961 rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
962 rb_link_node(&se->run_node, parent, link);
963 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
913} 964}
914 965
915/* 966/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6ace893c17c9..53a456ebf6d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -303,6 +303,14 @@ static ctl_table kern_table[] = {
303 .proc_handler = &proc_dointvec, 303 .proc_handler = &proc_dointvec,
304 }, 304 },
305#endif 305#endif
306 {
307 .ctl_name = CTL_UNNUMBERED,
308 .procname = "sched_compat_yield",
309 .data = &sysctl_sched_compat_yield,
310 .maxlen = sizeof(unsigned int),
311 .mode = 0644,
312 .proc_handler = &proc_dointvec,
313 },
306#ifdef CONFIG_PROVE_LOCKING 314#ifdef CONFIG_PROVE_LOCKING
307 { 315 {
308 .ctl_name = CTL_UNNUMBERED, 316 .ctl_name = CTL_UNNUMBERED,