Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-sched

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-sched:
  sched: Fix bug in SCHED_IDLE interaction with group scheduling
  sched: Fix rt_rq->pushable_tasks initialization in init_rt_rq()
  sched: Reset sched stats on fork()
  sched_rt: Fix overload bug on rt group scheduling
  sched: Documentation/sched-rt-group: Fix style issues & bump version
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-07-16 13:18:29 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-07-16 13:18:29 -0400
commit: 4b0a84043e0c14088958fddb62f416d050368011 (patch)
tree: cf5df6cef66a178c35e335240a756798caf2c399
parent: 63f7a330014ad29b662638caabd8e96fe945b9ed (diff)
parent: d07387b490b1c43bfcb9f3900faf96f2dafb2630 (diff)
4 files changed, 59 insertions, 18 deletions
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 1df7f9cdab05..86eabe6c3419 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -73,7 +73,7 @@ The remaining CPU time will be used for user input and other tasks. Because
 realtime tasks have explicitly allocated the CPU time they need to perform
 their tasks, buffer underruns in the graphics or audio can be eliminated.
-NOTE: the above example is not fully implemented as of yet (2.6.25). We still
+NOTE: the above example is not fully implemented yet. We still
 lack an EDF scheduler to make non-uniform periods usable.
@@ -140,14 +140,15 @@ The other option is:
 .o CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")
-This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_us"
+This uses the /cgroup virtual file system and
-to control the CPU time reserved for each control group instead.
+"/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each
+control group instead.
 For more information on working with control groups, you should read
 Documentation/cgroups/cgroups.txt as well.
-Group settings are checked against the following limits in order to keep the configuration
+Group settings are checked against the following limits in order to keep the
-schedulable:
+configuration schedulable:
   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
@@ -189,7 +190,7 @@ Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
 the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
-deadline delta (deadline - now).
+deadline delta (deadline - now)).
 This means the whole PI machinery will have to be reworked - and that is one of
 the most complex pieces of code we have.
diff --git a/kernel/sched.c b/kernel/sched.c
index 01f55ada3598..98972d366fdc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
 #endif
 #ifdef CONFIG_SMP
        unsigned long rt_nr_migratory;
+        unsigned long rt_nr_total;
        int overloaded;
        struct plist_head pushable_tasks;
 #endif
@@ -2571,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
        p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
 #ifdef CONFIG_SCHEDSTATS
-        p->se.wait_start                = 0;
+        p->se.wait_start                        = 0;
-        p->se.sum_sleep_runtime         = 0;
+        p->se.wait_max                          = 0;
-        p->se.sleep_start               = 0;
+        p->se.wait_count                        = 0;
-        p->se.block_start               = 0;
+        p->se.wait_sum                          = 0;
-        p->se.sleep_max                 = 0;
-        p->se.block_max                 = 0;
+        p->se.sleep_start                       = 0;
-        p->se.exec_max                  = 0;
+        p->se.sleep_max                         = 0;
-        p->se.slice_max                 = 0;
+        p->se.sum_sleep_runtime                 = 0;
-        p->se.wait_max                  = 0;
+        p->se.block_start                       = 0;
+        p->se.block_max                         = 0;
+        p->se.exec_max                          = 0;
+        p->se.slice_max                         = 0;
+        p->se.nr_migrations_cold                = 0;
+        p->se.nr_failed_migrations_affine       = 0;
+        p->se.nr_failed_migrations_running      = 0;
+        p->se.nr_failed_migrations_hot          = 0;
+        p->se.nr_forced_migrations              = 0;
+        p->se.nr_forced2_migrations             = 0;
+        p->se.nr_wakeups                        = 0;
+        p->se.nr_wakeups_sync                   = 0;
+        p->se.nr_wakeups_migrate                = 0;
+        p->se.nr_wakeups_local                  = 0;
+        p->se.nr_wakeups_remote                 = 0;
+        p->se.nr_wakeups_affine                 = 0;
+        p->se.nr_wakeups_affine_attempts        = 0;
+        p->se.nr_wakeups_passive                = 0;
+        p->se.nr_wakeups_idle                   = 0;
 #endif
        INIT_LIST_HEAD(&p->rt.run_list);
@@ -9074,7 +9097,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 #ifdef CONFIG_SMP
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
-        plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
+        plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
 #endif
        rt_rq->rt_time = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e9556f..7c248dc30f41 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -687,7 +687,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
                         * all of which have the same weight.
                         */
                        if (sched_feat(NORMALIZED_SLEEPER) &&
-                                        task_of(se)->policy != SCHED_IDLE)
+                                        (!entity_is_task(se) ||
+                                         task_of(se)->policy != SCHED_IDLE))
                                thresh = calc_delta_fair(thresh, se);
                        vruntime -= thresh;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a73045..3918e01994e0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
 #ifdef CONFIG_RT_GROUP_SCHED
+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
        return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 #else /* CONFIG_RT_GROUP_SCHED */
+#define rt_entity_is_task(rt_se) (1)
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
        return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
 static void update_rt_migration(struct rt_rq *rt_rq)
 {
-        if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
+        if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
                if (!rt_rq->overloaded) {
                        rt_set_overload(rq_of_rt_rq(rt_rq));
                        rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+        if (!rt_entity_is_task(rt_se))
+                return;
+        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+        rt_rq->rt_nr_total++;
        if (rt_se->nr_cpus_allowed > 1)
                rt_rq->rt_nr_migratory++;
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+        if (!rt_entity_is_task(rt_se))
+                return;
+        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+        rt_rq->rt_nr_total--;
        if (rt_se->nr_cpus_allowed > 1)
                rt_rq->rt_nr_migratory--;
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-07-16 13:18:29 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-07-16 13:18:29 -0400
commit	4b0a84043e0c14088958fddb62f416d050368011 (patch)
tree	cf5df6cef66a178c35e335240a756798caf2c399
parent	63f7a330014ad29b662638caabd8e96fe945b9ed (diff)
parent	d07387b490b1c43bfcb9f3900faf96f2dafb2630 (diff)

diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 1df7f9cdab05..86eabe6c3419 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt
@@ -73,7 +73,7 @@ The remaining CPU time will be used for user input and other tasks. Because
73	realtime tasks have explicitly allocated the CPU time they need to perform	73	realtime tasks have explicitly allocated the CPU time they need to perform
74	their tasks, buffer underruns in the graphics or audio can be eliminated.	74	their tasks, buffer underruns in the graphics or audio can be eliminated.
75		75
76	NOTE: the above example is not fully implemented as of yet (2.6.25). We still	76	NOTE: the above example is not fully implemented yet. We still
77	lack an EDF scheduler to make non-uniform periods usable.	77	lack an EDF scheduler to make non-uniform periods usable.
78		78
79		79
@@ -140,14 +140,15 @@ The other option is:
140		140
141	.o CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")	141	.o CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")
142		142
143	This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_us"	143	This uses the /cgroup virtual file system and
144	to control the CPU time reserved for each control group instead.	144	"/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each
		145	control group instead.
145		146
146	For more information on working with control groups, you should read	147	For more information on working with control groups, you should read
147	Documentation/cgroups/cgroups.txt as well.	148	Documentation/cgroups/cgroups.txt as well.
148		149
149	Group settings are checked against the following limits in order to keep the configuration	150	Group settings are checked against the following limits in order to keep the
150	schedulable:	151	configuration schedulable:
151		152
152	\Sum_{i} runtime_{i} / global_period <= global_runtime / global_period	153	\Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
153		154
@@ -189,7 +190,7 @@ Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
189	the biggest challenge as the current linux PI infrastructure is geared towards	190	the biggest challenge as the current linux PI infrastructure is geared towards
190	the limited static priority levels 0-99. With deadline scheduling you need to	191	the limited static priority levels 0-99. With deadline scheduling you need to
191	do deadline inheritance (since priority is inversely proportional to the	192	do deadline inheritance (since priority is inversely proportional to the
192	deadline delta (deadline - now).	193	deadline delta (deadline - now)).
193		194
194	This means the whole PI machinery will have to be reworked - and that is one of	195	This means the whole PI machinery will have to be reworked - and that is one of
195	the most complex pieces of code we have.	196	the most complex pieces of code we have.


diff --git a/kernel/sched.c b/kernel/sched.c index 01f55ada3598..98972d366fdc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
493	#endif	493	#endif
494	#ifdef CONFIG_SMP	494	#ifdef CONFIG_SMP
495	unsigned long rt_nr_migratory;	495	unsigned long rt_nr_migratory;
		496	unsigned long rt_nr_total;
496	int overloaded;	497	int overloaded;
497	struct plist_head pushable_tasks;	498	struct plist_head pushable_tasks;
498	#endif	499	#endif
@@ -2571,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
2571	p->se.avg_wakeup = sysctl_sched_wakeup_granularity;	2572	p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2572		2573
2573	#ifdef CONFIG_SCHEDSTATS	2574	#ifdef CONFIG_SCHEDSTATS
2574	p->se.wait_start = 0;	2575	p->se.wait_start = 0;
2575	p->se.sum_sleep_runtime = 0;	2576	p->se.wait_max = 0;
2576	p->se.sleep_start = 0;	2577	p->se.wait_count = 0;
2577	p->se.block_start = 0;	2578	p->se.wait_sum = 0;
2578	p->se.sleep_max = 0;	2579
2579	p->se.block_max = 0;	2580	p->se.sleep_start = 0;
2580	p->se.exec_max = 0;	2581	p->se.sleep_max = 0;
2581	p->se.slice_max = 0;	2582	p->se.sum_sleep_runtime = 0;
2582	p->se.wait_max = 0;	2583
		2584	p->se.block_start = 0;
		2585	p->se.block_max = 0;
		2586	p->se.exec_max = 0;
		2587	p->se.slice_max = 0;
		2588
		2589	p->se.nr_migrations_cold = 0;
		2590	p->se.nr_failed_migrations_affine = 0;
		2591	p->se.nr_failed_migrations_running = 0;
		2592	p->se.nr_failed_migrations_hot = 0;
		2593	p->se.nr_forced_migrations = 0;
		2594	p->se.nr_forced2_migrations = 0;
		2595
		2596	p->se.nr_wakeups = 0;
		2597	p->se.nr_wakeups_sync = 0;
		2598	p->se.nr_wakeups_migrate = 0;
		2599	p->se.nr_wakeups_local = 0;
		2600	p->se.nr_wakeups_remote = 0;
		2601	p->se.nr_wakeups_affine = 0;
		2602	p->se.nr_wakeups_affine_attempts = 0;
		2603	p->se.nr_wakeups_passive = 0;
		2604	p->se.nr_wakeups_idle = 0;
		2605
2583	#endif	2606	#endif
2584		2607
2585	INIT_LIST_HEAD(&p->rt.run_list);	2608	INIT_LIST_HEAD(&p->rt.run_list);
@@ -9074,7 +9097,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
9074	#ifdef CONFIG_SMP	9097	#ifdef CONFIG_SMP
9075	rt_rq->rt_nr_migratory = 0;	9098	rt_rq->rt_nr_migratory = 0;
9076	rt_rq->overloaded = 0;	9099	rt_rq->overloaded = 0;
9077	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);	9100	plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
9078	#endif	9101	#endif
9079		9102
9080	rt_rq->rt_time = 0;	9103	rt_rq->rt_time = 0;


diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ba7fd6e9556f..7c248dc30f41 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c
@@ -687,7 +687,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
687	* all of which have the same weight.	687	* all of which have the same weight.
688	*/	688	*/
689	if (sched_feat(NORMALIZED_SLEEPER) &&	689	if (sched_feat(NORMALIZED_SLEEPER) &&
690	task_of(se)->policy != SCHED_IDLE)	690	(!entity_is_task(se) ||
		691	task_of(se)->policy != SCHED_IDLE))
691	thresh = calc_delta_fair(thresh, se);	692	thresh = calc_delta_fair(thresh, se);
692		693
693	vruntime -= thresh;	694	vruntime -= thresh;


diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 9bf0d2a73045..3918e01994e0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
10		10
11	#ifdef CONFIG_RT_GROUP_SCHED	11	#ifdef CONFIG_RT_GROUP_SCHED
12		12
		13	#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
		14
13	static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)	15	static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
14	{	16	{
15	return rt_rq->rq;	17	return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
22		24
23	#else /* CONFIG_RT_GROUP_SCHED */	25	#else /* CONFIG_RT_GROUP_SCHED */
24		26
		27	#define rt_entity_is_task(rt_se) (1)
		28
25	static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)	29	static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
26	{	30	{
27	return container_of(rt_rq, struct rq, rt);	31	return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
73		77
74	static void update_rt_migration(struct rt_rq *rt_rq)	78	static void update_rt_migration(struct rt_rq *rt_rq)
75	{	79	{
76	if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {	80	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
77	if (!rt_rq->overloaded) {	81	if (!rt_rq->overloaded) {
78	rt_set_overload(rq_of_rt_rq(rt_rq));	82	rt_set_overload(rq_of_rt_rq(rt_rq));
79	rt_rq->overloaded = 1;	83	rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
86		90
87	static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)	91	static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
88	{	92	{
		93	if (!rt_entity_is_task(rt_se))
		94	return;
		95
		96	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
		97
		98	rt_rq->rt_nr_total++;
89	if (rt_se->nr_cpus_allowed > 1)	99	if (rt_se->nr_cpus_allowed > 1)
90	rt_rq->rt_nr_migratory++;	100	rt_rq->rt_nr_migratory++;
91		101
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
94		104
95	static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)	105	static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
96	{	106	{
		107	if (!rt_entity_is_task(rt_se))
		108	return;
		109
		110	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
		111
		112	rt_rq->rt_nr_total--;
97	if (rt_se->nr_cpus_allowed > 1)	113	if (rt_se->nr_cpus_allowed > 1)
98	rt_rq->rt_nr_migratory--;	114	rt_rq->rt_nr_migratory--;
99		115