aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <a.p.zijlstra@chello.nl> 2010-09-22 07:53:15 -0400
committer Ingo Molnar <mingo@elte.hu> 2010-10-18 12:41:58 -0400
commit 34f971f6f7988be4d014eec3e3526bee6d007ffa (patch)
tree a662e0701f81f019418b55aa1354f833277b9c40
parent 4924627423d5e286136ad2520f5be536345ae590 (diff)
sched: Create special class for stop/migrate work
In order to separate the stop/migrate work thread from the SCHED_FIFO implementation, create a special class for it that is of higher priority than SCHED_FIFO itself.

This currently solves a problem where cpu-hotplug consumes so much cpu-time that the SCHED_FIFO class gets throttled, but has the bandwidth replenishment timer pending on the now dead cpu.

It is also required for when we add the planned deadline scheduling class above SCHED_FIFO, as the stop/migrate thread still needs to transcend those tasks.

Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1285165776.2275.1022.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- kernel/sched.c 54
-rw-r--r-- kernel/sched_stoptask.c 108
-rw-r--r-- kernel/stop_machine.c 8
3 files changed, 158 insertions, 12 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 7f522832250c..5f64fed56a44 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -486,7 +486,7 @@ struct rq {
486 */ 486 */
487 unsigned long nr_uninterruptible; 487 unsigned long nr_uninterruptible;
488 488
489 struct task_struct *curr, *idle; 489 struct task_struct *curr, *idle, *stop;
490 unsigned long next_balance; 490 unsigned long next_balance;
491 struct mm_struct *prev_mm; 491 struct mm_struct *prev_mm;
492 492
@@ -1837,7 +1837,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1837 1837
1838static const struct sched_class rt_sched_class; 1838static const struct sched_class rt_sched_class;
1839 1839
1840#define sched_class_highest (&rt_sched_class) 1840#define sched_class_highest (&stop_sched_class)
1841#define for_each_class(class) \ 1841#define for_each_class(class) \
1842 for (class = sched_class_highest; class; class = class->next) 1842 for (class = sched_class_highest; class; class = class->next)
1843 1843
@@ -1917,10 +1917,41 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
1917#include "sched_idletask.c" 1917#include "sched_idletask.c"
1918#include "sched_fair.c" 1918#include "sched_fair.c"
1919#include "sched_rt.c" 1919#include "sched_rt.c"
1920#include "sched_stoptask.c"
1920#ifdef CONFIG_SCHED_DEBUG 1921#ifdef CONFIG_SCHED_DEBUG
1921# include "sched_debug.c" 1922# include "sched_debug.c"
1922#endif 1923#endif
1923 1924
/*
 * sched_set_stop_task - install @stop as the stop task of @cpu's runqueue.
 *
 * A non-NULL @stop is first switched to SCHED_FIFO at priority
 * MAX_RT_PRIO - 1 and then has its sched_class pointed at stop_sched_class.
 * Passing NULL only clears the runqueue's stop pointer. Whatever task was
 * installed before the call has its sched_class set back to rt_sched_class.
 */
1925void sched_set_stop_task(int cpu, struct task_struct *stop)
1926{
1927 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
1928 struct task_struct *old_stop = cpu_rq(cpu)->stop;
1929
1930 if (stop) {
1931 /*
1932 * Make it appear like a SCHED_FIFO task, it's something
1933 * userspace knows about and won't get confused about.
1934 *
1935 * Also, it will make PI more or less work without too
1936 * much confusion -- but then, stop work should not
1937 * rely on PI working anyway.
1938 */
1939 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
1940
1941 stop->sched_class = &stop_sched_class;
1942 }
1943
1944 cpu_rq(cpu)->stop = stop; /* may be NULL, which clears the slot */
1945
1946 if (old_stop) {
1947 /*
1948 * Reset it back to a normal scheduling class so that
1949 * it can die in pieces.
1950 */
1951 old_stop->sched_class = &rt_sched_class;
1952 }
1953}
1954
1924/* 1955/*
1925 * __normal_prio - return the priority that is based on the static prio 1956 * __normal_prio - return the priority that is based on the static prio
1926 */ 1957 */
@@ -3720,17 +3751,13 @@ pick_next_task(struct rq *rq)
3720 return p; 3751 return p;
3721 } 3752 }
3722 3753
3723 class = sched_class_highest; 3754 for_each_class(class) {
3724 for ( ; ; ) {
3725 p = class->pick_next_task(rq); 3755 p = class->pick_next_task(rq);
3726 if (p) 3756 if (p)
3727 return p; 3757 return p;
3728 /*
3729 * Will never be NULL as the idle class always
3730 * returns a non-NULL p:
3731 */
3732 class = class->next;
3733 } 3758 }
3759
3760 BUG(); /* the idle class will always have a runnable task */
3734} 3761}
3735 3762
3736/* 3763/*
@@ -4659,6 +4686,15 @@ recheck:
4659 */ 4686 */
4660 rq = __task_rq_lock(p); 4687 rq = __task_rq_lock(p);
4661 4688
4689 /*
4690 * Changing the policy of the stop threads is a very bad idea
4691 */
4692 if (p == rq->stop) {
4693 __task_rq_unlock(rq);
4694 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4695 return -EINVAL;
4696 }
4697
4662#ifdef CONFIG_RT_GROUP_SCHED 4698#ifdef CONFIG_RT_GROUP_SCHED
4663 if (user) { 4699 if (user) {
4664 /* 4700 /*
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
new file mode 100644
index 000000000000..45bddc0c1048
--- /dev/null
+++ b/kernel/sched_stoptask.c
@@ -0,0 +1,108 @@
1/*
2 * stop-task scheduling class.
3 *
4 * The stop task is the highest priority task in the system, it preempts
5 * everything and will be preempted by nothing.
6 *
7 * See kernel/stop_machine.c
8 */
9
10#ifdef CONFIG_SMP
/*
 * CPU selection hook for the stop class: always returns task_cpu(p).
 * The rq, sd_flag and flags arguments are not used.
 */
11static int
12select_task_rq_stop(struct rq *rq, struct task_struct *p,
13 int sd_flag, int flags)
14{
15 return task_cpu(p); /* stop tasks never migrate */
16}
17#endif /* CONFIG_SMP */
18
/*
 * Preemption check for the stop class: unconditionally calls
 * resched_task() on rq->curr. The p and flags arguments are not used.
 */
19static void
20check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
21{
22 resched_task(rq->curr); /* we preempt everything */
23}
24
/*
 * Return rq->stop when it is set and its state is TASK_RUNNING;
 * otherwise return NULL.
 */
25static struct task_struct *pick_next_task_stop(struct rq *rq)
26{
27 struct task_struct *stop = rq->stop;
28
29 if (stop && stop->state == TASK_RUNNING) /* stop may be NULL: check it first */
30 return stop;
31
32 return NULL;
33}
34
/*
 * Enqueue hook for the stop class: empty body. pick_next_task_stop()
 * reads rq->stop directly, so nothing is queued here.
 */
35static void
36enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
37{
38}
39
/*
 * Dequeue hook for the stop class: empty body, mirroring the empty
 * enqueue hook.
 */
40static void
41dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
42{
43}
44
/* Yield hook for the stop class: reaching it triggers BUG(). */
45static void yield_task_stop(struct rq *rq)
46{
47 BUG(); /* the stop task should never yield, it's pointless. */
48}
49
/* put_prev_task hook for the stop class: empty body, rq and prev are unused. */
50static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
51{
52}
53
/* Tick hook for the stop class: empty body, no per-tick work is done here. */
54static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
55{
56}
57
/* set_curr_task hook for the stop class: empty body, rq is unused. */
58static void set_curr_task_stop(struct rq *rq)
59{
60}
61
/* switched_to hook for the stop class: reaching it triggers BUG(). */
62static void switched_to_stop(struct rq *rq, struct task_struct *p,
63 int running)
64{
65 BUG(); /* it's impossible to change to this class */
66}
67
/* prio_changed hook for the stop class: reaching it triggers BUG(). */
68static void prio_changed_stop(struct rq *rq, struct task_struct *p,
69 int oldprio, int running)
70{
71 BUG(); /* how!?, what priority? */
72}
73
/*
 * get_rr_interval hook for the stop class: always returns 0.
 * NOTE(review): presumably 0 means "no round-robin timeslice" -- confirm
 * against the callers of ->get_rr_interval.
 */
74static unsigned int
75get_rr_interval_stop(struct rq *rq, struct task_struct *task)
76{
77 return 0;
78}
79
80/*
81 * Simple, special scheduling class for the per-CPU stop tasks:
82 */
83static const struct sched_class stop_sched_class = {
84 .next = &rt_sched_class, /* the class that follows this one via ->next */
85
86 .enqueue_task = enqueue_task_stop,
87 .dequeue_task = dequeue_task_stop,
88 .yield_task = yield_task_stop, /* BUG()s if ever called */
89
90 .check_preempt_curr = check_preempt_curr_stop,
91
92 .pick_next_task = pick_next_task_stop,
93 .put_prev_task = put_prev_task_stop,
94
95#ifdef CONFIG_SMP
96 .select_task_rq = select_task_rq_stop,
97#endif
98
99 .set_curr_task = set_curr_task_stop,
100 .task_tick = task_tick_stop,
101
102 .get_rr_interval = get_rr_interval_stop,
103
104 .prio_changed = prio_changed_stop, /* BUG()s if ever called */
105 .switched_to = switched_to_stop, /* BUG()s if ever called */
106
107 /* no .task_new for stop tasks */
108};
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4372ccb25127..090c28812ce1 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -287,11 +287,12 @@ repeat:
287 goto repeat; 287 goto repeat;
288} 288}
289 289
290extern void sched_set_stop_task(int cpu, struct task_struct *stop);
291
290/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ 292/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
291static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, 293static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
292 unsigned long action, void *hcpu) 294 unsigned long action, void *hcpu)
293{ 295{
294 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
295 unsigned int cpu = (unsigned long)hcpu; 296 unsigned int cpu = (unsigned long)hcpu;
296 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 297 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
297 struct task_struct *p; 298 struct task_struct *p;
@@ -304,13 +305,13 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
304 cpu); 305 cpu);
305 if (IS_ERR(p)) 306 if (IS_ERR(p))
306 return NOTIFY_BAD; 307 return NOTIFY_BAD;
307 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
308 get_task_struct(p); 308 get_task_struct(p);
309 kthread_bind(p, cpu);
310 sched_set_stop_task(cpu, p);
309 stopper->thread = p; 311 stopper->thread = p;
310 break; 312 break;
311 313
312 case CPU_ONLINE: 314 case CPU_ONLINE:
313 kthread_bind(stopper->thread, cpu);
314 /* strictly unnecessary, as first user will wake it */ 315 /* strictly unnecessary, as first user will wake it */
315 wake_up_process(stopper->thread); 316 wake_up_process(stopper->thread);
316 /* mark enabled */ 317 /* mark enabled */
@@ -325,6 +326,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
325 { 326 {
326 struct cpu_stop_work *work; 327 struct cpu_stop_work *work;
327 328
329 sched_set_stop_task(cpu, NULL);
328 /* kill the stopper */ 330 /* kill the stopper */
329 kthread_stop(stopper->thread); 331 kthread_stop(stopper->thread);
330 /* drain remaining works */ 332 /* drain remaining works */