author    Bjoern B. Brandenburg <bbb@cs.unc.edu>    2008-05-04 18:07:35 -0400
committer Bjoern B. Brandenburg <bbb@cs.unc.edu>    2008-05-04 18:07:35 -0400
commit    6b06d1ce233787655eb21b624ed924806768b36c (patch)
tree      c0ffcd31b0831719615e83b63b2022d41faf6208
parent    9b4ccbc27ea5d1a35e79391ca5a500b32cd253a1 (diff)
LITMUS: avoid using the same stack on two CPUs in global schedulers
This change fixes a race where a job could be executed on more than one CPU at the same time, which led to random crashes.
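
For intuition, here is a minimal user-space sketch of the hand-off protocol this patch introduces. It is illustrative only: struct task, acquire_stack, and release_stack are made-up names, and C11 atomics stand in for the kernel's volatile int plus explicit memory barriers. The idea is that the CPU scheduling a task claims its stack, and any other CPU that later picks the same task must spin until the previous owner's context switch has completed and released it.

#include <sched.h>
#include <stdatomic.h>

#define NO_CPU -1

struct task {
	_Atomic int stack_in_use;	/* owning CPU, or NO_CPU if free */
};

/* Scheduling CPU: wait until the stack is free, then claim it. */
static void acquire_stack(struct task *next, int this_cpu)
{
	while (atomic_load(&next->stack_in_use) != NO_CPU)
		sched_yield();	/* user-space stand-in for cpu_relax() */
	atomic_store(&next->stack_in_use, this_cpu);
}

/* Previous CPU: release the stack once the switch away is complete,
 * mirroring the new assignment in finish_task_switch(). */
static void release_stack(struct task *prev)
{
	atomic_store(&prev->stack_in_use, NO_CPU);
}
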
-rw-r--r--  include/litmus/rt_param.h  | 10
-rw-r--r--  kernel/sched.c             |  6
-rw-r--r--  litmus/sched_gsn_edf.c     |  9
-rw-r--r--  litmus/sched_litmus.c      | 47
4 files changed, 60 insertions, 12 deletions
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 76be2fe4be..a5e939daa5 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -111,6 +111,16 @@ struct rt_param {
 	 */
 	volatile int scheduled_on;
 
+	/* Is the stack of the task currently in use? Currently, it
+	 * is the responsibility of the plugin to update this field.
+	 * May become part of the LITMUS core some day.
+	 *
+	 * Used by GSN-EDF.
+	 *
+	 * Be careful to avoid deadlocks!
+	 */
+	volatile int stack_in_use;
+
 	/* This field can be used by plugins to store where the task
 	 * is currently linked. It is the responsibility of the plugin
 	 * to avoid race conditions.
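
The "Be careful to avoid deadlocks!" warning refers to circular waits: if CPU 0 picks a task whose stack is still owned by CPU 1 while CPU 1 simultaneously picks a task whose stack is owned by CPU 0, both spin forever, since each CPU would release its own stack only after completing the switch it is waiting to start. A hedged user-space illustration of that cycle (all names hypothetical, with a one-second watchdog so the demo actually terminates; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NO_CPU -1

static _Atomic int stack_of_A = 0;	/* task A's stack owned by "CPU 0" */
static _Atomic int stack_of_B = 1;	/* task B's stack owned by "CPU 1" */
static _Atomic int give_up;

/* CPU 0 wants to switch to B, CPU 1 wants to switch to A: a cross
 * migration. Neither wait can ever finish on its own. */
static void *cpu0(void *arg)
{
	while (atomic_load(&stack_of_B) != NO_CPU && !atomic_load(&give_up))
		;
	return NULL;
}

static void *cpu1(void *arg)
{
	while (atomic_load(&stack_of_A) != NO_CPU && !atomic_load(&give_up))
		;
	return NULL;
}

int main(void)
{
	pthread_t t0, t1;

	pthread_create(&t0, NULL, cpu0, NULL);
	pthread_create(&t1, NULL, cpu1, NULL);
	sleep(1);			/* watchdog: the cycle never resolves */
	atomic_store(&give_up, 1);
	pthread_join(t0, NULL);
	pthread_join(t1, NULL);
	puts("circular wait: neither stack was released");
	return 0;
}

As the comment says, preventing this cycle is left to the plugin; the core (see litmus_schedule below) only detects a suspiciously long wait.
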
diff --git a/kernel/sched.c b/kernel/sched.c
index 441996e08c..d9e876fea8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1897,6 +1897,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	litmus->finish_switch(prev);
 	finish_lock_switch(rq, prev);
+	prev->rt_param.stack_in_use = NO_CPU;
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
@@ -3679,6 +3680,7 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
+		TRACE_TASK(next, "switched to\n");
 		context_switch(rq, prev, next); /* unlocks the rq */
 	} else
 		spin_unlock_irq(&rq->lock);
@@ -4391,8 +4393,10 @@ recheck:
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
 
-	if (policy == SCHED_LITMUS)
+	if (policy == SCHED_LITMUS) {
+		p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
 		litmus->task_new(p, on_rq, running);
+	}
 
 	if (on_rq) {
 		if (running)
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index c988e91e6e..eb0f4c0b36 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -466,10 +466,8 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
 		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
 	else if (exists && !next)
 		TRACE("becomes idle at %llu.\n", litmus_clock());
-	/* don't race with a concurrent switch */
-	if (next && prev != next)
-		while (next->rt_param.scheduled_on != NO_CPU)
-			cpu_relax();
+
+
 	return next;
 }
 
@@ -481,9 +479,6 @@ static void gsnedf_finish_switch(struct task_struct *prev)
 	cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
 
 	entry->scheduled = is_realtime(current) ? current : NULL;
-
-	prev->rt_param.scheduled_on = NO_CPU;
-	current->rt_param.scheduled_on = smp_processor_id();
 }
 
 
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index feb0159033..ab52ae9510 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -21,10 +21,13 @@ static void litmus_tick(struct rq *rq, struct task_struct *p)
 		litmus->tick(p);
 }
 
+#define NO_CPU -1
+
 static void litmus_schedule(struct rq *rq, struct task_struct *prev)
 {
 	struct rq* other_rq;
 	long prev_state;
+	lt_t _maybe_deadlock = 0;
 	/* WARNING: rq is _not_ locked! */
 	if (is_realtime(prev))
 		update_time_litmus(rq, prev);
@@ -43,11 +46,43 @@ static void litmus_schedule(struct rq *rq, struct task_struct *prev)
 		 */
 		prev_state = prev->state;
 		spin_unlock(&rq->lock);
+
+		/* Don't race with a concurrent switch.
+		 * This could deadlock in the case of cross or circular migrations.
+		 * It's the job of the plugin to make sure that doesn't happen.
+		 */
+		TRACE_TASK(rq->litmus_next, "stack_in_use=%d\n",
+			   rq->litmus_next->rt_param.stack_in_use);
+		if (rq->litmus_next->rt_param.stack_in_use != NO_CPU) {
+			TRACE_TASK(rq->litmus_next, "waiting to deschedule\n");
+			_maybe_deadlock = litmus_clock();
+		}
+		while (rq->litmus_next->rt_param.stack_in_use != NO_CPU) {
+			cpu_relax();
+			mb();
+			if (rq->litmus_next->rt_param.stack_in_use == NO_CPU)
+				TRACE_TASK(rq->litmus_next, "descheduled. Proceeding.\n");
+			if (lt_before(_maybe_deadlock + 10000000, litmus_clock())) {
+				/* We've been spinning for 10ms.
+				 * Something can't be right!
+				 * Let's abandon the task and bail out; at least
+				 * we will have debug info instead of a hard deadlock.
+				 */
+				TRACE_TASK(rq->litmus_next,
+					   "stack too long in use. Deadlock?\n");
+				rq->litmus_next = NULL;
+
+				/* bail out */
+				spin_lock(&rq->lock);
+				return;
+			}
+		}
+
 		double_rq_lock(rq, other_rq);
 		if (prev->state != prev_state) {
 			TRACE_TASK(prev,
 				   "state changed while we dropped"
-				   " the lock: now=%d, old=%d",
+				   " the lock: now=%d, old=%d\n",
 				   prev->state, prev_state);
 			if (prev_state && !prev->state) {
 				/* prev task became unblocked
@@ -61,7 +96,7 @@ static void litmus_schedule(struct rq *rq, struct task_struct *prev)
 
 		set_task_cpu(rq->litmus_next, smp_processor_id());
 
-		/* now that we have the lock we need to make sure a
+		/* DEBUG: now that we have the lock we need to make sure a
 		 * couple of things still hold:
 		 *  - it is still a real-time task
 		 *  - it is still runnable (could have been stopped)
@@ -71,12 +106,16 @@ static void litmus_schedule(struct rq *rq, struct task_struct *prev)
 			/* BAD BAD BAD */
 			TRACE_TASK(rq->litmus_next,
 				   "migration invariant FAILED: rt=%d running=%d\n",
 				   is_realtime(rq->litmus_next),
 				   is_running(rq->litmus_next));
+			/* drop the task */
+			rq->litmus_next = NULL;
 		}
 		/* release the other CPU's runqueue, but keep ours */
 		spin_unlock(&other_rq->lock);
 	}
+	if (rq->litmus_next)
+		rq->litmus_next->rt_param.stack_in_use = rq->cpu;
 }
 
 static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, int wakeup)
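
The 10 ms bail-out in litmus_schedule() above is an instance of a bounded-spin watchdog: spin on a condition, but convert a suspected deadlock into a traceable failure instead of a silent lockup. A generic user-space sketch of the same pattern, assuming a monotonic nanosecond clock (spin_until and now_ns are hypothetical helpers, not kernel APIs):

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Spin until cond() holds or budget_ns elapses; returns false on
 * timeout so the caller can drop the work and leave debug evidence
 * behind, as litmus_schedule() does with rq->litmus_next. */
static bool spin_until(bool (*cond)(void), uint64_t budget_ns)
{
	uint64_t start = now_ns();

	while (!cond())
		if (now_ns() - start > budget_ns)
			return false;
	return true;
}

In the patch the budget is hard-coded as 10000000 ns (10 ms), and the timeout path emits a trace message and sets rq->litmus_next to NULL rather than returning an error code.
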