diff options
author | Bjoern Brandenburg <bbb@mpi-sws.org> | 2014-09-12 07:31:08 -0400 |
---|---|---|
committer | Bjoern Brandenburg <bbb@mpi-sws.org> | 2014-09-12 07:45:29 -0400 |
commit | adc5c2babea29bfb1dc3297e41500b95c146fb41 (patch) | |
tree | b589601fd0907b77c551dc550b95f9244a5415a0 | |
parent | 9ec32807edf33064734a815c949668704a07c5f3 (diff) |
P-RES: fix rare deadlock via hrtimer_start()
There's a rare condition under which the current call to hrtimer_start()
in pres_update_timer() may result in deadlock.
pres_update_timer() // holds runqueue lock and state->lock
-> hrtimer_start()
-> raise_softirq_irqoff()
-> wakeup_softirqd()
-> wake_up_process()
-> acquires runqueue lock
To avoid this, we need to call __hrtimer_start_range_ns() with the
'wakeup' flag set to zero.
While at it, also drop the state->lock before calling into hrtimer(),
to avoid making the scheduler critical section longer than necessary.
-rw-r--r-- | litmus/sched_pres.c | 51 |
1 file changed, 32 insertions, 19 deletions
diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c index 2c777ec34658..13a47a831794 100644 --- a/litmus/sched_pres.c +++ b/litmus/sched_pres.c | |||
@@ -63,7 +63,8 @@ static void task_arrives(struct task_struct *tsk) | |||
63 | res->ops->client_arrives(res, client); | 63 | res->ops->client_arrives(res, client); |
64 | } | 64 | } |
65 | 65 | ||
66 | static void pres_update_timer(struct pres_cpu_state *state) | 66 | /* NOTE: drops state->lock */ |
67 | static void pres_update_timer_and_unlock(struct pres_cpu_state *state) | ||
67 | { | 68 | { |
68 | int local; | 69 | int local; |
69 | lt_t update, now; | 70 | lt_t update, now; |
@@ -77,6 +78,10 @@ static void pres_update_timer(struct pres_cpu_state *state) | |||
77 | */ | 78 | */ |
78 | local = local_cpu_state() == state; | 79 | local = local_cpu_state() == state; |
79 | 80 | ||
81 | /* Must drop state lock before calling into hrtimer_start(), which | ||
82 | * may raise a softirq, which in turn may wake ksoftirqd. */ | ||
83 | raw_spin_unlock(&state->lock); | ||
84 | |||
80 | if (update <= now) { | 85 | if (update <= now) { |
81 | litmus_reschedule(state->cpu); | 86 | litmus_reschedule(state->cpu); |
82 | } else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) { | 87 | } else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) { |
@@ -86,8 +91,13 @@ static void pres_update_timer(struct pres_cpu_state *state) | |||
86 | TRACE("canceling timer...\n"); | 91 | TRACE("canceling timer...\n"); |
87 | hrtimer_cancel(&state->timer); | 92 | hrtimer_cancel(&state->timer); |
88 | TRACE("setting scheduler timer for %llu\n", update); | 93 | TRACE("setting scheduler timer for %llu\n", update); |
89 | hrtimer_start(&state->timer, ns_to_ktime(update), | 94 | /* We cannot use hrtimer_start() here because the |
90 | HRTIMER_MODE_ABS_PINNED); | 95 | * wakeup flag must be set to zero. */ |
96 | __hrtimer_start_range_ns(&state->timer, | ||
97 | ns_to_ktime(update), | ||
98 | 0 /* timer coalescing slack */, | ||
99 | HRTIMER_MODE_ABS_PINNED, | ||
100 | 0 /* wakeup */); | ||
91 | } | 101 | } |
92 | } else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) { | 102 | } else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) { |
93 | /* Poke remote core only if timer needs to be set earlier than | 103 | /* Poke remote core only if timer needs to be set earlier than |
@@ -170,14 +180,13 @@ static struct task_struct* pres_schedule(struct task_struct * prev) | |||
170 | /* figure out what to schedule next */ | 180 | /* figure out what to schedule next */ |
171 | state->scheduled = sup_dispatch(&state->sup_env); | 181 | state->scheduled = sup_dispatch(&state->sup_env); |
172 | 182 | ||
173 | /* program scheduler timer */ | ||
174 | state->sup_env.will_schedule = false; | ||
175 | pres_update_timer(state); | ||
176 | |||
177 | /* Notify LITMUS^RT core that we've arrived at a scheduling decision. */ | 183 | /* Notify LITMUS^RT core that we've arrived at a scheduling decision. */ |
178 | sched_state_task_picked(); | 184 | sched_state_task_picked(); |
179 | 185 | ||
180 | raw_spin_unlock(&state->lock); | 186 | /* program scheduler timer */ |
187 | state->sup_env.will_schedule = false; | ||
188 | /* NOTE: drops state->lock */ | ||
189 | pres_update_timer_and_unlock(state); | ||
181 | 190 | ||
182 | if (prev != state->scheduled && is_realtime(prev)) | 191 | if (prev != state->scheduled && is_realtime(prev)) |
183 | TRACE_TASK(prev, "descheduled.\n"); | 192 | TRACE_TASK(prev, "descheduled.\n"); |
@@ -222,9 +231,11 @@ static void pres_task_resume(struct task_struct *tsk) | |||
222 | * at the moment. */ | 231 | * at the moment. */ |
223 | sup_update_time(&state->sup_env, litmus_clock()); | 232 | sup_update_time(&state->sup_env, litmus_clock()); |
224 | task_arrives(tsk); | 233 | task_arrives(tsk); |
225 | pres_update_timer(state); | 234 | /* NOTE: drops state->lock */ |
226 | } | 235 | pres_update_timer_and_unlock(state); |
227 | raw_spin_unlock_irqrestore(&state->lock, flags); | 236 | local_irq_restore(flags); |
237 | } else | ||
238 | raw_spin_unlock_irqrestore(&state->lock, flags); | ||
228 | 239 | ||
229 | resume_legacy_task_model_updates(tsk); | 240 | resume_legacy_task_model_updates(tsk); |
230 | } | 241 | } |
@@ -315,10 +326,11 @@ static void pres_task_new(struct task_struct *tsk, int on_runqueue, | |||
315 | * [see comment in pres_task_resume()] */ | 326 | * [see comment in pres_task_resume()] */ |
316 | sup_update_time(&state->sup_env, litmus_clock()); | 327 | sup_update_time(&state->sup_env, litmus_clock()); |
317 | task_arrives(tsk); | 328 | task_arrives(tsk); |
318 | pres_update_timer(state); | 329 | /* NOTE: drops state->lock */ |
319 | } | 330 | pres_update_timer_and_unlock(state); |
320 | 331 | local_irq_restore(flags); | |
321 | raw_spin_unlock_irqrestore(&state->lock, flags); | 332 | } else |
333 | raw_spin_unlock_irqrestore(&state->lock, flags); | ||
322 | 334 | ||
323 | task_new_legacy_task_model_updates(tsk); | 335 | task_new_legacy_task_model_updates(tsk); |
324 | } | 336 | } |
@@ -340,10 +352,11 @@ static void pres_task_exit(struct task_struct *tsk) | |||
340 | * [see comment in pres_task_resume()] */ | 352 | * [see comment in pres_task_resume()] */ |
341 | sup_update_time(&state->sup_env, litmus_clock()); | 353 | sup_update_time(&state->sup_env, litmus_clock()); |
342 | task_departs(tsk, 0); | 354 | task_departs(tsk, 0); |
343 | pres_update_timer(state); | 355 | /* NOTE: drops state->lock */ |
344 | } | 356 | pres_update_timer_and_unlock(state); |
345 | 357 | local_irq_restore(flags); | |
346 | raw_spin_unlock_irqrestore(&state->lock, flags); | 358 | } else |
359 | raw_spin_unlock_irqrestore(&state->lock, flags); | ||
347 | 360 | ||
348 | kfree(tsk_rt(tsk)->plugin_state); | 361 | kfree(tsk_rt(tsk)->plugin_state); |
349 | tsk_rt(tsk)->plugin_state = NULL; | 362 | tsk_rt(tsk)->plugin_state = NULL; |