| author | Bjoern Brandenburg <bbb@mpi-sws.org> | 2014-09-12 07:31:08 -0400 |
|---|---|---|
| committer | Namhoon Kim <namhoonk@cs.unc.edu> | 2014-11-03 21:59:04 -0500 |
| commit | 4841253863ef57e0b91d169b0080ce079d54fe6f | |
| tree | e086695ed0f01f996821e001918d53ed3da276d4 | |
| parent | 563999251e34d52bfbc47889cabd763714d020e1 | |
P-RES: fix rare deadlock via hrtimer_start()
There's a rare condition under which the current call to hrtimer_start()
in pres_update_timer() may result in deadlock:

```
pres_update_timer()              // holds runqueue lock and state->lock
  -> hrtimer_start()
    -> raise_softirq_irqoff()
      -> wakeup_softirqd()
        -> wake_up_process()
          -> acquires runqueue lock => deadlock
```
To avoid this, we need to call __hrtimer_start_range_ns() with the
'wakeup' flag set to zero. While at it, also drop state->lock before
calling into the hrtimer code, to avoid making the scheduler critical
section longer than necessary.
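A minimal sketch of the pattern this fix adopts (the struct and function names below are invented for illustration; only __hrtimer_start_range_ns() and its argument order are taken from the patch itself):

```c
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/* Simplified stand-in for struct pres_cpu_state. */
struct demo_state {
	raw_spinlock_t lock;
	struct hrtimer timer;
};

/* Called with state->lock held (and, on the schedule path, the runqueue
 * lock). Plain hrtimer_start() may raise HRTIMER_SOFTIRQ and wake
 * ksoftirqd, whose wake_up_process() needs the runqueue lock -- the
 * deadlock described above. */
static void demo_arm_timer_and_unlock(struct demo_state *state, u64 when_ns)
{
	/* Shorten the critical section: let go of the lock first. */
	raw_spin_unlock(&state->lock);

	/* wakeup == 0: the hrtimer core never wakes ksoftirqd on this path. */
	__hrtimer_start_range_ns(&state->timer, ns_to_ktime(when_ns),
				 0 /* slack */,
				 HRTIMER_MODE_ABS_PINNED,
				 0 /* wakeup */);
}
```

The resulting asymmetric locking contract (lock held on entry, released on return) is what the /* NOTE: drops state->lock */ comments in the patch flag at every call site.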
| -rw-r--r-- | litmus/sched_pres.c | 51 |

1 file changed, 32 insertions(+), 19 deletions(-)
```diff
diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
index 2c777ec34658..13a47a831794 100644
--- a/litmus/sched_pres.c
+++ b/litmus/sched_pres.c
@@ -63,7 +63,8 @@ static void task_arrives(struct task_struct *tsk)
 	res->ops->client_arrives(res, client);
 }
 
-static void pres_update_timer(struct pres_cpu_state *state)
+/* NOTE: drops state->lock */
+static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
 {
 	int local;
 	lt_t update, now;
@@ -77,6 +78,10 @@ static void pres_update_timer(struct pres_cpu_state *state)
 	 */
 	local = local_cpu_state() == state;
 
+	/* Must drop state lock before calling into hrtimer_start(), which
+	 * may raise a softirq, which in turn may wake ksoftirqd. */
+	raw_spin_unlock(&state->lock);
+
 	if (update <= now) {
 		litmus_reschedule(state->cpu);
 	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
@@ -86,8 +91,13 @@ static void pres_update_timer(struct pres_cpu_state *state)
 			TRACE("canceling timer...\n");
 			hrtimer_cancel(&state->timer);
 			TRACE("setting scheduler timer for %llu\n", update);
-			hrtimer_start(&state->timer, ns_to_ktime(update),
-				HRTIMER_MODE_ABS_PINNED);
+			/* We cannot use hrtimer_start() here because the
+			 * wakeup flag must be set to zero. */
+			__hrtimer_start_range_ns(&state->timer,
+					ns_to_ktime(update),
+					0 /* timer coalescing slack */,
+					HRTIMER_MODE_ABS_PINNED,
+					0 /* wakeup */);
 		}
 	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
 		/* Poke remote core only if timer needs to be set earlier than
@@ -170,14 +180,13 @@ static struct task_struct* pres_schedule(struct task_struct * prev)
 	/* figure out what to schedule next */
 	state->scheduled = sup_dispatch(&state->sup_env);
 
-	/* program scheduler timer */
-	state->sup_env.will_schedule = false;
-	pres_update_timer(state);
-
 	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
 	sched_state_task_picked();
 
-	raw_spin_unlock(&state->lock);
+	/* program scheduler timer */
+	state->sup_env.will_schedule = false;
+	/* NOTE: drops state->lock */
+	pres_update_timer_and_unlock(state);
 
 	if (prev != state->scheduled && is_realtime(prev))
 		TRACE_TASK(prev, "descheduled.\n");
@@ -222,9 +231,11 @@ static void pres_task_resume(struct task_struct *tsk)
 		 * at the moment. */
 		sup_update_time(&state->sup_env, litmus_clock());
 		task_arrives(tsk);
-		pres_update_timer(state);
-	}
-	raw_spin_unlock_irqrestore(&state->lock, flags);
+		/* NOTE: drops state->lock */
+		pres_update_timer_and_unlock(state);
+		local_irq_restore(flags);
+	} else
+		raw_spin_unlock_irqrestore(&state->lock, flags);
 
 	resume_legacy_task_model_updates(tsk);
 }
@@ -315,10 +326,11 @@ static void pres_task_new(struct task_struct *tsk, int on_runqueue,
 		 * [see comment in pres_task_resume()] */
 		sup_update_time(&state->sup_env, litmus_clock());
 		task_arrives(tsk);
-		pres_update_timer(state);
-	}
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
+		/* NOTE: drops state->lock */
+		pres_update_timer_and_unlock(state);
+		local_irq_restore(flags);
+	} else
+		raw_spin_unlock_irqrestore(&state->lock, flags);
 
 	task_new_legacy_task_model_updates(tsk);
 }
@@ -340,10 +352,11 @@ static void pres_task_exit(struct task_struct *tsk)
 		 * [see comment in pres_task_resume()] */
 		sup_update_time(&state->sup_env, litmus_clock());
 		task_departs(tsk, 0);
-		pres_update_timer(state);
-	}
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
+		/* NOTE: drops state->lock */
+		pres_update_timer_and_unlock(state);
+		local_irq_restore(flags);
+	} else
+		raw_spin_unlock_irqrestore(&state->lock, flags);
 
 	kfree(tsk_rt(tsk)->plugin_state);
 	tsk_rt(tsk)->plugin_state = NULL;
```
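The call sites then follow a matching discipline: once the helper has dropped state->lock, only the interrupt flags saved by raw_spin_lock_irqsave() remain to be restored. A condensed, hypothetical sketch of that contract (reusing the demo types from the sketch above; `should_update` stands in for the plugin's actual checks):

```c
static void demo_call_site(struct demo_state *state, bool should_update,
			   u64 when_ns)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&state->lock, flags);
	if (should_update) {
		/* ... time accounting, arrival/departure bookkeeping ... */
		demo_arm_timer_and_unlock(state, when_ns); /* drops the lock */
		local_irq_restore(flags); /* lock already released above */
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);
}
```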
