aboutsummaryrefslogtreecommitdiffstats
path: root/litmus/litmus.c
diff options
context:
space:
mode:
authorAndrea Bastoni <bastoni@cs.unc.edu>2010-06-10 13:24:54 -0400
committerAndrea Bastoni <bastoni@cs.unc.edu>2010-06-10 13:24:54 -0400
commit53d9688562a712a9b3e0265b51f2e60fec103799 (patch)
treec525e4527e4e3c85d1818389257de8f1ebd6c78c /litmus/litmus.c
parent5980b2330359d002e4e2278327ff80307504b848 (diff)
Avoid deadlock when switching task policy to BACKGROUND (ugly) [wip-master-2.6.33-rt]
When (re)setting a Litmus task's policy to BACKGROUND, kfree is called while in atomic context. This cannot be done in PreemptRT as it will deadlock (this commit fixes the trace below). Properly freeing the rt-task structures while not holding the proper locks breaks various assumptions in the code, and therefore these operations cannot easily be postponed (to a moment when we don't hold the locks). The solution is a bad hack: if a task's policy is reset to BACKGROUND, we leak some memory. Better solutions are very welcome. [ 52.850018] ======================================================= [ 52.850018] [ INFO: possible circular locking dependency detected ] [ 52.850018] 2.6.33.5-rt22-litmus2010 #441 [ 52.850018] ------------------------------------------------------- [ 52.850018] longtest_g/1637 is trying to acquire lock: [ 52.850018] (per_cpu__lock_slab_locked){......}, at: [<ffffffff8142b783>] rt_spin_lock_slowlock+0x33/0x360 [ 52.850018] [ 52.850018] but task is already holding lock: [ 52.850018] (&rq->lock){-...-.}, at: [<ffffffff8102ee52>] __sched_setscheduler+0x112/0x4f0 [ 52.850018] [ 52.850018] which lock already depends on the new lock. 
[ 52.850018] [ 52.850018] [ 52.850018] the existing dependency chain (in reverse order) is: [ 52.850018] [ 52.850018] -> #2 (&rq->lock){-...-.}: [ 52.850018] [<ffffffff81060954>] __lock_acquire+0x13c4/0x1cd0 [ 52.850018] [<ffffffff810612bc>] lock_acquire+0x5c/0x80 [ 52.850018] [<ffffffff8142cc76>] _raw_spin_lock+0x36/0x50 [ 52.850018] [<ffffffff8102551e>] task_rq_lock+0x5e/0xb0 [ 52.850018] [<ffffffff8102fa44>] try_to_wake_up+0x64/0x420 [ 52.850018] [<ffffffff8102fe65>] wake_up_process_mutex+0x15/0x20 [ 52.850018] [<ffffffff8106708e>] wakeup_next_waiter+0x9e/0x1b0 [ 52.850018] [<ffffffff8142b70f>] rt_spin_lock_slowunlock+0x4f/0x90 [ 52.850018] [<ffffffff8142c419>] rt_spin_unlock+0x49/0x50 [ 52.850018] [<ffffffff8102ceb4>] complete+0x44/0x50 [ 52.850018] [<ffffffff8104d07c>] kthread+0x7c/0xc0 [ 52.850018] [<ffffffff81003214>] kernel_thread_helper+0x4/0x10 [ 52.850018] [ 52.850018] -> #1 (&p->pi_lock){......}: [ 52.850018] [<ffffffff81060954>] __lock_acquire+0x13c4/0x1cd0 [ 52.850018] [<ffffffff810612bc>] lock_acquire+0x5c/0x80 [ 52.850018] [<ffffffff8142cc76>] _raw_spin_lock+0x36/0x50 [ 52.850018] [<ffffffff81066859>] task_blocks_on_rt_mutex+0x39/0x210 [ 52.850018] [<ffffffff8142b9c3>] rt_spin_lock_slowlock+0x273/0x360 [ 52.850018] [<ffffffff8142c383>] rt_spin_lock+0x43/0x90 [ 52.850018] [<ffffffff810ab23e>] _slab_irq_disable+0x4e/0x70 [ 52.850018] [<ffffffff810ab93f>] kmem_cache_free+0x1f/0xf0 [ 52.850018] [<ffffffff810b34a1>] file_free_rcu+0x31/0x40 [ 52.850018] [<ffffffff81074a48>] __rcu_process_callbacks+0x128/0x3a0 [ 52.850018] [<ffffffff81074d3b>] rcu_process_callbacks+0x7b/0x90 [ 52.850018] [<ffffffff8103a96f>] run_ksoftirqd+0x14f/0x310 [ 52.850018] [<ffffffff8104d0a6>] kthread+0xa6/0xc0 [ 52.850018] [<ffffffff81003214>] kernel_thread_helper+0x4/0x10 [ 52.850018] [ 52.850018] -> #0 (per_cpu__lock_slab_locked){......}: [ 52.850018] [<ffffffff8106120c>] __lock_acquire+0x1c7c/0x1cd0 [ 52.850018] [<ffffffff810612bc>] lock_acquire+0x5c/0x80 [ 52.850018] 
[<ffffffff8142cd91>] _raw_spin_lock_irqsave+0x41/0x60 [ 52.850018] [<ffffffff8142b783>] rt_spin_lock_slowlock+0x33/0x360 [ 52.850018] [<ffffffff8142c383>] rt_spin_lock+0x43/0x90 [ 52.850018] [<ffffffff810ab23e>] _slab_irq_disable+0x4e/0x70 [ 52.850018] [<ffffffff810ab93f>] kmem_cache_free+0x1f/0xf0 [ 52.850018] [<ffffffff811d1374>] litmus_exit_task+0x84/0x130 [ 52.850018] [<ffffffff8102f11f>] __sched_setscheduler+0x3df/0x4f0 [ 52.850018] [<ffffffff8102f24e>] sched_setscheduler+0xe/0x10 [ 52.850018] [<ffffffff8102f30d>] do_sched_setscheduler+0xbd/0x100 [ 52.850018] [<ffffffff8102f384>] sys_sched_setscheduler+0x14/0x20 [ 52.850018] [<ffffffff8100246b>] system_call_fastpath+0x16/0x1b [ 52.850018] [ 52.850018] other info that might help us debug this: [ 52.850018] [ 52.850018] 3 locks held by longtest_g/1637: [ 52.850018] #0: (rcu_read_lock){.+.+..}, at: [<ffffffff8102f2bf>] do_sched_setscheduler+0x6f/0x100 [ 52.850018] #1: (&p->pi_lock){......}, at: [<ffffffff8102ee0e>] __sched_setscheduler+0xce/0x4f0 [ 52.850018] #2: (&rq->lock){-...-.}, at: [<ffffffff8102ee52>] __sched_setscheduler+0x112/0x4f0 [ 52.850018] [ 52.850018] stack backtrace: [ 52.850018] Pid: 1637, comm: longtest_g Tainted: G W 2.6.33.5-rt22-litmus2010 #441 [ 52.850018] Call Trace: [ 52.850018] [<ffffffff8105ef10>] print_circular_bug+0x100/0x110 [ 52.850018] [<ffffffff8106120c>] __lock_acquire+0x1c7c/0x1cd0 [ 52.850018] [<ffffffff81429659>] ? printk+0x67/0x69 [ 52.850018] [<ffffffff8100246b>] ? system_call_fastpath+0x16/0x1b [ 52.850018] [<ffffffff8100246b>] ? system_call_fastpath+0x16/0x1b [ 52.850018] [<ffffffff810612bc>] lock_acquire+0x5c/0x80 [ 52.850018] [<ffffffff8142b783>] ? rt_spin_lock_slowlock+0x33/0x360 [ 52.850018] [<ffffffff8142cd91>] _raw_spin_lock_irqsave+0x41/0x60 [ 52.850018] [<ffffffff8142b783>] ? rt_spin_lock_slowlock+0x33/0x360 [ 52.850018] [<ffffffff81005649>] ? 
dump_trace+0x129/0x330 [ 52.850018] [<ffffffff8142b783>] rt_spin_lock_slowlock+0x33/0x360 [ 52.850018] [<ffffffff8142c383>] rt_spin_lock+0x43/0x90 [ 52.850018] [<ffffffff810ab23e>] _slab_irq_disable+0x4e/0x70 [ 52.850018] [<ffffffff810ab93f>] kmem_cache_free+0x1f/0xf0 [ 52.850018] [<ffffffff811d1374>] litmus_exit_task+0x84/0x130 [ 52.850018] [<ffffffff810242d8>] ? dequeue_task+0x48/0x90 [ 52.850018] [<ffffffff8102f11f>] __sched_setscheduler+0x3df/0x4f0 [ 52.850018] [<ffffffff8102f24e>] sched_setscheduler+0xe/0x10 [ 52.850018] [<ffffffff8102f30d>] do_sched_setscheduler+0xbd/0x100 [ 52.850018] [<ffffffff8102f2bf>] ? do_sched_setscheduler+0x6f/0x100 [ 52.850018] [<ffffffff8142bf02>] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 52.850018] [<ffffffff8102f384>] sys_sched_setscheduler+0x14/0x20 [ 52.850018] [<ffffffff8100246b>] system_call_fastpath+0x16/0x1b
Diffstat (limited to 'litmus/litmus.c')
-rw-r--r--litmus/litmus.c22
1 files changed, 18 insertions, 4 deletions
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 447f8e8af04d..fc63e6dee84a 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -386,7 +386,7 @@ out:
386 return retval; 386 return retval;
387} 387}
388 388
389void litmus_exit_task(struct task_struct* tsk) 389void litmus_exit_task(struct task_struct* tsk, int free_mem)
390{ 390{
391 if (is_realtime(tsk)) { 391 if (is_realtime(tsk)) {
392 sched_trace_task_completion(tsk, 1); 392 sched_trace_task_completion(tsk, 1);
@@ -394,8 +394,22 @@ void litmus_exit_task(struct task_struct* tsk)
394 litmus->task_exit(tsk); 394 litmus->task_exit(tsk);
395 395
396 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); 396 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
397 bheap_node_free(tsk_rt(tsk)->heap_node); 397 if (free_mem) {
398 release_heap_free(tsk_rt(tsk)->rel_heap); 398 bheap_node_free(tsk_rt(tsk)->heap_node);
399 release_heap_free(tsk_rt(tsk)->rel_heap);
400 } else {
401 /* XXX: in PreemptRT calling kfree here will deadlock;
402 * postponing the free until the rq->lock and the
403 * pi_lock are released doesn't work (it breaks
404 * other assumptions in the code). This is ugly,
405 * but for now we leak memory. To avoid the leakage
406 * the user should not reset the state of a litmus
407 * task to a background task.
408 */
409 tsk_rt(tsk)->heap_node = NULL;
410 hrtimer_cancel(&(tsk_rt(tsk)->rel_heap)->timer);
411 tsk_rt(tsk)->rel_heap = NULL;
412 }
399 413
400 atomic_dec(&rt_task_count); 414 atomic_dec(&rt_task_count);
401 reinit_litmus_state(tsk, 1); 415 reinit_litmus_state(tsk, 1);
@@ -500,7 +514,7 @@ void exit_litmus(struct task_struct *dead_tsk)
500 514
501 /* main cleanup only for RT tasks */ 515 /* main cleanup only for RT tasks */
502 if (is_realtime(dead_tsk)) 516 if (is_realtime(dead_tsk))
503 litmus_exit_task(dead_tsk); 517 litmus_exit_task(dead_tsk, 1);
504} 518}
505 519
506 520