author		Andrea Bastoni <bastoni@cs.unc.edu>	2010-06-10 13:24:54 -0400
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-06-10 13:24:54 -0400
commit		53d9688562a712a9b3e0265b51f2e60fec103799
tree		c525e4527e4e3c85d1818389257de8f1ebd6c78c /kernel/sched.c
parent		5980b2330359d002e4e2278327ff80307504b848
branch		wip-master-2.6.33-rt

Avoid deadlock when switching task policy to BACKGROUND (ugly)
When (re)setting a LITMUS task's policy to BACKGROUND, kfree is called while in
atomic context. This cannot be done on PreemptRT, where it deadlocks: the slab
locks are sleeping rt-mutexes there and must not be taken while rq->lock is
held (this commit fixes the trace below). Properly freeing the rt-task
structures while not holding the proper locks breaks various assumptions in
the code, so these operations cannot easily be postponed to a point where the
locks are no longer held. The solution is a bad hack: if a task's policy is
reset to BACKGROUND, we leak some memory. Better solutions are very welcome.

[ 52.850018] =======================================================
[ 52.850018] [ INFO: possible circular locking dependency detected ]
[ 52.850018] 2.6.33.5-rt22-litmus2010 #441
[ 52.850018] -------------------------------------------------------
[ 52.850018] longtest_g/1637 is trying to acquire lock:
[ 52.850018]  (per_cpu__lock_slab_locked){......}, at: [<ffffffff8142b783>] rt_spin_lock_slowlock+0x33/0x360
[ 52.850018]
[ 52.850018] but task is already holding lock:
[ 52.850018]  (&rq->lock){-...-.}, at: [<ffffffff8102ee52>] __sched_setscheduler+0x112/0x4f0
[ 52.850018]
[ 52.850018] which lock already depends on the new lock.
[ 52.850018]
[ 52.850018]
[ 52.850018] the existing dependency chain (in reverse order) is:
[ 52.850018]
[ 52.850018] -> #2 (&rq->lock){-...-.}:
[ 52.850018]        [<ffffffff81060954>] __lock_acquire+0x13c4/0x1cd0
[ 52.850018]        [<ffffffff810612bc>] lock_acquire+0x5c/0x80
[ 52.850018]        [<ffffffff8142cc76>] _raw_spin_lock+0x36/0x50
[ 52.850018]        [<ffffffff8102551e>] task_rq_lock+0x5e/0xb0
[ 52.850018]        [<ffffffff8102fa44>] try_to_wake_up+0x64/0x420
[ 52.850018]        [<ffffffff8102fe65>] wake_up_process_mutex+0x15/0x20
[ 52.850018]        [<ffffffff8106708e>] wakeup_next_waiter+0x9e/0x1b0
[ 52.850018]        [<ffffffff8142b70f>] rt_spin_lock_slowunlock+0x4f/0x90
[ 52.850018]        [<ffffffff8142c419>] rt_spin_unlock+0x49/0x50
[ 52.850018]        [<ffffffff8102ceb4>] complete+0x44/0x50
[ 52.850018]        [<ffffffff8104d07c>] kthread+0x7c/0xc0
[ 52.850018]        [<ffffffff81003214>] kernel_thread_helper+0x4/0x10
[ 52.850018]
[ 52.850018] -> #1 (&p->pi_lock){......}:
[ 52.850018]        [<ffffffff81060954>] __lock_acquire+0x13c4/0x1cd0
[ 52.850018]        [<ffffffff810612bc>] lock_acquire+0x5c/0x80
[ 52.850018]        [<ffffffff8142cc76>] _raw_spin_lock+0x36/0x50
[ 52.850018]        [<ffffffff81066859>] task_blocks_on_rt_mutex+0x39/0x210
[ 52.850018]        [<ffffffff8142b9c3>] rt_spin_lock_slowlock+0x273/0x360
[ 52.850018]        [<ffffffff8142c383>] rt_spin_lock+0x43/0x90
[ 52.850018]        [<ffffffff810ab23e>] _slab_irq_disable+0x4e/0x70
[ 52.850018]        [<ffffffff810ab93f>] kmem_cache_free+0x1f/0xf0
[ 52.850018]        [<ffffffff810b34a1>] file_free_rcu+0x31/0x40
[ 52.850018]        [<ffffffff81074a48>] __rcu_process_callbacks+0x128/0x3a0
[ 52.850018]        [<ffffffff81074d3b>] rcu_process_callbacks+0x7b/0x90
[ 52.850018]        [<ffffffff8103a96f>] run_ksoftirqd+0x14f/0x310
[ 52.850018]        [<ffffffff8104d0a6>] kthread+0xa6/0xc0
[ 52.850018]        [<ffffffff81003214>] kernel_thread_helper+0x4/0x10
[ 52.850018]
[ 52.850018] -> #0 (per_cpu__lock_slab_locked){......}:
[ 52.850018]        [<ffffffff8106120c>] __lock_acquire+0x1c7c/0x1cd0
[ 52.850018]        [<ffffffff810612bc>] lock_acquire+0x5c/0x80
[ 52.850018]        [<ffffffff8142cd91>] _raw_spin_lock_irqsave+0x41/0x60
[ 52.850018]        [<ffffffff8142b783>] rt_spin_lock_slowlock+0x33/0x360
[ 52.850018]        [<ffffffff8142c383>] rt_spin_lock+0x43/0x90
[ 52.850018]        [<ffffffff810ab23e>] _slab_irq_disable+0x4e/0x70
[ 52.850018]        [<ffffffff810ab93f>] kmem_cache_free+0x1f/0xf0
[ 52.850018]        [<ffffffff811d1374>] litmus_exit_task+0x84/0x130
[ 52.850018]        [<ffffffff8102f11f>] __sched_setscheduler+0x3df/0x4f0
[ 52.850018]        [<ffffffff8102f24e>] sched_setscheduler+0xe/0x10
[ 52.850018]        [<ffffffff8102f30d>] do_sched_setscheduler+0xbd/0x100
[ 52.850018]        [<ffffffff8102f384>] sys_sched_setscheduler+0x14/0x20
[ 52.850018]        [<ffffffff8100246b>] system_call_fastpath+0x16/0x1b
[ 52.850018]
[ 52.850018] other info that might help us debug this:
[ 52.850018]
[ 52.850018] 3 locks held by longtest_g/1637:
[ 52.850018]  #0:  (rcu_read_lock){.+.+..}, at: [<ffffffff8102f2bf>] do_sched_setscheduler+0x6f/0x100
[ 52.850018]  #1:  (&p->pi_lock){......}, at: [<ffffffff8102ee0e>] __sched_setscheduler+0xce/0x4f0
[ 52.850018]  #2:  (&rq->lock){-...-.}, at: [<ffffffff8102ee52>] __sched_setscheduler+0x112/0x4f0
[ 52.850018]
[ 52.850018] stack backtrace:
[ 52.850018] Pid: 1637, comm: longtest_g Tainted: G W 2.6.33.5-rt22-litmus2010 #441
[ 52.850018] Call Trace:
[ 52.850018]  [<ffffffff8105ef10>] print_circular_bug+0x100/0x110
[ 52.850018]  [<ffffffff8106120c>] __lock_acquire+0x1c7c/0x1cd0
[ 52.850018]  [<ffffffff81429659>] ? printk+0x67/0x69
[ 52.850018]  [<ffffffff8100246b>] ? system_call_fastpath+0x16/0x1b
[ 52.850018]  [<ffffffff8100246b>] ? system_call_fastpath+0x16/0x1b
[ 52.850018]  [<ffffffff810612bc>] lock_acquire+0x5c/0x80
[ 52.850018]  [<ffffffff8142b783>] ? rt_spin_lock_slowlock+0x33/0x360
[ 52.850018]  [<ffffffff8142cd91>] _raw_spin_lock_irqsave+0x41/0x60
[ 52.850018]  [<ffffffff8142b783>] ? rt_spin_lock_slowlock+0x33/0x360
[ 52.850018]  [<ffffffff81005649>] ? dump_trace+0x129/0x330
[ 52.850018]  [<ffffffff8142b783>] rt_spin_lock_slowlock+0x33/0x360
[ 52.850018]  [<ffffffff8142c383>] rt_spin_lock+0x43/0x90
[ 52.850018]  [<ffffffff810ab23e>] _slab_irq_disable+0x4e/0x70
[ 52.850018]  [<ffffffff810ab93f>] kmem_cache_free+0x1f/0xf0
[ 52.850018]  [<ffffffff811d1374>] litmus_exit_task+0x84/0x130
[ 52.850018]  [<ffffffff810242d8>] ? dequeue_task+0x48/0x90
[ 52.850018]  [<ffffffff8102f11f>] __sched_setscheduler+0x3df/0x4f0
[ 52.850018]  [<ffffffff8102f24e>] sched_setscheduler+0xe/0x10
[ 52.850018]  [<ffffffff8102f30d>] do_sched_setscheduler+0xbd/0x100
[ 52.850018]  [<ffffffff8102f2bf>] ? do_sched_setscheduler+0x6f/0x100
[ 52.850018]  [<ffffffff8142bf02>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[ 52.850018]  [<ffffffff8102f384>] sys_sched_setscheduler+0x14/0x20
[ 52.850018]  [<ffffffff8100246b>] system_call_fastpath+0x16/0x1b
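
For reference, a minimal sketch of the shape of the hack. Only the
two-argument call litmus_exit_task(p, 0) is taken from the diff below; the
flag name free_mem, the rt_task_cache slab cache, and the rt_state field are
illustrative assumptions, not the actual LITMUS^RT code:

	/*
	 * Sketch only -- not the actual LITMUS^RT implementation.
	 */
	void litmus_exit_task(struct task_struct *p, int free_mem)
	{
		/* ... unlink p from the active plugin, cancel its release
		 * timer, and do the rest of the usual exit work ... */

		if (free_mem) {
			/* Normal exit path: rq->lock is not held, so the
			 * sleeping slab lock that kmem_cache_free() takes
			 * under PREEMPT_RT may safely be acquired. */
			kmem_cache_free(rt_task_cache, p->rt_state);
		}
		/*
		 * Otherwise we were called from __sched_setscheduler() with
		 * p->pi_lock and rq->lock held: taking the slab rt-mutex
		 * here is exactly the circular dependency lockdep reports
		 * above, so the object is deliberately leaked instead.
		 */
		p->rt_state = NULL;
	}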
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	4
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 0166daa4a374..5969c8b25689 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -905,7 +905,7 @@ static __read_mostly int scheduler_running;
  *
  * Litmus RT: with the Preempt-RT patch we rely on rt-threads
  * to set timers. If the RT throttling gets activated we experience
  * very long latencies. Change from 950000 to -1 -> RUNTIME_INF
  */
 int sysctl_sched_rt_runtime = -1;
 
@@ -6805,7 +6805,7 @@ recheck:
 	p->sched_reset_on_fork = reset_on_fork;
 
 	if (p->policy == SCHED_LITMUS)
-		litmus_exit_task(p);
+		litmus_exit_task(p, 0);
 
 	oldprio = p->prio;
 	prev_class = p->sched_class;
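
For context, the deadlocked path is entered from user space when a task
running under SCHED_LITMUS resets itself to a normal ("background") policy,
as the sys_sched_setscheduler frames in the lockdep report show. A minimal
sketch of such a trigger, with illustrative values:

	#include <sched.h>
	#include <stdio.h>
	#include <string.h>

	/*
	 * Sketch of the user-space trigger: resetting a SCHED_LITMUS task
	 * to SCHED_OTHER drives sys_sched_setscheduler() ->
	 * __sched_setscheduler() -> litmus_exit_task(), the call chain in
	 * the lockdep report above.
	 */
	int main(void)
	{
		struct sched_param param;

		memset(&param, 0, sizeof(param));
		param.sched_priority = 0;	/* SCHED_OTHER requires priority 0 */

		/* pid 0 means the calling thread. */
		if (sched_setscheduler(0, SCHED_OTHER, &param))
			perror("sched_setscheduler");

		return 0;
	}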