diff options
author | Peter Zijlstra <peterz@infradead.org> | 2011-05-30 07:34:51 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-05-31 04:01:48 -0400 |
commit | d72bce0e67e8afc6eb959f656013cbb577426f1e (patch) | |
tree | 9c93d4df9aa895d6f2f555e0cf50e7ae5ebaded4 /kernel | |
parent | 55922c9d1b84b89cb946c777fddccb3247e7df2c (diff) |
rcu: Cure load woes
Commit cc3ce5176d83 (rcu: Start RCU kthreads in TASK_INTERRUPTIBLE
state) fudges a sleeping task's state, resulting in the scheduler seeing
a TASK_UNINTERRUPTIBLE task going to sleep, but a TASK_INTERRUPTIBLE
task waking up. The result is unbalanced load calculation.
The problem that patch tried to address is that the RCU threads could
stay in UNINTERRUPTIBLE state for quite a while and trigger the hung
task detector due to on-demand wake-ups.
Cure the problem differently by always giving the tasks at least one
wake-up once the CPU is fully up and running; this will kick them out of
the initial UNINTERRUPTIBLE state and into the regular INTERRUPTIBLE
wait state.
[ The alternative would be teaching kthread_create() to start threads as
INTERRUPTIBLE but that needs a tad more thought. ]
Reported-by: Damien Wyart <damien.wyart@free.fr>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul E. McKenney <paul.mckenney@linaro.org>
Link: http://lkml.kernel.org/r/1306755291.1200.2872.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcutree.c | 54 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 11 |
2 files changed, 56 insertions, 9 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 77a7671dd147..89419ff92e99 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | |||
1648 | if (IS_ERR(t)) | 1648 | if (IS_ERR(t)) |
1649 | return PTR_ERR(t); | 1649 | return PTR_ERR(t); |
1650 | kthread_bind(t, cpu); | 1650 | kthread_bind(t, cpu); |
1651 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1652 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | 1651 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; |
1653 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | 1652 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); |
1654 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | 1653 | per_cpu(rcu_cpu_kthread_task, cpu) = t; |
@@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1756 | if (IS_ERR(t)) | 1755 | if (IS_ERR(t)) |
1757 | return PTR_ERR(t); | 1756 | return PTR_ERR(t); |
1758 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1757 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1759 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1760 | rnp->node_kthread_task = t; | 1758 | rnp->node_kthread_task = t; |
1761 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1759 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1762 | sp.sched_priority = 99; | 1760 | sp.sched_priority = 99; |
@@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1765 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | 1763 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); |
1766 | } | 1764 | } |
1767 | 1765 | ||
1766 | static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); | ||
1767 | |||
1768 | /* | 1768 | /* |
1769 | * Spawn all kthreads -- called as soon as the scheduler is running. | 1769 | * Spawn all kthreads -- called as soon as the scheduler is running. |
1770 | */ | 1770 | */ |
@@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void) | |||
1772 | { | 1772 | { |
1773 | int cpu; | 1773 | int cpu; |
1774 | struct rcu_node *rnp; | 1774 | struct rcu_node *rnp; |
1775 | struct task_struct *t; | ||
1775 | 1776 | ||
1776 | rcu_kthreads_spawnable = 1; | 1777 | rcu_kthreads_spawnable = 1; |
1777 | for_each_possible_cpu(cpu) { | 1778 | for_each_possible_cpu(cpu) { |
1778 | per_cpu(rcu_cpu_has_work, cpu) = 0; | 1779 | per_cpu(rcu_cpu_has_work, cpu) = 0; |
1779 | if (cpu_online(cpu)) | 1780 | if (cpu_online(cpu)) { |
1780 | (void)rcu_spawn_one_cpu_kthread(cpu); | 1781 | (void)rcu_spawn_one_cpu_kthread(cpu); |
1782 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1783 | if (t) | ||
1784 | wake_up_process(t); | ||
1785 | } | ||
1781 | } | 1786 | } |
1782 | rnp = rcu_get_root(rcu_state); | 1787 | rnp = rcu_get_root(rcu_state); |
1783 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1788 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1789 | if (rnp->node_kthread_task) | ||
1790 | wake_up_process(rnp->node_kthread_task); | ||
1784 | if (NUM_RCU_NODES > 1) { | 1791 | if (NUM_RCU_NODES > 1) { |
1785 | rcu_for_each_leaf_node(rcu_state, rnp) | 1792 | rcu_for_each_leaf_node(rcu_state, rnp) { |
1786 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1793 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1794 | t = rnp->node_kthread_task; | ||
1795 | if (t) | ||
1796 | wake_up_process(t); | ||
1797 | rcu_wake_one_boost_kthread(rnp); | ||
1798 | } | ||
1787 | } | 1799 | } |
1788 | return 0; | 1800 | return 0; |
1789 | } | 1801 | } |
@@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2188 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2200 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
2189 | } | 2201 | } |
2190 | 2202 | ||
2191 | static void __cpuinit rcu_online_cpu(int cpu) | 2203 | static void __cpuinit rcu_prepare_cpu(int cpu) |
2192 | { | 2204 | { |
2193 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); | 2205 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); |
2194 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); | 2206 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); |
2195 | rcu_preempt_init_percpu_data(cpu); | 2207 | rcu_preempt_init_percpu_data(cpu); |
2196 | } | 2208 | } |
2197 | 2209 | ||
2198 | static void __cpuinit rcu_online_kthreads(int cpu) | 2210 | static void __cpuinit rcu_prepare_kthreads(int cpu) |
2199 | { | 2211 | { |
2200 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 2212 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
2201 | struct rcu_node *rnp = rdp->mynode; | 2213 | struct rcu_node *rnp = rdp->mynode; |
@@ -2209,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) | |||
2209 | } | 2221 | } |
2210 | 2222 | ||
2211 | /* | 2223 | /* |
2224 | * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, | ||
2225 | * but the RCU threads are woken on demand, and if demand is low this | ||
2226 | * could be a while triggering the hung task watchdog. | ||
2227 | * | ||
2228 | * In order to avoid this, poke all tasks once the CPU is fully | ||
2229 | * up and running. | ||
2230 | */ | ||
2231 | static void __cpuinit rcu_online_kthreads(int cpu) | ||
2232 | { | ||
2233 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
2234 | struct rcu_node *rnp = rdp->mynode; | ||
2235 | struct task_struct *t; | ||
2236 | |||
2237 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
2238 | if (t) | ||
2239 | wake_up_process(t); | ||
2240 | |||
2241 | t = rnp->node_kthread_task; | ||
2242 | if (t) | ||
2243 | wake_up_process(t); | ||
2244 | |||
2245 | rcu_wake_one_boost_kthread(rnp); | ||
2246 | } | ||
2247 | |||
2248 | /* | ||
2212 | * Handle CPU online/offline notification events. | 2249 | * Handle CPU online/offline notification events. |
2213 | */ | 2250 | */ |
2214 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | 2251 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, |
@@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2221 | switch (action) { | 2258 | switch (action) { |
2222 | case CPU_UP_PREPARE: | 2259 | case CPU_UP_PREPARE: |
2223 | case CPU_UP_PREPARE_FROZEN: | 2260 | case CPU_UP_PREPARE_FROZEN: |
2224 | rcu_online_cpu(cpu); | 2261 | rcu_prepare_cpu(cpu); |
2225 | rcu_online_kthreads(cpu); | 2262 | rcu_prepare_kthreads(cpu); |
2226 | break; | 2263 | break; |
2227 | case CPU_ONLINE: | 2264 | case CPU_ONLINE: |
2265 | rcu_online_kthreads(cpu); | ||
2228 | case CPU_DOWN_FAILED: | 2266 | case CPU_DOWN_FAILED: |
2229 | rcu_node_kthread_setaffinity(rnp, -1); | 2267 | rcu_node_kthread_setaffinity(rnp, -1); |
2230 | rcu_cpu_kthread_setrt(cpu, 1); | 2268 | rcu_cpu_kthread_setrt(cpu, 1); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a767b7dac365..c8bff3099a89 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1295 | if (IS_ERR(t)) | 1295 | if (IS_ERR(t)) |
1296 | return PTR_ERR(t); | 1296 | return PTR_ERR(t); |
1297 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1297 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1298 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1299 | rnp->boost_kthread_task = t; | 1298 | rnp->boost_kthread_task = t; |
1300 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1299 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1301 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1300 | sp.sched_priority = RCU_KTHREAD_PRIO; |
@@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1303 | return 0; | 1302 | return 0; |
1304 | } | 1303 | } |
1305 | 1304 | ||
1305 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | ||
1306 | { | ||
1307 | if (rnp->boost_kthread_task) | ||
1308 | wake_up_process(rnp->boost_kthread_task); | ||
1309 | } | ||
1310 | |||
1306 | #else /* #ifdef CONFIG_RCU_BOOST */ | 1311 | #else /* #ifdef CONFIG_RCU_BOOST */ |
1307 | 1312 | ||
1308 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | 1313 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
@@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1326 | return 0; | 1331 | return 0; |
1327 | } | 1332 | } |
1328 | 1333 | ||
1334 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | ||
1335 | { | ||
1336 | } | ||
1337 | |||
1329 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | 1338 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
1330 | 1339 | ||
1331 | #ifndef CONFIG_SMP | 1340 | #ifndef CONFIG_SMP |