author     Linus Torvalds <torvalds@linux-foundation.org>  2011-07-20 18:56:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-07-20 18:56:25 -0400
commit     cf6ace16a3cd8b728fb0afa68368fd40bbeae19f (patch)
tree       489c64380668e8c5a29d3f36f37554e4b081a647
parent     acc11eab70591744369722280c9ce162a6193494 (diff)
parent     d1e9ae47a0285d3f1699e8219ce50f656243b93f (diff)
Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
signal: align __lock_task_sighand() irq disabling and RCU
softirq,rcu: Inform RCU of irq_exit() activity
sched: Add irq_{enter,exit}() to scheduler_ipi()
rcu: protect __rcu_read_unlock() against scheduler-using irq handlers
rcu: Streamline code produced by __rcu_read_unlock()
rcu: Fix RCU_BOOST race handling current->rcu_read_unlock_special
rcu: decrease rcu_report_exp_rnp coupling with scheduler
-rw-r--r--  include/linux/sched.h   |  3
-rw-r--r--  kernel/rcutree_plugin.h | 53
-rw-r--r--  kernel/sched.c          | 44
-rw-r--r--  kernel/signal.c         | 19
-rw-r--r--  kernel/softirq.c        | 12
5 files changed, 103 insertions, 28 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bde99d5358dc..14a6c7b545de 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1260,6 +1260,9 @@ struct task_struct {
 #ifdef CONFIG_PREEMPT_RCU
         int rcu_read_lock_nesting;
         char rcu_read_unlock_special;
+#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
+        int rcu_boosted;
+#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
         struct list_head rcu_node_entry;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 75113cb7c4fb..8aafbb80b8b0 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,6 +68,7 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
+static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
 /*
@@ -147,7 +148,7 @@ static void rcu_preempt_note_context_switch(int cpu)
         struct rcu_data *rdp;
         struct rcu_node *rnp;
 
-        if (t->rcu_read_lock_nesting &&
+        if (t->rcu_read_lock_nesting > 0 &&
             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
                 /* Possibly blocking in an RCU read-side critical section. */
@@ -190,6 +191,14 @@ static void rcu_preempt_note_context_switch(int cpu)
                                 rnp->gp_tasks = &t->rcu_node_entry;
                 }
                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        } else if (t->rcu_read_lock_nesting < 0 &&
+                   t->rcu_read_unlock_special) {
+
+                /*
+                 * Complete exit from RCU read-side critical section on
+                 * behalf of preempted instance of __rcu_read_unlock().
+                 */
+                rcu_read_unlock_special(t);
         }
 
         /*
@@ -284,7 +293,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static noinline void rcu_read_unlock_special(struct task_struct *t)
 {
         int empty;
         int empty_exp;
@@ -309,7 +318,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
         }
 
         /* Hardware IRQ handlers cannot block. */
-        if (in_irq()) {
+        if (in_irq() || in_serving_softirq()) {
                 local_irq_restore(flags);
                 return;
         }
@@ -342,6 +351,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
                 if (&t->rcu_node_entry == rnp->boost_tasks)
                         rnp->boost_tasks = np;
+                /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
+                if (t->rcu_boosted) {
+                        special |= RCU_READ_UNLOCK_BOOSTED;
+                        t->rcu_boosted = 0;
+                }
 #endif /* #ifdef CONFIG_RCU_BOOST */
                 t->rcu_blocked_node = NULL;
 
@@ -358,7 +372,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
                 /* Unboost if we were boosted. */
                 if (special & RCU_READ_UNLOCK_BOOSTED) {
-                        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
                         rt_mutex_unlock(t->rcu_boost_mutex);
                         t->rcu_boost_mutex = NULL;
                 }
@@ -387,13 +400,22 @@ void __rcu_read_unlock(void)
         struct task_struct *t = current;
 
         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
-        --t->rcu_read_lock_nesting;
-        barrier();  /* decrement before load of ->rcu_read_unlock_special */
-        if (t->rcu_read_lock_nesting == 0 &&
-            unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-                rcu_read_unlock_special(t);
+        if (t->rcu_read_lock_nesting != 1)
+                --t->rcu_read_lock_nesting;
+        else {
+                t->rcu_read_lock_nesting = INT_MIN;
+                barrier();  /* assign before ->rcu_read_unlock_special load */
+                if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+                        rcu_read_unlock_special(t);
+                barrier();  /* ->rcu_read_unlock_special load before assign */
+                t->rcu_read_lock_nesting = 0;
+        }
 #ifdef CONFIG_PROVE_LOCKING
-        WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+        {
+                int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+                WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+        }
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
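
The rewritten __rcu_read_unlock() above parks ->rcu_read_lock_nesting at INT_MIN for the duration of the outermost unlock's special processing, so an irq handler that interrupts this window and does its own rcu_read_lock()/rcu_read_unlock() sees a negative nesting count and skips the deferred work; the new "else if (t->rcu_read_lock_nesting < 0 ...)" branch in rcu_preempt_note_context_switch() completes it if the task is preempted mid-unlock. A minimal userspace sketch of just the counter protocol follows; the names are hypothetical and the barriers, interrupts, and real RCU state are omitted.

#include <limits.h>
#include <stdio.h>

static int nesting;     /* stands in for t->rcu_read_lock_nesting */
static int special;     /* stands in for t->rcu_read_unlock_special */

static void sketch_read_lock(void)
{
        ++nesting;
}

static void sketch_read_unlock(void)
{
        if (nesting != 1) {
                --nesting;              /* nested unlock: just count down */
        } else {
                nesting = INT_MIN;      /* sentinel: outermost unlock in progress */
                if (special) {
                        special = 0;    /* deferred clean-up runs exactly once */
                        puts("special processing");
                }
                nesting = 0;
        }
}

int main(void)
{
        sketch_read_lock();
        special = 1;            /* pretend the reader blocked or was boosted */
        sketch_read_unlock();   /* outermost unlock performs the special work */
        return 0;
}
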
@@ -589,7 +611,8 @@ static void rcu_preempt_check_callbacks(int cpu)
                 rcu_preempt_qs(cpu);
                 return;
         }
-        if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+        if (t->rcu_read_lock_nesting > 0 &&
+            per_cpu(rcu_preempt_data, cpu).qs_pending)
                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
@@ -695,9 +718,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 
         raw_spin_lock_irqsave(&rnp->lock, flags);
         for (;;) {
-                if (!sync_rcu_preempt_exp_done(rnp))
+                if (!sync_rcu_preempt_exp_done(rnp)) {
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                         break;
+                }
                 if (rnp->parent == NULL) {
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                         wake_up(&sync_rcu_preempt_exp_wq);
                         break;
                 }
@@ -707,7 +733,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
                 rnp->expmask &= ~mask;
         }
-        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
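
Taken together, the two rcu_report_exp_rnp() hunks above move the rnp->lock release onto each exit path, so wake_up() is no longer invoked while an rcu_node lock is held (the "decrease rcu_report_exp_rnp coupling with scheduler" change in this merge). Pieced together from the hunks, and with the unshown walk up to rnp->parent elided, the loop now reads roughly:

        raw_spin_lock_irqsave(&rnp->lock, flags);
        for (;;) {
                if (!sync_rcu_preempt_exp_done(rnp)) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                        break;
                }
                if (rnp->parent == NULL) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                        wake_up(&sync_rcu_preempt_exp_wq);      /* no rcu_node lock held here */
                        break;
                }
                /* ... advance to rnp->parent (context lines not shown in this diff) ... */
                raw_spin_lock(&rnp->lock); /* irqs already disabled */
                rnp->expmask &= ~mask;
        }
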
@@ -1174,7 +1199,7 @@ static int rcu_boost(struct rcu_node *rnp)
         t = container_of(tb, struct task_struct, rcu_node_entry);
         rt_mutex_init_proxy_locked(&mtx, t);
         t->rcu_boost_mutex = &mtx;
-        t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+        t->rcu_boosted = 1;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
         rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
         rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
diff --git a/kernel/sched.c b/kernel/sched.c
index 14168c49a154..fde6ff903525 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+static void sched_ttwu_do_pending(struct task_struct *list)
 {
         struct rq *rq = this_rq();
-        struct task_struct *list = xchg(&rq->wake_list, NULL);
-
-        if (!list)
-                return;
 
         raw_spin_lock(&rq->lock);
 
@@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
         raw_spin_unlock(&rq->lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void sched_ttwu_pending(void)
+{
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+        if (!list)
+                return;
+
+        sched_ttwu_do_pending(list);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
 void scheduler_ipi(void)
 {
-        sched_ttwu_pending();
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+        if (!list)
+                return;
+
+        /*
+         * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+         * traditionally all their work was done from the interrupt return
+         * path. Now that we actually do some work, we need to make sure
+         * we do call them.
+         *
+         * Some archs already do call them, luckily irq_enter/exit nest
+         * properly.
+         *
+         * Arguably we should visit all archs and update all handlers,
+         * however a fair share of IPIs are still resched only so this would
+         * somewhat pessimize the simple resched case.
+         */
+        irq_enter();
+        sched_ttwu_do_pending(list);
+        irq_exit();
 }
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
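
The comment block added to scheduler_ipi() above is the heart of the change: some architectures already wrap their reschedule IPI in irq_enter()/irq_exit(), others do not, so the function now brackets its own wake-list processing and relies on those calls nesting cleanly. A toy model of that nesting follows; the names are hypothetical, and the real accounting lives in the hardirq bits of preempt_count, where irq_enter()/irq_exit() also do considerably more work.

#include <assert.h>

static int hardirq_depth;       /* toy stand-in for the hardirq count */

static void toy_irq_enter(void) { ++hardirq_depth; }
static void toy_irq_exit(void)  { --hardirq_depth; /* the real irq_exit() may also run softirqs */ }

static void toy_scheduler_ipi(void)
{
        toy_irq_enter();
        /* ... sched_ttwu_do_pending(list) would run here ... */
        toy_irq_exit();
}

int main(void)
{
        /* Arch whose IPI entry code already calls irq_enter()/irq_exit(): */
        toy_irq_enter();
        toy_scheduler_ipi();            /* nests to depth 2, returns to 1 */
        toy_irq_exit();
        assert(hardirq_depth == 0);

        /* Arch that does not: scheduler_ipi() provides the bracketing itself. */
        toy_scheduler_ipi();
        assert(hardirq_depth == 0);
        return 0;
}
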
diff --git a/kernel/signal.c b/kernel/signal.c
index ff7678603328..415d85d6f6c6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1178,18 +1178,25 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 {
         struct sighand_struct *sighand;
 
-        rcu_read_lock();
         for (;;) {
+                local_irq_save(*flags);
+                rcu_read_lock();
                 sighand = rcu_dereference(tsk->sighand);
-                if (unlikely(sighand == NULL))
+                if (unlikely(sighand == NULL)) {
+                        rcu_read_unlock();
+                        local_irq_restore(*flags);
                         break;
+                }
 
-                spin_lock_irqsave(&sighand->siglock, *flags);
-                if (likely(sighand == tsk->sighand))
+                spin_lock(&sighand->siglock);
+                if (likely(sighand == tsk->sighand)) {
+                        rcu_read_unlock();
                         break;
-                spin_unlock_irqrestore(&sighand->siglock, *flags);
+                }
+                spin_unlock(&sighand->siglock);
+                rcu_read_unlock();
+                local_irq_restore(*flags);
         }
-        rcu_read_unlock();
 
         return sighand;
 }
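
This is the "signal: align __lock_task_sighand() irq disabling and RCU" change listed above: interrupts are now disabled before rcu_read_lock() and re-enabled only after rcu_read_unlock(), so every pass through the loop keeps the RCU read-side critical section entirely inside the irq-off region. Reassembled from the hunk (declarations and the function signature are not repeated here), the retry loop ends up approximately as:

        for (;;) {
                local_irq_save(*flags);
                rcu_read_lock();
                sighand = rcu_dereference(tsk->sighand);
                if (unlikely(sighand == NULL)) {
                        rcu_read_unlock();
                        local_irq_restore(*flags);
                        break;
                }

                spin_lock(&sighand->siglock);
                if (likely(sighand == tsk->sighand)) {
                        rcu_read_unlock();
                        break;  /* success: return with irqs off and siglock held */
                }
                spin_unlock(&sighand->siglock);
                rcu_read_unlock();
                local_irq_restore(*flags);
        }

        return sighand;
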
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 40cf63ddd4b3..fca82c32042b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -315,16 +315,24 @@ static inline void invoke_softirq(void)
 {
         if (!force_irqthreads)
                 __do_softirq();
-        else
+        else {
+                __local_bh_disable((unsigned long)__builtin_return_address(0),
+                                SOFTIRQ_OFFSET);
                 wakeup_softirqd();
+                __local_bh_enable(SOFTIRQ_OFFSET);
+        }
 }
 #else
 static inline void invoke_softirq(void)
 {
         if (!force_irqthreads)
                 do_softirq();
-        else
+        else {
+                __local_bh_disable((unsigned long)__builtin_return_address(0),
+                                SOFTIRQ_OFFSET);
                 wakeup_softirqd();
+                __local_bh_enable(SOFTIRQ_OFFSET);
+        }
 }
 #endif
 
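
In the force_irqthreads case invoke_softirq() no longer runs softirqs directly, so the new __local_bh_disable()/__local_bh_enable() pair with SOFTIRQ_OFFSET accounts the CPU as serving softirq while ksoftirqd is being woken; this is the window the in_serving_softirq() test added to rcu_read_unlock_special() earlier in this merge can observe, letting RCU defer its special unlock processing there. A toy model of the preempt-count bookkeeping follows; the constants mirror the mainline layout of the era but are shown here only for illustration.

#include <assert.h>

#define SOFTIRQ_SHIFT   8
#define SOFTIRQ_OFFSET  (1UL << SOFTIRQ_SHIFT)
#define SOFTIRQ_MASK    (0xffUL << SOFTIRQ_SHIFT)

static unsigned long preempt_count;     /* toy stand-in for the per-task preempt counter */

#define softirq_count()         (preempt_count & SOFTIRQ_MASK)
#define in_serving_softirq()    (softirq_count() & SOFTIRQ_OFFSET)

int main(void)
{
        assert(!in_serving_softirq());
        preempt_count += SOFTIRQ_OFFSET;        /* what __local_bh_disable(..., SOFTIRQ_OFFSET) adds */
        assert(in_serving_softirq());           /* RCU now treats this as softirq context */
        preempt_count -= SOFTIRQ_OFFSET;        /* __local_bh_enable(SOFTIRQ_OFFSET) */
        assert(!in_serving_softirq());
        return 0;
}
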