diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-29 11:12:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-29 11:12:20 -0400 |
commit | fefcfd431b5181615e7da628e34c5227c895b85a (patch) | |
tree | 0da42ba9d66c6e9a12b6352b5be41f2fa023c91b /kernel | |
parent | 37c2ca24119f7dd89cbbb43833df1e6c5fb3417b (diff) | |
parent | 89061d3d58e1f0742139605dc6a7950aa1ecc019 (diff) |
Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
futex: Move drop_futex_key_refs out of spinlock'ed region
rcu: Fix TREE_PREEMPT_RCU CPU_HOTPLUG bad-luck hang
rcu: Stopgap fix for synchronize_rcu_expedited() for TREE_PREEMPT_RCU
rcu: Prevent RCU IPI storms in presence of high call_rcu() load
futex: Check for NULL keys in match_futex
futex: Handle spurious wake up
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/futex.c | 24 | ||||
-rw-r--r-- | kernel/rcutree.c | 44 | ||||
-rw-r--r-- | kernel/rcutree.h | 10 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 46 |
4 files changed, 102 insertions, 22 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 4949d336d88d..642f3bbaacc7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key) | |||
150 | */ | 150 | */ |
151 | static inline int match_futex(union futex_key *key1, union futex_key *key2) | 151 | static inline int match_futex(union futex_key *key1, union futex_key *key2) |
152 | { | 152 | { |
153 | return (key1->both.word == key2->both.word | 153 | return (key1 && key2 |
154 | && key1->both.word == key2->both.word | ||
154 | && key1->both.ptr == key2->both.ptr | 155 | && key1->both.ptr == key2->both.ptr |
155 | && key1->both.offset == key2->both.offset); | 156 | && key1->both.offset == key2->both.offset); |
156 | } | 157 | } |
@@ -1028,7 +1029,6 @@ static inline | |||
1028 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | 1029 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, |
1029 | struct futex_hash_bucket *hb) | 1030 | struct futex_hash_bucket *hb) |
1030 | { | 1031 | { |
1031 | drop_futex_key_refs(&q->key); | ||
1032 | get_futex_key_refs(key); | 1032 | get_futex_key_refs(key); |
1033 | q->key = *key; | 1033 | q->key = *key; |
1034 | 1034 | ||
@@ -1226,6 +1226,7 @@ retry_private: | |||
1226 | */ | 1226 | */ |
1227 | if (ret == 1) { | 1227 | if (ret == 1) { |
1228 | WARN_ON(pi_state); | 1228 | WARN_ON(pi_state); |
1229 | drop_count++; | ||
1229 | task_count++; | 1230 | task_count++; |
1230 | ret = get_futex_value_locked(&curval2, uaddr2); | 1231 | ret = get_futex_value_locked(&curval2, uaddr2); |
1231 | if (!ret) | 1232 | if (!ret) |
@@ -1304,6 +1305,7 @@ retry_private: | |||
1304 | if (ret == 1) { | 1305 | if (ret == 1) { |
1305 | /* We got the lock. */ | 1306 | /* We got the lock. */ |
1306 | requeue_pi_wake_futex(this, &key2, hb2); | 1307 | requeue_pi_wake_futex(this, &key2, hb2); |
1308 | drop_count++; | ||
1307 | continue; | 1309 | continue; |
1308 | } else if (ret) { | 1310 | } else if (ret) { |
1309 | /* -EDEADLK */ | 1311 | /* -EDEADLK */ |
@@ -1791,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1791 | current->timer_slack_ns); | 1793 | current->timer_slack_ns); |
1792 | } | 1794 | } |
1793 | 1795 | ||
1796 | retry: | ||
1794 | /* Prepare to wait on uaddr. */ | 1797 | /* Prepare to wait on uaddr. */ |
1795 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1798 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
1796 | if (ret) | 1799 | if (ret) |
@@ -1808,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1808 | goto out_put_key; | 1811 | goto out_put_key; |
1809 | 1812 | ||
1810 | /* | 1813 | /* |
1811 | * We expect signal_pending(current), but another thread may | 1814 | * We expect signal_pending(current), but we might be the |
1812 | * have handled it for us already. | 1815 | * victim of a spurious wakeup as well. |
1813 | */ | 1816 | */ |
1817 | if (!signal_pending(current)) { | ||
1818 | put_futex_key(fshared, &q.key); | ||
1819 | goto retry; | ||
1820 | } | ||
1821 | |||
1814 | ret = -ERESTARTSYS; | 1822 | ret = -ERESTARTSYS; |
1815 | if (!abs_time) | 1823 | if (!abs_time) |
1816 | goto out_put_key; | 1824 | goto out_put_key; |
@@ -2118,9 +2126,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2118 | */ | 2126 | */ |
2119 | plist_del(&q->list, &q->list.plist); | 2127 | plist_del(&q->list, &q->list.plist); |
2120 | 2128 | ||
2129 | /* Handle spurious wakeups gracefully */ | ||
2130 | ret = -EAGAIN; | ||
2121 | if (timeout && !timeout->task) | 2131 | if (timeout && !timeout->task) |
2122 | ret = -ETIMEDOUT; | 2132 | ret = -ETIMEDOUT; |
2123 | else | 2133 | else if (signal_pending(current)) |
2124 | ret = -ERESTARTNOINTR; | 2134 | ret = -ERESTARTNOINTR; |
2125 | } | 2135 | } |
2126 | return ret; | 2136 | return ret; |
@@ -2198,6 +2208,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2198 | debug_rt_mutex_init_waiter(&rt_waiter); | 2208 | debug_rt_mutex_init_waiter(&rt_waiter); |
2199 | rt_waiter.task = NULL; | 2209 | rt_waiter.task = NULL; |
2200 | 2210 | ||
2211 | retry: | ||
2201 | key2 = FUTEX_KEY_INIT; | 2212 | key2 = FUTEX_KEY_INIT; |
2202 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); | 2213 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); |
2203 | if (unlikely(ret != 0)) | 2214 | if (unlikely(ret != 0)) |
@@ -2292,6 +2303,9 @@ out_put_keys: | |||
2292 | out_key2: | 2303 | out_key2: |
2293 | put_futex_key(fshared, &key2); | 2304 | put_futex_key(fshared, &key2); |
2294 | 2305 | ||
2306 | /* Spurious wakeup ? */ | ||
2307 | if (ret == -EAGAIN) | ||
2308 | goto retry; | ||
2295 | out: | 2309 | out: |
2296 | if (to) { | 2310 | if (to) { |
2297 | hrtimer_cancel(&to->timer); | 2311 | hrtimer_cancel(&to->timer); |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 705f02ac7433..0536125b0497 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -913,7 +913,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
913 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 913 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
914 | break; | 914 | break; |
915 | } | 915 | } |
916 | rcu_preempt_offline_tasks(rsp, rnp, rdp); | 916 | |
917 | /* | ||
918 | * If there was a task blocking the current grace period, | ||
919 | * and if all CPUs have checked in, we need to propagate | ||
920 | * the quiescent state up the rcu_node hierarchy. But that | ||
921 | * is inconvenient at the moment due to deadlock issues if | ||
922 | * this should end the current grace period. So set the | ||
923 | * offlined CPU's bit in ->qsmask in order to force the | ||
924 | * next force_quiescent_state() invocation to clean up this | ||
925 | * mess in a deadlock-free manner. | ||
926 | */ | ||
927 | if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask) | ||
928 | rnp->qsmask |= mask; | ||
929 | |||
917 | mask = rnp->grpmask; | 930 | mask = rnp->grpmask; |
918 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 931 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
919 | rnp = rnp->parent; | 932 | rnp = rnp->parent; |
@@ -958,7 +971,7 @@ static void rcu_offline_cpu(int cpu) | |||
958 | * Invoke any RCU callbacks that have made it to the end of their grace | 971 | * Invoke any RCU callbacks that have made it to the end of their grace |
959 | * period. Throttle as specified by rdp->blimit. | 972 | * period. Throttle as specified by rdp->blimit. |
960 | */ | 973 | */ |
961 | static void rcu_do_batch(struct rcu_data *rdp) | 974 | static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) |
962 | { | 975 | { |
963 | unsigned long flags; | 976 | unsigned long flags; |
964 | struct rcu_head *next, *list, **tail; | 977 | struct rcu_head *next, *list, **tail; |
@@ -1011,6 +1024,13 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
1011 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 1024 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) |
1012 | rdp->blimit = blimit; | 1025 | rdp->blimit = blimit; |
1013 | 1026 | ||
1027 | /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ | ||
1028 | if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { | ||
1029 | rdp->qlen_last_fqs_check = 0; | ||
1030 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1031 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | ||
1032 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1033 | |||
1014 | local_irq_restore(flags); | 1034 | local_irq_restore(flags); |
1015 | 1035 | ||
1016 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1036 | /* Re-raise the RCU softirq if there are callbacks remaining. */ |
@@ -1224,7 +1244,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1224 | } | 1244 | } |
1225 | 1245 | ||
1226 | /* If there are callbacks ready, invoke them. */ | 1246 | /* If there are callbacks ready, invoke them. */ |
1227 | rcu_do_batch(rdp); | 1247 | rcu_do_batch(rsp, rdp); |
1228 | } | 1248 | } |
1229 | 1249 | ||
1230 | /* | 1250 | /* |
@@ -1288,10 +1308,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1288 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | 1308 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ |
1289 | } | 1309 | } |
1290 | 1310 | ||
1291 | /* Force the grace period if too many callbacks or too long waiting. */ | 1311 | /* |
1292 | if (unlikely(++rdp->qlen > qhimark)) { | 1312 | * Force the grace period if too many callbacks or too long waiting. |
1313 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
1314 | * if some other CPU has recently done so. Also, don't bother | ||
1315 | * invoking force_quiescent_state() if the newly enqueued callback | ||
1316 | * is the only one waiting for a grace period to complete. | ||
1317 | */ | ||
1318 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
1293 | rdp->blimit = LONG_MAX; | 1319 | rdp->blimit = LONG_MAX; |
1294 | force_quiescent_state(rsp, 0); | 1320 | if (rsp->n_force_qs == rdp->n_force_qs_snap && |
1321 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
1322 | force_quiescent_state(rsp, 0); | ||
1323 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1324 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1295 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) | 1325 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) |
1296 | force_quiescent_state(rsp, 1); | 1326 | force_quiescent_state(rsp, 1); |
1297 | local_irq_restore(flags); | 1327 | local_irq_restore(flags); |
@@ -1523,6 +1553,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
1523 | rdp->beenonline = 1; /* We have now been online. */ | 1553 | rdp->beenonline = 1; /* We have now been online. */ |
1524 | rdp->preemptable = preemptable; | 1554 | rdp->preemptable = preemptable; |
1525 | rdp->passed_quiesc_completed = lastcomp - 1; | 1555 | rdp->passed_quiesc_completed = lastcomp - 1; |
1556 | rdp->qlen_last_fqs_check = 0; | ||
1557 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1526 | rdp->blimit = blimit; | 1558 | rdp->blimit = blimit; |
1527 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1559 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1528 | 1560 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b40ac5706040..1823c6e20609 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -167,6 +167,10 @@ struct rcu_data { | |||
167 | struct rcu_head *nxtlist; | 167 | struct rcu_head *nxtlist; |
168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; | 168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; |
169 | long qlen; /* # of queued callbacks */ | 169 | long qlen; /* # of queued callbacks */ |
170 | long qlen_last_fqs_check; | ||
171 | /* qlen at last check for QS forcing */ | ||
172 | unsigned long n_force_qs_snap; | ||
173 | /* did other CPU force QS recently? */ | ||
170 | long blimit; /* Upper limit on a processed batch */ | 174 | long blimit; /* Upper limit on a processed batch */ |
171 | 175 | ||
172 | #ifdef CONFIG_NO_HZ | 176 | #ifdef CONFIG_NO_HZ |
@@ -302,9 +306,9 @@ static void rcu_print_task_stall(struct rcu_node *rnp); | |||
302 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 306 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
303 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 307 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
304 | #ifdef CONFIG_HOTPLUG_CPU | 308 | #ifdef CONFIG_HOTPLUG_CPU |
305 | static void rcu_preempt_offline_tasks(struct rcu_state *rsp, | 309 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
306 | struct rcu_node *rnp, | 310 | struct rcu_node *rnp, |
307 | struct rcu_data *rdp); | 311 | struct rcu_data *rdp); |
308 | static void rcu_preempt_offline_cpu(int cpu); | 312 | static void rcu_preempt_offline_cpu(int cpu); |
309 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 313 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
310 | static void rcu_preempt_check_callbacks(int cpu); | 314 | static void rcu_preempt_check_callbacks(int cpu); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c0cb783aa16a..ef2a58c2b9d5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -304,21 +304,25 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | |||
304 | * parent is to remove the need for rcu_read_unlock_special() to | 304 | * parent is to remove the need for rcu_read_unlock_special() to |
305 | * make more than two attempts to acquire the target rcu_node's lock. | 305 | * make more than two attempts to acquire the target rcu_node's lock. |
306 | * | 306 | * |
307 | * Returns 1 if there was previously a task blocking the current grace | ||
308 | * period on the specified rcu_node structure. | ||
309 | * | ||
307 | * The caller must hold rnp->lock with irqs disabled. | 310 | * The caller must hold rnp->lock with irqs disabled. |
308 | */ | 311 | */ |
309 | static void rcu_preempt_offline_tasks(struct rcu_state *rsp, | 312 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
310 | struct rcu_node *rnp, | 313 | struct rcu_node *rnp, |
311 | struct rcu_data *rdp) | 314 | struct rcu_data *rdp) |
312 | { | 315 | { |
313 | int i; | 316 | int i; |
314 | struct list_head *lp; | 317 | struct list_head *lp; |
315 | struct list_head *lp_root; | 318 | struct list_head *lp_root; |
319 | int retval = rcu_preempted_readers(rnp); | ||
316 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 320 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
317 | struct task_struct *tp; | 321 | struct task_struct *tp; |
318 | 322 | ||
319 | if (rnp == rnp_root) { | 323 | if (rnp == rnp_root) { |
320 | WARN_ONCE(1, "Last CPU thought to be offlined?"); | 324 | WARN_ONCE(1, "Last CPU thought to be offlined?"); |
321 | return; /* Shouldn't happen: at least one CPU online. */ | 325 | return 0; /* Shouldn't happen: at least one CPU online. */ |
322 | } | 326 | } |
323 | WARN_ON_ONCE(rnp != rdp->mynode && | 327 | WARN_ON_ONCE(rnp != rdp->mynode && |
324 | (!list_empty(&rnp->blocked_tasks[0]) || | 328 | (!list_empty(&rnp->blocked_tasks[0]) || |
@@ -342,6 +346,8 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
342 | spin_unlock(&rnp_root->lock); /* irqs remain disabled */ | 346 | spin_unlock(&rnp_root->lock); /* irqs remain disabled */ |
343 | } | 347 | } |
344 | } | 348 | } |
349 | |||
350 | return retval; | ||
345 | } | 351 | } |
346 | 352 | ||
347 | /* | 353 | /* |
@@ -393,6 +399,17 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
393 | EXPORT_SYMBOL_GPL(call_rcu); | 399 | EXPORT_SYMBOL_GPL(call_rcu); |
394 | 400 | ||
395 | /* | 401 | /* |
402 | * Wait for an rcu-preempt grace period. We are supposed to expedite the | ||
403 | * grace period, but this is the crude slow compatibility hack, so just | ||
404 | * invoke synchronize_rcu(). | ||
405 | */ | ||
406 | void synchronize_rcu_expedited(void) | ||
407 | { | ||
408 | synchronize_rcu(); | ||
409 | } | ||
410 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||
411 | |||
412 | /* | ||
396 | * Check to see if there is any immediate preemptable-RCU-related work | 413 | * Check to see if there is any immediate preemptable-RCU-related work |
397 | * to be done. | 414 | * to be done. |
398 | */ | 415 | */ |
@@ -521,12 +538,15 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | |||
521 | 538 | ||
522 | /* | 539 | /* |
523 | * Because preemptable RCU does not exist, it never needs to migrate | 540 | * Because preemptable RCU does not exist, it never needs to migrate |
524 | * tasks that were blocked within RCU read-side critical sections. | 541 | * tasks that were blocked within RCU read-side critical sections, and |
542 | * such non-existent tasks cannot possibly have been blocking the current | ||
543 | * grace period. | ||
525 | */ | 544 | */ |
526 | static void rcu_preempt_offline_tasks(struct rcu_state *rsp, | 545 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
527 | struct rcu_node *rnp, | 546 | struct rcu_node *rnp, |
528 | struct rcu_data *rdp) | 547 | struct rcu_data *rdp) |
529 | { | 548 | { |
549 | return 0; | ||
530 | } | 550 | } |
531 | 551 | ||
532 | /* | 552 | /* |
@@ -565,6 +585,16 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
565 | EXPORT_SYMBOL_GPL(call_rcu); | 585 | EXPORT_SYMBOL_GPL(call_rcu); |
566 | 586 | ||
567 | /* | 587 | /* |
588 | * Wait for an rcu-preempt grace period, but make it happen quickly. | ||
589 | * But because preemptable RCU does not exist, map to rcu-sched. | ||
590 | */ | ||
591 | void synchronize_rcu_expedited(void) | ||
592 | { | ||
593 | synchronize_sched_expedited(); | ||
594 | } | ||
595 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||
596 | |||
597 | /* | ||
568 | * Because preemptable RCU does not exist, it never has any work to do. | 598 | * Because preemptable RCU does not exist, it never has any work to do. |
569 | */ | 599 | */ |
570 | static int rcu_preempt_pending(int cpu) | 600 | static int rcu_preempt_pending(int cpu) |