author     Linus Torvalds <torvalds@linux-foundation.org>   2011-05-28 15:56:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-28 15:56:32 -0400
commit     1ba4b8cb94e59b17fd0142a509eb583695c36db6
tree       e42d1967025670401758d32964a5fa048f59f10a /kernel
parent     c4a227d89f758e582fd167bb15245f2704de99ef
parent     cc3ce5176d83cd8ae1134f86e208ea758d6cb78e
Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
rcu: Start RCU kthreads in TASK_INTERRUPTIBLE state
rcu: Remove waitqueue usage for cpu, node, and boost kthreads
rcu: Avoid acquiring rcu_node locks in timer functions
atomic: Add atomic_or()
Documentation: Add statistics about nested locks
rcu: Decrease memory-barrier usage based on semi-formal proof
rcu: Make rcu_enter_nohz() pay attention to nesting
rcu: Don't do reschedule unless in irq
rcu: Remove old memory barriers from rcu_process_callbacks()
rcu: Add memory barriers
rcu: Fix unpaired rcu_irq_enter() from locking selftests
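
The centerpiece of the series is the dynticks rework: the old dynticks/dynticks_nmi counter pair becomes a single atomic_t whose low bit encodes idleness (even = dyntick-idle, odd = non-idle), and most of the standalone smp_mb() calls move into barrier-bracketed atomic operations on that counter. A minimal user-space model of the protocol, with C11 atomics standing in for the kernel's atomic_t (names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int dynticks = 1;         /* starts odd: not idle */

static void enter_idle(void)            /* cf. rcu_enter_nohz() */
{
        atomic_fetch_add(&dynticks, 1); /* odd -> even */
}

static void exit_idle(void)             /* cf. rcu_exit_nohz() */
{
        atomic_fetch_add(&dynticks, 1); /* even -> odd */
}

/* Grace-period side: the CPU is in a quiescent state if it is idle
 * right now (even counter) or has been idle at some point since the
 * snapshot (counter advanced by at least two). */
static bool quiescent_since(int snap)
{
        int curr = atomic_load(&dynticks);
        return (curr & 0x1) == 0 || curr - snap >= 2;
}

int main(void)
{
        int snap = atomic_load(&dynticks); /* cf. dyntick_save_progress_counter() */
        printf("qs before idle: %d\n", quiescent_since(snap)); /* 0 */
        enter_idle();
        exit_idle();
        printf("qs after idle sojourn: %d\n", quiescent_since(snap)); /* 1 */
        return 0;
}

The grace-period machinery never needs to disturb an idle CPU: an even counter, or any advance of two or more, is proof of a quiescent state.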
Diffstat (limited to 'kernel')

-rw-r--r--  kernel/rcutree.c         | 164
-rw-r--r--  kernel/rcutree.h         |  30
-rw-r--r--  kernel/rcutree_plugin.h  |  24
-rw-r--r--  kernel/rcutree_trace.c   |  12

4 files changed, 94 insertions, 136 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f03181a..77a7671dd147 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/nmi.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/completion.h>
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
         .dynticks_nesting = 1,
-        .dynticks = 1,
+        .dynticks = ATOMIC_INIT(1),
 };
 #endif /* #ifdef CONFIG_NO_HZ */
 
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void)
         unsigned long flags;
         struct rcu_dynticks *rdtp;
 
-        smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
         local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        rdtp->dynticks++;
-        rdtp->dynticks_nesting--;
-        WARN_ON_ONCE(rdtp->dynticks & 0x1);
+        if (--rdtp->dynticks_nesting) {
+                local_irq_restore(flags);
+                return;
+        }
+        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+        smp_mb__before_atomic_inc();  /* See above. */
+        atomic_inc(&rdtp->dynticks);
+        smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
         local_irq_restore(flags);
+
+        /* If the interrupt queued a callback, get out of dyntick mode. */
+        if (in_irq() &&
+            (__get_cpu_var(rcu_sched_data).nxtlist ||
+             __get_cpu_var(rcu_bh_data).nxtlist ||
+             rcu_preempt_needs_cpu(smp_processor_id())))
+                set_need_resched();
 }
 
 /*
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void)
 
         local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        rdtp->dynticks++;
-        rdtp->dynticks_nesting++;
-        WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
+        if (rdtp->dynticks_nesting++) {
+                local_irq_restore(flags);
+                return;
+        }
+        smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
+        atomic_inc(&rdtp->dynticks);
+        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+        smp_mb__after_atomic_inc();  /* See above. */
+        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
         local_irq_restore(flags);
-        smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
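
In both functions above, the sequence smp_mb__before_atomic_inc(); atomic_inc(&rdtp->dynticks); smp_mb__after_atomic_inc() makes the counter flip a fully ordered operation on every architecture (where atomic_inc() already implies a full barrier, the helpers compile away). In C11 terms the whole sequence is one sequentially consistent read-modify-write; a hedged sketch:

#include <stdatomic.h>

/* Sketch only: models smp_mb__before_atomic_inc(); atomic_inc(&v);
 * smp_mb__after_atomic_inc(); as a single fully-ordered RMW. */
static inline void dynticks_flip(atomic_int *v)
{
        atomic_fetch_add_explicit(v, 1, memory_order_seq_cst);
}

Because every idle transition is now a full barrier in its own right, the grace-period code can stop sprinkling smp_mb() around reads of the counter — which is what the later hunks in the snapshot functions and rcu_process_callbacks() delete.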
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void)
 {
         struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-        if (rdtp->dynticks & 0x1)
+        if (rdtp->dynticks_nmi_nesting == 0 &&
+            (atomic_read(&rdtp->dynticks) & 0x1))
                 return;
-        rdtp->dynticks_nmi++;
-        WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
-        smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+        rdtp->dynticks_nmi_nesting++;
+        smp_mb__before_atomic_inc();  /* Force delay from prior write. */
+        atomic_inc(&rdtp->dynticks);
+        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+        smp_mb__after_atomic_inc();  /* See above. */
+        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 }
 
 /**
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void)
 {
         struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-        if (rdtp->dynticks & 0x1)
+        if (rdtp->dynticks_nmi_nesting == 0 ||
+            --rdtp->dynticks_nmi_nesting != 0)
                 return;
-        smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-        rdtp->dynticks_nmi++;
-        WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
+        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+        smp_mb__before_atomic_inc();  /* See above. */
+        atomic_inc(&rdtp->dynticks);
+        smp_mb__after_atomic_inc();  /* Force delay to next write. */
+        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
 /**
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void)
  */
 void rcu_irq_enter(void)
 {
-        struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-        if (rdtp->dynticks_nesting++)
-                return;
-        rdtp->dynticks++;
-        WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
-        smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+        rcu_exit_nohz();
 }
 
 /**
@@ -413,18 +430,7 @@ void rcu_irq_enter(void)
  */
 void rcu_irq_exit(void)
 {
-        struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-        if (--rdtp->dynticks_nesting)
-                return;
-        smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-        rdtp->dynticks++;
-        WARN_ON_ONCE(rdtp->dynticks & 0x1);
-
-        /* If the interrupt queued a callback, get out of dyntick mode. */
-        if (__this_cpu_read(rcu_sched_data.nxtlist) ||
-            __this_cpu_read(rcu_bh_data.nxtlist))
-                set_need_resched();
+        rcu_enter_nohz();
 }
 
 #ifdef CONFIG_SMP
@@ -436,19 +442,8 @@ void rcu_irq_exit(void)
  */
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
-        int ret;
-        int snap;
-        int snap_nmi;
-
-        snap = rdp->dynticks->dynticks;
-        snap_nmi = rdp->dynticks->dynticks_nmi;
-        smp_mb(); /* Order sampling of snap with end of grace period. */
-        rdp->dynticks_snap = snap;
-        rdp->dynticks_nmi_snap = snap_nmi;
-        ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
-        if (ret)
-                rdp->dynticks_fqs++;
-        return ret;
+        rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+        return 0;
 }
 
 /*
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  */
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
-        long curr;
-        long curr_nmi;
-        long snap;
-        long snap_nmi;
+        unsigned long curr;
+        unsigned long snap;
 
-        curr = rdp->dynticks->dynticks;
-        snap = rdp->dynticks_snap;
-        curr_nmi = rdp->dynticks->dynticks_nmi;
-        snap_nmi = rdp->dynticks_nmi_snap;
-        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+        curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
+        snap = (unsigned long)rdp->dynticks_snap;
 
         /*
          * If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
          * read-side critical section that started before the beginning
          * of the current RCU grace period.
          */
-        if ((curr != snap || (curr & 0x1) == 0) &&
-            (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+        if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
                 rdp->dynticks_fqs++;
                 return 1;
         }
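
Two idioms above carry the proof's weight. First, atomic_add_return(0, &...->dynticks) is a read with full-barrier semantics on both sides, replacing the earlier read-then-smp_mb() pairs. Second, the quiescent-state test reduces to "even now, or advanced by two or more since the snapshot". A self-contained sketch of that test (ULONG_CMP_GE is rewritten here as a wraparound-safe comparison; an assumption, not the kernel's exact macro):

#include <stdbool.h>
#include <stdio.h>

/* Wraparound-safe a >= b for counters; stands in for ULONG_CMP_GE(). */
#define cmp_ge(a, b) ((long)((a) - (b)) >= 0)

static bool dynticks_qs(unsigned long curr, unsigned long snap)
{
        return (curr & 0x1) == 0 || cmp_ge(curr, snap + 2);
}

int main(void)
{
        printf("%d\n", dynticks_qs(5, 5)); /* 0: busy, never idled */
        printf("%d\n", dynticks_qs(6, 5)); /* 1: idle right now */
        printf("%d\n", dynticks_qs(7, 5)); /* 1: idled and resumed */
        return 0;
}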
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
         unsigned long gp_duration;
 
         WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+
+        /*
+         * Ensure that all grace-period and pre-grace-period activity
+         * is seen before the assignment to rsp->completed.
+         */
+        smp_mb(); /* See above block comment. */
         gp_duration = jiffies - rsp->gp_start;
         if (gp_duration > rsp->gp_max)
                 rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(void)
 {
-        /*
-         * Memory references from any prior RCU read-side critical sections
-         * executed by the interrupted code must be seen before any RCU
-         * grace-period manipulations below.
-         */
-        smp_mb(); /* See above block comment. */
-
         __rcu_process_callbacks(&rcu_sched_state,
                                 &__get_cpu_var(rcu_sched_data));
         __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
         rcu_preempt_process_callbacks();
 
-        /*
-         * Memory references from any later RCU read-side critical sections
-         * executed by the interrupted code must be seen after any RCU
-         * grace-period manipulations above.
-         */
-        smp_mb(); /* See above block comment. */
-
         /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
         rcu_needs_cpu_flush();
 }
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
                 local_irq_restore(flags);
                 return;
         }
-        wake_up(&__get_cpu_var(rcu_cpu_wq));
+        wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
         local_irq_restore(flags);
 }
 
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
  */
 static void rcu_cpu_kthread_timer(unsigned long arg)
 {
-        unsigned long flags;
         struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
         struct rcu_node *rnp = rdp->mynode;
 
-        raw_spin_lock_irqsave(&rnp->lock, flags);
-        rnp->wakemask |= rdp->grpmask;
-        raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        atomic_or(rdp->grpmask, &rnp->wakemask);
         invoke_rcu_node_kthread(rnp);
 }
 
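
This hunk is where the series' new atomic_or() primitive (added by the "atomic: Add atomic_or()" commit above) pays off: the timer callback sets the wakeup bit without taking rnp->lock, removing an rcu_node lock acquisition from timer context — the "Avoid acquiring rcu_node locks in timer functions" fix. The node kthread drains the mask with atomic_xchg() in a later hunk. A sketch of both halves in C11 terms (a model, not the kernel implementation):

#include <stdatomic.h>

/* Producer (timer): atomically set this CPU's bit, lock-free. */
static inline void wakemask_or(atomic_int *mask, int grpmask)
{
        atomic_fetch_or(mask, grpmask);
}

/* Consumer (node kthread): fetch all pending bits and clear them
 * in one atomic step, so no set bit is ever lost. */
static inline int wakemask_drain(atomic_int *mask)
{
        return atomic_exchange(mask, 0);
}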
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
         unsigned long flags;
         int spincnt = 0;
         unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
-        wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
         char work;
         char *workp = &per_cpu(rcu_cpu_has_work, cpu);
 
         for (;;) {
                 *statusp = RCU_KTHREAD_WAITING;
-                wait_event_interruptible(*wqp,
-                                         *workp != 0 || kthread_should_stop());
+                rcu_wait(*workp != 0 || kthread_should_stop());
                 local_bh_disable();
                 if (rcu_cpu_kthread_should_stop(cpu)) {
                         local_bh_enable();
@@ -1672,10 +1648,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
         if (IS_ERR(t))
                 return PTR_ERR(t);
         kthread_bind(t, cpu);
+        set_task_state(t, TASK_INTERRUPTIBLE);
         per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
         WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
         per_cpu(rcu_cpu_kthread_task, cpu) = t;
-        wake_up_process(t);
         sp.sched_priority = RCU_KTHREAD_PRIO;
         sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
         return 0;
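
The spawn-side half of the kthread changes: rather than wake_up_process() at creation, the new kthread is put in TASK_INTERRUPTIBLE, so it stays parked (and out of the load average) until the first invoke_rcu_cpu_kthread() has real work for it — the "Start RCU kthreads in TASK_INTERRUPTIBLE state" commit. A rough user-space analogue of spawn-parked/wake-on-first-work, using pthreads (names and the yield loop are illustrative):

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int has_work;

static void *worker(void *arg)
{
        (void)arg;
        /* Parked until the first wakeup, like a kthread left in
         * TASK_INTERRUPTIBLE at spawn time; the kernel sleeps in
         * schedule(), this model merely yields. */
        while (!atomic_load(&has_work))
                sched_yield();
        puts("worker: first work item");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL); /* spawned parked */
        atomic_store(&has_work, 1);             /* cf. wake_up_process() */
        pthread_join(t, NULL);
        return 0;
}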
@@ -1698,11 +1674,10 @@ static int rcu_node_kthread(void *arg)
 
         for (;;) {
                 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
-                wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
+                rcu_wait(atomic_read(&rnp->wakemask) != 0);
                 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
                 raw_spin_lock_irqsave(&rnp->lock, flags);
-                mask = rnp->wakemask;
-                rnp->wakemask = 0;
+                mask = atomic_xchg(&rnp->wakemask, 0);
                 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
                 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
                         if ((mask & 0x1) == 0)
@@ -1781,9 +1756,9 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
         if (IS_ERR(t))
                 return PTR_ERR(t);
         raw_spin_lock_irqsave(&rnp->lock, flags);
+        set_task_state(t, TASK_INTERRUPTIBLE);
         rnp->node_kthread_task = t;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
-        wake_up_process(t);
         sp.sched_priority = 99;
         sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 }
@@ -1800,21 +1775,16 @@ static int __init rcu_spawn_kthreads(void)
 
         rcu_kthreads_spawnable = 1;
         for_each_possible_cpu(cpu) {
-                init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
                 per_cpu(rcu_cpu_has_work, cpu) = 0;
                 if (cpu_online(cpu))
                         (void)rcu_spawn_one_cpu_kthread(cpu);
         }
         rnp = rcu_get_root(rcu_state);
-        init_waitqueue_head(&rnp->node_wq);
-        rcu_init_boost_waitqueue(rnp);
         (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-        if (NUM_RCU_NODES > 1)
-                rcu_for_each_leaf_node(rcu_state, rnp) {
-                        init_waitqueue_head(&rnp->node_wq);
-                        rcu_init_boost_waitqueue(rnp);
+        if (NUM_RCU_NODES > 1) {
+                rcu_for_each_leaf_node(rcu_state, rnp)
                         (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-                }
+        }
         return 0;
 }
 early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 257664815d5d..7b9a08b4aaea 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,11 +84,9 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-        int dynticks_nesting;   /* Track nesting level, sort of. */
-        int dynticks;           /* Even value for dynticks-idle, else odd. */
-        int dynticks_nmi;       /* Even value for either dynticks-idle or */
-                                /*  not in nmi handler, else odd.  So this */
-                                /*  remains even for nmi from irq handler. */
+        int dynticks_nesting;   /* Track irq/process nesting level. */
+        int dynticks_nmi_nesting; /* Track NMI nesting level. */
+        atomic_t dynticks;      /* Even value for dynticks-idle, else odd. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -121,7 +119,9 @@ struct rcu_node {
                                 /*  elements that need to drain to allow the */
                                 /*  current expedited grace period to */
                                 /*  complete (only for TREE_PREEMPT_RCU). */
-        unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
+        atomic_t wakemask;      /* CPUs whose kthread needs to be awakened. */
+                                /*  Since this has meaning only for leaf */
+                                /*  rcu_node structures, 32 bits suffices. */
         unsigned long qsmaskinit;
                                 /* Per-GP initial value for qsmask & expmask. */
         unsigned long grpmask;  /* Mask to apply to parent qsmask. */
@@ -159,9 +159,6 @@ struct rcu_node {
         struct task_struct *boost_kthread_task;
                                 /* kthread that takes care of priority */
                                 /*  boosting for this rcu_node structure. */
-        wait_queue_head_t boost_wq;
-                                /* Wait queue on which to park the boost */
-                                /*  kthread. */
         unsigned int boost_kthread_status;
                                 /* State of boost_kthread_task for tracing. */
         unsigned long n_tasks_boosted;
@@ -188,9 +185,6 @@ struct rcu_node {
                                 /* kthread that takes care of this rcu_node */
                                 /*  structure, for example, awakening the */
                                 /*  per-CPU kthreads as needed. */
-        wait_queue_head_t node_wq;
-                                /* Wait queue on which to park the per-node */
-                                /*  kthread. */
         unsigned int node_kthread_status;
                                 /* State of node_kthread_task for tracing. */
 } ____cacheline_internodealigned_in_smp;
@@ -284,7 +278,6 @@ struct rcu_data {
         /* 3) dynticks interface. */
         struct rcu_dynticks *dynticks;  /* Shared per-CPU dynticks state. */
         int dynticks_snap;              /* Per-GP tracking for dynticks. */
-        int dynticks_nmi_snap;          /* Per-GP tracking for dynticks_nmi. */
 #endif /* #ifdef CONFIG_NO_HZ */
 
         /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
@@ -337,6 +330,16 @@ struct rcu_data {
                                         /*  scheduling clock irq */
                                         /*  before ratting on them. */
 
+#define rcu_wait(cond)                                                  \
+do {                                                                    \
+        for (;;) {                                                      \
+                set_current_state(TASK_INTERRUPTIBLE);                  \
+                if (cond)                                               \
+                        break;                                          \
+                schedule();                                             \
+        }                                                               \
+        __set_current_state(TASK_RUNNING);                              \
+} while (0)
 
 /*
  * RCU global state, including node hierarchy.  This hierarchy is
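
rcu_wait() is the waitqueue replacement used by all three kthread loops in this series. The ordering is the classic lost-wakeup guard: set_current_state(TASK_INTERRUPTIBLE) publishes "about to sleep" (with an implied barrier) before the condition is re-checked, so a waker that sets the condition and then calls wake_up_process() can never slip between the check and the sleep. A hedged user-space rendering of the same shape (the fence models set_current_state()'s barrier; sched_yield() stands in for schedule()):

#include <stdatomic.h>
#include <sched.h>

/* Model of rcu_wait(cond): announce intent to sleep, then re-check
 * the condition before sleeping, so a concurrent waker is never lost. */
#define rcu_wait_model(cond)                                    \
do {                                                            \
        for (;;) {                                              \
                /* set_current_state(TASK_INTERRUPTIBLE) */     \
                atomic_thread_fence(memory_order_seq_cst);      \
                if (cond)                                       \
                        break;                                  \
                sched_yield(); /* schedule() */                 \
        }                                                       \
        /* __set_current_state(TASK_RUNNING) */                 \
} while (0)

The waker's half runs in the reverse order — store the condition, then wake — which is exactly what invoke_rcu_cpu_kthread() and rcu_cpu_kthread_timer() do in the rcutree.c hunks above.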
@@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
                                           cpumask_var_t cm);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3f6559a5f5cd..a767b7dac365 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg)
 
         for (;;) {
                 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
-                wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
-                                                        rnp->exp_tasks);
+                rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
                 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
                 more2boost = rcu_boost(rnp);
                 if (more2boost)
@@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 }
 
 /*
- * Initialize the RCU-boost waitqueue.
- */
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-        init_waitqueue_head(&rnp->boost_wq);
-}
-
-/*
  * Create an RCU-boost kthread for the specified node if one does not
  * already exist.  We only create this kthread for preemptible RCU.
  * Returns zero if all is well, a negated errno otherwise.
@@ -1304,9 +1295,9 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
         if (IS_ERR(t))
                 return PTR_ERR(t);
         raw_spin_lock_irqsave(&rnp->lock, flags);
+        set_task_state(t, TASK_INTERRUPTIBLE);
         rnp->boost_kthread_task = t;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
-        wake_up_process(t);
         sp.sched_priority = RCU_KTHREAD_PRIO;
         sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
         return 0;
@@ -1328,10 +1319,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 {
 }
 
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-}
-
 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
                                                  struct rcu_node *rnp,
                                                  int rnp_index)
@@ -1520,7 +1507,6 @@ int rcu_needs_cpu(int cpu)
 {
         int c = 0;
         int snap;
-        int snap_nmi;
         int thatcpu;
 
         /* Check for being in the holdoff period. */
@@ -1531,10 +1517,10 @@ int rcu_needs_cpu(int cpu)
         for_each_online_cpu(thatcpu) {
                 if (thatcpu == cpu)
                         continue;
-                snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
-                snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+                snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
+                                                     thatcpu).dynticks);
                 smp_mb(); /* Order sampling of snap with end of grace period. */
-                if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
+                if ((snap & 0x1) != 0) {
                         per_cpu(rcu_dyntick_drain, cpu) = 0;
                         per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
                         return rcu_needs_cpu_quick_check(cpu);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index aa0fd72b4bc7..9678cc3650f5 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
                    rdp->passed_quiesc, rdp->passed_quiesc_completed,
                    rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
-        seq_printf(m, " dt=%d/%d dn=%d df=%lu",
-                   rdp->dynticks->dynticks,
+        seq_printf(m, " dt=%d/%d/%d df=%lu",
+                   atomic_read(&rdp->dynticks->dynticks),
                    rdp->dynticks->dynticks_nesting,
-                   rdp->dynticks->dynticks_nmi,
+                   rdp->dynticks->dynticks_nmi_nesting,
                    rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
         seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
                    rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
         seq_printf(m, ",%d,%d,%d,%lu",
-                   rdp->dynticks->dynticks,
+                   atomic_read(&rdp->dynticks->dynticks),
                    rdp->dynticks->dynticks_nesting,
-                   rdp->dynticks->dynticks_nmi,
+                   rdp->dynticks->dynticks_nmi_nesting,
                    rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
         seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
         seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
 #ifdef CONFIG_NO_HZ
-        seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+        seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
 #endif /* #ifdef CONFIG_NO_HZ */
         seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
 #ifdef CONFIG_TREE_PREEMPT_RCU