aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcutree.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--kernel/rcutree.c164
1 files changed, 67 insertions, 97 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f03181a..77a7671dd147 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@
36#include <linux/interrupt.h> 36#include <linux/interrupt.h>
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/nmi.h> 38#include <linux/nmi.h>
39#include <asm/atomic.h> 39#include <linux/atomic.h>
40#include <linux/bitops.h> 40#include <linux/bitops.h>
41#include <linux/module.h> 41#include <linux/module.h>
42#include <linux/completion.h> 42#include <linux/completion.h>
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
95DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 95DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
96DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); 96DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
97DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 97DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
98static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
99DEFINE_PER_CPU(char, rcu_cpu_has_work); 98DEFINE_PER_CPU(char, rcu_cpu_has_work);
100static char rcu_kthreads_spawnable; 99static char rcu_kthreads_spawnable;
101 100
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
163#ifdef CONFIG_NO_HZ 162#ifdef CONFIG_NO_HZ
164DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 163DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
165 .dynticks_nesting = 1, 164 .dynticks_nesting = 1,
166 .dynticks = 1, 165 .dynticks = ATOMIC_INIT(1),
167}; 166};
168#endif /* #ifdef CONFIG_NO_HZ */ 167#endif /* #ifdef CONFIG_NO_HZ */
169 168
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void)
322 unsigned long flags; 321 unsigned long flags;
323 struct rcu_dynticks *rdtp; 322 struct rcu_dynticks *rdtp;
324 323
325 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
326 local_irq_save(flags); 324 local_irq_save(flags);
327 rdtp = &__get_cpu_var(rcu_dynticks); 325 rdtp = &__get_cpu_var(rcu_dynticks);
328 rdtp->dynticks++; 326 if (--rdtp->dynticks_nesting) {
329 rdtp->dynticks_nesting--; 327 local_irq_restore(flags);
330 WARN_ON_ONCE(rdtp->dynticks & 0x1); 328 return;
329 }
330 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
331 smp_mb__before_atomic_inc(); /* See above. */
332 atomic_inc(&rdtp->dynticks);
333 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
334 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
331 local_irq_restore(flags); 335 local_irq_restore(flags);
336
337 /* If the interrupt queued a callback, get out of dyntick mode. */
338 if (in_irq() &&
339 (__get_cpu_var(rcu_sched_data).nxtlist ||
340 __get_cpu_var(rcu_bh_data).nxtlist ||
341 rcu_preempt_needs_cpu(smp_processor_id())))
342 set_need_resched();
332} 343}
333 344
334/* 345/*
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void)
344 355
345 local_irq_save(flags); 356 local_irq_save(flags);
346 rdtp = &__get_cpu_var(rcu_dynticks); 357 rdtp = &__get_cpu_var(rcu_dynticks);
347 rdtp->dynticks++; 358 if (rdtp->dynticks_nesting++) {
348 rdtp->dynticks_nesting++; 359 local_irq_restore(flags);
349 WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); 360 return;
361 }
362 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
363 atomic_inc(&rdtp->dynticks);
364 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
365 smp_mb__after_atomic_inc(); /* See above. */
366 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
350 local_irq_restore(flags); 367 local_irq_restore(flags);
351 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
352} 368}
353 369
354/** 370/**
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void)
362{ 378{
363 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 379 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
364 380
365 if (rdtp->dynticks & 0x1) 381 if (rdtp->dynticks_nmi_nesting == 0 &&
382 (atomic_read(&rdtp->dynticks) & 0x1))
366 return; 383 return;
367 rdtp->dynticks_nmi++; 384 rdtp->dynticks_nmi_nesting++;
368 WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); 385 smp_mb__before_atomic_inc(); /* Force delay from prior write. */
369 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 386 atomic_inc(&rdtp->dynticks);
387 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
388 smp_mb__after_atomic_inc(); /* See above. */
389 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
370} 390}
371 391
372/** 392/**
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void)
380{ 400{
381 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 401 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
382 402
383 if (rdtp->dynticks & 0x1) 403 if (rdtp->dynticks_nmi_nesting == 0 ||
404 --rdtp->dynticks_nmi_nesting != 0)
384 return; 405 return;
385 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 406 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
386 rdtp->dynticks_nmi++; 407 smp_mb__before_atomic_inc(); /* See above. */
387 WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); 408 atomic_inc(&rdtp->dynticks);
409 smp_mb__after_atomic_inc(); /* Force delay to next write. */
410 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
388} 411}
389 412
390/** 413/**
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void)
395 */ 418 */
396void rcu_irq_enter(void) 419void rcu_irq_enter(void)
397{ 420{
398 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 421 rcu_exit_nohz();
399
400 if (rdtp->dynticks_nesting++)
401 return;
402 rdtp->dynticks++;
403 WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
404 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
405} 422}
406 423
407/** 424/**
@@ -413,18 +430,7 @@ void rcu_irq_enter(void)
413 */ 430 */
414void rcu_irq_exit(void) 431void rcu_irq_exit(void)
415{ 432{
416 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 433 rcu_enter_nohz();
417
418 if (--rdtp->dynticks_nesting)
419 return;
420 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
421 rdtp->dynticks++;
422 WARN_ON_ONCE(rdtp->dynticks & 0x1);
423
424 /* If the interrupt queued a callback, get out of dyntick mode. */
425 if (__this_cpu_read(rcu_sched_data.nxtlist) ||
426 __this_cpu_read(rcu_bh_data.nxtlist))
427 set_need_resched();
428} 434}
429 435
430#ifdef CONFIG_SMP 436#ifdef CONFIG_SMP
@@ -436,19 +442,8 @@ void rcu_irq_exit(void)
436 */ 442 */
437static int dyntick_save_progress_counter(struct rcu_data *rdp) 443static int dyntick_save_progress_counter(struct rcu_data *rdp)
438{ 444{
439 int ret; 445 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
440 int snap; 446 return 0;
441 int snap_nmi;
442
443 snap = rdp->dynticks->dynticks;
444 snap_nmi = rdp->dynticks->dynticks_nmi;
445 smp_mb(); /* Order sampling of snap with end of grace period. */
446 rdp->dynticks_snap = snap;
447 rdp->dynticks_nmi_snap = snap_nmi;
448 ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
449 if (ret)
450 rdp->dynticks_fqs++;
451 return ret;
452} 447}
453 448
454/* 449/*
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
459 */ 454 */
460static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 455static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
461{ 456{
462 long curr; 457 unsigned long curr;
463 long curr_nmi; 458 unsigned long snap;
464 long snap;
465 long snap_nmi;
466 459
467 curr = rdp->dynticks->dynticks; 460 curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
468 snap = rdp->dynticks_snap; 461 snap = (unsigned long)rdp->dynticks_snap;
469 curr_nmi = rdp->dynticks->dynticks_nmi;
470 snap_nmi = rdp->dynticks_nmi_snap;
471 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
472 462
473 /* 463 /*
474 * If the CPU passed through or entered a dynticks idle phase with 464 * If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
478 * read-side critical section that started before the beginning 468 * read-side critical section that started before the beginning
479 * of the current RCU grace period. 469 * of the current RCU grace period.
480 */ 470 */
481 if ((curr != snap || (curr & 0x1) == 0) && 471 if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
482 (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
483 rdp->dynticks_fqs++; 472 rdp->dynticks_fqs++;
484 return 1; 473 return 1;
485 } 474 }
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
908 unsigned long gp_duration; 897 unsigned long gp_duration;
909 898
910 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 899 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
900
901 /*
902 * Ensure that all grace-period and pre-grace-period activity
903 * is seen before the assignment to rsp->completed.
904 */
905 smp_mb(); /* See above block comment. */
911 gp_duration = jiffies - rsp->gp_start; 906 gp_duration = jiffies - rsp->gp_start;
912 if (gp_duration > rsp->gp_max) 907 if (gp_duration > rsp->gp_max)
913 rsp->gp_max = gp_duration; 908 rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1455 */ 1450 */
1456static void rcu_process_callbacks(void) 1451static void rcu_process_callbacks(void)
1457{ 1452{
1458 /*
1459 * Memory references from any prior RCU read-side critical sections
1460 * executed by the interrupted code must be seen before any RCU
1461 * grace-period manipulations below.
1462 */
1463 smp_mb(); /* See above block comment. */
1464
1465 __rcu_process_callbacks(&rcu_sched_state, 1453 __rcu_process_callbacks(&rcu_sched_state,
1466 &__get_cpu_var(rcu_sched_data)); 1454 &__get_cpu_var(rcu_sched_data));
1467 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1455 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1468 rcu_preempt_process_callbacks(); 1456 rcu_preempt_process_callbacks();
1469 1457
1470 /*
1471 * Memory references from any later RCU read-side critical sections
1472 * executed by the interrupted code must be seen after any RCU
1473 * grace-period manipulations above.
1474 */
1475 smp_mb(); /* See above block comment. */
1476
1477 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ 1458 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1478 rcu_needs_cpu_flush(); 1459 rcu_needs_cpu_flush();
1479} 1460}
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
1494 local_irq_restore(flags); 1475 local_irq_restore(flags);
1495 return; 1476 return;
1496 } 1477 }
1497 wake_up(&__get_cpu_var(rcu_cpu_wq)); 1478 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1498 local_irq_restore(flags); 1479 local_irq_restore(flags);
1499} 1480}
1500 1481
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1544 */ 1525 */
1545static void rcu_cpu_kthread_timer(unsigned long arg) 1526static void rcu_cpu_kthread_timer(unsigned long arg)
1546{ 1527{
1547 unsigned long flags;
1548 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); 1528 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1549 struct rcu_node *rnp = rdp->mynode; 1529 struct rcu_node *rnp = rdp->mynode;
1550 1530
1551 raw_spin_lock_irqsave(&rnp->lock, flags); 1531 atomic_or(rdp->grpmask, &rnp->wakemask);
1552 rnp->wakemask |= rdp->grpmask;
1553 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1554 invoke_rcu_node_kthread(rnp); 1532 invoke_rcu_node_kthread(rnp);
1555} 1533}
1556 1534
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
1617 unsigned long flags; 1595 unsigned long flags;
1618 int spincnt = 0; 1596 int spincnt = 0;
1619 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); 1597 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1620 wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
1621 char work; 1598 char work;
1622 char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1599 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1623 1600
1624 for (;;) { 1601 for (;;) {
1625 *statusp = RCU_KTHREAD_WAITING; 1602 *statusp = RCU_KTHREAD_WAITING;
1626 wait_event_interruptible(*wqp, 1603 rcu_wait(*workp != 0 || kthread_should_stop());
1627 *workp != 0 || kthread_should_stop());
1628 local_bh_disable(); 1604 local_bh_disable();
1629 if (rcu_cpu_kthread_should_stop(cpu)) { 1605 if (rcu_cpu_kthread_should_stop(cpu)) {
1630 local_bh_enable(); 1606 local_bh_enable();
@@ -1672,10 +1648,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1672 if (IS_ERR(t)) 1648 if (IS_ERR(t))
1673 return PTR_ERR(t); 1649 return PTR_ERR(t);
1674 kthread_bind(t, cpu); 1650 kthread_bind(t, cpu);
1651 set_task_state(t, TASK_INTERRUPTIBLE);
1675 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1652 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1676 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); 1653 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1677 per_cpu(rcu_cpu_kthread_task, cpu) = t; 1654 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1678 wake_up_process(t);
1679 sp.sched_priority = RCU_KTHREAD_PRIO; 1655 sp.sched_priority = RCU_KTHREAD_PRIO;
1680 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1656 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1681 return 0; 1657 return 0;
@@ -1698,11 +1674,10 @@ static int rcu_node_kthread(void *arg)
1698 1674
1699 for (;;) { 1675 for (;;) {
1700 rnp->node_kthread_status = RCU_KTHREAD_WAITING; 1676 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1701 wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0); 1677 rcu_wait(atomic_read(&rnp->wakemask) != 0);
1702 rnp->node_kthread_status = RCU_KTHREAD_RUNNING; 1678 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1703 raw_spin_lock_irqsave(&rnp->lock, flags); 1679 raw_spin_lock_irqsave(&rnp->lock, flags);
1704 mask = rnp->wakemask; 1680 mask = atomic_xchg(&rnp->wakemask, 0);
1705 rnp->wakemask = 0;
1706 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 1681 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1707 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { 1682 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1708 if ((mask & 0x1) == 0) 1683 if ((mask & 0x1) == 0)
@@ -1781,9 +1756,9 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1781 if (IS_ERR(t)) 1756 if (IS_ERR(t))
1782 return PTR_ERR(t); 1757 return PTR_ERR(t);
1783 raw_spin_lock_irqsave(&rnp->lock, flags); 1758 raw_spin_lock_irqsave(&rnp->lock, flags);
1759 set_task_state(t, TASK_INTERRUPTIBLE);
1784 rnp->node_kthread_task = t; 1760 rnp->node_kthread_task = t;
1785 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1761 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1786 wake_up_process(t);
1787 sp.sched_priority = 99; 1762 sp.sched_priority = 99;
1788 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1763 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1789 } 1764 }
@@ -1800,21 +1775,16 @@ static int __init rcu_spawn_kthreads(void)
1800 1775
1801 rcu_kthreads_spawnable = 1; 1776 rcu_kthreads_spawnable = 1;
1802 for_each_possible_cpu(cpu) { 1777 for_each_possible_cpu(cpu) {
1803 init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
1804 per_cpu(rcu_cpu_has_work, cpu) = 0; 1778 per_cpu(rcu_cpu_has_work, cpu) = 0;
1805 if (cpu_online(cpu)) 1779 if (cpu_online(cpu))
1806 (void)rcu_spawn_one_cpu_kthread(cpu); 1780 (void)rcu_spawn_one_cpu_kthread(cpu);
1807 } 1781 }
1808 rnp = rcu_get_root(rcu_state); 1782 rnp = rcu_get_root(rcu_state);
1809 init_waitqueue_head(&rnp->node_wq);
1810 rcu_init_boost_waitqueue(rnp);
1811 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1783 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1812 if (NUM_RCU_NODES > 1) 1784 if (NUM_RCU_NODES > 1) {
1813 rcu_for_each_leaf_node(rcu_state, rnp) { 1785 rcu_for_each_leaf_node(rcu_state, rnp)
1814 init_waitqueue_head(&rnp->node_wq);
1815 rcu_init_boost_waitqueue(rnp);
1816 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1786 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1817 } 1787 }
1818 return 0; 1788 return 0;
1819} 1789}
1820early_initcall(rcu_spawn_kthreads); 1790early_initcall(rcu_spawn_kthreads);