about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2008-08-05 12:21:44 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-08-15 10:08:47 -0400
commit: 1f7b94cd3d564901f9e04a8bc5832ae7bfd690a0 (patch)
tree: 32da3db422dd590cadd879bf6e5f8c5aa7b2a2a2
parent: 293a17ebc944c958e24e6ffbd1d5a49abdbf489e (diff)
rcu: classic RCU locking and memory-barrier cleanups
This patch simplifies the locking and memory-barrier usage in the Classic RCU grace-period-detection mechanism, incorporating Lai Jiangshan's feedback from the earlier version (http://lkml.org/lkml/2008/8/1/400 and http://lkml.org/lkml/2008/8/3/43). Passed 10 hours of rcutorture concurrent with CPUs being put online and taken offline on a 128-hardware-thread Power machine. My apologies to whoever in the Eastern Hemisphere was planning to use this machine over the Western Hemisphere night, but it was sitting idle and...

So this is ready for tip/core/rcu.

This patch is in preparation for moving to a hierarchical algorithm to allow the very large SMP machines -- requested by some people at OLS, and there seem to have been a few recent patches in the 4096-CPU direction as well. The general idea is to move to a much more conservative concurrency design, then apply a hierarchy to reduce contention on the global lock by a few orders of magnitude (larger machines would see greater reductions). The reason for taking a conservative approach is that this code isn't on any fast path.

Prototype in progress.

This patch is against the linux-tip git tree (tip/core/rcu). If you wish to test this against 2.6.26, use the following set of patches:

http://www.rdrop.com/users/paulmck/patches/2.6.26-ljsimp-1.patch
http://www.rdrop.com/users/paulmck/patches/2.6.26-ljsimpfix-3.patch

The first patch combines commits 5127bed588a2f8f3a1f732de2a8a190b7df5dce3 and 3cac97cbb14aed00d83eb33d4613b0fe3aaea863 from Lai Jiangshan <laijs@cn.fujitsu.com>, and the second patch contains my changes.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- kernel/rcuclassic.c 51
1 files changed, 41 insertions, 10 deletions
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index dab2676d9d72..5de126630b10 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -87,6 +87,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
87 int cpu; 87 int cpu;
88 cpumask_t cpumask; 88 cpumask_t cpumask;
89 set_need_resched(); 89 set_need_resched();
90 spin_lock(&rcp->lock);
90 if (unlikely(!rcp->signaled)) { 91 if (unlikely(!rcp->signaled)) {
91 rcp->signaled = 1; 92 rcp->signaled = 1;
92 /* 93 /*
@@ -112,6 +113,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
112 for_each_cpu_mask_nr(cpu, cpumask) 113 for_each_cpu_mask_nr(cpu, cpumask)
113 smp_send_reschedule(cpu); 114 smp_send_reschedule(cpu);
114 } 115 }
116 spin_unlock(&rcp->lock);
115} 117}
116#else 118#else
117static inline void force_quiescent_state(struct rcu_data *rdp, 119static inline void force_quiescent_state(struct rcu_data *rdp,
@@ -125,7 +127,9 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
125 struct rcu_data *rdp) 127 struct rcu_data *rdp)
126{ 128{
127 long batch; 129 long batch;
128 smp_mb(); /* reads the most recently updated value of rcu->cur. */ 130
131 head->next = NULL;
132 smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
129 133
130 /* 134 /*
131 * Determine the batch number of this callback. 135 * Determine the batch number of this callback.
@@ -175,7 +179,6 @@ void call_rcu(struct rcu_head *head,
175 unsigned long flags; 179 unsigned long flags;
176 180
177 head->func = func; 181 head->func = func;
178 head->next = NULL;
179 local_irq_save(flags); 182 local_irq_save(flags);
180 __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); 183 __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
181 local_irq_restore(flags); 184 local_irq_restore(flags);
@@ -204,7 +207,6 @@ void call_rcu_bh(struct rcu_head *head,
204 unsigned long flags; 207 unsigned long flags;
205 208
206 head->func = func; 209 head->func = func;
207 head->next = NULL;
208 local_irq_save(flags); 210 local_irq_save(flags);
209 __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); 211 __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
210 local_irq_restore(flags); 212 local_irq_restore(flags);
@@ -467,17 +469,17 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
467static void __rcu_offline_cpu(struct rcu_data *this_rdp, 469static void __rcu_offline_cpu(struct rcu_data *this_rdp,
468 struct rcu_ctrlblk *rcp, struct rcu_data *rdp) 470 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
469{ 471{
470 /* if the cpu going offline owns the grace period 472 /*
473 * if the cpu going offline owns the grace period
471 * we can block indefinitely waiting for it, so flush 474 * we can block indefinitely waiting for it, so flush
472 * it here 475 * it here
473 */ 476 */
474 spin_lock_bh(&rcp->lock); 477 spin_lock_bh(&rcp->lock);
475 if (rcp->cur != rcp->completed) 478 if (rcp->cur != rcp->completed)
476 cpu_quiet(rdp->cpu, rcp); 479 cpu_quiet(rdp->cpu, rcp);
477 spin_unlock_bh(&rcp->lock);
478 /* spin_lock implies smp_mb() */
479 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); 480 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
480 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); 481 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
482 spin_unlock_bh(&rcp->lock);
481 483
482 local_irq_disable(); 484 local_irq_disable();
483 this_rdp->qlen += rdp->qlen; 485 this_rdp->qlen += rdp->qlen;
@@ -511,16 +513,19 @@ static void rcu_offline_cpu(int cpu)
511static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, 513static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
512 struct rcu_data *rdp) 514 struct rcu_data *rdp)
513{ 515{
516 long completed_snap;
517
514 if (rdp->nxtlist) { 518 if (rdp->nxtlist) {
515 local_irq_disable(); 519 local_irq_disable();
520 completed_snap = ACCESS_ONCE(rcp->completed);
516 521
517 /* 522 /*
518 * move the other grace-period-completed entries to 523 * move the other grace-period-completed entries to
519 * [rdp->nxtlist, *rdp->nxttail[0]) temporarily 524 * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
520 */ 525 */
521 if (!rcu_batch_before(rcp->completed, rdp->batch)) 526 if (!rcu_batch_before(completed_snap, rdp->batch))
522 rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; 527 rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
523 else if (!rcu_batch_before(rcp->completed, rdp->batch - 1)) 528 else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
524 rdp->nxttail[0] = rdp->nxttail[1]; 529 rdp->nxttail[0] = rdp->nxttail[1];
525 530
526 /* 531 /*
@@ -561,8 +566,24 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
561 566
562static void rcu_process_callbacks(struct softirq_action *unused) 567static void rcu_process_callbacks(struct softirq_action *unused)
563{ 568{
569 /*
570 * Memory references from any prior RCU read-side critical sections
571 * executed by the interrupted code must be seen before any RCU
572 * grace-period manipulations below.
573 */
574
575 smp_mb(); /* See above block comment. */
576
564 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); 577 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
565 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); 578 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
579
580 /*
581 * Memory references from any later RCU read-side critical sections
582 * executed by the interrupted code must be seen after any RCU
583 * grace-period manipulations above.
584 */
585
586 smp_mb(); /* See above block comment. */
566} 587}
567 588
568static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) 589static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
@@ -571,13 +592,15 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
571 check_cpu_stall(rcp, rdp); 592 check_cpu_stall(rcp, rdp);
572 593
573 if (rdp->nxtlist) { 594 if (rdp->nxtlist) {
595 long completed_snap = ACCESS_ONCE(rcp->completed);
596
574 /* 597 /*
575 * This cpu has pending rcu entries and the grace period 598 * This cpu has pending rcu entries and the grace period
576 * for them has completed. 599 * for them has completed.
577 */ 600 */
578 if (!rcu_batch_before(rcp->completed, rdp->batch)) 601 if (!rcu_batch_before(completed_snap, rdp->batch))
579 return 1; 602 return 1;
580 if (!rcu_batch_before(rcp->completed, rdp->batch - 1) && 603 if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
581 rdp->nxttail[0] != rdp->nxttail[1]) 604 rdp->nxttail[0] != rdp->nxttail[1])
582 return 1; 605 return 1;
583 if (rdp->nxttail[0] != &rdp->nxtlist) 606 if (rdp->nxttail[0] != &rdp->nxtlist)
@@ -628,6 +651,12 @@ int rcu_needs_cpu(int cpu)
628 return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); 651 return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
629} 652}
630 653
654/*
655 * Top-level function driving RCU grace-period detection, normally
656 * invoked from the scheduler-clock interrupt. This function simply
657 * increments counters that are read only from softirq by this same
658 * CPU, so there are no memory barriers required.
659 */
631void rcu_check_callbacks(int cpu, int user) 660void rcu_check_callbacks(int cpu, int user)
632{ 661{
633 if (user || 662 if (user ||
@@ -671,6 +700,7 @@ void rcu_check_callbacks(int cpu, int user)
671static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, 700static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
672 struct rcu_data *rdp) 701 struct rcu_data *rdp)
673{ 702{
703 spin_lock(&rcp->lock);
674 memset(rdp, 0, sizeof(*rdp)); 704 memset(rdp, 0, sizeof(*rdp));
675 rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; 705 rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
676 rdp->donetail = &rdp->donelist; 706 rdp->donetail = &rdp->donelist;
@@ -678,6 +708,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
678 rdp->qs_pending = 0; 708 rdp->qs_pending = 0;
679 rdp->cpu = cpu; 709 rdp->cpu = cpu;
680 rdp->blimit = blimit; 710 rdp->blimit = blimit;
711 spin_unlock(&rcp->lock);
681} 712}
682 713
683static void __cpuinit rcu_online_cpu(int cpu) 714static void __cpuinit rcu_online_cpu(int cpu)