author     Lai Jiangshan <laijs@cn.fujitsu.com>       2008-07-06 05:23:59 -0400
committer  Ingo Molnar <mingo@elte.hu>                2008-07-18 10:07:33 -0400
commit     5127bed588a2f8f3a1f732de2a8a190b7df5dce3 (patch)
tree       bf79321ffa4c1b7c1071bea8ad1a3eb6eb7b888e   /include/linux/rcuclassic.h
parent     3cac97cbb14aed00d83eb33d4613b0fe3aaea863 (diff)
rcu classic: new algorithm for callbacks-processing (v2)
This is v2; it differs a little from the v1 I had sent to lkml:

- use ACCESS_ONCE
- use rcu_batch_after()/rcu_batch_before() for batch # comparison

rcutorture test results (hotplug: one cpu-online/offline per second):

  No CONFIG_NO_HZ:            OK, 12 hours
  No CONFIG_NO_HZ, hotplug:   OK, 12 hours
  CONFIG_NO_HZ=y:             OK, 24 hours
  CONFIG_NO_HZ=y, hotplug:    Failed.

(The CONFIG_NO_HZ + hotplug case also failed without my patch applied;
exactly the same bug occurred: http://lkml.org/lkml/2008/7/3/24)

v1's email thread: http://lkml.org/lkml/2008/6/2/539

v1's description:

The code/algorithm of the current callbacks-processing implementation is
very efficient and clever, but while studying it I found a disadvantage:
in multi-CPU systems, when a new RCU callback is queued (call_rcu[_bh]),
the current implementation will very likely invoke it only after the
grace period for the batch with batch number rcp->cur+2 has completed.
In fact, the callback can be invoked as soon as the grace period for the
batch with batch number rcp->cur+1 has completed.

This delayed invocation extends the latency of synchronize_rcu(). More
importantly, the callbacks usually free memory, so that work is delayed
too; the reclaimer needs memory to be freed as soon as possible when
little memory is left.

A very simple approach solves this problem: add a field
(struct rcu_head::batch) recording the batch number of each RCU
callback. When a new callback is queued, set head->batch = rcp->cur+1;
while processing callbacks, move a callback to rdp->donelist once we
find that head->batch <= rcp->completed. This simple change reduces the
average wait before invocation considerably (from about 2.5 grace
periods to about 1.5 grace periods on multi-CPU systems).

That is the algorithm, but my implementation does not add any field to
struct rcu_head. We only need to remember the last two batches and their
batch numbers, because those two batches contain all entries whose grace
period has not yet completed. So instead of adding a field, we use a
special segmented linked list; please see the comment of struct
rcu_data.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Gautham Shenoy <ego@in.ibm.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
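To make the segmented-list idea concrete, here is a minimal user-space C
sketch of the bookkeeping described above. The names (demo_head,
demo_rcu_data, enqueue_callback, grace_period_completed) are illustrative
only, not the kernel's rcuclassic API. New callbacks are appended at
*nxttail[2]; each completed grace period promotes the two younger segments
by one position and splices the oldest segment [nxtlist, *nxttail[0]) onto
donelist:

#include <stdio.h>
#include <stdlib.h>

struct demo_head {
	struct demo_head *next;
	int id;				/* stand-in for the real callback */
};

struct demo_rcu_data {
	struct demo_head *nxtlist;	/* all not-yet-invoked callbacks */
	struct demo_head **nxttail[3];	/* segment boundaries */
	struct demo_head *donelist;	/* grace period completed */
	struct demo_head **donetail;
};

static void demo_init(struct demo_rcu_data *rdp)
{
	rdp->nxtlist = NULL;
	rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
	rdp->donelist = NULL;
	rdp->donetail = &rdp->donelist;
}

/* call_rcu() analogue: append to the newest segment; only nxttail[2] moves. */
static void enqueue_callback(struct demo_rcu_data *rdp, struct demo_head *h)
{
	h->next = NULL;
	*rdp->nxttail[2] = h;
	rdp->nxttail[2] = &h->next;
}

/*
 * Grace-period-end analogue: splice the oldest segment
 * [nxtlist, *nxttail[0]) onto donelist, then slide the two younger
 * segment boundaries down by one.  Any boundary that pointed into the
 * spliced-off segment is reset to &nxtlist (the empty position).
 */
static void grace_period_completed(struct demo_rcu_data *rdp)
{
	struct demo_head **tail0 = rdp->nxttail[0];

	if (tail0 != &rdp->nxtlist) {
		*rdp->donetail = rdp->nxtlist;
		rdp->nxtlist = *tail0;
		rdp->donetail = tail0;
		*tail0 = NULL;
	}
	rdp->nxttail[0] = (rdp->nxttail[1] == tail0) ? &rdp->nxtlist : rdp->nxttail[1];
	rdp->nxttail[1] = (rdp->nxttail[2] == tail0) ? &rdp->nxtlist : rdp->nxttail[2];
	if (rdp->nxttail[2] == tail0)
		rdp->nxttail[2] = &rdp->nxtlist;
}

/* Invoke and free everything whose grace period has completed. */
static void run_done_callbacks(struct demo_rcu_data *rdp)
{
	struct demo_head *h = rdp->donelist;

	rdp->donelist = NULL;
	rdp->donetail = &rdp->donelist;
	while (h) {
		struct demo_head *next = h->next;
		printf("invoking callback %d\n", h->id);
		free(h);
		h = next;
	}
}

int main(void)
{
	struct demo_rcu_data rdp;
	int i;

	demo_init(&rdp);
	for (i = 1; i <= 3; i++) {
		struct demo_head *h = malloc(sizeof(*h));
		h->id = i;
		enqueue_callback(&rdp, h);
	}
	grace_period_completed(&rdp);	/* newest -> middle */
	grace_period_completed(&rdp);	/* middle -> oldest */
	grace_period_completed(&rdp);	/* oldest -> donelist */
	run_done_callbacks(&rdp);	/* prints callbacks 1, 2, 3 */
	return 0;
}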
Diffstat (limited to 'include/linux/rcuclassic.h')
-rw-r--r--  include/linux/rcuclassic.h | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index c847e59c6006..04c728147be0 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -66,11 +66,7 @@ static inline int rcu_batch_after(long a, long b)
 	return (a - b) > 0;
 }
 
-/*
- * Per-CPU data for Read-Copy UPdate.
- * nxtlist - new callbacks are added here
- * curlist - current batch for which quiescent cycle started if any
- */
+/* Per-CPU data for Read-Copy UPdate. */
 struct rcu_data {
 	/* 1) quiescent state handling : */
 	long		quiescbatch;	 /* Batch # for grace period */
@@ -78,12 +74,24 @@ struct rcu_data {
 	int		qs_pending;	 /* core waits for quiesc state */
 
 	/* 2) batch handling */
-	long		batch;		 /* Batch # for current RCU batch */
+	/*
+	 * if nxtlist is not NULL, then:
+	 * batch:
+	 *	The batch # for the last entry of nxtlist
+	 * [*nxttail[1], NULL = *nxttail[2]):
+	 *	Entries that batch # <= batch
+	 * [*nxttail[0], *nxttail[1]):
+	 *	Entries that batch # <= batch - 1
+	 * [nxtlist, *nxttail[0]):
+	 *	Entries that batch # <= batch - 2
+	 *	The grace period for these entries has completed, and
+	 *	the other grace-period-completed entries may be moved
+	 *	here temporarily in rcu_process_callbacks().
+	 */
+	long		batch;
 	struct rcu_head *nxtlist;
-	struct rcu_head **nxttail;
+	struct rcu_head **nxttail[3];
 	long		qlen;		 /* # of queued callbacks */
-	struct rcu_head *curlist;
-	struct rcu_head **curtail;
 	struct rcu_head *donelist;
 	struct rcu_head **donetail;
 	long		blimit;		 /* Upper limit on a processed batch */
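Read together with the comment in the diff, the invariants work out simply
at the boundaries: on an empty queue all three tails point at nxtlist
itself, and the call_rcu() path only ever advances nxttail[2]. A hedged
sketch of those two operations (the rcuclassic.c half of this commit, not
shown in this file's diff, does the equivalent; rdp and head are the usual
rcu_data/rcu_head pointers):

	/* empty queue: every segment is empty, all tails aim at nxtlist */
	rdp->nxtlist = NULL;
	rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;

	/*
	 * enqueue: a new callback always joins the newest segment,
	 * [*nxttail[1], *nxttail[2]), so only nxttail[2] moves
	 */
	*rdp->nxttail[2] = head;
	rdp->nxttail[2] = &head->next;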