lib: proportion: fix underflow in prop_norm_percpu()

Zhe Jiang noticed that it's possible to underflow pl->events in prop_norm_percpu() when the value returned by percpu_counter_read() is less than the error on that read and the period delay > 1. In that case half might not trigger the batch increment and the value will be identical on the next iteration, causing the same half to be subtracted again and again.

Fix this by rewriting the division as a single subtraction instead of a subtraction loop and using percpu_counter_sum() when the value returned by percpu_counter_read() is smaller than the error. The latter is still needed if we want pl->events to shrink properly in the error region.

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Jiang Zhe <zhe.jiang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2007-12-22 17:03:29 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-12-23 15:54:37 -0500
commit: f16b34aa13e8c55085f346bcf07afb2312c56c0a (patch)
tree: 4fba40d99a5907694a1a1de8023f1431204f4f98 /lib
parent: cc295d0e95063809af31971e4aec1d809247f13b (diff)
1 files changed, 16 insertions, 21 deletions
diff --git a/lib/proportions.c b/lib/proportions.c
index 332d8c58184..9508d9a7af3 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -190,6 +190,8 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
 * PERCPU
 */
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 int prop_local_init_percpu(struct prop_local_percpu *pl)
 {
        spin_lock_init(&pl->lock);
@@ -230,31 +232,24 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we half the local counter.
         * basically:
         *   pl->events >> (global_period - pl->period);
-         *
-         * but since the distributed nature of percpu counters make division
-         * rather hard, use a regular subtraction loop. This is safe, because
-         * the events will only every be incremented, hence the subtraction
-         * can never result in a negative number.
         */
-        while (pl->period != global_period) {
+        period = (global_period - pl->period) >> (pg->shift - 1);
-                unsigned long val = percpu_counter_read(&pl->events);
+        if (period < BITS_PER_LONG) {
-                unsigned long half = (val + 1) >> 1;
+                s64 val = percpu_counter_read(&pl->events);
-                /*
+                if (val < (nr_cpu_ids * PROP_BATCH))
-                 * Half of zero won't be much less, break out.
+                        val = percpu_counter_sum(&pl->events);
-                 * This limits the loop to shift iterations, even
-                 * if we missed a million.
+                __percpu_counter_add(&pl->events, -val + (val >> period),
-                 */
+                                        PROP_BATCH);
-                if (!val)
+        } else
-                        break;
+                percpu_counter_set(&pl->events, 0);
-                percpu_counter_add(&pl->events, -half);
-                pl->period += period;
-        }
        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
 }
@@ -267,7 +262,7 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
        struct prop_global *pg = prop_get_global(pd);
        prop_norm_percpu(pg, pl);
-        percpu_counter_add(&pl->events, 1);
+        __percpu_counter_add(&pl->events, 1, PROP_BATCH);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
 }
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2007-12-22 17:03:29 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-12-23 15:54:37 -0500
commit	f16b34aa13e8c55085f346bcf07afb2312c56c0a (patch)
tree	4fba40d99a5907694a1a1de8023f1431204f4f98 /lib
parent	cc295d0e95063809af31971e4aec1d809247f13b (diff)

diff --git a/lib/proportions.c b/lib/proportions.c
index 332d8c58184..9508d9a7af3 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -190,6 +190,8 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
190	* PERCPU	190	* PERCPU
191	*/	191	*/
192		192
		193	#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
		194
193	int prop_local_init_percpu(struct prop_local_percpu *pl)	195	int prop_local_init_percpu(struct prop_local_percpu *pl)
194	{	196	{
195	spin_lock_init(&pl->lock);	197	spin_lock_init(&pl->lock);
@@ -230,31 +232,24 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
230		232
231	spin_lock_irqsave(&pl->lock, flags);	233	spin_lock_irqsave(&pl->lock, flags);
232	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);	234	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
		235
233	/*	236	/*
234	* For each missed period, we half the local counter.	237	* For each missed period, we half the local counter.
235	* basically:	238	* basically:
236	* pl->events >> (global_period - pl->period);	239	* pl->events >> (global_period - pl->period);
237	*
238	* but since the distributed nature of percpu counters make division
239	* rather hard, use a regular subtraction loop. This is safe, because
240	* the events will only every be incremented, hence the subtraction
241	* can never result in a negative number.
242	*/	240	*/
243	while (pl->period != global_period) {	241	period = (global_period - pl->period) >> (pg->shift - 1);
244	unsigned long val = percpu_counter_read(&pl->events);	242	if (period < BITS_PER_LONG) {
245	unsigned long half = (val + 1) >> 1;	243	s64 val = percpu_counter_read(&pl->events);
246		244
247	/*	245	if (val < (nr_cpu_ids * PROP_BATCH))
248	* Half of zero won't be much less, break out.	246	val = percpu_counter_sum(&pl->events);
249	* This limits the loop to shift iterations, even	247
250	* if we missed a million.	248	__percpu_counter_add(&pl->events, -val + (val >> period),
251	*/	249	PROP_BATCH);
252	if (!val)	250	} else
253	break;	251	percpu_counter_set(&pl->events, 0);
254		252
255	percpu_counter_add(&pl->events, -half);
256	pl->period += period;
257	}
258	pl->period = global_period;	253	pl->period = global_period;
259	spin_unlock_irqrestore(&pl->lock, flags);	254	spin_unlock_irqrestore(&pl->lock, flags);
260	}	255	}
@@ -267,7 +262,7 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
267	struct prop_global *pg = prop_get_global(pd);	262	struct prop_global *pg = prop_get_global(pd);
268		263
269	prop_norm_percpu(pg, pl);	264	prop_norm_percpu(pg, pl);
270	percpu_counter_add(&pl->events, 1);	265	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
271	percpu_counter_add(&pg->events, 1);	266	percpu_counter_add(&pg->events, 1);
272	prop_put_global(pd, pg);	267	prop_put_global(pd, pg);
273	}	268	}