 lib/proportions.c | 37 ++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)
diff --git a/lib/proportions.c b/lib/proportions.c
index 332d8c58184d..9508d9a7af3e 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -190,6 +190,8 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  * PERCPU
  */
 
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
 int prop_local_init_percpu(struct prop_local_percpu *pl)
 {
 	spin_lock_init(&pl->lock);
@@ -230,31 +232,24 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
 
 	spin_lock_irqsave(&pl->lock, flags);
 	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+
 	/*
 	 * For each missed period, we half the local counter.
 	 * basically:
 	 *   pl->events >> (global_period - pl->period);
-	 *
-	 * but since the distributed nature of percpu counters make division
-	 * rather hard, use a regular subtraction loop. This is safe, because
-	 * the events will only every be incremented, hence the subtraction
-	 * can never result in a negative number.
 	 */
-	while (pl->period != global_period) {
-		unsigned long val = percpu_counter_read(&pl->events);
-		unsigned long half = (val + 1) >> 1;
-
-		/*
-		 * Half of zero won't be much less, break out.
-		 * This limits the loop to shift iterations, even
-		 * if we missed a million.
-		 */
-		if (!val)
-			break;
-
-		percpu_counter_add(&pl->events, -half);
-		pl->period += period;
-	}
+	period = (global_period - pl->period) >> (pg->shift - 1);
+	if (period < BITS_PER_LONG) {
+		s64 val = percpu_counter_read(&pl->events);
+
+		if (val < (nr_cpu_ids * PROP_BATCH))
+			val = percpu_counter_sum(&pl->events);
+
+		__percpu_counter_add(&pl->events, -val + (val >> period),
+					PROP_BATCH);
+	} else
+		percpu_counter_set(&pl->events, 0);
+
 	pl->period = global_period;
 	spin_unlock_irqrestore(&pl->lock, flags);
 }
@@ -267,7 +262,7 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
 	struct prop_global *pg = prop_get_global(pd);
 
 	prop_norm_percpu(pg, pl);
-	percpu_counter_add(&pl->events, 1);
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
 	percpu_counter_add(&pg->events, 1);
 	prop_put_global(pd, pg);
 }
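
The patch replaces the per-period halving loop in prop_norm_percpu() with a single right shift over the number of missed periods, zeroing the counter outright once the shift would reach BITS_PER_LONG, and it batches the per-CPU counter updates with PROP_BATCH. Below is a minimal userspace sketch of just the decay arithmetic, assuming an exact counter value; the helper names decay_by_halving() and decay_by_shift() are illustrative, not kernel API, and the sketch leaves out the percpu_counter_sum() fallback the real code uses when the approximate read is below nr_cpu_ids * PROP_BATCH.

/*
 * Sketch only: compare the old per-period halving with the new
 * single-shift decay.  Names here are illustrative, not kernel API,
 * and the counter value is assumed to be exact.
 */
#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG	(sizeof(long) * CHAR_BIT)

/* Old scheme: halve the counter once per missed period. */
static unsigned long decay_by_halving(unsigned long events, unsigned long periods)
{
	while (periods--) {
		unsigned long half = (events + 1) >> 1;

		if (!events)
			break;
		events -= half;
	}
	return events;
}

/* New scheme: apply all missed periods as one shift; clamp long gaps to zero. */
static unsigned long decay_by_shift(unsigned long events, unsigned long periods)
{
	if (periods >= BITS_PER_LONG)
		return 0;
	return events >> periods;
}

int main(void)
{
	unsigned long events = 1000000, periods = 5;

	/* Both print 31250: halving five times equals shifting right by 5. */
	printf("halving loop: %lu\n", decay_by_halving(events, periods));
	printf("single shift: %lu\n", decay_by_shift(events, periods));
	return 0;
}

In the kernel code the __percpu_counter_add(..., PROP_BATCH) calls let each CPU accumulate up to PROP_BATCH events before folding them into the shared count, so a plain percpu_counter_read() can be off by up to nr_cpu_ids * PROP_BATCH; that is presumably why the patch re-reads the counter with the exact percpu_counter_sum() whenever the fast read falls below that bound, so the -val + (val >> period) adjustment cannot drive the counter negative.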