lib: floating proportions

Given a set of objects, floating proportions aims to efficiently give the proportional 'activity' of a single item as compared to the whole set. Where 'activity' is a measure of a temporal property of the items. It is efficient in that it need not inspect any other items of the set in order to provide the answer. It is not even needed to know how many other items there are. It has one parameter, and that is the period of 'time' over which the 'activity' is measured. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2007-10-17 02:25:49 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-17 11:42:45 -0400
commit: 145ca25eb2fbd20d4faf1bad4628c7650332058f (patch)
tree: ee5bebd5b7b1f0ced5e30c8f29f1ff5301aa9d0a
parent: 69cb51d18c1ed593009d9a620cac49d0dcf15dc8 (diff)
3 files changed, 505 insertions, 1 deletions
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
new file mode 100644
index 000000000000..2c3b3cad92be
--- /dev/null
+++ b/include/linux/proportions.h
@@ -0,0 +1,119 @@
+/*
+ * FLoating proportions
+ *
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * This file contains the public data structure and API definitions.
+ */
+#ifndef _LINUX_PROPORTIONS_H
+#define _LINUX_PROPORTIONS_H
+#include <linux/percpu_counter.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+struct prop_global {
+        /*
+         * The period over which we differentiate
+         *
+         *   period = 2^shift
+         */
+        int shift;
+        /*
+         * The total event counter aka 'time'.
+         *
+         * Treated as an unsigned long; the lower 'shift - 1' bits are the
+         * counter bits, the remaining upper bits the period counter.
+         */
+        struct percpu_counter events;
+};
+/*
+ * global proportion descriptor
+ *
+ * this is needed to consitently flip prop_global structures.
+ */
+struct prop_descriptor {
+        int index;
+        struct prop_global pg[2];
+        struct mutex mutex;             /* serialize the prop_global switch */
+};
+int prop_descriptor_init(struct prop_descriptor *pd, int shift);
+void prop_change_shift(struct prop_descriptor *pd, int new_shift);
+/*
+ * ----- PERCPU ------
+ */
+struct prop_local_percpu {
+        /*
+         * the local events counter
+         */
+        struct percpu_counter events;
+        /*
+         * snapshot of the last seen global state
+         */
+        int shift;
+        unsigned long period;
+        spinlock_t lock;                /* protect the snapshot state */
+};
+int prop_local_init_percpu(struct prop_local_percpu *pl);
+void prop_local_destroy_percpu(struct prop_local_percpu *pl);
+void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
+void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
+                long *numerator, long *denominator);
+static inline
+void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
+{
+        unsigned long flags;
+        local_irq_save(flags);
+        __prop_inc_percpu(pd, pl);
+        local_irq_restore(flags);
+}
+/*
+ * ----- SINGLE ------
+ */
+struct prop_local_single {
+        /*
+         * the local events counter
+         */
+        unsigned long events;
+        /*
+         * snapshot of the last seen global state
+         * and a lock protecting this state
+         */
+        int shift;
+        unsigned long period;
+        spinlock_t lock;                /* protect the snapshot state */
+};
+#define INIT_PROP_LOCAL_SINGLE(name)                    \
+{       .lock = __SPIN_LOCK_UNLOCKED(name.lock),        \
+}
+int prop_local_init_single(struct prop_local_single *pl);
+void prop_local_destroy_single(struct prop_local_single *pl);
+void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl);
+void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl,
+                long *numerator, long *denominator);
+static inline
+void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
+{
+        unsigned long flags;
+        local_irq_save(flags);
+        __prop_inc_single(pd, pl);
+        local_irq_restore(flags);
+}
+#endif /* _LINUX_PROPORTIONS_H */
diff --git a/lib/Makefile b/lib/Makefile
index 6c4ea33bb2cb..c5f215d509d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -5,7 +5,8 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
         rbtree.o radix-tree.o dump_stack.o \
         idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
-         sha1.o irq_regs.o reciprocal_div.o argv_split.o
+         sha1.o irq_regs.o reciprocal_div.o argv_split.o \
+         proportions.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/proportions.c b/lib/proportions.c
new file mode 100644
index 000000000000..332d8c58184d
--- /dev/null
+++ b/lib/proportions.c
@@ -0,0 +1,384 @@
+/*
+ * Floating proportions
+ *
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * The floating proportion is a time derivative with an exponentially decaying
+ * history:
+ *
+ *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
+ *
+ * Where j is an element from {prop_local}, x_{j} is j's number of events,
+ * and i the time period over which the differential is taken. So d/dt_{-i} is
+ * the differential over the i-th last period.
+ *
+ * The decaying history gives smooth transitions. The time differential carries
+ * the notion of speed.
+ *
+ * The denominator is 2^(1+i) because we want the series to be normalised, ie.
+ *
+ *   \Sum_{i=0} 1/2^(1+i) = 1
+ *
+ * Further more, if we measure time (t) in the same events as x; so that:
+ *
+ *   t = \Sum_{j} x_{j}
+ *
+ * we get that:
+ *
+ *   \Sum_{j} p_{j} = 1
+ *
+ * Writing this in an iterative fashion we get (dropping the 'd's):
+ *
+ *   if (++x_{j}, ++t > period)
+ *     t /= 2;
+ *     for_each (j)
+ *       x_{j} /= 2;
+ *
+ * so that:
+ *
+ *   p_{j} = x_{j} / t;
+ *
+ * We optimize away the '/= 2' for the global time delta by noting that:
+ *
+ *   if (++t > period) t /= 2:
+ *
+ * Can be approximated by:
+ *
+ *   period/2 + (++t % period/2)
+ *
+ * [ Furthermore, when we choose period to be 2^n it can be written in terms of
+ *   binary operations and wraparound artefacts disappear. ]
+ *
+ * Also note that this yields a natural counter of the elapsed periods:
+ *
+ *   c = t / (period/2)
+ *
+ * [ Its monotonic increasing property can be applied to mitigate the wrap-
+ *   around issue. ]
+ *
+ * This allows us to do away with the loop over all prop_locals on each period
+ * expiration. By remembering the period count under which it was last accessed
+ * as c_{j}, we can obtain the number of 'missed' cycles from:
+ *
+ *   c - c_{j}
+ *
+ * We can then lazily catch up to the global period count every time we are
+ * going to use x_{j}, by doing:
+ *
+ *   x_{j} /= 2^(c - c_{j}), c_{j} = c
+ */
+#include <linux/proportions.h>
+#include <linux/rcupdate.h>
+/*
+ * Limit the time part in order to ensure there are some bits left for the
+ * cycle counter.
+ */
+#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
+int prop_descriptor_init(struct prop_descriptor *pd, int shift)
+{
+        int err;
+        if (shift > PROP_MAX_SHIFT)
+                shift = PROP_MAX_SHIFT;
+        pd->index = 0;
+        pd->pg[0].shift = shift;
+        mutex_init(&pd->mutex);
+        err = percpu_counter_init_irq(&pd->pg[0].events, 0);
+        if (err)
+                goto out;
+        err = percpu_counter_init_irq(&pd->pg[1].events, 0);
+        if (err)
+                percpu_counter_destroy(&pd->pg[0].events);
+out:
+        return err;
+}
+/*
+ * We have two copies, and flip between them to make it seem like an atomic
+ * update. The update is not really atomic wrt the events counter, but
+ * it is internally consistent with the bit layout depending on shift.
+ *
+ * We copy the events count, move the bits around and flip the index.
+ */
+void prop_change_shift(struct prop_descriptor *pd, int shift)
+{
+        int index;
+        int offset;
+        u64 events;
+        unsigned long flags;
+        if (shift > PROP_MAX_SHIFT)
+                shift = PROP_MAX_SHIFT;
+        mutex_lock(&pd->mutex);
+        index = pd->index ^ 1;
+        offset = pd->pg[pd->index].shift - shift;
+        if (!offset)
+                goto out;
+        pd->pg[index].shift = shift;
+        local_irq_save(flags);
+        events = percpu_counter_sum(&pd->pg[pd->index].events);
+        if (offset < 0)
+                events <<= -offset;
+        else
+                events >>= offset;
+        percpu_counter_set(&pd->pg[index].events, events);
+        /*
+         * ensure the new pg is fully written before the switch
+         */
+        smp_wmb();
+        pd->index = index;
+        local_irq_restore(flags);
+        synchronize_rcu();
+out:
+        mutex_unlock(&pd->mutex);
+}
+/*
+ * wrap the access to the data in an rcu_read_lock() section;
+ * this is used to track the active references.
+ */
+static struct prop_global *prop_get_global(struct prop_descriptor *pd)
+{
+        int index;
+        rcu_read_lock();
+        index = pd->index;
+        /*
+         * match the wmb from vcd_flip()
+         */
+        smp_rmb();
+        return &pd->pg[index];
+}
+static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
+{
+        rcu_read_unlock();
+}
+static void
+prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
+{
+        int offset = *pl_shift - new_shift;
+        if (!offset)
+                return;
+        if (offset < 0)
+                *pl_period <<= -offset;
+        else
+                *pl_period >>= offset;
+        *pl_shift = new_shift;
+}
+/*
+ * PERCPU
+ */
+int prop_local_init_percpu(struct prop_local_percpu *pl)
+{
+        spin_lock_init(&pl->lock);
+        pl->shift = 0;
+        pl->period = 0;
+        return percpu_counter_init_irq(&pl->events, 0);
+}
+void prop_local_destroy_percpu(struct prop_local_percpu *pl)
+{
+        percpu_counter_destroy(&pl->events);
+}
+/*
+ * Catch up with missed period expirations.
+ *
+ *   until (c_{j} == c)
+ *     x_{j} -= x_{j}/2;
+ *     c_{j}++;
+ */
+static
+void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
+{
+        unsigned long period = 1UL << (pg->shift - 1);
+        unsigned long period_mask = ~(period - 1);
+        unsigned long global_period;
+        unsigned long flags;
+        global_period = percpu_counter_read(&pg->events);
+        global_period &= period_mask;
+        /*
+         * Fast path - check if the local and global period count still match
+         * outside of the lock.
+         */
+        if (pl->period == global_period)
+                return;
+        spin_lock_irqsave(&pl->lock, flags);
+        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+        /*
+         * For each missed period, we half the local counter.
+         * basically:
+         *   pl->events >> (global_period - pl->period);
+         *
+         * but since the distributed nature of percpu counters make division
+         * rather hard, use a regular subtraction loop. This is safe, because
+         * the events will only every be incremented, hence the subtraction
+         * can never result in a negative number.
+         */
+        while (pl->period != global_period) {
+                unsigned long val = percpu_counter_read(&pl->events);
+                unsigned long half = (val + 1) >> 1;
+                /*
+                 * Half of zero won't be much less, break out.
+                 * This limits the loop to shift iterations, even
+                 * if we missed a million.
+                 */
+                if (!val)
+                        break;
+                percpu_counter_add(&pl->events, -half);
+                pl->period += period;
+        }
+        pl->period = global_period;
+        spin_unlock_irqrestore(&pl->lock, flags);
+}
+/*
+ *   ++x_{j}, ++t
+ */
+void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
+{
+        struct prop_global *pg = prop_get_global(pd);
+        prop_norm_percpu(pg, pl);
+        percpu_counter_add(&pl->events, 1);
+        percpu_counter_add(&pg->events, 1);
+        prop_put_global(pd, pg);
+}
+/*
+ * Obtain a fraction of this proportion
+ *
+ *   p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_percpu(struct prop_descriptor *pd,
+                struct prop_local_percpu *pl,
+                long *numerator, long *denominator)
+{
+        struct prop_global *pg = prop_get_global(pd);
+        unsigned long period_2 = 1UL << (pg->shift - 1);
+        unsigned long counter_mask = period_2 - 1;
+        unsigned long global_count;
+        prop_norm_percpu(pg, pl);
+        *numerator = percpu_counter_read_positive(&pl->events);
+        global_count = percpu_counter_read(&pg->events);
+        *denominator = period_2 + (global_count & counter_mask);
+        prop_put_global(pd, pg);
+}
+/*
+ * SINGLE
+ */
+int prop_local_init_single(struct prop_local_single *pl)
+{
+        spin_lock_init(&pl->lock);
+        pl->shift = 0;
+        pl->period = 0;
+        pl->events = 0;
+        return 0;
+}
+void prop_local_destroy_single(struct prop_local_single *pl)
+{
+}
+/*
+ * Catch up with missed period expirations.
+ */
+static
+void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
+{
+        unsigned long period = 1UL << (pg->shift - 1);
+        unsigned long period_mask = ~(period - 1);
+        unsigned long global_period;
+        unsigned long flags;
+        global_period = percpu_counter_read(&pg->events);
+        global_period &= period_mask;
+        /*
+         * Fast path - check if the local and global period count still match
+         * outside of the lock.
+         */
+        if (pl->period == global_period)
+                return;
+        spin_lock_irqsave(&pl->lock, flags);
+        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+        /*
+         * For each missed period, we half the local counter.
+         */
+        period = (global_period - pl->period) >> (pg->shift - 1);
+        if (likely(period < BITS_PER_LONG))
+                pl->events >>= period;
+        else
+                pl->events = 0;
+        pl->period = global_period;
+        spin_unlock_irqrestore(&pl->lock, flags);
+}
+/*
+ *   ++x_{j}, ++t
+ */
+void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
+{
+        struct prop_global *pg = prop_get_global(pd);
+        prop_norm_single(pg, pl);
+        pl->events++;
+        percpu_counter_add(&pg->events, 1);
+        prop_put_global(pd, pg);
+}
+/*
+ * Obtain a fraction of this proportion
+ *
+ *   p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_single(struct prop_descriptor *pd,
+                struct prop_local_single *pl,
+                long *numerator, long *denominator)
+{
+        struct prop_global *pg = prop_get_global(pd);
+        unsigned long period_2 = 1UL << (pg->shift - 1);
+        unsigned long counter_mask = period_2 - 1;
+        unsigned long global_count;
+        prop_norm_single(pg, pl);
+        *numerator = pl->events;
+        global_count = percpu_counter_read(&pg->events);
+        *denominator = period_2 + (global_count & counter_mask);
+        prop_put_global(pd, pg);
+}
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2007-10-17 02:25:49 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-17 11:42:45 -0400
commit	145ca25eb2fbd20d4faf1bad4628c7650332058f (patch)
tree	ee5bebd5b7b1f0ced5e30c8f29f1ff5301aa9d0a
parent	69cb51d18c1ed593009d9a620cac49d0dcf15dc8 (diff)

diff --git a/include/linux/proportions.h b/include/linux/proportions.h new file mode 100644 index 000000000000..2c3b3cad92be --- /dev/null +++ b/include/linux/proportions.h
@@ -0,0 +1,119 @@
		1	/*
		2	* FLoating proportions
		3	*
		4	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
		5	*
		6	* This file contains the public data structure and API definitions.
		7	*/
		8
		9	#ifndef _LINUX_PROPORTIONS_H
		10	#define _LINUX_PROPORTIONS_H
		11
		12	#include <linux/percpu_counter.h>
		13	#include <linux/spinlock.h>
		14	#include <linux/mutex.h>
		15
		16	struct prop_global {
		17	/*
		18	* The period over which we differentiate
		19	*
		20	* period = 2^shift
		21	*/
		22	int shift;
		23	/*
		24	* The total event counter aka 'time'.
		25	*
		26	* Treated as an unsigned long; the lower 'shift - 1' bits are the
		27	* counter bits, the remaining upper bits the period counter.
		28	*/
		29	struct percpu_counter events;
		30	};
		31
		32	/*
		33	* global proportion descriptor
		34	*
		35	* this is needed to consitently flip prop_global structures.
		36	*/
		37	struct prop_descriptor {
		38	int index;
		39	struct prop_global pg[2];
		40	struct mutex mutex; /* serialize the prop_global switch */
		41	};
		42
		43	int prop_descriptor_init(struct prop_descriptor *pd, int shift);
		44	void prop_change_shift(struct prop_descriptor *pd, int new_shift);
		45
		46	/*
		47	* ----- PERCPU ------
		48	*/
		49
		50	struct prop_local_percpu {
		51	/*
		52	* the local events counter
		53	*/
		54	struct percpu_counter events;
		55
		56	/*
		57	* snapshot of the last seen global state
		58	*/
		59	int shift;
		60	unsigned long period;
		61	spinlock_t lock; /* protect the snapshot state */
		62	};
		63
		64	int prop_local_init_percpu(struct prop_local_percpu *pl);
		65	void prop_local_destroy_percpu(struct prop_local_percpu *pl);
		66	void __prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl);
		67	void prop_fraction_percpu(struct prop_descriptor pd, struct prop_local_percpu pl,
		68	long numerator, long denominator);
		69
		70	static inline
		71	void prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl)
		72	{
		73	unsigned long flags;
		74
		75	local_irq_save(flags);
		76	__prop_inc_percpu(pd, pl);
		77	local_irq_restore(flags);
		78	}
		79
		80	/*
		81	* ----- SINGLE ------
		82	*/
		83
		84	struct prop_local_single {
		85	/*
		86	* the local events counter
		87	*/
		88	unsigned long events;
		89
		90	/*
		91	* snapshot of the last seen global state
		92	* and a lock protecting this state
		93	*/
		94	int shift;
		95	unsigned long period;
		96	spinlock_t lock; /* protect the snapshot state */
		97	};
		98
		99	#define INIT_PROP_LOCAL_SINGLE(name) \
		100	{ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
		101	}
		102
		103	int prop_local_init_single(struct prop_local_single *pl);
		104	void prop_local_destroy_single(struct prop_local_single *pl);
		105	void __prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl);
		106	void prop_fraction_single(struct prop_descriptor pd, struct prop_local_single pl,
		107	long numerator, long denominator);
		108
		109	static inline
		110	void prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl)
		111	{
		112	unsigned long flags;
		113
		114	local_irq_save(flags);
		115	__prop_inc_single(pd, pl);
		116	local_irq_restore(flags);
		117	}
		118
		119	#endif /* _LINUX_PROPORTIONS_H */


diff --git a/lib/Makefile b/lib/Makefile index 6c4ea33bb2cb..c5f215d509d3 100644 --- a/lib/Makefile +++ b/lib/Makefile
@@ -5,7 +5,8 @@
5	lib-y := ctype.o string.o vsprintf.o cmdline.o \	5	lib-y := ctype.o string.o vsprintf.o cmdline.o \
6	rbtree.o radix-tree.o dump_stack.o \	6	rbtree.o radix-tree.o dump_stack.o \
7	idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \	7	idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
8	sha1.o irq_regs.o reciprocal_div.o argv_split.o	8	sha1.o irq_regs.o reciprocal_div.o argv_split.o \
		9	proportions.o
9		10
10	lib-$(CONFIG_MMU) += ioremap.o	11	lib-$(CONFIG_MMU) += ioremap.o
11	lib-$(CONFIG_SMP) += cpumask.o	12	lib-$(CONFIG_SMP) += cpumask.o


diff --git a/lib/proportions.c b/lib/proportions.c new file mode 100644 index 000000000000..332d8c58184d --- /dev/null +++ b/lib/proportions.c
@@ -0,0 +1,384 @@
		1	/*
		2	* Floating proportions
		3	*
		4	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
		5	*
		6	* Description:
		7	*
		8	* The floating proportion is a time derivative with an exponentially decaying
		9	* history:
		10	*
		11	* p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
		12	*
		13	* Where j is an element from {prop_local}, x_{j} is j's number of events,
		14	* and i the time period over which the differential is taken. So d/dt_{-i} is
		15	* the differential over the i-th last period.
		16	*
		17	* The decaying history gives smooth transitions. The time differential carries
		18	* the notion of speed.
		19	*
		20	* The denominator is 2^(1+i) because we want the series to be normalised, ie.
		21	*
		22	* \Sum_{i=0} 1/2^(1+i) = 1
		23	*
		24	* Further more, if we measure time (t) in the same events as x; so that:
		25	*
		26	* t = \Sum_{j} x_{j}
		27	*
		28	* we get that:
		29	*
		30	* \Sum_{j} p_{j} = 1
		31	*
		32	* Writing this in an iterative fashion we get (dropping the 'd's):
		33	*
		34	* if (++x_{j}, ++t > period)
		35	* t /= 2;
		36	* for_each (j)
		37	* x_{j} /= 2;
		38	*
		39	* so that:
		40	*
		41	* p_{j} = x_{j} / t;
		42	*
		43	* We optimize away the '/= 2' for the global time delta by noting that:
		44	*
		45	* if (++t > period) t /= 2:
		46	*
		47	* Can be approximated by:
		48	*
		49	* period/2 + (++t % period/2)
		50	*
		51	* [ Furthermore, when we choose period to be 2^n it can be written in terms of
		52	* binary operations and wraparound artefacts disappear. ]
		53	*
		54	* Also note that this yields a natural counter of the elapsed periods:
		55	*
		56	* c = t / (period/2)
		57	*
		58	* [ Its monotonic increasing property can be applied to mitigate the wrap-
		59	* around issue. ]
		60	*
		61	* This allows us to do away with the loop over all prop_locals on each period
		62	* expiration. By remembering the period count under which it was last accessed
		63	* as c_{j}, we can obtain the number of 'missed' cycles from:
		64	*
		65	* c - c_{j}
		66	*
		67	* We can then lazily catch up to the global period count every time we are
		68	* going to use x_{j}, by doing:
		69	*
		70	* x_{j} /= 2^(c - c_{j}), c_{j} = c
		71	*/
		72
		73	#include <linux/proportions.h>
		74	#include <linux/rcupdate.h>
		75
		76	/*
		77	* Limit the time part in order to ensure there are some bits left for the
		78	* cycle counter.
		79	*/
		80	#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
		81
		82	int prop_descriptor_init(struct prop_descriptor *pd, int shift)
		83	{
		84	int err;
		85
		86	if (shift > PROP_MAX_SHIFT)
		87	shift = PROP_MAX_SHIFT;
		88
		89	pd->index = 0;
		90	pd->pg[0].shift = shift;
		91	mutex_init(&pd->mutex);
		92	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
		93	if (err)
		94	goto out;
		95
		96	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
		97	if (err)
		98	percpu_counter_destroy(&pd->pg[0].events);
		99
		100	out:
		101	return err;
		102	}
		103
		104	/*
		105	* We have two copies, and flip between them to make it seem like an atomic
		106	* update. The update is not really atomic wrt the events counter, but
		107	* it is internally consistent with the bit layout depending on shift.
		108	*
		109	* We copy the events count, move the bits around and flip the index.
		110	*/
		111	void prop_change_shift(struct prop_descriptor *pd, int shift)
		112	{
		113	int index;
		114	int offset;
		115	u64 events;
		116	unsigned long flags;
		117
		118	if (shift > PROP_MAX_SHIFT)
		119	shift = PROP_MAX_SHIFT;
		120
		121	mutex_lock(&pd->mutex);
		122
		123	index = pd->index ^ 1;
		124	offset = pd->pg[pd->index].shift - shift;
		125	if (!offset)
		126	goto out;
		127
		128	pd->pg[index].shift = shift;
		129
		130	local_irq_save(flags);
		131	events = percpu_counter_sum(&pd->pg[pd->index].events);
		132	if (offset < 0)
		133	events <<= -offset;
		134	else
		135	events >>= offset;
		136	percpu_counter_set(&pd->pg[index].events, events);
		137
		138	/*
		139	* ensure the new pg is fully written before the switch
		140	*/
		141	smp_wmb();
		142	pd->index = index;
		143	local_irq_restore(flags);
		144
		145	synchronize_rcu();
		146
		147	out:
		148	mutex_unlock(&pd->mutex);
		149	}
		150
		151	/*
		152	* wrap the access to the data in an rcu_read_lock() section;
		153	* this is used to track the active references.
		154	*/
		155	static struct prop_global prop_get_global(struct prop_descriptor pd)
		156	{
		157	int index;
		158
		159	rcu_read_lock();
		160	index = pd->index;
		161	/*
		162	* match the wmb from vcd_flip()
		163	*/
		164	smp_rmb();
		165	return &pd->pg[index];
		166	}
		167
		168	static void prop_put_global(struct prop_descriptor pd, struct prop_global pg)
		169	{
		170	rcu_read_unlock();
		171	}
		172
		173	static void
		174	prop_adjust_shift(int pl_shift, unsigned long pl_period, int new_shift)
		175	{
		176	int offset = *pl_shift - new_shift;
		177
		178	if (!offset)
		179	return;
		180
		181	if (offset < 0)
		182	*pl_period <<= -offset;
		183	else
		184	*pl_period >>= offset;
		185
		186	*pl_shift = new_shift;
		187	}
		188
		189	/*
		190	* PERCPU
		191	*/
		192
		193	int prop_local_init_percpu(struct prop_local_percpu *pl)
		194	{
		195	spin_lock_init(&pl->lock);
		196	pl->shift = 0;
		197	pl->period = 0;
		198	return percpu_counter_init_irq(&pl->events, 0);
		199	}
		200
		201	void prop_local_destroy_percpu(struct prop_local_percpu *pl)
		202	{
		203	percpu_counter_destroy(&pl->events);
		204	}
		205
		206	/*
		207	* Catch up with missed period expirations.
		208	*
		209	* until (c_{j} == c)
		210	* x_{j} -= x_{j}/2;
		211	* c_{j}++;
		212	*/
		213	static
		214	void prop_norm_percpu(struct prop_global pg, struct prop_local_percpu pl)
		215	{
		216	unsigned long period = 1UL << (pg->shift - 1);
		217	unsigned long period_mask = ~(period - 1);
		218	unsigned long global_period;
		219	unsigned long flags;
		220
		221	global_period = percpu_counter_read(&pg->events);
		222	global_period &= period_mask;
		223
		224	/*
		225	* Fast path - check if the local and global period count still match
		226	* outside of the lock.
		227	*/
		228	if (pl->period == global_period)
		229	return;
		230
		231	spin_lock_irqsave(&pl->lock, flags);
		232	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
		233	/*
		234	* For each missed period, we half the local counter.
		235	* basically:
		236	* pl->events >> (global_period - pl->period);
		237	*
		238	* but since the distributed nature of percpu counters make division
		239	* rather hard, use a regular subtraction loop. This is safe, because
		240	* the events will only every be incremented, hence the subtraction
		241	* can never result in a negative number.
		242	*/
		243	while (pl->period != global_period) {
		244	unsigned long val = percpu_counter_read(&pl->events);
		245	unsigned long half = (val + 1) >> 1;
		246
		247	/*
		248	* Half of zero won't be much less, break out.
		249	* This limits the loop to shift iterations, even
		250	* if we missed a million.
		251	*/
		252	if (!val)
		253	break;
		254
		255	percpu_counter_add(&pl->events, -half);
		256	pl->period += period;
		257	}
		258	pl->period = global_period;
		259	spin_unlock_irqrestore(&pl->lock, flags);
		260	}
		261
		262	/*
		263	* ++x_{j}, ++t
		264	*/
		265	void __prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl)
		266	{
		267	struct prop_global *pg = prop_get_global(pd);
		268
		269	prop_norm_percpu(pg, pl);
		270	percpu_counter_add(&pl->events, 1);
		271	percpu_counter_add(&pg->events, 1);
		272	prop_put_global(pd, pg);
		273	}
		274
		275	/*
		276	* Obtain a fraction of this proportion
		277	*
		278	* p_{j} = x_{j} / (period/2 + t % period/2)
		279	*/
		280	void prop_fraction_percpu(struct prop_descriptor *pd,
		281	struct prop_local_percpu *pl,
		282	long numerator, long denominator)
		283	{
		284	struct prop_global *pg = prop_get_global(pd);
		285	unsigned long period_2 = 1UL << (pg->shift - 1);
		286	unsigned long counter_mask = period_2 - 1;
		287	unsigned long global_count;
		288
		289	prop_norm_percpu(pg, pl);
		290	*numerator = percpu_counter_read_positive(&pl->events);
		291
		292	global_count = percpu_counter_read(&pg->events);
		293	*denominator = period_2 + (global_count & counter_mask);
		294
		295	prop_put_global(pd, pg);
		296	}
		297
		298	/*
		299	* SINGLE
		300	*/
		301
		302	int prop_local_init_single(struct prop_local_single *pl)
		303	{
		304	spin_lock_init(&pl->lock);
		305	pl->shift = 0;
		306	pl->period = 0;
		307	pl->events = 0;
		308	return 0;
		309	}
		310
		311	void prop_local_destroy_single(struct prop_local_single *pl)
		312	{
		313	}
		314
		315	/*
		316	* Catch up with missed period expirations.
		317	*/
		318	static
		319	void prop_norm_single(struct prop_global pg, struct prop_local_single pl)
		320	{
		321	unsigned long period = 1UL << (pg->shift - 1);
		322	unsigned long period_mask = ~(period - 1);
		323	unsigned long global_period;
		324	unsigned long flags;
		325
		326	global_period = percpu_counter_read(&pg->events);
		327	global_period &= period_mask;
		328
		329	/*
		330	* Fast path - check if the local and global period count still match
		331	* outside of the lock.
		332	*/
		333	if (pl->period == global_period)
		334	return;
		335
		336	spin_lock_irqsave(&pl->lock, flags);
		337	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
		338	/*
		339	* For each missed period, we half the local counter.
		340	*/
		341	period = (global_period - pl->period) >> (pg->shift - 1);
		342	if (likely(period < BITS_PER_LONG))
		343	pl->events >>= period;
		344	else
		345	pl->events = 0;
		346	pl->period = global_period;
		347	spin_unlock_irqrestore(&pl->lock, flags);
		348	}
		349
		350	/*
		351	* ++x_{j}, ++t
		352	*/
		353	void __prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl)
		354	{
		355	struct prop_global *pg = prop_get_global(pd);
		356
		357	prop_norm_single(pg, pl);
		358	pl->events++;
		359	percpu_counter_add(&pg->events, 1);
		360	prop_put_global(pd, pg);
		361	}
		362
		363	/*
		364	* Obtain a fraction of this proportion
		365	*
		366	* p_{j} = x_{j} / (period/2 + t % period/2)
		367	*/
		368	void prop_fraction_single(struct prop_descriptor *pd,
		369	struct prop_local_single *pl,
		370	long numerator, long denominator)
		371	{
		372	struct prop_global *pg = prop_get_global(pd);
		373	unsigned long period_2 = 1UL << (pg->shift - 1);
		374	unsigned long counter_mask = period_2 - 1;
		375	unsigned long global_count;
		376
		377	prop_norm_single(pg, pl);
		378	*numerator = pl->events;
		379
		380	global_count = percpu_counter_read(&pg->events);
		381	*denominator = period_2 + (global_count & counter_mask);
		382
		383	prop_put_global(pd, pg);
		384	}