author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2007-10-17 02:25:49 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>    2007-10-17 11:42:45 -0400
commit     145ca25eb2fbd20d4faf1bad4628c7650332058f (patch)
tree       ee5bebd5b7b1f0ced5e30c8f29f1ff5301aa9d0a
parent     69cb51d18c1ed593009d9a620cac49d0dcf15dc8 (diff)
lib: floating proportions
Given a set of objects, floating proportions aims to efficiently give the proportional 'activity' of a single item as compared to the whole set, where 'activity' is a measure of a temporal property of the items.

It is efficient in that it need not inspect any other items of the set in order to provide the answer, and it does not even need to know how many other items there are.

It has one parameter: the period of 'time' over which the 'activity' is measured.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/proportions.h  119
-rw-r--r--  lib/Makefile                   3
-rw-r--r--  lib/proportions.c            384
3 files changed, 505 insertions(+), 1 deletion(-)
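
For orientation, here is a minimal sketch of how a caller of the per-CPU API added by this patch might look. It is not part of the patch: the names (example_prop, example_obj, the shift of 10) are hypothetical, and only functions declared in include/linux/proportions.h below are used. A descriptor is initialised once with the desired period shift, every tracked object carries a prop_local_percpu, each event bumps both counters through prop_inc_percpu(), and the object's current share comes back as a numerator/denominator pair from prop_fraction_percpu().

#include <linux/init.h>
#include <linux/proportions.h>

/* hypothetical global descriptor shared by all tracked objects */
static struct prop_descriptor example_prop;

struct example_obj {
	struct prop_local_percpu prop_local;
};

static int __init example_init(void)
{
	/* period = 2^10 events; a larger shift makes history decay more slowly */
	return prop_descriptor_init(&example_prop, 10);
}

static int example_obj_init(struct example_obj *obj)
{
	return prop_local_init_percpu(&obj->prop_local);
}

static void example_event(struct example_obj *obj)
{
	/* ++x_{j}, ++t -- the IRQ-safe wrapper from the header */
	prop_inc_percpu(&example_prop, &obj->prop_local);
}

static void example_share(struct example_obj *obj, long *num, long *den)
{
	/* obj generated roughly *num / *den of the recent events */
	prop_fraction_percpu(&example_prop, &obj->prop_local, num, den);
}
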
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
new file mode 100644
index 000000000000..2c3b3cad92be
--- /dev/null
+++ b/include/linux/proportions.h
@@ -0,0 +1,119 @@
1/*
2 * Floating proportions
3 *
4 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 * This file contains the public data structure and API definitions.
7 */
8
9#ifndef _LINUX_PROPORTIONS_H
10#define _LINUX_PROPORTIONS_H
11
12#include <linux/percpu_counter.h>
13#include <linux/spinlock.h>
14#include <linux/mutex.h>
15
16struct prop_global {
17 /*
18 * The period over which we differentiate
19 *
20 * period = 2^shift
21 */
22 int shift;
23 /*
24 * The total event counter aka 'time'.
25 *
26 * Treated as an unsigned long; the lower 'shift - 1' bits are the
27 * counter bits, the remaining upper bits the period counter.
28 */
29 struct percpu_counter events;
30};
31
32/*
33 * global proportion descriptor
34 *
35 * this is needed to consistently flip prop_global structures.
36 */
37struct prop_descriptor {
38 int index;
39 struct prop_global pg[2];
40 struct mutex mutex; /* serialize the prop_global switch */
41};
42
43int prop_descriptor_init(struct prop_descriptor *pd, int shift);
44void prop_change_shift(struct prop_descriptor *pd, int new_shift);
45
46/*
47 * ----- PERCPU ------
48 */
49
50struct prop_local_percpu {
51 /*
52 * the local events counter
53 */
54 struct percpu_counter events;
55
56 /*
57 * snapshot of the last seen global state
58 */
59 int shift;
60 unsigned long period;
61 spinlock_t lock; /* protect the snapshot state */
62};
63
64int prop_local_init_percpu(struct prop_local_percpu *pl);
65void prop_local_destroy_percpu(struct prop_local_percpu *pl);
66void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
67void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
68 long *numerator, long *denominator);
69
70static inline
71void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
72{
73 unsigned long flags;
74
75 local_irq_save(flags);
76 __prop_inc_percpu(pd, pl);
77 local_irq_restore(flags);
78}
79
80/*
81 * ----- SINGLE ------
82 */
83
84struct prop_local_single {
85 /*
86 * the local events counter
87 */
88 unsigned long events;
89
90 /*
91 * snapshot of the last seen global state
92 * and a lock protecting this state
93 */
94 int shift;
95 unsigned long period;
96 spinlock_t lock; /* protect the snapshot state */
97};
98
99#define INIT_PROP_LOCAL_SINGLE(name) \
100{ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
101}
102
103int prop_local_init_single(struct prop_local_single *pl);
104void prop_local_destroy_single(struct prop_local_single *pl);
105void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl);
106void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl,
107 long *numerator, long *denominator);
108
109static inline
110void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
111{
112 unsigned long flags;
113
114 local_irq_save(flags);
115 __prop_inc_single(pd, pl);
116 local_irq_restore(flags);
117}
118
119#endif /* _LINUX_PROPORTIONS_H */
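
The header also exposes a 'single' variant, implemented further down in lib/proportions.c, which keeps a plain unsigned long instead of a percpu_counter and can therefore be initialised statically via INIT_PROP_LOCAL_SINGLE(). A brief sketch, with illustrative names that are not part of the patch:

/* only the spinlock is set up here; shift, period and events start at zero */
static struct prop_local_single example_single =
	INIT_PROP_LOCAL_SINGLE(example_single);

static void example_single_event(struct prop_descriptor *pd)
{
	prop_inc_single(pd, &example_single);		/* ++x_{j}, ++t */
}

static void example_single_share(struct prop_descriptor *pd,
				 long *num, long *den)
{
	prop_fraction_single(pd, &example_single, num, den);
}
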
diff --git a/lib/Makefile b/lib/Makefile
index 6c4ea33bb2cb..c5f215d509d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -5,7 +5,8 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
	 rbtree.o radix-tree.o dump_stack.o \
	 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
-	 sha1.o irq_regs.o reciprocal_div.o argv_split.o
+	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
+	 proportions.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/proportions.c b/lib/proportions.c
new file mode 100644
index 000000000000..332d8c58184d
--- /dev/null
+++ b/lib/proportions.c
@@ -0,0 +1,384 @@
1/*
2 * Floating proportions
3 *
4 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 * Description:
7 *
8 * The floating proportion is a time derivative with an exponentially decaying
9 * history:
10 *
11 * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
12 *
13 * Where j is an element from {prop_local}, x_{j} is j's number of events,
14 * and i the time period over which the differential is taken. So d/dt_{-i} is
15 * the differential over the i-th last period.
16 *
17 * The decaying history gives smooth transitions. The time differential carries
18 * the notion of speed.
19 *
20 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
21 *
22 * \Sum_{i=0} 1/2^(1+i) = 1
23 *
24 * Furthermore, if we measure time (t) in the same events as x, so that:
25 *
26 * t = \Sum_{j} x_{j}
27 *
28 * we get that:
29 *
30 * \Sum_{j} p_{j} = 1
31 *
32 * Writing this in an iterative fashion we get (dropping the 'd's):
33 *
34 * if (++x_{j}, ++t > period)
35 * t /= 2;
36 * for_each (j)
37 * x_{j} /= 2;
38 *
39 * so that:
40 *
41 * p_{j} = x_{j} / t;
42 *
43 * We optimize away the '/= 2' for the global time delta by noting that:
44 *
45 * if (++t > period) t /= 2;
46 *
47 * Can be approximated by:
48 *
49 * period/2 + (++t % period/2)
50 *
51 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
52 * binary operations and wraparound artefacts disappear. ]
53 *
54 * Also note that this yields a natural counter of the elapsed periods:
55 *
56 * c = t / (period/2)
57 *
58 * [ Its monotonically increasing property can be applied to mitigate the wrap-
59 * around issue. ]
60 *
61 * This allows us to do away with the loop over all prop_locals on each period
62 * expiration. By remembering the period count under which it was last accessed
63 * as c_{j}, we can obtain the number of 'missed' cycles from:
64 *
65 * c - c_{j}
66 *
67 * We can then lazily catch up to the global period count every time we are
68 * going to use x_{j}, by doing:
69 *
70 * x_{j} /= 2^(c - c_{j}), c_{j} = c
71 */
72
73#include <linux/proportions.h>
74#include <linux/rcupdate.h>
75
76/*
77 * Limit the time part in order to ensure there are some bits left for the
78 * cycle counter.
79 */
80#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
81
82int prop_descriptor_init(struct prop_descriptor *pd, int shift)
83{
84 int err;
85
86 if (shift > PROP_MAX_SHIFT)
87 shift = PROP_MAX_SHIFT;
88
89 pd->index = 0;
90 pd->pg[0].shift = shift;
91 mutex_init(&pd->mutex);
92 err = percpu_counter_init_irq(&pd->pg[0].events, 0);
93 if (err)
94 goto out;
95
96 err = percpu_counter_init_irq(&pd->pg[1].events, 0);
97 if (err)
98 percpu_counter_destroy(&pd->pg[0].events);
99
100out:
101 return err;
102}
103
104/*
105 * We have two copies, and flip between them to make it seem like an atomic
106 * update. The update is not really atomic wrt the events counter, but
107 * it is internally consistent with the bit layout depending on shift.
108 *
109 * We copy the events count, move the bits around and flip the index.
110 */
111void prop_change_shift(struct prop_descriptor *pd, int shift)
112{
113 int index;
114 int offset;
115 u64 events;
116 unsigned long flags;
117
118 if (shift > PROP_MAX_SHIFT)
119 shift = PROP_MAX_SHIFT;
120
121 mutex_lock(&pd->mutex);
122
123 index = pd->index ^ 1;
124 offset = pd->pg[pd->index].shift - shift;
125 if (!offset)
126 goto out;
127
128 pd->pg[index].shift = shift;
129
130 local_irq_save(flags);
131 events = percpu_counter_sum(&pd->pg[pd->index].events);
132 if (offset < 0)
133 events <<= -offset;
134 else
135 events >>= offset;
136 percpu_counter_set(&pd->pg[index].events, events);
137
138 /*
139 * ensure the new pg is fully written before the switch
140 */
141 smp_wmb();
142 pd->index = index;
143 local_irq_restore(flags);
144
145 synchronize_rcu();
146
147out:
148 mutex_unlock(&pd->mutex);
149}
150
151/*
152 * wrap the access to the data in an rcu_read_lock() section;
153 * this is used to track the active references.
154 */
155static struct prop_global *prop_get_global(struct prop_descriptor *pd)
156{
157 int index;
158
159 rcu_read_lock();
160 index = pd->index;
161 /*
162 * match the wmb in prop_change_shift()
163 */
164 smp_rmb();
165 return &pd->pg[index];
166}
167
168static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
169{
170 rcu_read_unlock();
171}
172
173static void
174prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
175{
176 int offset = *pl_shift - new_shift;
177
178 if (!offset)
179 return;
180
181 if (offset < 0)
182 *pl_period <<= -offset;
183 else
184 *pl_period >>= offset;
185
186 *pl_shift = new_shift;
187}
188
189/*
190 * PERCPU
191 */
192
193int prop_local_init_percpu(struct prop_local_percpu *pl)
194{
195 spin_lock_init(&pl->lock);
196 pl->shift = 0;
197 pl->period = 0;
198 return percpu_counter_init_irq(&pl->events, 0);
199}
200
201void prop_local_destroy_percpu(struct prop_local_percpu *pl)
202{
203 percpu_counter_destroy(&pl->events);
204}
205
206/*
207 * Catch up with missed period expirations.
208 *
209 * until (c_{j} == c)
210 * x_{j} -= x_{j}/2;
211 * c_{j}++;
212 */
213static
214void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
215{
216 unsigned long period = 1UL << (pg->shift - 1);
217 unsigned long period_mask = ~(period - 1);
218 unsigned long global_period;
219 unsigned long flags;
220
221 global_period = percpu_counter_read(&pg->events);
222 global_period &= period_mask;
223
224 /*
225 * Fast path - check if the local and global period count still match
226 * outside of the lock.
227 */
228 if (pl->period == global_period)
229 return;
230
231 spin_lock_irqsave(&pl->lock, flags);
232 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
233 /*
234 * For each missed period, we halve the local counter;
235 * basically:
236 * pl->events >> (global_period - pl->period);
237 *
238 * but since the distributed nature of percpu counters makes division
239 * rather hard, use a regular subtraction loop. This is safe, because
240 * the events will only ever be incremented, hence the subtraction
241 * can never result in a negative number.
242 */
243 while (pl->period != global_period) {
244 unsigned long val = percpu_counter_read(&pl->events);
245 unsigned long half = (val + 1) >> 1;
246
247 /*
248 * Half of zero won't be much less, break out.
249 * This limits the loop to shift iterations, even
250 * if we missed a million.
251 */
252 if (!val)
253 break;
254
255 percpu_counter_add(&pl->events, -half);
256 pl->period += period;
257 }
258 pl->period = global_period;
259 spin_unlock_irqrestore(&pl->lock, flags);
260}
261
262/*
263 * ++x_{j}, ++t
264 */
265void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
266{
267 struct prop_global *pg = prop_get_global(pd);
268
269 prop_norm_percpu(pg, pl);
270 percpu_counter_add(&pl->events, 1);
271 percpu_counter_add(&pg->events, 1);
272 prop_put_global(pd, pg);
273}
274
275/*
276 * Obtain a fraction of this proportion
277 *
278 * p_{j} = x_{j} / (period/2 + t % period/2)
279 */
280void prop_fraction_percpu(struct prop_descriptor *pd,
281 struct prop_local_percpu *pl,
282 long *numerator, long *denominator)
283{
284 struct prop_global *pg = prop_get_global(pd);
285 unsigned long period_2 = 1UL << (pg->shift - 1);
286 unsigned long counter_mask = period_2 - 1;
287 unsigned long global_count;
288
289 prop_norm_percpu(pg, pl);
290 *numerator = percpu_counter_read_positive(&pl->events);
291
292 global_count = percpu_counter_read(&pg->events);
293 *denominator = period_2 + (global_count & counter_mask);
294
295 prop_put_global(pd, pg);
296}
297
298/*
299 * SINGLE
300 */
301
302int prop_local_init_single(struct prop_local_single *pl)
303{
304 spin_lock_init(&pl->lock);
305 pl->shift = 0;
306 pl->period = 0;
307 pl->events = 0;
308 return 0;
309}
310
311void prop_local_destroy_single(struct prop_local_single *pl)
312{
313}
314
315/*
316 * Catch up with missed period expirations.
317 */
318static
319void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
320{
321 unsigned long period = 1UL << (pg->shift - 1);
322 unsigned long period_mask = ~(period - 1);
323 unsigned long global_period;
324 unsigned long flags;
325
326 global_period = percpu_counter_read(&pg->events);
327 global_period &= period_mask;
328
329 /*
330 * Fast path - check if the local and global period count still match
331 * outside of the lock.
332 */
333 if (pl->period == global_period)
334 return;
335
336 spin_lock_irqsave(&pl->lock, flags);
337 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
338 /*
339 * For each missed period, we halve the local counter.
340 */
341 period = (global_period - pl->period) >> (pg->shift - 1);
342 if (likely(period < BITS_PER_LONG))
343 pl->events >>= period;
344 else
345 pl->events = 0;
346 pl->period = global_period;
347 spin_unlock_irqrestore(&pl->lock, flags);
348}
349
350/*
351 * ++x_{j}, ++t
352 */
353void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
354{
355 struct prop_global *pg = prop_get_global(pd);
356
357 prop_norm_single(pg, pl);
358 pl->events++;
359 percpu_counter_add(&pg->events, 1);
360 prop_put_global(pd, pg);
361}
362
363/*
364 * Obtain a fraction of this proportion
365 *
366 * p_{j} = x_{j} / (period/2 + t % period/2)
367 */
368void prop_fraction_single(struct prop_descriptor *pd,
369 struct prop_local_single *pl,
370 long *numerator, long *denominator)
371{
372 struct prop_global *pg = prop_get_global(pd);
373 unsigned long period_2 = 1UL << (pg->shift - 1);
374 unsigned long counter_mask = period_2 - 1;
375 unsigned long global_count;
376
377 prop_norm_single(pg, pl);
378 *numerator = pl->events;
379
380 global_count = percpu_counter_read(&pg->events);
381 *denominator = period_2 + (global_count & counter_mask);
382
383 prop_put_global(pd, pg);
384}
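
To make the lazy catch-up arithmetic concrete, here is a small stand-alone user-space model of the 'single' variant for shift = 3 (half-period of 4 events). It only illustrates the arithmetic described in the comment at the top of lib/proportions.c; the names and the printf are illustrative, and the locking, RCU and percpu_counter machinery are deliberately left out.

#include <stdio.h>

/*
 * Toy model for shift = 3: the half-period is 1 << (shift - 1) = 4 events,
 * the low 2 bits of t count events inside the current half-period and the
 * upper bits count elapsed half-periods.
 */
static const int shift = 3;
static unsigned long t;			/* global events, aka 'time' */
static unsigned long x, x_period;	/* one object's events + period snapshot */

static void norm(void)			/* models prop_norm_single() */
{
	unsigned long half = 1UL << (shift - 1);
	unsigned long global_period = t & ~(half - 1);
	unsigned long missed = (global_period - x_period) >> (shift - 1);

	/* halve x once for every missed half-period: x /= 2^missed */
	x = (missed < 8 * sizeof(long)) ? (x >> missed) : 0;
	x_period = global_period;
}

static void inc(void)			/* models __prop_inc_single() */
{
	norm();
	x++;
	t++;
}

static void fraction(long *num, long *den)	/* models prop_fraction_single() */
{
	unsigned long half = 1UL << (shift - 1);

	norm();
	*num = x;
	*den = half + (t & (half - 1));
}

int main(void)
{
	long num, den;
	int i;

	for (i = 0; i < 16; i++)
		inc();			/* this object generates every event */

	fraction(&num, &den);
	/* the sole active object holds nearly the whole proportion;
	 * right after a period flip this prints "share = 3/4" */
	printf("share = %ld/%ld\n", num, den);
	return 0;
}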