diff options
-rw-r--r-- | include/linux/proportions.h | 119 | ||||
-rw-r--r-- | lib/Makefile | 3 | ||||
-rw-r--r-- | lib/proportions.c | 384 |
3 files changed, 505 insertions, 1 deletions
diff --git a/include/linux/proportions.h b/include/linux/proportions.h new file mode 100644 index 000000000000..2c3b3cad92be --- /dev/null +++ b/include/linux/proportions.h | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * FLoating proportions | ||
3 | * | ||
4 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
5 | * | ||
6 | * This file contains the public data structure and API definitions. | ||
7 | */ | ||
8 | |||
9 | #ifndef _LINUX_PROPORTIONS_H | ||
10 | #define _LINUX_PROPORTIONS_H | ||
11 | |||
12 | #include <linux/percpu_counter.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <linux/mutex.h> | ||
15 | |||
16 | struct prop_global { | ||
17 | /* | ||
18 | * The period over which we differentiate | ||
19 | * | ||
20 | * period = 2^shift | ||
21 | */ | ||
22 | int shift; | ||
23 | /* | ||
24 | * The total event counter aka 'time'. | ||
25 | * | ||
26 | * Treated as an unsigned long; the lower 'shift - 1' bits are the | ||
27 | * counter bits, the remaining upper bits the period counter. | ||
28 | */ | ||
29 | struct percpu_counter events; | ||
30 | }; | ||
31 | |||
32 | /* | ||
33 | * global proportion descriptor | ||
34 | * | ||
35 | * this is needed to consitently flip prop_global structures. | ||
36 | */ | ||
37 | struct prop_descriptor { | ||
38 | int index; | ||
39 | struct prop_global pg[2]; | ||
40 | struct mutex mutex; /* serialize the prop_global switch */ | ||
41 | }; | ||
42 | |||
43 | int prop_descriptor_init(struct prop_descriptor *pd, int shift); | ||
44 | void prop_change_shift(struct prop_descriptor *pd, int new_shift); | ||
45 | |||
46 | /* | ||
47 | * ----- PERCPU ------ | ||
48 | */ | ||
49 | |||
50 | struct prop_local_percpu { | ||
51 | /* | ||
52 | * the local events counter | ||
53 | */ | ||
54 | struct percpu_counter events; | ||
55 | |||
56 | /* | ||
57 | * snapshot of the last seen global state | ||
58 | */ | ||
59 | int shift; | ||
60 | unsigned long period; | ||
61 | spinlock_t lock; /* protect the snapshot state */ | ||
62 | }; | ||
63 | |||
64 | int prop_local_init_percpu(struct prop_local_percpu *pl); | ||
65 | void prop_local_destroy_percpu(struct prop_local_percpu *pl); | ||
66 | void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); | ||
67 | void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, | ||
68 | long *numerator, long *denominator); | ||
69 | |||
70 | static inline | ||
71 | void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | ||
72 | { | ||
73 | unsigned long flags; | ||
74 | |||
75 | local_irq_save(flags); | ||
76 | __prop_inc_percpu(pd, pl); | ||
77 | local_irq_restore(flags); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * ----- SINGLE ------ | ||
82 | */ | ||
83 | |||
84 | struct prop_local_single { | ||
85 | /* | ||
86 | * the local events counter | ||
87 | */ | ||
88 | unsigned long events; | ||
89 | |||
90 | /* | ||
91 | * snapshot of the last seen global state | ||
92 | * and a lock protecting this state | ||
93 | */ | ||
94 | int shift; | ||
95 | unsigned long period; | ||
96 | spinlock_t lock; /* protect the snapshot state */ | ||
97 | }; | ||
98 | |||
99 | #define INIT_PROP_LOCAL_SINGLE(name) \ | ||
100 | { .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ | ||
101 | } | ||
102 | |||
103 | int prop_local_init_single(struct prop_local_single *pl); | ||
104 | void prop_local_destroy_single(struct prop_local_single *pl); | ||
105 | void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl); | ||
106 | void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl, | ||
107 | long *numerator, long *denominator); | ||
108 | |||
109 | static inline | ||
110 | void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) | ||
111 | { | ||
112 | unsigned long flags; | ||
113 | |||
114 | local_irq_save(flags); | ||
115 | __prop_inc_single(pd, pl); | ||
116 | local_irq_restore(flags); | ||
117 | } | ||
118 | |||
119 | #endif /* _LINUX_PROPORTIONS_H */ | ||
diff --git a/lib/Makefile b/lib/Makefile index 6c4ea33bb2cb..c5f215d509d3 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -5,7 +5,8 @@ | |||
5 | lib-y := ctype.o string.o vsprintf.o cmdline.o \ | 5 | lib-y := ctype.o string.o vsprintf.o cmdline.o \ |
6 | rbtree.o radix-tree.o dump_stack.o \ | 6 | rbtree.o radix-tree.o dump_stack.o \ |
7 | idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ | 7 | idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ |
8 | sha1.o irq_regs.o reciprocal_div.o argv_split.o | 8 | sha1.o irq_regs.o reciprocal_div.o argv_split.o \ |
9 | proportions.o | ||
9 | 10 | ||
10 | lib-$(CONFIG_MMU) += ioremap.o | 11 | lib-$(CONFIG_MMU) += ioremap.o |
11 | lib-$(CONFIG_SMP) += cpumask.o | 12 | lib-$(CONFIG_SMP) += cpumask.o |
diff --git a/lib/proportions.c b/lib/proportions.c new file mode 100644 index 000000000000..332d8c58184d --- /dev/null +++ b/lib/proportions.c | |||
@@ -0,0 +1,384 @@ | |||
1 | /* | ||
2 | * Floating proportions | ||
3 | * | ||
4 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
5 | * | ||
6 | * Description: | ||
7 | * | ||
8 | * The floating proportion is a time derivative with an exponentially decaying | ||
9 | * history: | ||
10 | * | ||
11 | * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) | ||
12 | * | ||
13 | * Where j is an element from {prop_local}, x_{j} is j's number of events, | ||
14 | * and i the time period over which the differential is taken. So d/dt_{-i} is | ||
15 | * the differential over the i-th last period. | ||
16 | * | ||
17 | * The decaying history gives smooth transitions. The time differential carries | ||
18 | * the notion of speed. | ||
19 | * | ||
20 | * The denominator is 2^(1+i) because we want the series to be normalised, ie. | ||
21 | * | ||
22 | * \Sum_{i=0} 1/2^(1+i) = 1 | ||
23 | * | ||
24 | * Further more, if we measure time (t) in the same events as x; so that: | ||
25 | * | ||
26 | * t = \Sum_{j} x_{j} | ||
27 | * | ||
28 | * we get that: | ||
29 | * | ||
30 | * \Sum_{j} p_{j} = 1 | ||
31 | * | ||
32 | * Writing this in an iterative fashion we get (dropping the 'd's): | ||
33 | * | ||
34 | * if (++x_{j}, ++t > period) | ||
35 | * t /= 2; | ||
36 | * for_each (j) | ||
37 | * x_{j} /= 2; | ||
38 | * | ||
39 | * so that: | ||
40 | * | ||
41 | * p_{j} = x_{j} / t; | ||
42 | * | ||
43 | * We optimize away the '/= 2' for the global time delta by noting that: | ||
44 | * | ||
45 | * if (++t > period) t /= 2: | ||
46 | * | ||
47 | * Can be approximated by: | ||
48 | * | ||
49 | * period/2 + (++t % period/2) | ||
50 | * | ||
51 | * [ Furthermore, when we choose period to be 2^n it can be written in terms of | ||
52 | * binary operations and wraparound artefacts disappear. ] | ||
53 | * | ||
54 | * Also note that this yields a natural counter of the elapsed periods: | ||
55 | * | ||
56 | * c = t / (period/2) | ||
57 | * | ||
58 | * [ Its monotonic increasing property can be applied to mitigate the wrap- | ||
59 | * around issue. ] | ||
60 | * | ||
61 | * This allows us to do away with the loop over all prop_locals on each period | ||
62 | * expiration. By remembering the period count under which it was last accessed | ||
63 | * as c_{j}, we can obtain the number of 'missed' cycles from: | ||
64 | * | ||
65 | * c - c_{j} | ||
66 | * | ||
67 | * We can then lazily catch up to the global period count every time we are | ||
68 | * going to use x_{j}, by doing: | ||
69 | * | ||
70 | * x_{j} /= 2^(c - c_{j}), c_{j} = c | ||
71 | */ | ||
72 | |||
73 | #include <linux/proportions.h> | ||
74 | #include <linux/rcupdate.h> | ||
75 | |||
76 | /* | ||
77 | * Limit the time part in order to ensure there are some bits left for the | ||
78 | * cycle counter. | ||
79 | */ | ||
80 | #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) | ||
81 | |||
82 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) | ||
83 | { | ||
84 | int err; | ||
85 | |||
86 | if (shift > PROP_MAX_SHIFT) | ||
87 | shift = PROP_MAX_SHIFT; | ||
88 | |||
89 | pd->index = 0; | ||
90 | pd->pg[0].shift = shift; | ||
91 | mutex_init(&pd->mutex); | ||
92 | err = percpu_counter_init_irq(&pd->pg[0].events, 0); | ||
93 | if (err) | ||
94 | goto out; | ||
95 | |||
96 | err = percpu_counter_init_irq(&pd->pg[1].events, 0); | ||
97 | if (err) | ||
98 | percpu_counter_destroy(&pd->pg[0].events); | ||
99 | |||
100 | out: | ||
101 | return err; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * We have two copies, and flip between them to make it seem like an atomic | ||
106 | * update. The update is not really atomic wrt the events counter, but | ||
107 | * it is internally consistent with the bit layout depending on shift. | ||
108 | * | ||
109 | * We copy the events count, move the bits around and flip the index. | ||
110 | */ | ||
111 | void prop_change_shift(struct prop_descriptor *pd, int shift) | ||
112 | { | ||
113 | int index; | ||
114 | int offset; | ||
115 | u64 events; | ||
116 | unsigned long flags; | ||
117 | |||
118 | if (shift > PROP_MAX_SHIFT) | ||
119 | shift = PROP_MAX_SHIFT; | ||
120 | |||
121 | mutex_lock(&pd->mutex); | ||
122 | |||
123 | index = pd->index ^ 1; | ||
124 | offset = pd->pg[pd->index].shift - shift; | ||
125 | if (!offset) | ||
126 | goto out; | ||
127 | |||
128 | pd->pg[index].shift = shift; | ||
129 | |||
130 | local_irq_save(flags); | ||
131 | events = percpu_counter_sum(&pd->pg[pd->index].events); | ||
132 | if (offset < 0) | ||
133 | events <<= -offset; | ||
134 | else | ||
135 | events >>= offset; | ||
136 | percpu_counter_set(&pd->pg[index].events, events); | ||
137 | |||
138 | /* | ||
139 | * ensure the new pg is fully written before the switch | ||
140 | */ | ||
141 | smp_wmb(); | ||
142 | pd->index = index; | ||
143 | local_irq_restore(flags); | ||
144 | |||
145 | synchronize_rcu(); | ||
146 | |||
147 | out: | ||
148 | mutex_unlock(&pd->mutex); | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * wrap the access to the data in an rcu_read_lock() section; | ||
153 | * this is used to track the active references. | ||
154 | */ | ||
155 | static struct prop_global *prop_get_global(struct prop_descriptor *pd) | ||
156 | { | ||
157 | int index; | ||
158 | |||
159 | rcu_read_lock(); | ||
160 | index = pd->index; | ||
161 | /* | ||
162 | * match the wmb from vcd_flip() | ||
163 | */ | ||
164 | smp_rmb(); | ||
165 | return &pd->pg[index]; | ||
166 | } | ||
167 | |||
168 | static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) | ||
169 | { | ||
170 | rcu_read_unlock(); | ||
171 | } | ||
172 | |||
173 | static void | ||
174 | prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) | ||
175 | { | ||
176 | int offset = *pl_shift - new_shift; | ||
177 | |||
178 | if (!offset) | ||
179 | return; | ||
180 | |||
181 | if (offset < 0) | ||
182 | *pl_period <<= -offset; | ||
183 | else | ||
184 | *pl_period >>= offset; | ||
185 | |||
186 | *pl_shift = new_shift; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * PERCPU | ||
191 | */ | ||
192 | |||
193 | int prop_local_init_percpu(struct prop_local_percpu *pl) | ||
194 | { | ||
195 | spin_lock_init(&pl->lock); | ||
196 | pl->shift = 0; | ||
197 | pl->period = 0; | ||
198 | return percpu_counter_init_irq(&pl->events, 0); | ||
199 | } | ||
200 | |||
201 | void prop_local_destroy_percpu(struct prop_local_percpu *pl) | ||
202 | { | ||
203 | percpu_counter_destroy(&pl->events); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Catch up with missed period expirations. | ||
208 | * | ||
209 | * until (c_{j} == c) | ||
210 | * x_{j} -= x_{j}/2; | ||
211 | * c_{j}++; | ||
212 | */ | ||
213 | static | ||
214 | void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) | ||
215 | { | ||
216 | unsigned long period = 1UL << (pg->shift - 1); | ||
217 | unsigned long period_mask = ~(period - 1); | ||
218 | unsigned long global_period; | ||
219 | unsigned long flags; | ||
220 | |||
221 | global_period = percpu_counter_read(&pg->events); | ||
222 | global_period &= period_mask; | ||
223 | |||
224 | /* | ||
225 | * Fast path - check if the local and global period count still match | ||
226 | * outside of the lock. | ||
227 | */ | ||
228 | if (pl->period == global_period) | ||
229 | return; | ||
230 | |||
231 | spin_lock_irqsave(&pl->lock, flags); | ||
232 | prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | ||
233 | /* | ||
234 | * For each missed period, we half the local counter. | ||
235 | * basically: | ||
236 | * pl->events >> (global_period - pl->period); | ||
237 | * | ||
238 | * but since the distributed nature of percpu counters make division | ||
239 | * rather hard, use a regular subtraction loop. This is safe, because | ||
240 | * the events will only every be incremented, hence the subtraction | ||
241 | * can never result in a negative number. | ||
242 | */ | ||
243 | while (pl->period != global_period) { | ||
244 | unsigned long val = percpu_counter_read(&pl->events); | ||
245 | unsigned long half = (val + 1) >> 1; | ||
246 | |||
247 | /* | ||
248 | * Half of zero won't be much less, break out. | ||
249 | * This limits the loop to shift iterations, even | ||
250 | * if we missed a million. | ||
251 | */ | ||
252 | if (!val) | ||
253 | break; | ||
254 | |||
255 | percpu_counter_add(&pl->events, -half); | ||
256 | pl->period += period; | ||
257 | } | ||
258 | pl->period = global_period; | ||
259 | spin_unlock_irqrestore(&pl->lock, flags); | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * ++x_{j}, ++t | ||
264 | */ | ||
265 | void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | ||
266 | { | ||
267 | struct prop_global *pg = prop_get_global(pd); | ||
268 | |||
269 | prop_norm_percpu(pg, pl); | ||
270 | percpu_counter_add(&pl->events, 1); | ||
271 | percpu_counter_add(&pg->events, 1); | ||
272 | prop_put_global(pd, pg); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Obtain a fraction of this proportion | ||
277 | * | ||
278 | * p_{j} = x_{j} / (period/2 + t % period/2) | ||
279 | */ | ||
280 | void prop_fraction_percpu(struct prop_descriptor *pd, | ||
281 | struct prop_local_percpu *pl, | ||
282 | long *numerator, long *denominator) | ||
283 | { | ||
284 | struct prop_global *pg = prop_get_global(pd); | ||
285 | unsigned long period_2 = 1UL << (pg->shift - 1); | ||
286 | unsigned long counter_mask = period_2 - 1; | ||
287 | unsigned long global_count; | ||
288 | |||
289 | prop_norm_percpu(pg, pl); | ||
290 | *numerator = percpu_counter_read_positive(&pl->events); | ||
291 | |||
292 | global_count = percpu_counter_read(&pg->events); | ||
293 | *denominator = period_2 + (global_count & counter_mask); | ||
294 | |||
295 | prop_put_global(pd, pg); | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * SINGLE | ||
300 | */ | ||
301 | |||
302 | int prop_local_init_single(struct prop_local_single *pl) | ||
303 | { | ||
304 | spin_lock_init(&pl->lock); | ||
305 | pl->shift = 0; | ||
306 | pl->period = 0; | ||
307 | pl->events = 0; | ||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | void prop_local_destroy_single(struct prop_local_single *pl) | ||
312 | { | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Catch up with missed period expirations. | ||
317 | */ | ||
318 | static | ||
319 | void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) | ||
320 | { | ||
321 | unsigned long period = 1UL << (pg->shift - 1); | ||
322 | unsigned long period_mask = ~(period - 1); | ||
323 | unsigned long global_period; | ||
324 | unsigned long flags; | ||
325 | |||
326 | global_period = percpu_counter_read(&pg->events); | ||
327 | global_period &= period_mask; | ||
328 | |||
329 | /* | ||
330 | * Fast path - check if the local and global period count still match | ||
331 | * outside of the lock. | ||
332 | */ | ||
333 | if (pl->period == global_period) | ||
334 | return; | ||
335 | |||
336 | spin_lock_irqsave(&pl->lock, flags); | ||
337 | prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | ||
338 | /* | ||
339 | * For each missed period, we half the local counter. | ||
340 | */ | ||
341 | period = (global_period - pl->period) >> (pg->shift - 1); | ||
342 | if (likely(period < BITS_PER_LONG)) | ||
343 | pl->events >>= period; | ||
344 | else | ||
345 | pl->events = 0; | ||
346 | pl->period = global_period; | ||
347 | spin_unlock_irqrestore(&pl->lock, flags); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * ++x_{j}, ++t | ||
352 | */ | ||
353 | void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) | ||
354 | { | ||
355 | struct prop_global *pg = prop_get_global(pd); | ||
356 | |||
357 | prop_norm_single(pg, pl); | ||
358 | pl->events++; | ||
359 | percpu_counter_add(&pg->events, 1); | ||
360 | prop_put_global(pd, pg); | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * Obtain a fraction of this proportion | ||
365 | * | ||
366 | * p_{j} = x_{j} / (period/2 + t % period/2) | ||
367 | */ | ||
368 | void prop_fraction_single(struct prop_descriptor *pd, | ||
369 | struct prop_local_single *pl, | ||
370 | long *numerator, long *denominator) | ||
371 | { | ||
372 | struct prop_global *pg = prop_get_global(pd); | ||
373 | unsigned long period_2 = 1UL << (pg->shift - 1); | ||
374 | unsigned long counter_mask = period_2 - 1; | ||
375 | unsigned long global_count; | ||
376 | |||
377 | prop_norm_single(pg, pl); | ||
378 | *numerator = pl->events; | ||
379 | |||
380 | global_count = percpu_counter_read(&pg->events); | ||
381 | *denominator = period_2 + (global_count & counter_mask); | ||
382 | |||
383 | prop_put_global(pd, pg); | ||
384 | } | ||