| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 01:14:04 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 01:14:04 -0400 |
| commit | 2e3ee613480563a6d5c01b57d342e65cc58c06df (patch) | |
| tree | b6b82d1ade41f137bdb9a5a18d8aa446e149c8b2 /lib | |
| parent | 1fad1e9a747687a7399bf58e87974f9b1bbcae06 (diff) | |
| parent | 331cbdeedeb2f4ef01ccb761513708af0fe77098 (diff) | |
Merge tag 'writeback-proportions' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux
Pull writeback updates from Wu Fengguang:
"Use time based periods to age the writeback proportions, which can
adapt equally well to fast/slow devices."
Fix up trivial conflict in comment in fs/sync.c
* tag 'writeback-proportions' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux:
writeback: Fix some comment errors
block: Convert BDI proportion calculations to flexible proportions
lib: Fix possible deadlock in flexible proportion code
lib: Proportions with flexible period
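The core idea of the series (described in more detail in the header comment of the new file below) is easiest to see with a toy example. The following is a minimal, user-space sketch of the eager "halve everything each period" scheme that lib/flex_proportions.c implements lazily; the two event types and all names here are illustrative, not part of the kernel code:

```c
/*
 * Illustrative user-space sketch (not kernel code) of the idea behind
 * time-based periods: each period halves both the per-type and the total
 * event counts, so events from i periods ago count with weight 1/2^i
 * relative to current ones. lib/flex_proportions.c does the per-type
 * halving lazily instead of iterating over all types.
 */
#include <stdio.h>

static unsigned long total;	/* denominator 'd': all events        */
static unsigned long n[2];	/* numerators 'n_j', two event types  */

static void event(int j)	/* an event of type j happened */
{
	n[j]++;
	total++;
}

static void new_period(void)	/* one aging period elapsed */
{
	total /= 2;
	n[0] /= 2;
	n[1] /= 2;
}

int main(void)
{
	int i;

	for (i = 0; i < 8; i++)
		event(0);	/* a burst of type-0 events ...      */
	new_period();
	for (i = 0; i < 8; i++)
		event(1);	/* ... then a burst of type-1 events */

	/* prints "p0 = 4/12, p1 = 8/12": the newer burst dominates */
	printf("p0 = %lu/%lu, p1 = %lu/%lu\n", n[0], total, n[1], total);
	return 0;
}
```

After one period the old type-0 burst has been halved, so type 1 correctly shows up as the dominant event source, regardless of how long the period actually was in wall-clock time.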
Diffstat (limited to 'lib')
-rw-r--r--  lib/Makefile              2
-rw-r--r--  lib/flex_proportions.c  272

2 files changed, 273 insertions, 1 deletions
diff --git a/lib/Makefile b/lib/Makefile
index 9cb4104f47d9..42d283edc4d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o prio_heap.o ratelimit.o show_mem.o \
+	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o
 
 lib-$(CONFIG_MMU) += ioremap.o
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
new file mode 100644
index 000000000000..c785554f9523
--- /dev/null
+++ b/lib/flex_proportions.c
@@ -0,0 +1,272 @@
+/*
+ * Floating proportions with flexible aging period
+ *
+ * Copyright (C) 2011, SUSE, Jan Kara <jack@suse.cz>
+ *
+ * The goal of this code is: Given different types of events, measure the
+ * proportion of each type of event over time. The proportions are measured
+ * with exponentially decaying history to give smooth transitions. A formula
+ * expressing the proportion of events of type 'j' is:
+ *
+ *   p_{j} = (\Sum_{i>=0} x_{i,j}/2^{i+1})/(\Sum_{i>=0} x_i/2^{i+1})
+ *
+ * Where x_{i,j} is j's number of events in the i-th last time period and x_i
+ * is the total number of events in the i-th last time period.
+ *
+ * Note that p_{j}'s are normalised, i.e.
+ *
+ *   \Sum_{j} p_{j} = 1.
+ *
+ * This formula can be straightforwardly computed by maintaining the
+ * denominator (let's call it 'd') and, for each event type, its numerator
+ * (let's call it 'n_j'). When an event of type 'j' happens, we simply do:
+ *   n_j++; d++;
+ *
+ * When a new period is declared, we could do:
+ *   d /= 2
+ *   for each j
+ *     n_j /= 2
+ *
+ * To avoid iterating over all event types, we instead shift the numerator of
+ * event j lazily when someone asks for the proportion of event j or when
+ * event j occurs. This can be trivially implemented by remembering the last
+ * period in which something happened with the proportion of type j.
+ */
+#include <linux/flex_proportions.h>
+
+int fprop_global_init(struct fprop_global *p)
+{
+	int err;
+
+	p->period = 0;
+	/* Use 1 to avoid dealing with periods with 0 events... */
+	err = percpu_counter_init(&p->events, 1);
+	if (err)
+		return err;
+	seqcount_init(&p->sequence);
+	return 0;
+}
+
+void fprop_global_destroy(struct fprop_global *p)
+{
+	percpu_counter_destroy(&p->events);
+}
+
+/*
+ * Declare @periods new periods. It is up to the caller to make sure period
+ * transitions cannot happen in parallel.
+ *
+ * The function returns true if the proportions are still defined and false
+ * if aging zeroed out all events. This can be used to detect whether declaring
+ * further periods has any effect.
+ */
+bool fprop_new_period(struct fprop_global *p, int periods)
+{
+	u64 events;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	events = percpu_counter_sum(&p->events);
+	/*
+	 * Don't do anything if there are no events.
+	 */
+	if (events <= 1) {
+		local_irq_restore(flags);
+		return false;
+	}
+	write_seqcount_begin(&p->sequence);
+	if (periods < 64)
+		events -= events >> periods;
+	/* Use addition to avoid losing events happening between sum and set */
+	percpu_counter_add(&p->events, -events);
+	p->period += periods;
+	write_seqcount_end(&p->sequence);
+	local_irq_restore(flags);
+
+	return true;
+}
+
+/*
+ * ---- SINGLE ----
+ */
+
+int fprop_local_init_single(struct fprop_local_single *pl)
+{
+	pl->events = 0;
+	pl->period = 0;
+	raw_spin_lock_init(&pl->lock);
+	return 0;
+}
+
+void fprop_local_destroy_single(struct fprop_local_single *pl)
+{
+}
+
+static void fprop_reflect_period_single(struct fprop_global *p,
+					struct fprop_local_single *pl)
+{
+	unsigned int period = p->period;
+	unsigned long flags;
+
+	/* Fast path - period didn't change */
+	if (pl->period == period)
+		return;
+	raw_spin_lock_irqsave(&pl->lock, flags);
+	/* Someone updated pl->period while we were spinning? */
+	if (pl->period >= period) {
+		raw_spin_unlock_irqrestore(&pl->lock, flags);
+		return;
+	}
+	/* Aging zeroed our fraction? */
+	if (period - pl->period < BITS_PER_LONG)
+		pl->events >>= period - pl->period;
+	else
+		pl->events = 0;
+	pl->period = period;
+	raw_spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/* Event of type pl happened */
+void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl)
+{
+	fprop_reflect_period_single(p, pl);
+	pl->events++;
+	percpu_counter_add(&p->events, 1);
+}
+
+/* Return fraction of events of type pl */
+void fprop_fraction_single(struct fprop_global *p,
+			   struct fprop_local_single *pl,
+			   unsigned long *numerator, unsigned long *denominator)
+{
+	unsigned int seq;
+	s64 num, den;
+
+	do {
+		seq = read_seqcount_begin(&p->sequence);
+		fprop_reflect_period_single(p, pl);
+		num = pl->events;
+		den = percpu_counter_read_positive(&p->events);
+	} while (read_seqcount_retry(&p->sequence, seq));
+
+	/*
+	 * Make fraction <= 1 and denominator > 0 even in presence of percpu
+	 * counter errors
+	 */
+	if (den <= num) {
+		if (num)
+			den = num;
+		else
+			den = 1;
+	}
+	*denominator = den;
+	*numerator = num;
+}
+
+/*
+ * ---- PERCPU ----
+ */
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
+int fprop_local_init_percpu(struct fprop_local_percpu *pl)
+{
+	int err;
+
+	err = percpu_counter_init(&pl->events, 0);
+	if (err)
+		return err;
+	pl->period = 0;
+	raw_spin_lock_init(&pl->lock);
+	return 0;
+}
+
+void fprop_local_destroy_percpu(struct fprop_local_percpu *pl)
+{
+	percpu_counter_destroy(&pl->events);
+}
+
+static void fprop_reflect_period_percpu(struct fprop_global *p,
+					struct fprop_local_percpu *pl)
+{
+	unsigned int period = p->period;
+	unsigned long flags;
+
+	/* Fast path - period didn't change */
+	if (pl->period == period)
+		return;
+	raw_spin_lock_irqsave(&pl->lock, flags);
+	/* Someone updated pl->period while we were spinning? */
+	if (pl->period >= period) {
+		raw_spin_unlock_irqrestore(&pl->lock, flags);
+		return;
+	}
+	/* Aging zeroed our fraction? */
+	if (period - pl->period < BITS_PER_LONG) {
+		s64 val = percpu_counter_read(&pl->events);
+
+		if (val < (nr_cpu_ids * PROP_BATCH))
+			val = percpu_counter_sum(&pl->events);
+
+		__percpu_counter_add(&pl->events,
+			-val + (val >> (period-pl->period)), PROP_BATCH);
+	} else
+		percpu_counter_set(&pl->events, 0);
+	pl->period = period;
+	raw_spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/* Event of type pl happened */
+void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
+{
+	fprop_reflect_period_percpu(p, pl);
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add(&p->events, 1);
+}
+
+void fprop_fraction_percpu(struct fprop_global *p,
+			   struct fprop_local_percpu *pl,
+			   unsigned long *numerator, unsigned long *denominator)
+{
+	unsigned int seq;
+	s64 num, den;
+
+	do {
+		seq = read_seqcount_begin(&p->sequence);
+		fprop_reflect_period_percpu(p, pl);
+		num = percpu_counter_read_positive(&pl->events);
+		den = percpu_counter_read_positive(&p->events);
+	} while (read_seqcount_retry(&p->sequence, seq));
+
+	/*
+	 * Make fraction <= 1 and denominator > 0 even in presence of percpu
+	 * counter errors
+	 */
+	if (den <= num) {
+		if (num)
+			den = num;
+		else
+			den = 1;
+	}
+	*denominator = den;
+	*numerator = num;
+}
+
+/*
+ * Like __fprop_inc_percpu() except that the event is counted only if the
+ * given type has a fraction smaller than @max_frac/FPROP_FRAC_BASE
+ */
+void __fprop_inc_percpu_max(struct fprop_global *p,
+			    struct fprop_local_percpu *pl, int max_frac)
+{
+	if (unlikely(max_frac < FPROP_FRAC_BASE)) {
+		unsigned long numerator, denominator;
+
+		fprop_fraction_percpu(p, pl, &numerator, &denominator);
+		if (numerator >
+		    (((u64)denominator) * max_frac) >> FPROP_FRAC_SHIFT)
+			return;
+	} else
+		fprop_reflect_period_percpu(p, pl);
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add(&p->events, 1);
+}
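For orientation, here is a hedged sketch of how a caller might wire up the percpu flavour of this API, using only the functions visible in the diff above. The two-device setup, the aging hook, and all example_* names are illustrative assumptions, not part of this series:

```c
/*
 * Hedged usage sketch of the percpu flex_proportions API. Only the calls
 * defined in lib/flex_proportions.c above are used; everything else here
 * (the two-device array, the example_* names) is illustrative.
 */
#include <linux/flex_proportions.h>

static struct fprop_global completions;			/* denominator: all events   */
static struct fprop_local_percpu dev_completions[2];	/* one numerator per device  */

static int example_init(void)
{
	int err, i;

	err = fprop_global_init(&completions);
	if (err)
		return err;
	for (i = 0; i < 2; i++) {
		err = fprop_local_init_percpu(&dev_completions[i]);
		if (err)
			return err;	/* error unwinding omitted for brevity */
	}
	return 0;
}

/* Call when device @i completes a write (the tracked event) */
static void example_write_completed(int i)
{
	__fprop_inc_percpu(&completions, &dev_completions[i]);
}

/* Call from a timer or similar: declare @missed elapsed aging periods */
static void example_age(int missed)
{
	fprop_new_period(&completions, missed);
}

/* Read device @i's current share as numerator/denominator */
static void example_fraction(int i, unsigned long *num, unsigned long *den)
{
	fprop_fraction_percpu(&completions, &dev_completions[i], num, den);
}
```

Period declaration is driven by the caller here (e.g. on a time basis rather than after a fixed number of events), which is what the pull description refers to as adapting equally well to fast and slow devices.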