author	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 01:14:04 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 01:14:04 -0400
commit	2e3ee613480563a6d5c01b57d342e65cc58c06df (patch)
tree	b6b82d1ade41f137bdb9a5a18d8aa446e149c8b2
parent	1fad1e9a747687a7399bf58e87974f9b1bbcae06 (diff)
parent	331cbdeedeb2f4ef01ccb761513708af0fe77098 (diff)
Merge tag 'writeback-proportions' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux
Pull writeback updates from Wu Fengguang:
 "Use time based periods to age the writeback proportions, which can
  adapt equally well to fast/slow devices."

Fix up trivial conflict in comment in fs/sync.c

* tag 'writeback-proportions' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux:
  writeback: Fix some comment errors
  block: Convert BDI proportion calculations to flexible proportions
  lib: Fix possible deadlock in flexible proportion code
  lib: Proportions with flexible period
-rw-r--r--	fs/fs-writeback.c	4
-rw-r--r--	fs/super.c	2
-rw-r--r--	include/linux/backing-dev.h	4
-rw-r--r--	include/linux/flex_proportions.h	101
-rw-r--r--	lib/Makefile	2
-rw-r--r--	lib/flex_proportions.c	272
-rw-r--r--	mm/backing-dev.c	6
-rw-r--r--	mm/page-writeback.c	107
8 files changed, 448 insertions, 50 deletions
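
To give a feel for the API this series introduces, here is a minimal usage sketch of the flexible-proportions primitives (the variable names and the standalone setup are illustrative only; the calls and signatures are the ones declared in include/linux/flex_proportions.h as added below):

	#include <linux/flex_proportions.h>

	static struct fprop_global completions;		/* denominator: events of all types */
	static struct fprop_local_percpu my_events;	/* numerator: events of one type */

	/* setup (error handling omitted) */
	fprop_global_init(&completions);
	fprop_local_init_percpu(&my_events);

	/* count one event of this type */
	fprop_inc_percpu(&completions, &my_events);

	/* age the history by one period; the writeback code drives this from a timer */
	fprop_new_period(&completions, 1);

	/* read back this type's current share as numerator/denominator */
	unsigned long num, den;
	fprop_fraction_percpu(&completions, &my_events, &num, &den);

	/* teardown */
	fprop_local_destroy_percpu(&my_events);
	fprop_global_destroy(&completions);

In the patch itself, mm/page-writeback.c plays this role for BDI writeout completions: __bdi_writeout_inc() bumps the per-BDI counter, and a deferred timer calls fprop_new_period() every VM_COMPLETIONS_PERIOD_LEN (3*HZ), replacing the old calc_period_shift()/prop_change_shift() scheme.
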
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8f660dd6137a..50d0b78130a1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -628,8 +628,8 @@ static long writeback_sb_inodes(struct super_block *sb,
 	}
 
 	/*
-	 * Don't bother with new inodes or inodes beeing freed, first
-	 * kind does not need peridic writeout yet, and for the latter
+	 * Don't bother with new inodes or inodes being freed, first
+	 * kind does not need periodic writeout yet, and for the latter
 	 * kind writeout is handled by the freer.
 	 */
 	spin_lock(&inode->i_lock);
diff --git a/fs/super.c b/fs/super.c
index c743fb3be4b8..4c5d82f56ec4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -320,7 +320,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 
 /*
  *	grab_super_passive - acquire a passive reference
- *	@s: reference we are trying to grab
+ *	@sb: reference we are trying to grab
  *
  *	Tries to acquire a passive reference. This is used in places where we
  *	cannot take an active reference but we need to ensure that the
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index b1038bd686ac..489de625cd25 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -10,7 +10,7 @@
 
 #include <linux/percpu_counter.h>
 #include <linux/log2.h>
-#include <linux/proportions.h>
+#include <linux/flex_proportions.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
@@ -89,7 +89,7 @@ struct backing_dev_info {
 	unsigned long dirty_ratelimit;
 	unsigned long balanced_dirty_ratelimit;
 
-	struct prop_local_percpu completions;
+	struct fprop_local_percpu completions;
 	int dirty_exceeded;
 
 	unsigned int min_ratio;
diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
new file mode 100644
index 000000000000..4ebc49fae391
--- /dev/null
+++ b/include/linux/flex_proportions.h
@@ -0,0 +1,101 @@
+/*
+ * Floating proportions with flexible aging period
+ *
+ * Copyright (C) 2011, SUSE, Jan Kara <jack@suse.cz>
+ */
+
+#ifndef _LINUX_FLEX_PROPORTIONS_H
+#define _LINUX_FLEX_PROPORTIONS_H
+
+#include <linux/percpu_counter.h>
+#include <linux/spinlock.h>
+#include <linux/seqlock.h>
+
+/*
+ * When maximum proportion of some event type is specified, this is the
+ * precision with which we allow limitting. Note that this creates an upper
+ * bound on the number of events per period like
+ * ULLONG_MAX >> FPROP_FRAC_SHIFT.
+ */
+#define FPROP_FRAC_SHIFT 10
+#define FPROP_FRAC_BASE (1UL << FPROP_FRAC_SHIFT)
+
+/*
+ * ---- Global proportion definitions ----
+ */
+struct fprop_global {
+	/* Number of events in the current period */
+	struct percpu_counter events;
+	/* Current period */
+	unsigned int period;
+	/* Synchronization with period transitions */
+	seqcount_t sequence;
+};
+
+int fprop_global_init(struct fprop_global *p);
+void fprop_global_destroy(struct fprop_global *p);
+bool fprop_new_period(struct fprop_global *p, int periods);
+
+/*
+ * ---- SINGLE ----
+ */
+struct fprop_local_single {
+	/* the local events counter */
+	unsigned long events;
+	/* Period in which we last updated events */
+	unsigned int period;
+	raw_spinlock_t lock;	/* Protect period and numerator */
+};
+
+#define INIT_FPROP_LOCAL_SINGLE(name)			\
+{	.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
+}
+
+int fprop_local_init_single(struct fprop_local_single *pl);
+void fprop_local_destroy_single(struct fprop_local_single *pl);
+void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl);
+void fprop_fraction_single(struct fprop_global *p,
+	struct fprop_local_single *pl, unsigned long *numerator,
+	unsigned long *denominator);
+
+static inline
+void fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__fprop_inc_single(p, pl);
+	local_irq_restore(flags);
+}
+
+/*
+ * ---- PERCPU ----
+ */
+struct fprop_local_percpu {
+	/* the local events counter */
+	struct percpu_counter events;
+	/* Period in which we last updated events */
+	unsigned int period;
+	raw_spinlock_t lock;	/* Protect period and numerator */
+};
+
+int fprop_local_init_percpu(struct fprop_local_percpu *pl);
+void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
+void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
+void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
+			    int max_frac);
+void fprop_fraction_percpu(struct fprop_global *p,
+	struct fprop_local_percpu *pl, unsigned long *numerator,
+	unsigned long *denominator);
+
+static inline
+void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__fprop_inc_percpu(p, pl);
+	local_irq_restore(flags);
+}
+
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 9cb4104f47d9..42d283edc4d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o prio_heap.o ratelimit.o show_mem.o \
+	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o
 
 lib-$(CONFIG_MMU)	+= ioremap.o
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
new file mode 100644
index 000000000000..c785554f9523
--- /dev/null
+++ b/lib/flex_proportions.c
@@ -0,0 +1,272 @@
+/*
+ * Floating proportions with flexible aging period
+ *
+ * Copyright (C) 2011, SUSE, Jan Kara <jack@suse.cz>
+ *
+ * The goal of this code is: Given different types of event, measure proportion
+ * of each type of event over time. The proportions are measured with
+ * exponentially decaying history to give smooth transitions. A formula
+ * expressing proportion of event of type 'j' is:
+ *
+ *   p_{j} = (\Sum_{i>=0} x_{i,j}/2^{i+1})/(\Sum_{i>=0} x_i/2^{i+1})
+ *
+ * Where x_{i,j} is j's number of events in i-th last time period and x_i is
+ * total number of events in i-th last time period.
+ *
+ * Note that p_{j}'s are normalised, i.e.
+ *
+ *   \Sum_{j} p_{j} = 1,
+ *
+ * This formula can be straightforwardly computed by maintaing denominator
+ * (let's call it 'd') and for each event type its numerator (let's call it
+ * 'n_j'). When an event of type 'j' happens, we simply need to do:
+ *   n_j++; d++;
+ *
+ * When a new period is declared, we could do:
+ *   d /= 2
+ *   for each j
+ *     n_j /= 2
+ *
+ * To avoid iteration over all event types, we instead shift numerator of event
+ * j lazily when someone asks for a proportion of event j or when event j
+ * occurs. This can bit trivially implemented by remembering last period in
+ * which something happened with proportion of type j.
+ */
+#include <linux/flex_proportions.h>
+
+int fprop_global_init(struct fprop_global *p)
+{
+	int err;
+
+	p->period = 0;
+	/* Use 1 to avoid dealing with periods with 0 events... */
+	err = percpu_counter_init(&p->events, 1);
+	if (err)
+		return err;
+	seqcount_init(&p->sequence);
+	return 0;
+}
+
+void fprop_global_destroy(struct fprop_global *p)
+{
+	percpu_counter_destroy(&p->events);
+}
+
+/*
+ * Declare @periods new periods. It is upto the caller to make sure period
+ * transitions cannot happen in parallel.
+ *
+ * The function returns true if the proportions are still defined and false
+ * if aging zeroed out all events. This can be used to detect whether declaring
+ * further periods has any effect.
+ */
+bool fprop_new_period(struct fprop_global *p, int periods)
+{
+	u64 events;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	events = percpu_counter_sum(&p->events);
+	/*
+	 * Don't do anything if there are no events.
+	 */
+	if (events <= 1) {
+		local_irq_restore(flags);
+		return false;
+	}
+	write_seqcount_begin(&p->sequence);
+	if (periods < 64)
+		events -= events >> periods;
+	/* Use addition to avoid losing events happening between sum and set */
+	percpu_counter_add(&p->events, -events);
+	p->period += periods;
+	write_seqcount_end(&p->sequence);
+	local_irq_restore(flags);
+
+	return true;
+}
+
+/*
+ * ---- SINGLE ----
+ */
+
+int fprop_local_init_single(struct fprop_local_single *pl)
+{
+	pl->events = 0;
+	pl->period = 0;
+	raw_spin_lock_init(&pl->lock);
+	return 0;
+}
+
+void fprop_local_destroy_single(struct fprop_local_single *pl)
+{
+}
+
+static void fprop_reflect_period_single(struct fprop_global *p,
+					struct fprop_local_single *pl)
+{
+	unsigned int period = p->period;
+	unsigned long flags;
+
+	/* Fast path - period didn't change */
+	if (pl->period == period)
+		return;
+	raw_spin_lock_irqsave(&pl->lock, flags);
+	/* Someone updated pl->period while we were spinning? */
+	if (pl->period >= period) {
+		raw_spin_unlock_irqrestore(&pl->lock, flags);
+		return;
+	}
+	/* Aging zeroed our fraction? */
+	if (period - pl->period < BITS_PER_LONG)
+		pl->events >>= period - pl->period;
+	else
+		pl->events = 0;
+	pl->period = period;
+	raw_spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/* Event of type pl happened */
+void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl)
+{
+	fprop_reflect_period_single(p, pl);
+	pl->events++;
+	percpu_counter_add(&p->events, 1);
+}
+
+/* Return fraction of events of type pl */
+void fprop_fraction_single(struct fprop_global *p,
+			   struct fprop_local_single *pl,
+			   unsigned long *numerator, unsigned long *denominator)
+{
+	unsigned int seq;
+	s64 num, den;
+
+	do {
+		seq = read_seqcount_begin(&p->sequence);
+		fprop_reflect_period_single(p, pl);
+		num = pl->events;
+		den = percpu_counter_read_positive(&p->events);
+	} while (read_seqcount_retry(&p->sequence, seq));
+
+	/*
+	 * Make fraction <= 1 and denominator > 0 even in presence of percpu
+	 * counter errors
+	 */
+	if (den <= num) {
+		if (num)
+			den = num;
+		else
+			den = 1;
+	}
+	*denominator = den;
+	*numerator = num;
+}
+
+/*
+ * ---- PERCPU ----
+ */
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
+int fprop_local_init_percpu(struct fprop_local_percpu *pl)
+{
+	int err;
+
+	err = percpu_counter_init(&pl->events, 0);
+	if (err)
+		return err;
+	pl->period = 0;
+	raw_spin_lock_init(&pl->lock);
+	return 0;
+}
+
+void fprop_local_destroy_percpu(struct fprop_local_percpu *pl)
+{
+	percpu_counter_destroy(&pl->events);
+}
+
+static void fprop_reflect_period_percpu(struct fprop_global *p,
+					struct fprop_local_percpu *pl)
+{
+	unsigned int period = p->period;
+	unsigned long flags;
+
+	/* Fast path - period didn't change */
+	if (pl->period == period)
+		return;
+	raw_spin_lock_irqsave(&pl->lock, flags);
+	/* Someone updated pl->period while we were spinning? */
+	if (pl->period >= period) {
+		raw_spin_unlock_irqrestore(&pl->lock, flags);
+		return;
+	}
+	/* Aging zeroed our fraction? */
+	if (period - pl->period < BITS_PER_LONG) {
+		s64 val = percpu_counter_read(&pl->events);
+
+		if (val < (nr_cpu_ids * PROP_BATCH))
+			val = percpu_counter_sum(&pl->events);
+
+		__percpu_counter_add(&pl->events,
+			-val + (val >> (period-pl->period)), PROP_BATCH);
+	} else
+		percpu_counter_set(&pl->events, 0);
+	pl->period = period;
+	raw_spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/* Event of type pl happened */
+void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
+{
+	fprop_reflect_period_percpu(p, pl);
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add(&p->events, 1);
+}
+
+void fprop_fraction_percpu(struct fprop_global *p,
+			   struct fprop_local_percpu *pl,
+			   unsigned long *numerator, unsigned long *denominator)
+{
+	unsigned int seq;
+	s64 num, den;
+
+	do {
+		seq = read_seqcount_begin(&p->sequence);
+		fprop_reflect_period_percpu(p, pl);
+		num = percpu_counter_read_positive(&pl->events);
+		den = percpu_counter_read_positive(&p->events);
+	} while (read_seqcount_retry(&p->sequence, seq));
+
+	/*
+	 * Make fraction <= 1 and denominator > 0 even in presence of percpu
+	 * counter errors
+	 */
+	if (den <= num) {
+		if (num)
+			den = num;
+		else
+			den = 1;
+	}
+	*denominator = den;
+	*numerator = num;
+}
+
+/*
+ * Like __fprop_inc_percpu() except that event is counted only if the given
+ * type has fraction smaller than @max_frac/FPROP_FRAC_BASE
+ */
+void __fprop_inc_percpu_max(struct fprop_global *p,
+			    struct fprop_local_percpu *pl, int max_frac)
+{
+	if (unlikely(max_frac < FPROP_FRAC_BASE)) {
+		unsigned long numerator, denominator;
+
+		fprop_fraction_percpu(p, pl, &numerator, &denominator);
+		if (numerator >
+		    (((u64)denominator) * max_frac) >> FPROP_FRAC_SHIFT)
+			return;
+	} else
+		fprop_reflect_period_percpu(p, pl);
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add(&p->events, 1);
+}
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index dd8e2aafb07e..3387aea11209 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -677,7 +677,7 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
-	bdi->max_prop_frac = PROP_FRAC_BASE;
+	bdi->max_prop_frac = FPROP_FRAC_BASE;
 	spin_lock_init(&bdi->wb_lock);
 	INIT_LIST_HEAD(&bdi->bdi_list);
 	INIT_LIST_HEAD(&bdi->work_list);
@@ -700,7 +700,7 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->write_bandwidth = INIT_BW;
 	bdi->avg_write_bandwidth = INIT_BW;
 
-	err = prop_local_init_percpu(&bdi->completions);
+	err = fprop_local_init_percpu(&bdi->completions);
 
 	if (err) {
 err:
@@ -744,7 +744,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
 
-	prop_local_destroy_percpu(&bdi->completions);
+	fprop_local_destroy_percpu(&bdi->completions);
 }
 EXPORT_SYMBOL(bdi_destroy);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 93d8d2f7108c..e5363f34e025 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
+#include <linux/timer.h>
 #include <trace/events/writeback.h>
 
 /*
@@ -135,7 +136,20 @@ unsigned long global_dirty_limit;
  * measured in page writeback completions.
  *
  */
-static struct prop_descriptor vm_completions;
+static struct fprop_global writeout_completions;
+
+static void writeout_period(unsigned long t);
+/* Timer for aging of writeout_completions */
+static struct timer_list writeout_period_timer =
+		TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
+static unsigned long writeout_period_time = 0;
+
+/*
+ * Length of period for aging writeout fractions of bdis. This is an
+ * arbitrarily chosen number. The longer the period, the slower fractions will
+ * reflect changes in current writeout rate.
+ */
+#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
 
 /*
  * Work out the current dirty-memory clamping and background writeout
@@ -322,34 +336,6 @@ bool zone_dirty_ok(struct zone *zone)
 		zone_page_state(zone, NR_WRITEBACK) <= limit;
 }
 
-/*
- * couple the period to the dirty_ratio:
- *
- *   period/2 ~ roundup_pow_of_two(dirty limit)
- */
-static int calc_period_shift(void)
-{
-	unsigned long dirty_total;
-
-	if (vm_dirty_bytes)
-		dirty_total = vm_dirty_bytes / PAGE_SIZE;
-	else
-		dirty_total = (vm_dirty_ratio * global_dirtyable_memory()) /
-				100;
-	return 2 + ilog2(dirty_total - 1);
-}
-
-/*
- * update the period when the dirty threshold changes.
- */
-static void update_completion_period(void)
-{
-	int shift = calc_period_shift();
-	prop_change_shift(&vm_completions, shift);
-
-	writeback_set_ratelimit();
-}
-
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
@@ -383,7 +369,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
 
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
-		update_completion_period();
+		writeback_set_ratelimit();
 		vm_dirty_bytes = 0;
 	}
 	return ret;
@@ -398,12 +384,21 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
 
 	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
-		update_completion_period();
+		writeback_set_ratelimit();
 		vm_dirty_ratio = 0;
 	}
 	return ret;
 }
 
+static unsigned long wp_next_time(unsigned long cur_time)
+{
+	cur_time += VM_COMPLETIONS_PERIOD_LEN;
+	/* 0 has a special meaning... */
+	if (!cur_time)
+		return 1;
+	return cur_time;
+}
+
 /*
  * Increment the BDI's writeout completion count and the global writeout
  * completion count. Called from test_clear_page_writeback().
@@ -411,8 +406,19 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
 static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
 {
 	__inc_bdi_stat(bdi, BDI_WRITTEN);
-	__prop_inc_percpu_max(&vm_completions, &bdi->completions,
-			      bdi->max_prop_frac);
+	__fprop_inc_percpu_max(&writeout_completions, &bdi->completions,
+			       bdi->max_prop_frac);
+	/* First event after period switching was turned off? */
+	if (!unlikely(writeout_period_time)) {
+		/*
+		 * We can race with other __bdi_writeout_inc calls here but
+		 * it does not cause any harm since the resulting time when
+		 * timer will fire and what is in writeout_period_time will be
+		 * roughly the same.
+		 */
+		writeout_period_time = wp_next_time(jiffies);
+		mod_timer(&writeout_period_timer, writeout_period_time);
+	}
 }
 
 void bdi_writeout_inc(struct backing_dev_info *bdi)
@@ -431,11 +437,33 @@ EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 static void bdi_writeout_fraction(struct backing_dev_info *bdi,
 				  long *numerator, long *denominator)
 {
-	prop_fraction_percpu(&vm_completions, &bdi->completions,
-				numerator, denominator);
+	fprop_fraction_percpu(&writeout_completions, &bdi->completions,
+				numerator, denominator);
 }
 
 /*
+ * On idle system, we can be called long after we scheduled because we use
+ * deferred timers so count with missed periods.
+ */
+static void writeout_period(unsigned long t)
+{
+	int miss_periods = (jiffies - writeout_period_time) /
+				 VM_COMPLETIONS_PERIOD_LEN;
+
+	if (fprop_new_period(&writeout_completions, miss_periods + 1)) {
+		writeout_period_time = wp_next_time(writeout_period_time +
+				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
+		mod_timer(&writeout_period_timer, writeout_period_time);
+	} else {
+		/*
+		 * Aging has zeroed all fractions. Stop wasting CPU on period
+		 * updates.
+		 */
+		writeout_period_time = 0;
+	}
+}
+
+/*
  * bdi_min_ratio keeps the sum of the minimum dirty shares of all
  * registered backing devices, which, for obvious reasons, can not
  * exceed 100%.
@@ -475,7 +503,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
-		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
 	}
 	spin_unlock_bh(&bdi_lock);
 
@@ -918,7 +946,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 	 *	bdi->dirty_ratelimit = balanced_dirty_ratelimit;
 	 *
 	 * However to get a more stable dirty_ratelimit, the below elaborated
-	 * code makes use of task_ratelimit to filter out sigular points and
+	 * code makes use of task_ratelimit to filter out singular points and
 	 * limit the step size.
 	 *
 	 * The below code essentially only uses the relative value of
@@ -941,7 +969,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 	 * feel and care are stable dirty rate and small position error.
 	 *
 	 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
-	 * and filter out the sigular points of balanced_dirty_ratelimit. Which
+	 * and filter out the singular points of balanced_dirty_ratelimit. Which
 	 * keeps jumping around randomly and can even leap far away at times
 	 * due to the small 200ms estimation period of dirty_rate (we want to
 	 * keep that period small to reduce time lags).
@@ -1606,13 +1634,10 @@ static struct notifier_block __cpuinitdata ratelimit_nb = {
  */
 void __init page_writeback_init(void)
 {
-	int shift;
-
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 
-	shift = calc_period_shift();
-	prop_descriptor_init(&vm_completions, shift);
+	fprop_global_init(&writeout_completions);
 }
 
 /**