aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--mm/page-writeback.c107
1 files changed, 66 insertions, 41 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 93d8d2f7108c..e5363f34e025 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/buffer_head.h> /* __set_page_dirty_buffers */ 35#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
36#include <linux/pagevec.h> 36#include <linux/pagevec.h>
37#include <linux/timer.h>
37#include <trace/events/writeback.h> 38#include <trace/events/writeback.h>
38 39
39/* 40/*
@@ -135,7 +136,20 @@ unsigned long global_dirty_limit;
135 * measured in page writeback completions. 136 * measured in page writeback completions.
136 * 137 *
137 */ 138 */
138static struct prop_descriptor vm_completions; 139static struct fprop_global writeout_completions;
140
141static void writeout_period(unsigned long t);
142/* Timer for aging of writeout_completions */
143static struct timer_list writeout_period_timer =
144 TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
145static unsigned long writeout_period_time = 0;
146
147/*
148 * Length of period for aging writeout fractions of bdis. This is an
149 * arbitrarily chosen number. The longer the period, the slower fractions will
150 * reflect changes in current writeout rate.
151 */
152#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
139 153
140/* 154/*
141 * Work out the current dirty-memory clamping and background writeout 155 * Work out the current dirty-memory clamping and background writeout
@@ -322,34 +336,6 @@ bool zone_dirty_ok(struct zone *zone)
322 zone_page_state(zone, NR_WRITEBACK) <= limit; 336 zone_page_state(zone, NR_WRITEBACK) <= limit;
323} 337}
324 338
325/*
326 * couple the period to the dirty_ratio:
327 *
328 * period/2 ~ roundup_pow_of_two(dirty limit)
329 */
330static int calc_period_shift(void)
331{
332 unsigned long dirty_total;
333
334 if (vm_dirty_bytes)
335 dirty_total = vm_dirty_bytes / PAGE_SIZE;
336 else
337 dirty_total = (vm_dirty_ratio * global_dirtyable_memory()) /
338 100;
339 return 2 + ilog2(dirty_total - 1);
340}
341
342/*
343 * update the period when the dirty threshold changes.
344 */
345static void update_completion_period(void)
346{
347 int shift = calc_period_shift();
348 prop_change_shift(&vm_completions, shift);
349
350 writeback_set_ratelimit();
351}
352
353int dirty_background_ratio_handler(struct ctl_table *table, int write, 339int dirty_background_ratio_handler(struct ctl_table *table, int write,
354 void __user *buffer, size_t *lenp, 340 void __user *buffer, size_t *lenp,
355 loff_t *ppos) 341 loff_t *ppos)
@@ -383,7 +369,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
383 369
384 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 370 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
385 if (ret == 0 && write && vm_dirty_ratio != old_ratio) { 371 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
386 update_completion_period(); 372 writeback_set_ratelimit();
387 vm_dirty_bytes = 0; 373 vm_dirty_bytes = 0;
388 } 374 }
389 return ret; 375 return ret;
@@ -398,12 +384,21 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
398 384
399 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 385 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
400 if (ret == 0 && write && vm_dirty_bytes != old_bytes) { 386 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
401 update_completion_period(); 387 writeback_set_ratelimit();
402 vm_dirty_ratio = 0; 388 vm_dirty_ratio = 0;
403 } 389 }
404 return ret; 390 return ret;
405} 391}
406 392
393static unsigned long wp_next_time(unsigned long cur_time)
394{
395 cur_time += VM_COMPLETIONS_PERIOD_LEN;
396 /* 0 has a special meaning... */
397 if (!cur_time)
398 return 1;
399 return cur_time;
400}
401
407/* 402/*
408 * Increment the BDI's writeout completion count and the global writeout 403 * Increment the BDI's writeout completion count and the global writeout
409 * completion count. Called from test_clear_page_writeback(). 404 * completion count. Called from test_clear_page_writeback().
@@ -411,8 +406,19 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
411static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) 406static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
412{ 407{
413 __inc_bdi_stat(bdi, BDI_WRITTEN); 408 __inc_bdi_stat(bdi, BDI_WRITTEN);
414 __prop_inc_percpu_max(&vm_completions, &bdi->completions, 409 __fprop_inc_percpu_max(&writeout_completions, &bdi->completions,
415 bdi->max_prop_frac); 410 bdi->max_prop_frac);
411 /* First event after period switching was turned off? */
412 if (!unlikely(writeout_period_time)) {
413 /*
414 * We can race with other __bdi_writeout_inc calls here but
415 * it does not cause any harm since the resulting time when
416 * timer will fire and what is in writeout_period_time will be
417 * roughly the same.
418 */
419 writeout_period_time = wp_next_time(jiffies);
420 mod_timer(&writeout_period_timer, writeout_period_time);
421 }
416} 422}
417 423
418void bdi_writeout_inc(struct backing_dev_info *bdi) 424void bdi_writeout_inc(struct backing_dev_info *bdi)
@@ -431,11 +437,33 @@ EXPORT_SYMBOL_GPL(bdi_writeout_inc);
431static void bdi_writeout_fraction(struct backing_dev_info *bdi, 437static void bdi_writeout_fraction(struct backing_dev_info *bdi,
432 long *numerator, long *denominator) 438 long *numerator, long *denominator)
433{ 439{
434 prop_fraction_percpu(&vm_completions, &bdi->completions, 440 fprop_fraction_percpu(&writeout_completions, &bdi->completions,
435 numerator, denominator); 441 numerator, denominator);
436} 442}
437 443
438/* 444/*
445 * On idle system, we can be called long after we scheduled because we use
446 * deferred timers so count with missed periods.
447 */
448static void writeout_period(unsigned long t)
449{
450 int miss_periods = (jiffies - writeout_period_time) /
451 VM_COMPLETIONS_PERIOD_LEN;
452
453 if (fprop_new_period(&writeout_completions, miss_periods + 1)) {
454 writeout_period_time = wp_next_time(writeout_period_time +
455 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
456 mod_timer(&writeout_period_timer, writeout_period_time);
457 } else {
458 /*
459 * Aging has zeroed all fractions. Stop wasting CPU on period
460 * updates.
461 */
462 writeout_period_time = 0;
463 }
464}
465
466/*
439 * bdi_min_ratio keeps the sum of the minimum dirty shares of all 467 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
440 * registered backing devices, which, for obvious reasons, can not 468 * registered backing devices, which, for obvious reasons, can not
441 * exceed 100%. 469 * exceed 100%.
@@ -475,7 +503,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
475 ret = -EINVAL; 503 ret = -EINVAL;
476 } else { 504 } else {
477 bdi->max_ratio = max_ratio; 505 bdi->max_ratio = max_ratio;
478 bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; 506 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
479 } 507 }
480 spin_unlock_bh(&bdi_lock); 508 spin_unlock_bh(&bdi_lock);
481 509
@@ -918,7 +946,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
918 * bdi->dirty_ratelimit = balanced_dirty_ratelimit; 946 * bdi->dirty_ratelimit = balanced_dirty_ratelimit;
919 * 947 *
920 * However to get a more stable dirty_ratelimit, the below elaborated 948 * However to get a more stable dirty_ratelimit, the below elaborated
921 * code makes use of task_ratelimit to filter out sigular points and 949 * code makes use of task_ratelimit to filter out singular points and
922 * limit the step size. 950 * limit the step size.
923 * 951 *
924 * The below code essentially only uses the relative value of 952 * The below code essentially only uses the relative value of
@@ -941,7 +969,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
941 * feel and care are stable dirty rate and small position error. 969 * feel and care are stable dirty rate and small position error.
942 * 970 *
943 * |task_ratelimit - dirty_ratelimit| is used to limit the step size 971 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
944 * and filter out the sigular points of balanced_dirty_ratelimit. Which 972 * and filter out the singular points of balanced_dirty_ratelimit. Which
945 * keeps jumping around randomly and can even leap far away at times 973 * keeps jumping around randomly and can even leap far away at times
946 * due to the small 200ms estimation period of dirty_rate (we want to 974 * due to the small 200ms estimation period of dirty_rate (we want to
947 * keep that period small to reduce time lags). 975 * keep that period small to reduce time lags).
@@ -1606,13 +1634,10 @@ static struct notifier_block __cpuinitdata ratelimit_nb = {
1606 */ 1634 */
1607void __init page_writeback_init(void) 1635void __init page_writeback_init(void)
1608{ 1636{
1609 int shift;
1610
1611 writeback_set_ratelimit(); 1637 writeback_set_ratelimit();
1612 register_cpu_notifier(&ratelimit_nb); 1638 register_cpu_notifier(&ratelimit_nb);
1613 1639
1614 shift = calc_period_shift(); 1640 fprop_global_init(&writeout_completions);
1615 prop_descriptor_init(&vm_completions, shift);
1616} 1641}
1617 1642
1618/** 1643/**