diff options
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 107 |
1 file changed, 66 insertions, 41 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 93d8d2f7108c..e5363f34e025 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/buffer_head.h> /* __set_page_dirty_buffers */ | 35 | #include <linux/buffer_head.h> /* __set_page_dirty_buffers */ |
36 | #include <linux/pagevec.h> | 36 | #include <linux/pagevec.h> |
37 | #include <linux/timer.h> | ||
37 | #include <trace/events/writeback.h> | 38 | #include <trace/events/writeback.h> |
38 | 39 | ||
39 | /* | 40 | /* |
@@ -135,7 +136,20 @@ unsigned long global_dirty_limit; | |||
135 | * measured in page writeback completions. | 136 | * measured in page writeback completions. |
136 | * | 137 | * |
137 | */ | 138 | */ |
138 | static struct prop_descriptor vm_completions; | 139 | static struct fprop_global writeout_completions; |
140 | |||
141 | static void writeout_period(unsigned long t); | ||
142 | /* Timer for aging of writeout_completions */ | ||
143 | static struct timer_list writeout_period_timer = | ||
144 | TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0); | ||
145 | static unsigned long writeout_period_time = 0; | ||
146 | |||
147 | /* | ||
148 | * Length of period for aging writeout fractions of bdis. This is an | ||
149 | * arbitrarily chosen number. The longer the period, the slower fractions will | ||
150 | * reflect changes in current writeout rate. | ||
151 | */ | ||
152 | #define VM_COMPLETIONS_PERIOD_LEN (3*HZ) | ||
139 | 153 | ||
140 | /* | 154 | /* |
141 | * Work out the current dirty-memory clamping and background writeout | 155 | * Work out the current dirty-memory clamping and background writeout |
@@ -322,34 +336,6 @@ bool zone_dirty_ok(struct zone *zone) | |||
322 | zone_page_state(zone, NR_WRITEBACK) <= limit; | 336 | zone_page_state(zone, NR_WRITEBACK) <= limit; |
323 | } | 337 | } |
324 | 338 | ||
325 | /* | ||
326 | * couple the period to the dirty_ratio: | ||
327 | * | ||
328 | * period/2 ~ roundup_pow_of_two(dirty limit) | ||
329 | */ | ||
330 | static int calc_period_shift(void) | ||
331 | { | ||
332 | unsigned long dirty_total; | ||
333 | |||
334 | if (vm_dirty_bytes) | ||
335 | dirty_total = vm_dirty_bytes / PAGE_SIZE; | ||
336 | else | ||
337 | dirty_total = (vm_dirty_ratio * global_dirtyable_memory()) / | ||
338 | 100; | ||
339 | return 2 + ilog2(dirty_total - 1); | ||
340 | } | ||
341 | |||
342 | /* | ||
343 | * update the period when the dirty threshold changes. | ||
344 | */ | ||
345 | static void update_completion_period(void) | ||
346 | { | ||
347 | int shift = calc_period_shift(); | ||
348 | prop_change_shift(&vm_completions, shift); | ||
349 | |||
350 | writeback_set_ratelimit(); | ||
351 | } | ||
352 | |||
353 | int dirty_background_ratio_handler(struct ctl_table *table, int write, | 339 | int dirty_background_ratio_handler(struct ctl_table *table, int write, |
354 | void __user *buffer, size_t *lenp, | 340 | void __user *buffer, size_t *lenp, |
355 | loff_t *ppos) | 341 | loff_t *ppos) |
@@ -383,7 +369,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write, | |||
383 | 369 | ||
384 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 370 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
385 | if (ret == 0 && write && vm_dirty_ratio != old_ratio) { | 371 | if (ret == 0 && write && vm_dirty_ratio != old_ratio) { |
386 | update_completion_period(); | 372 | writeback_set_ratelimit(); |
387 | vm_dirty_bytes = 0; | 373 | vm_dirty_bytes = 0; |
388 | } | 374 | } |
389 | return ret; | 375 | return ret; |
@@ -398,12 +384,21 @@ int dirty_bytes_handler(struct ctl_table *table, int write, | |||
398 | 384 | ||
399 | ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); | 385 | ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
400 | if (ret == 0 && write && vm_dirty_bytes != old_bytes) { | 386 | if (ret == 0 && write && vm_dirty_bytes != old_bytes) { |
401 | update_completion_period(); | 387 | writeback_set_ratelimit(); |
402 | vm_dirty_ratio = 0; | 388 | vm_dirty_ratio = 0; |
403 | } | 389 | } |
404 | return ret; | 390 | return ret; |
405 | } | 391 | } |
406 | 392 | ||
393 | static unsigned long wp_next_time(unsigned long cur_time) | ||
394 | { | ||
395 | cur_time += VM_COMPLETIONS_PERIOD_LEN; | ||
396 | /* 0 is reserved: writeout_period_time == 0 means aging is switched off */ | ||
397 | if (!cur_time) | ||
398 | return 1; | ||
399 | return cur_time; | ||
400 | } | ||
401 | |||
407 | /* | 402 | /* |
408 | * Increment the BDI's writeout completion count and the global writeout | 403 | * Increment the BDI's writeout completion count and the global writeout |
409 | * completion count. Called from test_clear_page_writeback(). | 404 | * completion count. Called from test_clear_page_writeback(). |
@@ -411,8 +406,19 @@ int dirty_bytes_handler(struct ctl_table *table, int write, | |||
411 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) | 406 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) |
412 | { | 407 | { |
413 | __inc_bdi_stat(bdi, BDI_WRITTEN); | 408 | __inc_bdi_stat(bdi, BDI_WRITTEN); |
414 | __prop_inc_percpu_max(&vm_completions, &bdi->completions, | 409 | __fprop_inc_percpu_max(&writeout_completions, &bdi->completions, |
415 | bdi->max_prop_frac); | 410 | bdi->max_prop_frac); |
411 | /* First event after period switching was turned off? */ | ||
412 | if (!unlikely(writeout_period_time)) { | ||
413 | /* | ||
414 | * We can race with other __bdi_writeout_inc calls here but | ||
415 | * it does not cause any harm since the resulting time when | ||
416 | * timer will fire and what is in writeout_period_time will be | ||
417 | * roughly the same. | ||
418 | */ | ||
419 | writeout_period_time = wp_next_time(jiffies); | ||
420 | mod_timer(&writeout_period_timer, writeout_period_time); | ||
421 | } | ||
416 | } | 422 | } |
417 | 423 | ||
418 | void bdi_writeout_inc(struct backing_dev_info *bdi) | 424 | void bdi_writeout_inc(struct backing_dev_info *bdi) |
@@ -431,11 +437,33 @@ EXPORT_SYMBOL_GPL(bdi_writeout_inc); | |||
431 | static void bdi_writeout_fraction(struct backing_dev_info *bdi, | 437 | static void bdi_writeout_fraction(struct backing_dev_info *bdi, |
432 | long *numerator, long *denominator) | 438 | long *numerator, long *denominator) |
433 | { | 439 | { |
434 | prop_fraction_percpu(&vm_completions, &bdi->completions, | 440 | fprop_fraction_percpu(&writeout_completions, &bdi->completions, |
435 | numerator, denominator); | 441 | numerator, denominator); |
436 | } | 442 | } |
437 | 443 | ||
438 | /* | 444 | /* |
445 | * On an idle system, we can be called long after we were scheduled because | ||
446 | * we use deferred timers, so account for the periods missed in between. | ||
447 | */ | ||
448 | static void writeout_period(unsigned long t) | ||
449 | { | ||
450 | int miss_periods = (jiffies - writeout_period_time) / | ||
451 | VM_COMPLETIONS_PERIOD_LEN; | ||
452 | |||
453 | if (fprop_new_period(&writeout_completions, miss_periods + 1)) { | ||
454 | writeout_period_time = wp_next_time(writeout_period_time + | ||
455 | miss_periods * VM_COMPLETIONS_PERIOD_LEN); | ||
456 | mod_timer(&writeout_period_timer, writeout_period_time); | ||
457 | } else { | ||
458 | /* | ||
459 | * Aging has zeroed all fractions. Stop wasting CPU on period | ||
460 | * updates. | ||
461 | */ | ||
462 | writeout_period_time = 0; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | /* | ||
439 | * bdi_min_ratio keeps the sum of the minimum dirty shares of all | 467 | * bdi_min_ratio keeps the sum of the minimum dirty shares of all |
440 | * registered backing devices, which, for obvious reasons, can not | 468 | * registered backing devices, which, for obvious reasons, can not |
441 | * exceed 100%. | 469 | * exceed 100%. |
@@ -475,7 +503,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | |||
475 | ret = -EINVAL; | 503 | ret = -EINVAL; |
476 | } else { | 504 | } else { |
477 | bdi->max_ratio = max_ratio; | 505 | bdi->max_ratio = max_ratio; |
478 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | 506 | bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; |
479 | } | 507 | } |
480 | spin_unlock_bh(&bdi_lock); | 508 | spin_unlock_bh(&bdi_lock); |
481 | 509 | ||
@@ -918,7 +946,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi, | |||
918 | * bdi->dirty_ratelimit = balanced_dirty_ratelimit; | 946 | * bdi->dirty_ratelimit = balanced_dirty_ratelimit; |
919 | * | 947 | * |
920 | * However to get a more stable dirty_ratelimit, the below elaborated | 948 | * However to get a more stable dirty_ratelimit, the below elaborated |
921 | * code makes use of task_ratelimit to filter out sigular points and | 949 | * code makes use of task_ratelimit to filter out singular points and |
922 | * limit the step size. | 950 | * limit the step size. |
923 | * | 951 | * |
924 | * The below code essentially only uses the relative value of | 952 | * The below code essentially only uses the relative value of |
@@ -941,7 +969,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi, | |||
941 | * feel and care are stable dirty rate and small position error. | 969 | * feel and care are stable dirty rate and small position error. |
942 | * | 970 | * |
943 | * |task_ratelimit - dirty_ratelimit| is used to limit the step size | 971 | * |task_ratelimit - dirty_ratelimit| is used to limit the step size |
944 | * and filter out the sigular points of balanced_dirty_ratelimit. Which | 972 | * and filter out the singular points of balanced_dirty_ratelimit. Which |
945 | * keeps jumping around randomly and can even leap far away at times | 973 | * keeps jumping around randomly and can even leap far away at times |
946 | * due to the small 200ms estimation period of dirty_rate (we want to | 974 | * due to the small 200ms estimation period of dirty_rate (we want to |
947 | * keep that period small to reduce time lags). | 975 | * keep that period small to reduce time lags). |
@@ -1606,13 +1634,10 @@ static struct notifier_block __cpuinitdata ratelimit_nb = { | |||
1606 | */ | 1634 | */ |
1607 | void __init page_writeback_init(void) | 1635 | void __init page_writeback_init(void) |
1608 | { | 1636 | { |
1609 | int shift; | ||
1610 | |||
1611 | writeback_set_ratelimit(); | 1637 | writeback_set_ratelimit(); |
1612 | register_cpu_notifier(&ratelimit_nb); | 1638 | register_cpu_notifier(&ratelimit_nb); |
1613 | 1639 | ||
1614 | shift = calc_period_shift(); | 1640 | fprop_global_init(&writeout_completions); |
1615 | prop_descriptor_init(&vm_completions, shift); | ||
1616 | } | 1641 | } |
1617 | 1642 | ||
1618 | /** | 1643 | /** |