diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 13:39:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 13:39:54 -0400 |
commit | f01ef569cddb1a8627b1c6b3a134998ad1cf4b22 (patch) | |
tree | 29ea1a0942c8549c24411e976cd6891c7e995e89 /mm/page-writeback.c | |
parent | a93a1329271038f0e8337061d3b41b3b212a851e (diff) | |
parent | bcff25fc8aa47a13faff8b4b992589813f7b450a (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback: (27 commits)
mm: properly reflect task dirty limits in dirty_exceeded logic
writeback: don't busy retry writeback on new/freeing inodes
writeback: scale IO chunk size up to half device bandwidth
writeback: trace global_dirty_state
writeback: introduce max-pause and pass-good dirty limits
writeback: introduce smoothed global dirty limit
writeback: consolidate variable names in balance_dirty_pages()
writeback: show bdi write bandwidth in debugfs
writeback: bdi write bandwidth estimation
writeback: account per-bdi accumulated written pages
writeback: make writeback_control.nr_to_write straight
writeback: skip tmpfs early in balance_dirty_pages_ratelimited_nr()
writeback: trace event writeback_queue_io
writeback: trace event writeback_single_inode
writeback: remove .nonblocking and .encountered_congestion
writeback: remove writeback_control.more_io
writeback: skip balance_dirty_pages() for in-memory fs
writeback: add bdi_dirty_limit() kernel-doc
writeback: avoid extra sync work at enqueue time
writeback: elevate queue_io() into wb_writeback()
...
Fix up trivial conflicts in fs/fs-writeback.c and mm/filemap.c
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 280 |
1 files changed, 243 insertions, 37 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d8767b381b9c..d1960744f881 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -37,6 +37,16 @@ | |||
37 | #include <trace/events/writeback.h> | 37 | #include <trace/events/writeback.h> |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Sleep at most 200ms at a time in balance_dirty_pages(). | ||
41 | */ | ||
42 | #define MAX_PAUSE max(HZ/5, 1) | ||
43 | |||
44 | /* | ||
45 | * Estimate write bandwidth at 200ms intervals. | ||
46 | */ | ||
47 | #define BANDWIDTH_INTERVAL max(HZ/5, 1) | ||
48 | |||
49 | /* | ||
40 | * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited | 50 | * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited |
41 | * will look to see if it needs to force writeback or throttling. | 51 | * will look to see if it needs to force writeback or throttling. |
42 | */ | 52 | */ |
@@ -111,6 +121,7 @@ EXPORT_SYMBOL(laptop_mode); | |||
111 | 121 | ||
112 | /* End of sysctl-exported parameters */ | 122 | /* End of sysctl-exported parameters */ |
113 | 123 | ||
124 | unsigned long global_dirty_limit; | ||
114 | 125 | ||
115 | /* | 126 | /* |
116 | * Scale the writeback cache size proportional to the relative writeout speeds. | 127 | * Scale the writeback cache size proportional to the relative writeout speeds. |
@@ -219,6 +230,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write, | |||
219 | */ | 230 | */ |
220 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) | 231 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) |
221 | { | 232 | { |
233 | __inc_bdi_stat(bdi, BDI_WRITTEN); | ||
222 | __prop_inc_percpu_max(&vm_completions, &bdi->completions, | 234 | __prop_inc_percpu_max(&vm_completions, &bdi->completions, |
223 | bdi->max_prop_frac); | 235 | bdi->max_prop_frac); |
224 | } | 236 | } |
@@ -244,13 +256,8 @@ void task_dirty_inc(struct task_struct *tsk) | |||
244 | static void bdi_writeout_fraction(struct backing_dev_info *bdi, | 256 | static void bdi_writeout_fraction(struct backing_dev_info *bdi, |
245 | long *numerator, long *denominator) | 257 | long *numerator, long *denominator) |
246 | { | 258 | { |
247 | if (bdi_cap_writeback_dirty(bdi)) { | 259 | prop_fraction_percpu(&vm_completions, &bdi->completions, |
248 | prop_fraction_percpu(&vm_completions, &bdi->completions, | ||
249 | numerator, denominator); | 260 | numerator, denominator); |
250 | } else { | ||
251 | *numerator = 0; | ||
252 | *denominator = 1; | ||
253 | } | ||
254 | } | 261 | } |
255 | 262 | ||
256 | static inline void task_dirties_fraction(struct task_struct *tsk, | 263 | static inline void task_dirties_fraction(struct task_struct *tsk, |
@@ -274,12 +281,13 @@ static inline void task_dirties_fraction(struct task_struct *tsk, | |||
274 | * effectively curb the growth of dirty pages. Light dirtiers with high enough | 281 | * effectively curb the growth of dirty pages. Light dirtiers with high enough |
275 | * dirty threshold may never get throttled. | 282 | * dirty threshold may never get throttled. |
276 | */ | 283 | */ |
284 | #define TASK_LIMIT_FRACTION 8 | ||
277 | static unsigned long task_dirty_limit(struct task_struct *tsk, | 285 | static unsigned long task_dirty_limit(struct task_struct *tsk, |
278 | unsigned long bdi_dirty) | 286 | unsigned long bdi_dirty) |
279 | { | 287 | { |
280 | long numerator, denominator; | 288 | long numerator, denominator; |
281 | unsigned long dirty = bdi_dirty; | 289 | unsigned long dirty = bdi_dirty; |
282 | u64 inv = dirty >> 3; | 290 | u64 inv = dirty / TASK_LIMIT_FRACTION; |
283 | 291 | ||
284 | task_dirties_fraction(tsk, &numerator, &denominator); | 292 | task_dirties_fraction(tsk, &numerator, &denominator); |
285 | inv *= numerator; | 293 | inv *= numerator; |
@@ -290,6 +298,12 @@ static unsigned long task_dirty_limit(struct task_struct *tsk, | |||
290 | return max(dirty, bdi_dirty/2); | 298 | return max(dirty, bdi_dirty/2); |
291 | } | 299 | } |
292 | 300 | ||
301 | /* Minimum limit for any task */ | ||
302 | static unsigned long task_min_dirty_limit(unsigned long bdi_dirty) | ||
303 | { | ||
304 | return bdi_dirty - bdi_dirty / TASK_LIMIT_FRACTION; | ||
305 | } | ||
306 | |||
293 | /* | 307 | /* |
294 | * | 308 | * |
295 | */ | 309 | */ |
@@ -397,6 +411,11 @@ unsigned long determine_dirtyable_memory(void) | |||
397 | return x + 1; /* Ensure that we never return 0 */ | 411 | return x + 1; /* Ensure that we never return 0 */ |
398 | } | 412 | } |
399 | 413 | ||
414 | static unsigned long hard_dirty_limit(unsigned long thresh) | ||
415 | { | ||
416 | return max(thresh, global_dirty_limit); | ||
417 | } | ||
418 | |||
400 | /* | 419 | /* |
401 | * global_dirty_limits - background-writeback and dirty-throttling thresholds | 420 | * global_dirty_limits - background-writeback and dirty-throttling thresholds |
402 | * | 421 | * |
@@ -435,12 +454,20 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) | |||
435 | } | 454 | } |
436 | *pbackground = background; | 455 | *pbackground = background; |
437 | *pdirty = dirty; | 456 | *pdirty = dirty; |
457 | trace_global_dirty_state(background, dirty); | ||
438 | } | 458 | } |
439 | 459 | ||
440 | /* | 460 | /** |
441 | * bdi_dirty_limit - @bdi's share of dirty throttling threshold | 461 | * bdi_dirty_limit - @bdi's share of dirty throttling threshold |
462 | * @bdi: the backing_dev_info to query | ||
463 | * @dirty: global dirty limit in pages | ||
442 | * | 464 | * |
443 | * Allocate high/low dirty limits to fast/slow devices, in order to prevent | 465 | * Returns @bdi's dirty limit in pages. The term "dirty" in the context of |
466 | * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages. | ||
467 | * And the "limit" in the name is not seriously taken as hard limit in | ||
468 | * balance_dirty_pages(). | ||
469 | * | ||
470 | * It allocates high/low dirty limits to fast/slow devices, in order to prevent | ||
444 | * - starving fast devices | 471 | * - starving fast devices |
445 | * - piling up dirty pages (that will take long time to sync) on slow devices | 472 | * - piling up dirty pages (that will take long time to sync) on slow devices |
446 | * | 473 | * |
@@ -468,6 +495,153 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty) | |||
468 | return bdi_dirty; | 495 | return bdi_dirty; |
469 | } | 496 | } |
470 | 497 | ||
498 | static void bdi_update_write_bandwidth(struct backing_dev_info *bdi, | ||
499 | unsigned long elapsed, | ||
500 | unsigned long written) | ||
501 | { | ||
502 | const unsigned long period = roundup_pow_of_two(3 * HZ); | ||
503 | unsigned long avg = bdi->avg_write_bandwidth; | ||
504 | unsigned long old = bdi->write_bandwidth; | ||
505 | u64 bw; | ||
506 | |||
507 | /* | ||
508 | * bw = written * HZ / elapsed | ||
509 | * | ||
510 | * bw * elapsed + write_bandwidth * (period - elapsed) | ||
511 | * write_bandwidth = --------------------------------------------------- | ||
512 | * period | ||
513 | */ | ||
514 | bw = written - bdi->written_stamp; | ||
515 | bw *= HZ; | ||
516 | if (unlikely(elapsed > period)) { | ||
517 | do_div(bw, elapsed); | ||
518 | avg = bw; | ||
519 | goto out; | ||
520 | } | ||
521 | bw += (u64)bdi->write_bandwidth * (period - elapsed); | ||
522 | bw >>= ilog2(period); | ||
523 | |||
524 | /* | ||
525 | * one more level of smoothing, for filtering out sudden spikes | ||
526 | */ | ||
527 | if (avg > old && old >= (unsigned long)bw) | ||
528 | avg -= (avg - old) >> 3; | ||
529 | |||
530 | if (avg < old && old <= (unsigned long)bw) | ||
531 | avg += (old - avg) >> 3; | ||
532 | |||
533 | out: | ||
534 | bdi->write_bandwidth = bw; | ||
535 | bdi->avg_write_bandwidth = avg; | ||
536 | } | ||
537 | |||
538 | /* | ||
539 | * The global dirtyable memory and dirty threshold could be suddenly knocked | ||
540 | * down by a large amount (eg. on the startup of KVM in a swapless system). | ||
541 | * This may throw the system into deep dirty exceeded state and throttle | ||
542 | * heavy/light dirtiers alike. To retain good responsiveness, maintain | ||
543 | * global_dirty_limit for tracking slowly down to the knocked down dirty | ||
544 | * threshold. | ||
545 | */ | ||
546 | static void update_dirty_limit(unsigned long thresh, unsigned long dirty) | ||
547 | { | ||
548 | unsigned long limit = global_dirty_limit; | ||
549 | |||
550 | /* | ||
551 | * Follow up in one step. | ||
552 | */ | ||
553 | if (limit < thresh) { | ||
554 | limit = thresh; | ||
555 | goto update; | ||
556 | } | ||
557 | |||
558 | /* | ||
559 | * Follow down slowly. Use the higher one as the target, because thresh | ||
560 | * may drop below dirty. This is exactly the reason to introduce | ||
561 | * global_dirty_limit which is guaranteed to lie above the dirty pages. | ||
562 | */ | ||
563 | thresh = max(thresh, dirty); | ||
564 | if (limit > thresh) { | ||
565 | limit -= (limit - thresh) >> 5; | ||
566 | goto update; | ||
567 | } | ||
568 | return; | ||
569 | update: | ||
570 | global_dirty_limit = limit; | ||
571 | } | ||
572 | |||
573 | static void global_update_bandwidth(unsigned long thresh, | ||
574 | unsigned long dirty, | ||
575 | unsigned long now) | ||
576 | { | ||
577 | static DEFINE_SPINLOCK(dirty_lock); | ||
578 | static unsigned long update_time; | ||
579 | |||
580 | /* | ||
581 | * check locklessly first to optimize away locking for the most time | ||
582 | */ | ||
583 | if (time_before(now, update_time + BANDWIDTH_INTERVAL)) | ||
584 | return; | ||
585 | |||
586 | spin_lock(&dirty_lock); | ||
587 | if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) { | ||
588 | update_dirty_limit(thresh, dirty); | ||
589 | update_time = now; | ||
590 | } | ||
591 | spin_unlock(&dirty_lock); | ||
592 | } | ||
593 | |||
594 | void __bdi_update_bandwidth(struct backing_dev_info *bdi, | ||
595 | unsigned long thresh, | ||
596 | unsigned long dirty, | ||
597 | unsigned long bdi_thresh, | ||
598 | unsigned long bdi_dirty, | ||
599 | unsigned long start_time) | ||
600 | { | ||
601 | unsigned long now = jiffies; | ||
602 | unsigned long elapsed = now - bdi->bw_time_stamp; | ||
603 | unsigned long written; | ||
604 | |||
605 | /* | ||
606 | * rate-limit, only update once every 200ms. | ||
607 | */ | ||
608 | if (elapsed < BANDWIDTH_INTERVAL) | ||
609 | return; | ||
610 | |||
611 | written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]); | ||
612 | |||
613 | /* | ||
614 | * Skip quiet periods when disk bandwidth is under-utilized. | ||
615 | * (at least 1s idle time between two flusher runs) | ||
616 | */ | ||
617 | if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time)) | ||
618 | goto snapshot; | ||
619 | |||
620 | if (thresh) | ||
621 | global_update_bandwidth(thresh, dirty, now); | ||
622 | |||
623 | bdi_update_write_bandwidth(bdi, elapsed, written); | ||
624 | |||
625 | snapshot: | ||
626 | bdi->written_stamp = written; | ||
627 | bdi->bw_time_stamp = now; | ||
628 | } | ||
629 | |||
630 | static void bdi_update_bandwidth(struct backing_dev_info *bdi, | ||
631 | unsigned long thresh, | ||
632 | unsigned long dirty, | ||
633 | unsigned long bdi_thresh, | ||
634 | unsigned long bdi_dirty, | ||
635 | unsigned long start_time) | ||
636 | { | ||
637 | if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL)) | ||
638 | return; | ||
639 | spin_lock(&bdi->wb.list_lock); | ||
640 | __bdi_update_bandwidth(bdi, thresh, dirty, bdi_thresh, bdi_dirty, | ||
641 | start_time); | ||
642 | spin_unlock(&bdi->wb.list_lock); | ||
643 | } | ||
644 | |||
471 | /* | 645 | /* |
472 | * balance_dirty_pages() must be called by processes which are generating dirty | 646 | * balance_dirty_pages() must be called by processes which are generating dirty |
473 | * data. It looks at the number of dirty pages in the machine and will force | 647 | * data. It looks at the number of dirty pages in the machine and will force |
@@ -478,27 +652,25 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty) | |||
478 | static void balance_dirty_pages(struct address_space *mapping, | 652 | static void balance_dirty_pages(struct address_space *mapping, |
479 | unsigned long write_chunk) | 653 | unsigned long write_chunk) |
480 | { | 654 | { |
481 | long nr_reclaimable, bdi_nr_reclaimable; | 655 | unsigned long nr_reclaimable, bdi_nr_reclaimable; |
482 | long nr_writeback, bdi_nr_writeback; | 656 | unsigned long nr_dirty; /* = file_dirty + writeback + unstable_nfs */ |
657 | unsigned long bdi_dirty; | ||
483 | unsigned long background_thresh; | 658 | unsigned long background_thresh; |
484 | unsigned long dirty_thresh; | 659 | unsigned long dirty_thresh; |
485 | unsigned long bdi_thresh; | 660 | unsigned long bdi_thresh; |
661 | unsigned long task_bdi_thresh; | ||
662 | unsigned long min_task_bdi_thresh; | ||
486 | unsigned long pages_written = 0; | 663 | unsigned long pages_written = 0; |
487 | unsigned long pause = 1; | 664 | unsigned long pause = 1; |
488 | bool dirty_exceeded = false; | 665 | bool dirty_exceeded = false; |
666 | bool clear_dirty_exceeded = true; | ||
489 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 667 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
668 | unsigned long start_time = jiffies; | ||
490 | 669 | ||
491 | for (;;) { | 670 | for (;;) { |
492 | struct writeback_control wbc = { | ||
493 | .sync_mode = WB_SYNC_NONE, | ||
494 | .older_than_this = NULL, | ||
495 | .nr_to_write = write_chunk, | ||
496 | .range_cyclic = 1, | ||
497 | }; | ||
498 | |||
499 | nr_reclaimable = global_page_state(NR_FILE_DIRTY) + | 671 | nr_reclaimable = global_page_state(NR_FILE_DIRTY) + |
500 | global_page_state(NR_UNSTABLE_NFS); | 672 | global_page_state(NR_UNSTABLE_NFS); |
501 | nr_writeback = global_page_state(NR_WRITEBACK); | 673 | nr_dirty = nr_reclaimable + global_page_state(NR_WRITEBACK); |
502 | 674 | ||
503 | global_dirty_limits(&background_thresh, &dirty_thresh); | 675 | global_dirty_limits(&background_thresh, &dirty_thresh); |
504 | 676 | ||
@@ -507,12 +679,12 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
507 | * catch-up. This avoids (excessively) small writeouts | 679 | * catch-up. This avoids (excessively) small writeouts |
508 | * when the bdi limits are ramping up. | 680 | * when the bdi limits are ramping up. |
509 | */ | 681 | */ |
510 | if (nr_reclaimable + nr_writeback <= | 682 | if (nr_dirty <= (background_thresh + dirty_thresh) / 2) |
511 | (background_thresh + dirty_thresh) / 2) | ||
512 | break; | 683 | break; |
513 | 684 | ||
514 | bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); | 685 | bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); |
515 | bdi_thresh = task_dirty_limit(current, bdi_thresh); | 686 | min_task_bdi_thresh = task_min_dirty_limit(bdi_thresh); |
687 | task_bdi_thresh = task_dirty_limit(current, bdi_thresh); | ||
516 | 688 | ||
517 | /* | 689 | /* |
518 | * In order to avoid the stacked BDI deadlock we need | 690 | * In order to avoid the stacked BDI deadlock we need |
@@ -524,12 +696,14 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
524 | * actually dirty; with m+n sitting in the percpu | 696 | * actually dirty; with m+n sitting in the percpu |
525 | * deltas. | 697 | * deltas. |
526 | */ | 698 | */ |
527 | if (bdi_thresh < 2*bdi_stat_error(bdi)) { | 699 | if (task_bdi_thresh < 2 * bdi_stat_error(bdi)) { |
528 | bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); | 700 | bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); |
529 | bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); | 701 | bdi_dirty = bdi_nr_reclaimable + |
702 | bdi_stat_sum(bdi, BDI_WRITEBACK); | ||
530 | } else { | 703 | } else { |
531 | bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); | 704 | bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); |
532 | bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); | 705 | bdi_dirty = bdi_nr_reclaimable + |
706 | bdi_stat(bdi, BDI_WRITEBACK); | ||
533 | } | 707 | } |
534 | 708 | ||
535 | /* | 709 | /* |
@@ -538,9 +712,10 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
538 | * bdi or process from holding back light ones; The latter is | 712 | * bdi or process from holding back light ones; The latter is |
539 | * the last resort safeguard. | 713 | * the last resort safeguard. |
540 | */ | 714 | */ |
541 | dirty_exceeded = | 715 | dirty_exceeded = (bdi_dirty > task_bdi_thresh) || |
542 | (bdi_nr_reclaimable + bdi_nr_writeback > bdi_thresh) | 716 | (nr_dirty > dirty_thresh); |
543 | || (nr_reclaimable + nr_writeback > dirty_thresh); | 717 | clear_dirty_exceeded = (bdi_dirty <= min_task_bdi_thresh) && |
718 | (nr_dirty <= dirty_thresh); | ||
544 | 719 | ||
545 | if (!dirty_exceeded) | 720 | if (!dirty_exceeded) |
546 | break; | 721 | break; |
@@ -548,6 +723,9 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
548 | if (!bdi->dirty_exceeded) | 723 | if (!bdi->dirty_exceeded) |
549 | bdi->dirty_exceeded = 1; | 724 | bdi->dirty_exceeded = 1; |
550 | 725 | ||
726 | bdi_update_bandwidth(bdi, dirty_thresh, nr_dirty, | ||
727 | bdi_thresh, bdi_dirty, start_time); | ||
728 | |||
551 | /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. | 729 | /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. |
552 | * Unstable writes are a feature of certain networked | 730 | * Unstable writes are a feature of certain networked |
553 | * filesystems (i.e. NFS) in which data may have been | 731 | * filesystems (i.e. NFS) in which data may have been |
@@ -557,17 +735,40 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
557 | * threshold otherwise wait until the disk writes catch | 735 | * threshold otherwise wait until the disk writes catch |
558 | * up. | 736 | * up. |
559 | */ | 737 | */ |
560 | trace_wbc_balance_dirty_start(&wbc, bdi); | 738 | trace_balance_dirty_start(bdi); |
561 | if (bdi_nr_reclaimable > bdi_thresh) { | 739 | if (bdi_nr_reclaimable > task_bdi_thresh) { |
562 | writeback_inodes_wb(&bdi->wb, &wbc); | 740 | pages_written += writeback_inodes_wb(&bdi->wb, |
563 | pages_written += write_chunk - wbc.nr_to_write; | 741 | write_chunk); |
564 | trace_wbc_balance_dirty_written(&wbc, bdi); | 742 | trace_balance_dirty_written(bdi, pages_written); |
565 | if (pages_written >= write_chunk) | 743 | if (pages_written >= write_chunk) |
566 | break; /* We've done our duty */ | 744 | break; /* We've done our duty */ |
567 | } | 745 | } |
568 | trace_wbc_balance_dirty_wait(&wbc, bdi); | ||
569 | __set_current_state(TASK_UNINTERRUPTIBLE); | 746 | __set_current_state(TASK_UNINTERRUPTIBLE); |
570 | io_schedule_timeout(pause); | 747 | io_schedule_timeout(pause); |
748 | trace_balance_dirty_wait(bdi); | ||
749 | |||
750 | dirty_thresh = hard_dirty_limit(dirty_thresh); | ||
751 | /* | ||
752 | * max-pause area. If dirty exceeded but still within this | ||
753 | * area, no need to sleep for more than 200ms: (a) 8 pages per | ||
754 | * 200ms is typically more than enough to curb heavy dirtiers; | ||
755 | * (b) the pause time limit makes the dirtiers more responsive. | ||
756 | */ | ||
757 | if (nr_dirty < dirty_thresh + | ||
758 | dirty_thresh / DIRTY_MAXPAUSE_AREA && | ||
759 | time_after(jiffies, start_time + MAX_PAUSE)) | ||
760 | break; | ||
761 | /* | ||
762 | * pass-good area. When some bdi gets blocked (eg. NFS server | ||
763 | * not responding), or write bandwidth dropped dramatically due | ||
764 | * to concurrent reads, or dirty threshold suddenly dropped and | ||
765 | * the dirty pages cannot be brought down anytime soon (eg. on | ||
766 | * slow USB stick), at least let go of the good bdi's. | ||
767 | */ | ||
768 | if (nr_dirty < dirty_thresh + | ||
769 | dirty_thresh / DIRTY_PASSGOOD_AREA && | ||
770 | bdi_dirty < bdi_thresh) | ||
771 | break; | ||
571 | 772 | ||
572 | /* | 773 | /* |
573 | * Increase the delay for each loop, up to our previous | 774 | * Increase the delay for each loop, up to our previous |
@@ -578,7 +779,8 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
578 | pause = HZ / 10; | 779 | pause = HZ / 10; |
579 | } | 780 | } |
580 | 781 | ||
581 | if (!dirty_exceeded && bdi->dirty_exceeded) | 782 | /* Clear dirty_exceeded flag only when no task can exceed the limit */ |
783 | if (clear_dirty_exceeded && bdi->dirty_exceeded) | ||
582 | bdi->dirty_exceeded = 0; | 784 | bdi->dirty_exceeded = 0; |
583 | 785 | ||
584 | if (writeback_in_progress(bdi)) | 786 | if (writeback_in_progress(bdi)) |
@@ -626,9 +828,13 @@ static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0; | |||
626 | void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, | 828 | void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, |
627 | unsigned long nr_pages_dirtied) | 829 | unsigned long nr_pages_dirtied) |
628 | { | 830 | { |
831 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
629 | unsigned long ratelimit; | 832 | unsigned long ratelimit; |
630 | unsigned long *p; | 833 | unsigned long *p; |
631 | 834 | ||
835 | if (!bdi_cap_account_dirty(bdi)) | ||
836 | return; | ||
837 | |||
632 | ratelimit = ratelimit_pages; | 838 | ratelimit = ratelimit_pages; |
633 | if (mapping->backing_dev_info->dirty_exceeded) | 839 | if (mapping->backing_dev_info->dirty_exceeded) |
634 | ratelimit = 8; | 840 | ratelimit = 8; |
@@ -892,12 +1098,12 @@ int write_cache_pages(struct address_space *mapping, | |||
892 | range_whole = 1; | 1098 | range_whole = 1; |
893 | cycled = 1; /* ignore range_cyclic tests */ | 1099 | cycled = 1; /* ignore range_cyclic tests */ |
894 | } | 1100 | } |
895 | if (wbc->sync_mode == WB_SYNC_ALL) | 1101 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
896 | tag = PAGECACHE_TAG_TOWRITE; | 1102 | tag = PAGECACHE_TAG_TOWRITE; |
897 | else | 1103 | else |
898 | tag = PAGECACHE_TAG_DIRTY; | 1104 | tag = PAGECACHE_TAG_DIRTY; |
899 | retry: | 1105 | retry: |
900 | if (wbc->sync_mode == WB_SYNC_ALL) | 1106 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
901 | tag_pages_for_writeback(mapping, index, end); | 1107 | tag_pages_for_writeback(mapping, index, end); |
902 | done_index = index; | 1108 | done_index = index; |
903 | while (!done && (index <= end)) { | 1109 | while (!done && (index <= end)) { |