author	Wu Fengguang <fengguang.wu@intel.com>	2011-05-04 21:54:37 -0400
committer	Wu Fengguang <fengguang.wu@intel.com>	2011-07-10 01:09:01 -0400
commit	d46db3d58233be4be980eb1e42eebe7808bcabab (patch)
tree	6d813b33938d915f0c0633e8615d1ffdcc554c96
parent	36715cef0770b7e2547892b7c3197fc024274630 (diff)
writeback: make writeback_control.nr_to_write straight
Pass struct wb_writeback_work all the way down to writeback_sb_inodes(),
and initialize the struct writeback_control there.
struct writeback_control is basically designed to control writeback of a
single file, but we keep abusing it for writing multiple files in
writeback_sb_inodes() and its callers.
This immediately cleans things up: for example, wbc.nr_to_write vs.
work->nr_pages suddenly makes sense, and instead of saving and restoring
pages_skipped in writeback_sb_inodes() it can always start from a clean
zero value.
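
Condensed from the fs-writeback.c hunks below, the reworked loop now looks
roughly like this (a sketch, not the complete function):

	static long writeback_sb_inodes(struct super_block *sb,
					struct bdi_writeback *wb,
					struct wb_writeback_work *work)
	{
		struct writeback_control wbc = {
			.sync_mode = work->sync_mode,
			/* ... remaining fields copied from *work ... */
		};
		long write_chunk, wrote = 0;

		while (!list_empty(&wb->b_io)) {
			/* ... pick the next inode from wb->b_io ... */
			write_chunk = writeback_chunk_size(work);
			wbc.nr_to_write = write_chunk;
			wbc.pages_skipped = 0;	/* always starts at zero */
			writeback_single_inode(inode, wb, &wbc);
			work->nr_pages -= write_chunk - wbc.nr_to_write;
			wrote += write_chunk - wbc.nr_to_write;
		}
		return wrote;
	}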
It also makes a neat IO pattern change: large dirty files are now
written in the full 4MB writeback chunk size, rather than in whatever
quota remained in wbc->nr_to_write.
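
The chunk size comes from the new writeback_chunk_size() helper added
below; with the common 4KB page size, MAX_WRITEBACK_PAGES (1024 pages) is
where the 4MB figure above comes from. Condensed:

	static long writeback_chunk_size(struct wb_writeback_work *work)
	{
		/* sync(2)-style writeback tags dirty pages once and writes
		 * them in a single pass, so it is not chunked at all. */
		if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
			return LONG_MAX;
		return min(MAX_WRITEBACK_PAGES, work->nr_pages);
	}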
Acked-by: Jan Kara <jack@suse.cz>
Proposed-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
-rw-r--r--	fs/btrfs/extent_io.c	|	2
-rw-r--r--	fs/fs-writeback.c	|	196
-rw-r--r--	include/linux/writeback.h	|	6
-rw-r--r--	include/trace/events/writeback.h	|	39
-rw-r--r--	mm/backing-dev.c	|	17
-rw-r--r--	mm/page-writeback.c	|	17
6 files changed, 148 insertions, 129 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7055d11c1efd..561262d35689 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2551,7 +2551,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 	};
 	struct writeback_control wbc_writepages = {
 		.sync_mode = wbc->sync_mode,
-		.older_than_this = NULL,
 		.nr_to_write = 64,
 		.range_start = page_offset(page) + PAGE_CACHE_SIZE,
 		.range_end = (loff_t)-1,
@@ -2584,7 +2583,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 	};
 	struct writeback_control wbc_writepages = {
 		.sync_mode = mode,
-		.older_than_this = NULL,
 		.nr_to_write = nr_pages * 2,
 		.range_start = start,
 		.range_end = end + 1,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6caa98247a5b..2c947da39f6e 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -30,11 +30,21 @@
 #include "internal.h"
 
 /*
+ * The maximum number of pages to writeout in a single bdi flush/kupdate
+ * operation. We do this so we don't hold I_SYNC against an inode for
+ * enormous amounts of time, which would block a userspace task which has
+ * been forced to throttle against that inode. Also, the code reevaluates
+ * the dirty each time it has written this many pages.
+ */
+#define MAX_WRITEBACK_PAGES	1024L
+
+/*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
 struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
+	unsigned long *older_than_this;
 	enum writeback_sync_modes sync_mode;
 	unsigned int tagged_writepages:1;
 	unsigned int for_kupdate:1;
@@ -472,7 +482,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
			 * No need to add it back to the LRU.
			 */
			list_del_init(&inode->i_wb_list);
-			wbc->inodes_written++;
		}
	}
	inode_sync_complete(inode);
@@ -506,6 +515,31 @@ static bool pin_sb_for_writeback(struct super_block *sb)
	return false;
 }
 
+static long writeback_chunk_size(struct wb_writeback_work *work)
+{
+	long pages;
+
+	/*
+	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+	 * here avoids calling into writeback_inodes_wb() more than once.
+	 *
+	 * The intended call sequence for WB_SYNC_ALL writeback is:
+	 *
+	 *      wb_writeback()
+	 *          writeback_sb_inodes()       <== called only once
+	 *              write_cache_pages()     <== called once for each inode
+	 *                   (quickly) tag currently dirty pages
+	 *                   (maybe slowly) sync all tagged pages
+	 */
+	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
+		pages = LONG_MAX;
+	else
+		pages = min(MAX_WRITEBACK_PAGES, work->nr_pages);
+
+	return pages;
+}
+
 /*
  * Write a portion of b_io inodes which belong to @sb.
  *
@@ -513,18 +547,30 @@ static bool pin_sb_for_writeback(struct super_block *sb)
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
  *
- * Return 1, if the caller writeback routine should be
- * interrupted. Otherwise return 0.
+ * Return the number of pages and/or inodes written.
  */
-static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
-		struct writeback_control *wbc, bool only_this_sb)
+static long writeback_sb_inodes(struct super_block *sb,
+				struct bdi_writeback *wb,
+				struct wb_writeback_work *work)
 {
+	struct writeback_control wbc = {
+		.sync_mode = work->sync_mode,
+		.tagged_writepages = work->tagged_writepages,
+		.for_kupdate = work->for_kupdate,
+		.for_background = work->for_background,
+		.range_cyclic = work->range_cyclic,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+	};
+	unsigned long start_time = jiffies;
+	long write_chunk;
+	long wrote = 0;  /* count both pages and inodes */
+
 	while (!list_empty(&wb->b_io)) {
-		long pages_skipped;
 		struct inode *inode = wb_inode(wb->b_io.prev);
 
 		if (inode->i_sb != sb) {
-			if (only_this_sb) {
+			if (work->sb) {
				/*
				 * We only want to write back data for this
				 * superblock, move all inodes not belonging
@@ -539,7 +585,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
			 * Bounce back to the caller to unpin this and
			 * pin the next superblock.
			 */
-			return 0;
+			break;
		}
 
		/*
@@ -553,12 +599,18 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
			requeue_io(inode, wb);
			continue;
		}
-
		__iget(inode);
+		write_chunk = writeback_chunk_size(work);
+		wbc.nr_to_write = write_chunk;
+		wbc.pages_skipped = 0;
+
+		writeback_single_inode(inode, wb, &wbc);
 
-		pages_skipped = wbc->pages_skipped;
-		writeback_single_inode(inode, wb, wbc);
-		if (wbc->pages_skipped != pages_skipped) {
+		work->nr_pages -= write_chunk - wbc.nr_to_write;
+		wrote += write_chunk - wbc.nr_to_write;
+		if (!(inode->i_state & I_DIRTY))
+			wrote++;
+		if (wbc.pages_skipped) {
			/*
			 * writeback is not making progress due to locked
			 * buffers. Skip this inode for now.
@@ -570,17 +622,25 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
		iput(inode);
		cond_resched();
		spin_lock(&wb->list_lock);
-		if (wbc->nr_to_write <= 0)
-			return 1;
+		/*
+		 * bail out to wb_writeback() often enough to check
+		 * background threshold and other termination conditions.
+		 */
+		if (wrote) {
+			if (time_is_before_jiffies(start_time + HZ / 10UL))
+				break;
+			if (work->nr_pages <= 0)
+				break;
+		}
	}
-	/* b_io is empty */
-	return 1;
+	return wrote;
 }
 
-static void __writeback_inodes_wb(struct bdi_writeback *wb,
-		struct writeback_control *wbc)
+static long __writeback_inodes_wb(struct bdi_writeback *wb,
+				  struct wb_writeback_work *work)
 {
-	int ret = 0;
+	unsigned long start_time = jiffies;
+	long wrote = 0;
 
	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);
@@ -590,33 +650,37 @@ static void __writeback_inodes_wb(struct bdi_writeback *wb,
			requeue_io(inode, wb);
			continue;
		}
-		ret = writeback_sb_inodes(sb, wb, wbc, false);
+		wrote += writeback_sb_inodes(sb, wb, work);
		drop_super(sb);
 
-		if (ret)
-			break;
+		/* refer to the same tests at the end of writeback_sb_inodes */
+		if (wrote) {
+			if (time_is_before_jiffies(start_time + HZ / 10UL))
+				break;
+			if (work->nr_pages <= 0)
+				break;
+		}
	}
	/* Leave any unwritten inodes on b_io */
+	return wrote;
 }
 
-void writeback_inodes_wb(struct bdi_writeback *wb,
-		struct writeback_control *wbc)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
 {
+	struct wb_writeback_work work = {
+		.nr_pages = nr_pages,
+		.sync_mode = WB_SYNC_NONE,
+		.range_cyclic = 1,
+	};
+
	spin_lock(&wb->list_lock);
	if (list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
-	__writeback_inodes_wb(wb, wbc);
+		queue_io(wb, NULL);
+	__writeback_inodes_wb(wb, &work);
	spin_unlock(&wb->list_lock);
-}
 
-/*
- * The maximum number of pages to writeout in a single bdi flush/kupdate
- * operation. We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode. Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES	1024
+	return nr_pages - work.nr_pages;
+}
 
 static inline bool over_bground_thresh(void)
 {
@@ -646,42 +710,13 @@ static inline bool over_bground_thresh(void)
 static long wb_writeback(struct bdi_writeback *wb,
			 struct wb_writeback_work *work)
 {
-	struct writeback_control wbc = {
-		.sync_mode = work->sync_mode,
-		.tagged_writepages = work->tagged_writepages,
-		.older_than_this = NULL,
-		.for_kupdate = work->for_kupdate,
-		.for_background = work->for_background,
-		.range_cyclic = work->range_cyclic,
-	};
+	long nr_pages = work->nr_pages;
	unsigned long oldest_jif;
-	long wrote = 0;
-	long write_chunk = MAX_WRITEBACK_PAGES;
	struct inode *inode;
-
-	if (!wbc.range_cyclic) {
-		wbc.range_start = 0;
-		wbc.range_end = LLONG_MAX;
-	}
-
-	/*
-	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
-	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
-	 * here avoids calling into writeback_inodes_wb() more than once.
-	 *
-	 * The intended call sequence for WB_SYNC_ALL writeback is:
-	 *
-	 *      wb_writeback()
-	 *          writeback_sb_inodes()       <== called only once
-	 *              write_cache_pages()     <== called once for each inode
-	 *                   (quickly) tag currently dirty pages
-	 *                   (maybe slowly) sync all tagged pages
-	 */
-	if (wbc.sync_mode == WB_SYNC_ALL || wbc.tagged_writepages)
-		write_chunk = LONG_MAX;
+	long progress;
 
	oldest_jif = jiffies;
-	wbc.older_than_this = &oldest_jif;
+	work->older_than_this = &oldest_jif;
 
	spin_lock(&wb->list_lock);
	for (;;) {
@@ -711,24 +746,17 @@ static long wb_writeback(struct bdi_writeback *wb,
		if (work->for_kupdate) {
			oldest_jif = jiffies -
				msecs_to_jiffies(dirty_expire_interval * 10);
-			wbc.older_than_this = &oldest_jif;
+			work->older_than_this = &oldest_jif;
		}
 
-		wbc.nr_to_write = write_chunk;
-		wbc.pages_skipped = 0;
-		wbc.inodes_written = 0;
-
-		trace_wbc_writeback_start(&wbc, wb->bdi);
+		trace_writeback_start(wb->bdi, work);
		if (list_empty(&wb->b_io))
-			queue_io(wb, wbc.older_than_this);
+			queue_io(wb, work->older_than_this);
		if (work->sb)
-			writeback_sb_inodes(work->sb, wb, &wbc, true);
+			progress = writeback_sb_inodes(work->sb, wb, work);
		else
-			__writeback_inodes_wb(wb, &wbc);
-		trace_wbc_writeback_written(&wbc, wb->bdi);
-
-		work->nr_pages -= write_chunk - wbc.nr_to_write;
-		wrote += write_chunk - wbc.nr_to_write;
+			progress = __writeback_inodes_wb(wb, work);
+		trace_writeback_written(wb->bdi, work);
 
		/*
		 * Did we write something? Try for more
@@ -738,9 +766,7 @@ static long wb_writeback(struct bdi_writeback *wb,
		 * mean the overall work is done. So we keep looping as long
		 * as made some progress on cleaning pages or inodes.
		 */
-		if (wbc.nr_to_write < write_chunk)
-			continue;
-		if (wbc.inodes_written)
+		if (progress)
			continue;
		/*
		 * No more inodes for IO, bail
@@ -753,8 +779,8 @@ static long wb_writeback(struct bdi_writeback *wb,
		 * we'll just busyloop.
		 */
		if (!list_empty(&wb->b_more_io)) {
+			trace_writeback_wait(wb->bdi, work);
			inode = wb_inode(wb->b_more_io.prev);
-			trace_wbc_writeback_wait(&wbc, wb->bdi);
			spin_lock(&inode->i_lock);
			inode_wait_for_writeback(inode, wb);
			spin_unlock(&inode->i_lock);
@@ -762,7 +788,7 @@ static long wb_writeback(struct bdi_writeback *wb,
	}
	spin_unlock(&wb->list_lock);
 
-	return wrote;
+	return nr_pages - work->nr_pages;
 }
 
 /*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 2f1b512bd6e0..df1b7f18f100 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -24,12 +24,9 @@ enum writeback_sync_modes {
  */
 struct writeback_control {
	enum writeback_sync_modes sync_mode;
-	unsigned long *older_than_this;	/* If !NULL, only write back inodes
-					   older than this */
	long nr_to_write;		/* Write this many pages, and decrement
					   this for each page written */
	long pages_skipped;		/* Pages which were not written */
-	long inodes_written;		/* # of inodes written (at least) */
 
	/*
	 * For a_ops->writepages(): is start or end are non-zero then this is
@@ -56,8 +53,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
 int writeback_inodes_sb_if_idle(struct super_block *);
 int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
 void sync_inodes_sb(struct super_block *);
-void writeback_inodes_wb(struct bdi_writeback *wb,
-		struct writeback_control *wbc);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages);
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 205d14919ef2..3e7662a0cfa3 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -62,6 +62,9 @@ DEFINE_EVENT(writeback_work_class, name, \
 DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_written);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_wait);
 
 TRACE_EVENT(writeback_pages_written,
	TP_PROTO(long pages_written),
@@ -101,6 +104,30 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
 DEFINE_WRITEBACK_EVENT(writeback_thread_start);
 DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
+DEFINE_WRITEBACK_EVENT(balance_dirty_start);
+DEFINE_WRITEBACK_EVENT(balance_dirty_wait);
+
+TRACE_EVENT(balance_dirty_written,
+
+	TP_PROTO(struct backing_dev_info *bdi, int written),
+
+	TP_ARGS(bdi, written),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(int, written)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->written = written;
+	),
+
+	TP_printk("bdi %s written %d",
+		  __entry->name,
+		  __entry->written
+	)
+);
 
 DECLARE_EVENT_CLASS(wbc_class,
	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
@@ -114,7 +141,6 @@ DECLARE_EVENT_CLASS(wbc_class,
		__field(int, for_background)
		__field(int, for_reclaim)
		__field(int, range_cyclic)
-		__field(unsigned long, older_than_this)
		__field(long, range_start)
		__field(long, range_end)
	),
@@ -128,14 +154,12 @@ DECLARE_EVENT_CLASS(wbc_class,
		__entry->for_background = wbc->for_background;
		__entry->for_reclaim = wbc->for_reclaim;
		__entry->range_cyclic = wbc->range_cyclic;
-		__entry->older_than_this = wbc->older_than_this ?
-					   *wbc->older_than_this : 0;
		__entry->range_start = (long)wbc->range_start;
		__entry->range_end = (long)wbc->range_end;
	),
 
	TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
-		"bgrd=%d reclm=%d cyclic=%d older=0x%lx "
+		"bgrd=%d reclm=%d cyclic=%d "
		"start=0x%lx end=0x%lx",
		__entry->name,
		__entry->nr_to_write,
@@ -145,7 +169,6 @@ DECLARE_EVENT_CLASS(wbc_class,
		__entry->for_background,
		__entry->for_reclaim,
		__entry->range_cyclic,
-		__entry->older_than_this,
		__entry->range_start,
		__entry->range_end)
 )
@@ -154,12 +177,6 @@ DECLARE_EVENT_CLASS(wbc_class,
 DEFINE_EVENT(wbc_class, name, \
	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
	TP_ARGS(wbc, bdi))
-DEFINE_WBC_EVENT(wbc_writeback_start);
-DEFINE_WBC_EVENT(wbc_writeback_written);
-DEFINE_WBC_EVENT(wbc_writeback_wait);
-DEFINE_WBC_EVENT(wbc_balance_dirty_start);
-DEFINE_WBC_EVENT(wbc_balance_dirty_written);
-DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
 DEFINE_WBC_EVENT(wbc_writepage);
 
 TRACE_EVENT(writeback_queue_io,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 5f6553ef1ba7..7ba303be5e03 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -260,18 +260,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
	return wb_has_dirty_io(&bdi->wb);
 }
 
-static void bdi_flush_io(struct backing_dev_info *bdi)
-{
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_NONE,
-		.older_than_this = NULL,
-		.range_cyclic = 1,
-		.nr_to_write = 1024,
-	};
-
-	writeback_inodes_wb(&bdi->wb, &wbc);
-}
-
 /*
  * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
  * or we risk deadlocking on ->s_umount. The longer term solution would be
@@ -457,9 +445,10 @@ static int bdi_forker_thread(void *ptr)
			if (IS_ERR(task)) {
				/*
				 * If thread creation fails, force writeout of
-				 * the bdi from the thread.
+				 * the bdi from the thread. Hopefully 1024 is
+				 * large enough for efficient IO.
				 */
-				bdi_flush_io(bdi);
+				writeback_inodes_wb(&bdi->wb, 1024);
			} else {
				/*
				 * The spinlock makes sure we do not lose
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1965d05a29cc..9d6ac2b6d942 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -491,13 +491,6 @@ static void balance_dirty_pages(struct address_space *mapping,
	struct backing_dev_info *bdi = mapping->backing_dev_info;
 
	for (;;) {
-		struct writeback_control wbc = {
-			.sync_mode = WB_SYNC_NONE,
-			.older_than_this = NULL,
-			.nr_to_write = write_chunk,
-			.range_cyclic = 1,
-		};
-
		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_writeback = global_page_state(NR_WRITEBACK);
@@ -559,17 +552,17 @@ static void balance_dirty_pages(struct address_space *mapping,
		 * threshold otherwise wait until the disk writes catch
		 * up.
		 */
-		trace_wbc_balance_dirty_start(&wbc, bdi);
+		trace_balance_dirty_start(bdi);
		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes_wb(&bdi->wb, &wbc);
-			pages_written += write_chunk - wbc.nr_to_write;
-			trace_wbc_balance_dirty_written(&wbc, bdi);
+			pages_written += writeback_inodes_wb(&bdi->wb,
+							     write_chunk);
+			trace_balance_dirty_written(bdi, pages_written);
			if (pages_written >= write_chunk)
				break;		/* We've done our duty */
		}
-		trace_wbc_balance_dirty_wait(&wbc, bdi);
		__set_current_state(TASK_UNINTERRUPTIBLE);
		io_schedule_timeout(pause);
+		trace_balance_dirty_wait(bdi);
 
		/*
		 * Increase the delay for each loop, up to our previous