Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--  fs/fs-writeback.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 83 insertions(+), 22 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3d06ccc953aa..59c6e4956786 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -84,13 +84,9 @@ static inline struct inode *wb_inode(struct list_head *head)
 	return list_entry(head, struct inode, i_wb_list);
 }
 
-static void bdi_queue_work(struct backing_dev_info *bdi,
-		struct wb_writeback_work *work)
+/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
+static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
 {
-	trace_writeback_queue(bdi, work);
-
-	spin_lock_bh(&bdi->wb_lock);
-	list_add_tail(&work->list, &bdi->work_list);
 	if (bdi->wb.task) {
 		wake_up_process(bdi->wb.task);
 	} else {
@@ -98,15 +94,26 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 		 * The bdi thread isn't there, wake up the forker thread which
 		 * will create and run it.
 		 */
-		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
 	}
+}
+
+static void bdi_queue_work(struct backing_dev_info *bdi,
+		struct wb_writeback_work *work)
+{
+	trace_writeback_queue(bdi, work);
+
+	spin_lock_bh(&bdi->wb_lock);
+	list_add_tail(&work->list, &bdi->work_list);
+	if (!bdi->wb.task)
+		trace_writeback_nothread(bdi, work);
+	bdi_wakeup_flusher(bdi);
 	spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		bool range_cyclic, bool for_background)
+		bool range_cyclic)
 {
 	struct wb_writeback_work *work;
 
@@ -126,7 +133,6 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	work->sync_mode = WB_SYNC_NONE;
 	work->nr_pages = nr_pages;
 	work->range_cyclic = range_cyclic;
-	work->for_background = for_background;
 
 	bdi_queue_work(bdi, work);
 }
@@ -144,7 +150,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	__bdi_start_writeback(bdi, nr_pages, true, false);
+	__bdi_start_writeback(bdi, nr_pages, true);
 }
 
 /**
@@ -152,13 +158,21 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  * @bdi: the backing device to write from
  *
  * Description:
- *   This does WB_SYNC_NONE background writeback. The IO is only
- *   started when this function returns, we make no guarentees on
- *   completion. Caller need not hold sb s_umount semaphore.
+ *   This makes sure WB_SYNC_NONE background writeback happens. When
+ *   this function returns, it is only guaranteed that for given BDI
+ *   some IO is happening if we are over background dirty threshold.
+ *   Caller need not hold sb s_umount semaphore.
  */
 void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	__bdi_start_writeback(bdi, LONG_MAX, true, true);
+	/*
+	 * We just wake up the flusher thread. It will perform background
+	 * writeback as soon as there is no other work to do.
+	 */
+	trace_writeback_wake_background(bdi);
+	spin_lock_bh(&bdi->wb_lock);
+	bdi_wakeup_flusher(bdi);
+	spin_unlock_bh(&bdi->wb_lock);
 }
 
 /*
@@ -616,6 +630,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
+	long write_chunk;
 	struct inode *inode;
 
 	if (wbc.for_kupdate) {
@@ -628,6 +643,24 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.range_end = LLONG_MAX;
 	}
 
+	/*
+	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+	 * here avoids calling into writeback_inodes_wb() more than once.
+	 *
+	 * The intended call sequence for WB_SYNC_ALL writeback is:
+	 *
+	 * wb_writeback()
+	 *     __writeback_inodes_sb()     <== called only once
+	 *         write_cache_pages()     <== called once for each inode
+	 *             (quickly) tag currently dirty pages
+	 *             (maybe slowly) sync all tagged pages
+	 */
+	if (wbc.sync_mode == WB_SYNC_NONE)
+		write_chunk = MAX_WRITEBACK_PAGES;
+	else
+		write_chunk = LONG_MAX;
+
 	wbc.wb_start = jiffies; /* livelock avoidance */
 	for (;;) {
 		/*
@@ -637,6 +670,16 @@ static long wb_writeback(struct bdi_writeback *wb,
 			break;
 
 		/*
+		 * Background writeout and kupdate-style writeback may
+		 * run forever. Stop them if there is other work to do
+		 * so that e.g. sync can proceed. They'll be restarted
+		 * after the other works are all done.
+		 */
+		if ((work->for_background || work->for_kupdate) &&
+		    !list_empty(&wb->bdi->work_list))
+			break;
+
+		/*
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
@@ -644,7 +687,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 			break;
 
 		wbc.more_io = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+		wbc.nr_to_write = write_chunk;
 		wbc.pages_skipped = 0;
 
 		trace_wbc_writeback_start(&wbc, wb->bdi);
@@ -654,8 +697,8 @@ static long wb_writeback(struct bdi_writeback *wb,
 		writeback_inodes_wb(wb, &wbc);
 		trace_wbc_writeback_written(&wbc, wb->bdi);
 
-		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		work->nr_pages -= write_chunk - wbc.nr_to_write;
+		wrote += write_chunk - wbc.nr_to_write;
 
 		/*
 		 * If we consumed everything, see if we have more
@@ -670,7 +713,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		/*
 		 * Did we write something? Try for more
 		 */
-		if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+		if (wbc.nr_to_write < write_chunk)
 			continue;
 		/*
 		 * Nothing written. Wait for some inode to
@@ -718,6 +761,23 @@ static unsigned long get_nr_dirty_pages(void)
 		get_nr_dirty_inodes();
 }
 
+static long wb_check_background_flush(struct bdi_writeback *wb)
+{
+	if (over_bground_thresh()) {
+
+		struct wb_writeback_work work = {
+			.nr_pages	= LONG_MAX,
+			.sync_mode	= WB_SYNC_NONE,
+			.for_background	= 1,
+			.range_cyclic	= 1,
+		};
+
+		return wb_writeback(wb, &work);
+	}
+
+	return 0;
+}
+
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
 	unsigned long expired;
@@ -787,6 +847,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 	 * Check for periodic writeback, kupdated() style
 	 */
 	wrote += wb_check_old_data_flush(wb);
+	wrote += wb_check_background_flush(wb);
 	clear_bit(BDI_writeback_running, &wb->bdi->state);
 
 	return wrote;
@@ -873,7 +934,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false, false);
+		__bdi_start_writeback(bdi, nr_pages, false);
 	}
 	rcu_read_unlock();
 }
@@ -1164,7 +1225,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
  * @sb: the superblock
  *
  * This function writes and waits on any dirty inode belonging to this
- * super_block. The number of pages synced is returned.
+ * super_block.
  */
 void sync_inodes_sb(struct super_block *sb)
 {
@@ -1242,11 +1303,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 EXPORT_SYMBOL(sync_inode);
 
 /**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
  * @inode: the inode to sync
  * @wait: wait for I/O to complete.
  *
- * Write an inode to disk and adjust it's dirty state after completion.
+ * Write an inode to disk and adjust its dirty state after completion.
  *
  * Note: only writes the actual inode, no associated data or other metadata.
  */
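
Editor's note: to make the wb_writeback() chunking change easier to follow, here is a minimal stand-alone C model of the write_chunk logic this patch introduces. It is an illustration, not kernel code: fake_writeback_pass(), the 1024-page MAX_WRITEBACK_PAGES stand-in, and the 5000-page workload are all invented for the sketch. Compiled with any C compiler, it reports five bounded passes for WB_SYNC_NONE and a single LONG_MAX-sized pass for WB_SYNC_ALL, matching the call sequence described in the patch comment.

/*
 * Stand-alone model of the write_chunk logic added above.  All names
 * and values here are simplified stand-ins for the real kernel
 * structures; this only demonstrates why WB_SYNC_NONE works in
 * bounded chunks while WB_SYNC_ALL does one LONG_MAX-sized pass.
 */
#include <limits.h>
#include <stdio.h>

#define MAX_WRITEBACK_PAGES 1024	/* stand-in for the kernel's cap */

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

/*
 * Pretend to write back up to nr_to_write pages; return what is left
 * of the quota, mimicking wbc.nr_to_write after writeback_inodes_wb().
 */
static long fake_writeback_pass(long nr_to_write, long dirty_pages)
{
	long written = dirty_pages < nr_to_write ? dirty_pages : nr_to_write;

	return nr_to_write - written;
}

static long model_wb_writeback(enum sync_mode mode, long dirty_pages,
			       int *passes)
{
	/* The decision the patch adds to wb_writeback(). */
	long write_chunk = (mode == WB_SYNC_NONE) ? MAX_WRITEBACK_PAGES
						  : LONG_MAX;
	long wrote = 0;

	*passes = 0;
	while (dirty_pages > 0) {
		long nr_to_write = fake_writeback_pass(write_chunk,
						       dirty_pages);
		/* Mirrors: wrote += write_chunk - wbc.nr_to_write; */
		long done = write_chunk - nr_to_write;

		(*passes)++;
		dirty_pages -= done;
		wrote += done;
		if (done == 0)		/* nothing written, stop the model */
			break;
	}
	return wrote;
}

int main(void)
{
	int passes;
	long wrote;

	wrote = model_wb_writeback(WB_SYNC_NONE, 5000, &passes);
	printf("WB_SYNC_NONE: %ld pages in %d pass(es)\n", wrote, passes);
	wrote = model_wb_writeback(WB_SYNC_ALL, 5000, &passes);
	printf("WB_SYNC_ALL:  %ld pages in %d pass(es)\n", wrote, passes);
	return 0;
}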