Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--	fs/fs-writeback.c	105
1 file changed, 83 insertions(+), 22 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3d06ccc953aa..59c6e4956786 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -84,13 +84,9 @@ static inline struct inode *wb_inode(struct list_head *head)
 	return list_entry(head, struct inode, i_wb_list);
 }
 
-static void bdi_queue_work(struct backing_dev_info *bdi,
-		struct wb_writeback_work *work)
+/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
+static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
 {
-	trace_writeback_queue(bdi, work);
-
-	spin_lock_bh(&bdi->wb_lock);
-	list_add_tail(&work->list, &bdi->work_list);
 	if (bdi->wb.task) {
 		wake_up_process(bdi->wb.task);
 	} else {
@@ -98,15 +94,26 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 		 * The bdi thread isn't there, wake up the forker thread which
 		 * will create and run it.
 		 */
-		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
 	}
+}
+
+static void bdi_queue_work(struct backing_dev_info *bdi,
+		struct wb_writeback_work *work)
+{
+	trace_writeback_queue(bdi, work);
+
+	spin_lock_bh(&bdi->wb_lock);
+	list_add_tail(&work->list, &bdi->work_list);
+	if (!bdi->wb.task)
+		trace_writeback_nothread(bdi, work);
+	bdi_wakeup_flusher(bdi);
 	spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		bool range_cyclic, bool for_background)
+		bool range_cyclic)
 {
 	struct wb_writeback_work *work;
 
@@ -126,7 +133,6 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	work->sync_mode	= WB_SYNC_NONE;
 	work->nr_pages	= nr_pages;
 	work->range_cyclic = range_cyclic;
-	work->for_background = for_background;
 
 	bdi_queue_work(bdi, work);
 }
@@ -144,7 +150,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	__bdi_start_writeback(bdi, nr_pages, true, false);
+	__bdi_start_writeback(bdi, nr_pages, true);
 }
 
 /**
@@ -152,13 +158,21 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  * @bdi: the backing device to write from
  *
  * Description:
- *   This does WB_SYNC_NONE background writeback. The IO is only
- *   started when this function returns, we make no guarentees on
- *   completion. Caller need not hold sb s_umount semaphore.
+ *   This makes sure WB_SYNC_NONE background writeback happens. When
+ *   this function returns, it is only guaranteed that for given BDI
+ *   some IO is happening if we are over background dirty threshold.
+ *   Caller need not hold sb s_umount semaphore.
  */
 void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	__bdi_start_writeback(bdi, LONG_MAX, true, true);
+	/*
+	 * We just wake up the flusher thread. It will perform background
+	 * writeback as soon as there is no other work to do.
+	 */
+	trace_writeback_wake_background(bdi);
+	spin_lock_bh(&bdi->wb_lock);
+	bdi_wakeup_flusher(bdi);
+	spin_unlock_bh(&bdi->wb_lock);
 }
 
 /*
@@ -616,6 +630,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
+	long write_chunk;
 	struct inode *inode;
 
 	if (wbc.for_kupdate) {
@@ -628,6 +643,24 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.range_end = LLONG_MAX;
 	}
 
+	/*
+	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+	 * here avoids calling into writeback_inodes_wb() more than once.
+	 *
+	 * The intended call sequence for WB_SYNC_ALL writeback is:
+	 *
+	 *      wb_writeback()
+	 *          __writeback_inodes_sb()     <== called only once
+	 *              write_cache_pages()     <== called once for each inode
+	 *                   (quickly) tag currently dirty pages
+	 *                   (maybe slowly) sync all tagged pages
+	 */
+	if (wbc.sync_mode == WB_SYNC_NONE)
+		write_chunk = MAX_WRITEBACK_PAGES;
+	else
+		write_chunk = LONG_MAX;
+
 	wbc.wb_start = jiffies; /* livelock avoidance */
 	for (;;) {
 		/*
@@ -637,6 +670,16 @@ static long wb_writeback(struct bdi_writeback *wb,
 			break;
 
 		/*
+		 * Background writeout and kupdate-style writeback may
+		 * run forever. Stop them if there is other work to do
+		 * so that e.g. sync can proceed. They'll be restarted
+		 * after the other works are all done.
+		 */
+		if ((work->for_background || work->for_kupdate) &&
+		    !list_empty(&wb->bdi->work_list))
+			break;
+
+		/*
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
@@ -644,7 +687,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 			break;
 
 		wbc.more_io = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+		wbc.nr_to_write = write_chunk;
 		wbc.pages_skipped = 0;
 
 		trace_wbc_writeback_start(&wbc, wb->bdi);
@@ -654,8 +697,8 @@ static long wb_writeback(struct bdi_writeback *wb,
 		writeback_inodes_wb(wb, &wbc);
 		trace_wbc_writeback_written(&wbc, wb->bdi);
 
-		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		work->nr_pages -= write_chunk - wbc.nr_to_write;
+		wrote += write_chunk - wbc.nr_to_write;
 
 		/*
 		 * If we consumed everything, see if we have more
@@ -670,7 +713,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		/*
 		 * Did we write something? Try for more
 		 */
-		if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+		if (wbc.nr_to_write < write_chunk)
 			continue;
 		/*
 		 * Nothing written. Wait for some inode to
@@ -718,6 +761,23 @@ static unsigned long get_nr_dirty_pages(void)
 			get_nr_dirty_inodes();
 }
 
+static long wb_check_background_flush(struct bdi_writeback *wb)
+{
+	if (over_bground_thresh()) {
+
+		struct wb_writeback_work work = {
+			.nr_pages	= LONG_MAX,
+			.sync_mode	= WB_SYNC_NONE,
+			.for_background	= 1,
+			.range_cyclic	= 1,
+		};
+
+		return wb_writeback(wb, &work);
+	}
+
+	return 0;
+}
+
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
 	unsigned long expired;
@@ -787,6 +847,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 	 * Check for periodic writeback, kupdated() style
 	 */
 	wrote += wb_check_old_data_flush(wb);
+	wrote += wb_check_background_flush(wb);
 	clear_bit(BDI_writeback_running, &wb->bdi->state);
 
 	return wrote;
@@ -873,7 +934,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false, false);
+		__bdi_start_writeback(bdi, nr_pages, false);
 	}
 	rcu_read_unlock();
 }
@@ -1164,7 +1225,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
  * @sb: the superblock
  *
  * This function writes and waits on any dirty inode belonging to this
- * super_block. The number of pages synced is returned.
+ * super_block.
  */
 void sync_inodes_sb(struct super_block *sb)
 {
@@ -1242,11 +1303,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 EXPORT_SYMBOL(sync_inode);
 
 /**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
  * @inode: the inode to sync
  * @wait: wait for I/O to complete.
  *
- * Write an inode to disk and adjust it's dirty state after completion.
+ * Write an inode to disk and adjust its dirty state after completion.
  *
  * Note: only writes the actual inode, no associated data or other metadata.
  */