author		Jens Axboe <jens.axboe@oracle.com>	2009-09-16 09:13:54 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2009-09-16 09:18:52 -0400
commit		b6e51316daede0633e9274e1e30391cfa4747877
tree		664476bb4e7c05dcce3ad908363b482134c68429
parent		bcddc3f01c9122882c8b9f12ab94a934e55aef97
writeback: separate starting of sync vs opportunistic writeback
bdi_start_writeback() is currently split into two paths, one for
WB_SYNC_NONE and one for WB_SYNC_ALL. Add bdi_sync_writeback() for
WB_SYNC_ALL writeback and let bdi_start_writeback() handle only
WB_SYNC_NONE.

Push down the writeback_control allocation and only accept the
parameters that make sense for each function. This cleans up the
API considerably.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
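The caller-visible shape of the change, as a rough before/after sketch (the wb_writeback_args plumbing stays internal to fs/fs-writeback.c; the exact call sites appear in the diff below):

	/* Before: one entry point, behavior keyed off sync_mode. */
	struct writeback_control wbc = {
		.bdi		= sb->s_bdi,
		.sb		= sb,
		.sync_mode	= WB_SYNC_ALL,	/* or WB_SYNC_NONE */
		.nr_to_write	= LONG_MAX,
	};
	bdi_start_writeback(&wbc);

	/* After: two entry points, each taking only what it needs. */
	bdi_sync_writeback(sb->s_bdi, sb);	  /* WB_SYNC_ALL, waits on completion */
	bdi_start_writeback(sb->s_bdi, nr_pages); /* WB_SYNC_NONE, queue and return */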
Diffstat (limited to 'fs')
-rw-r--r--	fs/fs-writeback.c	| 132
-rw-r--r--	fs/ubifs/budget.c	|  20
2 files changed, 70 insertions(+), 82 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59b3ee63b624..5887328b5a06 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -74,14 +74,10 @@ static inline bool bdi_work_on_stack(struct bdi_work *work)
 }
 
 static inline void bdi_work_init(struct bdi_work *work,
-				 struct writeback_control *wbc)
+				 struct wb_writeback_args *args)
 {
 	INIT_RCU_HEAD(&work->rcu_head);
-	work->args.sb = wbc->sb;
-	work->args.nr_pages = wbc->nr_to_write;
-	work->args.sync_mode = wbc->sync_mode;
-	work->args.range_cyclic = wbc->range_cyclic;
-	work->args.for_kupdate = 0;
+	work->args = *args;
 	work->state = WS_USED;
 }
 
@@ -194,7 +190,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
 }
 
 static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct writeback_control *wbc)
+				 struct wb_writeback_args *args)
 {
 	struct bdi_work *work;
 
@@ -204,7 +200,7 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 	 */
 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
 	if (work) {
-		bdi_work_init(work, wbc);
+		bdi_work_init(work, args);
 		bdi_queue_work(bdi, work);
 	} else {
 		struct bdi_writeback *wb = &bdi->wb;
@@ -214,24 +210,54 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 	}
 }
 
-void bdi_start_writeback(struct writeback_control *wbc)
+/**
+ * bdi_sync_writeback - start and wait for writeback
+ * @bdi: the backing device to write from
+ * @sb: write inodes from this super_block
+ *
+ * Description:
+ * This does WB_SYNC_ALL data integrity writeback and waits for the
+ * IO to complete. Callers must hold the sb s_umount semaphore for
+ * reading, to avoid having the super disappear before we are done.
+ */
+static void bdi_sync_writeback(struct backing_dev_info *bdi,
+			       struct super_block *sb)
 {
-	/*
-	 * WB_SYNC_NONE is opportunistic writeback. If this allocation fails,
-	 * bdi_queue_work() will wake up the thread and flush old data. This
-	 * should ensure some amount of progress in freeing memory.
-	 */
-	if (wbc->sync_mode != WB_SYNC_ALL)
-		bdi_alloc_queue_work(wbc->bdi, wbc);
-	else {
-		struct bdi_work work;
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_ALL,
+		.nr_pages	= LONG_MAX,
+		.range_cyclic	= 0,
+	};
+	struct bdi_work work;
 
-		bdi_work_init(&work, wbc);
-		work.state |= WS_ONSTACK;
+	bdi_work_init(&work, &args);
+	work.state |= WS_ONSTACK;
 
-		bdi_queue_work(wbc->bdi, &work);
-		bdi_wait_on_work_clear(&work);
-	}
+	bdi_queue_work(bdi, &work);
+	bdi_wait_on_work_clear(&work);
+}
+
+/**
+ * bdi_start_writeback - start writeback
+ * @bdi: the backing device to write from
+ * @nr_pages: the number of pages to write
+ *
+ * Description:
+ * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * started when this function returns, we make no guarantees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ *
+ */
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+{
+	struct wb_writeback_args args = {
+		.sync_mode	= WB_SYNC_NONE,
+		.nr_pages	= nr_pages,
+		.range_cyclic	= 1,
+	};
+
+	bdi_alloc_queue_work(bdi, &args);
 }
 
 /*
@@ -863,23 +889,25 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 }
 
 /*
- * Schedule writeback for all backing devices. Can only be used for
- * WB_SYNC_NONE writeback, WB_SYNC_ALL should use bdi_start_writeback()
- * and pass in the superblock.
+ * Schedule writeback for all backing devices. This does WB_SYNC_NONE
+ * writeback, for integrity writeback see bdi_sync_writeback().
  */
-static void bdi_writeback_all(struct writeback_control *wbc)
+static void bdi_writeback_all(struct super_block *sb, long nr_pages)
 {
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.nr_pages	= nr_pages,
+		.sync_mode	= WB_SYNC_NONE,
+	};
 	struct backing_dev_info *bdi;
 
-	WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
 
-		bdi_alloc_queue_work(bdi, wbc);
+		bdi_alloc_queue_work(bdi, &args);
 	}
 
 	rcu_read_unlock();
@@ -891,17 +919,10 @@ static void bdi_writeback_all(struct writeback_control *wbc)
  */
 void wakeup_flusher_threads(long nr_pages)
 {
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = NULL,
-		.range_cyclic	= 1,
-	};
-
 	if (nr_pages == 0)
 		nr_pages = global_page_state(NR_FILE_DIRTY) +
 				global_page_state(NR_UNSTABLE_NFS);
-	wbc.nr_to_write = nr_pages;
-	bdi_writeback_all(&wbc);
+	bdi_writeback_all(NULL, nr_pages);
 }
 
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
@@ -1048,7 +1069,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  * on the writer throttling path, and we get decent balancing between many
  * throttled threads: we don't want them all piling up on inode_sync_wait.
  */
-static void wait_sb_inodes(struct writeback_control *wbc)
+static void wait_sb_inodes(struct super_block *sb)
 {
 	struct inode *inode, *old_inode = NULL;
 
@@ -1056,7 +1077,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
 	 * We need to be protected against the filesystem going from
 	 * r/o to r/w or vice versa.
 	 */
-	WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	spin_lock(&inode_lock);
 
@@ -1067,7 +1088,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
 	 * In which case, the inode may not be on the dirty list, but
 	 * we still have to wait for that writeout.
 	 */
-	list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		struct address_space *mapping;
 
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
@@ -1107,14 +1128,8 @@
  * for IO completion of submitted IO. The number of pages submitted is
  * returned.
  */
-long writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb)
 {
-	struct writeback_control wbc = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_NONE,
-		.range_start	= 0,
-		.range_end	= LLONG_MAX,
-	};
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 	long nr_to_write;
@@ -1122,9 +1137,7 @@ long writeback_inodes_sb(struct super_block *sb)
 	nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	wbc.nr_to_write = nr_to_write;
-	bdi_writeback_all(&wbc);
-	return nr_to_write - wbc.nr_to_write;
+	bdi_writeback_all(sb, nr_to_write);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1135,21 +1148,10 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * This function writes and waits on any dirty inode belonging to this
  * super_block. The number of pages synced is returned.
  */
-long sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb(struct super_block *sb)
 {
-	struct writeback_control wbc = {
-		.sb		= sb,
-		.bdi		= sb->s_bdi,
-		.sync_mode	= WB_SYNC_ALL,
-		.range_start	= 0,
-		.range_end	= LLONG_MAX,
-	};
-	long nr_to_write = LONG_MAX; /* doesn't actually matter */
-
-	wbc.nr_to_write = nr_to_write;
-	bdi_start_writeback(&wbc);
-	wait_sb_inodes(&wbc);
-	return nr_to_write - wbc.nr_to_write;
+	bdi_sync_writeback(sb->s_bdi, sb);
+	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
 
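Not visible in this view (the diffstat is limited to 'fs'): the prototype updates that presumably accompany these definition changes. Based on the new signatures above, the declarations would now read roughly as follows; include/linux/backing-dev.h and include/linux/writeback.h are assumed locations, since the header hunks fall outside the 'fs' filter shown here:

	/* assumed header declarations matching the new definitions above */
	void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
	void writeback_inodes_sb(struct super_block *sb);
	void sync_inodes_sb(struct super_block *sb);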
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 1c8991b0db13..ee1ce68fd98b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -54,29 +54,15 @@
  * @nr_to_write: how many dirty pages to write-back
  *
  * This function shrinks UBIFS liability by means of writing back some amount
- * of dirty inodes and their pages. Returns the amount of pages which were
- * written back. The returned value does not include dirty inodes which were
- * synchronized.
+ * of dirty inodes and their pages.
  *
  * Note, this function synchronizes even VFS inodes which are locked
  * (@i_mutex) by the caller of the budgeting function, because write-back does
  * not touch @i_mutex.
  */
-static int shrink_liability(struct ubifs_info *c, int nr_to_write)
+static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
-	int nr_written;
-
-	nr_written = writeback_inodes_sb(c->vfs_sb);
-	if (!nr_written) {
-		/*
-		 * Re-try again but wait on pages/inodes which are being
-		 * written-back concurrently (e.g., by pdflush).
-		 */
-		nr_written = sync_inodes_sb(c->vfs_sb);
-	}
-
-	dbg_budg("%d pages were written back", nr_written);
-	return nr_written;
+	writeback_inodes_sb(c->vfs_sb);
 }
 
 /**
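With writeback_inodes_sb() returning void, UBIFS can no longer detect a lack of progress and fall back to sync_inodes_sb() the way the removed code did. A minimal sketch of how a caller could keep that fallback under the new API; ubifs_still_over_budget() is a hypothetical predicate for illustration, not something this patch adds:

	static void shrink_liability_and_wait(struct ubifs_info *c)
	{
		/* WB_SYNC_NONE: queue opportunistic writeback and return */
		writeback_inodes_sb(c->vfs_sb);

		/* hypothetical progress check; fall back to WB_SYNC_ALL
		 * data integrity writeback, which writes and waits */
		if (ubifs_still_over_budget(c))
			sync_inodes_sb(c->vfs_sb);
	}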