author		Jens Axboe <jens.axboe@oracle.com>	2009-09-16 09:13:54 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2009-09-16 09:18:52 -0400
commit		b6e51316daede0633e9274e1e30391cfa4747877 (patch)
tree		664476bb4e7c05dcce3ad908363b482134c68429 /fs
parent		bcddc3f01c9122882c8b9f12ab94a934e55aef97 (diff)
writeback: separate starting of sync vs opportunistic writeback
bdi_start_writeback() is currently split into two paths, one for
WB_SYNC_NONE and one for WB_SYNC_ALL. Add bdi_sync_writeback()
for WB_SYNC_ALL writeback and let bdi_start_writeback() handle
only WB_SYNC_NONE.
Push down the writeback_control allocation and only accept the
parameters that make sense for each function. This cleans up
the API considerably.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--	fs/fs-writeback.c	132
-rw-r--r--	fs/ubifs/budget.c	20
2 files changed, 70 insertions, 82 deletions
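
In caller terms, the split described above looks roughly like this (a sketch distilled from the hunks below, not code taken verbatim from the patch; sb and nr_pages stand in for whatever the caller already has at hand):

	/* Before: one entry point, behavior keyed off wbc->sync_mode. */
	struct writeback_control wbc = {
		.bdi		= sb->s_bdi,
		.sb		= sb,
		.sync_mode	= WB_SYNC_ALL,	/* or WB_SYNC_NONE */
		.nr_to_write	= LONG_MAX,
	};
	bdi_start_writeback(&wbc);

	/* After: the sync mode picks the function instead of a wbc field. */
	bdi_sync_writeback(sb->s_bdi, sb);		/* WB_SYNC_ALL, waits for the IO */
	bdi_start_writeback(sb->s_bdi, nr_pages);	/* WB_SYNC_NONE, no completion guarantee */
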
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59b3ee63b624..5887328b5a06 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -74,14 +74,10 @@ static inline bool bdi_work_on_stack(struct bdi_work *work)
 }
 
 static inline void bdi_work_init(struct bdi_work *work,
-				 struct writeback_control *wbc)
+				 struct wb_writeback_args *args)
 {
 	INIT_RCU_HEAD(&work->rcu_head);
-	work->args.sb = wbc->sb;
-	work->args.nr_pages = wbc->nr_to_write;
-	work->args.sync_mode = wbc->sync_mode;
-	work->args.range_cyclic = wbc->range_cyclic;
-	work->args.for_kupdate = 0;
+	work->args = *args;
 	work->state = WS_USED;
 }
 
@@ -194,7 +190,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
 }
 
 static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct writeback_control *wbc)
+				 struct wb_writeback_args *args)
 {
 	struct bdi_work *work;
 
@@ -204,7 +200,7 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 	 */
 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
 	if (work) {
-		bdi_work_init(work, wbc);
+		bdi_work_init(work, args);
 		bdi_queue_work(bdi, work);
 	} else {
 		struct bdi_writeback *wb = &bdi->wb;
@@ -214,24 +210,54 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 	}
 }
 
-void bdi_start_writeback(struct writeback_control *wbc)
+/**
+ * bdi_sync_writeback - start and wait for writeback
+ * @bdi: the backing device to write from
+ * @sb: write inodes from this super_block
+ *
+ * Description:
+ *   This does WB_SYNC_ALL data integrity writeback and waits for the
+ *   IO to complete. Callers must hold the sb s_umount semaphore for
+ *   reading, to avoid having the super disappear before we are done.
+ */
+static void bdi_sync_writeback(struct backing_dev_info *bdi,
+			       struct super_block *sb)
 {
-	/*
-	 * WB_SYNC_NONE is opportunistic writeback. If this allocation fails,
-	 * bdi_queue_work() will wake up the thread and flush old data. This
-	 * should ensure some amount of progress in freeing memory.
-	 */
-	if (wbc->sync_mode != WB_SYNC_ALL)
-		bdi_alloc_queue_work(wbc->bdi, wbc);
-	else {
-		struct bdi_work work;
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_ALL,
+		.nr_pages	= LONG_MAX,
+		.range_cyclic	= 0,
+	};
+	struct bdi_work work;
 
-		bdi_work_init(&work, wbc);
-		work.state |= WS_ONSTACK;
+	bdi_work_init(&work, &args);
+	work.state |= WS_ONSTACK;
 
-		bdi_queue_work(wbc->bdi, &work);
-		bdi_wait_on_work_clear(&work);
-	}
+	bdi_queue_work(bdi, &work);
+	bdi_wait_on_work_clear(&work);
+}
+
+/**
+ * bdi_start_writeback - start writeback
+ * @bdi: the backing device to write from
+ * @nr_pages: the number of pages to write
+ *
+ * Description:
+ *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ *
+ */
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+{
+	struct wb_writeback_args args = {
+		.sync_mode	= WB_SYNC_NONE,
+		.nr_pages	= nr_pages,
+		.range_cyclic	= 1,
+	};
+
+	bdi_alloc_queue_work(bdi, &args);
 }
 
 /*
@@ -863,23 +889,25 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 }
 
 /*
- * Schedule writeback for all backing devices. Can only be used for
- * WB_SYNC_NONE writeback, WB_SYNC_ALL should use bdi_start_writeback()
- * and pass in the superblock.
+ * Schedule writeback for all backing devices. This does WB_SYNC_NONE
+ * writeback, for integrity writeback see bdi_sync_writeback().
  */
-static void bdi_writeback_all(struct writeback_control *wbc)
+static void bdi_writeback_all(struct super_block *sb, long nr_pages)
 {
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.nr_pages	= nr_pages,
+		.sync_mode	= WB_SYNC_NONE,
+	};
 	struct backing_dev_info *bdi;
 
-	WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
 
-		bdi_alloc_queue_work(bdi, wbc);
+		bdi_alloc_queue_work(bdi, &args);
 	}
 
 	rcu_read_unlock();
@@ -891,17 +919,10 @@ static void bdi_writeback_all(struct writeback_control *wbc)
  */
 void wakeup_flusher_threads(long nr_pages)
 {
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = NULL,
-		.range_cyclic	= 1,
-	};
-
 	if (nr_pages == 0)
 		nr_pages = global_page_state(NR_FILE_DIRTY) +
 				global_page_state(NR_UNSTABLE_NFS);
-	wbc.nr_to_write = nr_pages;
-	bdi_writeback_all(&wbc);
+	bdi_writeback_all(NULL, nr_pages);
 }
 
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
@@ -1048,7 +1069,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  * on the writer throttling path, and we get decent balancing between many
  * throttled threads: we don't want them all piling up on inode_sync_wait.
  */
-static void wait_sb_inodes(struct writeback_control *wbc)
+static void wait_sb_inodes(struct super_block *sb)
 {
 	struct inode *inode, *old_inode = NULL;
 
@@ -1056,7 +1077,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
 	 * We need to be protected against the filesystem going from
 	 * r/o to r/w or vice versa.
 	 */
-	WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	spin_lock(&inode_lock);
 
@@ -1067,7 +1088,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
 	 * In which case, the inode may not be on the dirty list, but
 	 * we still have to wait for that writeout.
 	 */
-	list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		struct address_space *mapping;
 
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
@@ -1107,14 +1128,8 @@ static void wait_sb_inodes(struct writeback_control *wbc)
  * for IO completion of submitted IO. The number of pages submitted is
  * returned.
  */
-long writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb)
 {
-	struct writeback_control wbc = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_NONE,
-		.range_start	= 0,
-		.range_end	= LLONG_MAX,
-	};
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 	long nr_to_write;
@@ -1122,9 +1137,7 @@ long writeback_inodes_sb(struct super_block *sb)
 	nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	wbc.nr_to_write = nr_to_write;
-	bdi_writeback_all(&wbc);
-	return nr_to_write - wbc.nr_to_write;
+	bdi_writeback_all(sb, nr_to_write);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1135,21 +1148,10 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * This function writes and waits on any dirty inode belonging to this
  * super_block. The number of pages synced is returned.
  */
-long sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb(struct super_block *sb)
 {
-	struct writeback_control wbc = {
-		.sb		= sb,
-		.bdi		= sb->s_bdi,
-		.sync_mode	= WB_SYNC_ALL,
-		.range_start	= 0,
-		.range_end	= LLONG_MAX,
-	};
-	long nr_to_write = LONG_MAX; /* doesn't actually matter */
-
-	wbc.nr_to_write = nr_to_write;
-	bdi_start_writeback(&wbc);
-	wait_sb_inodes(&wbc);
-	return nr_to_write - wbc.nr_to_write;
+	bdi_sync_writeback(sb->s_bdi, sb);
+	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
 
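Because the writeback_control is now built inside fs/fs-writeback.c, the two superblock helpers it exports stop reporting a page count. After this patch their signatures are as shown below (the matching declarations are presumably updated in include/linux/writeback.h, which the fs-only diffstat above does not cover):

	void writeback_inodes_sb(struct super_block *sb);	/* WB_SYNC_NONE, returns before IO completes */
	void sync_inodes_sb(struct super_block *sb);		/* WB_SYNC_ALL, waits via wait_sb_inodes() */

This is why the UBIFS hunk that follows drops the nr_written bookkeeping from shrink_liability().
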
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 1c8991b0db13..ee1ce68fd98b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -54,29 +54,15 @@
  * @nr_to_write: how many dirty pages to write-back
  *
  * This function shrinks UBIFS liability by means of writing back some amount
- * of dirty inodes and their pages. Returns the amount of pages which were
- * written back. The returned value does not include dirty inodes which were
- * synchronized.
+ * of dirty inodes and their pages.
  *
  * Note, this function synchronizes even VFS inodes which are locked
  * (@i_mutex) by the caller of the budgeting function, because write-back does
  * not touch @i_mutex.
  */
-static int shrink_liability(struct ubifs_info *c, int nr_to_write)
+static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
-	int nr_written;
-
-	nr_written = writeback_inodes_sb(c->vfs_sb);
-	if (!nr_written) {
-		/*
-		 * Re-try again but wait on pages/inodes which are being
-		 * written-back concurrently (e.g., by pdflush).
-		 */
-		nr_written = sync_inodes_sb(c->vfs_sb);
-	}
-
-	dbg_budg("%d pages were written back", nr_written);
-	return nr_written;
+	writeback_inodes_sb(c->vfs_sb);
 }
 
 /**