diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2010-05-17 06:55:07 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2010-05-17 06:55:07 -0400 |
commit | e913fc825dc685a444cb4c1d0f9d32f372f59861 (patch) | |
tree | e470697e43ffe4028ac81c17d3ef90ee9f30bcfb /fs | |
parent | 69b62d01ec44fe0d505d89917392347732135a4d (diff) |
writeback: fix WB_SYNC_NONE writeback from umount
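When umount calls sync_filesystem(), we first do a WB_SYNC_NONE
writeback to kick off writeback of pending dirty inodes, then follow
that up with a WB_SYNC_ALL to wait for it. Since umount already holds
the sb s_umount semaphore, WB_SYNC_NONE ends up doing nothing and all
writeback happens as WB_SYNC_ALL. This can greatly slow down umount,
since WB_SYNC_ALL writeback is a data integrity operation and thus
a bigger hammer than simple WB_SYNC_NONE. For barrier-aware file systems
it's a lot slower.
Fix this by letting the caller state that it already holds s_umount:
add an sb_pinned flag to the writeback arguments, set it through the new
writeback_inodes_sb_locked(), and have pin_sb_for_writeback() skip
pinning the sb when the flag is set.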
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/fs-writeback.c | 48 | ||||
-rw-r--r-- | fs/sync.c | 2 |
2 files changed, 38 insertions, 12 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 760dc8d0b4ff..67db89786e7d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -45,6 +45,7 @@ struct wb_writeback_args { | |||
45 | int for_kupdate:1; | 45 | int for_kupdate:1; |
46 | int range_cyclic:1; | 46 | int range_cyclic:1; |
47 | int for_background:1; | 47 | int for_background:1; |
48 | int sb_pinned:1; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | /* | 51 | /* |
@@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
230 | .sync_mode = WB_SYNC_ALL, | 231 | .sync_mode = WB_SYNC_ALL, |
231 | .nr_pages = LONG_MAX, | 232 | .nr_pages = LONG_MAX, |
232 | .range_cyclic = 0, | 233 | .range_cyclic = 0, |
234 | /* | ||
235 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
236 | * lets make it explicitly clear. | ||
237 | */ | ||
238 | .sb_pinned = 1, | ||
233 | }; | 239 | }; |
234 | struct bdi_work work; | 240 | struct bdi_work work; |
235 | 241 | ||
@@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
245 | * @bdi: the backing device to write from | 251 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | 252 | * @sb: write inodes from this super_block |
247 | * @nr_pages: the number of pages to write | 253 | * @nr_pages: the number of pages to write |
254 | * @sb_locked: caller already holds sb umount sem. | ||
248 | * | 255 | * |
249 | * Description: | 256 | * Description: |
250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 257 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
251 | * started when this function returns, we make no guarantees on | 258 | * started when this function returns, we make no guarantees on |
252 | * completion. Caller need not hold sb s_umount semaphore. | 259 | * completion. Caller specifies whether sb umount sem is held already or not. |
253 | * | 260 | * |
254 | */ | 261 | */ |
255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 262 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
256 | long nr_pages) | 263 | long nr_pages, int sb_locked) |
257 | { | 264 | { |
258 | struct wb_writeback_args args = { | 265 | struct wb_writeback_args args = { |
259 | .sb = sb, | 266 | .sb = sb, |
260 | .sync_mode = WB_SYNC_NONE, | 267 | .sync_mode = WB_SYNC_NONE, |
261 | .nr_pages = nr_pages, | 268 | .nr_pages = nr_pages, |
262 | .range_cyclic = 1, | 269 | .range_cyclic = 1, |
270 | .sb_pinned = sb_locked, | ||
263 | }; | 271 | }; |
264 | 272 | ||
265 | /* | 273 | /* |
@@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
577 | /* | 585 | /* |
578 | * Caller must already hold the ref for this | 586 | * Caller must already hold the ref for this |
579 | */ | 587 | */ |
580 | if (wbc->sync_mode == WB_SYNC_ALL) { | 588 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { |
581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 589 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
582 | return SB_NOT_PINNED; | 590 | return SB_NOT_PINNED; |
583 | } | 591 | } |
@@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
751 | .for_kupdate = args->for_kupdate, | 759 | .for_kupdate = args->for_kupdate, |
752 | .for_background = args->for_background, | 760 | .for_background = args->for_background, |
753 | .range_cyclic = args->range_cyclic, | 761 | .range_cyclic = args->range_cyclic, |
762 | .sb_pinned = args->sb_pinned, | ||
754 | }; | 763 | }; |
755 | unsigned long oldest_jif; | 764 | unsigned long oldest_jif; |
756 | long wrote = 0; | 765 | long wrote = 0; |
@@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1193 | iput(old_inode); | 1202 | iput(old_inode); |
1194 | } | 1203 | } |
1195 | 1204 | ||
1205 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
1206 | { | ||
1207 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1208 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1209 | long nr_to_write; | ||
1210 | |||
1211 | nr_to_write = nr_dirty + nr_unstable + | ||
1212 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1213 | |||
1214 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
1215 | } | ||
1216 | |||
1196 | /** | 1217 | /** |
1197 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1218 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
1198 | * @sb: the superblock | 1219 | * @sb: the superblock |
@@ -1204,18 +1225,23 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1204 | */ | 1225 | */ |
1205 | void writeback_inodes_sb(struct super_block *sb) | 1226 | void writeback_inodes_sb(struct super_block *sb) |
1206 | { | 1227 | { |
1207 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1228 | __writeback_inodes_sb(sb, 0); |
1208 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1209 | long nr_to_write; | ||
1210 | |||
1211 | nr_to_write = nr_dirty + nr_unstable + | ||
1212 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1213 | |||
1214 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | ||
1215 | } | 1229 | } |
1216 | EXPORT_SYMBOL(writeback_inodes_sb); | 1230 | EXPORT_SYMBOL(writeback_inodes_sb); |
1217 | 1231 | ||
1218 | /** | 1232 | /** |
1233 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | ||
1234 | * @sb: the superblock | ||
1235 | * | ||
1236 | * Like writeback_inodes_sb(), except the caller already holds the | ||
1237 | * sb umount sem. | ||
1238 | */ | ||
1239 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
1240 | { | ||
1241 | __writeback_inodes_sb(sb, 1); | ||
1242 | } | ||
1243 | |||
1244 | /** | ||
1219 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1245 | * writeback_inodes_sb_if_idle - start writeback if none underway |
1220 | * @sb: the superblock | 1246 | * @sb: the superblock |
1221 | * | 1247 | * |
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
42 | if (wait) | 42 | if (wait) |
43 | sync_inodes_sb(sb); | 43 | sync_inodes_sb(sb); |
44 | else | 44 | else |
45 | writeback_inodes_sb(sb); | 45 | writeback_inodes_sb_locked(sb); |
46 | 46 | ||
47 | if (sb->s_op->sync_fs) | 47 | if (sb->s_op->sync_fs) |
48 | sb->s_op->sync_fs(sb, wait); | 48 | sb->s_op->sync_fs(sb, wait); |