about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author Jens Axboe <jens.axboe@oracle.com> 2010-05-17 06:55:07 -0400
committer Jens Axboe <jens.axboe@oracle.com> 2010-05-17 06:55:07 -0400
commit e913fc825dc685a444cb4c1d0f9d32f372f59861 (patch)
tree e470697e43ffe4028ac81c17d3ef90ee9f30bcfb /fs
parent 69b62d01ec44fe0d505d89917392347732135a4d (diff)
writeback: fix WB_SYNC_NONE writeback from umount
When umount calls sync_filesystem(), we first do a WB_SYNC_NONE writeback to kick off writeback of pending dirty inodes, then follow that up with a WB_SYNC_ALL to wait for it. Since umount already holds the sb s_umount semaphore, WB_SYNC_NONE ends up doing nothing and all writeback happens as WB_SYNC_ALL. This can greatly slow down umount, since WB_SYNC_ALL writeback is a data integrity operation and thus a bigger hammer than simple WB_SYNC_NONE. For barrier-aware file systems it's a lot slower.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/fs-writeback.c 48
-rw-r--r-- fs/sync.c 2
2 files changed, 38 insertions, 12 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 760dc8d0b4ff..67db89786e7d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,6 +45,7 @@ struct wb_writeback_args {
45 int for_kupdate:1; 45 int for_kupdate:1;
46 int range_cyclic:1; 46 int range_cyclic:1;
47 int for_background:1; 47 int for_background:1;
48 int sb_pinned:1;
48}; 49};
49 50
50/* 51/*
@@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
230 .sync_mode = WB_SYNC_ALL, 231 .sync_mode = WB_SYNC_ALL,
231 .nr_pages = LONG_MAX, 232 .nr_pages = LONG_MAX,
232 .range_cyclic = 0, 233 .range_cyclic = 0,
234 /*
235 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
236 * lets make it explicitly clear.
237 */
238 .sb_pinned = 1,
233 }; 239 };
234 struct bdi_work work; 240 struct bdi_work work;
235 241
@@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
245 * @bdi: the backing device to write from 251 * @bdi: the backing device to write from
246 * @sb: write inodes from this super_block 252 * @sb: write inodes from this super_block
247 * @nr_pages: the number of pages to write 253 * @nr_pages: the number of pages to write
254 * @sb_locked: caller already holds sb umount sem.
248 * 255 *
249 * Description: 256 * Description:
250 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 257 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
251 * started when this function returns, we make no guarentees on 258 * started when this function returns, we make no guarentees on
252 * completion. Caller need not hold sb s_umount semaphore. 259 * completion. Caller specifies whether sb umount sem is held already or not.
253 * 260 *
254 */ 261 */
255void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 262void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
256 long nr_pages) 263 long nr_pages, int sb_locked)
257{ 264{
258 struct wb_writeback_args args = { 265 struct wb_writeback_args args = {
259 .sb = sb, 266 .sb = sb,
260 .sync_mode = WB_SYNC_NONE, 267 .sync_mode = WB_SYNC_NONE,
261 .nr_pages = nr_pages, 268 .nr_pages = nr_pages,
262 .range_cyclic = 1, 269 .range_cyclic = 1,
270 .sb_pinned = sb_locked,
263 }; 271 };
264 272
265 /* 273 /*
@@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
577 /* 585 /*
578 * Caller must already hold the ref for this 586 * Caller must already hold the ref for this
579 */ 587 */
580 if (wbc->sync_mode == WB_SYNC_ALL) { 588 if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
581 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 589 WARN_ON(!rwsem_is_locked(&sb->s_umount));
582 return SB_NOT_PINNED; 590 return SB_NOT_PINNED;
583 } 591 }
@@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb,
751 .for_kupdate = args->for_kupdate, 759 .for_kupdate = args->for_kupdate,
752 .for_background = args->for_background, 760 .for_background = args->for_background,
753 .range_cyclic = args->range_cyclic, 761 .range_cyclic = args->range_cyclic,
762 .sb_pinned = args->sb_pinned,
754 }; 763 };
755 unsigned long oldest_jif; 764 unsigned long oldest_jif;
756 long wrote = 0; 765 long wrote = 0;
@@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb)
1193 iput(old_inode); 1202 iput(old_inode);
1194} 1203}
1195 1204
1205static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1206{
1207 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1208 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1209 long nr_to_write;
1210
1211 nr_to_write = nr_dirty + nr_unstable +
1212 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1213
1214 bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
1215}
1216
1196/** 1217/**
1197 * writeback_inodes_sb - writeback dirty inodes from given super_block 1218 * writeback_inodes_sb - writeback dirty inodes from given super_block
1198 * @sb: the superblock 1219 * @sb: the superblock
@@ -1204,18 +1225,23 @@ static void wait_sb_inodes(struct super_block *sb)
1204 */ 1225 */
1205void writeback_inodes_sb(struct super_block *sb) 1226void writeback_inodes_sb(struct super_block *sb)
1206{ 1227{
1207 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1228 __writeback_inodes_sb(sb, 0);
1208 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1209 long nr_to_write;
1210
1211 nr_to_write = nr_dirty + nr_unstable +
1212 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1213
1214 bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
1215} 1229}
1216EXPORT_SYMBOL(writeback_inodes_sb); 1230EXPORT_SYMBOL(writeback_inodes_sb);
1217 1231
1218/** 1232/**
1233 * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
1234 * @sb: the superblock
1235 *
1236 * Like writeback_inodes_sb(), except the caller already holds the
1237 * sb umount sem.
1238 */
1239void writeback_inodes_sb_locked(struct super_block *sb)
1240{
1241 __writeback_inodes_sb(sb, 1);
1242}
1243
1244/**
1219 * writeback_inodes_sb_if_idle - start writeback if none underway 1245 * writeback_inodes_sb_if_idle - start writeback if none underway
1220 * @sb: the superblock 1246 * @sb: the superblock
1221 * 1247 *
diff --git a/fs/sync.c b/fs/sync.c
index 92b228176f7c..de6a44192832 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
42 if (wait) 42 if (wait)
43 sync_inodes_sb(sb); 43 sync_inodes_sb(sb);
44 else 44 else
45 writeback_inodes_sb(sb); 45 writeback_inodes_sb_locked(sb);
46 46
47 if (sb->s_op->sync_fs) 47 if (sb->s_op->sync_fs)
48 sb->s_op->sync_fs(sb, wait); 48 sb->s_op->sync_fs(sb, wait);