aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jens Axboe <jens.axboe@oracle.com>, 2010-05-17 06:55:07 -0400
committer: Jens Axboe <jens.axboe@oracle.com>, 2010-05-17 06:55:07 -0400
commit: e913fc825dc685a444cb4c1d0f9d32f372f59861 (patch)
tree: e470697e43ffe4028ac81c17d3ef90ee9f30bcfb
parent: 69b62d01ec44fe0d505d89917392347732135a4d (diff)
writeback: fix WB_SYNC_NONE writeback from umount
When umount calls sync_filesystem(), we first do a WB_SYNC_NONE writeback to kick off writeback of pending dirty inodes, then follow that up with a WB_SYNC_ALL writeback to wait for it. Since umount already holds the sb s_umount semaphore, the WB_SYNC_NONE pass ends up doing nothing and all writeback happens as WB_SYNC_ALL. This can greatly slow down umount, since WB_SYNC_ALL writeback is a data integrity operation and thus a bigger hammer than simple WB_SYNC_NONE. For barrier-aware file systems it's a lot slower.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- fs/fs-writeback.c 48
-rw-r--r-- fs/sync.c 2
-rw-r--r-- include/linux/backing-dev.h 2
-rw-r--r-- include/linux/writeback.h 10
-rw-r--r-- mm/page-writeback.c 4
5 files changed, 51 insertions, 15 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 760dc8d0b4ff..67db89786e7d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,6 +45,7 @@ struct wb_writeback_args {
45 int for_kupdate:1; 45 int for_kupdate:1;
46 int range_cyclic:1; 46 int range_cyclic:1;
47 int for_background:1; 47 int for_background:1;
48 int sb_pinned:1;
48}; 49};
49 50
50/* 51/*
@@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
230 .sync_mode = WB_SYNC_ALL, 231 .sync_mode = WB_SYNC_ALL,
231 .nr_pages = LONG_MAX, 232 .nr_pages = LONG_MAX,
232 .range_cyclic = 0, 233 .range_cyclic = 0,
234 /*
235 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
236 * let's make it explicitly clear.
237 */
238 .sb_pinned = 1,
233 }; 239 };
234 struct bdi_work work; 240 struct bdi_work work;
235 241
@@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
245 * @bdi: the backing device to write from 251 * @bdi: the backing device to write from
246 * @sb: write inodes from this super_block 252 * @sb: write inodes from this super_block
247 * @nr_pages: the number of pages to write 253 * @nr_pages: the number of pages to write
254 * @sb_locked: caller already holds sb umount sem.
248 * 255 *
249 * Description: 256 * Description:
250 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 257 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
251 * started when this function returns, we make no guarantees on 258 * started when this function returns, we make no guarantees on
252 * completion. Caller need not hold sb s_umount semaphore. 259 * completion. Caller specifies whether sb umount sem is held already or not.
253 * 260 *
254 */ 261 */
255void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 262void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
256 long nr_pages) 263 long nr_pages, int sb_locked)
257{ 264{
258 struct wb_writeback_args args = { 265 struct wb_writeback_args args = {
259 .sb = sb, 266 .sb = sb,
260 .sync_mode = WB_SYNC_NONE, 267 .sync_mode = WB_SYNC_NONE,
261 .nr_pages = nr_pages, 268 .nr_pages = nr_pages,
262 .range_cyclic = 1, 269 .range_cyclic = 1,
270 .sb_pinned = sb_locked,
263 }; 271 };
264 272
265 /* 273 /*
@@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
577 /* 585 /*
578 * Caller must already hold the ref for this 586 * Caller must already hold the ref for this
579 */ 587 */
580 if (wbc->sync_mode == WB_SYNC_ALL) { 588 if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
581 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 589 WARN_ON(!rwsem_is_locked(&sb->s_umount));
582 return SB_NOT_PINNED; 590 return SB_NOT_PINNED;
583 } 591 }
@@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb,
751 .for_kupdate = args->for_kupdate, 759 .for_kupdate = args->for_kupdate,
752 .for_background = args->for_background, 760 .for_background = args->for_background,
753 .range_cyclic = args->range_cyclic, 761 .range_cyclic = args->range_cyclic,
762 .sb_pinned = args->sb_pinned,
754 }; 763 };
755 unsigned long oldest_jif; 764 unsigned long oldest_jif;
756 long wrote = 0; 765 long wrote = 0;
@@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb)
1193 iput(old_inode); 1202 iput(old_inode);
1194} 1203}
1195 1204
1205static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1206{
1207 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1208 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1209 long nr_to_write;
1210
1211 nr_to_write = nr_dirty + nr_unstable +
1212 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1213
1214 bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
1215}
1216
1196/** 1217/**
1197 * writeback_inodes_sb - writeback dirty inodes from given super_block 1218 * writeback_inodes_sb - writeback dirty inodes from given super_block
1198 * @sb: the superblock 1219 * @sb: the superblock
@@ -1204,18 +1225,23 @@ static void wait_sb_inodes(struct super_block *sb)
1204 */ 1225 */
1205void writeback_inodes_sb(struct super_block *sb) 1226void writeback_inodes_sb(struct super_block *sb)
1206{ 1227{
1207 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1228 __writeback_inodes_sb(sb, 0);
1208 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1209 long nr_to_write;
1210
1211 nr_to_write = nr_dirty + nr_unstable +
1212 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1213
1214 bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
1215} 1229}
1216EXPORT_SYMBOL(writeback_inodes_sb); 1230EXPORT_SYMBOL(writeback_inodes_sb);
1217 1231
1218/** 1232/**
1233 * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
1234 * @sb: the superblock
1235 *
1236 * Like writeback_inodes_sb(), except the caller already holds the
1237 * sb umount sem.
1238 */
1239void writeback_inodes_sb_locked(struct super_block *sb)
1240{
1241 __writeback_inodes_sb(sb, 1);
1242}
1243
1244/**
1219 * writeback_inodes_sb_if_idle - start writeback if none underway 1245 * writeback_inodes_sb_if_idle - start writeback if none underway
1220 * @sb: the superblock 1246 * @sb: the superblock
1221 * 1247 *
diff --git a/fs/sync.c b/fs/sync.c
index 92b228176f7c..de6a44192832 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
42 if (wait) 42 if (wait)
43 sync_inodes_sb(sb); 43 sync_inodes_sb(sb);
44 else 44 else
45 writeback_inodes_sb(sb); 45 writeback_inodes_sb_locked(sb);
46 46
47 if (sb->s_op->sync_fs) 47 if (sb->s_op->sync_fs)
48 sb->s_op->sync_fs(sb, wait); 48 sb->s_op->sync_fs(sb, wait);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 7534979d83bd..ff8bac63213f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
106void bdi_unregister(struct backing_dev_info *bdi); 106void bdi_unregister(struct backing_dev_info *bdi);
107int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); 107int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
108void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 108void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
109 long nr_pages); 109 long nr_pages, int sb_locked);
110int bdi_writeback_task(struct bdi_writeback *wb); 110int bdi_writeback_task(struct bdi_writeback *wb);
111int bdi_has_dirty_io(struct backing_dev_info *bdi); 111int bdi_has_dirty_io(struct backing_dev_info *bdi);
112 112
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index eb38a2c645f6..47e1c686cb02 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -65,6 +65,15 @@ struct writeback_control {
65 * so we use a single control to update them 65 * so we use a single control to update them
66 */ 66 */
67 unsigned no_nrwrite_index_update:1; 67 unsigned no_nrwrite_index_update:1;
68
69 /*
70 * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE,
71 * the writeback code will pin the sb for the caller. However,
72 * for eg umount, the caller does WB_SYNC_NONE but already has
73 * the sb pinned. If the below is set, caller already has the
74 * sb pinned.
75 */
76 unsigned sb_pinned:1;
68}; 77};
69 78
70/* 79/*
@@ -73,6 +82,7 @@ struct writeback_control {
73struct bdi_writeback; 82struct bdi_writeback;
74int inode_wait(void *); 83int inode_wait(void *);
75void writeback_inodes_sb(struct super_block *); 84void writeback_inodes_sb(struct super_block *);
85void writeback_inodes_sb_locked(struct super_block *);
76int writeback_inodes_sb_if_idle(struct super_block *); 86int writeback_inodes_sb_if_idle(struct super_block *);
77void sync_inodes_sb(struct super_block *); 87void sync_inodes_sb(struct super_block *);
78void writeback_inodes_wbc(struct writeback_control *wbc); 88void writeback_inodes_wbc(struct writeback_control *wbc);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d0f2b3765f8d..53b2fcf2d283 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping,
597 (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) 597 (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
598 + global_page_state(NR_UNSTABLE_NFS)) 598 + global_page_state(NR_UNSTABLE_NFS))
599 > background_thresh))) 599 > background_thresh)))
600 bdi_start_writeback(bdi, NULL, 0); 600 bdi_start_writeback(bdi, NULL, 0, 0);
601} 601}
602 602
603void set_page_dirty_balance(struct page *page, int page_mkwrite) 603void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -705,7 +705,7 @@ void laptop_mode_timer_fn(unsigned long data)
705 */ 705 */
706 706
707 if (bdi_has_dirty_io(&q->backing_dev_info)) 707 if (bdi_has_dirty_io(&q->backing_dev_info))
708 bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); 708 bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0);
709} 709}
710 710
711/* 711/*