diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2010-05-17 06:55:07 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2010-05-17 06:55:07 -0400 |
commit | e913fc825dc685a444cb4c1d0f9d32f372f59861 (patch) | |
tree | e470697e43ffe4028ac81c17d3ef90ee9f30bcfb | |
parent | 69b62d01ec44fe0d505d89917392347732135a4d (diff) |
writeback: fix WB_SYNC_NONE writeback from umount
When umount calls sync_filesystem(), we first do a WB_SYNC_NONE
writeback to kick off writeback of pending dirty inodes, then follow
that up with a WB_SYNC_ALL to wait for it. Since umount already holds
the sb s_umount mutex, WB_SYNC_NONE ends up doing nothing and all
writeback happens as WB_SYNC_ALL. This can greatly slow down umount,
since WB_SYNC_ALL writeback is a data integrity operation and thus
a bigger hammer than simple WB_SYNC_NONE. For barrier aware file systems
it's a lot slower.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/fs-writeback.c | 48 | ||||
-rw-r--r-- | fs/sync.c | 2 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 2 | ||||
-rw-r--r-- | include/linux/writeback.h | 10 | ||||
-rw-r--r-- | mm/page-writeback.c | 4 |
5 files changed, 51 insertions, 15 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 760dc8d0b4ff..67db89786e7d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -45,6 +45,7 @@ struct wb_writeback_args { | |||
45 | int for_kupdate:1; | 45 | int for_kupdate:1; |
46 | int range_cyclic:1; | 46 | int range_cyclic:1; |
47 | int for_background:1; | 47 | int for_background:1; |
48 | int sb_pinned:1; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | /* | 51 | /* |
@@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
230 | .sync_mode = WB_SYNC_ALL, | 231 | .sync_mode = WB_SYNC_ALL, |
231 | .nr_pages = LONG_MAX, | 232 | .nr_pages = LONG_MAX, |
232 | .range_cyclic = 0, | 233 | .range_cyclic = 0, |
234 | /* | ||
235 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
236 | * lets make it explicitly clear. | ||
237 | */ | ||
238 | .sb_pinned = 1, | ||
233 | }; | 239 | }; |
234 | struct bdi_work work; | 240 | struct bdi_work work; |
235 | 241 | ||
@@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
245 | * @bdi: the backing device to write from | 251 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | 252 | * @sb: write inodes from this super_block |
247 | * @nr_pages: the number of pages to write | 253 | * @nr_pages: the number of pages to write |
254 | * @sb_locked: caller already holds sb umount sem. | ||
248 | * | 255 | * |
249 | * Description: | 256 | * Description: |
250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 257 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
251 | * started when this function returns, we make no guarentees on | 258 | * started when this function returns, we make no guarentees on |
252 | * completion. Caller need not hold sb s_umount semaphore. | 259 | * completion. Caller specifies whether sb umount sem is held already or not. |
253 | * | 260 | * |
254 | */ | 261 | */ |
255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 262 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
256 | long nr_pages) | 263 | long nr_pages, int sb_locked) |
257 | { | 264 | { |
258 | struct wb_writeback_args args = { | 265 | struct wb_writeback_args args = { |
259 | .sb = sb, | 266 | .sb = sb, |
260 | .sync_mode = WB_SYNC_NONE, | 267 | .sync_mode = WB_SYNC_NONE, |
261 | .nr_pages = nr_pages, | 268 | .nr_pages = nr_pages, |
262 | .range_cyclic = 1, | 269 | .range_cyclic = 1, |
270 | .sb_pinned = sb_locked, | ||
263 | }; | 271 | }; |
264 | 272 | ||
265 | /* | 273 | /* |
@@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
577 | /* | 585 | /* |
578 | * Caller must already hold the ref for this | 586 | * Caller must already hold the ref for this |
579 | */ | 587 | */ |
580 | if (wbc->sync_mode == WB_SYNC_ALL) { | 588 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { |
581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 589 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
582 | return SB_NOT_PINNED; | 590 | return SB_NOT_PINNED; |
583 | } | 591 | } |
@@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
751 | .for_kupdate = args->for_kupdate, | 759 | .for_kupdate = args->for_kupdate, |
752 | .for_background = args->for_background, | 760 | .for_background = args->for_background, |
753 | .range_cyclic = args->range_cyclic, | 761 | .range_cyclic = args->range_cyclic, |
762 | .sb_pinned = args->sb_pinned, | ||
754 | }; | 763 | }; |
755 | unsigned long oldest_jif; | 764 | unsigned long oldest_jif; |
756 | long wrote = 0; | 765 | long wrote = 0; |
@@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1193 | iput(old_inode); | 1202 | iput(old_inode); |
1194 | } | 1203 | } |
1195 | 1204 | ||
1205 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
1206 | { | ||
1207 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1208 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1209 | long nr_to_write; | ||
1210 | |||
1211 | nr_to_write = nr_dirty + nr_unstable + | ||
1212 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1213 | |||
1214 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
1215 | } | ||
1216 | |||
1196 | /** | 1217 | /** |
1197 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1218 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
1198 | * @sb: the superblock | 1219 | * @sb: the superblock |
@@ -1204,18 +1225,23 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1204 | */ | 1225 | */ |
1205 | void writeback_inodes_sb(struct super_block *sb) | 1226 | void writeback_inodes_sb(struct super_block *sb) |
1206 | { | 1227 | { |
1207 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1228 | __writeback_inodes_sb(sb, 0); |
1208 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1209 | long nr_to_write; | ||
1210 | |||
1211 | nr_to_write = nr_dirty + nr_unstable + | ||
1212 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1213 | |||
1214 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | ||
1215 | } | 1229 | } |
1216 | EXPORT_SYMBOL(writeback_inodes_sb); | 1230 | EXPORT_SYMBOL(writeback_inodes_sb); |
1217 | 1231 | ||
1218 | /** | 1232 | /** |
1233 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | ||
1234 | * @sb: the superblock | ||
1235 | * | ||
1236 | * Like writeback_inodes_sb(), except the caller already holds the | ||
1237 | * sb umount sem. | ||
1238 | */ | ||
1239 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
1240 | { | ||
1241 | __writeback_inodes_sb(sb, 1); | ||
1242 | } | ||
1243 | |||
1244 | /** | ||
1219 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1245 | * writeback_inodes_sb_if_idle - start writeback if none underway |
1220 | * @sb: the superblock | 1246 | * @sb: the superblock |
1221 | * | 1247 | * |
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
42 | if (wait) | 42 | if (wait) |
43 | sync_inodes_sb(sb); | 43 | sync_inodes_sb(sb); |
44 | else | 44 | else |
45 | writeback_inodes_sb(sb); | 45 | writeback_inodes_sb_locked(sb); |
46 | 46 | ||
47 | if (sb->s_op->sync_fs) | 47 | if (sb->s_op->sync_fs) |
48 | sb->s_op->sync_fs(sb, wait); | 48 | sb->s_op->sync_fs(sb, wait); |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 7534979d83bd..ff8bac63213f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); | |||
106 | void bdi_unregister(struct backing_dev_info *bdi); | 106 | void bdi_unregister(struct backing_dev_info *bdi); |
107 | int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); | 107 | int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); |
108 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 108 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
109 | long nr_pages); | 109 | long nr_pages, int sb_locked); |
110 | int bdi_writeback_task(struct bdi_writeback *wb); | 110 | int bdi_writeback_task(struct bdi_writeback *wb); |
111 | int bdi_has_dirty_io(struct backing_dev_info *bdi); | 111 | int bdi_has_dirty_io(struct backing_dev_info *bdi); |
112 | 112 | ||
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eb38a2c645f6..47e1c686cb02 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -65,6 +65,15 @@ struct writeback_control { | |||
65 | * so we use a single control to update them | 65 | * so we use a single control to update them |
66 | */ | 66 | */ |
67 | unsigned no_nrwrite_index_update:1; | 67 | unsigned no_nrwrite_index_update:1; |
68 | |||
69 | /* | ||
70 | * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, | ||
71 | * the writeback code will pin the sb for the caller. However, | ||
72 | * for eg umount, the caller does WB_SYNC_NONE but already has | ||
73 | * the sb pinned. If the below is set, caller already has the | ||
74 | * sb pinned. | ||
75 | */ | ||
76 | unsigned sb_pinned:1; | ||
68 | }; | 77 | }; |
69 | 78 | ||
70 | /* | 79 | /* |
@@ -73,6 +82,7 @@ struct writeback_control { | |||
73 | struct bdi_writeback; | 82 | struct bdi_writeback; |
74 | int inode_wait(void *); | 83 | int inode_wait(void *); |
75 | void writeback_inodes_sb(struct super_block *); | 84 | void writeback_inodes_sb(struct super_block *); |
85 | void writeback_inodes_sb_locked(struct super_block *); | ||
76 | int writeback_inodes_sb_if_idle(struct super_block *); | 86 | int writeback_inodes_sb_if_idle(struct super_block *); |
77 | void sync_inodes_sb(struct super_block *); | 87 | void sync_inodes_sb(struct super_block *); |
78 | void writeback_inodes_wbc(struct writeback_control *wbc); | 88 | void writeback_inodes_wbc(struct writeback_control *wbc); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d0f2b3765f8d..53b2fcf2d283 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
597 | (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) | 597 | (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) |
598 | + global_page_state(NR_UNSTABLE_NFS)) | 598 | + global_page_state(NR_UNSTABLE_NFS)) |
599 | > background_thresh))) | 599 | > background_thresh))) |
600 | bdi_start_writeback(bdi, NULL, 0); | 600 | bdi_start_writeback(bdi, NULL, 0, 0); |
601 | } | 601 | } |
602 | 602 | ||
603 | void set_page_dirty_balance(struct page *page, int page_mkwrite) | 603 | void set_page_dirty_balance(struct page *page, int page_mkwrite) |
@@ -705,7 +705,7 @@ void laptop_mode_timer_fn(unsigned long data) | |||
705 | */ | 705 | */ |
706 | 706 | ||
707 | if (bdi_has_dirty_io(&q->backing_dev_info)) | 707 | if (bdi_has_dirty_io(&q->backing_dev_info)) |
708 | bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); | 708 | bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0); |
709 | } | 709 | } |
710 | 710 | ||
711 | /* | 711 | /* |