diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 98 |
1 files changed, 76 insertions, 22 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 24e85ce11891..5c4161f1fd9a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -45,6 +45,7 @@ struct wb_writeback_args { | |||
45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
48 | unsigned int sb_pinned:1; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | /* | 51 | /* |
@@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) | |||
192 | } | 193 | } |
193 | 194 | ||
194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | 195 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, |
195 | struct wb_writeback_args *args) | 196 | struct wb_writeback_args *args, |
197 | int wait) | ||
196 | { | 198 | { |
197 | struct bdi_work *work; | 199 | struct bdi_work *work; |
198 | 200 | ||
@@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | |||
204 | if (work) { | 206 | if (work) { |
205 | bdi_work_init(work, args); | 207 | bdi_work_init(work, args); |
206 | bdi_queue_work(bdi, work); | 208 | bdi_queue_work(bdi, work); |
209 | if (wait) | ||
210 | bdi_wait_on_work_clear(work); | ||
207 | } else { | 211 | } else { |
208 | struct bdi_writeback *wb = &bdi->wb; | 212 | struct bdi_writeback *wb = &bdi->wb; |
209 | 213 | ||
@@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
230 | .sync_mode = WB_SYNC_ALL, | 234 | .sync_mode = WB_SYNC_ALL, |
231 | .nr_pages = LONG_MAX, | 235 | .nr_pages = LONG_MAX, |
232 | .range_cyclic = 0, | 236 | .range_cyclic = 0, |
237 | /* | ||
238 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
238 | * let's make it explicitly clear. | ||
240 | */ | ||
241 | .sb_pinned = 1, | ||
233 | }; | 242 | }; |
234 | struct bdi_work work; | 243 | struct bdi_work work; |
235 | 244 | ||
@@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
245 | * @bdi: the backing device to write from | 254 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | 255 | * @sb: write inodes from this super_block |
247 | * @nr_pages: the number of pages to write | 256 | * @nr_pages: the number of pages to write |
257 | * @sb_locked: caller already holds sb umount sem. | ||
248 | * | 258 | * |
249 | * Description: | 259 | * Description: |
250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 260 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
251 | * started when this function returns, we make no guarantees on | 261 | * started when this function returns, we make no guarantees on |
252 | * completion. Caller need not hold sb s_umount semaphore. | 262 | * completion. Caller specifies whether sb umount sem is held already or not. |
253 | * | 263 | * |
254 | */ | 264 | */ |
255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 265 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
256 | long nr_pages) | 266 | long nr_pages, int sb_locked) |
257 | { | 267 | { |
258 | struct wb_writeback_args args = { | 268 | struct wb_writeback_args args = { |
259 | .sb = sb, | 269 | .sb = sb, |
260 | .sync_mode = WB_SYNC_NONE, | 270 | .sync_mode = WB_SYNC_NONE, |
261 | .nr_pages = nr_pages, | 271 | .nr_pages = nr_pages, |
262 | .range_cyclic = 1, | 272 | .range_cyclic = 1, |
273 | .sb_pinned = sb_locked, | ||
263 | }; | 274 | }; |
264 | 275 | ||
265 | /* | 276 | /* |
@@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | |||
271 | args.for_background = 1; | 282 | args.for_background = 1; |
272 | } | 283 | } |
273 | 284 | ||
274 | bdi_alloc_queue_work(bdi, &args); | 285 | bdi_alloc_queue_work(bdi, &args, sb_locked); |
275 | } | 286 | } |
276 | 287 | ||
277 | /* | 288 | /* |
@@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
452 | 463 | ||
453 | BUG_ON(inode->i_state & I_SYNC); | 464 | BUG_ON(inode->i_state & I_SYNC); |
454 | 465 | ||
455 | /* Set I_SYNC, reset I_DIRTY */ | 466 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
456 | dirty = inode->i_state & I_DIRTY; | ||
457 | inode->i_state |= I_SYNC; | 467 | inode->i_state |= I_SYNC; |
458 | inode->i_state &= ~I_DIRTY; | 468 | inode->i_state &= ~I_DIRTY_PAGES; |
459 | |||
460 | spin_unlock(&inode_lock); | 469 | spin_unlock(&inode_lock); |
461 | 470 | ||
462 | ret = do_writepages(mapping, wbc); | 471 | ret = do_writepages(mapping, wbc); |
@@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
472 | ret = err; | 481 | ret = err; |
473 | } | 482 | } |
474 | 483 | ||
484 | /* | ||
485 | * Some filesystems may redirty the inode during the writeback | ||
486 | * due to delalloc, clear dirty metadata flags right before | ||
487 | * write_inode() | ||
488 | */ | ||
489 | spin_lock(&inode_lock); | ||
490 | dirty = inode->i_state & I_DIRTY; | ||
491 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | ||
492 | spin_unlock(&inode_lock); | ||
475 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 493 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
476 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 494 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
477 | int err = write_inode(inode, wbc); | 495 | int err = write_inode(inode, wbc); |
@@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
577 | /* | 595 | /* |
578 | * Caller must already hold the ref for this | 596 | * Caller must already hold the ref for this |
579 | */ | 597 | */ |
580 | if (wbc->sync_mode == WB_SYNC_ALL) { | 598 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { |
581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 599 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
582 | return SB_NOT_PINNED; | 600 | return SB_NOT_PINNED; |
583 | } | 601 | } |
@@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
751 | .for_kupdate = args->for_kupdate, | 769 | .for_kupdate = args->for_kupdate, |
752 | .for_background = args->for_background, | 770 | .for_background = args->for_background, |
753 | .range_cyclic = args->range_cyclic, | 771 | .range_cyclic = args->range_cyclic, |
772 | .sb_pinned = args->sb_pinned, | ||
754 | }; | 773 | }; |
755 | unsigned long oldest_jif; | 774 | unsigned long oldest_jif; |
756 | long wrote = 0; | 775 | long wrote = 0; |
@@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
852 | unsigned long expired; | 871 | unsigned long expired; |
853 | long nr_pages; | 872 | long nr_pages; |
854 | 873 | ||
874 | /* | ||
875 | * When set to zero, disable periodic writeback | ||
876 | */ | ||
877 | if (!dirty_writeback_interval) | ||
878 | return 0; | ||
879 | |||
855 | expired = wb->last_old_flush + | 880 | expired = wb->last_old_flush + |
856 | msecs_to_jiffies(dirty_writeback_interval * 10); | 881 | msecs_to_jiffies(dirty_writeback_interval * 10); |
857 | if (time_before(jiffies, expired)) | 882 | if (time_before(jiffies, expired)) |
@@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
887 | 912 | ||
888 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 913 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
889 | struct wb_writeback_args args = work->args; | 914 | struct wb_writeback_args args = work->args; |
915 | int post_clear; | ||
890 | 916 | ||
891 | /* | 917 | /* |
892 | * Override sync mode, in case we must wait for completion | 918 | * Override sync mode, in case we must wait for completion |
@@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
894 | if (force_wait) | 920 | if (force_wait) |
895 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 921 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; |
896 | 922 | ||
923 | post_clear = WB_SYNC_ALL || args.sb_pinned; | ||
924 | |||
897 | /* | 925 | /* |
898 | * If this isn't a data integrity operation, just notify | 926 | * If this isn't a data integrity operation, just notify |
899 | * that we have seen this work and we are now starting it. | 927 | * that we have seen this work and we are now starting it. |
900 | */ | 928 | */ |
901 | if (args.sync_mode == WB_SYNC_NONE) | 929 | if (!post_clear) |
902 | wb_clear_pending(wb, work); | 930 | wb_clear_pending(wb, work); |
903 | 931 | ||
904 | wrote += wb_writeback(wb, &args); | 932 | wrote += wb_writeback(wb, &args); |
@@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
907 | * This is a data integrity writeback, so only do the | 935 | * This is a data integrity writeback, so only do the |
908 | * notification when we have completed the work. | 936 | * notification when we have completed the work. |
909 | */ | 937 | */ |
910 | if (args.sync_mode == WB_SYNC_ALL) | 938 | if (post_clear) |
911 | wb_clear_pending(wb, work); | 939 | wb_clear_pending(wb, work); |
912 | } | 940 | } |
913 | 941 | ||
@@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
947 | break; | 975 | break; |
948 | } | 976 | } |
949 | 977 | ||
950 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 978 | if (dirty_writeback_interval) { |
951 | schedule_timeout_interruptible(wait_jiffies); | 979 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); |
980 | schedule_timeout_interruptible(wait_jiffies); | ||
981 | } else { | ||
982 | set_current_state(TASK_INTERRUPTIBLE); | ||
983 | if (list_empty_careful(&wb->bdi->work_list) && | ||
984 | !kthread_should_stop()) | ||
985 | schedule(); | ||
986 | __set_current_state(TASK_RUNNING); | ||
987 | } | ||
988 | |||
952 | try_to_freeze(); | 989 | try_to_freeze(); |
953 | } | 990 | } |
954 | 991 | ||
@@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) | |||
974 | if (!bdi_has_dirty_io(bdi)) | 1011 | if (!bdi_has_dirty_io(bdi)) |
975 | continue; | 1012 | continue; |
976 | 1013 | ||
977 | bdi_alloc_queue_work(bdi, &args); | 1014 | bdi_alloc_queue_work(bdi, &args, 0); |
978 | } | 1015 | } |
979 | 1016 | ||
980 | rcu_read_unlock(); | 1017 | rcu_read_unlock(); |
@@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1183 | iput(old_inode); | 1220 | iput(old_inode); |
1184 | } | 1221 | } |
1185 | 1222 | ||
1223 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
1224 | { | ||
1225 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1226 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1227 | long nr_to_write; | ||
1228 | |||
1229 | nr_to_write = nr_dirty + nr_unstable + | ||
1230 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1231 | |||
1232 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
1233 | } | ||
1234 | |||
1186 | /** | 1235 | /** |
1187 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1236 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
1188 | * @sb: the superblock | 1237 | * @sb: the superblock |
@@ -1194,18 +1243,23 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1194 | */ | 1243 | */ |
1195 | void writeback_inodes_sb(struct super_block *sb) | 1244 | void writeback_inodes_sb(struct super_block *sb) |
1196 | { | 1245 | { |
1197 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1246 | __writeback_inodes_sb(sb, 0); |
1198 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1199 | long nr_to_write; | ||
1200 | |||
1201 | nr_to_write = nr_dirty + nr_unstable + | ||
1202 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1203 | |||
1204 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | ||
1205 | } | 1247 | } |
1206 | EXPORT_SYMBOL(writeback_inodes_sb); | 1248 | EXPORT_SYMBOL(writeback_inodes_sb); |
1207 | 1249 | ||
1208 | /** | 1250 | /** |
1251 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | ||
1252 | * @sb: the superblock | ||
1253 | * | ||
1254 | * Like writeback_inodes_sb(), except the caller already holds the | ||
1255 | * sb umount sem. | ||
1256 | */ | ||
1257 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
1258 | { | ||
1259 | __writeback_inodes_sb(sb, 1); | ||
1260 | } | ||
1261 | |||
1262 | /** | ||
1209 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1263 | * writeback_inodes_sb_if_idle - start writeback if none underway |
1210 | * @sb: the superblock | 1264 | * @sb: the superblock |
1211 | * | 1265 | * |