Diffstat (limited to 'fs/fs-writeback.c')
 -rw-r--r--  fs/fs-writeback.c | 198
 1 file changed, 77 insertions(+), 121 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 91013ff7dd53..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi)
 	clear_bit(BDI_pdflush, &bdi->state);
 }
 
+static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+{
+	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
+		struct dentry *dentry;
+		const char *name = "?";
+
+		dentry = d_find_alias(inode);
+		if (dentry) {
+			spin_lock(&dentry->d_lock);
+			name = (const char *) dentry->d_name.name;
+		}
+		printk(KERN_DEBUG
+		       "%s(%d): dirtied inode %lu (%s) on %s\n",
+		       current->comm, task_pid_nr(current), inode->i_ino,
+		       name, inode->i_sb->s_id);
+		if (dentry) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+		}
+	}
+}
+
 /**
  * __mark_inode_dirty - internal function
  * @inode: inode to mark
@@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	if ((inode->i_state & flags) == flags)
 		return;
 
-	if (unlikely(block_dump)) {
-		struct dentry *dentry = NULL;
-		const char *name = "?";
-
-		if (!list_empty(&inode->i_dentry)) {
-			dentry = list_entry(inode->i_dentry.next,
-					    struct dentry, d_alias);
-			if (dentry && dentry->d_name.name)
-				name = (const char *) dentry->d_name.name;
-		}
-
-		if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
-			printk(KERN_DEBUG
-			       "%s(%d): dirtied inode %lu (%s) on %s\n",
-			       current->comm, task_pid_nr(current), inode->i_ino,
-			       name, inode->i_sb->s_id);
-	}
+	if (unlikely(block_dump))
+		block_dump___mark_inode_dirty(inode);
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & flags) != flags) {
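
Worth noting about the two hunks above: the old open-coded block read inode->i_dentry.next with no reference held and no lock taken, so the dentry and its name could be freed while printk() was still formatting them. The new helper pins an alias with d_find_alias() and reads d_name only under d_lock. A minimal sketch of that idiom, distilled from the patch (the function name here is illustrative, not from the tree):

static void print_alias_name(struct inode *inode)	/* hypothetical name */
{
	struct dentry *dentry = d_find_alias(inode);	/* takes a reference */
	const char *name = "?";

	if (dentry) {
		spin_lock(&dentry->d_lock);	/* d_name is stable only under d_lock */
		name = (const char *) dentry->d_name.name;
	}
	printk(KERN_DEBUG "inode %lu (%s)\n", inode->i_ino, name);
	if (dentry) {
		spin_unlock(&dentry->d_lock);
		dput(dentry);			/* drop the d_find_alias() reference */
	}
}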
@@ -271,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
 EXPORT_SYMBOL(sb_has_dirty_inodes);
 
 /*
- * Write a single inode's dirty pages and inode data out to disk.
+ * Wait for writeback on an inode to complete.
+ */
+static void inode_wait_for_writeback(struct inode *inode)
+{
+	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
+	wait_queue_head_t *wqh;
+
+	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	do {
+		spin_unlock(&inode_lock);
+		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+		spin_lock(&inode_lock);
+	} while (inode->i_state & I_SYNC);
+}
+
+/*
+ * Write out an inode's dirty pages.  Called under inode_lock.  Either the
+ * caller has ref on the inode (either via __iget or via syscall against an fd)
+ * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
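
The new inode_wait_for_writeback() is the standard bit-waitqueue pattern: drop inode_lock, sleep until the __I_SYNC bit clears, retake the lock, and recheck. The inode_wait argument is the action callback that __wait_on_bit() invokes to do the actual sleeping; assuming the usual definition in fs/inode.c at this point in history, it amounts to no more than:

/*
 * Sketch of the bit-wait action callback (actual definition lives in
 * fs/inode.c): it is handed the word being waited on and returns
 * nonzero only to abort the wait early.
 */
int inode_wait(void *word)
{
	schedule();
	return 0;
}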
@@ -281,15 +307,39 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
  * Called under inode_lock.
  */
 static int
-__sync_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	unsigned dirty;
 	struct address_space *mapping = inode->i_mapping;
 	int wait = wbc->sync_mode == WB_SYNC_ALL;
+	unsigned dirty;
 	int ret;
 
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
+	if (inode->i_state & I_SYNC) {
+		/*
+		 * If this inode is locked for writeback and we are not doing
+		 * writeback-for-data-integrity, move it to s_more_io so that
+		 * writeback can proceed with the other inodes on s_io.
+		 *
+		 * We'll have another go at writing back this inode when we
+		 * completed a full scan of s_io.
+		 */
+		if (!wait) {
+			requeue_io(inode);
+			return 0;
+		}
+
+		/*
+		 * It's a data-integrity sync.  We must wait.
+		 */
+		inode_wait_for_writeback(inode);
+	}
+
 	BUG_ON(inode->i_state & I_SYNC);
-	WARN_ON(inode->i_state & I_NEW);
 
 	/* Set I_SYNC, reset I_DIRTY */
 	dirty = inode->i_state & I_DIRTY;
@@ -314,9 +364,8 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	}
 
 	spin_lock(&inode_lock);
-	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state &= ~I_SYNC;
-	if (!(inode->i_state & I_FREEING)) {
+	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
 		if (!(inode->i_state & I_DIRTY) &&
 		    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 			/*
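
A waiter on __I_SYNC only works if whoever clears the bit also wakes the bit waitqueue: the "inode->i_state &= ~I_SYNC" above must be paired with a wakeup, or inode_wait_for_writeback() sleeps forever. In this file that pairing is done by a small helper; the name matches the tree's inode_sync_complete(), but treat the body as a sketch rather than part of this patch:

static void inode_sync_complete(struct inode *inode)
{
	/*
	 * Sketch: order the I_SYNC clear (done under inode_lock) against
	 * the waitqueue check, so a concurrent waiter cannot miss the
	 * wakeup.
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}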
@@ -385,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
- */
-static int
-__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	wait_queue_head_t *wqh;
-
-	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-	else
-		WARN_ON(inode->i_state & I_WILL_FREE);
-
-	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
-		/*
-		 * We're skipping this inode because it's locked, and we're not
-		 * doing writeback-for-data-integrity.  Move it to s_more_io so
-		 * that writeback can proceed with the other inodes on s_io.
-		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of s_io.
-		 */
-		requeue_io(inode);
-		return 0;
-	}
-
-	/*
-	 * It's a data-integrity sync.  We must wait.
-	 */
-	if (inode->i_state & I_SYNC) {
-		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-
-		wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-		do {
-			spin_unlock(&inode_lock);
-			__wait_on_bit(wqh, &wq, inode_wait,
-							TASK_UNINTERRUPTIBLE);
-			spin_lock(&inode_lock);
-		} while (inode->i_state & I_SYNC);
-	}
-	return __sync_single_inode(inode, wbc);
-}
-
-/*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
  *
@@ -487,7 +492,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
 			break;
 		}
 
-		if (inode->i_state & I_NEW) {
+		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
 		}
@@ -518,10 +523,10 @@ void generic_sync_sb_inodes(struct super_block *sb,
 		if (current_is_pdflush() && !writeback_acquire(bdi))
 			break;
 
-		BUG_ON(inode->i_state & I_FREEING);
+		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
-		__writeback_single_inode(inode, wbc);
+		writeback_single_inode(inode, wbc);
 		if (current_is_pdflush())
 			writeback_release(bdi);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -679,55 +684,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
 }
 
 /**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last.  This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep.  So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode().  This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
-			sync_blockdev(sb->s_bdev);
-		}
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
-	__sync_inodes(0);
-
-	if (wait)
-		__sync_inodes(1);
-}
-
-/**
  * write_inode_now - write an inode to disk
  * @inode: inode to write to disk
  * @sync: whether the write should be synchronous or not
@@ -752,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
 
 	might_sleep();
 	spin_lock(&inode_lock);
-	ret = __writeback_single_inode(inode, &wbc);
+	ret = writeback_single_inode(inode, &wbc);
 	spin_unlock(&inode_lock);
 	if (sync)
 		inode_sync_wait(inode);
@@ -776,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret;
 
 	spin_lock(&inode_lock);
-	ret = __writeback_single_inode(inode, wbc);
+	ret = writeback_single_inode(inode, wbc);
 	spin_unlock(&inode_lock);
 	return ret;
 }
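
With __writeback_single_inode() folded away, writeback_single_inode() is the one writeout path, and sync_inode() / write_inode_now() are thin exported wrappers over it. For orientation, a caller flushing a single inode for data integrity through the exported API would look roughly like this (a usage sketch, not code from the patch):

	/* Sketch: data-integrity flush of one inode via sync_inode(). */
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,	/* wait on the writeout */
		.nr_to_write	= LONG_MAX,	/* no page budget */
	};
	int err;

	/* sync_inode() takes and releases inode_lock internally. */
	err = sync_inode(inode, &wbc);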