diff options
author | Nick Piggin <npiggin@suse.de> | 2009-01-06 17:40:25 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-06 18:59:09 -0500 |
commit | 38f21977663126fef53f5585e7f1653d8ebe55c4 (patch) | |
tree | be5ee7a264fea0d9f4b2d109b7e08b7a1ec794c9 | |
parent | 4f5a99d64c17470a784a6c68064207d82e3e74a5 (diff) |
fs: sync_sb_inodes fix
Fix data integrity semantics required by sys_sync, by iterating over all
inodes and waiting for any writeback pages after the initial writeout.
Comments explain the exact problem.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/fs-writeback.c | 60 |
1 files changed, 53 insertions, 7 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d99601af9e48..a9ee474f9691 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -440,6 +440,7 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
440 | struct writeback_control *wbc) | 440 | struct writeback_control *wbc) |
441 | { | 441 | { |
442 | const unsigned long start = jiffies; /* livelock avoidance */ | 442 | const unsigned long start = jiffies; /* livelock avoidance */ |
443 | int sync = wbc->sync_mode == WB_SYNC_ALL; | ||
443 | 444 | ||
444 | spin_lock(&inode_lock); | 445 | spin_lock(&inode_lock); |
445 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) | 446 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) |
@@ -516,7 +517,49 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
516 | if (!list_empty(&sb->s_more_io)) | 517 | if (!list_empty(&sb->s_more_io)) |
517 | wbc->more_io = 1; | 518 | wbc->more_io = 1; |
518 | } | 519 | } |
519 | spin_unlock(&inode_lock); | 520 | |
521 | if (sync) { | ||
522 | struct inode *inode, *old_inode = NULL; | ||
523 | |||
524 | /* | ||
525 | * Data integrity sync. Must wait for all pages under writeback, | ||
526 | * because there may have been pages dirtied before our sync | ||
527 | * call, but which had writeout started before we write it out. | ||
528 | * In which case, the inode may not be on the dirty list, but | ||
529 | * we still have to wait for that writeout. | ||
530 | */ | ||
531 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | ||
532 | struct address_space *mapping; | ||
533 | |||
534 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) | ||
535 | continue; | ||
536 | mapping = inode->i_mapping; | ||
537 | if (mapping->nrpages == 0) | ||
538 | continue; | ||
539 | __iget(inode); | ||
540 | spin_unlock(&inode_lock); | ||
541 | /* | ||
542 | * We hold a reference to 'inode' so it couldn't have | ||
543 | * been removed from s_inodes list while we dropped the | ||
544 | * inode_lock. We cannot iput the inode now as we can | ||
545 | * be holding the last reference and we cannot iput it | ||
546 | * under inode_lock. So we keep the reference and iput | ||
547 | * it later. | ||
548 | */ | ||
549 | iput(old_inode); | ||
550 | old_inode = inode; | ||
551 | |||
552 | filemap_fdatawait(mapping); | ||
553 | |||
554 | cond_resched(); | ||
555 | |||
556 | spin_lock(&inode_lock); | ||
557 | } | ||
558 | spin_unlock(&inode_lock); | ||
559 | iput(old_inode); | ||
560 | } else | ||
561 | spin_unlock(&inode_lock); | ||
562 | |||
520 | return; /* Leave any unwritten inodes on s_io */ | 563 | return; /* Leave any unwritten inodes on s_io */ |
521 | } | 564 | } |
522 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); | 565 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); |
@@ -596,13 +639,16 @@ void sync_inodes_sb(struct super_block *sb, int wait) | |||
596 | .range_start = 0, | 639 | .range_start = 0, |
597 | .range_end = LLONG_MAX, | 640 | .range_end = LLONG_MAX, |
598 | }; | 641 | }; |
599 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
600 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
601 | 642 | ||
602 | wbc.nr_to_write = nr_dirty + nr_unstable + | 643 | if (!wait) { |
603 | (inodes_stat.nr_inodes - inodes_stat.nr_unused) + | 644 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
604 | nr_dirty + nr_unstable; | 645 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
605 | wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ | 646 | |
647 | wbc.nr_to_write = nr_dirty + nr_unstable + | ||
648 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
649 | } else | ||
650 | wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ | ||
651 | |||
606 | sync_sb_inodes(sb, &wbc); | 652 | sync_sb_inodes(sb, &wbc); |
607 | } | 653 | } |
608 | 654 | ||