about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Nick Piggin <npiggin@suse.de> 2009-01-06 17:40:25 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-01-06 18:59:09 -0500
commit 38f21977663126fef53f5585e7f1653d8ebe55c4 (patch)
tree be5ee7a264fea0d9f4b2d109b7e08b7a1ec794c9
parent 4f5a99d64c17470a784a6c68064207d82e3e74a5 (diff)
fs: sync_sb_inodes fix
Fix data integrity semantics required by sys_sync, by iterating over all inodes and waiting for any writeback pages after the initial writeout. Comments explain the exact problem.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/fs-writeback.c 60
1 files changed, 53 insertions, 7 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d99601af9e48..a9ee474f9691 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -440,6 +440,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
440 struct writeback_control *wbc) 440 struct writeback_control *wbc)
441{ 441{
442 const unsigned long start = jiffies; /* livelock avoidance */ 442 const unsigned long start = jiffies; /* livelock avoidance */
443 int sync = wbc->sync_mode == WB_SYNC_ALL;
443 444
444 spin_lock(&inode_lock); 445 spin_lock(&inode_lock);
445 if (!wbc->for_kupdate || list_empty(&sb->s_io)) 446 if (!wbc->for_kupdate || list_empty(&sb->s_io))
@@ -516,7 +517,49 @@ void generic_sync_sb_inodes(struct super_block *sb,
516 if (!list_empty(&sb->s_more_io)) 517 if (!list_empty(&sb->s_more_io))
517 wbc->more_io = 1; 518 wbc->more_io = 1;
518 } 519 }
519 spin_unlock(&inode_lock); 520
521 if (sync) {
522 struct inode *inode, *old_inode = NULL;
523
524 /*
525 * Data integrity sync. Must wait for all pages under writeback,
526 * because there may have been pages dirtied before our sync
527 * call, but which had writeout started before we write it out.
528 * In which case, the inode may not be on the dirty list, but
529 * we still have to wait for that writeout.
530 */
531 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
532 struct address_space *mapping;
533
534 if (inode->i_state & (I_FREEING|I_WILL_FREE))
535 continue;
536 mapping = inode->i_mapping;
537 if (mapping->nrpages == 0)
538 continue;
539 __iget(inode);
540 spin_unlock(&inode_lock);
541 /*
542 * We hold a reference to 'inode' so it couldn't have
543 * been removed from s_inodes list while we dropped the
544 * inode_lock. We cannot iput the inode now as we can
545 * be holding the last reference and we cannot iput it
546 * under inode_lock. So we keep the reference and iput
547 * it later.
548 */
549 iput(old_inode);
550 old_inode = inode;
551
552 filemap_fdatawait(mapping);
553
554 cond_resched();
555
556 spin_lock(&inode_lock);
557 }
558 spin_unlock(&inode_lock);
559 iput(old_inode);
560 } else
561 spin_unlock(&inode_lock);
562
520 return; /* Leave any unwritten inodes on s_io */ 563 return; /* Leave any unwritten inodes on s_io */
521} 564}
522EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); 565EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
@@ -596,13 +639,16 @@ void sync_inodes_sb(struct super_block *sb, int wait)
596 .range_start = 0, 639 .range_start = 0,
597 .range_end = LLONG_MAX, 640 .range_end = LLONG_MAX,
598 }; 641 };
599 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
600 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
601 642
602 wbc.nr_to_write = nr_dirty + nr_unstable + 643 if (!wait) {
603 (inodes_stat.nr_inodes - inodes_stat.nr_unused) + 644 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
604 nr_dirty + nr_unstable; 645 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
605 wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ 646
647 wbc.nr_to_write = nr_dirty + nr_unstable +
648 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
649 } else
650 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
651
606 sync_sb_inodes(sb, &wbc); 652 sync_sb_inodes(sb, &wbc);
607} 653}
608 654