Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--	fs/fs-writeback.c	92
1 file changed, 56 insertions(+), 36 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d0ff0b8cf309..e5eaa62fd17f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -421,9 +421,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * If we're a pdflush thread, then implement pdflush collision avoidance
  * against the entire list.
  *
- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
- * that it can be located for waiting on in __writeback_single_inode().
- *
  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
  * This function assumes that the blockdev superblock's inodes are backed by
  * a variety of queues, so all inodes are searched. For other superblocks,
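With the WB_SYNC_HOLD hack removed, a caller that wants sys_sync()-style data
integrity asks for it through sync_mode alone. A minimal sketch of such a
caller (hypothetical, not part of this patch; it mirrors what sync_inodes_sb()
does in the WB_SYNC_ALL case further down):

        struct writeback_control wbc = {
                .sync_mode   = WB_SYNC_ALL,     /* write dirty pages and wait on them */
                .range_start = 0,               /* cover the whole file range */
                .range_end   = LLONG_MAX,
        };

        generic_sync_sb_inodes(sb, &wbc);       /* sb: the super_block being synced */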
@@ -443,6 +440,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
                 struct writeback_control *wbc)
 {
         const unsigned long start = jiffies;    /* livelock avoidance */
+        int sync = wbc->sync_mode == WB_SYNC_ALL;
 
         spin_lock(&inode_lock);
         if (!wbc->for_kupdate || list_empty(&sb->s_io))
@@ -499,10 +497,6 @@ void generic_sync_sb_inodes(struct super_block *sb,
                         __iget(inode);
                         pages_skipped = wbc->pages_skipped;
                         __writeback_single_inode(inode, wbc);
-                        if (wbc->sync_mode == WB_SYNC_HOLD) {
-                                inode->dirtied_when = jiffies;
-                                list_move(&inode->i_list, &sb->s_dirty);
-                        }
                         if (current_is_pdflush())
                                 writeback_release(bdi);
                         if (wbc->pages_skipped != pages_skipped) {
@@ -523,7 +517,49 @@ void generic_sync_sb_inodes(struct super_block *sb,
                 if (!list_empty(&sb->s_more_io))
                         wbc->more_io = 1;
         }
-        spin_unlock(&inode_lock);
+
+        if (sync) {
+                struct inode *inode, *old_inode = NULL;
+
+                /*
+                 * Data integrity sync. Must wait for all pages under writeback,
+                 * because there may have been pages dirtied before our sync
+                 * call, but which had writeout started before we write it out.
+                 * In which case, the inode may not be on the dirty list, but
+                 * we still have to wait for that writeout.
+                 */
+                list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+                        struct address_space *mapping;
+
+                        if (inode->i_state & (I_FREEING|I_WILL_FREE))
+                                continue;
+                        mapping = inode->i_mapping;
+                        if (mapping->nrpages == 0)
+                                continue;
+                        __iget(inode);
+                        spin_unlock(&inode_lock);
+                        /*
+                         * We hold a reference to 'inode' so it couldn't have
+                         * been removed from s_inodes list while we dropped the
+                         * inode_lock.  We cannot iput the inode now as we can
+                         * be holding the last reference and we cannot iput it
+                         * under inode_lock. So we keep the reference and iput
+                         * it later.
+                         */
+                        iput(old_inode);
+                        old_inode = inode;
+
+                        filemap_fdatawait(mapping);
+
+                        cond_resched();
+
+                        spin_lock(&inode_lock);
+                }
+                spin_unlock(&inode_lock);
+                iput(old_inode);
+        } else
+                spin_unlock(&inode_lock);
+
         return;         /* Leave any unwritten inodes on s_io */
 }
 EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
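The wait loop above scans sb->s_inodes rather than the dirty lists because a
data-integrity sync must also catch writeback that was already in flight before
the sync began: if a page's writeout started just before sys_sync() ran, its
inode may sit on no dirty list at all, yet the sync must still wait for that
I/O. The loop also uses a common inode_lock idiom: __iget() pins the current
inode so it survives while the lock is dropped for the sleeping
filemap_fdatawait(), but the matching iput() is deferred one iteration, since
dropping the last reference may sleep and so must not happen under inode_lock.
A stripped-down sketch of that idiom, with hypothetical pin()/unpin() helpers
standing in for __iget()/iput():

        spin_lock(&lock);
        list_for_each_entry(obj, &head, node) {
                pin(obj);                       /* keep obj alive once the lock drops */
                spin_unlock(&lock);
                unpin(old);                     /* may sleep: safe, lock not held */
                old = obj;
                wait_for_io(obj);               /* the blocking work, e.g. filemap_fdatawait() */
                spin_lock(&lock);               /* retake before advancing the iterator */
        }
        spin_unlock(&lock);
        unpin(old);                             /* release the final pinned object */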
@@ -588,8 +624,7 @@ restart:
 
 /*
  * writeback and wait upon the filesystem's dirty inodes.  The caller will
- * do this in two passes - one to write, and one to wait.  WB_SYNC_HOLD is
- * used to park the written inodes on sb->s_dirty for the wait pass.
+ * do this in two passes - one to write, and one to wait.
  *
  * A finite limit is set on the number of pages which will be written.
  * To prevent infinite livelock of sys_sync().
@@ -600,30 +635,21 @@ restart:
 void sync_inodes_sb(struct super_block *sb, int wait)
 {
         struct writeback_control wbc = {
-                .sync_mode      = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+                .sync_mode      = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
                 .range_start    = 0,
                 .range_end      = LLONG_MAX,
         };
-        unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-        unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 
-        wbc.nr_to_write = nr_dirty + nr_unstable +
-                        (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
-                        nr_dirty + nr_unstable;
-        wbc.nr_to_write += wbc.nr_to_write / 2;         /* Bit more for luck */
-        sync_sb_inodes(sb, &wbc);
-}
+        if (!wait) {
+                unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+                unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 
-/*
- * Rather lame livelock avoidance.
- */
-static void set_sb_syncing(int val)
-{
-        struct super_block *sb;
-        spin_lock(&sb_lock);
-        list_for_each_entry_reverse(sb, &super_blocks, s_list)
-                sb->s_syncing = val;
-        spin_unlock(&sb_lock);
-}
+                wbc.nr_to_write = nr_dirty + nr_unstable +
+                        (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+        } else
+                wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
+
+        sync_sb_inodes(sb, &wbc);
 }
 
 /**
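The nr_to_write budget is now computed once, and only for the non-waiting pass.
As a worked example with invented numbers: given 1000 dirty pages, 200 unstable
NFS pages, and 50 in-use dirty inodes, the old formula came to
(1000 + 200) + 50 + (1000 + 200) = 2450, plus half again "for luck" for 3675;
the new WB_SYNC_NONE path budgets 1000 + 200 + 50 = 1250, while the WB_SYNC_ALL
path uses LONG_MAX, since a waiting sync must not be cut short by a page quota
anyway.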
@@ -652,9 +678,6 @@ static void __sync_inodes(int wait)
         spin_lock(&sb_lock);
 restart:
         list_for_each_entry(sb, &super_blocks, s_list) {
-                if (sb->s_syncing)
-                        continue;
-                sb->s_syncing = 1;
                 sb->s_count++;
                 spin_unlock(&sb_lock);
                 down_read(&sb->s_umount);
@@ -672,13 +695,10 @@ restart:
 
 void sync_inodes(int wait)
 {
-        set_sb_syncing(0);
         __sync_inodes(0);
 
-        if (wait) {
-                set_sb_syncing(0);
+        if (wait)
                 __sync_inodes(1);
-        }
 }
 
 /**
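What survives in sync_inodes() is the classic two-pass scheme: pass one starts
writeout everywhere without blocking, pass two goes back and waits, which lets
I/O to many devices proceed in parallel instead of being waited on one inode at
a time. The same shape appears at single-file granularity with the standard
kernel helpers (shown only as an analogy, not code from this patch):

        filemap_fdatawrite(mapping);    /* pass 1: start writeout, don't wait */
        filemap_fdatawait(mapping);     /* pass 2: wait for it to complete */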