diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 202 |
1 files changed, 106 insertions, 96 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9d5360c4c2af..4b37f7cea4dd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
19 | #include <linux/slab.h> | ||
19 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
20 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
@@ -242,6 +243,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
242 | /** | 243 | /** |
243 | * bdi_start_writeback - start writeback | 244 | * bdi_start_writeback - start writeback |
244 | * @bdi: the backing device to write from | 245 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | ||
245 | * @nr_pages: the number of pages to write | 247 | * @nr_pages: the number of pages to write |
246 | * | 248 | * |
247 | * Description: | 249 | * Description: |
@@ -380,10 +382,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | |||
380 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 382 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
381 | } | 383 | } |
382 | 384 | ||
383 | static int write_inode(struct inode *inode, int sync) | 385 | static int write_inode(struct inode *inode, struct writeback_control *wbc) |
384 | { | 386 | { |
385 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) | 387 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) |
386 | return inode->i_sb->s_op->write_inode(inode, sync); | 388 | return inode->i_sb->s_op->write_inode(inode, wbc); |
387 | return 0; | 389 | return 0; |
388 | } | 390 | } |
389 | 391 | ||
@@ -420,7 +422,6 @@ static int | |||
420 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 422 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
421 | { | 423 | { |
422 | struct address_space *mapping = inode->i_mapping; | 424 | struct address_space *mapping = inode->i_mapping; |
423 | int wait = wbc->sync_mode == WB_SYNC_ALL; | ||
424 | unsigned dirty; | 425 | unsigned dirty; |
425 | int ret; | 426 | int ret; |
426 | 427 | ||
@@ -438,7 +439,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
438 | * We'll have another go at writing back this inode when we | 439 | * We'll have another go at writing back this inode when we |
439 | * completed a full scan of b_io. | 440 | * completed a full scan of b_io. |
440 | */ | 441 | */ |
441 | if (!wait) { | 442 | if (wbc->sync_mode != WB_SYNC_ALL) { |
442 | requeue_io(inode); | 443 | requeue_io(inode); |
443 | return 0; | 444 | return 0; |
444 | } | 445 | } |
@@ -460,15 +461,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
460 | 461 | ||
461 | ret = do_writepages(mapping, wbc); | 462 | ret = do_writepages(mapping, wbc); |
462 | 463 | ||
463 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 464 | /* |
464 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 465 | * Make sure to wait on the data before writing out the metadata. |
465 | int err = write_inode(inode, wait); | 466 | * This is important for filesystems that modify metadata on data |
467 | * I/O completion. | ||
468 | */ | ||
469 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
470 | int err = filemap_fdatawait(mapping); | ||
466 | if (ret == 0) | 471 | if (ret == 0) |
467 | ret = err; | 472 | ret = err; |
468 | } | 473 | } |
469 | 474 | ||
470 | if (wait) { | 475 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
471 | int err = filemap_fdatawait(mapping); | 476 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
477 | int err = write_inode(inode, wbc); | ||
472 | if (ret == 0) | 478 | if (ret == 0) |
473 | ret = err; | 479 | ret = err; |
474 | } | 480 | } |
@@ -548,134 +554,85 @@ select_queue: | |||
548 | return ret; | 554 | return ret; |
549 | } | 555 | } |
550 | 556 | ||
551 | static void unpin_sb_for_writeback(struct super_block **psb) | 557 | static void unpin_sb_for_writeback(struct super_block *sb) |
552 | { | 558 | { |
553 | struct super_block *sb = *psb; | 559 | up_read(&sb->s_umount); |
554 | 560 | put_super(sb); | |
555 | if (sb) { | ||
556 | up_read(&sb->s_umount); | ||
557 | put_super(sb); | ||
558 | *psb = NULL; | ||
559 | } | ||
560 | } | 561 | } |
561 | 562 | ||
563 | enum sb_pin_state { | ||
564 | SB_PINNED, | ||
565 | SB_NOT_PINNED, | ||
566 | SB_PIN_FAILED | ||
567 | }; | ||
568 | |||
562 | /* | 569 | /* |
563 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned | 570 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned |
564 | * before calling writeback. So make sure that we do pin it, so it doesn't | 571 | * before calling writeback. So make sure that we do pin it, so it doesn't |
565 | * go away while we are writing inodes from it. | 572 | * go away while we are writing inodes from it. |
566 | * | ||
567 | * Returns 0 if the super was successfully pinned (or pinning wasn't needed), | ||
568 | * 1 if we failed. | ||
569 | */ | 573 | */ |
570 | static int pin_sb_for_writeback(struct writeback_control *wbc, | 574 | static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, |
571 | struct inode *inode, struct super_block **psb) | 575 | struct super_block *sb) |
572 | { | 576 | { |
573 | struct super_block *sb = inode->i_sb; | ||
574 | |||
575 | /* | ||
576 | * If this sb is already pinned, nothing more to do. If not and | ||
577 | * *psb is non-NULL, unpin the old one first | ||
578 | */ | ||
579 | if (sb == *psb) | ||
580 | return 0; | ||
581 | else if (*psb) | ||
582 | unpin_sb_for_writeback(psb); | ||
583 | |||
584 | /* | 577 | /* |
585 | * Caller must already hold the ref for this | 578 | * Caller must already hold the ref for this |
586 | */ | 579 | */ |
587 | if (wbc->sync_mode == WB_SYNC_ALL) { | 580 | if (wbc->sync_mode == WB_SYNC_ALL) { |
588 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
589 | return 0; | 582 | return SB_NOT_PINNED; |
590 | } | 583 | } |
591 | |||
592 | spin_lock(&sb_lock); | 584 | spin_lock(&sb_lock); |
593 | sb->s_count++; | 585 | sb->s_count++; |
594 | if (down_read_trylock(&sb->s_umount)) { | 586 | if (down_read_trylock(&sb->s_umount)) { |
595 | if (sb->s_root) { | 587 | if (sb->s_root) { |
596 | spin_unlock(&sb_lock); | 588 | spin_unlock(&sb_lock); |
597 | goto pinned; | 589 | return SB_PINNED; |
598 | } | 590 | } |
599 | /* | 591 | /* |
600 | * umounted, drop rwsem again and fall through to failure | 592 | * umounted, drop rwsem again and fall through to failure |
601 | */ | 593 | */ |
602 | up_read(&sb->s_umount); | 594 | up_read(&sb->s_umount); |
603 | } | 595 | } |
604 | |||
605 | sb->s_count--; | 596 | sb->s_count--; |
606 | spin_unlock(&sb_lock); | 597 | spin_unlock(&sb_lock); |
607 | return 1; | 598 | return SB_PIN_FAILED; |
608 | pinned: | ||
609 | *psb = sb; | ||
610 | return 0; | ||
611 | } | 599 | } |
612 | 600 | ||
613 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 601 | /* |
614 | struct writeback_control *wbc) | 602 | * Write a portion of b_io inodes which belong to @sb. |
603 | * If @wbc->sb != NULL, then find and write all such | ||
604 | * inodes. Otherwise write only ones which go sequentially | ||
605 | * in reverse order. | ||
606 | * Return 1, if the caller writeback routine should be | ||
607 | * interrupted. Otherwise return 0. | ||
608 | */ | ||
609 | static int writeback_sb_inodes(struct super_block *sb, | ||
610 | struct bdi_writeback *wb, | ||
611 | struct writeback_control *wbc) | ||
615 | { | 612 | { |
616 | struct super_block *sb = wbc->sb, *pin_sb = NULL; | ||
617 | const int is_blkdev_sb = sb_is_blkdev_sb(sb); | ||
618 | const unsigned long start = jiffies; /* livelock avoidance */ | ||
619 | |||
620 | spin_lock(&inode_lock); | ||
621 | |||
622 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
623 | queue_io(wb, wbc->older_than_this); | ||
624 | |||
625 | while (!list_empty(&wb->b_io)) { | 613 | while (!list_empty(&wb->b_io)) { |
626 | struct inode *inode = list_entry(wb->b_io.prev, | ||
627 | struct inode, i_list); | ||
628 | long pages_skipped; | 614 | long pages_skipped; |
629 | 615 | struct inode *inode = list_entry(wb->b_io.prev, | |
630 | /* | 616 | struct inode, i_list); |
631 | * super block given and doesn't match, skip this inode | 617 | if (wbc->sb && sb != inode->i_sb) { |
632 | */ | 618 | /* super block given and doesn't |
633 | if (sb && sb != inode->i_sb) { | 619 | match, skip this inode */ |
634 | redirty_tail(inode); | 620 | redirty_tail(inode); |
635 | continue; | 621 | continue; |
636 | } | 622 | } |
637 | 623 | if (sb != inode->i_sb) | |
638 | if (!bdi_cap_writeback_dirty(wb->bdi)) { | 624 | /* finish with this superblock */ |
639 | redirty_tail(inode); | 625 | return 0; |
640 | if (is_blkdev_sb) { | ||
641 | /* | ||
642 | * Dirty memory-backed blockdev: the ramdisk | ||
643 | * driver does this. Skip just this inode | ||
644 | */ | ||
645 | continue; | ||
646 | } | ||
647 | /* | ||
648 | * Dirty memory-backed inode against a filesystem other | ||
649 | * than the kernel-internal bdev filesystem. Skip the | ||
650 | * entire superblock. | ||
651 | */ | ||
652 | break; | ||
653 | } | ||
654 | |||
655 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 626 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
656 | requeue_io(inode); | 627 | requeue_io(inode); |
657 | continue; | 628 | continue; |
658 | } | 629 | } |
659 | |||
660 | if (wbc->nonblocking && bdi_write_congested(wb->bdi)) { | ||
661 | wbc->encountered_congestion = 1; | ||
662 | if (!is_blkdev_sb) | ||
663 | break; /* Skip a congested fs */ | ||
664 | requeue_io(inode); | ||
665 | continue; /* Skip a congested blockdev */ | ||
666 | } | ||
667 | |||
668 | /* | 630 | /* |
669 | * Was this inode dirtied after sync_sb_inodes was called? | 631 | * Was this inode dirtied after sync_sb_inodes was called? |
670 | * This keeps sync from extra jobs and livelock. | 632 | * This keeps sync from extra jobs and livelock. |
671 | */ | 633 | */ |
672 | if (inode_dirtied_after(inode, start)) | 634 | if (inode_dirtied_after(inode, wbc->wb_start)) |
673 | break; | 635 | return 1; |
674 | |||
675 | if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { | ||
676 | requeue_io(inode); | ||
677 | continue; | ||
678 | } | ||
679 | 636 | ||
680 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 637 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); |
681 | __iget(inode); | 638 | __iget(inode); |
@@ -694,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
694 | spin_lock(&inode_lock); | 651 | spin_lock(&inode_lock); |
695 | if (wbc->nr_to_write <= 0) { | 652 | if (wbc->nr_to_write <= 0) { |
696 | wbc->more_io = 1; | 653 | wbc->more_io = 1; |
697 | break; | 654 | return 1; |
698 | } | 655 | } |
699 | if (!list_empty(&wb->b_more_io)) | 656 | if (!list_empty(&wb->b_more_io)) |
700 | wbc->more_io = 1; | 657 | wbc->more_io = 1; |
701 | } | 658 | } |
659 | /* b_io is empty */ | ||
660 | return 1; | ||
661 | } | ||
702 | 662 | ||
703 | unpin_sb_for_writeback(&pin_sb); | 663 | static void writeback_inodes_wb(struct bdi_writeback *wb, |
664 | struct writeback_control *wbc) | ||
665 | { | ||
666 | int ret = 0; | ||
704 | 667 | ||
668 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
669 | spin_lock(&inode_lock); | ||
670 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
671 | queue_io(wb, wbc->older_than_this); | ||
672 | |||
673 | while (!list_empty(&wb->b_io)) { | ||
674 | struct inode *inode = list_entry(wb->b_io.prev, | ||
675 | struct inode, i_list); | ||
676 | struct super_block *sb = inode->i_sb; | ||
677 | enum sb_pin_state state; | ||
678 | |||
679 | if (wbc->sb && sb != wbc->sb) { | ||
680 | /* super block given and doesn't | ||
681 | match, skip this inode */ | ||
682 | redirty_tail(inode); | ||
683 | continue; | ||
684 | } | ||
685 | state = pin_sb_for_writeback(wbc, sb); | ||
686 | |||
687 | if (state == SB_PIN_FAILED) { | ||
688 | requeue_io(inode); | ||
689 | continue; | ||
690 | } | ||
691 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
692 | |||
693 | if (state == SB_PINNED) | ||
694 | unpin_sb_for_writeback(sb); | ||
695 | if (ret) | ||
696 | break; | ||
697 | } | ||
705 | spin_unlock(&inode_lock); | 698 | spin_unlock(&inode_lock); |
706 | /* Leave any unwritten inodes on b_io */ | 699 | /* Leave any unwritten inodes on b_io */ |
707 | } | 700 | } |
@@ -756,6 +749,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
756 | .sync_mode = args->sync_mode, | 749 | .sync_mode = args->sync_mode, |
757 | .older_than_this = NULL, | 750 | .older_than_this = NULL, |
758 | .for_kupdate = args->for_kupdate, | 751 | .for_kupdate = args->for_kupdate, |
752 | .for_background = args->for_background, | ||
759 | .range_cyclic = args->range_cyclic, | 753 | .range_cyclic = args->range_cyclic, |
760 | }; | 754 | }; |
761 | unsigned long oldest_jif; | 755 | unsigned long oldest_jif; |
@@ -787,7 +781,6 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
787 | break; | 781 | break; |
788 | 782 | ||
789 | wbc.more_io = 0; | 783 | wbc.more_io = 0; |
790 | wbc.encountered_congestion = 0; | ||
791 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 784 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
792 | wbc.pages_skipped = 0; | 785 | wbc.pages_skipped = 0; |
793 | writeback_inodes_wb(wb, &wbc); | 786 | writeback_inodes_wb(wb, &wbc); |
@@ -1213,6 +1206,23 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1213 | EXPORT_SYMBOL(writeback_inodes_sb); | 1206 | EXPORT_SYMBOL(writeback_inodes_sb); |
1214 | 1207 | ||
1215 | /** | 1208 | /** |
1209 | * writeback_inodes_sb_if_idle - start writeback if none underway | ||
1210 | * @sb: the superblock | ||
1211 | * | ||
1212 | * Invoke writeback_inodes_sb if no writeback is currently underway. | ||
1213 | * Returns 1 if writeback was started, 0 if not. | ||
1214 | */ | ||
1215 | int writeback_inodes_sb_if_idle(struct super_block *sb) | ||
1216 | { | ||
1217 | if (!writeback_in_progress(sb->s_bdi)) { | ||
1218 | writeback_inodes_sb(sb); | ||
1219 | return 1; | ||
1220 | } else | ||
1221 | return 0; | ||
1222 | } | ||
1223 | EXPORT_SYMBOL(writeback_inodes_sb_if_idle); | ||
1224 | |||
1225 | /** | ||
1216 | * sync_inodes_sb - sync sb inode pages | 1226 | * sync_inodes_sb - sync sb inode pages |
1217 | * @sb: the superblock | 1227 | * @sb: the superblock |
1218 | * | 1228 | * |