diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 174 |
1 file changed, 105 insertions, 69 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..4b37f7cea4dd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
19 | #include <linux/slab.h> | ||
19 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
20 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
@@ -242,6 +243,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
242 | /** | 243 | /** |
243 | * bdi_start_writeback - start writeback | 244 | * bdi_start_writeback - start writeback |
244 | * @bdi: the backing device to write from | 245 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | ||
245 | * @nr_pages: the number of pages to write | 247 | * @nr_pages: the number of pages to write |
246 | * | 248 | * |
247 | * Description: | 249 | * Description: |
@@ -380,10 +382,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | |||
380 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 382 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
381 | } | 383 | } |
382 | 384 | ||
383 | static int write_inode(struct inode *inode, int sync) | 385 | static int write_inode(struct inode *inode, struct writeback_control *wbc) |
384 | { | 386 | { |
385 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) | 387 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) |
386 | return inode->i_sb->s_op->write_inode(inode, sync); | 388 | return inode->i_sb->s_op->write_inode(inode, wbc); |
387 | return 0; | 389 | return 0; |
388 | } | 390 | } |
389 | 391 | ||
@@ -420,7 +422,6 @@ static int | |||
420 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 422 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
421 | { | 423 | { |
422 | struct address_space *mapping = inode->i_mapping; | 424 | struct address_space *mapping = inode->i_mapping; |
423 | int wait = wbc->sync_mode == WB_SYNC_ALL; | ||
424 | unsigned dirty; | 425 | unsigned dirty; |
425 | int ret; | 426 | int ret; |
426 | 427 | ||
@@ -438,7 +439,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
438 | * We'll have another go at writing back this inode when we | 439 | * We'll have another go at writing back this inode when we |
439 | * completed a full scan of b_io. | 440 | * completed a full scan of b_io. |
440 | */ | 441 | */ |
441 | if (!wait) { | 442 | if (wbc->sync_mode != WB_SYNC_ALL) { |
442 | requeue_io(inode); | 443 | requeue_io(inode); |
443 | return 0; | 444 | return 0; |
444 | } | 445 | } |
@@ -460,15 +461,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
460 | 461 | ||
461 | ret = do_writepages(mapping, wbc); | 462 | ret = do_writepages(mapping, wbc); |
462 | 463 | ||
463 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 464 | /* |
464 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 465 | * Make sure to wait on the data before writing out the metadata. |
465 | int err = write_inode(inode, wait); | 466 | * This is important for filesystems that modify metadata on data |
467 | * I/O completion. | ||
468 | */ | ||
469 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
470 | int err = filemap_fdatawait(mapping); | ||
466 | if (ret == 0) | 471 | if (ret == 0) |
467 | ret = err; | 472 | ret = err; |
468 | } | 473 | } |
469 | 474 | ||
470 | if (wait) { | 475 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
471 | int err = filemap_fdatawait(mapping); | 476 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
477 | int err = write_inode(inode, wbc); | ||
472 | if (ret == 0) | 478 | if (ret == 0) |
473 | ret = err; | 479 | ret = err; |
474 | } | 480 | } |
@@ -548,108 +554,85 @@ select_queue: | |||
548 | return ret; | 554 | return ret; |
549 | } | 555 | } |
550 | 556 | ||
551 | static void unpin_sb_for_writeback(struct super_block **psb) | 557 | static void unpin_sb_for_writeback(struct super_block *sb) |
552 | { | 558 | { |
553 | struct super_block *sb = *psb; | 559 | up_read(&sb->s_umount); |
554 | 560 | put_super(sb); | |
555 | if (sb) { | ||
556 | up_read(&sb->s_umount); | ||
557 | put_super(sb); | ||
558 | *psb = NULL; | ||
559 | } | ||
560 | } | 561 | } |
561 | 562 | ||
563 | enum sb_pin_state { | ||
564 | SB_PINNED, | ||
565 | SB_NOT_PINNED, | ||
566 | SB_PIN_FAILED | ||
567 | }; | ||
568 | |||
562 | /* | 569 | /* |
563 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned | 570 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned |
564 | * before calling writeback. So make sure that we do pin it, so it doesn't | 571 | * before calling writeback. So make sure that we do pin it, so it doesn't |
565 | * go away while we are writing inodes from it. | 572 | * go away while we are writing inodes from it. |
566 | * | ||
567 | * Returns 0 if the super was successfully pinned (or pinning wasn't needed), | ||
568 | * 1 if we failed. | ||
569 | */ | 573 | */ |
570 | static int pin_sb_for_writeback(struct writeback_control *wbc, | 574 | static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, |
571 | struct inode *inode, struct super_block **psb) | 575 | struct super_block *sb) |
572 | { | 576 | { |
573 | struct super_block *sb = inode->i_sb; | ||
574 | |||
575 | /* | ||
576 | * If this sb is already pinned, nothing more to do. If not and | ||
577 | * *psb is non-NULL, unpin the old one first | ||
578 | */ | ||
579 | if (sb == *psb) | ||
580 | return 0; | ||
581 | else if (*psb) | ||
582 | unpin_sb_for_writeback(psb); | ||
583 | |||
584 | /* | 577 | /* |
585 | * Caller must already hold the ref for this | 578 | * Caller must already hold the ref for this |
586 | */ | 579 | */ |
587 | if (wbc->sync_mode == WB_SYNC_ALL) { | 580 | if (wbc->sync_mode == WB_SYNC_ALL) { |
588 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
589 | return 0; | 582 | return SB_NOT_PINNED; |
590 | } | 583 | } |
591 | |||
592 | spin_lock(&sb_lock); | 584 | spin_lock(&sb_lock); |
593 | sb->s_count++; | 585 | sb->s_count++; |
594 | if (down_read_trylock(&sb->s_umount)) { | 586 | if (down_read_trylock(&sb->s_umount)) { |
595 | if (sb->s_root) { | 587 | if (sb->s_root) { |
596 | spin_unlock(&sb_lock); | 588 | spin_unlock(&sb_lock); |
597 | goto pinned; | 589 | return SB_PINNED; |
598 | } | 590 | } |
599 | /* | 591 | /* |
600 | * umounted, drop rwsem again and fall through to failure | 592 | * umounted, drop rwsem again and fall through to failure |
601 | */ | 593 | */ |
602 | up_read(&sb->s_umount); | 594 | up_read(&sb->s_umount); |
603 | } | 595 | } |
604 | |||
605 | sb->s_count--; | 596 | sb->s_count--; |
606 | spin_unlock(&sb_lock); | 597 | spin_unlock(&sb_lock); |
607 | return 1; | 598 | return SB_PIN_FAILED; |
608 | pinned: | ||
609 | *psb = sb; | ||
610 | return 0; | ||
611 | } | 599 | } |
612 | 600 | ||
613 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 601 | /* |
614 | struct writeback_control *wbc) | 602 | * Write a portion of b_io inodes which belong to @sb. |
603 | * If @wbc->sb != NULL, then find and write all such | ||
604 | * inodes. Otherwise write only the run of @sb inodes found | |
605 | * consecutively at the tail of b_io. | |
606 | * Return 1 if the caller's writeback routine should be | |
607 | * interrupted. Otherwise return 0. | |
608 | */ | ||
609 | static int writeback_sb_inodes(struct super_block *sb, | ||
610 | struct bdi_writeback *wb, | ||
611 | struct writeback_control *wbc) | ||
615 | { | 612 | { |
616 | struct super_block *sb = wbc->sb, *pin_sb = NULL; | ||
617 | const unsigned long start = jiffies; /* livelock avoidance */ | ||
618 | |||
619 | spin_lock(&inode_lock); | ||
620 | |||
621 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
622 | queue_io(wb, wbc->older_than_this); | ||
623 | |||
624 | while (!list_empty(&wb->b_io)) { | 613 | while (!list_empty(&wb->b_io)) { |
625 | struct inode *inode = list_entry(wb->b_io.prev, | ||
626 | struct inode, i_list); | ||
627 | long pages_skipped; | 614 | long pages_skipped; |
628 | 615 | struct inode *inode = list_entry(wb->b_io.prev, | |
629 | /* | 616 | struct inode, i_list); |
630 | * super block given and doesn't match, skip this inode | 617 | if (wbc->sb && sb != inode->i_sb) { |
631 | */ | 618 | /* super block given and doesn't |
632 | if (sb && sb != inode->i_sb) { | 619 | match, skip this inode */ |
633 | redirty_tail(inode); | 620 | redirty_tail(inode); |
634 | continue; | 621 | continue; |
635 | } | 622 | } |
636 | 623 | if (sb != inode->i_sb) | |
624 | /* finish with this superblock */ | ||
625 | return 0; | ||
637 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 626 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
638 | requeue_io(inode); | 627 | requeue_io(inode); |
639 | continue; | 628 | continue; |
640 | } | 629 | } |
641 | |||
642 | /* | 630 | /* |
643 | * Was this inode dirtied after sync_sb_inodes was called? | 631 | * Was this inode dirtied after sync_sb_inodes was called? |
644 | * This keeps sync from extra jobs and livelock. | 632 | * This keeps sync from extra jobs and livelock. |
645 | */ | 633 | */ |
646 | if (inode_dirtied_after(inode, start)) | 634 | if (inode_dirtied_after(inode, wbc->wb_start)) |
647 | break; | 635 | return 1; |
648 | |||
649 | if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { | ||
650 | requeue_io(inode); | ||
651 | continue; | ||
652 | } | ||
653 | 636 | ||
654 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 637 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); |
655 | __iget(inode); | 638 | __iget(inode); |
@@ -668,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
668 | spin_lock(&inode_lock); | 651 | spin_lock(&inode_lock); |
669 | if (wbc->nr_to_write <= 0) { | 652 | if (wbc->nr_to_write <= 0) { |
670 | wbc->more_io = 1; | 653 | wbc->more_io = 1; |
671 | break; | 654 | return 1; |
672 | } | 655 | } |
673 | if (!list_empty(&wb->b_more_io)) | 656 | if (!list_empty(&wb->b_more_io)) |
674 | wbc->more_io = 1; | 657 | wbc->more_io = 1; |
675 | } | 658 | } |
659 | /* b_io is empty */ | ||
660 | return 1; | ||
661 | } | ||
662 | |||
663 | static void writeback_inodes_wb(struct bdi_writeback *wb, | ||
664 | struct writeback_control *wbc) | ||
665 | { | ||
666 | int ret = 0; | ||
676 | 667 | ||
677 | unpin_sb_for_writeback(&pin_sb); | 668 | wbc->wb_start = jiffies; /* livelock avoidance */ |
669 | spin_lock(&inode_lock); | ||
670 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
671 | queue_io(wb, wbc->older_than_this); | ||
678 | 672 | ||
673 | while (!list_empty(&wb->b_io)) { | ||
674 | struct inode *inode = list_entry(wb->b_io.prev, | ||
675 | struct inode, i_list); | ||
676 | struct super_block *sb = inode->i_sb; | ||
677 | enum sb_pin_state state; | ||
678 | |||
679 | if (wbc->sb && sb != wbc->sb) { | ||
680 | /* super block given and doesn't | ||
681 | match, skip this inode */ | ||
682 | redirty_tail(inode); | ||
683 | continue; | ||
684 | } | ||
685 | state = pin_sb_for_writeback(wbc, sb); | ||
686 | |||
687 | if (state == SB_PIN_FAILED) { | ||
688 | requeue_io(inode); | ||
689 | continue; | ||
690 | } | ||
691 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
692 | |||
693 | if (state == SB_PINNED) | ||
694 | unpin_sb_for_writeback(sb); | ||
695 | if (ret) | ||
696 | break; | ||
697 | } | ||
679 | spin_unlock(&inode_lock); | 698 | spin_unlock(&inode_lock); |
680 | /* Leave any unwritten inodes on b_io */ | 699 | /* Leave any unwritten inodes on b_io */ |
681 | } | 700 | } |
@@ -1187,6 +1206,23 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1187 | EXPORT_SYMBOL(writeback_inodes_sb); | 1206 | EXPORT_SYMBOL(writeback_inodes_sb); |
1188 | 1207 | ||
1189 | /** | 1208 | /** |
1209 | * writeback_inodes_sb_if_idle - start writeback if none underway | ||
1210 | * @sb: the superblock | ||
1211 | * | ||
1212 | * Invoke writeback_inodes_sb if no writeback is currently underway. | ||
1213 | * Returns 1 if writeback was started, 0 if not. | ||
1214 | */ | ||
1215 | int writeback_inodes_sb_if_idle(struct super_block *sb) | ||
1216 | { | ||
1217 | if (!writeback_in_progress(sb->s_bdi)) { | ||
1218 | writeback_inodes_sb(sb); | ||
1219 | return 1; | ||
1220 | } else | ||
1221 | return 0; | ||
1222 | } | ||
1223 | EXPORT_SYMBOL(writeback_inodes_sb_if_idle); | ||
1224 | |||
1225 | /** | ||
1190 | * sync_inodes_sb - sync sb inode pages | 1226 | * sync_inodes_sb - sync sb inode pages |
1191 | * @sb: the superblock | 1227 | * @sb: the superblock |
1192 | * | 1228 | * |