Diffstat (limited to 'fs/fs-writeback.c')
 -rw-r--r--  fs/fs-writeback.c | 174
 1 file changed, 105 insertions, 69 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..4b37f7cea4dd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -242,6 +243,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
 /**
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
+ * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
  *
  * Description:
@@ -380,10 +382,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
-static int write_inode(struct inode *inode, int sync)
+static int write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
-		return inode->i_sb->s_op->write_inode(inode, sync);
+		return inode->i_sb->s_op->write_inode(inode, wbc);
 	return 0;
 }
 
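With the hunk above, ->write_inode no longer takes a bare "sync" flag; the filesystem now sees the whole struct writeback_control and derives the sync decision from wbc->sync_mode. A minimal sketch of a filesystem hook written against the new prototype, for orientation only: "examplefs" and its raw-inode helper are invented, and only the wbc usage reflects the interface changed here.

#include <linux/fs.h>
#include <linux/writeback.h>

/* Invented stand-in for whatever pushes the raw on-disk inode. */
static int examplefs_sync_raw_inode(struct inode *inode, int wait)
{
	return 0;
}

/* Post-patch ->write_inode prototype: the sync decision comes from
 * the writeback_control rather than a separate flag. */
static int examplefs_write_inode(struct inode *inode,
				 struct writeback_control *wbc)
{
	int wait = (wbc->sync_mode == WB_SYNC_ALL);

	return examplefs_sync_raw_inode(inode, wait);
}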
@@ -420,7 +422,6 @@ static int
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
 	unsigned dirty;
 	int ret;
 
@@ -438,7 +439,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * We'll have another go at writing back this inode when we
 	 * completed a full scan of b_io.
 	 */
-	if (!wait) {
+	if (wbc->sync_mode != WB_SYNC_ALL) {
 		requeue_io(inode);
 		return 0;
 	}
@@ -460,15 +461,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	ret = do_writepages(mapping, wbc);
 
-	/* Don't write the inode if only I_DIRTY_PAGES was set */
-	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
-		int err = write_inode(inode, wait);
+	/*
+	 * Make sure to wait on the data before writing out the metadata.
+	 * This is important for filesystems that modify metadata on data
+	 * I/O completion.
+	 */
+	if (wbc->sync_mode == WB_SYNC_ALL) {
+		int err = filemap_fdatawait(mapping);
 		if (ret == 0)
 			ret = err;
 	}
 
-	if (wait) {
-		int err = filemap_fdatawait(mapping);
+	/* Don't write the inode if only I_DIRTY_PAGES was set */
+	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+		int err = write_inode(inode, wbc);
 		if (ret == 0)
 			ret = err;
 	}
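The swap above is easier to see flattened out: under WB_SYNC_ALL the data pages are now waited on before the inode itself is written, because some filesystems dirty metadata from their data I/O completion handlers. A condensed, illustrative rendition of the resulting path (the real writeback_single_inode() also manages inode state flags, the I_DIRTY_SYNC/I_DIRTY_DATASYNC check, requeueing and inode_lock, all omitted here):

static int example_sync_one_inode(struct inode *inode,
				  struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	int ret, err;

	ret = do_writepages(mapping, wbc);	/* push dirty data pages */

	if (wbc->sync_mode == WB_SYNC_ALL) {
		/* wait for the data first: I/O completion handlers may
		 * still dirty the inode's metadata */
		err = filemap_fdatawait(mapping);
		if (ret == 0)
			ret = err;
	}

	/* only then write the inode itself */
	err = write_inode(inode, wbc);
	if (ret == 0)
		ret = err;

	return ret;
}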
@@ -548,108 +554,85 @@ select_queue:
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
+static void unpin_sb_for_writeback(struct super_block *sb)
 {
-	struct super_block *sb = *psb;
-
-	if (sb) {
-		up_read(&sb->s_umount);
-		put_super(sb);
-		*psb = NULL;
-	}
+	up_read(&sb->s_umount);
+	put_super(sb);
 }
 
+enum sb_pin_state {
+	SB_PINNED,
+	SB_NOT_PINNED,
+	SB_PIN_FAILED
+};
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
- *
- * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
- * 1 if we failed.
  */
-static int pin_sb_for_writeback(struct writeback_control *wbc,
-				struct inode *inode, struct super_block **psb)
+static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
+					      struct super_block *sb)
 {
-	struct super_block *sb = inode->i_sb;
-
-	/*
-	 * If this sb is already pinned, nothing more to do. If not and
-	 * *psb is non-NULL, unpin the old one first
-	 */
-	if (sb == *psb)
-		return 0;
-	else if (*psb)
-		unpin_sb_for_writeback(psb);
-
 	/*
 	 * Caller must already hold the ref for this
 	 */
 	if (wbc->sync_mode == WB_SYNC_ALL) {
 		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return 0;
+		return SB_NOT_PINNED;
 	}
-
 	spin_lock(&sb_lock);
 	sb->s_count++;
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			goto pinned;
+			return SB_PINNED;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
 		 */
 		up_read(&sb->s_umount);
 	}
-
 	sb->s_count--;
 	spin_unlock(&sb_lock);
-	return 1;
-pinned:
-	*psb = sb;
-	return 0;
+	return SB_PIN_FAILED;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+/*
+ * Write a portion of b_io inodes which belong to @sb.
+ * If @wbc->sb != NULL, then find and write all such
+ * inodes. Otherwise write only ones which go sequentially
+ * in reverse order.
+ * Return 1, if the caller writeback routine should be
+ * interrupted. Otherwise return 0.
+ */
+static int writeback_sb_inodes(struct super_block *sb,
+			       struct bdi_writeback *wb,
+			       struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb, *pin_sb = NULL;
-	const unsigned long start = jiffies;	/* livelock avoidance */
-
-	spin_lock(&inode_lock);
-
-	if (!wbc->for_kupdate || list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
-
 	while (!list_empty(&wb->b_io)) {
-		struct inode *inode = list_entry(wb->b_io.prev,
-						struct inode, i_list);
 		long pages_skipped;
-
-		/*
-		 * super block given and doesn't match, skip this inode
-		 */
-		if (sb && sb != inode->i_sb) {
+		struct inode *inode = list_entry(wb->b_io.prev,
						 struct inode, i_list);
+		if (wbc->sb && sb != inode->i_sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
 			redirty_tail(inode);
 			continue;
 		}
-
+		if (sb != inode->i_sb)
+			/* finish with this superblock */
+			return 0;
 		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
 		}
-
 		/*
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, start))
-			break;
-
-		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-			requeue_io(inode);
-			continue;
-		}
+		if (inode_dirtied_after(inode, wbc->wb_start))
+			return 1;
 
 		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
@@ -668,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
-			break;
+			return 1;
 		}
 		if (!list_empty(&wb->b_more_io))
 			wbc->more_io = 1;
 	}
+	/* b_io is empty */
+	return 1;
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+				struct writeback_control *wbc)
+{
+	int ret = 0;
 
-	unpin_sb_for_writeback(&pin_sb);
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
 
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		struct super_block *sb = inode->i_sb;
+		enum sb_pin_state state;
+
+		if (wbc->sb && sb != wbc->sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
+			redirty_tail(inode);
+			continue;
+		}
+		state = pin_sb_for_writeback(wbc, sb);
+
+		if (state == SB_PIN_FAILED) {
+			requeue_io(inode);
+			continue;
+		}
+		ret = writeback_sb_inodes(sb, wb, wbc);
+
+		if (state == SB_PINNED)
+			unpin_sb_for_writeback(sb);
+		if (ret)
+			break;
+	}
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
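A note on wbc->wb_start, which writeback_sb_inodes() compares against above: the field is set in writeback_inodes_wb() at the start of each pass, and its declaration lives in include/linux/writeback.h, outside this diffstat-limited view. Roughly, and with placement and comment wording assumed rather than taken from this diff, the addition amounts to:

struct writeback_control {
	/* ... existing fields: sb, sync_mode, nr_to_write, ... */
	unsigned long wb_start;		/* time writeback_inodes_wb() was
					   called, for livelock avoidance */
	/* ... */
};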
@@ -1187,6 +1206,23 @@ void writeback_inodes_sb(struct super_block *sb)
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
+ * writeback_inodes_sb_if_idle - start writeback if none underway
+ * @sb: the superblock
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+	if (!writeback_in_progress(sb->s_bdi)) {
+		writeback_inodes_sb(sb);
+		return 1;
+	} else
+		return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
+/**
  * sync_inodes_sb - sync sb inode pages
  * @sb: the superblock
  *
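The new writeback_inodes_sb_if_idle() gives filesystems a cheap way to kick writeback for their superblock only when none is already running on its backing device. A hypothetical caller, purely for illustration (the function name and the low-space trigger are invented; only the 1/0 return convention comes from the kernel-doc above):

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/writeback.h>

/* Invented example: flush dirty data for this sb when space runs low,
 * without stacking a second flush on top of one already in flight. */
static void examplefs_low_space_flush(struct super_block *sb)
{
	int started = writeback_inodes_sb_if_idle(sb);

	if (!started)
		pr_debug("examplefs: writeback already underway\n");
}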