Diffstat (limited to 'fs/fs-writeback.c')

 -rw-r--r--  fs/fs-writeback.c | 165
 1 file changed, 116 insertions(+), 49 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e1e5e19d21e..9d5360c4c2af 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,8 +41,9 @@ struct wb_writeback_args {
 	long nr_pages;
 	struct super_block *sb;
 	enum writeback_sync_modes sync_mode;
-	int for_kupdate;
-	int range_cyclic;
+	int for_kupdate:1;
+	int range_cyclic:1;
+	int for_background:1;
 };
 
 /*
@@ -249,14 +250,25 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  * completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
+			 long nr_pages)
 {
 	struct wb_writeback_args args = {
+		.sb = sb,
 		.sync_mode = WB_SYNC_NONE,
 		.nr_pages = nr_pages,
 		.range_cyclic = 1,
 	};
 
+	/*
+	 * We treat @nr_pages=0 as the special case to do background writeback,
+	 * ie. to sync pages until the background dirty threshold is reached.
+	 */
+	if (!nr_pages) {
+		args.nr_pages = LONG_MAX;
+		args.for_background = 1;
+	}
+
 	bdi_alloc_queue_work(bdi, &args);
 }
 
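The nr_pages == 0 convention introduced above deserves a note: a zero count is rewritten into an effectively unbounded request (LONG_MAX pages) with for_background set, so the flusher loop, not the caller, decides when background writeback stops. A minimal userspace sketch of that rewrite, with a toy wb_args struct standing in for struct wb_writeback_args (names and values here are illustrative, not part of the patch):

#include <limits.h>
#include <stdio.h>

struct wb_args {
	long nr_pages;
	unsigned int for_background:1;
};

/* Mirror the special-casing done in bdi_start_writeback() above. */
static struct wb_args make_args(long nr_pages)
{
	struct wb_args args = { .nr_pages = nr_pages };

	if (!nr_pages) {	/* 0 means "until below bg threshold" */
		args.nr_pages = LONG_MAX;
		args.for_background = 1;
	}
	return args;
}

int main(void)
{
	struct wb_args a = make_args(0);

	printf("%ld %d\n", a.nr_pages, (int)a.for_background);
	return 0;
}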
@@ -310,7 +322,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 	 * For inodes being constantly redirtied, dirtied_when can get stuck.
 	 * It _appears_ to be in the future, but is actually in distant past.
 	 * This test is necessary to prevent such wrapped-around relative times
-	 * from permanently stopping the whole pdflush writeback.
+	 * from permanently stopping the whole bdi writeback.
 	 */
 	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
 #endif
@@ -324,13 +336,38 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 			       struct list_head *dispatch_queue,
 			       unsigned long *older_than_this)
 {
+	LIST_HEAD(tmp);
+	struct list_head *pos, *node;
+	struct super_block *sb = NULL;
+	struct inode *inode;
+	int do_sb_sort = 0;
+
 	while (!list_empty(delaying_queue)) {
-		struct inode *inode = list_entry(delaying_queue->prev,
-						struct inode, i_list);
+		inode = list_entry(delaying_queue->prev, struct inode, i_list);
 		if (older_than_this &&
 		    inode_dirtied_after(inode, *older_than_this))
 			break;
-		list_move(&inode->i_list, dispatch_queue);
+		if (sb && sb != inode->i_sb)
+			do_sb_sort = 1;
+		sb = inode->i_sb;
+		list_move(&inode->i_list, &tmp);
+	}
+
+	/* just one sb in list, splice to dispatch_queue and we're done */
+	if (!do_sb_sort) {
+		list_splice(&tmp, dispatch_queue);
+		return;
+	}
+
+	/* Move inodes from one superblock together */
+	while (!list_empty(&tmp)) {
+		inode = list_entry(tmp.prev, struct inode, i_list);
+		sb = inode->i_sb;
+		list_for_each_prev_safe(pos, node, &tmp) {
+			inode = list_entry(pos, struct inode, i_list);
+			if (inode->i_sb == sb)
+				list_move(&inode->i_list, dispatch_queue);
+		}
 	}
 }
 
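The second loop above is a stable grouping pass: take the superblock of the tail inode still on tmp, sweep every inode belonging to that superblock onto the dispatch queue, and repeat until tmp is empty, so each sb's inodes keep their dirtied order but come out contiguous. The same idea in a self-contained userspace sketch, with an int key standing in for inode->i_sb and arrays standing in for the lists (entirely hypothetical code):

#include <stdio.h>

#define N 6

int main(void)
{
	/* dirty-order "inodes": the value is the owning "superblock" */
	int in[N] = { 1, 2, 1, 3, 2, 1 };
	int out[N];
	int used[N] = { 0 };
	int m = 0, i, key;

	while (m < N) {
		key = -1;
		/* pick the key of the first entry not yet dispatched */
		for (i = 0; i < N; i++) {
			if (!used[i]) {
				key = in[i];
				break;
			}
		}
		/* move everything with that key, preserving order */
		for (i = 0; i < N; i++) {
			if (!used[i] && in[i] == key) {
				out[m++] = in[i];
				used[i] = 1;
			}
		}
	}

	for (i = 0; i < N; i++)
		printf("%d ", out[i]);	/* prints: 1 1 1 2 2 3 */
	printf("\n");
	return 0;
}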
@@ -439,8 +476,18 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-		if (!(inode->i_state & I_DIRTY) &&
-		    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
+			/*
+			 * More pages get dirtied by a fast dirtier.
+			 */
+			goto select_queue;
+		} else if (inode->i_state & I_DIRTY) {
+			/*
+			 * At least XFS will redirty the inode during the
+			 * writeback (delalloc) and on io completion (isize).
+			 */
+			redirty_tail(inode);
+		} else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 			/*
 			 * We didn't write back all the pages. nfs_writepages()
 			 * sometimes bales out without doing anything. Redirty
@@ -462,6 +509,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		 * soon as the queue becomes uncongested.
 		 */
 		inode->i_state |= I_DIRTY_PAGES;
+select_queue:
 		if (wbc->nr_to_write <= 0) {
 			/*
 			 * slice used up: queue for next turn
@@ -484,12 +532,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 				inode->i_state |= I_DIRTY_PAGES;
 				redirty_tail(inode);
 			}
-		} else if (inode->i_state & I_DIRTY) {
-			/*
-			 * Someone redirtied the inode while were writing back
-			 * the pages.
-			 */
-			redirty_tail(inode);
 		} else if (atomic_read(&inode->i_count)) {
 			/*
 			 * The inode is clean, inuse
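Taken together, these writeback_single_inode() hunks replace one catch-all test with an ordered ladder: pages freshly dirtied during a kupdate pass jump to select_queue, inodes left metadata-dirty (the XFS delalloc/i_size case) go to redirty_tail(), and only then is the nfs_writepages() bail-out handled, itself funnelling into the same select_queue choice. A condensed restatement of that ladder as a compilable sketch (toy state bits and return strings, purely illustrative):

#include <stdio.h>

/* Toy stand-ins for the inode state bits (values are made up). */
#define I_DIRTY_PAGES 0x01
#define I_DIRTY       0x07	/* any dirty state, pages included */

static const char *requeue(int state, int for_kupdate,
			   int mapping_dirty, long nr_to_write)
{
	if ((state & I_DIRTY_PAGES) && for_kupdate)
		goto select_queue;		/* fast dirtier */
	if (state & I_DIRTY)
		return "redirty_tail";		/* e.g. XFS redirty */
	if (!mapping_dirty)
		return "clean path";		/* nothing left to queue */
select_queue:
	return nr_to_write <= 0 ? "requeue_io (slice used up)"
				: "redirty_tail (blocked, retry later)";
}

int main(void)
{
	printf("%s\n", requeue(I_DIRTY_PAGES, 1, 1, 0));
	printf("%s\n", requeue(I_DIRTY, 0, 0, 8));
	printf("%s\n", requeue(0, 0, 1, 8));
	return 0;
}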
@@ -506,6 +548,17 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
+static void unpin_sb_for_writeback(struct super_block **psb)
+{
+	struct super_block *sb = *psb;
+
+	if (sb) {
+		up_read(&sb->s_umount);
+		put_super(sb);
+		*psb = NULL;
+	}
+}
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
@@ -515,11 +568,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * 1 if we failed.
  */
 static int pin_sb_for_writeback(struct writeback_control *wbc,
-				struct inode *inode)
+				struct inode *inode, struct super_block **psb)
 {
 	struct super_block *sb = inode->i_sb;
 
 	/*
+	 * If this sb is already pinned, nothing more to do. If not and
+	 * *psb is non-NULL, unpin the old one first
+	 */
+	if (sb == *psb)
+		return 0;
+	else if (*psb)
+		unpin_sb_for_writeback(psb);
+
+	/*
 	 * Caller must already hold the ref for this
 	 */
 	if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -532,7 +594,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			return 0;
+			goto pinned;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
@@ -543,24 +605,15 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
 	sb->s_count--;
 	spin_unlock(&sb_lock);
 	return 1;
-}
-
-static void unpin_sb_for_writeback(struct writeback_control *wbc,
-				   struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		return;
-
-	up_read(&sb->s_umount);
-	put_super(sb);
+pinned:
+	*psb = sb;
+	return 0;
 }
 
 static void writeback_inodes_wb(struct bdi_writeback *wb,
 				struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb;
+	struct super_block *sb = wbc->sb, *pin_sb = NULL;
 	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
 	const unsigned long start = jiffies; /* livelock avoidance */
 
@@ -619,7 +672,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, start))
 			break;
 
-		if (pin_sb_for_writeback(wbc, inode)) {
+		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
 			requeue_io(inode);
 			continue;
 		}
@@ -628,7 +681,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
-		unpin_sb_for_writeback(wbc, inode);
 		if (wbc->pages_skipped != pages_skipped) {
 			/*
 			 * writeback is not making progress due to locked
@@ -648,6 +700,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		wbc->more_io = 1;
 	}
 
+	unpin_sb_for_writeback(&pin_sb);
+
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
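Because move_expired_inodes() now keeps each superblock's inodes together, consecutive loop iterations in writeback_inodes_wb() usually hit the same sb, so pin_sb_for_writeback() can cache the pin and drop it only when the owner changes, with one final unpin_sb_for_writeback() after the loop as added above. The pattern in miniature, with printf() standing in for the real up_read()/put_super() work (all names hypothetical):

#include <stdio.h>
#include <string.h>

static const char *pinned;	/* currently pinned owner, NULL if none */

static void unpin(void)
{
	if (pinned) {
		printf("unpin %s\n", pinned);	/* up_read() + put_super() */
		pinned = NULL;
	}
}

static void pin(const char *owner)
{
	if (pinned && strcmp(pinned, owner) == 0)
		return;			/* already pinned, nothing to do */
	unpin();			/* different owner: unpin old first */
	printf("pin %s\n", owner);
	pinned = owner;
}

int main(void)
{
	/* sb-sorted "inodes": three on sda1, then two on sdb1 */
	const char *owner[] = { "sda1", "sda1", "sda1", "sdb1", "sdb1" };
	size_t i;

	for (i = 0; i < sizeof(owner) / sizeof(owner[0]); i++)
		pin(owner[i]);
	unpin();			/* final unpin after the loop */
	return 0;
}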
@@ -706,6 +760,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
+	struct inode *inode;
 
 	if (wbc.for_kupdate) {
 		wbc.older_than_this = &oldest_jif;
@@ -719,20 +774,16 @@ static long wb_writeback(struct bdi_writeback *wb,
 
 	for (;;) {
 		/*
-		 * Don't flush anything for non-integrity writeback where
-		 * no nr_pages was given
+		 * Stop writeback when nr_pages has been consumed
 		 */
-		if (!args->for_kupdate && args->nr_pages <= 0 &&
-		    args->sync_mode == WB_SYNC_NONE)
+		if (args->nr_pages <= 0)
 			break;
 
 		/*
-		 * If no specific pages were given and this is just a
-		 * periodic background writeout and we are below the
-		 * background dirty threshold, don't do anything
+		 * For background writeout, stop when we are below the
+		 * background dirty threshold
 		 */
-		if (args->for_kupdate && args->nr_pages <= 0 &&
-		    !over_bground_thresh())
+		if (args->for_background && !over_bground_thresh())
 			break;
 
 		wbc.more_io = 0;
@@ -744,13 +795,32 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
 		/*
-		 * If we ran out of stuff to write, bail unless more_io got set
+		 * If we consumed everything, see if we have more
 		 */
-		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			if (wbc.more_io && !wbc.for_kupdate)
-				continue;
+		if (wbc.nr_to_write <= 0)
+			continue;
+		/*
+		 * Didn't write everything and we don't have more IO, bail
+		 */
+		if (!wbc.more_io)
 			break;
+		/*
+		 * Did we write something? Try for more
+		 */
+		if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+			continue;
+		/*
+		 * Nothing written. Wait for some inode to
+		 * become available for writeback. Otherwise
+		 * we'll just busyloop.
+		 */
+		spin_lock(&inode_lock);
+		if (!list_empty(&wb->b_more_io)) {
+			inode = list_entry(wb->b_more_io.prev,
+						struct inode, i_list);
+			inode_wait_for_writeback(inode);
 		}
+		spin_unlock(&inode_lock);
 	}
 
 	return wrote;
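The rewritten loop tail distinguishes four outcomes that the old two-branch test conflated: budget consumed (try for more), partial progress with nothing else queued (done), partial progress with more IO pending (keep going), and no progress at all (sleep on an inode under writeback rather than busyloop). A compilable restatement of that decision order, under made-up struct pass and next_step() names:

#include <stdio.h>

#define MAX_WRITEBACK_PAGES 1024

struct pass {
	long nr_to_write;	/* budget left after one wbc pass */
	int more_io;		/* inodes left on b_more_io? */
};

/* Decide what the wb_writeback() loop does after one pass. */
static const char *next_step(const struct pass *p)
{
	if (p->nr_to_write <= 0)
		return "continue: full chunk consumed, try for more";
	if (!p->more_io)
		return "break: didn't write everything, no more IO";
	if (p->nr_to_write < MAX_WRITEBACK_PAGES)
		return "continue: wrote something, try for more";
	return "wait: nothing written, sleep on a b_more_io inode";
}

int main(void)
{
	const struct pass cases[] = {
		{ 0,    1 },	/* consumed the whole chunk */
		{ 100,  0 },	/* partial write, nothing else queued */
		{ 100,  1 },	/* partial write, more inodes waiting */
		{ 1024, 1 },	/* wrote nothing, inodes busy elsewhere */
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("%s\n", next_step(&cases[i]));
	return 0;
}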
@@ -1060,9 +1130,6 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  * If older_than_this is non-NULL, then only write out inodes which
  * had their first dirtying at a time earlier than *older_than_this.
  *
- * If we're a pdlfush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
  * This function assumes that the blockdev superblock's inodes are backed by
  * a variety of queues, so all inodes are searched. For other superblocks,
@@ -1141,7 +1208,7 @@ void writeback_inodes_sb(struct super_block *sb)
 	nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_writeback_all(sb, nr_to_write);
+	bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
