Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--	fs/fs-writeback.c	255
1 files changed, 143 insertions, 112 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d5be1693ac93..7d9d06ba184b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
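The hunk above moves the tracepoint plumbing into this file: defining CREATE_TRACE_POINTS before including trace/events/writeback.h makes this compilation unit the one that actually emits the tracepoint bodies, and the include must come after struct wb_writeback_work so the event macros can look inside that structure. Below is a stripped-down sketch of what such a trace header looks like; it is illustrative only (the event name matches the calls in this diff, but the fields shown are assumptions, not the real include/trace/events/writeback.h).

/*
 * Illustrative sketch only -- a minimal trace header in the style of
 * include/trace/events/writeback.h; field choices are assumptions.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM writeback

#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_WRITEBACK_H

#include <linux/tracepoint.h>
#include <linux/backing-dev.h>

TRACE_EVENT(writeback_queue,

	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),

	TP_ARGS(bdi, work),

	TP_STRUCT__entry(
		__array(char,	name, 32)
		__field(long,	nr_pages)
	),

	TP_fast_assign(
		strncpy(__entry->name, dev_name(bdi->dev), 32);
		__entry->nr_pages = work->nr_pages;
	),

	TP_printk("bdi %s: nr_pages=%ld", __entry->name, __entry->nr_pages)
);

#endif /* _TRACE_WRITEBACK_H */

/* This part must be outside the include guard */
#include <trace/define_trace.h>

Every other file that includes the same header without CREATE_TRACE_POINTS only sees declarations, which is why the trace_writeback_*() calls added throughout the rest of this diff need nothing beyond the plain include.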
@@ -59,28 +68,27 @@ struct wb_writeback_work {
  */
 int writeback_in_progress(struct backing_dev_info *bdi)
 {
-	return !list_empty(&bdi->work_list);
+	return test_bit(BDI_writeback_running, &bdi->state);
 }
 
 static void bdi_queue_work(struct backing_dev_info *bdi,
			   struct wb_writeback_work *work)
 {
-	spin_lock(&bdi->wb_lock);
-	list_add_tail(&work->list, &bdi->work_list);
-	spin_unlock(&bdi->wb_lock);
+	trace_writeback_queue(bdi, work);
 
-	/*
-	 * If the default thread isn't there, make sure we add it. When
-	 * it gets created and wakes up, we'll run this work.
-	 */
-	if (unlikely(list_empty_careful(&bdi->wb_list)))
+	spin_lock_bh(&bdi->wb_lock);
+	list_add_tail(&work->list, &bdi->work_list);
+	if (bdi->wb.task) {
+		wake_up_process(bdi->wb.task);
+	} else {
+		/*
+		 * The bdi thread isn't there, wake up the forker thread which
+		 * will create and run it.
+		 */
+		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
-	else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
 	}
+	spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task)
+		if (bdi->wb.task) {
+			trace_writeback_nowork(bdi);
 			wake_up_process(bdi->wb.task);
+		}
 		return;
 	}
 
@@ -239,10 +249,18 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 
 /*
  * Queue all expired dirty inodes for io, eldest first.
+ * Before
+ *         newly dirtied     b_dirty    b_io    b_more_io
+ *         =============>    gf         edc     BA
+ * After
+ *         newly dirtied     b_dirty    b_io    b_more_io
+ *         =============>    g          fBAedc
+ *                                           |
+ *                                           +--> dequeue for IO
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
-	list_splice_init(&wb->b_more_io, wb->b_io.prev);
+	list_splice_init(&wb->b_more_io, &wb->b_io);
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
@@ -352,63 +370,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_SYNC;
-	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-		if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
-			/*
-			 * More pages get dirtied by a fast dirtier.
-			 */
-			goto select_queue;
-		} else if (inode->i_state & I_DIRTY) {
-			/*
-			 * At least XFS will redirty the inode during the
-			 * writeback (delalloc) and on io completion (isize).
-			 */
-			redirty_tail(inode);
-		} else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+	if (!(inode->i_state & I_FREEING)) {
+		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 			/*
 			 * We didn't write back all the pages.  nfs_writepages()
-			 * sometimes bales out without doing anything. Redirty
-			 * the inode; Move it from b_io onto b_more_io/b_dirty.
-			 */
-			/*
-			 * akpm: if the caller was the kupdate function we put
-			 * this inode at the head of b_dirty so it gets first
-			 * consideration.  Otherwise, move it to the tail, for
-			 * the reasons described there.  I'm not really sure
-			 * how much sense this makes.  Presumably I had a good
-			 * reasons for doing it this way, and I'd rather not
-			 * muck with it at present.
+			 * sometimes bales out without doing anything.
 			 */
-			if (wbc->for_kupdate) {
+			inode->i_state |= I_DIRTY_PAGES;
+			if (wbc->nr_to_write <= 0) {
 				/*
-				 * For the kupdate function we move the inode
-				 * to b_more_io so it will get more writeout as
-				 * soon as the queue becomes uncongested.
+				 * slice used up: queue for next turn
 				 */
-				inode->i_state |= I_DIRTY_PAGES;
-select_queue:
-				if (wbc->nr_to_write <= 0) {
-					/*
-					 * slice used up: queue for next turn
-					 */
-					requeue_io(inode);
-				} else {
-					/*
-					 * somehow blocked: retry later
-					 */
-					redirty_tail(inode);
-				}
+				requeue_io(inode);
 			} else {
 				/*
-				 * Otherwise fully redirty the inode so that
-				 * other inodes on this superblock will get some
-				 * writeout.  Otherwise heavy writing to one
-				 * file would indefinitely suspend writeout of
-				 * all the other files.
+				 * Writeback blocked by something other than
+				 * congestion. Delay the inode for some time to
+				 * avoid spinning on the CPU (100% iowait)
+				 * retrying writeback of the dirty page/inode
+				 * that cannot be performed immediately.
 				 */
-				inode->i_state |= I_DIRTY_PAGES;
 				redirty_tail(inode);
 			}
+		} else if (inode->i_state & I_DIRTY) {
+			/*
+			 * Filesystems can dirty the inode during writeback
+			 * operations, such as delayed allocation during
+			 * submission or metadata updates after data IO
+			 * completion.
+			 */
+			redirty_tail(inode);
 		} else if (atomic_read(&inode->i_count)) {
 			/*
 			 * The inode is clean, inuse
@@ -499,7 +490,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, wbc->wb_start))
 			return 1;
 
-		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
+		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
@@ -530,7 +521,8 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 {
 	int ret = 0;
 
-	wbc->wb_start = jiffies; /* livelock avoidance */
+	if (!wbc->wb_start)
+		wbc->wb_start = jiffies; /* livelock avoidance */
 	spin_lock(&inode_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
@@ -559,7 +551,6 @@ static void __writeback_inodes_sb(struct super_block *sb,
 {
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	wbc->wb_start = jiffies; /* livelock avoidance */
 	spin_lock(&inode_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
@@ -580,7 +571,7 @@ static inline bool over_bground_thresh(void)
 {
 	unsigned long background_thresh, dirty_thresh;
 
-	get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
+	global_dirty_limits(&background_thresh, &dirty_thresh);
 
 	return (global_page_state(NR_FILE_DIRTY) +
 		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
@@ -625,6 +616,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.range_end = LLONG_MAX;
 	}
 
+	wbc.wb_start = jiffies; /* livelock avoidance */
 	for (;;) {
 		/*
 		 * Stop writeback when nr_pages has been consumed
@@ -642,10 +634,14 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
+
+		trace_wbc_writeback_start(&wbc, wb->bdi);
 		if (work->sb)
 			__writeback_inodes_sb(work->sb, wb, &wbc);
 		else
 			writeback_inodes_wb(wb, &wbc);
+		trace_wbc_writeback_written(&wbc, wb->bdi);
+
 		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -673,6 +669,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		if (!list_empty(&wb->b_more_io)) {
 			inode = list_entry(wb->b_more_io.prev,
						struct inode, i_list);
+			trace_wbc_writeback_wait(&wbc, wb->bdi);
 			inode_wait_for_writeback(inode);
 		}
 		spin_unlock(&inode_lock);
@@ -685,17 +682,17 @@ static long wb_writeback(struct bdi_writeback *wb,
  * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
 static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+get_next_work_item(struct backing_dev_info *bdi)
 {
 	struct wb_writeback_work *work = NULL;
 
-	spin_lock(&bdi->wb_lock);
+	spin_lock_bh(&bdi->wb_lock);
 	if (!list_empty(&bdi->work_list)) {
 		work = list_entry(bdi->work_list.next,
				  struct wb_writeback_work, list);
 		list_del_init(&work->list);
 	}
-	spin_unlock(&bdi->wb_lock);
+	spin_unlock_bh(&bdi->wb_lock);
 	return work;
 }
 
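Note that bdi->wb_lock is now taken with spin_lock_bh()/spin_unlock_bh() both here and in bdi_queue_work() above. The _bh variants are the usual answer when the same lock can also be taken from softirq context; in this series that is most likely the per-bdi wake-up timer (bdi_wakeup_thread_delayed(), used near the end of this diff), whose callback runs as a timer softirq. The toy module below sketches the general pattern under that assumption -- it is not code from this change, and it deliberately uses the old setup_timer()/unsigned-long callback API of this kernel generation.

/* Toy example: one lock shared between process context and a timer callback. */
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>

static DEFINE_SPINLOCK(demo_lock);
static struct timer_list demo_timer;
static int demo_pending;

/* Timer callbacks run in softirq context; a plain spin_lock() is enough here. */
static void demo_timer_fn(unsigned long unused)
{
	spin_lock(&demo_lock);
	if (demo_pending) {
		demo_pending = 0;
		pr_info("timer consumed the pending flag\n");
	}
	spin_unlock(&demo_lock);
}

static int __init demo_init(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0);

	/*
	 * Process context must disable bottom halves while holding the lock:
	 * if the timer fired on this CPU while we held demo_lock with
	 * softirqs enabled, its callback would spin forever on the lock we
	 * already own.
	 */
	spin_lock_bh(&demo_lock);
	demo_pending = 1;
	spin_unlock_bh(&demo_lock);

	mod_timer(&demo_timer, jiffies + HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");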
@@ -743,7 +740,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 	struct wb_writeback_work *work;
 	long wrote = 0;
 
-	while ((work = get_next_work_item(bdi, wb)) != NULL) {
+	set_bit(BDI_writeback_running, &wb->bdi->state);
+	while ((work = get_next_work_item(bdi)) != NULL) {
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 * because this thread is exiting now.
@@ -751,6 +749,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		if (force_wait)
 			work->sync_mode = WB_SYNC_ALL;
 
+		trace_writeback_exec(bdi, work);
+
 		wrote += wb_writeback(wb, work);
 
 		/*
@@ -767,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 	 * Check for periodic writeback, kupdated() style
 	 */
 	wrote += wb_check_old_data_flush(wb);
+	clear_bit(BDI_writeback_running, &wb->bdi->state);
 
 	return wrote;
 }
@@ -775,47 +776,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct bdi_writeback *wb)
+int bdi_writeback_thread(void *data)
 {
-	unsigned long last_active = jiffies;
-	unsigned long wait_jiffies = -1UL;
+	struct bdi_writeback *wb = data;
+	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
+	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+	wb->last_active = jiffies;
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(current, 0);
+
+	trace_writeback_thread_start(bdi);
+
 	while (!kthread_should_stop()) {
+		/*
+		 * Remove own delayed wake-up timer, since we are already awake
+		 * and we'll take care of the preriodic write-back.
+		 */
+		del_timer(&wb->wakeup_timer);
+
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
-			last_active = jiffies;
-		else if (wait_jiffies != -1UL) {
-			unsigned long max_idle;
+			wb->last_active = jiffies;
 
-			/*
-			 * Longest period of inactivity that we tolerate. If we
-			 * see dirty data again later, the task will get
-			 * recreated automatically.
-			 */
-			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-			if (time_after(jiffies, max_idle + last_active))
-				break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!list_empty(&bdi->work_list)) {
+			__set_current_state(TASK_RUNNING);
+			continue;
 		}
 
-		if (dirty_writeback_interval) {
-			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-			schedule_timeout_interruptible(wait_jiffies);
-		} else {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (list_empty_careful(&wb->bdi->work_list) &&
-			    !kthread_should_stop())
-				schedule();
-			__set_current_state(TASK_RUNNING);
+		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
+		else {
+			/*
+			 * We have nothing to do, so can go sleep without any
+			 * timeout and save power. When a work is queued or
+			 * something is made dirty - we will be woken up.
+			 */
+			schedule();
 		}
 
 		try_to_freeze();
 	}
 
+	/* Flush any work that raced with us exiting */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
+
+	trace_writeback_thread_stop(bdi);
 	return 0;
 }
 
+
 /*
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
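The rewritten main loop above is an instance of the classic race-free kthread sleep idiom: mark the task TASK_INTERRUPTIBLE first, then re-check bdi->work_list, and only then call schedule(), so a wake_up_process() issued by bdi_queue_work() between the check and the sleep merely turns the schedule() into a no-op instead of being lost. A self-contained toy module demonstrating the same idiom follows; it is illustrative only, and the names and the work counter are invented for the example.

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/atomic.h>

static struct task_struct *worker;
static atomic_t pending = ATOMIC_INIT(0);

static int worker_fn(void *unused)
{
	while (!kthread_should_stop()) {
		/* Consume whatever was queued while we were awake. */
		while (atomic_add_unless(&pending, -1, 0))
			pr_info("handled one work item\n");

		/*
		 * Declare that we are about to sleep *before* the final
		 * check.  A producer that increments `pending` and calls
		 * wake_up_process() after this point flips us back to
		 * TASK_RUNNING, so the schedule() below returns immediately
		 * instead of losing the wakeup.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (atomic_read(&pending) || kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			continue;
		}
		schedule();
	}
	return 0;
}

static int __init demo_init(void)
{
	worker = kthread_run(worker_fn, NULL, "sleep-demo");
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	/* Producer side: queue one item, then wake the thread. */
	atomic_inc(&pending);
	wake_up_process(worker);
	return 0;
}

static void __exit demo_exit(void)
{
	kthread_stop(worker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

kthread_stop() itself wakes the thread, which is why the kthread_should_stop() test inside the guarded check is enough for the loop to exit cleanly -- the same reason the flusher thread above can be stopped while parked in schedule().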
@@ -890,6 +910,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = NULL;
+	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -935,7 +957,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			if (hlist_unhashed(&inode->i_hash))
 				goto out;
 		}
-		if (inode->i_state & (I_FREEING|I_CLEAR))
+		if (inode->i_state & I_FREEING)
 			goto out;
 
 		/*
@@ -943,22 +965,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
-			struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-			struct backing_dev_info *bdi = wb->bdi;
-
-			if (bdi_cap_writeback_dirty(bdi) &&
-			    !test_bit(BDI_registered, &bdi->state)) {
-				WARN_ON(1);
-				printk(KERN_ERR "bdi-%s not registered\n",
-								bdi->name);
+			bdi = inode_to_bdi(inode);
+
+			if (bdi_cap_writeback_dirty(bdi)) {
+				WARN(!test_bit(BDI_registered, &bdi->state),
+				     "bdi-%s not registered\n", bdi->name);
+
+				/*
+				 * If this is the first dirty inode for this
+				 * bdi, we have to wake-up the corresponding
+				 * bdi thread to make sure background
+				 * write-back happens later.
+				 */
+				if (!wb_has_dirty_io(&bdi->wb))
+					wakeup_bdi = true;
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &wb->b_dirty);
+			list_move(&inode->i_list, &bdi->wb.b_dirty);
 		}
 	}
 out:
 	spin_unlock(&inode_lock);
+
+	if (wakeup_bdi)
+		bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
@@ -1001,7 +1032,7 @@ static void wait_sb_inodes(struct super_block *sb)
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		struct address_space *mapping;
 
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 		mapping = inode->i_mapping;
 		if (mapping->nrpages == 0)