diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 255 |
1 file changed, 143 insertions, 112 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d5be1693ac9..7d9d06ba184 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -26,15 +26,9 @@ | |||
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/tracepoint.h> | ||
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
31 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
32 | |||
33 | /* | ||
34 | * We don't actually have pdflush, but this one is exported through /proc... | ||
35 | */ | ||
36 | int nr_pdflush_threads; | ||
37 | |||
38 | /* | 32 | /* |
39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 33 | * Passed into wb_writeback(), essentially a subset of writeback_control |
40 | */ | 34 | */ |
@@ -50,6 +44,21 @@ struct wb_writeback_work { | |||
50 | struct completion *done; /* set if the caller waits */ | 44 | struct completion *done; /* set if the caller waits */ |
51 | }; | 45 | }; |
52 | 46 | ||
47 | /* | ||
48 | * Include the creation of the trace points after defining the | ||
49 | * wb_writeback_work structure so that the definition remains local to this | ||
50 | * file. | ||
51 | */ | ||
52 | #define CREATE_TRACE_POINTS | ||
53 | #include <trace/events/writeback.h> | ||
54 | |||
55 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
56 | |||
57 | /* | ||
58 | * We don't actually have pdflush, but this one is exported through /proc... | ||
59 | */ | ||
60 | int nr_pdflush_threads; | ||
61 | |||
53 | /** | 62 | /** |
54 | * writeback_in_progress - determine whether there is writeback in progress | 63 | * writeback_in_progress - determine whether there is writeback in progress |
55 | * @bdi: the device's backing_dev_info structure. | 64 | * @bdi: the device's backing_dev_info structure. |
@@ -59,28 +68,27 @@ struct wb_writeback_work { | |||
59 | */ | 68 | */ |
60 | int writeback_in_progress(struct backing_dev_info *bdi) | 69 | int writeback_in_progress(struct backing_dev_info *bdi) |
61 | { | 70 | { |
62 | return !list_empty(&bdi->work_list); | 71 | return test_bit(BDI_writeback_running, &bdi->state); |
63 | } | 72 | } |
64 | 73 | ||
65 | static void bdi_queue_work(struct backing_dev_info *bdi, | 74 | static void bdi_queue_work(struct backing_dev_info *bdi, |
66 | struct wb_writeback_work *work) | 75 | struct wb_writeback_work *work) |
67 | { | 76 | { |
68 | spin_lock(&bdi->wb_lock); | 77 | trace_writeback_queue(bdi, work); |
69 | list_add_tail(&work->list, &bdi->work_list); | ||
70 | spin_unlock(&bdi->wb_lock); | ||
71 | 78 | ||
72 | /* | 79 | spin_lock_bh(&bdi->wb_lock); |
73 | * If the default thread isn't there, make sure we add it. When | 80 | list_add_tail(&work->list, &bdi->work_list); |
74 | * it gets created and wakes up, we'll run this work. | 81 | if (bdi->wb.task) { |
75 | */ | 82 | wake_up_process(bdi->wb.task); |
76 | if (unlikely(list_empty_careful(&bdi->wb_list))) | 83 | } else { |
84 | /* | ||
85 | * The bdi thread isn't there, wake up the forker thread which | ||
86 | * will create and run it. | ||
87 | */ | ||
88 | trace_writeback_nothread(bdi, work); | ||
77 | wake_up_process(default_backing_dev_info.wb.task); | 89 | wake_up_process(default_backing_dev_info.wb.task); |
78 | else { | ||
79 | struct bdi_writeback *wb = &bdi->wb; | ||
80 | |||
81 | if (wb->task) | ||
82 | wake_up_process(wb->task); | ||
83 | } | 90 | } |
91 | spin_unlock_bh(&bdi->wb_lock); | ||
84 | } | 92 | } |
85 | 93 | ||
86 | static void | 94 | static void |
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
95 | */ | 103 | */ |
96 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 104 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
97 | if (!work) { | 105 | if (!work) { |
98 | if (bdi->wb.task) | 106 | if (bdi->wb.task) { |
107 | trace_writeback_nowork(bdi); | ||
99 | wake_up_process(bdi->wb.task); | 108 | wake_up_process(bdi->wb.task); |
109 | } | ||
100 | return; | 110 | return; |
101 | } | 111 | } |
102 | 112 | ||
@@ -239,10 +249,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
239 | 249 | ||
240 | /* | 250 | /* |
241 | * Queue all expired dirty inodes for io, eldest first. | 251 | * Queue all expired dirty inodes for io, eldest first. |
252 | * Before | ||
253 | * newly dirtied b_dirty b_io b_more_io | ||
254 | * =============> gf edc BA | ||
255 | * After | ||
256 | * newly dirtied b_dirty b_io b_more_io | ||
257 | * =============> g fBAedc | ||
258 | * | | ||
259 | * +--> dequeue for IO | ||
242 | */ | 260 | */ |
243 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 261 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
244 | { | 262 | { |
245 | list_splice_init(&wb->b_more_io, wb->b_io.prev); | 263 | list_splice_init(&wb->b_more_io, &wb->b_io); |
246 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 264 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
247 | } | 265 | } |
248 | 266 | ||
@@ -352,63 +370,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
352 | 370 | ||
353 | spin_lock(&inode_lock); | 371 | spin_lock(&inode_lock); |
354 | inode->i_state &= ~I_SYNC; | 372 | inode->i_state &= ~I_SYNC; |
355 | if (!(inode->i_state & (I_FREEING | I_CLEAR))) { | 373 | if (!(inode->i_state & I_FREEING)) { |
356 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 374 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
357 | /* | ||
358 | * More pages get dirtied by a fast dirtier. | ||
359 | */ | ||
360 | goto select_queue; | ||
361 | } else if (inode->i_state & I_DIRTY) { | ||
362 | /* | ||
363 | * At least XFS will redirty the inode during the | ||
364 | * writeback (delalloc) and on io completion (isize). | ||
365 | */ | ||
366 | redirty_tail(inode); | ||
367 | } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
368 | /* | 375 | /* |
369 | * We didn't write back all the pages. nfs_writepages() | 376 | * We didn't write back all the pages. nfs_writepages() |
370 | * sometimes bales out without doing anything. Redirty | 377 | * sometimes bales out without doing anything. |
371 | * the inode; Move it from b_io onto b_more_io/b_dirty. | ||
372 | */ | ||
373 | /* | ||
374 | * akpm: if the caller was the kupdate function we put | ||
375 | * this inode at the head of b_dirty so it gets first | ||
376 | * consideration. Otherwise, move it to the tail, for | ||
377 | * the reasons described there. I'm not really sure | ||
378 | * how much sense this makes. Presumably I had a good | ||
379 | * reasons for doing it this way, and I'd rather not | ||
380 | * muck with it at present. | ||
381 | */ | 378 | */ |
382 | if (wbc->for_kupdate) { | 379 | inode->i_state |= I_DIRTY_PAGES; |
380 | if (wbc->nr_to_write <= 0) { | ||
383 | /* | 381 | /* |
384 | * For the kupdate function we move the inode | 382 | * slice used up: queue for next turn |
385 | * to b_more_io so it will get more writeout as | ||
386 | * soon as the queue becomes uncongested. | ||
387 | */ | 383 | */ |
388 | inode->i_state |= I_DIRTY_PAGES; | 384 | requeue_io(inode); |
389 | select_queue: | ||
390 | if (wbc->nr_to_write <= 0) { | ||
391 | /* | ||
392 | * slice used up: queue for next turn | ||
393 | */ | ||
394 | requeue_io(inode); | ||
395 | } else { | ||
396 | /* | ||
397 | * somehow blocked: retry later | ||
398 | */ | ||
399 | redirty_tail(inode); | ||
400 | } | ||
401 | } else { | 385 | } else { |
402 | /* | 386 | /* |
403 | * Otherwise fully redirty the inode so that | 387 | * Writeback blocked by something other than |
404 | * other inodes on this superblock will get some | 388 | * congestion. Delay the inode for some time to |
405 | * writeout. Otherwise heavy writing to one | 389 | * avoid spinning on the CPU (100% iowait) |
406 | * file would indefinitely suspend writeout of | 390 | * retrying writeback of the dirty page/inode |
407 | * all the other files. | 391 | * that cannot be performed immediately. |
408 | */ | 392 | */ |
409 | inode->i_state |= I_DIRTY_PAGES; | ||
410 | redirty_tail(inode); | 393 | redirty_tail(inode); |
411 | } | 394 | } |
395 | } else if (inode->i_state & I_DIRTY) { | ||
396 | /* | ||
397 | * Filesystems can dirty the inode during writeback | ||
398 | * operations, such as delayed allocation during | ||
399 | * submission or metadata updates after data IO | ||
400 | * completion. | ||
401 | */ | ||
402 | redirty_tail(inode); | ||
412 | } else if (atomic_read(&inode->i_count)) { | 403 | } else if (atomic_read(&inode->i_count)) { |
413 | /* | 404 | /* |
414 | * The inode is clean, inuse | 405 | * The inode is clean, inuse |
@@ -499,7 +490,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
499 | if (inode_dirtied_after(inode, wbc->wb_start)) | 490 | if (inode_dirtied_after(inode, wbc->wb_start)) |
500 | return 1; | 491 | return 1; |
501 | 492 | ||
502 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 493 | BUG_ON(inode->i_state & I_FREEING); |
503 | __iget(inode); | 494 | __iget(inode); |
504 | pages_skipped = wbc->pages_skipped; | 495 | pages_skipped = wbc->pages_skipped; |
505 | writeback_single_inode(inode, wbc); | 496 | writeback_single_inode(inode, wbc); |
@@ -530,7 +521,8 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
530 | { | 521 | { |
531 | int ret = 0; | 522 | int ret = 0; |
532 | 523 | ||
533 | wbc->wb_start = jiffies; /* livelock avoidance */ | 524 | if (!wbc->wb_start) |
525 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
534 | spin_lock(&inode_lock); | 526 | spin_lock(&inode_lock); |
535 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 527 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
536 | queue_io(wb, wbc->older_than_this); | 528 | queue_io(wb, wbc->older_than_this); |
@@ -559,7 +551,6 @@ static void __writeback_inodes_sb(struct super_block *sb, | |||
559 | { | 551 | { |
560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 552 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
561 | 553 | ||
562 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
563 | spin_lock(&inode_lock); | 554 | spin_lock(&inode_lock); |
564 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 555 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
565 | queue_io(wb, wbc->older_than_this); | 556 | queue_io(wb, wbc->older_than_this); |
@@ -580,7 +571,7 @@ static inline bool over_bground_thresh(void) | |||
580 | { | 571 | { |
581 | unsigned long background_thresh, dirty_thresh; | 572 | unsigned long background_thresh, dirty_thresh; |
582 | 573 | ||
583 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | 574 | global_dirty_limits(&background_thresh, &dirty_thresh); |
584 | 575 | ||
585 | return (global_page_state(NR_FILE_DIRTY) + | 576 | return (global_page_state(NR_FILE_DIRTY) + |
586 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | 577 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); |
@@ -625,6 +616,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
625 | wbc.range_end = LLONG_MAX; | 616 | wbc.range_end = LLONG_MAX; |
626 | } | 617 | } |
627 | 618 | ||
619 | wbc.wb_start = jiffies; /* livelock avoidance */ | ||
628 | for (;;) { | 620 | for (;;) { |
629 | /* | 621 | /* |
630 | * Stop writeback when nr_pages has been consumed | 622 | * Stop writeback when nr_pages has been consumed |
@@ -642,10 +634,14 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
642 | wbc.more_io = 0; | 634 | wbc.more_io = 0; |
643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 635 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
644 | wbc.pages_skipped = 0; | 636 | wbc.pages_skipped = 0; |
637 | |||
638 | trace_wbc_writeback_start(&wbc, wb->bdi); | ||
645 | if (work->sb) | 639 | if (work->sb) |
646 | __writeback_inodes_sb(work->sb, wb, &wbc); | 640 | __writeback_inodes_sb(work->sb, wb, &wbc); |
647 | else | 641 | else |
648 | writeback_inodes_wb(wb, &wbc); | 642 | writeback_inodes_wb(wb, &wbc); |
643 | trace_wbc_writeback_written(&wbc, wb->bdi); | ||
644 | |||
649 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 645 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
650 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 646 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
651 | 647 | ||
@@ -673,6 +669,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
673 | if (!list_empty(&wb->b_more_io)) { | 669 | if (!list_empty(&wb->b_more_io)) { |
674 | inode = list_entry(wb->b_more_io.prev, | 670 | inode = list_entry(wb->b_more_io.prev, |
675 | struct inode, i_list); | 671 | struct inode, i_list); |
672 | trace_wbc_writeback_wait(&wbc, wb->bdi); | ||
676 | inode_wait_for_writeback(inode); | 673 | inode_wait_for_writeback(inode); |
677 | } | 674 | } |
678 | spin_unlock(&inode_lock); | 675 | spin_unlock(&inode_lock); |
@@ -685,17 +682,17 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
685 | * Return the next wb_writeback_work struct that hasn't been processed yet. | 682 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
686 | */ | 683 | */ |
687 | static struct wb_writeback_work * | 684 | static struct wb_writeback_work * |
688 | get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) | 685 | get_next_work_item(struct backing_dev_info *bdi) |
689 | { | 686 | { |
690 | struct wb_writeback_work *work = NULL; | 687 | struct wb_writeback_work *work = NULL; |
691 | 688 | ||
692 | spin_lock(&bdi->wb_lock); | 689 | spin_lock_bh(&bdi->wb_lock); |
693 | if (!list_empty(&bdi->work_list)) { | 690 | if (!list_empty(&bdi->work_list)) { |
694 | work = list_entry(bdi->work_list.next, | 691 | work = list_entry(bdi->work_list.next, |
695 | struct wb_writeback_work, list); | 692 | struct wb_writeback_work, list); |
696 | list_del_init(&work->list); | 693 | list_del_init(&work->list); |
697 | } | 694 | } |
698 | spin_unlock(&bdi->wb_lock); | 695 | spin_unlock_bh(&bdi->wb_lock); |
699 | return work; | 696 | return work; |
700 | } | 697 | } |
701 | 698 | ||
@@ -743,7 +740,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
743 | struct wb_writeback_work *work; | 740 | struct wb_writeback_work *work; |
744 | long wrote = 0; | 741 | long wrote = 0; |
745 | 742 | ||
746 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 743 | set_bit(BDI_writeback_running, &wb->bdi->state); |
744 | while ((work = get_next_work_item(bdi)) != NULL) { | ||
747 | /* | 745 | /* |
748 | * Override sync mode, in case we must wait for completion | 746 | * Override sync mode, in case we must wait for completion |
749 | * because this thread is exiting now. | 747 | * because this thread is exiting now. |
@@ -751,6 +749,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
751 | if (force_wait) | 749 | if (force_wait) |
752 | work->sync_mode = WB_SYNC_ALL; | 750 | work->sync_mode = WB_SYNC_ALL; |
753 | 751 | ||
752 | trace_writeback_exec(bdi, work); | ||
753 | |||
754 | wrote += wb_writeback(wb, work); | 754 | wrote += wb_writeback(wb, work); |
755 | 755 | ||
756 | /* | 756 | /* |
@@ -767,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
767 | * Check for periodic writeback, kupdated() style | 767 | * Check for periodic writeback, kupdated() style |
768 | */ | 768 | */ |
769 | wrote += wb_check_old_data_flush(wb); | 769 | wrote += wb_check_old_data_flush(wb); |
770 | clear_bit(BDI_writeback_running, &wb->bdi->state); | ||
770 | 771 | ||
771 | return wrote; | 772 | return wrote; |
772 | } | 773 | } |
@@ -775,47 +776,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
775 | * Handle writeback of dirty data for the device backed by this bdi. Also | 776 | * Handle writeback of dirty data for the device backed by this bdi. Also |
776 | * wakes up periodically and does kupdated style flushing. | 777 | * wakes up periodically and does kupdated style flushing. |
777 | */ | 778 | */ |
778 | int bdi_writeback_task(struct bdi_writeback *wb) | 779 | int bdi_writeback_thread(void *data) |
779 | { | 780 | { |
780 | unsigned long last_active = jiffies; | 781 | struct bdi_writeback *wb = data; |
781 | unsigned long wait_jiffies = -1UL; | 782 | struct backing_dev_info *bdi = wb->bdi; |
782 | long pages_written; | 783 | long pages_written; |
783 | 784 | ||
785 | current->flags |= PF_FLUSHER | PF_SWAPWRITE; | ||
786 | set_freezable(); | ||
787 | wb->last_active = jiffies; | ||
788 | |||
789 | /* | ||
790 | * Our parent may run at a different priority, just set us to normal | ||
791 | */ | ||
792 | set_user_nice(current, 0); | ||
793 | |||
794 | trace_writeback_thread_start(bdi); | ||
795 | |||
784 | while (!kthread_should_stop()) { | 796 | while (!kthread_should_stop()) { |
797 | /* | ||
798 | * Remove own delayed wake-up timer, since we are already awake | ||
799 | * and we'll take care of the periodic write-back. | ||
800 | */ | ||
801 | del_timer(&wb->wakeup_timer); | ||
802 | |||
785 | pages_written = wb_do_writeback(wb, 0); | 803 | pages_written = wb_do_writeback(wb, 0); |
786 | 804 | ||
805 | trace_writeback_pages_written(pages_written); | ||
806 | |||
787 | if (pages_written) | 807 | if (pages_written) |
788 | last_active = jiffies; | 808 | wb->last_active = jiffies; |
789 | else if (wait_jiffies != -1UL) { | ||
790 | unsigned long max_idle; | ||
791 | 809 | ||
792 | /* | 810 | set_current_state(TASK_INTERRUPTIBLE); |
793 | * Longest period of inactivity that we tolerate. If we | 811 | if (!list_empty(&bdi->work_list)) { |
794 | * see dirty data again later, the task will get | 812 | __set_current_state(TASK_RUNNING); |
795 | * recreated automatically. | 813 | continue; |
796 | */ | ||
797 | max_idle = max(5UL * 60 * HZ, wait_jiffies); | ||
798 | if (time_after(jiffies, max_idle + last_active)) | ||
799 | break; | ||
800 | } | 814 | } |
801 | 815 | ||
802 | if (dirty_writeback_interval) { | 816 | if (wb_has_dirty_io(wb) && dirty_writeback_interval) |
803 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 817 | schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); |
804 | schedule_timeout_interruptible(wait_jiffies); | 818 | else { |
805 | } else { | 819 | /* |
806 | set_current_state(TASK_INTERRUPTIBLE); | 820 | * We have nothing to do, so can go sleep without any |
807 | if (list_empty_careful(&wb->bdi->work_list) && | 821 | * timeout and save power. When a work is queued or |
808 | !kthread_should_stop()) | 822 | * something is made dirty - we will be woken up. |
809 | schedule(); | 823 | */ |
810 | __set_current_state(TASK_RUNNING); | 824 | schedule(); |
811 | } | 825 | } |
812 | 826 | ||
813 | try_to_freeze(); | 827 | try_to_freeze(); |
814 | } | 828 | } |
815 | 829 | ||
830 | /* Flush any work that raced with us exiting */ | ||
831 | if (!list_empty(&bdi->work_list)) | ||
832 | wb_do_writeback(wb, 1); | ||
833 | |||
834 | trace_writeback_thread_stop(bdi); | ||
816 | return 0; | 835 | return 0; |
817 | } | 836 | } |
818 | 837 | ||
838 | |||
819 | /* | 839 | /* |
820 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | 840 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
821 | * the whole world. | 841 | * the whole world. |
@@ -890,6 +910,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) | |||
890 | void __mark_inode_dirty(struct inode *inode, int flags) | 910 | void __mark_inode_dirty(struct inode *inode, int flags) |
891 | { | 911 | { |
892 | struct super_block *sb = inode->i_sb; | 912 | struct super_block *sb = inode->i_sb; |
913 | struct backing_dev_info *bdi = NULL; | ||
914 | bool wakeup_bdi = false; | ||
893 | 915 | ||
894 | /* | 916 | /* |
895 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 917 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -935,7 +957,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
935 | if (hlist_unhashed(&inode->i_hash)) | 957 | if (hlist_unhashed(&inode->i_hash)) |
936 | goto out; | 958 | goto out; |
937 | } | 959 | } |
938 | if (inode->i_state & (I_FREEING|I_CLEAR)) | 960 | if (inode->i_state & I_FREEING) |
939 | goto out; | 961 | goto out; |
940 | 962 | ||
941 | /* | 963 | /* |
@@ -943,22 +965,31 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
943 | * reposition it (that would break b_dirty time-ordering). | 965 | * reposition it (that would break b_dirty time-ordering). |
944 | */ | 966 | */ |
945 | if (!was_dirty) { | 967 | if (!was_dirty) { |
946 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 968 | bdi = inode_to_bdi(inode); |
947 | struct backing_dev_info *bdi = wb->bdi; | 969 | |
948 | 970 | if (bdi_cap_writeback_dirty(bdi)) { | |
949 | if (bdi_cap_writeback_dirty(bdi) && | 971 | WARN(!test_bit(BDI_registered, &bdi->state), |
950 | !test_bit(BDI_registered, &bdi->state)) { | 972 | "bdi-%s not registered\n", bdi->name); |
951 | WARN_ON(1); | 973 | |
952 | printk(KERN_ERR "bdi-%s not registered\n", | 974 | /* |
953 | bdi->name); | 975 | * If this is the first dirty inode for this |
976 | * bdi, we have to wake-up the corresponding | ||
977 | * bdi thread to make sure background | ||
978 | * write-back happens later. | ||
979 | */ | ||
980 | if (!wb_has_dirty_io(&bdi->wb)) | ||
981 | wakeup_bdi = true; | ||
954 | } | 982 | } |
955 | 983 | ||
956 | inode->dirtied_when = jiffies; | 984 | inode->dirtied_when = jiffies; |
957 | list_move(&inode->i_list, &wb->b_dirty); | 985 | list_move(&inode->i_list, &bdi->wb.b_dirty); |
958 | } | 986 | } |
959 | } | 987 | } |
960 | out: | 988 | out: |
961 | spin_unlock(&inode_lock); | 989 | spin_unlock(&inode_lock); |
990 | |||
991 | if (wakeup_bdi) | ||
992 | bdi_wakeup_thread_delayed(bdi); | ||
962 | } | 993 | } |
963 | EXPORT_SYMBOL(__mark_inode_dirty); | 994 | EXPORT_SYMBOL(__mark_inode_dirty); |
964 | 995 | ||
@@ -1001,7 +1032,7 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1001 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1032 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1002 | struct address_space *mapping; | 1033 | struct address_space *mapping; |
1003 | 1034 | ||
1004 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 1035 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
1005 | continue; | 1036 | continue; |
1006 | mapping = inode->i_mapping; | 1037 | mapping = inode->i_mapping; |
1007 | if (mapping->nrpages == 0) | 1038 | if (mapping->nrpages == 0) |