diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 263 |
1 files changed, 151 insertions, 112 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d5be1693ac93..ab38fef1c9a1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -26,15 +26,9 @@ | |||
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/tracepoint.h> | ||
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
31 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
32 | |||
33 | /* | ||
34 | * We don't actually have pdflush, but this one is exported through /proc... | ||
35 | */ | ||
36 | int nr_pdflush_threads; | ||
37 | |||
38 | /* | 32 | /* |
39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 33 | * Passed into wb_writeback(), essentially a subset of writeback_control |
40 | */ | 34 | */ |
@@ -50,6 +44,19 @@ struct wb_writeback_work { | |||
50 | struct completion *done; /* set if the caller waits */ | 44 | struct completion *done; /* set if the caller waits */ |
51 | }; | 45 | }; |
52 | 46 | ||
47 | /* | ||
48 | * Include the creation of the trace points after defining the | ||
49 | * wb_writeback_work structure so that the definition remains local to this | ||
50 | * file. | ||
51 | */ | ||
52 | #define CREATE_TRACE_POINTS | ||
53 | #include <trace/events/writeback.h> | ||
54 | |||
55 | /* | ||
56 | * We don't actually have pdflush, but this one is exported through /proc... | ||
57 | */ | ||
58 | int nr_pdflush_threads; | ||
59 | |||
53 | /** | 60 | /** |
54 | * writeback_in_progress - determine whether there is writeback in progress | 61 | * writeback_in_progress - determine whether there is writeback in progress |
55 | * @bdi: the device's backing_dev_info structure. | 62 | * @bdi: the device's backing_dev_info structure. |
@@ -59,28 +66,37 @@ struct wb_writeback_work { | |||
59 | */ | 66 | */ |
60 | int writeback_in_progress(struct backing_dev_info *bdi) | 67 | int writeback_in_progress(struct backing_dev_info *bdi) |
61 | { | 68 | { |
62 | return !list_empty(&bdi->work_list); | 69 | return test_bit(BDI_writeback_running, &bdi->state); |
70 | } | ||
71 | |||
72 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) | ||
73 | { | ||
74 | struct super_block *sb = inode->i_sb; | ||
75 | |||
76 | if (strcmp(sb->s_type->name, "bdev") == 0) | ||
77 | return inode->i_mapping->backing_dev_info; | ||
78 | |||
79 | return sb->s_bdi; | ||
63 | } | 80 | } |
64 | 81 | ||
65 | static void bdi_queue_work(struct backing_dev_info *bdi, | 82 | static void bdi_queue_work(struct backing_dev_info *bdi, |
66 | struct wb_writeback_work *work) | 83 | struct wb_writeback_work *work) |
67 | { | 84 | { |
68 | spin_lock(&bdi->wb_lock); | 85 | trace_writeback_queue(bdi, work); |
69 | list_add_tail(&work->list, &bdi->work_list); | ||
70 | spin_unlock(&bdi->wb_lock); | ||
71 | 86 | ||
72 | /* | 87 | spin_lock_bh(&bdi->wb_lock); |
73 | * If the default thread isn't there, make sure we add it. When | 88 | list_add_tail(&work->list, &bdi->work_list); |
74 | * it gets created and wakes up, we'll run this work. | 89 | if (bdi->wb.task) { |
75 | */ | 90 | wake_up_process(bdi->wb.task); |
76 | if (unlikely(list_empty_careful(&bdi->wb_list))) | 91 | } else { |
92 | /* | ||
93 | * The bdi thread isn't there, wake up the forker thread which | ||
94 | * will create and run it. | ||
95 | */ | ||
96 | trace_writeback_nothread(bdi, work); | ||
77 | wake_up_process(default_backing_dev_info.wb.task); | 97 | wake_up_process(default_backing_dev_info.wb.task); |
78 | else { | ||
79 | struct bdi_writeback *wb = &bdi->wb; | ||
80 | |||
81 | if (wb->task) | ||
82 | wake_up_process(wb->task); | ||
83 | } | 98 | } |
99 | spin_unlock_bh(&bdi->wb_lock); | ||
84 | } | 100 | } |
85 | 101 | ||
86 | static void | 102 | static void |
@@ -95,8 +111,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
95 | */ | 111 | */ |
96 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 112 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
97 | if (!work) { | 113 | if (!work) { |
98 | if (bdi->wb.task) | 114 | if (bdi->wb.task) { |
115 | trace_writeback_nowork(bdi); | ||
99 | wake_up_process(bdi->wb.task); | 116 | wake_up_process(bdi->wb.task); |
117 | } | ||
100 | return; | 118 | return; |
101 | } | 119 | } |
102 | 120 | ||
@@ -239,10 +257,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
239 | 257 | ||
240 | /* | 258 | /* |
241 | * Queue all expired dirty inodes for io, eldest first. | 259 | * Queue all expired dirty inodes for io, eldest first. |
260 | * Before | ||
261 | * newly dirtied b_dirty b_io b_more_io | ||
262 | * =============> gf edc BA | ||
263 | * After | ||
264 | * newly dirtied b_dirty b_io b_more_io | ||
265 | * =============> g fBAedc | ||
266 | * | | ||
267 | * +--> dequeue for IO | ||
242 | */ | 268 | */ |
243 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 269 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
244 | { | 270 | { |
245 | list_splice_init(&wb->b_more_io, wb->b_io.prev); | 271 | list_splice_init(&wb->b_more_io, &wb->b_io); |
246 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 272 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
247 | } | 273 | } |
248 | 274 | ||
@@ -352,63 +378,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
352 | 378 | ||
353 | spin_lock(&inode_lock); | 379 | spin_lock(&inode_lock); |
354 | inode->i_state &= ~I_SYNC; | 380 | inode->i_state &= ~I_SYNC; |
355 | if (!(inode->i_state & (I_FREEING | I_CLEAR))) { | 381 | if (!(inode->i_state & I_FREEING)) { |
356 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 382 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
357 | /* | ||
358 | * More pages get dirtied by a fast dirtier. | ||
359 | */ | ||
360 | goto select_queue; | ||
361 | } else if (inode->i_state & I_DIRTY) { | ||
362 | /* | ||
363 | * At least XFS will redirty the inode during the | ||
364 | * writeback (delalloc) and on io completion (isize). | ||
365 | */ | ||
366 | redirty_tail(inode); | ||
367 | } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
368 | /* | 383 | /* |
369 | * We didn't write back all the pages. nfs_writepages() | 384 | * We didn't write back all the pages. nfs_writepages() |
370 | * sometimes bails out without doing anything. Redirty | 385 | * sometimes bails out without doing anything. |
371 | * the inode; Move it from b_io onto b_more_io/b_dirty. | ||
372 | */ | 386 | */ |
373 | /* | 387 | inode->i_state |= I_DIRTY_PAGES; |
374 | * akpm: if the caller was the kupdate function we put | 388 | if (wbc->nr_to_write <= 0) { |
375 | * this inode at the head of b_dirty so it gets first | ||
376 | * consideration. Otherwise, move it to the tail, for | ||
377 | * the reasons described there. I'm not really sure | ||
378 | * how much sense this makes. Presumably I had a good | ||
379 | * reasons for doing it this way, and I'd rather not | ||
380 | * muck with it at present. | ||
381 | */ | ||
382 | if (wbc->for_kupdate) { | ||
383 | /* | 389 | /* |
384 | * For the kupdate function we move the inode | 390 | * slice used up: queue for next turn |
385 | * to b_more_io so it will get more writeout as | ||
386 | * soon as the queue becomes uncongested. | ||
387 | */ | 391 | */ |
388 | inode->i_state |= I_DIRTY_PAGES; | 392 | requeue_io(inode); |
389 | select_queue: | ||
390 | if (wbc->nr_to_write <= 0) { | ||
391 | /* | ||
392 | * slice used up: queue for next turn | ||
393 | */ | ||
394 | requeue_io(inode); | ||
395 | } else { | ||
396 | /* | ||
397 | * somehow blocked: retry later | ||
398 | */ | ||
399 | redirty_tail(inode); | ||
400 | } | ||
401 | } else { | 393 | } else { |
402 | /* | 394 | /* |
403 | * Otherwise fully redirty the inode so that | 395 | * Writeback blocked by something other than |
404 | * other inodes on this superblock will get some | 396 | * congestion. Delay the inode for some time to |
405 | * writeout. Otherwise heavy writing to one | 397 | * avoid spinning on the CPU (100% iowait) |
406 | * file would indefinitely suspend writeout of | 398 | * retrying writeback of the dirty page/inode |
407 | * all the other files. | 399 | * that cannot be performed immediately. |
408 | */ | 400 | */ |
409 | inode->i_state |= I_DIRTY_PAGES; | ||
410 | redirty_tail(inode); | 401 | redirty_tail(inode); |
411 | } | 402 | } |
403 | } else if (inode->i_state & I_DIRTY) { | ||
404 | /* | ||
405 | * Filesystems can dirty the inode during writeback | ||
406 | * operations, such as delayed allocation during | ||
407 | * submission or metadata updates after data IO | ||
408 | * completion. | ||
409 | */ | ||
410 | redirty_tail(inode); | ||
412 | } else if (atomic_read(&inode->i_count)) { | 411 | } else if (atomic_read(&inode->i_count)) { |
413 | /* | 412 | /* |
414 | * The inode is clean, inuse | 413 | * The inode is clean, inuse |
@@ -499,7 +498,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
499 | if (inode_dirtied_after(inode, wbc->wb_start)) | 498 | if (inode_dirtied_after(inode, wbc->wb_start)) |
500 | return 1; | 499 | return 1; |
501 | 500 | ||
502 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 501 | BUG_ON(inode->i_state & I_FREEING); |
503 | __iget(inode); | 502 | __iget(inode); |
504 | pages_skipped = wbc->pages_skipped; | 503 | pages_skipped = wbc->pages_skipped; |
505 | writeback_single_inode(inode, wbc); | 504 | writeback_single_inode(inode, wbc); |
@@ -530,7 +529,8 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
530 | { | 529 | { |
531 | int ret = 0; | 530 | int ret = 0; |
532 | 531 | ||
533 | wbc->wb_start = jiffies; /* livelock avoidance */ | 532 | if (!wbc->wb_start) |
533 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
534 | spin_lock(&inode_lock); | 534 | spin_lock(&inode_lock); |
535 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 535 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
536 | queue_io(wb, wbc->older_than_this); | 536 | queue_io(wb, wbc->older_than_this); |
@@ -559,7 +559,6 @@ static void __writeback_inodes_sb(struct super_block *sb, | |||
559 | { | 559 | { |
560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
561 | 561 | ||
562 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
563 | spin_lock(&inode_lock); | 562 | spin_lock(&inode_lock); |
564 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 563 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
565 | queue_io(wb, wbc->older_than_this); | 564 | queue_io(wb, wbc->older_than_this); |
@@ -580,7 +579,7 @@ static inline bool over_bground_thresh(void) | |||
580 | { | 579 | { |
581 | unsigned long background_thresh, dirty_thresh; | 580 | unsigned long background_thresh, dirty_thresh; |
582 | 581 | ||
583 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | 582 | global_dirty_limits(&background_thresh, &dirty_thresh); |
584 | 583 | ||
585 | return (global_page_state(NR_FILE_DIRTY) + | 584 | return (global_page_state(NR_FILE_DIRTY) + |
586 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | 585 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); |
@@ -625,6 +624,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
625 | wbc.range_end = LLONG_MAX; | 624 | wbc.range_end = LLONG_MAX; |
626 | } | 625 | } |
627 | 626 | ||
627 | wbc.wb_start = jiffies; /* livelock avoidance */ | ||
628 | for (;;) { | 628 | for (;;) { |
629 | /* | 629 | /* |
630 | * Stop writeback when nr_pages has been consumed | 630 | * Stop writeback when nr_pages has been consumed |
@@ -642,10 +642,14 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
642 | wbc.more_io = 0; | 642 | wbc.more_io = 0; |
643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
644 | wbc.pages_skipped = 0; | 644 | wbc.pages_skipped = 0; |
645 | |||
646 | trace_wbc_writeback_start(&wbc, wb->bdi); | ||
645 | if (work->sb) | 647 | if (work->sb) |
646 | __writeback_inodes_sb(work->sb, wb, &wbc); | 648 | __writeback_inodes_sb(work->sb, wb, &wbc); |
647 | else | 649 | else |
648 | writeback_inodes_wb(wb, &wbc); | 650 | writeback_inodes_wb(wb, &wbc); |
651 | trace_wbc_writeback_written(&wbc, wb->bdi); | ||
652 | |||
649 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 653 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
650 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 654 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
651 | 655 | ||
@@ -673,6 +677,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
673 | if (!list_empty(&wb->b_more_io)) { | 677 | if (!list_empty(&wb->b_more_io)) { |
674 | inode = list_entry(wb->b_more_io.prev, | 678 | inode = list_entry(wb->b_more_io.prev, |
675 | struct inode, i_list); | 679 | struct inode, i_list); |
680 | trace_wbc_writeback_wait(&wbc, wb->bdi); | ||
676 | inode_wait_for_writeback(inode); | 681 | inode_wait_for_writeback(inode); |
677 | } | 682 | } |
678 | spin_unlock(&inode_lock); | 683 | spin_unlock(&inode_lock); |
@@ -685,17 +690,17 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
685 | * Return the next wb_writeback_work struct that hasn't been processed yet. | 690 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
686 | */ | 691 | */ |
687 | static struct wb_writeback_work * | 692 | static struct wb_writeback_work * |
688 | get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) | 693 | get_next_work_item(struct backing_dev_info *bdi) |
689 | { | 694 | { |
690 | struct wb_writeback_work *work = NULL; | 695 | struct wb_writeback_work *work = NULL; |
691 | 696 | ||
692 | spin_lock(&bdi->wb_lock); | 697 | spin_lock_bh(&bdi->wb_lock); |
693 | if (!list_empty(&bdi->work_list)) { | 698 | if (!list_empty(&bdi->work_list)) { |
694 | work = list_entry(bdi->work_list.next, | 699 | work = list_entry(bdi->work_list.next, |
695 | struct wb_writeback_work, list); | 700 | struct wb_writeback_work, list); |
696 | list_del_init(&work->list); | 701 | list_del_init(&work->list); |
697 | } | 702 | } |
698 | spin_unlock(&bdi->wb_lock); | 703 | spin_unlock_bh(&bdi->wb_lock); |
699 | return work; | 704 | return work; |
700 | } | 705 | } |
701 | 706 | ||
@@ -743,7 +748,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
743 | struct wb_writeback_work *work; | 748 | struct wb_writeback_work *work; |
744 | long wrote = 0; | 749 | long wrote = 0; |
745 | 750 | ||
746 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 751 | set_bit(BDI_writeback_running, &wb->bdi->state); |
752 | while ((work = get_next_work_item(bdi)) != NULL) { | ||
747 | /* | 753 | /* |
748 | * Override sync mode, in case we must wait for completion | 754 | * Override sync mode, in case we must wait for completion |
749 | * because this thread is exiting now. | 755 | * because this thread is exiting now. |
@@ -751,6 +757,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
751 | if (force_wait) | 757 | if (force_wait) |
752 | work->sync_mode = WB_SYNC_ALL; | 758 | work->sync_mode = WB_SYNC_ALL; |
753 | 759 | ||
760 | trace_writeback_exec(bdi, work); | ||
761 | |||
754 | wrote += wb_writeback(wb, work); | 762 | wrote += wb_writeback(wb, work); |
755 | 763 | ||
756 | /* | 764 | /* |
@@ -767,6 +775,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
767 | * Check for periodic writeback, kupdated() style | 775 | * Check for periodic writeback, kupdated() style |
768 | */ | 776 | */ |
769 | wrote += wb_check_old_data_flush(wb); | 777 | wrote += wb_check_old_data_flush(wb); |
778 | clear_bit(BDI_writeback_running, &wb->bdi->state); | ||
770 | 779 | ||
771 | return wrote; | 780 | return wrote; |
772 | } | 781 | } |
@@ -775,47 +784,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
775 | * Handle writeback of dirty data for the device backed by this bdi. Also | 784 | * Handle writeback of dirty data for the device backed by this bdi. Also |
776 | * wakes up periodically and does kupdated style flushing. | 785 | * wakes up periodically and does kupdated style flushing. |
777 | */ | 786 | */ |
778 | int bdi_writeback_task(struct bdi_writeback *wb) | 787 | int bdi_writeback_thread(void *data) |
779 | { | 788 | { |
780 | unsigned long last_active = jiffies; | 789 | struct bdi_writeback *wb = data; |
781 | unsigned long wait_jiffies = -1UL; | 790 | struct backing_dev_info *bdi = wb->bdi; |
782 | long pages_written; | 791 | long pages_written; |
783 | 792 | ||
793 | current->flags |= PF_FLUSHER | PF_SWAPWRITE; | ||
794 | set_freezable(); | ||
795 | wb->last_active = jiffies; | ||
796 | |||
797 | /* | ||
798 | * Our parent may run at a different priority, just set us to normal | ||
799 | */ | ||
800 | set_user_nice(current, 0); | ||
801 | |||
802 | trace_writeback_thread_start(bdi); | ||
803 | |||
784 | while (!kthread_should_stop()) { | 804 | while (!kthread_should_stop()) { |
805 | /* | ||
806 | * Remove own delayed wake-up timer, since we are already awake | ||
807 | * and we'll take care of the preriodic write-back. | ||
808 | */ | ||
809 | del_timer(&wb->wakeup_timer); | ||
810 | |||
785 | pages_written = wb_do_writeback(wb, 0); | 811 | pages_written = wb_do_writeback(wb, 0); |
786 | 812 | ||
813 | trace_writeback_pages_written(pages_written); | ||
814 | |||
787 | if (pages_written) | 815 | if (pages_written) |
788 | last_active = jiffies; | 816 | wb->last_active = jiffies; |
789 | else if (wait_jiffies != -1UL) { | ||
790 | unsigned long max_idle; | ||
791 | 817 | ||
792 | /* | 818 | set_current_state(TASK_INTERRUPTIBLE); |
793 | * Longest period of inactivity that we tolerate. If we | 819 | if (!list_empty(&bdi->work_list) || kthread_should_stop()) { |
794 | * see dirty data again later, the task will get | 820 | __set_current_state(TASK_RUNNING); |
795 | * recreated automatically. | 821 | continue; |
796 | */ | ||
797 | max_idle = max(5UL * 60 * HZ, wait_jiffies); | ||
798 | if (time_after(jiffies, max_idle + last_active)) | ||
799 | break; | ||
800 | } | 822 | } |
801 | 823 | ||
802 | if (dirty_writeback_interval) { | 824 | if (wb_has_dirty_io(wb) && dirty_writeback_interval) |
803 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 825 | schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); |
804 | schedule_timeout_interruptible(wait_jiffies); | 826 | else { |
805 | } else { | 827 | /* |
806 | set_current_state(TASK_INTERRUPTIBLE); | 828 | * We have nothing to do, so can go sleep without any |
807 | if (list_empty_careful(&wb->bdi->work_list) && | 829 | * timeout and save power. When a work is queued or |
808 | !kthread_should_stop()) | 830 | * something is made dirty - we will be woken up. |
809 | schedule(); | 831 | */ |
810 | __set_current_state(TASK_RUNNING); | 832 | schedule(); |
811 | } | 833 | } |
812 | 834 | ||
813 | try_to_freeze(); | 835 | try_to_freeze(); |
814 | } | 836 | } |
815 | 837 | ||
838 | /* Flush any work that raced with us exiting */ | ||
839 | if (!list_empty(&bdi->work_list)) | ||
840 | wb_do_writeback(wb, 1); | ||
841 | |||
842 | trace_writeback_thread_stop(bdi); | ||
816 | return 0; | 843 | return 0; |
817 | } | 844 | } |
818 | 845 | ||
846 | |||
819 | /* | 847 | /* |
820 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | 848 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
821 | * the whole world. | 849 | * the whole world. |
@@ -890,6 +918,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) | |||
890 | void __mark_inode_dirty(struct inode *inode, int flags) | 918 | void __mark_inode_dirty(struct inode *inode, int flags) |
891 | { | 919 | { |
892 | struct super_block *sb = inode->i_sb; | 920 | struct super_block *sb = inode->i_sb; |
921 | struct backing_dev_info *bdi = NULL; | ||
922 | bool wakeup_bdi = false; | ||
893 | 923 | ||
894 | /* | 924 | /* |
895 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 925 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -935,7 +965,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
935 | if (hlist_unhashed(&inode->i_hash)) | 965 | if (hlist_unhashed(&inode->i_hash)) |
936 | goto out; | 966 | goto out; |
937 | } | 967 | } |
938 | if (inode->i_state & (I_FREEING|I_CLEAR)) | 968 | if (inode->i_state & I_FREEING) |
939 | goto out; | 969 | goto out; |
940 | 970 | ||
941 | /* | 971 | /* |
@@ -943,22 +973,31 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
943 | * reposition it (that would break b_dirty time-ordering). | 973 | * reposition it (that would break b_dirty time-ordering). |
944 | */ | 974 | */ |
945 | if (!was_dirty) { | 975 | if (!was_dirty) { |
946 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 976 | bdi = inode_to_bdi(inode); |
947 | struct backing_dev_info *bdi = wb->bdi; | 977 | |
948 | 978 | if (bdi_cap_writeback_dirty(bdi)) { | |
949 | if (bdi_cap_writeback_dirty(bdi) && | 979 | WARN(!test_bit(BDI_registered, &bdi->state), |
950 | !test_bit(BDI_registered, &bdi->state)) { | 980 | "bdi-%s not registered\n", bdi->name); |
951 | WARN_ON(1); | 981 | |
952 | printk(KERN_ERR "bdi-%s not registered\n", | 982 | /* |
953 | bdi->name); | 983 | * If this is the first dirty inode for this |
984 | * bdi, we have to wake-up the corresponding | ||
985 | * bdi thread to make sure background | ||
986 | * write-back happens later. | ||
987 | */ | ||
988 | if (!wb_has_dirty_io(&bdi->wb)) | ||
989 | wakeup_bdi = true; | ||
954 | } | 990 | } |
955 | 991 | ||
956 | inode->dirtied_when = jiffies; | 992 | inode->dirtied_when = jiffies; |
957 | list_move(&inode->i_list, &wb->b_dirty); | 993 | list_move(&inode->i_list, &bdi->wb.b_dirty); |
958 | } | 994 | } |
959 | } | 995 | } |
960 | out: | 996 | out: |
961 | spin_unlock(&inode_lock); | 997 | spin_unlock(&inode_lock); |
998 | |||
999 | if (wakeup_bdi) | ||
1000 | bdi_wakeup_thread_delayed(bdi); | ||
962 | } | 1001 | } |
963 | EXPORT_SYMBOL(__mark_inode_dirty); | 1002 | EXPORT_SYMBOL(__mark_inode_dirty); |
964 | 1003 | ||
@@ -1001,7 +1040,7 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1001 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1040 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1002 | struct address_space *mapping; | 1041 | struct address_space *mapping; |
1003 | 1042 | ||
1004 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 1043 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
1005 | continue; | 1044 | continue; |
1006 | mapping = inode->i_mapping; | 1045 | mapping = inode->i_mapping; |
1007 | if (mapping->nrpages == 0) | 1046 | if (mapping->nrpages == 0) |