Diffstat (limited to 'fs/fs-writeback.c')

 fs/fs-writeback.c | 253 ++++++++++++-------------------------------
 1 file changed, 69 insertions(+), 184 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8cc06d5432b5..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,43 +38,18 @@ int nr_pdflush_threads;
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
-struct wb_writeback_args {
+struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
 	enum writeback_sync_modes sync_mode;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
-};
 
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
 	struct list_head list;		/* pending work list */
-	struct rcu_head rcu_head;	/* for RCU free/clear of work */
-
-	unsigned long seen;		/* threads that have seen this work */
-	atomic_t pending;		/* number of threads still to do work */
-
-	struct wb_writeback_args args;	/* writeback arguments */
-
-	unsigned long state;		/* flag bits, see WS_* */
-};
-
-enum {
-	WS_INPROGRESS = 0,
-	WS_ONSTACK,
+	struct completion *done;	/* set if the caller waits */
 };
 
-static inline void bdi_work_init(struct bdi_work *work,
-				 struct wb_writeback_args *args)
-{
-	INIT_RCU_HEAD(&work->rcu_head);
-	work->args = *args;
-	__set_bit(WS_INPROGRESS, &work->state);
-}
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
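
Note: the entire bdi_work apparatus (RCU-deferred freeing, the per-thread "seen" bitmask, the WS_INPROGRESS/WS_ONSTACK bits) collapses into this single optional completion pointer. A minimal sketch of the submitter-side handshake the new struct enables, mirroring the writeback_inodes_sb() hunk near the end of this patch rather than introducing any new API:

	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sync_mode	= WB_SYNC_NONE,
		.done		= &done,	/* leave NULL to fire and forget */
	};

	bdi_queue_work(bdi, &work);
	wait_for_completion(&done);	/* sleep until the flusher calls complete() */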
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_free(struct rcu_head *head)
-{
-	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
-	clear_bit(WS_INPROGRESS, &work->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&work->state, WS_INPROGRESS);
-
-	if (!test_bit(WS_ONSTACK, &work->state))
-		kfree(work);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
-	/*
-	 * The caller has retrieved the work arguments from this work,
-	 * drop our reference. If this is the last ref, delete and free it
-	 */
-	if (atomic_dec_and_test(&work->pending)) {
-		struct backing_dev_info *bdi = wb->bdi;
-
-		spin_lock(&bdi->wb_lock);
-		list_del_rcu(&work->list);
-		spin_unlock(&bdi->wb_lock);
-
-		call_rcu(&work->rcu_head, bdi_work_free);
-	}
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+			   struct wb_writeback_work *work)
 {
-	work->seen = bdi->wb_mask;
-	BUG_ON(!work->seen);
-	atomic_set(&work->pending, bdi->wb_cnt);
-	BUG_ON(!bdi->wb_cnt);
-
-	/*
-	 * list_add_tail_rcu() contains the necessary barriers to
-	 * make sure the above stores are seen before the item is
-	 * noticed on the list
-	 */
 	spin_lock(&bdi->wb_lock);
-	list_add_tail_rcu(&work->list, &bdi->work_list);
+	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock(&bdi->wb_lock);
 
 	/*
@@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
 	}
 }
 
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_done(struct bdi_work *work)
-{
-	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct wb_writeback_args *args)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+		      bool range_cyclic, bool for_background)
 {
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 
 	/*
 	 * This is WB_SYNC_NONE writeback, so if allocation fails just
 	 * wakeup the thread for old dirty data writeback
 	 */
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (work) {
-		bdi_work_init(work, args);
-		bdi_queue_work(bdi, work);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		if (bdi->wb.task)
+			wake_up_process(bdi->wb.task);
+		return;
 	}
-}
 
-/**
- * bdi_queue_work_onstack - start and wait for writeback
- * @args: parameters to control the work queue writeback
- *
- * Description:
- *   This function initiates writeback and waits for the operation to
- *   complete. Callers must hold the sb s_umount semaphore for
- *   reading, to avoid having the super disappear before we are done.
- */
-static void bdi_queue_work_onstack(struct wb_writeback_args *args)
-{
-	struct bdi_work work;
+	work->sync_mode = WB_SYNC_NONE;
+	work->nr_pages = nr_pages;
+	work->range_cyclic = range_cyclic;
+	work->for_background = for_background;
 
-	bdi_work_init(&work, args);
-	__set_bit(WS_ONSTACK, &work.state);
-
-	bdi_queue_work(args->sb->s_bdi, &work);
-	bdi_wait_on_work_done(&work);
+	bdi_queue_work(bdi, work);
 }
 
 /**
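
Note: kzalloc() rather than the old kmalloc() is deliberate here: the fields this helper never sets (->sb, ->done, ->for_kupdate) must read back as NULL/0, and a zeroed allocation guarantees that. The old designated-initializer call sites map onto the new bool arguments as follows (equivalences taken from the two hunks below):

	/* was: .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1 */
	__bdi_start_writeback(bdi, nr_pages, true, false);

	/* was: .sync_mode = WB_SYNC_NONE, .nr_pages = LONG_MAX,
	 *      .for_background = 1, .range_cyclic = 1 */
	__bdi_start_writeback(bdi, LONG_MAX, true, true);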
@@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args)
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	struct wb_writeback_args args = {
-		.sync_mode	= WB_SYNC_NONE,
-		.nr_pages	= nr_pages,
-		.range_cyclic	= 1,
-	};
-
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, nr_pages, true, false);
 }
 
 /**
@@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  */
 void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	struct wb_writeback_args args = {
-		.sync_mode	= WB_SYNC_NONE,
-		.nr_pages	= LONG_MAX,
-		.for_background = 1,
-		.range_cyclic	= 1,
-	};
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, LONG_MAX, true, true);
 }
 
 /*
@@ -703,14 +602,14 @@ static inline bool over_bground_thresh(void)
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
-			 struct wb_writeback_args *args)
+			 struct wb_writeback_work *work)
 {
 	struct writeback_control wbc = {
-		.sync_mode		= args->sync_mode,
+		.sync_mode		= work->sync_mode,
 		.older_than_this	= NULL,
-		.for_kupdate		= args->for_kupdate,
-		.for_background		= args->for_background,
-		.range_cyclic		= args->range_cyclic,
+		.for_kupdate		= work->for_kupdate,
+		.for_background		= work->for_background,
+		.range_cyclic		= work->range_cyclic,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
@@ -730,24 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
 		/*
 		 * Stop writeback when nr_pages has been consumed
 		 */
-		if (args->nr_pages <= 0)
+		if (work->nr_pages <= 0)
 			break;
 
 		/*
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
-		if (args->for_background && !over_bground_thresh())
+		if (work->for_background && !over_bground_thresh())
 			break;
 
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
-		if (args->sb)
-			__writeback_inodes_sb(args->sb, wb, &wbc);
+		if (work->sb)
+			__writeback_inodes_sb(work->sb, wb, &wbc);
 		else
 			writeback_inodes_wb(wb, &wbc);
-		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
 		/*
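
Note: the nr_pages bookkeeping is unchanged, only renamed from args to work. A worked example of one loop iteration, assuming MAX_WRITEBACK_PAGES is 1024 (its long-standing value in this file):

	wbc.nr_to_write = 1024;		/* budget handed to the lower layers */
	/* the lower layers write pages, leaving e.g. 200 of the budget unused */
	work->nr_pages -= 1024 - 200;	/* 824 pages of the request consumed */
	wrote          += 1024 - 200;	/* 824 pages reported back to the caller */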
@@ -783,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
 }
 
 /*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
-					   struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
 {
-	struct bdi_work *work, *ret = NULL;
-
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(work, &bdi->work_list, list) {
-		if (!test_bit(wb->nr, &work->seen))
-			continue;
-		clear_bit(wb->nr, &work->seen);
+	struct wb_writeback_work *work = NULL;
 
-		ret = work;
-		break;
+	spin_lock(&bdi->wb_lock);
+	if (!list_empty(&bdi->work_list)) {
+		work = list_entry(bdi->work_list.next,
+				  struct wb_writeback_work, list);
+		list_del_init(&work->list);
 	}
-
-	rcu_read_unlock();
-	return ret;
+	spin_unlock(&bdi->wb_lock);
+	return work;
 }
 
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
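
Note: list_del_init() is what transfers ownership: once an item is unlinked under wb_lock, the flusher thread holds the only reference and can later complete() or kfree() it without further locking. The open-coded list_entry() of ->next is a plain FIFO pop; the same dequeue could equally be spelled with the list_first_entry() convenience macro, e.g.:

	spin_lock(&bdi->wb_lock);
	if (!list_empty(&bdi->work_list)) {
		work = list_first_entry(&bdi->work_list,
					struct wb_writeback_work, list);
		list_del_init(&work->list);
	}
	spin_unlock(&bdi->wb_lock);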
@@ -832,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
 	if (nr_pages) {
-		struct wb_writeback_args args = {
+		struct wb_writeback_work work = {
 			.nr_pages	= nr_pages,
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
 		};
 
-		return wb_writeback(wb, &args);
+		return wb_writeback(wb, &work);
 	}
 
 	return 0;
@@ -851,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 {
 	struct backing_dev_info *bdi = wb->bdi;
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 	long wrote = 0;
 
 	while ((work = get_next_work_item(bdi, wb)) != NULL) {
-		struct wb_writeback_args args = work->args;
-
 		/*
 		 * Override sync mode, in case we must wait for completion
+		 * because this thread is exiting now.
 		 */
 		if (force_wait)
-			work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
+			work->sync_mode = WB_SYNC_ALL;
 
-		/*
-		 * If this isn't a data integrity operation, just notify
-		 * that we have seen this work and we are now starting it.
-		 */
-		if (!test_bit(WS_ONSTACK, &work->state))
-			wb_clear_pending(wb, work);
-
-		wrote += wb_writeback(wb, &args);
+		wrote += wb_writeback(wb, work);
 
 		/*
-		 * This is a data integrity writeback, so only do the
-		 * notification when we have completed the work.
+		 * Notify the caller of completion if this is a synchronous
+		 * work item, otherwise just free it.
 		 */
-		if (test_bit(WS_ONSTACK, &work->state))
-			wb_clear_pending(wb, work);
+		if (work->done)
+			complete(work->done);
+		else
+			kfree(work);
 	}
 
 	/*
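
Note: the ->done test encodes the lifetime rule that previously needed the WS_ONSTACK flag. Spelled out as comments on the new code path:

	/*
	 * work->done set:  the struct lives on the submitter's stack
	 *                  (writeback_inodes_sb/sync_inodes_sb); complete()
	 *                  hands it back and must be the last touch, since
	 *                  kfree() of an on-stack object would be a bug.
	 * work->done NULL: the struct came from kzalloc() in
	 *                  __bdi_start_writeback(); nobody waits, so the
	 *                  flusher must free it.
	 */
	if (work->done)
		complete(work->done);
	else
		kfree(work);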
@@ -940,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 void wakeup_flusher_threads(long nr_pages)
 {
 	struct backing_dev_info *bdi;
-	struct wb_writeback_args args = {
-		.sync_mode	= WB_SYNC_NONE,
-	};
 
-	if (nr_pages) {
-		args.nr_pages = nr_pages;
-	} else {
-		args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+	if (!nr_pages) {
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
 				global_page_state(NR_UNSTABLE_NFS);
 	}
 
@@ -955,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		bdi_alloc_queue_work(bdi, &args);
+		__bdi_start_writeback(bdi, nr_pages, false, false);
 	}
 	rcu_read_unlock();
 }
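
Note: with the args struct gone, the "0 means write everything" convention moves onto the nr_pages parameter itself. With illustrative counter values, NR_FILE_DIRTY = 10000 pages and NR_UNSTABLE_NFS = 200, a caller passing 0 queues nr_pages = 10000 + 200 = 10200 on every bdi that has dirty IO. Also note range_cyclic is false here, matching the old args, which never set it.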
@@ -1164,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
 {
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	struct wb_writeback_args args = {
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
+		.done		= &done,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	args.nr_pages = nr_dirty + nr_unstable +
+	work.nr_pages = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_queue_work_onstack(&args);
+	bdi_queue_work(sb->s_bdi, &work);
+	wait_for_completion(&done);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1206,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	struct wb_writeback_args args = {
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_ALL,
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
+		.done		= &done,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	bdi_queue_work_onstack(&args);
+	bdi_queue_work(sb->s_bdi, &work);
+	wait_for_completion(&done);
+
 	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
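
Note: both exported helpers still insist on s_umount held for reading (hence the WARN_ONs), so the superblock cannot go away while the flusher dereferences work->sb; the on-stack completion then keeps the work struct alive for exactly as long as the caller sleeps. A hypothetical caller, for illustration only and not part of this patch:

	down_read(&sb->s_umount);
	sync_inodes_sb(sb);	/* WB_SYNC_ALL pass, wait_for_completion(),
				 * then wait_sb_inodes() */
	up_read(&sb->s_umount);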