author	Ingo Molnar <mingo@elte.hu>	2010-07-01 02:49:28 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-07-01 03:31:25 -0400
commit	0a54cec0c25cc49e3b68b14c205f1f6cff13f5e1 (patch)
tree	eb4e63ee9ae1fcaf9aa53a1668e55c09516052d9 /fs/fs-writeback.c
parent	ec8c27e04f89a7575ca2c4facb99152e03d6a99c (diff)
parent	980019d74e4b2428362b36a0506519d6d9460800 (diff)
Merge branch 'linus' into core/rcu
Conflicts:
	fs/fs-writeback.c

Merge reason: Resolve the conflict

Note, i picked the version from Linus's tree, which effectively
reverts the fs-writeback.c bits of:

  b97181f: fs: remove all rcu head initializations, except on_stack initializations

As the upstream changes to this file changed this code heavily and
the first attempt to resolve the conflict resulted in a non-booting
kernel. It's safer to re-try this portion of the commit cleanly.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
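For context, the b97181f change named above concerns how rcu_head structures
embedded in on-stack bdi_work items are initialized. A minimal sketch of the
two styles, assuming the 2.6.35-era RCU API (this sketch is not part of the
commit itself):

	/*
	 * Style added by b97181f: announce on-stack rcu_head structures to
	 * the debugobjects machinery (CONFIG_DEBUG_OBJECTS_RCU_HEAD), paired
	 * with a destroy call before the stack frame goes away.
	 */
	struct bdi_work work;			/* lives on the caller's stack */

	init_rcu_head_on_stack(&work.rcu_head);
	/* ... queue the work and wait for the flusher thread ... */
	destroy_rcu_head_on_stack(&work.rcu_head);

	/*
	 * Style this merge reverts to (taken from Linus's tree): one plain
	 * initializer for every bdi_work, on-stack or heap-allocated.
	 */
	INIT_RCU_HEAD(&work.rcu_head);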
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--	fs/fs-writeback.c	279
1 file changed, 109 insertions, 170 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index af92100a7411..0609607d3955 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,45 +63,16 @@ struct bdi_work {
 };
 
 enum {
-	WS_USED_B = 0,
-	WS_ONSTACK_B,
+	WS_INPROGRESS = 0,
+	WS_ONSTACK,
 };
 
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
-	return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void __bdi_work_init(struct bdi_work *work,
-				   struct wb_writeback_args *args,
-				   int on_stack)
-{
-	work->args = *args;
-	work->state = WS_USED;
-	if (on_stack) {
-		work->state |= WS_ONSTACK;
-		init_rcu_head_on_stack(&work->rcu_head);
-	}
-}
-
 static inline void bdi_work_init(struct bdi_work *work,
 				 struct wb_writeback_args *args)
 {
-	__bdi_work_init(work, args, false);
-}
-
-static inline void bdi_work_init_on_stack(struct bdi_work *work,
-					  struct wb_writeback_args *args)
-{
-	__bdi_work_init(work, args, true);
-}
-
-static inline void bdi_destroy_work_on_stack(struct bdi_work *work)
-{
-	destroy_rcu_head_on_stack(&work->rcu_head);
+	INIT_RCU_HEAD(&work->rcu_head);
+	work->args = *args;
+	__set_bit(WS_INPROGRESS, &work->state);
 }
 
 /**
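A note on this hunk: the old code kept separate bit-number constants
(WS_USED_B) and shifted mask macros (WS_USED); the new flags are used directly
as bit numbers, which is what the atomic bitop helpers expect. A hedged sketch
of the idiom (illustrative, not from the patch):

	unsigned long state = 0;
	bool onstack;

	__set_bit(WS_INPROGRESS, &state);	/* bitops take a bit number, */
	onstack = test_bit(WS_ONSTACK, &state);	/* not a (1 << n) mask	     */
	clear_bit(WS_INPROGRESS, &state);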
@@ -116,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_clear(struct bdi_work *work)
-{
-	clear_bit(WS_USED_B, &work->state);
-	smp_mb__after_clear_bit();
-	/*
-	 * work can have disappeared at this point. bit waitq functions
-	 * should be able to tolerate this, provided bdi_sched_wait does
-	 * not dereference it's pointer argument.
-	 */
-	wake_up_bit(&work->state, WS_USED_B);
-}
-
 static void bdi_work_free(struct rcu_head *head)
 {
 	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
 
-	if (!bdi_work_on_stack(work))
-		kfree(work);
-	else
-		bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
-	const enum writeback_sync_modes sync_mode = work->args.sync_mode;
-	int onstack = bdi_work_on_stack(work);
+	clear_bit(WS_INPROGRESS, &work->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&work->state, WS_INPROGRESS);
 
-	/*
-	 * For allocated work, we can clear the done/seen bit right here.
-	 * For on-stack work, we need to postpone both the clear and free
-	 * to after the RCU grace period, since the stack could be invalidated
-	 * as soon as bdi_work_clear() has done the wakeup.
-	 */
-	if (!onstack)
-		bdi_work_clear(work);
-	if (sync_mode == WB_SYNC_NONE || onstack)
-		call_rcu(&work->rcu_head, bdi_work_free);
+	if (!test_bit(WS_ONSTACK, &work->state))
+		kfree(work);
 }
 
 static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
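The freeing path above is one half of a standard bit-waitqueue handshake; the
other half is bdi_wait_on_work_done(), two hunks below. Put side by side
(restating the patched code, for illustration):

	/* Completion side, run from the call_rcu() callback: */
	clear_bit(WS_INPROGRESS, &work->state);
	smp_mb__after_clear_bit();	/* order the clear before the wakeup */
	wake_up_bit(&work->state, WS_INPROGRESS);

	/* Waiting side, the submitter of an on-stack work item: */
	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
		    TASK_UNINTERRUPTIBLE);	/* sleeps until the bit clears */

Because the wakeup now always happens from within the RCU callback, the
stack-lifetime dance that wb_work_complete() used to perform is no longer
needed.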
@@ -168,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
 		list_del_rcu(&work->list);
 		spin_unlock(&bdi->wb_lock);
 
-		wb_work_complete(work);
+		call_rcu(&work->rcu_head, bdi_work_free);
 	}
 }
 
@@ -206,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
  * Used for on-stack allocated work items. The caller needs to wait until
  * the wb threads have acked the work before it's safe to continue.
  */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
+static void bdi_wait_on_work_done(struct bdi_work *work)
 {
-	wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
 		    TASK_UNINTERRUPTIBLE);
 }
 
@@ -234,37 +178,28 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 }
 
 /**
- * bdi_sync_writeback - start and wait for writeback
- * @bdi: the backing device to write from
+ * bdi_queue_work_onstack - start and wait for writeback
  * @sb: write inodes from this super_block
  *
  * Description:
- *   This does WB_SYNC_ALL data integrity writeback and waits for the
- *   IO to complete. Callers must hold the sb s_umount semaphore for
+ *   This function initiates writeback and waits for the operation to
+ *   complete. Callers must hold the sb s_umount semaphore for
  *   reading, to avoid having the super disappear before we are done.
  */
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
-			       struct super_block *sb)
+static void bdi_queue_work_onstack(struct wb_writeback_args *args)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_pages	= LONG_MAX,
-		.range_cyclic	= 0,
-	};
 	struct bdi_work work;
 
-	bdi_work_init_on_stack(&work, &args);
+	bdi_work_init(&work, args);
+	__set_bit(WS_ONSTACK, &work.state);
 
-	bdi_queue_work(bdi, &work);
-	bdi_wait_on_work_clear(&work);
-	bdi_destroy_work_on_stack(&work);
+	bdi_queue_work(args->sb->s_bdi, &work);
+	bdi_wait_on_work_done(&work);
 }
 
 /**
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
  *
  * Description:
@@ -273,25 +208,34 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
 	struct wb_writeback_args args = {
-		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
 		.nr_pages	= nr_pages,
 		.range_cyclic	= 1,
 	};
 
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
+	bdi_alloc_queue_work(bdi, &args);
+}
 
+/**
+ * bdi_start_background_writeback - start background writeback
+ * @bdi: the backing device to write from
+ *
+ * Description:
+ *   This does WB_SYNC_NONE background writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ */
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
+{
+	struct wb_writeback_args args = {
+		.sync_mode	= WB_SYNC_NONE,
+		.nr_pages	= LONG_MAX,
+		.for_background = 1,
+		.range_cyclic	= 1,
+	};
 	bdi_alloc_queue_work(bdi, &args);
 }
 
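Callers change accordingly: the old interface overloaded nr_pages == 0 to mean
background writeback, while the new pair of functions makes the intent
explicit. A hedged before/after sketch (the actual call sites, e.g. in
balance_dirty_pages(), are outside this diff):

	/* before: magic nr_pages == 0 requested background writeback */
	bdi_start_writeback(bdi, NULL, 0);

	/* after: two explicit entry points, and no super_block argument */
	bdi_start_background_writeback(bdi);	/* run until below bg threshold */
	bdi_start_writeback(bdi, nr_pages);	/* write out nr_pages, async    */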
@@ -582,48 +526,30 @@ select_queue:
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
-	up_read(&sb->s_umount);
-	put_super(sb);
-}
-
-enum sb_pin_state {
-	SB_PINNED,
-	SB_NOT_PINNED,
-	SB_PIN_FAILED
-};
-
 /*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
  */
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
-					      struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
 {
-	/*
-	 * Caller must already hold the ref for this
-	 */
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return SB_NOT_PINNED;
-	}
 	spin_lock(&sb_lock);
+	if (list_empty(&sb->s_instances)) {
+		spin_unlock(&sb_lock);
+		return false;
+	}
+
 	sb->s_count++;
+	spin_unlock(&sb_lock);
+
 	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root) {
-			spin_unlock(&sb_lock);
-			return SB_PINNED;
-		}
-		/*
-		 * umounted, drop rwsem again and fall through to failure
-		 */
+		if (sb->s_root)
+			return true;
 		up_read(&sb->s_umount);
 	}
-	sb->s_count--;
-	spin_unlock(&sb_lock);
-	return SB_PIN_FAILED;
+
+	put_super(sb);
+	return false;
 }
 
 /*
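With the WB_SYNC_ALL case moved out to the caller (next hunk), the tristate
sb_pin_state collapses to a bool, and the dedicated unpin helper is replaced
by the generic drop_super(). Usage now looks like this (restating the next
hunk, for illustration):

	if (!pin_sb_for_writeback(sb)) {
		requeue_io(inode);	/* sb is dead or mid-umount: skip it */
		continue;
	}
	ret = writeback_sb_inodes(sb, wb, wbc);
	drop_super(sb);			/* up_read(&sb->s_umount) + put_super() */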
@@ -702,24 +628,31 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		struct inode *inode = list_entry(wb->b_io.prev,
 						 struct inode, i_list);
 		struct super_block *sb = inode->i_sb;
-		enum sb_pin_state state;
 
-		if (wbc->sb && sb != wbc->sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		state = pin_sb_for_writeback(wbc, sb);
+		if (wbc->sb) {
+			/*
+			 * We are requested to write out inodes for a specific
+			 * superblock. This means we already have s_umount
+			 * taken by the caller which also waits for us to
+			 * complete the writeout.
+			 */
+			if (sb != wbc->sb) {
+				redirty_tail(inode);
+				continue;
+			}
 
-		if (state == SB_PIN_FAILED) {
-			requeue_io(inode);
-			continue;
+			WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+			ret = writeback_sb_inodes(sb, wb, wbc);
+		} else {
+			if (!pin_sb_for_writeback(sb)) {
+				requeue_io(inode);
+				continue;
+			}
+			ret = writeback_sb_inodes(sb, wb, wbc);
+			drop_super(sb);
 		}
-		ret = writeback_sb_inodes(sb, wb, wbc);
 
-		if (state == SB_PINNED)
-			unpin_sb_for_writeback(sb);
 		if (ret)
 			break;
 	}
@@ -932,7 +865,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		 * If this isn't a data integrity operation, just notify
 		 * that we have seen this work and we are now starting it.
 		 */
-		if (args.sync_mode == WB_SYNC_NONE)
+		if (!test_bit(WS_ONSTACK, &work->state))
 			wb_clear_pending(wb, work);
 
 		wrote += wb_writeback(wb, &args);
@@ -941,7 +874,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		 * This is a data integrity writeback, so only do the
 		 * notification when we have completed the work.
 		 */
-		if (args.sync_mode == WB_SYNC_ALL)
+		if (test_bit(WS_ONSTACK, &work->state))
 			wb_clear_pending(wb, work);
 	}
 
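These two hunks change the condition, not the ordering: work with no waiter is
acknowledged before the writeback runs, work with an on-stack waiter only
afterwards. Keying off WS_ONSTACK instead of the sync mode is equivalent here
because only on-stack (data integrity) work has a waiter. The resulting shape,
as an illustrative sketch:

	if (!test_bit(WS_ONSTACK, &work->state))
		wb_clear_pending(wb, work);	/* no waiter: ack early */

	wrote += wb_writeback(wb, &args);

	if (test_bit(WS_ONSTACK, &work->state))
		wb_clear_pending(wb, work);	/* waiter's stack: ack when done */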
@@ -999,42 +932,32 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 }
 
 /*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
  */
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
 {
+	struct backing_dev_info *bdi;
 	struct wb_writeback_args args = {
-		.sb		= sb,
-		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 	};
-	struct backing_dev_info *bdi;
 
-	rcu_read_lock();
+	if (nr_pages) {
+		args.nr_pages = nr_pages;
+	} else {
+		args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS);
+	}
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-
 		bdi_alloc_queue_work(bdi, &args);
 	}
-
 	rcu_read_unlock();
 }
 
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
-	if (nr_pages == 0)
-		nr_pages = global_page_state(NR_FILE_DIRTY) +
-				global_page_state(NR_UNSTABLE_NFS);
-	bdi_writeback_all(NULL, nr_pages);
-}
-
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
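wakeup_flusher_threads() absorbs the old bdi_writeback_all() body, since the
super_block argument was only ever NULL on this path. Illustrative calls, per
the nr_pages convention in the comment:

	wakeup_flusher_threads(1024);	/* queue ~1024 pages of writeback on
					   every bdi that has dirty IO */
	wakeup_flusher_threads(0);	/* "the whole world": all dirty plus
					   unstable-NFS pages */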
@@ -1239,12 +1162,17 @@ void writeback_inodes_sb(struct super_block *sb)
 {
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	long nr_to_write;
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_NONE,
+	};
 
-	nr_to_write = nr_dirty + nr_unstable +
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	args.nr_pages = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+	bdi_queue_work_onstack(&args);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1258,7 +1186,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
 int writeback_inodes_sb_if_idle(struct super_block *sb)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
+		down_read(&sb->s_umount);
 		writeback_inodes_sb(sb);
+		up_read(&sb->s_umount);
 		return 1;
 	} else
 		return 0;
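Since writeback_inodes_sb() now asserts that s_umount is held (previous hunk),
the two entry points split the locking responsibility. A hedged sketch of the
resulting contracts:

	/* direct call: the caller must hold s_umount for reading */
	down_read(&sb->s_umount);
	writeback_inodes_sb(sb);
	up_read(&sb->s_umount);

	/* convenience wrapper: takes and releases s_umount itself */
	writeback_inodes_sb_if_idle(sb);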
@@ -1274,7 +1204,16 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	bdi_sync_writeback(sb->s_bdi, sb);
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_ALL,
+		.nr_pages	= LONG_MAX,
+		.range_cyclic	= 0,
+	};
+
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	bdi_queue_work_onstack(&args);
 	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
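sync_inodes_sb() inlines the WB_SYNC_ALL argument block that
bdi_sync_writeback() used to build, and the old kernel-doc requirement
("Callers must hold the sb s_umount semaphore") becomes an explicit WARN_ON.
A sketch of the caller contract (illustrative; the sync(2) path, for example,
takes s_umount before calling in):

	down_read(&sb->s_umount);
	sync_inodes_sb(sb);	/* WB_SYNC_ALL: queue on-stack work, wait for
				   the flusher, then wait on each inode */
	up_read(&sb->s_umount);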