about | summary | refs | log | tree | commit | diff | stats
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
author: Dave Chinner <dchinner@redhat.com> 2011-03-22 07:23:41 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> 2011-03-24 21:17:51 -0400
commita66979abad090b2765a6c6790c9fdeab996833f2 (patch)
treee48b2d0fac8f96456286a503aeeb952620234961 /fs/fs-writeback.c
parent55fa6091d83160ca772fc37cebae45d42695a708 (diff)
fs: move i_wb_list out from under inode_lock
Protect the inode writeback list with a new global lock inode_wb_list_lock and use it to protect the list manipulations and traversals. This lock replaces the inode_lock as the inodes on the list can be validity checked while holding the inode->i_lock and hence the inode_lock is no longer needed to protect the list. Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- fs/fs-writeback.c | 76
1 file changed, 44 insertions(+), 32 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5de56a2182bb..ed800656356b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
176} 176}
177 177
178/* 178/*
179 * Remove the inode from the writeback list it is on.
180 */
181void inode_wb_list_del(struct inode *inode)
182{
183 spin_lock(&inode_wb_list_lock);
184 list_del_init(&inode->i_wb_list);
185 spin_unlock(&inode_wb_list_lock);
186}
187
188
189/*
179 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the 190 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
180 * furthest end of its superblock's dirty-inode list. 191 * furthest end of its superblock's dirty-inode list.
181 * 192 *
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)
188{ 199{
189 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 200 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
190 201
202 assert_spin_locked(&inode_wb_list_lock);
191 if (!list_empty(&wb->b_dirty)) { 203 if (!list_empty(&wb->b_dirty)) {
192 struct inode *tail; 204 struct inode *tail;
193 205
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)
205{ 217{
206 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 218 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
207 219
220 assert_spin_locked(&inode_wb_list_lock);
208 list_move(&inode->i_wb_list, &wb->b_more_io); 221 list_move(&inode->i_wb_list, &wb->b_more_io);
209} 222}
210 223
211static void inode_sync_complete(struct inode *inode) 224static void inode_sync_complete(struct inode *inode)
212{ 225{
213 /* 226 /*
214 * Prevent speculative execution through spin_unlock(&inode_lock); 227 * Prevent speculative execution through
228 * spin_unlock(&inode_wb_list_lock);
215 */ 229 */
230
216 smp_mb(); 231 smp_mb();
217 wake_up_bit(&inode->i_state, __I_SYNC); 232 wake_up_bit(&inode->i_state, __I_SYNC);
218} 233}
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
286 */ 301 */
287static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) 302static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
288{ 303{
304 assert_spin_locked(&inode_wb_list_lock);
289 list_splice_init(&wb->b_more_io, &wb->b_io); 305 list_splice_init(&wb->b_more_io, &wb->b_io);
290 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); 306 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
291} 307}
@@ -308,25 +324,23 @@ static void inode_wait_for_writeback(struct inode *inode)
308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 324 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
309 while (inode->i_state & I_SYNC) { 325 while (inode->i_state & I_SYNC) {
310 spin_unlock(&inode->i_lock); 326 spin_unlock(&inode->i_lock);
311 spin_unlock(&inode_lock); 327 spin_unlock(&inode_wb_list_lock);
312 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 328 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
313 spin_lock(&inode_lock); 329 spin_lock(&inode_wb_list_lock);
314 spin_lock(&inode->i_lock); 330 spin_lock(&inode->i_lock);
315 } 331 }
316} 332}
317 333
318/* 334/*
319 * Write out an inode's dirty pages. Called under inode_lock. Either the 335 * Write out an inode's dirty pages. Called under inode_wb_list_lock. Either
320 * caller has ref on the inode (either via __iget or via syscall against an fd) 336 * the caller has an active reference on the inode or the inode has I_WILL_FREE
321 * or the inode has I_WILL_FREE set (via generic_forget_inode) 337 * set.
322 * 338 *
323 * If `wait' is set, wait on the writeout. 339 * If `wait' is set, wait on the writeout.
324 * 340 *
325 * The whole writeout design is quite complex and fragile. We want to avoid 341 * The whole writeout design is quite complex and fragile. We want to avoid
326 * starvation of particular inodes when others are being redirtied, prevent 342 * starvation of particular inodes when others are being redirtied, prevent
327 * livelocks, etc. 343 * livelocks, etc.
328 *
329 * Called under inode_lock.
330 */ 344 */
331static int 345static int
332writeback_single_inode(struct inode *inode, struct writeback_control *wbc) 346writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -368,7 +382,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
368 inode->i_state |= I_SYNC; 382 inode->i_state |= I_SYNC;
369 inode->i_state &= ~I_DIRTY_PAGES; 383 inode->i_state &= ~I_DIRTY_PAGES;
370 spin_unlock(&inode->i_lock); 384 spin_unlock(&inode->i_lock);
371 spin_unlock(&inode_lock); 385 spin_unlock(&inode_wb_list_lock);
372 386
373 ret = do_writepages(mapping, wbc); 387 ret = do_writepages(mapping, wbc);
374 388
@@ -388,12 +402,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
388 * due to delalloc, clear dirty metadata flags right before 402 * due to delalloc, clear dirty metadata flags right before
389 * write_inode() 403 * write_inode()
390 */ 404 */
391 spin_lock(&inode_lock);
392 spin_lock(&inode->i_lock); 405 spin_lock(&inode->i_lock);
393 dirty = inode->i_state & I_DIRTY; 406 dirty = inode->i_state & I_DIRTY;
394 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 407 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
395 spin_unlock(&inode->i_lock); 408 spin_unlock(&inode->i_lock);
396 spin_unlock(&inode_lock);
397 /* Don't write the inode if only I_DIRTY_PAGES was set */ 409 /* Don't write the inode if only I_DIRTY_PAGES was set */
398 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 410 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
399 int err = write_inode(inode, wbc); 411 int err = write_inode(inode, wbc);
@@ -401,7 +413,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
401 ret = err; 413 ret = err;
402 } 414 }
403 415
404 spin_lock(&inode_lock); 416 spin_lock(&inode_wb_list_lock);
405 spin_lock(&inode->i_lock); 417 spin_lock(&inode->i_lock);
406 inode->i_state &= ~I_SYNC; 418 inode->i_state &= ~I_SYNC;
407 if (!(inode->i_state & I_FREEING)) { 419 if (!(inode->i_state & I_FREEING)) {
@@ -543,10 +555,10 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
543 */ 555 */
544 redirty_tail(inode); 556 redirty_tail(inode);
545 } 557 }
546 spin_unlock(&inode_lock); 558 spin_unlock(&inode_wb_list_lock);
547 iput(inode); 559 iput(inode);
548 cond_resched(); 560 cond_resched();
549 spin_lock(&inode_lock); 561 spin_lock(&inode_wb_list_lock);
550 if (wbc->nr_to_write <= 0) { 562 if (wbc->nr_to_write <= 0) {
551 wbc->more_io = 1; 563 wbc->more_io = 1;
552 return 1; 564 return 1;
@@ -565,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
565 577
566 if (!wbc->wb_start) 578 if (!wbc->wb_start)
567 wbc->wb_start = jiffies; /* livelock avoidance */ 579 wbc->wb_start = jiffies; /* livelock avoidance */
568 spin_lock(&inode_lock); 580 spin_lock(&inode_wb_list_lock);
569 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 581 if (!wbc->for_kupdate || list_empty(&wb->b_io))
570 queue_io(wb, wbc->older_than_this); 582 queue_io(wb, wbc->older_than_this);
571 583
@@ -583,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
583 if (ret) 595 if (ret)
584 break; 596 break;
585 } 597 }
586 spin_unlock(&inode_lock); 598 spin_unlock(&inode_wb_list_lock);
587 /* Leave any unwritten inodes on b_io */ 599 /* Leave any unwritten inodes on b_io */
588} 600}
589 601
@@ -592,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
592{ 604{
593 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 605 WARN_ON(!rwsem_is_locked(&sb->s_umount));
594 606
595 spin_lock(&inode_lock); 607 spin_lock(&inode_wb_list_lock);
596 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 608 if (!wbc->for_kupdate || list_empty(&wb->b_io))
597 queue_io(wb, wbc->older_than_this); 609 queue_io(wb, wbc->older_than_this);
598 writeback_sb_inodes(sb, wb, wbc, true); 610 writeback_sb_inodes(sb, wb, wbc, true);
599 spin_unlock(&inode_lock); 611 spin_unlock(&inode_wb_list_lock);
600} 612}
601 613
602/* 614/*
@@ -735,7 +747,7 @@ static long wb_writeback(struct bdi_writeback *wb,
735 * become available for writeback. Otherwise 747 * become available for writeback. Otherwise
736 * we'll just busyloop. 748 * we'll just busyloop.
737 */ 749 */
738 spin_lock(&inode_lock); 750 spin_lock(&inode_wb_list_lock);
739 if (!list_empty(&wb->b_more_io)) { 751 if (!list_empty(&wb->b_more_io)) {
740 inode = wb_inode(wb->b_more_io.prev); 752 inode = wb_inode(wb->b_more_io.prev);
741 trace_wbc_writeback_wait(&wbc, wb->bdi); 753 trace_wbc_writeback_wait(&wbc, wb->bdi);
@@ -743,7 +755,7 @@ static long wb_writeback(struct bdi_writeback *wb,
743 inode_wait_for_writeback(inode); 755 inode_wait_for_writeback(inode);
744 spin_unlock(&inode->i_lock); 756 spin_unlock(&inode->i_lock);
745 } 757 }
746 spin_unlock(&inode_lock); 758 spin_unlock(&inode_wb_list_lock);
747 } 759 }
748 760
749 return wrote; 761 return wrote;
@@ -1009,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1009{ 1021{
1010 struct super_block *sb = inode->i_sb; 1022 struct super_block *sb = inode->i_sb;
1011 struct backing_dev_info *bdi = NULL; 1023 struct backing_dev_info *bdi = NULL;
1012 bool wakeup_bdi = false;
1013 1024
1014 /* 1025 /*
1015 * Don't do this for I_DIRTY_PAGES - that doesn't actually 1026 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -1033,7 +1044,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1033 if (unlikely(block_dump)) 1044 if (unlikely(block_dump))
1034 block_dump___mark_inode_dirty(inode); 1045 block_dump___mark_inode_dirty(inode);
1035 1046
1036 spin_lock(&inode_lock);
1037 spin_lock(&inode->i_lock); 1047 spin_lock(&inode->i_lock);
1038 if ((inode->i_state & flags) != flags) { 1048 if ((inode->i_state & flags) != flags) {
1039 const int was_dirty = inode->i_state & I_DIRTY; 1049 const int was_dirty = inode->i_state & I_DIRTY;
@@ -1059,12 +1069,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1059 if (inode->i_state & I_FREEING) 1069 if (inode->i_state & I_FREEING)
1060 goto out_unlock_inode; 1070 goto out_unlock_inode;
1061 1071
1062 spin_unlock(&inode->i_lock);
1063 /* 1072 /*
1064 * If the inode was already on b_dirty/b_io/b_more_io, don't 1073 * If the inode was already on b_dirty/b_io/b_more_io, don't
1065 * reposition it (that would break b_dirty time-ordering). 1074 * reposition it (that would break b_dirty time-ordering).
1066 */ 1075 */
1067 if (!was_dirty) { 1076 if (!was_dirty) {
1077 bool wakeup_bdi = false;
1068 bdi = inode_to_bdi(inode); 1078 bdi = inode_to_bdi(inode);
1069 1079
1070 if (bdi_cap_writeback_dirty(bdi)) { 1080 if (bdi_cap_writeback_dirty(bdi)) {
@@ -1081,18 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1081 wakeup_bdi = true; 1091 wakeup_bdi = true;
1082 } 1092 }
1083 1093
1094 spin_unlock(&inode->i_lock);
1095 spin_lock(&inode_wb_list_lock);
1084 inode->dirtied_when = jiffies; 1096 inode->dirtied_when = jiffies;
1085 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1097 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1098 spin_unlock(&inode_wb_list_lock);
1099
1100 if (wakeup_bdi)
1101 bdi_wakeup_thread_delayed(bdi);
1102 return;
1086 } 1103 }
1087 goto out;
1088 } 1104 }
1089out_unlock_inode: 1105out_unlock_inode:
1090 spin_unlock(&inode->i_lock); 1106 spin_unlock(&inode->i_lock);
1091out:
1092 spin_unlock(&inode_lock);
1093 1107
1094 if (wakeup_bdi)
1095 bdi_wakeup_thread_delayed(bdi);
1096} 1108}
1097EXPORT_SYMBOL(__mark_inode_dirty); 1109EXPORT_SYMBOL(__mark_inode_dirty);
1098 1110
@@ -1296,9 +1308,9 @@ int write_inode_now(struct inode *inode, int sync)
1296 wbc.nr_to_write = 0; 1308 wbc.nr_to_write = 0;
1297 1309
1298 might_sleep(); 1310 might_sleep();
1299 spin_lock(&inode_lock); 1311 spin_lock(&inode_wb_list_lock);
1300 ret = writeback_single_inode(inode, &wbc); 1312 ret = writeback_single_inode(inode, &wbc);
1301 spin_unlock(&inode_lock); 1313 spin_unlock(&inode_wb_list_lock);
1302 if (sync) 1314 if (sync)
1303 inode_sync_wait(inode); 1315 inode_sync_wait(inode);
1304 return ret; 1316 return ret;
@@ -1320,9 +1332,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1320{ 1332{
1321 int ret; 1333 int ret;
1322 1334
1323 spin_lock(&inode_lock); 1335 spin_lock(&inode_wb_list_lock);
1324 ret = writeback_single_inode(inode, wbc); 1336 ret = writeback_single_inode(inode, wbc);
1325 spin_unlock(&inode_lock); 1337 spin_unlock(&inode_wb_list_lock);
1326 return ret; 1338 return ret;
1327} 1339}
1328EXPORT_SYMBOL(sync_inode); 1340EXPORT_SYMBOL(sync_inode);