aboutsummaryrefslogtreecommitdiffstats
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-03-22 07:23:36 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-03-24 21:16:31 -0400
commit250df6ed274d767da844a5d9f05720b804240197 (patch)
treeb74f49a86c4451d9e3e82f90e3f791163025be21 /fs/fs-writeback.c
parent3dc8fe4dca9cd3e4aa828ed36451e2bcfd2350da (diff)
fs: protect inode->i_state with inode->i_lock
Protect inode state transitions and validity checks with the inode->i_lock. This enables us to make inode state transitions independently of the inode_lock and is the first step to peeling away the inode_lock from the code.

This requires that __iget() is done atomically with i_state checks during list traversals so that we don't race with another thread marking the inode I_FREEING between the state check and grabbing the reference.

Also remove the unlock_new_inode() memory barrier optimisation required to avoid taking the inode_lock when clearing I_NEW. Simplify the code by simply taking the inode->i_lock around the state change and wakeup. Because the wakeup is no longer tricky, remove the wake_up_inode() function and open code the wakeup where necessary.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c44
1 file changed, 34 insertions, 10 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59c6e4956786..efd1ebe879cc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -306,10 +306,12 @@ static void inode_wait_for_writeback(struct inode *inode)
306 wait_queue_head_t *wqh; 306 wait_queue_head_t *wqh;
307 307
308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
309 while (inode->i_state & I_SYNC) { 309 while (inode->i_state & I_SYNC) {
310 spin_unlock(&inode->i_lock);
310 spin_unlock(&inode_lock); 311 spin_unlock(&inode_lock);
311 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 312 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
312 spin_lock(&inode_lock); 313 spin_lock(&inode_lock);
314 spin_lock(&inode->i_lock);
313 } 315 }
314} 316}
315 317
@@ -333,6 +335,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
333 unsigned dirty; 335 unsigned dirty;
334 int ret; 336 int ret;
335 337
338 spin_lock(&inode->i_lock);
336 if (!atomic_read(&inode->i_count)) 339 if (!atomic_read(&inode->i_count))
337 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 340 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
338 else 341 else
@@ -348,6 +351,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
348 * completed a full scan of b_io. 351 * completed a full scan of b_io.
349 */ 352 */
350 if (wbc->sync_mode != WB_SYNC_ALL) { 353 if (wbc->sync_mode != WB_SYNC_ALL) {
354 spin_unlock(&inode->i_lock);
351 requeue_io(inode); 355 requeue_io(inode);
352 return 0; 356 return 0;
353 } 357 }
@@ -363,6 +367,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
363 /* Set I_SYNC, reset I_DIRTY_PAGES */ 367 /* Set I_SYNC, reset I_DIRTY_PAGES */
364 inode->i_state |= I_SYNC; 368 inode->i_state |= I_SYNC;
365 inode->i_state &= ~I_DIRTY_PAGES; 369 inode->i_state &= ~I_DIRTY_PAGES;
370 spin_unlock(&inode->i_lock);
366 spin_unlock(&inode_lock); 371 spin_unlock(&inode_lock);
367 372
368 ret = do_writepages(mapping, wbc); 373 ret = do_writepages(mapping, wbc);
@@ -384,8 +389,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
384 * write_inode() 389 * write_inode()
385 */ 390 */
386 spin_lock(&inode_lock); 391 spin_lock(&inode_lock);
392 spin_lock(&inode->i_lock);
387 dirty = inode->i_state & I_DIRTY; 393 dirty = inode->i_state & I_DIRTY;
388 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 394 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
395 spin_unlock(&inode->i_lock);
389 spin_unlock(&inode_lock); 396 spin_unlock(&inode_lock);
390 /* Don't write the inode if only I_DIRTY_PAGES was set */ 397 /* Don't write the inode if only I_DIRTY_PAGES was set */
391 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 398 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
@@ -395,6 +402,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
395 } 402 }
396 403
397 spin_lock(&inode_lock); 404 spin_lock(&inode_lock);
405 spin_lock(&inode->i_lock);
398 inode->i_state &= ~I_SYNC; 406 inode->i_state &= ~I_SYNC;
399 if (!(inode->i_state & I_FREEING)) { 407 if (!(inode->i_state & I_FREEING)) {
400 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 408 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -436,6 +444,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
436 } 444 }
437 } 445 }
438 inode_sync_complete(inode); 446 inode_sync_complete(inode);
447 spin_unlock(&inode->i_lock);
439 return ret; 448 return ret;
440} 449}
441 450
@@ -506,7 +515,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
506 * kind does not need peridic writeout yet, and for the latter 515 * kind does not need peridic writeout yet, and for the latter
507 * kind writeout is handled by the freer. 516 * kind writeout is handled by the freer.
508 */ 517 */
518 spin_lock(&inode->i_lock);
509 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 519 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
520 spin_unlock(&inode->i_lock);
510 requeue_io(inode); 521 requeue_io(inode);
511 continue; 522 continue;
512 } 523 }
@@ -515,10 +526,14 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
515 * Was this inode dirtied after sync_sb_inodes was called? 526 * Was this inode dirtied after sync_sb_inodes was called?
516 * This keeps sync from extra jobs and livelock. 527 * This keeps sync from extra jobs and livelock.
517 */ 528 */
518 if (inode_dirtied_after(inode, wbc->wb_start)) 529 if (inode_dirtied_after(inode, wbc->wb_start)) {
530 spin_unlock(&inode->i_lock);
519 return 1; 531 return 1;
532 }
520 533
521 __iget(inode); 534 __iget(inode);
535 spin_unlock(&inode->i_lock);
536
522 pages_skipped = wbc->pages_skipped; 537 pages_skipped = wbc->pages_skipped;
523 writeback_single_inode(inode, wbc); 538 writeback_single_inode(inode, wbc);
524 if (wbc->pages_skipped != pages_skipped) { 539 if (wbc->pages_skipped != pages_skipped) {
@@ -724,7 +739,9 @@ static long wb_writeback(struct bdi_writeback *wb,
724 if (!list_empty(&wb->b_more_io)) { 739 if (!list_empty(&wb->b_more_io)) {
725 inode = wb_inode(wb->b_more_io.prev); 740 inode = wb_inode(wb->b_more_io.prev);
726 trace_wbc_writeback_wait(&wbc, wb->bdi); 741 trace_wbc_writeback_wait(&wbc, wb->bdi);
742 spin_lock(&inode->i_lock);
727 inode_wait_for_writeback(inode); 743 inode_wait_for_writeback(inode);
744 spin_unlock(&inode->i_lock);
728 } 745 }
729 spin_unlock(&inode_lock); 746 spin_unlock(&inode_lock);
730 } 747 }
@@ -1017,6 +1034,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1017 block_dump___mark_inode_dirty(inode); 1034 block_dump___mark_inode_dirty(inode);
1018 1035
1019 spin_lock(&inode_lock); 1036 spin_lock(&inode_lock);
1037 spin_lock(&inode->i_lock);
1020 if ((inode->i_state & flags) != flags) { 1038 if ((inode->i_state & flags) != flags) {
1021 const int was_dirty = inode->i_state & I_DIRTY; 1039 const int was_dirty = inode->i_state & I_DIRTY;
1022 1040
@@ -1028,7 +1046,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1028 * superblock list, based upon its state. 1046 * superblock list, based upon its state.
1029 */ 1047 */
1030 if (inode->i_state & I_SYNC) 1048 if (inode->i_state & I_SYNC)
1031 goto out; 1049 goto out_unlock_inode;
1032 1050
1033 /* 1051 /*
1034 * Only add valid (hashed) inodes to the superblock's 1052 * Only add valid (hashed) inodes to the superblock's
@@ -1036,11 +1054,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1036 */ 1054 */
1037 if (!S_ISBLK(inode->i_mode)) { 1055 if (!S_ISBLK(inode->i_mode)) {
1038 if (inode_unhashed(inode)) 1056 if (inode_unhashed(inode))
1039 goto out; 1057 goto out_unlock_inode;
1040 } 1058 }
1041 if (inode->i_state & I_FREEING) 1059 if (inode->i_state & I_FREEING)
1042 goto out; 1060 goto out_unlock_inode;
1043 1061
1062 spin_unlock(&inode->i_lock);
1044 /* 1063 /*
1045 * If the inode was already on b_dirty/b_io/b_more_io, don't 1064 * If the inode was already on b_dirty/b_io/b_more_io, don't
1046 * reposition it (that would break b_dirty time-ordering). 1065 * reposition it (that would break b_dirty time-ordering).
@@ -1065,7 +1084,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1065 inode->dirtied_when = jiffies; 1084 inode->dirtied_when = jiffies;
1066 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1085 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1067 } 1086 }
1087 goto out;
1068 } 1088 }
1089out_unlock_inode:
1090 spin_unlock(&inode->i_lock);
1069out: 1091out:
1070 spin_unlock(&inode_lock); 1092 spin_unlock(&inode_lock);
1071 1093
@@ -1111,14 +1133,16 @@ static void wait_sb_inodes(struct super_block *sb)
1111 * we still have to wait for that writeout. 1133 * we still have to wait for that writeout.
1112 */ 1134 */
1113 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1135 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1114 struct address_space *mapping; 1136 struct address_space *mapping = inode->i_mapping;
1115 1137
1116 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 1138 spin_lock(&inode->i_lock);
1117 continue; 1139 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
1118 mapping = inode->i_mapping; 1140 (mapping->nrpages == 0)) {
1119 if (mapping->nrpages == 0) 1141 spin_unlock(&inode->i_lock);
1120 continue; 1142 continue;
1143 }
1121 __iget(inode); 1144 __iget(inode);
1145 spin_unlock(&inode->i_lock);
1122 spin_unlock(&inode_lock); 1146 spin_unlock(&inode_lock);
1123 /* 1147 /*
1124 * We hold a reference to 'inode' so it couldn't have 1148 * We hold a reference to 'inode' so it couldn't have