commit 250df6ed274d767da844a5d9f05720b804240197 (patch)
author:    Dave Chinner <dchinner@redhat.com>      2011-03-22 07:23:36 -0400
committer: Al Viro <viro@zeniv.linux.org.uk>       2011-03-24 21:16:31 -0400
tree:      b74f49a86c4451d9e3e82f90e3f791163025be21 /fs/fs-writeback.c
parent:    3dc8fe4dca9cd3e4aa828ed36451e2bcfd2350da (diff)
fs: protect inode->i_state with inode->i_lock
Protect inode state transitions and validity checks with the
inode->i_lock. This enables us to make inode state transitions
independently of the inode_lock and is the first step to peeling
away the inode_lock from the code.
This requires that __iget() is done atomically with i_state checks
during list traversals so that we don't race with another thread
marking the inode I_FREEING between the state check and grabbing the
reference.
Also remove the unlock_new_inode() memory barrier optimisation
required to avoid taking the inode_lock when clearing I_NEW.
Simplify the code by simply taking the inode->i_lock around the
state change and wakeup. Because the wakeup is no longer tricky,
remove the wake_up_inode() function and open code the wakeup where
necessary.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/fs-writeback.c'):
 fs/fs-writeback.c | 44 ++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 59c6e4956786..efd1ebe879cc 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -306,10 +306,12 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
306 | wait_queue_head_t *wqh; | 306 | wait_queue_head_t *wqh; |
307 | 307 | ||
308 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 308 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
309 | while (inode->i_state & I_SYNC) { | 309 | while (inode->i_state & I_SYNC) { |
310 | spin_unlock(&inode->i_lock); | ||
310 | spin_unlock(&inode_lock); | 311 | spin_unlock(&inode_lock); |
311 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 312 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
312 | spin_lock(&inode_lock); | 313 | spin_lock(&inode_lock); |
314 | spin_lock(&inode->i_lock); | ||
313 | } | 315 | } |
314 | } | 316 | } |
315 | 317 | ||
@@ -333,6 +335,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
333 | unsigned dirty; | 335 | unsigned dirty; |
334 | int ret; | 336 | int ret; |
335 | 337 | ||
338 | spin_lock(&inode->i_lock); | ||
336 | if (!atomic_read(&inode->i_count)) | 339 | if (!atomic_read(&inode->i_count)) |
337 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); | 340 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); |
338 | else | 341 | else |
@@ -348,6 +351,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
348 | * completed a full scan of b_io. | 351 | * completed a full scan of b_io. |
349 | */ | 352 | */ |
350 | if (wbc->sync_mode != WB_SYNC_ALL) { | 353 | if (wbc->sync_mode != WB_SYNC_ALL) { |
354 | spin_unlock(&inode->i_lock); | ||
351 | requeue_io(inode); | 355 | requeue_io(inode); |
352 | return 0; | 356 | return 0; |
353 | } | 357 | } |
@@ -363,6 +367,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
363 | /* Set I_SYNC, reset I_DIRTY_PAGES */ | 367 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
364 | inode->i_state |= I_SYNC; | 368 | inode->i_state |= I_SYNC; |
365 | inode->i_state &= ~I_DIRTY_PAGES; | 369 | inode->i_state &= ~I_DIRTY_PAGES; |
370 | spin_unlock(&inode->i_lock); | ||
366 | spin_unlock(&inode_lock); | 371 | spin_unlock(&inode_lock); |
367 | 372 | ||
368 | ret = do_writepages(mapping, wbc); | 373 | ret = do_writepages(mapping, wbc); |
@@ -384,8 +389,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
384 | * write_inode() | 389 | * write_inode() |
385 | */ | 390 | */ |
386 | spin_lock(&inode_lock); | 391 | spin_lock(&inode_lock); |
392 | spin_lock(&inode->i_lock); | ||
387 | dirty = inode->i_state & I_DIRTY; | 393 | dirty = inode->i_state & I_DIRTY; |
388 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | 394 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); |
395 | spin_unlock(&inode->i_lock); | ||
389 | spin_unlock(&inode_lock); | 396 | spin_unlock(&inode_lock); |
390 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 397 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
391 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 398 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
@@ -395,6 +402,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
395 | } | 402 | } |
396 | 403 | ||
397 | spin_lock(&inode_lock); | 404 | spin_lock(&inode_lock); |
405 | spin_lock(&inode->i_lock); | ||
398 | inode->i_state &= ~I_SYNC; | 406 | inode->i_state &= ~I_SYNC; |
399 | if (!(inode->i_state & I_FREEING)) { | 407 | if (!(inode->i_state & I_FREEING)) { |
400 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 408 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
@@ -436,6 +444,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
436 | } | 444 | } |
437 | } | 445 | } |
438 | inode_sync_complete(inode); | 446 | inode_sync_complete(inode); |
447 | spin_unlock(&inode->i_lock); | ||
439 | return ret; | 448 | return ret; |
440 | } | 449 | } |
441 | 450 | ||
@@ -506,7 +515,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
506 | * kind does not need peridic writeout yet, and for the latter | 515 | * kind does not need peridic writeout yet, and for the latter |
507 | * kind writeout is handled by the freer. | 516 | * kind writeout is handled by the freer. |
508 | */ | 517 | */ |
518 | spin_lock(&inode->i_lock); | ||
509 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | 519 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { |
520 | spin_unlock(&inode->i_lock); | ||
510 | requeue_io(inode); | 521 | requeue_io(inode); |
511 | continue; | 522 | continue; |
512 | } | 523 | } |
@@ -515,10 +526,14 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
515 | * Was this inode dirtied after sync_sb_inodes was called? | 526 | * Was this inode dirtied after sync_sb_inodes was called? |
516 | * This keeps sync from extra jobs and livelock. | 527 | * This keeps sync from extra jobs and livelock. |
517 | */ | 528 | */ |
518 | if (inode_dirtied_after(inode, wbc->wb_start)) | 529 | if (inode_dirtied_after(inode, wbc->wb_start)) { |
530 | spin_unlock(&inode->i_lock); | ||
519 | return 1; | 531 | return 1; |
532 | } | ||
520 | 533 | ||
521 | __iget(inode); | 534 | __iget(inode); |
535 | spin_unlock(&inode->i_lock); | ||
536 | |||
522 | pages_skipped = wbc->pages_skipped; | 537 | pages_skipped = wbc->pages_skipped; |
523 | writeback_single_inode(inode, wbc); | 538 | writeback_single_inode(inode, wbc); |
524 | if (wbc->pages_skipped != pages_skipped) { | 539 | if (wbc->pages_skipped != pages_skipped) { |
@@ -724,7 +739,9 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
724 | if (!list_empty(&wb->b_more_io)) { | 739 | if (!list_empty(&wb->b_more_io)) { |
725 | inode = wb_inode(wb->b_more_io.prev); | 740 | inode = wb_inode(wb->b_more_io.prev); |
726 | trace_wbc_writeback_wait(&wbc, wb->bdi); | 741 | trace_wbc_writeback_wait(&wbc, wb->bdi); |
742 | spin_lock(&inode->i_lock); | ||
727 | inode_wait_for_writeback(inode); | 743 | inode_wait_for_writeback(inode); |
744 | spin_unlock(&inode->i_lock); | ||
728 | } | 745 | } |
729 | spin_unlock(&inode_lock); | 746 | spin_unlock(&inode_lock); |
730 | } | 747 | } |
@@ -1017,6 +1034,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1017 | block_dump___mark_inode_dirty(inode); | 1034 | block_dump___mark_inode_dirty(inode); |
1018 | 1035 | ||
1019 | spin_lock(&inode_lock); | 1036 | spin_lock(&inode_lock); |
1037 | spin_lock(&inode->i_lock); | ||
1020 | if ((inode->i_state & flags) != flags) { | 1038 | if ((inode->i_state & flags) != flags) { |
1021 | const int was_dirty = inode->i_state & I_DIRTY; | 1039 | const int was_dirty = inode->i_state & I_DIRTY; |
1022 | 1040 | ||
@@ -1028,7 +1046,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1028 | * superblock list, based upon its state. | 1046 | * superblock list, based upon its state. |
1029 | */ | 1047 | */ |
1030 | if (inode->i_state & I_SYNC) | 1048 | if (inode->i_state & I_SYNC) |
1031 | goto out; | 1049 | goto out_unlock_inode; |
1032 | 1050 | ||
1033 | /* | 1051 | /* |
1034 | * Only add valid (hashed) inodes to the superblock's | 1052 | * Only add valid (hashed) inodes to the superblock's |
@@ -1036,11 +1054,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1036 | */ | 1054 | */ |
1037 | if (!S_ISBLK(inode->i_mode)) { | 1055 | if (!S_ISBLK(inode->i_mode)) { |
1038 | if (inode_unhashed(inode)) | 1056 | if (inode_unhashed(inode)) |
1039 | goto out; | 1057 | goto out_unlock_inode; |
1040 | } | 1058 | } |
1041 | if (inode->i_state & I_FREEING) | 1059 | if (inode->i_state & I_FREEING) |
1042 | goto out; | 1060 | goto out_unlock_inode; |
1043 | 1061 | ||
1062 | spin_unlock(&inode->i_lock); | ||
1044 | /* | 1063 | /* |
1045 | * If the inode was already on b_dirty/b_io/b_more_io, don't | 1064 | * If the inode was already on b_dirty/b_io/b_more_io, don't |
1046 | * reposition it (that would break b_dirty time-ordering). | 1065 | * reposition it (that would break b_dirty time-ordering). |
@@ -1065,7 +1084,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1065 | inode->dirtied_when = jiffies; | 1084 | inode->dirtied_when = jiffies; |
1066 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1085 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1067 | } | 1086 | } |
1087 | goto out; | ||
1068 | } | 1088 | } |
1089 | out_unlock_inode: | ||
1090 | spin_unlock(&inode->i_lock); | ||
1069 | out: | 1091 | out: |
1070 | spin_unlock(&inode_lock); | 1092 | spin_unlock(&inode_lock); |
1071 | 1093 | ||
@@ -1111,14 +1133,16 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1111 | * we still have to wait for that writeout. | 1133 | * we still have to wait for that writeout. |
1112 | */ | 1134 | */ |
1113 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1135 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1114 | struct address_space *mapping; | 1136 | struct address_space *mapping = inode->i_mapping; |
1115 | 1137 | ||
1116 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 1138 | spin_lock(&inode->i_lock); |
1117 | continue; | 1139 | if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || |
1118 | mapping = inode->i_mapping; | 1140 | (mapping->nrpages == 0)) { |
1119 | if (mapping->nrpages == 0) | 1141 | spin_unlock(&inode->i_lock); |
1120 | continue; | 1142 | continue; |
1143 | } | ||
1121 | __iget(inode); | 1144 | __iget(inode); |
1145 | spin_unlock(&inode->i_lock); | ||
1122 | spin_unlock(&inode_lock); | 1146 | spin_unlock(&inode_lock); |
1123 | /* | 1147 | /* |
1124 | * We hold a reference to 'inode' so it couldn't have | 1148 | * We hold a reference to 'inode' so it couldn't have |