diff options
author | Dave Chinner <dchinner@redhat.com> | 2011-03-22 07:23:41 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-03-24 21:17:51 -0400 |
commit | a66979abad090b2765a6c6790c9fdeab996833f2 (patch) | |
tree | e48b2d0fac8f96456286a503aeeb952620234961 /fs/fs-writeback.c | |
parent | 55fa6091d83160ca772fc37cebae45d42695a708 (diff) |
fs: move i_wb_list out from under inode_lock
Protect the inode writeback list with a new global lock,
inode_wb_list_lock, and use it to protect the list manipulations and
traversals. This lock replaces the inode_lock, as inodes on the
list can be validity-checked while holding inode->i_lock, and
hence the inode_lock is no longer needed to protect the list.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 76 |
1 file changed, 44 insertions, 32 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5de56a2182bb..ed800656356b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
176 | } | 176 | } |
177 | 177 | ||
178 | /* | 178 | /* |
179 | * Remove the inode from the writeback list it is on. | ||
180 | */ | ||
181 | void inode_wb_list_del(struct inode *inode) | ||
182 | { | ||
183 | spin_lock(&inode_wb_list_lock); | ||
184 | list_del_init(&inode->i_wb_list); | ||
185 | spin_unlock(&inode_wb_list_lock); | ||
186 | } | ||
187 | |||
188 | |||
189 | /* | ||
179 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the | 190 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the |
180 | * furthest end of its superblock's dirty-inode list. | 191 | * furthest end of its superblock's dirty-inode list. |
181 | * | 192 | * |
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode) | |||
188 | { | 199 | { |
189 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 200 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
190 | 201 | ||
202 | assert_spin_locked(&inode_wb_list_lock); | ||
191 | if (!list_empty(&wb->b_dirty)) { | 203 | if (!list_empty(&wb->b_dirty)) { |
192 | struct inode *tail; | 204 | struct inode *tail; |
193 | 205 | ||
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode) | |||
205 | { | 217 | { |
206 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 218 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
207 | 219 | ||
220 | assert_spin_locked(&inode_wb_list_lock); | ||
208 | list_move(&inode->i_wb_list, &wb->b_more_io); | 221 | list_move(&inode->i_wb_list, &wb->b_more_io); |
209 | } | 222 | } |
210 | 223 | ||
211 | static void inode_sync_complete(struct inode *inode) | 224 | static void inode_sync_complete(struct inode *inode) |
212 | { | 225 | { |
213 | /* | 226 | /* |
214 | * Prevent speculative execution through spin_unlock(&inode_lock); | 227 | * Prevent speculative execution through |
228 | * spin_unlock(&inode_wb_list_lock); | ||
215 | */ | 229 | */ |
230 | |||
216 | smp_mb(); | 231 | smp_mb(); |
217 | wake_up_bit(&inode->i_state, __I_SYNC); | 232 | wake_up_bit(&inode->i_state, __I_SYNC); |
218 | } | 233 | } |
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
286 | */ | 301 | */ |
287 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 302 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
288 | { | 303 | { |
304 | assert_spin_locked(&inode_wb_list_lock); | ||
289 | list_splice_init(&wb->b_more_io, &wb->b_io); | 305 | list_splice_init(&wb->b_more_io, &wb->b_io); |
290 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 306 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
291 | } | 307 | } |
@@ -308,25 +324,23 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
308 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 324 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
309 | while (inode->i_state & I_SYNC) { | 325 | while (inode->i_state & I_SYNC) { |
310 | spin_unlock(&inode->i_lock); | 326 | spin_unlock(&inode->i_lock); |
311 | spin_unlock(&inode_lock); | 327 | spin_unlock(&inode_wb_list_lock); |
312 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 328 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
313 | spin_lock(&inode_lock); | 329 | spin_lock(&inode_wb_list_lock); |
314 | spin_lock(&inode->i_lock); | 330 | spin_lock(&inode->i_lock); |
315 | } | 331 | } |
316 | } | 332 | } |
317 | 333 | ||
318 | /* | 334 | /* |
319 | * Write out an inode's dirty pages. Called under inode_lock. Either the | 335 | * Write out an inode's dirty pages. Called under inode_wb_list_lock. Either |
320 | * caller has ref on the inode (either via __iget or via syscall against an fd) | 336 | * the caller has an active reference on the inode or the inode has I_WILL_FREE |
321 | * or the inode has I_WILL_FREE set (via generic_forget_inode) | 337 | * set. |
322 | * | 338 | * |
323 | * If `wait' is set, wait on the writeout. | 339 | * If `wait' is set, wait on the writeout. |
324 | * | 340 | * |
325 | * The whole writeout design is quite complex and fragile. We want to avoid | 341 | * The whole writeout design is quite complex and fragile. We want to avoid |
326 | * starvation of particular inodes when others are being redirtied, prevent | 342 | * starvation of particular inodes when others are being redirtied, prevent |
327 | * livelocks, etc. | 343 | * livelocks, etc. |
328 | * | ||
329 | * Called under inode_lock. | ||
330 | */ | 344 | */ |
331 | static int | 345 | static int |
332 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 346 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
@@ -368,7 +382,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
368 | inode->i_state |= I_SYNC; | 382 | inode->i_state |= I_SYNC; |
369 | inode->i_state &= ~I_DIRTY_PAGES; | 383 | inode->i_state &= ~I_DIRTY_PAGES; |
370 | spin_unlock(&inode->i_lock); | 384 | spin_unlock(&inode->i_lock); |
371 | spin_unlock(&inode_lock); | 385 | spin_unlock(&inode_wb_list_lock); |
372 | 386 | ||
373 | ret = do_writepages(mapping, wbc); | 387 | ret = do_writepages(mapping, wbc); |
374 | 388 | ||
@@ -388,12 +402,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
388 | * due to delalloc, clear dirty metadata flags right before | 402 | * due to delalloc, clear dirty metadata flags right before |
389 | * write_inode() | 403 | * write_inode() |
390 | */ | 404 | */ |
391 | spin_lock(&inode_lock); | ||
392 | spin_lock(&inode->i_lock); | 405 | spin_lock(&inode->i_lock); |
393 | dirty = inode->i_state & I_DIRTY; | 406 | dirty = inode->i_state & I_DIRTY; |
394 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | 407 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); |
395 | spin_unlock(&inode->i_lock); | 408 | spin_unlock(&inode->i_lock); |
396 | spin_unlock(&inode_lock); | ||
397 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 409 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
398 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 410 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
399 | int err = write_inode(inode, wbc); | 411 | int err = write_inode(inode, wbc); |
@@ -401,7 +413,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
401 | ret = err; | 413 | ret = err; |
402 | } | 414 | } |
403 | 415 | ||
404 | spin_lock(&inode_lock); | 416 | spin_lock(&inode_wb_list_lock); |
405 | spin_lock(&inode->i_lock); | 417 | spin_lock(&inode->i_lock); |
406 | inode->i_state &= ~I_SYNC; | 418 | inode->i_state &= ~I_SYNC; |
407 | if (!(inode->i_state & I_FREEING)) { | 419 | if (!(inode->i_state & I_FREEING)) { |
@@ -543,10 +555,10 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
543 | */ | 555 | */ |
544 | redirty_tail(inode); | 556 | redirty_tail(inode); |
545 | } | 557 | } |
546 | spin_unlock(&inode_lock); | 558 | spin_unlock(&inode_wb_list_lock); |
547 | iput(inode); | 559 | iput(inode); |
548 | cond_resched(); | 560 | cond_resched(); |
549 | spin_lock(&inode_lock); | 561 | spin_lock(&inode_wb_list_lock); |
550 | if (wbc->nr_to_write <= 0) { | 562 | if (wbc->nr_to_write <= 0) { |
551 | wbc->more_io = 1; | 563 | wbc->more_io = 1; |
552 | return 1; | 564 | return 1; |
@@ -565,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
565 | 577 | ||
566 | if (!wbc->wb_start) | 578 | if (!wbc->wb_start) |
567 | wbc->wb_start = jiffies; /* livelock avoidance */ | 579 | wbc->wb_start = jiffies; /* livelock avoidance */ |
568 | spin_lock(&inode_lock); | 580 | spin_lock(&inode_wb_list_lock); |
569 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 581 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
570 | queue_io(wb, wbc->older_than_this); | 582 | queue_io(wb, wbc->older_than_this); |
571 | 583 | ||
@@ -583,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
583 | if (ret) | 595 | if (ret) |
584 | break; | 596 | break; |
585 | } | 597 | } |
586 | spin_unlock(&inode_lock); | 598 | spin_unlock(&inode_wb_list_lock); |
587 | /* Leave any unwritten inodes on b_io */ | 599 | /* Leave any unwritten inodes on b_io */ |
588 | } | 600 | } |
589 | 601 | ||
@@ -592,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb, | |||
592 | { | 604 | { |
593 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 605 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
594 | 606 | ||
595 | spin_lock(&inode_lock); | 607 | spin_lock(&inode_wb_list_lock); |
596 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 608 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
597 | queue_io(wb, wbc->older_than_this); | 609 | queue_io(wb, wbc->older_than_this); |
598 | writeback_sb_inodes(sb, wb, wbc, true); | 610 | writeback_sb_inodes(sb, wb, wbc, true); |
599 | spin_unlock(&inode_lock); | 611 | spin_unlock(&inode_wb_list_lock); |
600 | } | 612 | } |
601 | 613 | ||
602 | /* | 614 | /* |
@@ -735,7 +747,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
735 | * become available for writeback. Otherwise | 747 | * become available for writeback. Otherwise |
736 | * we'll just busyloop. | 748 | * we'll just busyloop. |
737 | */ | 749 | */ |
738 | spin_lock(&inode_lock); | 750 | spin_lock(&inode_wb_list_lock); |
739 | if (!list_empty(&wb->b_more_io)) { | 751 | if (!list_empty(&wb->b_more_io)) { |
740 | inode = wb_inode(wb->b_more_io.prev); | 752 | inode = wb_inode(wb->b_more_io.prev); |
741 | trace_wbc_writeback_wait(&wbc, wb->bdi); | 753 | trace_wbc_writeback_wait(&wbc, wb->bdi); |
@@ -743,7 +755,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
743 | inode_wait_for_writeback(inode); | 755 | inode_wait_for_writeback(inode); |
744 | spin_unlock(&inode->i_lock); | 756 | spin_unlock(&inode->i_lock); |
745 | } | 757 | } |
746 | spin_unlock(&inode_lock); | 758 | spin_unlock(&inode_wb_list_lock); |
747 | } | 759 | } |
748 | 760 | ||
749 | return wrote; | 761 | return wrote; |
@@ -1009,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1009 | { | 1021 | { |
1010 | struct super_block *sb = inode->i_sb; | 1022 | struct super_block *sb = inode->i_sb; |
1011 | struct backing_dev_info *bdi = NULL; | 1023 | struct backing_dev_info *bdi = NULL; |
1012 | bool wakeup_bdi = false; | ||
1013 | 1024 | ||
1014 | /* | 1025 | /* |
1015 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 1026 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -1033,7 +1044,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1033 | if (unlikely(block_dump)) | 1044 | if (unlikely(block_dump)) |
1034 | block_dump___mark_inode_dirty(inode); | 1045 | block_dump___mark_inode_dirty(inode); |
1035 | 1046 | ||
1036 | spin_lock(&inode_lock); | ||
1037 | spin_lock(&inode->i_lock); | 1047 | spin_lock(&inode->i_lock); |
1038 | if ((inode->i_state & flags) != flags) { | 1048 | if ((inode->i_state & flags) != flags) { |
1039 | const int was_dirty = inode->i_state & I_DIRTY; | 1049 | const int was_dirty = inode->i_state & I_DIRTY; |
@@ -1059,12 +1069,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1059 | if (inode->i_state & I_FREEING) | 1069 | if (inode->i_state & I_FREEING) |
1060 | goto out_unlock_inode; | 1070 | goto out_unlock_inode; |
1061 | 1071 | ||
1062 | spin_unlock(&inode->i_lock); | ||
1063 | /* | 1072 | /* |
1064 | * If the inode was already on b_dirty/b_io/b_more_io, don't | 1073 | * If the inode was already on b_dirty/b_io/b_more_io, don't |
1065 | * reposition it (that would break b_dirty time-ordering). | 1074 | * reposition it (that would break b_dirty time-ordering). |
1066 | */ | 1075 | */ |
1067 | if (!was_dirty) { | 1076 | if (!was_dirty) { |
1077 | bool wakeup_bdi = false; | ||
1068 | bdi = inode_to_bdi(inode); | 1078 | bdi = inode_to_bdi(inode); |
1069 | 1079 | ||
1070 | if (bdi_cap_writeback_dirty(bdi)) { | 1080 | if (bdi_cap_writeback_dirty(bdi)) { |
@@ -1081,18 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1081 | wakeup_bdi = true; | 1091 | wakeup_bdi = true; |
1082 | } | 1092 | } |
1083 | 1093 | ||
1094 | spin_unlock(&inode->i_lock); | ||
1095 | spin_lock(&inode_wb_list_lock); | ||
1084 | inode->dirtied_when = jiffies; | 1096 | inode->dirtied_when = jiffies; |
1085 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1097 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1098 | spin_unlock(&inode_wb_list_lock); | ||
1099 | |||
1100 | if (wakeup_bdi) | ||
1101 | bdi_wakeup_thread_delayed(bdi); | ||
1102 | return; | ||
1086 | } | 1103 | } |
1087 | goto out; | ||
1088 | } | 1104 | } |
1089 | out_unlock_inode: | 1105 | out_unlock_inode: |
1090 | spin_unlock(&inode->i_lock); | 1106 | spin_unlock(&inode->i_lock); |
1091 | out: | ||
1092 | spin_unlock(&inode_lock); | ||
1093 | 1107 | ||
1094 | if (wakeup_bdi) | ||
1095 | bdi_wakeup_thread_delayed(bdi); | ||
1096 | } | 1108 | } |
1097 | EXPORT_SYMBOL(__mark_inode_dirty); | 1109 | EXPORT_SYMBOL(__mark_inode_dirty); |
1098 | 1110 | ||
@@ -1296,9 +1308,9 @@ int write_inode_now(struct inode *inode, int sync) | |||
1296 | wbc.nr_to_write = 0; | 1308 | wbc.nr_to_write = 0; |
1297 | 1309 | ||
1298 | might_sleep(); | 1310 | might_sleep(); |
1299 | spin_lock(&inode_lock); | 1311 | spin_lock(&inode_wb_list_lock); |
1300 | ret = writeback_single_inode(inode, &wbc); | 1312 | ret = writeback_single_inode(inode, &wbc); |
1301 | spin_unlock(&inode_lock); | 1313 | spin_unlock(&inode_wb_list_lock); |
1302 | if (sync) | 1314 | if (sync) |
1303 | inode_sync_wait(inode); | 1315 | inode_sync_wait(inode); |
1304 | return ret; | 1316 | return ret; |
@@ -1320,9 +1332,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
1320 | { | 1332 | { |
1321 | int ret; | 1333 | int ret; |
1322 | 1334 | ||
1323 | spin_lock(&inode_lock); | 1335 | spin_lock(&inode_wb_list_lock); |
1324 | ret = writeback_single_inode(inode, wbc); | 1336 | ret = writeback_single_inode(inode, wbc); |
1325 | spin_unlock(&inode_lock); | 1337 | spin_unlock(&inode_wb_list_lock); |
1326 | return ret; | 1338 | return ret; |
1327 | } | 1339 | } |
1328 | EXPORT_SYMBOL(sync_inode); | 1340 | EXPORT_SYMBOL(sync_inode); |