diff options
-rw-r--r-- | fs/fs-writeback.c | 66 | ||||
-rw-r--r-- | fs/inode.c | 8 | ||||
-rw-r--r-- | include/linux/fs.h | 7 | ||||
-rw-r--r-- | include/linux/writeback.h | 7 |
4 files changed, 65 insertions, 23 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5f2c68289610..8d2fb8c88cf3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -326,9 +326,12 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) | |||
326 | } | 326 | } |
327 | 327 | ||
328 | /* | 328 | /* |
329 | * Wait for writeback on an inode to complete. | 329 | * Wait for writeback on an inode to complete. Called with i_lock held. |
330 | * Caller must make sure inode cannot go away when we drop i_lock. | ||
330 | */ | 331 | */ |
331 | static void inode_wait_for_writeback(struct inode *inode) | 332 | static void __inode_wait_for_writeback(struct inode *inode) |
333 | __releases(inode->i_lock) | ||
334 | __acquires(inode->i_lock) | ||
332 | { | 335 | { |
333 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); | 336 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); |
334 | wait_queue_head_t *wqh; | 337 | wait_queue_head_t *wqh; |
@@ -342,6 +345,36 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
342 | } | 345 | } |
343 | 346 | ||
344 | /* | 347 | /* |
348 | * Wait for writeback on an inode to complete. Caller must have inode pinned. | ||
349 | */ | ||
350 | void inode_wait_for_writeback(struct inode *inode) | ||
351 | { | ||
352 | spin_lock(&inode->i_lock); | ||
353 | __inode_wait_for_writeback(inode); | ||
354 | spin_unlock(&inode->i_lock); | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * Sleep until I_SYNC is cleared. This function must be called with i_lock | ||
359 | * held and drops it. It is aimed for callers not holding any inode reference | ||
360 | * so once i_lock is dropped, inode can go away. | ||
361 | */ | ||
362 | static void inode_sleep_on_writeback(struct inode *inode) | ||
363 | __releases(inode->i_lock) | ||
364 | { | ||
365 | DEFINE_WAIT(wait); | ||
366 | wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | ||
367 | int sleep; | ||
368 | |||
369 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | ||
370 | sleep = inode->i_state & I_SYNC; | ||
371 | spin_unlock(&inode->i_lock); | ||
372 | if (sleep) | ||
373 | schedule(); | ||
374 | finish_wait(wqh, &wait); | ||
375 | } | ||
376 | |||
377 | /* | ||
345 | * Find proper writeback list for the inode depending on its current state and | 378 | * Find proper writeback list for the inode depending on its current state and |
346 | * possibly also change of its state while we were doing writeback. Here we | 379 | * possibly also change of its state while we were doing writeback. Here we |
347 | * handle things such as livelock prevention or fairness of writeback among | 380 | * handle things such as livelock prevention or fairness of writeback among |
@@ -479,9 +512,11 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |||
479 | if (wbc->sync_mode != WB_SYNC_ALL) | 512 | if (wbc->sync_mode != WB_SYNC_ALL) |
480 | goto out; | 513 | goto out; |
481 | /* | 514 | /* |
482 | * It's a data-integrity sync. We must wait. | 515 | * It's a data-integrity sync. We must wait. Since callers hold |
516 | * inode reference or inode has I_WILL_FREE set, it cannot go | ||
517 | * away under us. | ||
483 | */ | 518 | */ |
484 | inode_wait_for_writeback(inode); | 519 | __inode_wait_for_writeback(inode); |
485 | } | 520 | } |
486 | WARN_ON(inode->i_state & I_SYNC); | 521 | WARN_ON(inode->i_state & I_SYNC); |
487 | /* | 522 | /* |
@@ -620,20 +655,28 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
620 | } | 655 | } |
621 | spin_unlock(&wb->list_lock); | 656 | spin_unlock(&wb->list_lock); |
622 | 657 | ||
623 | __iget(inode); | ||
624 | /* | 658 | /* |
625 | * We already requeued the inode if it had I_SYNC set and we | 659 | * We already requeued the inode if it had I_SYNC set and we |
626 | * are doing WB_SYNC_NONE writeback. So this catches only the | 660 | * are doing WB_SYNC_NONE writeback. So this catches only the |
627 | * WB_SYNC_ALL case. | 661 | * WB_SYNC_ALL case. |
628 | */ | 662 | */ |
629 | if (inode->i_state & I_SYNC) | 663 | if (inode->i_state & I_SYNC) { |
630 | inode_wait_for_writeback(inode); | 664 | /* Wait for I_SYNC. This function drops i_lock... */ |
665 | inode_sleep_on_writeback(inode); | ||
666 | /* Inode may be gone, start again */ | ||
667 | continue; | ||
668 | } | ||
631 | inode->i_state |= I_SYNC; | 669 | inode->i_state |= I_SYNC; |
632 | spin_unlock(&inode->i_lock); | 670 | spin_unlock(&inode->i_lock); |
671 | |||
633 | write_chunk = writeback_chunk_size(wb->bdi, work); | 672 | write_chunk = writeback_chunk_size(wb->bdi, work); |
634 | wbc.nr_to_write = write_chunk; | 673 | wbc.nr_to_write = write_chunk; |
635 | wbc.pages_skipped = 0; | 674 | wbc.pages_skipped = 0; |
636 | 675 | ||
676 | /* | ||
677 | * We use I_SYNC to pin the inode in memory. While it is set | ||
678 | * evict_inode() will wait so the inode cannot be freed. | ||
679 | */ | ||
637 | __writeback_single_inode(inode, wb, &wbc); | 680 | __writeback_single_inode(inode, wb, &wbc); |
638 | 681 | ||
639 | work->nr_pages -= write_chunk - wbc.nr_to_write; | 682 | work->nr_pages -= write_chunk - wbc.nr_to_write; |
@@ -645,10 +688,7 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
645 | requeue_inode(inode, wb, &wbc); | 688 | requeue_inode(inode, wb, &wbc); |
646 | inode_sync_complete(inode); | 689 | inode_sync_complete(inode); |
647 | spin_unlock(&inode->i_lock); | 690 | spin_unlock(&inode->i_lock); |
648 | spin_unlock(&wb->list_lock); | 691 | cond_resched_lock(&wb->list_lock); |
649 | iput(inode); | ||
650 | cond_resched(); | ||
651 | spin_lock(&wb->list_lock); | ||
652 | /* | 692 | /* |
653 | * bail out to wb_writeback() often enough to check | 693 | * bail out to wb_writeback() often enough to check |
654 | * background threshold and other termination conditions. | 694 | * background threshold and other termination conditions. |
@@ -843,8 +883,8 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
843 | inode = wb_inode(wb->b_more_io.prev); | 883 | inode = wb_inode(wb->b_more_io.prev); |
844 | spin_lock(&inode->i_lock); | 884 | spin_lock(&inode->i_lock); |
845 | spin_unlock(&wb->list_lock); | 885 | spin_unlock(&wb->list_lock); |
846 | inode_wait_for_writeback(inode); | 886 | /* This function drops i_lock... */ |
847 | spin_unlock(&inode->i_lock); | 887 | inode_sleep_on_writeback(inode); |
848 | spin_lock(&wb->list_lock); | 888 | spin_lock(&wb->list_lock); |
849 | } | 889 | } |
850 | } | 890 | } |
diff --git a/fs/inode.c b/fs/inode.c index 02c0fa5e16a4..f4e145016611 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -530,7 +530,13 @@ static void evict(struct inode *inode) | |||
530 | 530 | ||
531 | inode_sb_list_del(inode); | 531 | inode_sb_list_del(inode); |
532 | 532 | ||
533 | inode_sync_wait(inode); | 533 | /* |
534 | * Wait for flusher thread to be done with the inode so that filesystem | ||
535 | * does not start destroying it while writeback is still running. Since | ||
536 | * the inode has I_FREEING set, flusher thread won't start new work on | ||
537 | * the inode. We just have to wait for running writeback to finish. | ||
538 | */ | ||
539 | inode_wait_for_writeback(inode); | ||
534 | 540 | ||
535 | if (op->evict_inode) { | 541 | if (op->evict_inode) { |
536 | op->evict_inode(inode); | 542 | op->evict_inode(inode); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index c79316c79ee3..1c71e7f4d234 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1753,9 +1753,10 @@ struct super_operations { | |||
1753 | * anew. Other functions will just ignore such inodes, | 1753 | * anew. Other functions will just ignore such inodes, |
1754 | * if appropriate. I_NEW is used for waiting. | 1754 | * if appropriate. I_NEW is used for waiting. |
1755 | * | 1755 | * |
1756 | * I_SYNC Synchonized write of dirty inode data. The bits is | 1756 | * I_SYNC Writeback of inode is running. The bit is set during |
1757 | * set during data writeback, and cleared with a wakeup | 1757 | * data writeback, and cleared with a wakeup on the bit |
1758 | * on the bit address once it is done. | 1758 | * address once it is done. The bit is also used to pin |
1759 | * the inode in memory for flusher thread. | ||
1759 | * | 1760 | * |
1760 | * I_REFERENCED Marks the inode as recently references on the LRU list. | 1761 | * I_REFERENCED Marks the inode as recently references on the LRU list. |
1761 | * | 1762 | * |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3309736ff059..6d0a0fcd80e7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -95,6 +95,7 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, | |||
95 | enum wb_reason reason); | 95 | enum wb_reason reason); |
96 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); | 96 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); |
97 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); | 97 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); |
98 | void inode_wait_for_writeback(struct inode *inode); | ||
98 | 99 | ||
99 | /* writeback.h requires fs.h; it, too, is not included from here. */ | 100 | /* writeback.h requires fs.h; it, too, is not included from here. */ |
100 | static inline void wait_on_inode(struct inode *inode) | 101 | static inline void wait_on_inode(struct inode *inode) |
@@ -102,12 +103,6 @@ static inline void wait_on_inode(struct inode *inode) | |||
102 | might_sleep(); | 103 | might_sleep(); |
103 | wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); | 104 | wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); |
104 | } | 105 | } |
105 | static inline void inode_sync_wait(struct inode *inode) | ||
106 | { | ||
107 | might_sleep(); | ||
108 | wait_on_bit(&inode->i_state, __I_SYNC, inode_wait, | ||
109 | TASK_UNINTERRUPTIBLE); | ||
110 | } | ||
111 | 106 | ||
112 | 107 | ||
113 | /* | 108 | /* |