aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nilfs2
diff options
context:
space:
mode:
authorRyusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>2015-02-05 15:25:20 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-05 16:35:29 -0500
commit7ef3ff2fea8bf5e4a21cef47ad87710a3d0fdb52 (patch)
treefae90c5009ed6bf5009cedf9646c0a4edd71304c /fs/nilfs2
parent81cca6fb1271b91f06b008976ea132bec2f14d86 (diff)
nilfs2: fix deadlock of segment constructor over I_SYNC flag
Nilfs2 eventually hangs in a stress test with fsstress program. This issue was caused by the following deadlock over I_SYNC flag between nilfs_segctor_thread() and writeback_sb_inodes(): nilfs_segctor_thread() nilfs_segctor_thread_construct() nilfs_segctor_unlock() nilfs_dispose_list() iput() iput_final() evict() inode_wait_for_writeback() * wait for I_SYNC flag writeback_sb_inodes() * set I_SYNC flag on inode->i_state __writeback_single_inode() do_writepages() nilfs_writepages() nilfs_construct_dsync_segment() nilfs_segctor_sync() * wait for completion of segment constructor inode_sync_complete() * clear I_SYNC flag after __writeback_single_inode() completed writeback_sb_inodes() calls do_writepages() for dirty inodes after setting I_SYNC flag on inode->i_state. do_writepages() in turn calls nilfs_writepages(), which can run segment constructor and wait for its completion. On the other hand, segment constructor calls iput(), which can call evict() and wait for the I_SYNC flag on inode_wait_for_writeback(). Since segment constructor doesn't know when I_SYNC will be set, it cannot know whether iput() will block or not unless inode->i_nlink has a non-zero count. We can prevent evict() from being called in iput() by implementing sop->drop_inode(), but it's not preferable to leave inodes with i_nlink == 0 for long periods because it even defers file truncation and inode deallocation. So, this instead resolves the deadlock by calling iput() asynchronously with a workqueue for inodes with i_nlink == 0. Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/nilfs2')
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/segment.c44
-rw-r--r--fs/nilfs2/segment.h5
3 files changed, 44 insertions, 7 deletions
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 91093cd74f0d..385704027575 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -141,7 +141,6 @@ enum {
141 * @ti_save: Backup of journal_info field of task_struct 141 * @ti_save: Backup of journal_info field of task_struct
142 * @ti_flags: Flags 142 * @ti_flags: Flags
143 * @ti_count: Nest level 143 * @ti_count: Nest level
144 * @ti_garbage: List of inode to be put when releasing semaphore
145 */ 144 */
146struct nilfs_transaction_info { 145struct nilfs_transaction_info {
147 u32 ti_magic; 146 u32 ti_magic;
@@ -150,7 +149,6 @@ struct nilfs_transaction_info {
150 one of other filesystems has a bug. */ 149 one of other filesystems has a bug. */
151 unsigned short ti_flags; 150 unsigned short ti_flags;
152 unsigned short ti_count; 151 unsigned short ti_count;
153 struct list_head ti_garbage;
154}; 152};
155 153
156/* ti_magic */ 154/* ti_magic */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 7ef18fc656c2..469086b9f99b 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -305,7 +305,6 @@ static void nilfs_transaction_lock(struct super_block *sb,
305 ti->ti_count = 0; 305 ti->ti_count = 0;
306 ti->ti_save = cur_ti; 306 ti->ti_save = cur_ti;
307 ti->ti_magic = NILFS_TI_MAGIC; 307 ti->ti_magic = NILFS_TI_MAGIC;
308 INIT_LIST_HEAD(&ti->ti_garbage);
309 current->journal_info = ti; 308 current->journal_info = ti;
310 309
311 for (;;) { 310 for (;;) {
@@ -332,8 +331,6 @@ static void nilfs_transaction_unlock(struct super_block *sb)
332 331
333 up_write(&nilfs->ns_segctor_sem); 332 up_write(&nilfs->ns_segctor_sem);
334 current->journal_info = ti->ti_save; 333 current->journal_info = ti->ti_save;
335 if (!list_empty(&ti->ti_garbage))
336 nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
337} 334}
338 335
339static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 336static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
@@ -746,6 +743,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs,
746 } 743 }
747} 744}
748 745
746static void nilfs_iput_work_func(struct work_struct *work)
747{
748 struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
749 sc_iput_work);
750 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
751
752 nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
753}
754
749static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, 755static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
750 struct nilfs_root *root) 756 struct nilfs_root *root)
751{ 757{
@@ -1900,8 +1906,8 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1900static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, 1906static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1901 struct the_nilfs *nilfs) 1907 struct the_nilfs *nilfs)
1902{ 1908{
1903 struct nilfs_transaction_info *ti = current->journal_info;
1904 struct nilfs_inode_info *ii, *n; 1909 struct nilfs_inode_info *ii, *n;
1910 int defer_iput = false;
1905 1911
1906 spin_lock(&nilfs->ns_inode_lock); 1912 spin_lock(&nilfs->ns_inode_lock);
1907 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 1913 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
@@ -1912,9 +1918,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1912 clear_bit(NILFS_I_BUSY, &ii->i_state); 1918 clear_bit(NILFS_I_BUSY, &ii->i_state);
1913 brelse(ii->i_bh); 1919 brelse(ii->i_bh);
1914 ii->i_bh = NULL; 1920 ii->i_bh = NULL;
1915 list_move_tail(&ii->i_dirty, &ti->ti_garbage); 1921 list_del_init(&ii->i_dirty);
1922 if (!ii->vfs_inode.i_nlink) {
1923 /*
1924 * Defer calling iput() to avoid a deadlock
1925 * over I_SYNC flag for inodes with i_nlink == 0
1926 */
1927 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1928 defer_iput = true;
1929 } else {
1930 spin_unlock(&nilfs->ns_inode_lock);
1931 iput(&ii->vfs_inode);
1932 spin_lock(&nilfs->ns_inode_lock);
1933 }
1916 } 1934 }
1917 spin_unlock(&nilfs->ns_inode_lock); 1935 spin_unlock(&nilfs->ns_inode_lock);
1936
1937 if (defer_iput)
1938 schedule_work(&sci->sc_iput_work);
1918} 1939}
1919 1940
1920/* 1941/*
@@ -2583,6 +2604,8 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2583 INIT_LIST_HEAD(&sci->sc_segbufs); 2604 INIT_LIST_HEAD(&sci->sc_segbufs);
2584 INIT_LIST_HEAD(&sci->sc_write_logs); 2605 INIT_LIST_HEAD(&sci->sc_write_logs);
2585 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2606 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2607 INIT_LIST_HEAD(&sci->sc_iput_queue);
2608 INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
2586 init_timer(&sci->sc_timer); 2609 init_timer(&sci->sc_timer);
2587 2610
2588 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2611 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2609,6 +2632,8 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2609 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2632 ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2610 nilfs_transaction_unlock(sci->sc_super); 2633 nilfs_transaction_unlock(sci->sc_super);
2611 2634
2635 flush_work(&sci->sc_iput_work);
2636
2612 } while (ret && retrycount-- > 0); 2637 } while (ret && retrycount-- > 0);
2613} 2638}
2614 2639
@@ -2633,6 +2658,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2633 || sci->sc_seq_request != sci->sc_seq_done); 2658 || sci->sc_seq_request != sci->sc_seq_done);
2634 spin_unlock(&sci->sc_state_lock); 2659 spin_unlock(&sci->sc_state_lock);
2635 2660
2661 if (flush_work(&sci->sc_iput_work))
2662 flag = true;
2663
2636 if (flag || !nilfs_segctor_confirm(sci)) 2664 if (flag || !nilfs_segctor_confirm(sci))
2637 nilfs_segctor_write_out(sci); 2665 nilfs_segctor_write_out(sci);
2638 2666
@@ -2642,6 +2670,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2642 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); 2670 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2643 } 2671 }
2644 2672
2673 if (!list_empty(&sci->sc_iput_queue)) {
2674 nilfs_warning(sci->sc_super, __func__,
2675 "iput queue is not empty\n");
2676 nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
2677 }
2678
2645 WARN_ON(!list_empty(&sci->sc_segbufs)); 2679 WARN_ON(!list_empty(&sci->sc_segbufs));
2646 WARN_ON(!list_empty(&sci->sc_write_logs)); 2680 WARN_ON(!list_empty(&sci->sc_write_logs));
2647 2681
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 38a1d0013314..a48d6de1e02c 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -26,6 +26,7 @@
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/workqueue.h>
29#include <linux/nilfs2_fs.h> 30#include <linux/nilfs2_fs.h>
30#include "nilfs.h" 31#include "nilfs.h"
31 32
@@ -92,6 +93,8 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 93 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 94 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 95 * @sc_gc_inodes: List of GC inodes having blocks to be written
96 * @sc_iput_queue: list of inodes for which iput should be done
97 * @sc_iput_work: work struct to defer iput call
95 * @sc_freesegs: array of segment numbers to be freed 98 * @sc_freesegs: array of segment numbers to be freed
96 * @sc_nfreesegs: number of segments on @sc_freesegs 99 * @sc_nfreesegs: number of segments on @sc_freesegs
97 * @sc_dsync_inode: inode whose data pages are written for a sync operation 100 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -135,6 +138,8 @@ struct nilfs_sc_info {
135 138
136 struct list_head sc_dirty_files; 139 struct list_head sc_dirty_files;
137 struct list_head sc_gc_inodes; 140 struct list_head sc_gc_inodes;
141 struct list_head sc_iput_queue;
142 struct work_struct sc_iput_work;
138 143
139 __u64 *sc_freesegs; 144 __u64 *sc_freesegs;
140 size_t sc_nfreesegs; 145 size_t sc_nfreesegs;