diff options
author | Bob Peterson <rpeterso@redhat.com> | 2011-03-17 16:19:58 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2011-04-18 10:23:50 -0400 |
commit | 44ad37d69b2cc421d5b5c7ad7fed16230685b092 (patch) | |
tree | 3632c63eef9e159947316f18d48054f082c0578e | |
parent | 001e8e8df4283dd4ef7a0297c012fce364c05cf1 (diff) |
GFS2: filesystem hang caused by incorrect lock order
This patch fixes a deadlock in GFS2 where two processes are trying
to reclaim an unlinked dinode:
One holds the inode glock and calls gfs2_lookup_by_inum trying to look
up the inode, which it can't, due to I_FREEING. The other has set
I_FREEING from vfs and is at the beginning of gfs2_delete_inode
waiting for the glock, which is held by the first. The solution is to
add a new non_block parameter to the gfs2_iget function that causes it
to return -ENOENT if the inode is being freed.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- | fs/gfs2/dir.c | 2 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 56 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 3 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 2 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 4 | ||||
-rw-r--r-- | fs/gfs2/super.c | 9 |
6 files changed, 55 insertions, 21 deletions
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 5c356d09c321..f789c5732b7c 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) | |||
1506 | inode = gfs2_inode_lookup(dir->i_sb, | 1506 | inode = gfs2_inode_lookup(dir->i_sb, |
1507 | be16_to_cpu(dent->de_type), | 1507 | be16_to_cpu(dent->de_type), |
1508 | be64_to_cpu(dent->de_inum.no_addr), | 1508 | be64_to_cpu(dent->de_inum.no_addr), |
1509 | be64_to_cpu(dent->de_inum.no_formal_ino)); | 1509 | be64_to_cpu(dent->de_inum.no_formal_ino), 0); |
1510 | brelse(bh); | 1510 | brelse(bh); |
1511 | return inode; | 1511 | return inode; |
1512 | } | 1512 | } |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 97d54a28776a..9134dcb89479 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -40,37 +40,61 @@ struct gfs2_inum_range_host { | |||
40 | u64 ir_length; | 40 | u64 ir_length; |
41 | }; | 41 | }; |
42 | 42 | ||
43 | struct gfs2_skip_data { | ||
44 | u64 no_addr; | ||
45 | int skipped; | ||
46 | int non_block; | ||
47 | }; | ||
48 | |||
43 | static int iget_test(struct inode *inode, void *opaque) | 49 | static int iget_test(struct inode *inode, void *opaque) |
44 | { | 50 | { |
45 | struct gfs2_inode *ip = GFS2_I(inode); | 51 | struct gfs2_inode *ip = GFS2_I(inode); |
46 | u64 *no_addr = opaque; | 52 | struct gfs2_skip_data *data = opaque; |
47 | 53 | ||
48 | if (ip->i_no_addr == *no_addr) | 54 | if (ip->i_no_addr == data->no_addr) { |
55 | if (data->non_block && | ||
56 | inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { | ||
57 | data->skipped = 1; | ||
58 | return 0; | ||
59 | } | ||
49 | return 1; | 60 | return 1; |
50 | 61 | } | |
51 | return 0; | 62 | return 0; |
52 | } | 63 | } |
53 | 64 | ||
54 | static int iget_set(struct inode *inode, void *opaque) | 65 | static int iget_set(struct inode *inode, void *opaque) |
55 | { | 66 | { |
56 | struct gfs2_inode *ip = GFS2_I(inode); | 67 | struct gfs2_inode *ip = GFS2_I(inode); |
57 | u64 *no_addr = opaque; | 68 | struct gfs2_skip_data *data = opaque; |
58 | 69 | ||
59 | inode->i_ino = (unsigned long)*no_addr; | 70 | if (data->skipped) |
60 | ip->i_no_addr = *no_addr; | 71 | return -ENOENT; |
72 | inode->i_ino = (unsigned long)(data->no_addr); | ||
73 | ip->i_no_addr = data->no_addr; | ||
61 | return 0; | 74 | return 0; |
62 | } | 75 | } |
63 | 76 | ||
64 | struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) | 77 | struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) |
65 | { | 78 | { |
66 | unsigned long hash = (unsigned long)no_addr; | 79 | unsigned long hash = (unsigned long)no_addr; |
67 | return ilookup5(sb, hash, iget_test, &no_addr); | 80 | struct gfs2_skip_data data; |
81 | |||
82 | data.no_addr = no_addr; | ||
83 | data.skipped = 0; | ||
84 | data.non_block = 0; | ||
85 | return ilookup5(sb, hash, iget_test, &data); | ||
68 | } | 86 | } |
69 | 87 | ||
70 | static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) | 88 | static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, |
89 | int non_block) | ||
71 | { | 90 | { |
91 | struct gfs2_skip_data data; | ||
72 | unsigned long hash = (unsigned long)no_addr; | 92 | unsigned long hash = (unsigned long)no_addr; |
73 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); | 93 | |
94 | data.no_addr = no_addr; | ||
95 | data.skipped = 0; | ||
96 | data.non_block = non_block; | ||
97 | return iget5_locked(sb, hash, iget_test, iget_set, &data); | ||
74 | } | 98 | } |
75 | 99 | ||
76 | /** | 100 | /** |
@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode) | |||
111 | * @sb: The super block | 135 | * @sb: The super block |
112 | * @no_addr: The inode number | 136 | * @no_addr: The inode number |
113 | * @type: The type of the inode | 137 | * @type: The type of the inode |
138 | * non_block: Can we block on inodes that are being freed? | ||
114 | * | 139 | * |
115 | * Returns: A VFS inode, or an error | 140 | * Returns: A VFS inode, or an error |
116 | */ | 141 | */ |
117 | 142 | ||
118 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, | 143 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, |
119 | u64 no_addr, u64 no_formal_ino) | 144 | u64 no_addr, u64 no_formal_ino, int non_block) |
120 | { | 145 | { |
121 | struct inode *inode; | 146 | struct inode *inode; |
122 | struct gfs2_inode *ip; | 147 | struct gfs2_inode *ip; |
123 | struct gfs2_glock *io_gl = NULL; | 148 | struct gfs2_glock *io_gl = NULL; |
124 | int error; | 149 | int error; |
125 | 150 | ||
126 | inode = gfs2_iget(sb, no_addr); | 151 | inode = gfs2_iget(sb, no_addr, non_block); |
127 | ip = GFS2_I(inode); | 152 | ip = GFS2_I(inode); |
128 | 153 | ||
129 | if (!inode) | 154 | if (!inode) |
@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, | |||
185 | { | 210 | { |
186 | struct super_block *sb = sdp->sd_vfs; | 211 | struct super_block *sb = sdp->sd_vfs; |
187 | struct gfs2_holder i_gh; | 212 | struct gfs2_holder i_gh; |
188 | struct inode *inode; | 213 | struct inode *inode = NULL; |
189 | int error; | 214 | int error; |
190 | 215 | ||
216 | /* Must not read in block until block type is verified */ | ||
191 | error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, | 217 | error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, |
192 | LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | 218 | LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); |
193 | if (error) | 219 | if (error) |
194 | return ERR_PTR(error); | 220 | return ERR_PTR(error); |
195 | 221 | ||
@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, | |||
197 | if (error) | 223 | if (error) |
198 | goto fail; | 224 | goto fail; |
199 | 225 | ||
200 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); | 226 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); |
201 | if (IS_ERR(inode)) | 227 | if (IS_ERR(inode)) |
202 | goto fail; | 228 | goto fail; |
203 | 229 | ||
@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
843 | goto fail_gunlock2; | 869 | goto fail_gunlock2; |
844 | 870 | ||
845 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, | 871 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, |
846 | inum.no_formal_ino); | 872 | inum.no_formal_ino, 0); |
847 | if (IS_ERR(inode)) | 873 | if (IS_ERR(inode)) |
848 | goto fail_gunlock2; | 874 | goto fail_gunlock2; |
849 | 875 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 3e00a66e7cbd..099ca305e518 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -97,7 +97,8 @@ err: | |||
97 | } | 97 | } |
98 | 98 | ||
99 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 99 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
100 | u64 no_addr, u64 no_formal_ino); | 100 | u64 no_addr, u64 no_formal_ino, |
101 | int non_block); | ||
101 | extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, | 102 | extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, |
102 | u64 *no_formal_ino, | 103 | u64 *no_formal_ino, |
103 | unsigned int blktype); | 104 | unsigned int blktype); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 42ef24355afb..d3c69eb91c74 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, | |||
430 | struct dentry *dentry; | 430 | struct dentry *dentry; |
431 | struct inode *inode; | 431 | struct inode *inode; |
432 | 432 | ||
433 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); | 433 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); |
434 | if (IS_ERR(inode)) { | 434 | if (IS_ERR(inode)) { |
435 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); | 435 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); |
436 | return PTR_ERR(inode); | 436 | return PTR_ERR(inode); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index cf930cd9664a..6fcae8469f6d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
945 | /* rgblk_search can return a block < goal, so we need to | 945 | /* rgblk_search can return a block < goal, so we need to |
946 | keep it marching forward. */ | 946 | keep it marching forward. */ |
947 | no_addr = block + rgd->rd_data0; | 947 | no_addr = block + rgd->rd_data0; |
948 | goal++; | 948 | goal = max(block + 1, goal + 1); |
949 | if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) | 949 | if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) |
950 | continue; | 950 | continue; |
951 | if (no_addr == skip) | 951 | if (no_addr == skip) |
@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
971 | found++; | 971 | found++; |
972 | 972 | ||
973 | /* Limit reclaim to sensible number of tasks */ | 973 | /* Limit reclaim to sensible number of tasks */ |
974 | if (found > 2*NR_CPUS) | 974 | if (found > NR_CPUS) |
975 | return; | 975 | return; |
976 | } | 976 | } |
977 | 977 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5b2cb8132f66..b9f28e66dad1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1327,7 +1327,8 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1327 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) | 1327 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) |
1328 | goto out; | 1328 | goto out; |
1329 | 1329 | ||
1330 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 1330 | /* Must not read inode block until block type has been verified */ |
1331 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); | ||
1331 | if (unlikely(error)) { | 1332 | if (unlikely(error)) { |
1332 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | 1333 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); |
1333 | goto out; | 1334 | goto out; |
@@ -1337,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1337 | if (error) | 1338 | if (error) |
1338 | goto out_truncate; | 1339 | goto out_truncate; |
1339 | 1340 | ||
1341 | if (test_bit(GIF_INVALID, &ip->i_flags)) { | ||
1342 | error = gfs2_inode_refresh(ip); | ||
1343 | if (error) | ||
1344 | goto out_truncate; | ||
1345 | } | ||
1346 | |||
1340 | ip->i_iopen_gh.gh_flags |= GL_NOCACHE; | 1347 | ip->i_iopen_gh.gh_flags |= GL_NOCACHE; |
1341 | gfs2_glock_dq_wait(&ip->i_iopen_gh); | 1348 | gfs2_glock_dq_wait(&ip->i_iopen_gh); |
1342 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); | 1349 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); |