about summary refs log tree commit diff stats
path: root/fs/gfs2/inode.c
diff options
context:
space:
mode:
author Andreas Gruenbacher <agruenba@redhat.com> 2016-06-14 13:22:27 -0400
committer Bob Peterson <rpeterso@redhat.com> 2016-06-27 10:47:07 -0400
commit 3ce37b2cb4917674fa5b776e857dcea94c0e0835 (patch)
tree 76adc95997a6e5ba7668630597c0872dde734a45 /fs/gfs2/inode.c
parent 1e875f5a95a28b5286165db9fa832b0773657ddb (diff)
gfs2: Fix gfs2_lookup_by_inum lock inversion
The current gfs2_lookup_by_inum takes the glock of a presumed inode identified by block number, verifies that the block is indeed an inode, and then instantiates and reads the new inode via gfs2_inode_lookup. However, instantiating a new inode may block on freeing a previous instance of that inode (__wait_on_freeing_inode), and freeing an inode requires taking the glock that is already held, leading to lock inversion and deadlock. Fix this by first instantiating the new inode, then verifying that the block is an inode (if required), and then reading in the new inode, all in gfs2_inode_lookup. If the block we are looking for is not an inode, we discard the new inode via iget_failed, which marks inodes as bad and unhashes them. Other tasks waiting on that inode will get a bad inode back from ilookup or iget_locked; in that case, retry the lookup. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Diffstat (limited to 'fs/gfs2/inode.c')
-rw-r--r-- fs/gfs2/inode.c 101
1 files changed, 73 insertions, 28 deletions
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 21dc784f66c2..6d5c6bbec416 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -39,7 +39,33 @@
39 39
40struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 40struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
41{ 41{
42 return ilookup(sb, (unsigned long)no_addr); 42 struct inode *inode;
43
44repeat:
45 inode = ilookup(sb, no_addr);
46 if (!inode)
47 return inode;
48 if (is_bad_inode(inode)) {
49 iput(inode);
50 goto repeat;
51 }
52 return inode;
53}
54
55static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
56{
57 struct inode *inode;
58
59repeat:
60 inode = iget_locked(sb, no_addr);
61 if (!inode)
62 return inode;
63 if (is_bad_inode(inode)) {
64 iput(inode);
65 goto repeat;
66 }
67 GFS2_I(inode)->i_no_addr = no_addr;
68 return inode;
43} 69}
44 70
45/** 71/**
@@ -78,26 +104,37 @@ static void gfs2_set_iop(struct inode *inode)
78/** 104/**
79 * gfs2_inode_lookup - Lookup an inode 105 * gfs2_inode_lookup - Lookup an inode
80 * @sb: The super block 106 * @sb: The super block
81 * @no_addr: The inode number
82 * @type: The type of the inode 107 * @type: The type of the inode
108 * @no_addr: The inode number
109 * @no_formal_ino: The inode generation number
110 * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED;
111 * GFS2_BLKST_FREE to indicate not to verify)
112 *
113 * If @type is DT_UNKNOWN, the inode type is fetched from disk.
114 *
115 * If @blktype is anything other than GFS2_BLKST_FREE (which is used as a
116 * placeholder because it doesn't otherwise make sense), the on-disk block type
117 * is verified to be @blktype.
83 * 118 *
84 * Returns: A VFS inode, or an error 119 * Returns: A VFS inode, or an error
85 */ 120 */
86 121
87struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, 122struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
88 u64 no_addr, u64 no_formal_ino) 123 u64 no_addr, u64 no_formal_ino,
124 unsigned int blktype)
89{ 125{
90 struct inode *inode; 126 struct inode *inode;
91 struct gfs2_inode *ip; 127 struct gfs2_inode *ip;
92 struct gfs2_glock *io_gl = NULL; 128 struct gfs2_glock *io_gl = NULL;
129 struct gfs2_holder i_gh;
130 bool unlock = false;
93 int error; 131 int error;
94 132
95 inode = iget_locked(sb, (unsigned long)no_addr); 133 inode = gfs2_iget(sb, no_addr);
96 if (!inode) 134 if (!inode)
97 return ERR_PTR(-ENOMEM); 135 return ERR_PTR(-ENOMEM);
98 136
99 ip = GFS2_I(inode); 137 ip = GFS2_I(inode);
100 ip->i_no_addr = no_addr;
101 138
102 if (inode->i_state & I_NEW) { 139 if (inode->i_state & I_NEW) {
103 struct gfs2_sbd *sdp = GFS2_SB(inode); 140 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -112,10 +149,30 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
112 if (unlikely(error)) 149 if (unlikely(error))
113 goto fail_put; 150 goto fail_put;
114 151
152 if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
153 /*
154 * The GL_SKIP flag indicates to skip reading the inode
155 * block. We read the inode with gfs2_inode_refresh
156 * after possibly checking the block type.
157 */
158 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
159 GL_SKIP, &i_gh);
160 if (error)
161 goto fail_put;
162 unlock = true;
163
164 if (blktype != GFS2_BLKST_FREE) {
165 error = gfs2_check_blk_type(sdp, no_addr,
166 blktype);
167 if (error)
168 goto fail_put;
169 }
170 }
171
115 set_bit(GIF_INVALID, &ip->i_flags); 172 set_bit(GIF_INVALID, &ip->i_flags);
116 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 173 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
117 if (unlikely(error)) 174 if (unlikely(error))
118 goto fail_iopen; 175 goto fail_put;
119 176
120 ip->i_iopen_gh.gh_gl->gl_object = ip; 177 ip->i_iopen_gh.gh_gl->gl_object = ip;
121 gfs2_glock_put(io_gl); 178 gfs2_glock_put(io_gl);
@@ -134,6 +191,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
134 unlock_new_inode(inode); 191 unlock_new_inode(inode);
135 } 192 }
136 193
194 if (unlock)
195 gfs2_glock_dq_uninit(&i_gh);
137 return inode; 196 return inode;
138 197
139fail_refresh: 198fail_refresh:
@@ -141,10 +200,11 @@ fail_refresh:
141 ip->i_iopen_gh.gh_gl->gl_object = NULL; 200 ip->i_iopen_gh.gh_gl->gl_object = NULL;
142 gfs2_glock_dq_wait(&ip->i_iopen_gh); 201 gfs2_glock_dq_wait(&ip->i_iopen_gh);
143 gfs2_holder_uninit(&ip->i_iopen_gh); 202 gfs2_holder_uninit(&ip->i_iopen_gh);
144fail_iopen: 203fail_put:
145 if (io_gl) 204 if (io_gl)
146 gfs2_glock_put(io_gl); 205 gfs2_glock_put(io_gl);
147fail_put: 206 if (unlock)
207 gfs2_glock_dq_uninit(&i_gh);
148 ip->i_gl->gl_object = NULL; 208 ip->i_gl->gl_object = NULL;
149fail: 209fail:
150 iget_failed(inode); 210 iget_failed(inode);
@@ -155,23 +215,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
155 u64 *no_formal_ino, unsigned int blktype) 215 u64 *no_formal_ino, unsigned int blktype)
156{ 216{
157 struct super_block *sb = sdp->sd_vfs; 217 struct super_block *sb = sdp->sd_vfs;
158 struct gfs2_holder i_gh; 218 struct inode *inode;
159 struct inode *inode = NULL;
160 int error; 219 int error;
161 220
162 /* Must not read in block until block type is verified */ 221 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, blktype);
163 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
164 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
165 if (error)
166 return ERR_PTR(error);
167
168 error = gfs2_check_blk_type(sdp, no_addr, blktype);
169 if (error)
170 goto fail;
171
172 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
173 if (IS_ERR(inode)) 222 if (IS_ERR(inode))
174 goto fail; 223 return inode;
175 224
176 /* Two extra checks for NFS only */ 225 /* Two extra checks for NFS only */
177 if (no_formal_ino) { 226 if (no_formal_ino) {
@@ -182,16 +231,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
182 error = -EIO; 231 error = -EIO;
183 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) 232 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
184 goto fail_iput; 233 goto fail_iput;
185
186 error = 0;
187 } 234 }
235 return inode;
188 236
189fail:
190 gfs2_glock_dq_uninit(&i_gh);
191 return error ? ERR_PTR(error) : inode;
192fail_iput: 237fail_iput:
193 iput(inode); 238 iput(inode);
194 goto fail; 239 return ERR_PTR(error);
195} 240}
196 241
197 242