diff options
author | Jan Kara <jack@suse.cz> | 2011-04-21 17:47:16 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2011-04-29 17:20:03 -0400 |
commit | ae54870a1dc978a88377ae8af0780648f2ccd4dc (patch) | |
tree | 220268d2087adac3191a54805dd6c0c203d50606 /fs | |
parent | fafc9929c668f8bae6dd1f109f33a86d2cb3c460 (diff) |
ext3: Fix lock inversion in ext3_symlink()
ext3_symlink() cannot call __page_symlink() with transaction open.
__page_symlink() calls ext3_write_begin() which gets page lock which ranks
above transaction start (thus lock ordering is violated) and and also
ext3_write_begin() waits for a transaction commit when we run out of space
which never happens if we hold transaction open.
Fix the problem by stopping a transaction before calling __page_symlink()
(we have to be careful and put inode to orphan list so that it gets deleted
in case of crash) and starting another one after __page_symlink() returns
for addition of symlink into a directory.
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext3/namei.c | 67 |
1 files changed, 56 insertions, 11 deletions
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 32f3b8695859..f6ce3e79d315 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -2189,6 +2189,7 @@ static int ext3_symlink (struct inode * dir, | |||
2189 | handle_t *handle; | 2189 | handle_t *handle; |
2190 | struct inode * inode; | 2190 | struct inode * inode; |
2191 | int l, err, retries = 0; | 2191 | int l, err, retries = 0; |
2192 | int credits; | ||
2192 | 2193 | ||
2193 | l = strlen(symname)+1; | 2194 | l = strlen(symname)+1; |
2194 | if (l > dir->i_sb->s_blocksize) | 2195 | if (l > dir->i_sb->s_blocksize) |
@@ -2196,10 +2197,26 @@ static int ext3_symlink (struct inode * dir, | |||
2196 | 2197 | ||
2197 | dquot_initialize(dir); | 2198 | dquot_initialize(dir); |
2198 | 2199 | ||
2200 | if (l > EXT3_N_BLOCKS * 4) { | ||
2201 | /* | ||
2202 | * For non-fast symlinks, we just allocate inode and put it on | ||
2203 | * orphan list in the first transaction => we need bitmap, | ||
2204 | * group descriptor, sb, inode block, quota blocks. | ||
2205 | */ | ||
2206 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
2207 | } else { | ||
2208 | /* | ||
2209 | * Fast symlink. We have to add entry to directory | ||
2210 | * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS), | ||
2211 | * allocate new inode (bitmap, group descriptor, inode block, | ||
2212 | * quota blocks, sb is already counted in previous macros). | ||
2213 | */ | ||
2214 | credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
2215 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + | ||
2216 | EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
2217 | } | ||
2199 | retry: | 2218 | retry: |
2200 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | 2219 | handle = ext3_journal_start(dir, credits); |
2201 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + | ||
2202 | EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); | ||
2203 | if (IS_ERR(handle)) | 2220 | if (IS_ERR(handle)) |
2204 | return PTR_ERR(handle); | 2221 | return PTR_ERR(handle); |
2205 | 2222 | ||
@@ -2211,21 +2228,45 @@ retry: | |||
2211 | if (IS_ERR(inode)) | 2228 | if (IS_ERR(inode)) |
2212 | goto out_stop; | 2229 | goto out_stop; |
2213 | 2230 | ||
2214 | if (l > sizeof (EXT3_I(inode)->i_data)) { | 2231 | if (l > EXT3_N_BLOCKS * 4) { |
2215 | inode->i_op = &ext3_symlink_inode_operations; | 2232 | inode->i_op = &ext3_symlink_inode_operations; |
2216 | ext3_set_aops(inode); | 2233 | ext3_set_aops(inode); |
2217 | /* | 2234 | /* |
2218 | * page_symlink() calls into ext3_prepare/commit_write. | 2235 | * We cannot call page_symlink() with transaction started |
2219 | * We have a transaction open. All is sweetness. It also sets | 2236 | * because it calls into ext3_write_begin() which acquires page |
2220 | * i_size in generic_commit_write(). | 2237 | * lock which ranks below transaction start (and it can also |
2238 | * wait for journal commit if we are running out of space). So | ||
2239 | * we have to stop transaction now and restart it when symlink | ||
2240 | * contents is written. | ||
2241 | * | ||
2242 | * To keep fs consistent in case of crash, we have to put inode | ||
2243 | * to orphan list in the mean time. | ||
2221 | */ | 2244 | */ |
2245 | drop_nlink(inode); | ||
2246 | err = ext3_orphan_add(handle, inode); | ||
2247 | ext3_journal_stop(handle); | ||
2248 | if (err) | ||
2249 | goto err_drop_inode; | ||
2222 | err = __page_symlink(inode, symname, l, 1); | 2250 | err = __page_symlink(inode, symname, l, 1); |
2251 | if (err) | ||
2252 | goto err_drop_inode; | ||
2253 | /* | ||
2254 | * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS | ||
2255 | * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified | ||
2256 | */ | ||
2257 | handle = ext3_journal_start(dir, | ||
2258 | EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
2259 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); | ||
2260 | if (IS_ERR(handle)) { | ||
2261 | err = PTR_ERR(handle); | ||
2262 | goto err_drop_inode; | ||
2263 | } | ||
2264 | inc_nlink(inode); | ||
2265 | err = ext3_orphan_del(handle, inode); | ||
2223 | if (err) { | 2266 | if (err) { |
2267 | ext3_journal_stop(handle); | ||
2224 | drop_nlink(inode); | 2268 | drop_nlink(inode); |
2225 | unlock_new_inode(inode); | 2269 | goto err_drop_inode; |
2226 | ext3_mark_inode_dirty(handle, inode); | ||
2227 | iput (inode); | ||
2228 | goto out_stop; | ||
2229 | } | 2270 | } |
2230 | } else { | 2271 | } else { |
2231 | inode->i_op = &ext3_fast_symlink_inode_operations; | 2272 | inode->i_op = &ext3_fast_symlink_inode_operations; |
@@ -2239,6 +2280,10 @@ out_stop: | |||
2239 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) | 2280 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) |
2240 | goto retry; | 2281 | goto retry; |
2241 | return err; | 2282 | return err; |
2283 | err_drop_inode: | ||
2284 | unlock_new_inode(inode); | ||
2285 | iput (inode); | ||
2286 | return err; | ||
2242 | } | 2287 | } |
2243 | 2288 | ||
2244 | static int ext3_link (struct dentry * old_dentry, | 2289 | static int ext3_link (struct dentry * old_dentry, |