aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2008-02-10 01:20:05 -0500
committerTheodore Ts'o <tytso@mit.edu>2008-02-10 01:20:05 -0500
commit8009f9fb3067fef6c2ca0c16f6bac786ae28639d (patch)
treeacf8bbfd2833a63baa9d2194ed30bf7e7dcb3075
parent0040d9875dcccfcb2131417b10fbd9841bc5f05b (diff)
ext4: Fix circular locking dependency with migrate and rm.
In order to prevent a circular locking dependency when an unlink operation is racing with an ext4 migration, we delay taking i_data_sem until just before switch the inode format, and use i_mutex to prevent writes and truncates during the first part of the migration operation. Acked-by: Jan Kara <jack@suse.cz> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/migrate.c117
1 files changed, 74 insertions, 43 deletions
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 9ee1f7cfb2c5..8c6c685b9d22 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
61 retval = ext4_journal_restart(handle, needed); 61 retval = ext4_journal_restart(handle, needed);
62 if (retval) 62 if (retval)
63 goto err_out; 63 goto err_out;
64 } 64 } else if (needed) {
65 if (needed) {
66 retval = ext4_journal_extend(handle, needed); 65 retval = ext4_journal_extend(handle, needed);
67 if (retval != 0) { 66 if (retval) {
68 /* 67 /*
69 * IF not able to extend the journal restart the journal 68 * IF not able to extend the journal restart the journal
70 */ 69 */
@@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
220 219
221} 220}
222 221
222static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
223{
224 int retval = 0, needed;
225
226 if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
227 return 0;
228 /*
229 * We are freeing a blocks. During this we touch
230 * superblock, group descriptor and block bitmap.
231 * So allocate a credit of 3. We may update
232 * quota (user and group).
233 */
234 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
235
236 if (ext4_journal_extend(handle, needed) != 0)
237 retval = ext4_journal_restart(handle, needed);
238
239 return retval;
240}
241
223static int free_dind_blocks(handle_t *handle, 242static int free_dind_blocks(handle_t *handle,
224 struct inode *inode, __le32 i_data) 243 struct inode *inode, __le32 i_data)
225{ 244{
@@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle,
234 253
235 tmp_idata = (__le32 *)bh->b_data; 254 tmp_idata = (__le32 *)bh->b_data;
236 for (i = 0; i < max_entries; i++) { 255 for (i = 0; i < max_entries; i++) {
237 if (tmp_idata[i]) 256 if (tmp_idata[i]) {
257 extend_credit_for_blkdel(handle, inode);
238 ext4_free_blocks(handle, inode, 258 ext4_free_blocks(handle, inode,
239 le32_to_cpu(tmp_idata[i]), 1, 1); 259 le32_to_cpu(tmp_idata[i]), 1, 1);
260 }
240 } 261 }
241 put_bh(bh); 262 put_bh(bh);
263 extend_credit_for_blkdel(handle, inode);
242 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 264 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
243 return 0; 265 return 0;
244} 266}
@@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle,
267 } 289 }
268 } 290 }
269 put_bh(bh); 291 put_bh(bh);
292 extend_credit_for_blkdel(handle, inode);
270 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 293 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
271 return 0; 294 return 0;
272} 295}
273 296
274static int free_ind_block(handle_t *handle, struct inode *inode) 297static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
275{ 298{
276 int retval; 299 int retval;
277 struct ext4_inode_info *ei = EXT4_I(inode);
278 300
279 if (ei->i_data[EXT4_IND_BLOCK]) 301 /* ei->i_data[EXT4_IND_BLOCK] */
302 if (i_data[0]) {
303 extend_credit_for_blkdel(handle, inode);
280 ext4_free_blocks(handle, inode, 304 ext4_free_blocks(handle, inode,
281 le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); 305 le32_to_cpu(i_data[0]), 1, 1);
306 }
282 307
283 if (ei->i_data[EXT4_DIND_BLOCK]) { 308 /* ei->i_data[EXT4_DIND_BLOCK] */
284 retval = free_dind_blocks(handle, inode, 309 if (i_data[1]) {
285 ei->i_data[EXT4_DIND_BLOCK]); 310 retval = free_dind_blocks(handle, inode, i_data[1]);
286 if (retval) 311 if (retval)
287 return retval; 312 return retval;
288 } 313 }
289 314
290 if (ei->i_data[EXT4_TIND_BLOCK]) { 315 /* ei->i_data[EXT4_TIND_BLOCK] */
291 retval = free_tind_blocks(handle, inode, 316 if (i_data[2]) {
292 ei->i_data[EXT4_TIND_BLOCK]); 317 retval = free_tind_blocks(handle, inode, i_data[2]);
293 if (retval) 318 if (retval)
294 return retval; 319 return retval;
295 } 320 }
@@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
297} 322}
298 323
299static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 324static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
300 struct inode *tmp_inode, int retval) 325 struct inode *tmp_inode)
301{ 326{
327 int retval;
328 __le32 i_data[3];
302 struct ext4_inode_info *ei = EXT4_I(inode); 329 struct ext4_inode_info *ei = EXT4_I(inode);
303 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 330 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
304 331
305 retval = free_ind_block(handle, inode);
306 if (retval)
307 goto err_out;
308
309 /* 332 /*
310 * One credit accounted for writing the 333 * One credit accounted for writing the
311 * i_data field of the original inode 334 * i_data field of the original inode
@@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
317 goto err_out; 340 goto err_out;
318 } 341 }
319 342
343 i_data[0] = ei->i_data[EXT4_IND_BLOCK];
344 i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
345 i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
346
347 down_write(&EXT4_I(inode)->i_data_sem);
320 /* 348 /*
321 * We have the extent map build with the tmp inode. 349 * We have the extent map build with the tmp inode.
322 * Now copy the i_data across 350 * Now copy the i_data across
@@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
336 spin_lock(&inode->i_lock); 364 spin_lock(&inode->i_lock);
337 inode->i_blocks += tmp_inode->i_blocks; 365 inode->i_blocks += tmp_inode->i_blocks;
338 spin_unlock(&inode->i_lock); 366 spin_unlock(&inode->i_lock);
367 up_write(&EXT4_I(inode)->i_data_sem);
339 368
369 /*
370 * We mark the inode dirty after, because we decrement the
371 * i_blocks when freeing the indirect meta-data blocks
372 */
373 retval = free_ind_block(handle, inode, i_data);
340 ext4_mark_inode_dirty(handle, inode); 374 ext4_mark_inode_dirty(handle, inode);
375
341err_out: 376err_out:
342 return retval; 377 return retval;
343} 378}
@@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
365 } 400 }
366 } 401 }
367 put_bh(bh); 402 put_bh(bh);
403 extend_credit_for_blkdel(handle, inode);
368 ext4_free_blocks(handle, inode, block, 1, 1); 404 ext4_free_blocks(handle, inode, block, 1, 1);
369 return retval; 405 return retval;
370} 406}
@@ -420,7 +456,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
420 */ 456 */
421 return retval; 457 return retval;
422 458
423 down_write(&EXT4_I(inode)->i_data_sem);
424 handle = ext4_journal_start(inode, 459 handle = ext4_journal_start(inode,
425 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 460 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
426 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 461 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -454,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
454 ext4_orphan_add(handle, tmp_inode); 489 ext4_orphan_add(handle, tmp_inode);
455 ext4_journal_stop(handle); 490 ext4_journal_stop(handle);
456 491
457 ei = EXT4_I(inode);
458 i_data = ei->i_data;
459 memset(&lb, 0, sizeof(lb));
460
461 /* 32 bit block address 4 bytes */
462 max_entries = inode->i_sb->s_blocksize >> 2;
463
464 /* 492 /*
465 * start with one credit accounted for 493 * start with one credit accounted for
466 * superblock modification. 494 * superblock modification.
@@ -469,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
469 * trascation that created the inode. Later as and 497 * trascation that created the inode. Later as and
470 * when we add extents we extent the journal 498 * when we add extents we extent the journal
471 */ 499 */
500 /*
501 * inode_mutex prevent write and truncate on the file. Read still goes
502 * through. We take i_data_sem in ext4_ext_swap_inode_data before we
503 * switch the inode format to prevent read.
504 */
505 mutex_lock(&(inode->i_mutex));
472 handle = ext4_journal_start(inode, 1); 506 handle = ext4_journal_start(inode, 1);
507
508 ei = EXT4_I(inode);
509 i_data = ei->i_data;
510 memset(&lb, 0, sizeof(lb));
511
512 /* 32 bit block address 4 bytes */
513 max_entries = inode->i_sb->s_blocksize >> 2;
473 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 514 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
474 if (i_data[i]) { 515 if (i_data[i]) {
475 retval = update_extent_range(handle, tmp_inode, 516 retval = update_extent_range(handle, tmp_inode,
@@ -507,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
507 */ 548 */
508 retval = finish_range(handle, tmp_inode, &lb); 549 retval = finish_range(handle, tmp_inode, &lb);
509err_out: 550err_out:
510 /*
511 * We are either freeing extent information or indirect
512 * blocks. During this we touch superblock, group descriptor
513 * and block bitmap. Later we mark the tmp_inode dirty
514 * via ext4_ext_tree_init. So allocate a credit of 4
515 * We may update quota (user and group).
516 *
517 * FIXME!! we may be touching bitmaps in different block groups.
518 */
519 if (ext4_journal_extend(handle,
520 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
521 ext4_journal_restart(handle,
522 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
523 if (retval) 551 if (retval)
524 /* 552 /*
525 * Failure case delete the extent information with the 553 * Failure case delete the extent information with the
@@ -528,7 +556,11 @@ err_out:
528 free_ext_block(handle, tmp_inode); 556 free_ext_block(handle, tmp_inode);
529 else 557 else
530 retval = ext4_ext_swap_inode_data(handle, inode, 558 retval = ext4_ext_swap_inode_data(handle, inode,
531 tmp_inode, retval); 559 tmp_inode);
560
561 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
562 if (ext4_journal_extend(handle, 1) != 0)
563 ext4_journal_restart(handle, 1);
532 564
533 /* 565 /*
534 * Mark the tmp_inode as of size zero 566 * Mark the tmp_inode as of size zero
@@ -556,8 +588,7 @@ err_out:
556 tmp_inode->i_nlink = 0; 588 tmp_inode->i_nlink = 0;
557 589
558 ext4_journal_stop(handle); 590 ext4_journal_stop(handle);
559 591 mutex_unlock(&(inode->i_mutex));
560 up_write(&EXT4_I(inode)->i_data_sem);
561 592
562 if (tmp_inode) 593 if (tmp_inode)
563 iput(tmp_inode); 594 iput(tmp_inode);