diff options
| author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-02-10 01:20:05 -0500 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2008-02-10 01:20:05 -0500 |
| commit | 8009f9fb3067fef6c2ca0c16f6bac786ae28639d (patch) | |
| tree | acf8bbfd2833a63baa9d2194ed30bf7e7dcb3075 | |
| parent | 0040d9875dcccfcb2131417b10fbd9841bc5f05b (diff) | |
ext4: Fix circular locking dependency with migrate and rm.
In order to prevent a circular locking dependency when an unlink
operation is racing with an ext4 migration, we delay taking i_data_sem
until just before switching the inode format, and use i_mutex to prevent
writes and truncates during the first part of the migration operation.
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
| -rw-r--r-- | fs/ext4/migrate.c | 117 |
1 files changed, 74 insertions, 43 deletions
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 9ee1f7cfb2c5..8c6c685b9d22 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
| @@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
| 61 | retval = ext4_journal_restart(handle, needed); | 61 | retval = ext4_journal_restart(handle, needed); |
| 62 | if (retval) | 62 | if (retval) |
| 63 | goto err_out; | 63 | goto err_out; |
| 64 | } | 64 | } else if (needed) { |
| 65 | if (needed) { | ||
| 66 | retval = ext4_journal_extend(handle, needed); | 65 | retval = ext4_journal_extend(handle, needed); |
| 67 | if (retval != 0) { | 66 | if (retval) { |
| 68 | /* | 67 | /* |
| 69 | * IF not able to extend the journal restart the journal | 68 | * IF not able to extend the journal restart the journal |
| 70 | */ | 69 | */ |
| @@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, | |||
| 220 | 219 | ||
| 221 | } | 220 | } |
| 222 | 221 | ||
| 222 | static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) | ||
| 223 | { | ||
| 224 | int retval = 0, needed; | ||
| 225 | |||
| 226 | if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) | ||
| 227 | return 0; | ||
| 228 | /* | ||
| 229 | * We are freeing a blocks. During this we touch | ||
| 230 | * superblock, group descriptor and block bitmap. | ||
| 231 | * So allocate a credit of 3. We may update | ||
| 232 | * quota (user and group). | ||
| 233 | */ | ||
| 234 | needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
| 235 | |||
| 236 | if (ext4_journal_extend(handle, needed) != 0) | ||
| 237 | retval = ext4_journal_restart(handle, needed); | ||
| 238 | |||
| 239 | return retval; | ||
| 240 | } | ||
| 241 | |||
| 223 | static int free_dind_blocks(handle_t *handle, | 242 | static int free_dind_blocks(handle_t *handle, |
| 224 | struct inode *inode, __le32 i_data) | 243 | struct inode *inode, __le32 i_data) |
| 225 | { | 244 | { |
| @@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle, | |||
| 234 | 253 | ||
| 235 | tmp_idata = (__le32 *)bh->b_data; | 254 | tmp_idata = (__le32 *)bh->b_data; |
| 236 | for (i = 0; i < max_entries; i++) { | 255 | for (i = 0; i < max_entries; i++) { |
| 237 | if (tmp_idata[i]) | 256 | if (tmp_idata[i]) { |
| 257 | extend_credit_for_blkdel(handle, inode); | ||
| 238 | ext4_free_blocks(handle, inode, | 258 | ext4_free_blocks(handle, inode, |
| 239 | le32_to_cpu(tmp_idata[i]), 1, 1); | 259 | le32_to_cpu(tmp_idata[i]), 1, 1); |
| 260 | } | ||
| 240 | } | 261 | } |
| 241 | put_bh(bh); | 262 | put_bh(bh); |
| 263 | extend_credit_for_blkdel(handle, inode); | ||
| 242 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | 264 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); |
| 243 | return 0; | 265 | return 0; |
| 244 | } | 266 | } |
| @@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle, | |||
| 267 | } | 289 | } |
| 268 | } | 290 | } |
| 269 | put_bh(bh); | 291 | put_bh(bh); |
| 292 | extend_credit_for_blkdel(handle, inode); | ||
| 270 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | 293 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); |
| 271 | return 0; | 294 | return 0; |
| 272 | } | 295 | } |
| 273 | 296 | ||
| 274 | static int free_ind_block(handle_t *handle, struct inode *inode) | 297 | static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) |
| 275 | { | 298 | { |
| 276 | int retval; | 299 | int retval; |
| 277 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 278 | 300 | ||
| 279 | if (ei->i_data[EXT4_IND_BLOCK]) | 301 | /* ei->i_data[EXT4_IND_BLOCK] */ |
| 302 | if (i_data[0]) { | ||
| 303 | extend_credit_for_blkdel(handle, inode); | ||
| 280 | ext4_free_blocks(handle, inode, | 304 | ext4_free_blocks(handle, inode, |
| 281 | le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); | 305 | le32_to_cpu(i_data[0]), 1, 1); |
| 306 | } | ||
| 282 | 307 | ||
| 283 | if (ei->i_data[EXT4_DIND_BLOCK]) { | 308 | /* ei->i_data[EXT4_DIND_BLOCK] */ |
| 284 | retval = free_dind_blocks(handle, inode, | 309 | if (i_data[1]) { |
| 285 | ei->i_data[EXT4_DIND_BLOCK]); | 310 | retval = free_dind_blocks(handle, inode, i_data[1]); |
| 286 | if (retval) | 311 | if (retval) |
| 287 | return retval; | 312 | return retval; |
| 288 | } | 313 | } |
| 289 | 314 | ||
| 290 | if (ei->i_data[EXT4_TIND_BLOCK]) { | 315 | /* ei->i_data[EXT4_TIND_BLOCK] */ |
| 291 | retval = free_tind_blocks(handle, inode, | 316 | if (i_data[2]) { |
| 292 | ei->i_data[EXT4_TIND_BLOCK]); | 317 | retval = free_tind_blocks(handle, inode, i_data[2]); |
| 293 | if (retval) | 318 | if (retval) |
| 294 | return retval; | 319 | return retval; |
| 295 | } | 320 | } |
| @@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode) | |||
| 297 | } | 322 | } |
| 298 | 323 | ||
| 299 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | 324 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, |
| 300 | struct inode *tmp_inode, int retval) | 325 | struct inode *tmp_inode) |
| 301 | { | 326 | { |
| 327 | int retval; | ||
| 328 | __le32 i_data[3]; | ||
| 302 | struct ext4_inode_info *ei = EXT4_I(inode); | 329 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 303 | struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); | 330 | struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); |
| 304 | 331 | ||
| 305 | retval = free_ind_block(handle, inode); | ||
| 306 | if (retval) | ||
| 307 | goto err_out; | ||
| 308 | |||
| 309 | /* | 332 | /* |
| 310 | * One credit accounted for writing the | 333 | * One credit accounted for writing the |
| 311 | * i_data field of the original inode | 334 | * i_data field of the original inode |
| @@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
| 317 | goto err_out; | 340 | goto err_out; |
| 318 | } | 341 | } |
| 319 | 342 | ||
| 343 | i_data[0] = ei->i_data[EXT4_IND_BLOCK]; | ||
| 344 | i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; | ||
| 345 | i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; | ||
| 346 | |||
| 347 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 320 | /* | 348 | /* |
| 321 | * We have the extent map build with the tmp inode. | 349 | * We have the extent map build with the tmp inode. |
| 322 | * Now copy the i_data across | 350 | * Now copy the i_data across |
| @@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
| 336 | spin_lock(&inode->i_lock); | 364 | spin_lock(&inode->i_lock); |
| 337 | inode->i_blocks += tmp_inode->i_blocks; | 365 | inode->i_blocks += tmp_inode->i_blocks; |
| 338 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
| 367 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 339 | 368 | ||
| 369 | /* | ||
| 370 | * We mark the inode dirty after, because we decrement the | ||
| 371 | * i_blocks when freeing the indirect meta-data blocks | ||
| 372 | */ | ||
| 373 | retval = free_ind_block(handle, inode, i_data); | ||
| 340 | ext4_mark_inode_dirty(handle, inode); | 374 | ext4_mark_inode_dirty(handle, inode); |
| 375 | |||
| 341 | err_out: | 376 | err_out: |
| 342 | return retval; | 377 | return retval; |
| 343 | } | 378 | } |
| @@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
| 365 | } | 400 | } |
| 366 | } | 401 | } |
| 367 | put_bh(bh); | 402 | put_bh(bh); |
| 403 | extend_credit_for_blkdel(handle, inode); | ||
| 368 | ext4_free_blocks(handle, inode, block, 1, 1); | 404 | ext4_free_blocks(handle, inode, block, 1, 1); |
| 369 | return retval; | 405 | return retval; |
| 370 | } | 406 | } |
| @@ -420,7 +456,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
| 420 | */ | 456 | */ |
| 421 | return retval; | 457 | return retval; |
| 422 | 458 | ||
| 423 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 424 | handle = ext4_journal_start(inode, | 459 | handle = ext4_journal_start(inode, |
| 425 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + | 460 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + |
| 426 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | 461 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + |
| @@ -454,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
| 454 | ext4_orphan_add(handle, tmp_inode); | 489 | ext4_orphan_add(handle, tmp_inode); |
| 455 | ext4_journal_stop(handle); | 490 | ext4_journal_stop(handle); |
| 456 | 491 | ||
| 457 | ei = EXT4_I(inode); | ||
| 458 | i_data = ei->i_data; | ||
| 459 | memset(&lb, 0, sizeof(lb)); | ||
| 460 | |||
| 461 | /* 32 bit block address 4 bytes */ | ||
| 462 | max_entries = inode->i_sb->s_blocksize >> 2; | ||
| 463 | |||
| 464 | /* | 492 | /* |
| 465 | * start with one credit accounted for | 493 | * start with one credit accounted for |
| 466 | * superblock modification. | 494 | * superblock modification. |
| @@ -469,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
| 469 | * trascation that created the inode. Later as and | 497 | * trascation that created the inode. Later as and |
| 470 | * when we add extents we extent the journal | 498 | * when we add extents we extent the journal |
| 471 | */ | 499 | */ |
| 500 | /* | ||
| 501 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
| 502 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
| 503 | * switch the inode format to prevent read. | ||
| 504 | */ | ||
| 505 | mutex_lock(&(inode->i_mutex)); | ||
| 472 | handle = ext4_journal_start(inode, 1); | 506 | handle = ext4_journal_start(inode, 1); |
| 507 | |||
| 508 | ei = EXT4_I(inode); | ||
| 509 | i_data = ei->i_data; | ||
| 510 | memset(&lb, 0, sizeof(lb)); | ||
| 511 | |||
| 512 | /* 32 bit block address 4 bytes */ | ||
| 513 | max_entries = inode->i_sb->s_blocksize >> 2; | ||
| 473 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { | 514 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { |
| 474 | if (i_data[i]) { | 515 | if (i_data[i]) { |
| 475 | retval = update_extent_range(handle, tmp_inode, | 516 | retval = update_extent_range(handle, tmp_inode, |
| @@ -507,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
| 507 | */ | 548 | */ |
| 508 | retval = finish_range(handle, tmp_inode, &lb); | 549 | retval = finish_range(handle, tmp_inode, &lb); |
| 509 | err_out: | 550 | err_out: |
| 510 | /* | ||
| 511 | * We are either freeing extent information or indirect | ||
| 512 | * blocks. During this we touch superblock, group descriptor | ||
| 513 | * and block bitmap. Later we mark the tmp_inode dirty | ||
| 514 | * via ext4_ext_tree_init. So allocate a credit of 4 | ||
| 515 | * We may update quota (user and group). | ||
| 516 | * | ||
| 517 | * FIXME!! we may be touching bitmaps in different block groups. | ||
| 518 | */ | ||
| 519 | if (ext4_journal_extend(handle, | ||
| 520 | 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) | ||
| 521 | ext4_journal_restart(handle, | ||
| 522 | 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | ||
| 523 | if (retval) | 551 | if (retval) |
| 524 | /* | 552 | /* |
| 525 | * Failure case delete the extent information with the | 553 | * Failure case delete the extent information with the |
| @@ -528,7 +556,11 @@ err_out: | |||
| 528 | free_ext_block(handle, tmp_inode); | 556 | free_ext_block(handle, tmp_inode); |
| 529 | else | 557 | else |
| 530 | retval = ext4_ext_swap_inode_data(handle, inode, | 558 | retval = ext4_ext_swap_inode_data(handle, inode, |
| 531 | tmp_inode, retval); | 559 | tmp_inode); |
| 560 | |||
| 561 | /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ | ||
| 562 | if (ext4_journal_extend(handle, 1) != 0) | ||
| 563 | ext4_journal_restart(handle, 1); | ||
| 532 | 564 | ||
| 533 | /* | 565 | /* |
| 534 | * Mark the tmp_inode as of size zero | 566 | * Mark the tmp_inode as of size zero |
| @@ -556,8 +588,7 @@ err_out: | |||
| 556 | tmp_inode->i_nlink = 0; | 588 | tmp_inode->i_nlink = 0; |
| 557 | 589 | ||
| 558 | ext4_journal_stop(handle); | 590 | ext4_journal_stop(handle); |
| 559 | 591 | mutex_unlock(&(inode->i_mutex)); | |
| 560 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 561 | 592 | ||
| 562 | if (tmp_inode) | 593 | if (tmp_inode) |
| 563 | iput(tmp_inode); | 594 | iput(tmp_inode); |
