author	Mingming Cao <cmm@us.ibm.com>	2005-05-01 11:59:20 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-05-01 11:59:20 -0400
commit	fe55c452368af263a9beec38ed29f6be85280524
tree	4f56bd643c5a9ddbb478745e0bc0fbabc9b9fd8e	/fs/ext3
parent	faf8b24968ce6392ea68d9afc7de1ffbc38c1f6c
[PATCH] ext3: remove unnecessary race then retry in ext3_get_block
The extra race-with-truncate-then-retry logic around ext3_get_block_handle(), which was inherited from ext2, is unnecessary for ext3: ei->truncate_sem is already taken in ext3_get_block_handle() before ext3_alloc_branch() is called. That semaphore exists precisely to serialize truncate against block allocation on the same inode, so the inode's indirect addressing tree cannot change once we hold it.

After taking the semaphore, we instead re-verify that the branch is still up to date; if it has changed, we re-read the updated branch. If block allocation is still needed at that point, we have a safe view of the branch to work with in ext3_find_goal()/ext3_splice_branch().

With the retry logic removed, the code becomes more readable. The patch also cleans up some gotos in ext3_get_block_handle().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
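Condensed, the post-patch allocation path in ext3_get_block_handle() (taken from the diff below) looks roughly like this; error handling, the already-mapped fast path, and the cleanup labels are elided:

	/* Condensed sketch of the new ext3_get_block_handle() flow;
	 * error handling and cleanup labels are elided. */
	partial = ext3_get_branch(inode, depth, offsets, chain, &err);
	if (!partial)
		goto got_it;			/* block is already mapped */
	if (!create || err == -EIO)
		goto cleanup;			/* plain lookup or read error */

	down(&ei->truncate_sem);		/* blocks truncate and other get_block */

	/* The chain may have changed before we took the semaphore
	 * (truncate, or another get_block allocating this branch).
	 * Re-read it once; no retry loop is needed, since nothing
	 * can modify the branch while we hold truncate_sem. */
	if (err == -EAGAIN || !verify_chain(chain, partial)) {
		/* drop the stale branch buffers, then re-read the chain */
		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
		if (!partial) {
			up(&ei->truncate_sem);
			goto got_it;		/* someone else allocated it */
		}
	}

	/* Allocate and splice with a stable view of the branch. */
	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
		ext3_init_block_alloc_info(inode);
	goal = ext3_find_goal(inode, iblock, chain, partial);
	left = (chain + depth) - partial;
	err = ext3_alloc_branch(handle, inode, left, goal,
				offsets + (partial - chain), partial);
	if (!err)
		err = ext3_splice_branch(handle, inode, iblock, chain,
					partial, left);
	up(&ei->truncate_sem);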
Diffstat (limited to 'fs/ext3')
-rw-r--r--	fs/ext3/inode.c	144
1 file changed, 61 insertions, 83 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 040eb288bb1c..ea5888688f94 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
  * @goal: place to store the result.
  *
  * Normally this function find the prefered place for block allocation,
- * stores it in *@goal and returns zero. If the branch had been changed
- * under us we return -EAGAIN.
+ * stores it in *@goal and returns zero.
  */

-static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
-			Indirect *partial, unsigned long *goal)
+static unsigned long ext3_find_goal(struct inode *inode, long block,
+			Indirect chain[4], Indirect *partial)
 {
 	struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info;

@@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
 	 */
 	if (block_i && (block == block_i->last_alloc_logical_block + 1)
 		&& (block_i->last_alloc_physical_block != 0)) {
-		*goal = block_i->last_alloc_physical_block + 1;
-		return 0;
+		return block_i->last_alloc_physical_block + 1;
 	}

-	if (verify_chain(chain, partial)) {
-		*goal = ext3_find_near(inode, partial);
-		return 0;
-	}
-	return -EAGAIN;
+	return ext3_find_near(inode, partial);
 }

 /**
@@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
  * @where: location of missing link
  * @num: number of blocks we are adding
  *
- * This function verifies that chain (up to the missing link) had not
- * changed, fills the missing link and does all housekeeping needed in
+ * This function fills the missing link and does all housekeeping needed in
  * inode (->i_blocks, etc.). In case of success we end up with the full
- * chain to new block and return 0. Otherwise (== chain had been changed)
- * we free the new blocks (forgetting their buffer_heads, indeed) and
- * return -EAGAIN.
+ * chain to new block and return 0.
  */

 static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
@@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 		if (err)
 			goto err_out;
 	}
-	/* Verify that place we are splicing to is still there and vacant */
-
-	if (!verify_chain(chain, where-1) || *where->p)
-		/* Writer: end */
-		goto changed;
-
 	/* That's it */

 	*where->p = where->key;
@@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	}
 	return err;

-changed:
-	/*
-	 * AKPM: if where[i].bh isn't part of the current updating
-	 * transaction then we explode nastily. Test this code path.
-	 */
-	jbd_debug(1, "the chain changed: try again\n");
-	err = -EAGAIN;
-
 err_out:
 	for (i = 1; i < num; i++) {
 		BUFFER_TRACE(where[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, where[i].bh);
 	}
-	/* For the normal collision cleanup case, we free up the blocks.
-	 * On genuine filesystem errors we don't even think about doing
-	 * that. */
-	if (err == -EAGAIN)
-		for (i = 0; i < num; i++)
-			ext3_free_blocks(handle, inode,
-					le32_to_cpu(where[i].key), 1);
 	return err;
 }

@@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	unsigned long goal;
 	int left;
 	int boundary = 0;
-	int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
+	const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
 	struct ext3_inode_info *ei = EXT3_I(inode);

 	J_ASSERT(handle != NULL || create == 0);
@@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	if (depth == 0)
 		goto out;

-reread:
 	partial = ext3_get_branch(inode, depth, offsets, chain, &err);

 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		clear_buffer_new(bh_result);
-got_it:
-		map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
-		if (boundary)
-			set_buffer_boundary(bh_result);
-		/* Clean up and exit */
-		partial = chain+depth-1; /* the whole chain */
-		goto cleanup;
+		goto got_it;
 	}

 	/* Next simple case - plain lookup or failed read of indirect block */
-	if (!create || err == -EIO) {
-cleanup:
+	if (!create || err == -EIO)
+		goto cleanup;
+
+	down(&ei->truncate_sem);
+
+	/*
+	 * If the indirect block is missing while we are reading
+	 * the chain(ext3_get_branch() returns -EAGAIN err), or
+	 * if the chain has been changed after we grab the semaphore,
+	 * (either because another process truncated this branch, or
+	 * another get_block allocated this branch) re-grab the chain to see if
+	 * the request block has been allocated or not.
+	 *
+	 * Since we already block the truncate/other get_block
+	 * at this point, we will have the current copy of the chain when we
+	 * splice the branch into the tree.
+	 */
+	if (err == -EAGAIN || !verify_chain(chain, partial)) {
 		while (partial > chain) {
-			BUFFER_TRACE(partial->bh, "call brelse");
 			brelse(partial->bh);
 			partial--;
 		}
-		BUFFER_TRACE(bh_result, "returned");
-out:
-		return err;
+		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+		if (!partial) {
+			up(&ei->truncate_sem);
+			if (err)
+				goto cleanup;
+			clear_buffer_new(bh_result);
+			goto got_it;
+		}
 	}

 	/*
-	 * Indirect block might be removed by truncate while we were
-	 * reading it. Handling of that case (forget what we've got and
-	 * reread) is taken out of the main path.
+	 * Okay, we need to do block allocation. Lazily initialize the block
+	 * allocation info here if necessary
 	 */
-	if (err == -EAGAIN)
-		goto changed;
-
-	goal = 0;
-	down(&ei->truncate_sem);
-
-	/* lazy initialize the block allocation info here if necessary */
-	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) {
+	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
 		ext3_init_block_alloc_info(inode);
-	}

-	if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) {
-		up(&ei->truncate_sem);
-		goto changed;
-	}
+	goal = ext3_find_goal(inode, iblock, chain, partial);

 	left = (chain + depth) - partial;

@@ -771,38 +742,45 @@ out:
 	 * Block out ext3_truncate while we alter the tree
 	 */
 	err = ext3_alloc_branch(handle, inode, left, goal,
-				offsets+(partial-chain), partial);
+				offsets + (partial - chain), partial);

-	/* The ext3_splice_branch call will free and forget any buffers
+	/*
+	 * The ext3_splice_branch call will free and forget any buffers
 	 * on the new chain if there is a failure, but that risks using
 	 * up transaction credits, especially for bitmaps where the
 	 * credits cannot be returned. Can we handle this somehow? We
-	 * may need to return -EAGAIN upwards in the worst case. --sct */
+	 * may need to return -EAGAIN upwards in the worst case. --sct
+	 */
 	if (!err)
 		err = ext3_splice_branch(handle, inode, iblock, chain,
 					partial, left);
-	/* i_disksize growing is protected by truncate_sem
-	 * don't forget to protect it if you're about to implement
-	 * concurrent ext3_get_block() -bzzz */
+	/*
+	 * i_disksize growing is protected by truncate_sem. Don't forget to
+	 * protect it if you're about to implement concurrent
+	 * ext3_get_block() -bzzz
+	 */
 	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
 		ei->i_disksize = inode->i_size;
 	up(&ei->truncate_sem);
-	if (err == -EAGAIN)
-		goto changed;
 	if (err)
 		goto cleanup;

 	set_buffer_new(bh_result);
-	goto got_it;
-
-changed:
+got_it:
+	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	/* Clean up and exit */
+	partial = chain + depth - 1;	/* the whole chain */
+cleanup:
 	while (partial > chain) {
-		jbd_debug(1, "buffer chain changed, retrying\n");
-		BUFFER_TRACE(partial->bh, "brelsing");
+		BUFFER_TRACE(partial->bh, "call brelse");
 		brelse(partial->bh);
 		partial--;
 	}
-	goto reread;
+	BUFFER_TRACE(bh_result, "returned");
+out:
+	return err;
 }

 static int ext3_get_block(struct inode *inode, sector_t iblock,