aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mingming Cao <cmm@us.ibm.com> 2006-03-26 04:37:55 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-03-26 11:57:00 -0500
commit 89747d369d34e333b9b60f10f333a0b727b4e4e2 (patch)
tree 26b55849e666c7f6c7a7312b100756c3b591559f
parent e2d53f9525790dfacbcf09f359536311d3913d98 (diff)
[PATCH] ext3_get_blocks: Mapping multiple blocks at once
Currently ext3_get_block() only maps or allocates one block at a time. This is quite inefficient for sequential IO workloads.

I have posted an early implementation of simple multiple-block mapping and allocation on top of current ext3. The basic idea is to allocate the 1st block in the existing way, and to attempt to allocate the next adjacent blocks on a best-effort basis. More description of the implementation can be found here: http://marc.theaimsgroup.com/?l=ext2-devel&m=112162230003522&w=2

The following is the latest version of the patch: it breaks the original patch into 5 patches, reworks some of the logic, and fixes some bugs. The break-up is:

[patch 1] Add mapping of multiple blocks at a time in ext3_get_blocks()
[patch 2] Extend ext3_get_blocks() to support multiple block allocation
[patch 3] Implement multiple block allocation in ext3-try-to-allocate (called via ext3_new_block()).
[patch 4] Proper accounting updates in ext3_new_blocks()
[patch 5] Adjust reservation window size properly (by the given number of blocks to allocate) before block allocation to increase the possibility of allocating multiple blocks in a single call.

Tests done so far include fsx, tiobench and dbench. The following numbers, collected from Direct IO tests (1G file creation/read), show that the system time has been greatly reduced (by more than 50% on my 8 cpu system) with the patches.

1G file DIO write:
        2.6.15       2.6.15+patches
real    0m31.275s    0m31.161s
user    0m0.000s     0m0.000s
sys     0m3.384s     0m0.564s

1G file DIO read:
        2.6.15       2.6.15+patches
real    0m30.733s    0m30.624s
user    0m0.000s     0m0.004s
sys     0m0.748s     0m0.380s

Some previous tests we did on buffered IO using multiple block allocation and delayed allocation showed noticeable improvement in throughput and system time.

This patch: Add support for mapping multiple blocks in one call. This is useful for DIO reads and re-writes (where blocks are already allocated), and is also in line with Christoph's proposal of using getblocks() in mpage_readpage() or mpage_readpages().
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/ext3/dir.c 5
-rw-r--r-- fs/ext3/inode.c 105
-rw-r--r-- include/linux/ext3_fs.h 6
3 files changed, 82 insertions, 34 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 773459164bb2..38bd3f6ec147 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -131,8 +131,9 @@ static int ext3_readdir(struct file * filp,
131 struct buffer_head *bh = NULL; 131 struct buffer_head *bh = NULL;
132 132
133 map_bh.b_state = 0; 133 map_bh.b_state = 0;
134 err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0); 134 err = ext3_get_blocks_handle(NULL, inode, blk, 1,
135 if (!err) { 135 &map_bh, 0, 0);
136 if (err > 0) {
136 page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, 137 page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
137 &filp->f_ra, 138 &filp->f_ra,
138 filp, 139 filp,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 76e22c9c9c6c..fcfb10f77120 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -330,7 +330,7 @@ static int ext3_block_to_path(struct inode *inode,
330 ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big"); 330 ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big");
331 } 331 }
332 if (boundary) 332 if (boundary)
333 *boundary = (i_block & (ptrs - 1)) == (final - 1); 333 *boundary = final - 1 - (i_block & (ptrs - 1));
334 return n; 334 return n;
335} 335}
336 336
@@ -669,11 +669,15 @@ err_out:
669 * akpm: `handle' can be NULL if create == 0. 669 * akpm: `handle' can be NULL if create == 0.
670 * 670 *
671 * The BKL may not be held on entry here. Be sure to take it early. 671 * The BKL may not be held on entry here. Be sure to take it early.
672 * return > 0, # of blocks mapped or allocated.
673 * return = 0, if plain lookup failed.
674 * return < 0, error case.
672 */ 675 */
673 676
674int 677int
675ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, 678ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
676 struct buffer_head *bh_result, int create, int extend_disksize) 679 unsigned long maxblocks, struct buffer_head *bh_result,
680 int create, int extend_disksize)
677{ 681{
678 int err = -EIO; 682 int err = -EIO;
679 int offsets[4]; 683 int offsets[4];
@@ -681,11 +685,15 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
681 Indirect *partial; 685 Indirect *partial;
682 unsigned long goal; 686 unsigned long goal;
683 int left; 687 int left;
684 int boundary = 0; 688 int blocks_to_boundary = 0;
685 const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary); 689 int depth;
686 struct ext3_inode_info *ei = EXT3_I(inode); 690 struct ext3_inode_info *ei = EXT3_I(inode);
691 int count = 0;
692 unsigned long first_block = 0;
693
687 694
688 J_ASSERT(handle != NULL || create == 0); 695 J_ASSERT(handle != NULL || create == 0);
696 depth = ext3_block_to_path(inode, iblock, offsets, &blocks_to_boundary);
689 697
690 if (depth == 0) 698 if (depth == 0)
691 goto out; 699 goto out;
@@ -694,8 +702,31 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
694 702
695 /* Simplest case - block found, no allocation needed */ 703 /* Simplest case - block found, no allocation needed */
696 if (!partial) { 704 if (!partial) {
705 first_block = chain[depth - 1].key;
697 clear_buffer_new(bh_result); 706 clear_buffer_new(bh_result);
698 goto got_it; 707 count++;
708 /*map more blocks*/
709 while (count < maxblocks && count <= blocks_to_boundary) {
710 if (!verify_chain(chain, partial)) {
711 /*
712 * Indirect block might be removed by
713 * truncate while we were reading it.
714 * Handling of that case: forget what we've
715 * got now. Flag the err as EAGAIN, so it
716 * will reread.
717 */
718 err = -EAGAIN;
719 count = 0;
720 break;
721 }
722 if (le32_to_cpu(*(chain[depth-1].p+count) ==
723 (first_block + count)))
724 count++;
725 else
726 break;
727 }
728 if (err != -EAGAIN)
729 goto got_it;
699 } 730 }
700 731
701 /* Next simple case - plain lookup or failed read of indirect block */ 732 /* Next simple case - plain lookup or failed read of indirect block */
@@ -723,6 +754,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
723 } 754 }
724 partial = ext3_get_branch(inode, depth, offsets, chain, &err); 755 partial = ext3_get_branch(inode, depth, offsets, chain, &err);
725 if (!partial) { 756 if (!partial) {
757 count++;
726 mutex_unlock(&ei->truncate_mutex); 758 mutex_unlock(&ei->truncate_mutex);
727 if (err) 759 if (err)
728 goto cleanup; 760 goto cleanup;
@@ -772,8 +804,9 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
772 set_buffer_new(bh_result); 804 set_buffer_new(bh_result);
773got_it: 805got_it:
774 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 806 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
775 if (boundary) 807 if (blocks_to_boundary == 0)
776 set_buffer_boundary(bh_result); 808 set_buffer_boundary(bh_result);
809 err = count;
777 /* Clean up and exit */ 810 /* Clean up and exit */
778 partial = chain + depth - 1; /* the whole chain */ 811 partial = chain + depth - 1; /* the whole chain */
779cleanup: 812cleanup:
@@ -787,21 +820,6 @@ out:
787 return err; 820 return err;
788} 821}
789 822
790static int ext3_get_block(struct inode *inode, sector_t iblock,
791 struct buffer_head *bh_result, int create)
792{
793 handle_t *handle = NULL;
794 int ret;
795
796 if (create) {
797 handle = ext3_journal_current_handle();
798 J_ASSERT(handle != 0);
799 }
800 ret = ext3_get_block_handle(handle, inode, iblock,
801 bh_result, create, 1);
802 return ret;
803}
804
805#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) 823#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
806 824
807static int 825static int
@@ -812,9 +830,12 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
812 handle_t *handle = journal_current_handle(); 830 handle_t *handle = journal_current_handle();
813 int ret = 0; 831 int ret = 0;
814 832
815 if (!handle) 833 if (!create)
816 goto get_block; /* A read */ 834 goto get_block; /* A read */
817 835
836 if (max_blocks == 1)
837 goto get_block; /* A single block get */
838
818 if (handle->h_transaction->t_state == T_LOCKED) { 839 if (handle->h_transaction->t_state == T_LOCKED) {
819 /* 840 /*
820 * Huge direct-io writes can hold off commits for long 841 * Huge direct-io writes can hold off commits for long
@@ -841,13 +862,31 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
841 } 862 }
842 863
843get_block: 864get_block:
844 if (ret == 0) 865 if (ret == 0) {
845 ret = ext3_get_block_handle(handle, inode, iblock, 866 ret = ext3_get_blocks_handle(handle, inode, iblock,
846 bh_result, create, 0); 867 max_blocks, bh_result, create, 0);
847 bh_result->b_size = (1 << inode->i_blkbits); 868 if (ret > 0) {
869 bh_result->b_size = (ret << inode->i_blkbits);
870 ret = 0;
871 }
872 }
848 return ret; 873 return ret;
849} 874}
850 875
876static int ext3_get_blocks(struct inode *inode, sector_t iblock,
877 unsigned long maxblocks, struct buffer_head *bh_result,
878 int create)
879{
880 return ext3_direct_io_get_blocks(inode, iblock, maxblocks,
881 bh_result, create);
882}
883
884static int ext3_get_block(struct inode *inode, sector_t iblock,
885 struct buffer_head *bh_result, int create)
886{
887 return ext3_get_blocks(inode, iblock, 1, bh_result, create);
888}
889
851/* 890/*
852 * `handle' can be NULL if create is zero 891 * `handle' can be NULL if create is zero
853 */ 892 */
@@ -862,8 +901,16 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
862 dummy.b_state = 0; 901 dummy.b_state = 0;
863 dummy.b_blocknr = -1000; 902 dummy.b_blocknr = -1000;
864 buffer_trace_init(&dummy.b_history); 903 buffer_trace_init(&dummy.b_history);
865 *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); 904 err = ext3_get_blocks_handle(handle, inode, block, 1,
866 if (!*errp && buffer_mapped(&dummy)) { 905 &dummy, create, 1);
906 if (err == 1) {
907 err = 0;
908 } else if (err >= 0) {
909 WARN_ON(1);
910 err = -EIO;
911 }
912 *errp = err;
913 if (!err && buffer_mapped(&dummy)) {
867 struct buffer_head *bh; 914 struct buffer_head *bh;
868 bh = sb_getblk(inode->i_sb, dummy.b_blocknr); 915 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
869 if (!bh) { 916 if (!bh) {
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index e7239f2f97a1..0adadd85fa66 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -775,9 +775,9 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
775int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); 775int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
776struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); 776struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
777struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); 777struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
778int ext3_get_block_handle(handle_t *handle, struct inode *inode, 778int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
779 sector_t iblock, struct buffer_head *bh_result, int create, 779 sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
780 int extend_disksize); 780 int create, int extend_disksize);
781 781
782extern void ext3_read_inode (struct inode *); 782extern void ext3_read_inode (struct inode *);
783extern int ext3_write_inode (struct inode *, int); 783extern int ext3_write_inode (struct inode *, int);