diff options
author | Mingming Cao <cmm@us.ibm.com> | 2006-03-26 04:37:55 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-26 11:57:00 -0500 |
commit | 89747d369d34e333b9b60f10f333a0b727b4e4e2 (patch) | |
tree | 26b55849e666c7f6c7a7312b100756c3b591559f | |
parent | e2d53f9525790dfacbcf09f359536311d3913d98 (diff) |
[PATCH] ext3_get_blocks: Mapping multiple blocks at once
Currently ext3_get_block() only maps or allocates one block at a time. This
is quite inefficient for sequential IO workload.
I have posted an early implementation of simple multiple block mapping and
allocation for the current ext3. The basic idea is to allocate the 1st block in
the existing way, and then attempt to allocate the next adjacent blocks on a
best-effort basis. A more detailed description of the implementation can be found here:
http://marc.theaimsgroup.com/?l=ext2-devel&m=112162230003522&w=2
The following is the latest version of the patch: it breaks the original patch
into 5 patches, reworks some of the logic, and fixes some bugs. The breakdown is:
[patch 1] Add mapping of multiple blocks at a time in ext3_get_blocks()
[patch 2] Extend ext3_get_blocks() to support multiple block allocation
[patch 3] Implement multiple block allocation in ext3-try-to-allocate
(called via ext3_new_block()).
[patch 4] Proper accounting updates in ext3_new_blocks()
[patch 5] Adjust reservation window size properly (by the given number
of blocks to allocate) before block allocation to increase the
possibility of allocating multiple blocks in a single call.
Tests done so far include fsx, tiobench and dbench. The following numbers,
collected from Direct IO tests (1G file creation/read), show that the system time
has been greatly reduced (by more than 50% on my 8-CPU system) with the patches.
1G file DIO write:
2.6.15 2.6.15+patches
real 0m31.275s 0m31.161s
user 0m0.000s 0m0.000s
sys 0m3.384s 0m0.564s
1G file DIO read:
2.6.15 2.6.15+patches
real 0m30.733s 0m30.624s
user 0m0.000s 0m0.004s
sys 0m0.748s 0m0.380s
Some previous tests we did on buffered IO using multiple block allocation
and delayed allocation show a noticeable improvement in throughput and system
time.
This patch:
Add support for mapping multiple blocks in one call.
This is useful for DIO reads and re-writes (where blocks are already
allocated), and is also in line with Christoph's proposal of using getblocks() in
mpage_readpage() or mpage_readpages().
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/ext3/dir.c | 5 | ||||
-rw-r--r-- | fs/ext3/inode.c | 105 | ||||
-rw-r--r-- | include/linux/ext3_fs.h | 6 |
3 files changed, 82 insertions, 34 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 773459164bb2..38bd3f6ec147 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -131,8 +131,9 @@ static int ext3_readdir(struct file * filp, | |||
131 | struct buffer_head *bh = NULL; | 131 | struct buffer_head *bh = NULL; |
132 | 132 | ||
133 | map_bh.b_state = 0; | 133 | map_bh.b_state = 0; |
134 | err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0); | 134 | err = ext3_get_blocks_handle(NULL, inode, blk, 1, |
135 | if (!err) { | 135 | &map_bh, 0, 0); |
136 | if (err > 0) { | ||
136 | page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, | 137 | page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, |
137 | &filp->f_ra, | 138 | &filp->f_ra, |
138 | filp, | 139 | filp, |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 76e22c9c9c6c..fcfb10f77120 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -330,7 +330,7 @@ static int ext3_block_to_path(struct inode *inode, | |||
330 | ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big"); | 330 | ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big"); |
331 | } | 331 | } |
332 | if (boundary) | 332 | if (boundary) |
333 | *boundary = (i_block & (ptrs - 1)) == (final - 1); | 333 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
334 | return n; | 334 | return n; |
335 | } | 335 | } |
336 | 336 | ||
@@ -669,11 +669,15 @@ err_out: | |||
669 | * akpm: `handle' can be NULL if create == 0. | 669 | * akpm: `handle' can be NULL if create == 0. |
670 | * | 670 | * |
671 | * The BKL may not be held on entry here. Be sure to take it early. | 671 | * The BKL may not be held on entry here. Be sure to take it early. |
672 | * return > 0, # of blocks mapped or allocated. | ||
673 | * return = 0, if plain lookup failed. | ||
674 | * return < 0, error case. | ||
672 | */ | 675 | */ |
673 | 676 | ||
674 | int | 677 | int |
675 | ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, | 678 | ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, |
676 | struct buffer_head *bh_result, int create, int extend_disksize) | 679 | unsigned long maxblocks, struct buffer_head *bh_result, |
680 | int create, int extend_disksize) | ||
677 | { | 681 | { |
678 | int err = -EIO; | 682 | int err = -EIO; |
679 | int offsets[4]; | 683 | int offsets[4]; |
@@ -681,11 +685,15 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, | |||
681 | Indirect *partial; | 685 | Indirect *partial; |
682 | unsigned long goal; | 686 | unsigned long goal; |
683 | int left; | 687 | int left; |
684 | int boundary = 0; | 688 | int blocks_to_boundary = 0; |
685 | const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary); | 689 | int depth; |
686 | struct ext3_inode_info *ei = EXT3_I(inode); | 690 | struct ext3_inode_info *ei = EXT3_I(inode); |
691 | int count = 0; | ||
692 | unsigned long first_block = 0; | ||
693 | |||
687 | 694 | ||
688 | J_ASSERT(handle != NULL || create == 0); | 695 | J_ASSERT(handle != NULL || create == 0); |
696 | depth = ext3_block_to_path(inode, iblock, offsets, &blocks_to_boundary); | ||
689 | 697 | ||
690 | if (depth == 0) | 698 | if (depth == 0) |
691 | goto out; | 699 | goto out; |
@@ -694,8 +702,31 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, | |||
694 | 702 | ||
695 | /* Simplest case - block found, no allocation needed */ | 703 | /* Simplest case - block found, no allocation needed */ |
696 | if (!partial) { | 704 | if (!partial) { |
705 | first_block = chain[depth - 1].key; | ||
697 | clear_buffer_new(bh_result); | 706 | clear_buffer_new(bh_result); |
698 | goto got_it; | 707 | count++; |
708 | /*map more blocks*/ | ||
709 | while (count < maxblocks && count <= blocks_to_boundary) { | ||
710 | if (!verify_chain(chain, partial)) { | ||
711 | /* | ||
712 | * Indirect block might be removed by | ||
713 | * truncate while we were reading it. | ||
714 | * Handling of that case: forget what we've | ||
715 | * got now. Flag the err as EAGAIN, so it | ||
716 | * will reread. | ||
717 | */ | ||
718 | err = -EAGAIN; | ||
719 | count = 0; | ||
720 | break; | ||
721 | } | ||
722 | if (le32_to_cpu(*(chain[depth-1].p+count) == | ||
723 | (first_block + count))) | ||
724 | count++; | ||
725 | else | ||
726 | break; | ||
727 | } | ||
728 | if (err != -EAGAIN) | ||
729 | goto got_it; | ||
699 | } | 730 | } |
700 | 731 | ||
701 | /* Next simple case - plain lookup or failed read of indirect block */ | 732 | /* Next simple case - plain lookup or failed read of indirect block */ |
@@ -723,6 +754,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, | |||
723 | } | 754 | } |
724 | partial = ext3_get_branch(inode, depth, offsets, chain, &err); | 755 | partial = ext3_get_branch(inode, depth, offsets, chain, &err); |
725 | if (!partial) { | 756 | if (!partial) { |
757 | count++; | ||
726 | mutex_unlock(&ei->truncate_mutex); | 758 | mutex_unlock(&ei->truncate_mutex); |
727 | if (err) | 759 | if (err) |
728 | goto cleanup; | 760 | goto cleanup; |
@@ -772,8 +804,9 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, | |||
772 | set_buffer_new(bh_result); | 804 | set_buffer_new(bh_result); |
773 | got_it: | 805 | got_it: |
774 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 806 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); |
775 | if (boundary) | 807 | if (blocks_to_boundary == 0) |
776 | set_buffer_boundary(bh_result); | 808 | set_buffer_boundary(bh_result); |
809 | err = count; | ||
777 | /* Clean up and exit */ | 810 | /* Clean up and exit */ |
778 | partial = chain + depth - 1; /* the whole chain */ | 811 | partial = chain + depth - 1; /* the whole chain */ |
779 | cleanup: | 812 | cleanup: |
@@ -787,21 +820,6 @@ out: | |||
787 | return err; | 820 | return err; |
788 | } | 821 | } |
789 | 822 | ||
790 | static int ext3_get_block(struct inode *inode, sector_t iblock, | ||
791 | struct buffer_head *bh_result, int create) | ||
792 | { | ||
793 | handle_t *handle = NULL; | ||
794 | int ret; | ||
795 | |||
796 | if (create) { | ||
797 | handle = ext3_journal_current_handle(); | ||
798 | J_ASSERT(handle != 0); | ||
799 | } | ||
800 | ret = ext3_get_block_handle(handle, inode, iblock, | ||
801 | bh_result, create, 1); | ||
802 | return ret; | ||
803 | } | ||
804 | |||
805 | #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) | 823 | #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) |
806 | 824 | ||
807 | static int | 825 | static int |
@@ -812,9 +830,12 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, | |||
812 | handle_t *handle = journal_current_handle(); | 830 | handle_t *handle = journal_current_handle(); |
813 | int ret = 0; | 831 | int ret = 0; |
814 | 832 | ||
815 | if (!handle) | 833 | if (!create) |
816 | goto get_block; /* A read */ | 834 | goto get_block; /* A read */ |
817 | 835 | ||
836 | if (max_blocks == 1) | ||
837 | goto get_block; /* A single block get */ | ||
838 | |||
818 | if (handle->h_transaction->t_state == T_LOCKED) { | 839 | if (handle->h_transaction->t_state == T_LOCKED) { |
819 | /* | 840 | /* |
820 | * Huge direct-io writes can hold off commits for long | 841 | * Huge direct-io writes can hold off commits for long |
@@ -841,13 +862,31 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, | |||
841 | } | 862 | } |
842 | 863 | ||
843 | get_block: | 864 | get_block: |
844 | if (ret == 0) | 865 | if (ret == 0) { |
845 | ret = ext3_get_block_handle(handle, inode, iblock, | 866 | ret = ext3_get_blocks_handle(handle, inode, iblock, |
846 | bh_result, create, 0); | 867 | max_blocks, bh_result, create, 0); |
847 | bh_result->b_size = (1 << inode->i_blkbits); | 868 | if (ret > 0) { |
869 | bh_result->b_size = (ret << inode->i_blkbits); | ||
870 | ret = 0; | ||
871 | } | ||
872 | } | ||
848 | return ret; | 873 | return ret; |
849 | } | 874 | } |
850 | 875 | ||
876 | static int ext3_get_blocks(struct inode *inode, sector_t iblock, | ||
877 | unsigned long maxblocks, struct buffer_head *bh_result, | ||
878 | int create) | ||
879 | { | ||
880 | return ext3_direct_io_get_blocks(inode, iblock, maxblocks, | ||
881 | bh_result, create); | ||
882 | } | ||
883 | |||
884 | static int ext3_get_block(struct inode *inode, sector_t iblock, | ||
885 | struct buffer_head *bh_result, int create) | ||
886 | { | ||
887 | return ext3_get_blocks(inode, iblock, 1, bh_result, create); | ||
888 | } | ||
889 | |||
851 | /* | 890 | /* |
852 | * `handle' can be NULL if create is zero | 891 | * `handle' can be NULL if create is zero |
853 | */ | 892 | */ |
@@ -862,8 +901,16 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode, | |||
862 | dummy.b_state = 0; | 901 | dummy.b_state = 0; |
863 | dummy.b_blocknr = -1000; | 902 | dummy.b_blocknr = -1000; |
864 | buffer_trace_init(&dummy.b_history); | 903 | buffer_trace_init(&dummy.b_history); |
865 | *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); | 904 | err = ext3_get_blocks_handle(handle, inode, block, 1, |
866 | if (!*errp && buffer_mapped(&dummy)) { | 905 | &dummy, create, 1); |
906 | if (err == 1) { | ||
907 | err = 0; | ||
908 | } else if (err >= 0) { | ||
909 | WARN_ON(1); | ||
910 | err = -EIO; | ||
911 | } | ||
912 | *errp = err; | ||
913 | if (!err && buffer_mapped(&dummy)) { | ||
867 | struct buffer_head *bh; | 914 | struct buffer_head *bh; |
868 | bh = sb_getblk(inode->i_sb, dummy.b_blocknr); | 915 | bh = sb_getblk(inode->i_sb, dummy.b_blocknr); |
869 | if (!bh) { | 916 | if (!bh) { |
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index e7239f2f97a1..0adadd85fa66 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h | |||
@@ -775,9 +775,9 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); | |||
775 | int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); | 775 | int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); |
776 | struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); | 776 | struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); |
777 | struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); | 777 | struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); |
778 | int ext3_get_block_handle(handle_t *handle, struct inode *inode, | 778 | int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, |
779 | sector_t iblock, struct buffer_head *bh_result, int create, | 779 | sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, |
780 | int extend_disksize); | 780 | int create, int extend_disksize); |
781 | 781 | ||
782 | extern void ext3_read_inode (struct inode *); | 782 | extern void ext3_read_inode (struct inode *); |
783 | extern int ext3_write_inode (struct inode *, int); | 783 | extern int ext3_write_inode (struct inode *, int); |