diff options
author | Theodore Ts'o <tytso@mit.edu> | 2008-10-09 23:53:47 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-10-09 23:53:47 -0400 |
commit | 240799cdf22bd789ea6852653c3b879d35ad0a6c (patch) | |
tree | e696b60cc103f23838b5c14d8d397f692abffbc3 /fs/ext4/inode.c | |
parent | 37515facd001942221d68171c81c1f46d54ffdd0 (diff) |
ext4: Use readahead when reading an inode from the inode table
With modern hard drives, reading 64k takes roughly the same time as
reading a 4k block. So request readahead for adjacent inode table
blocks to reduce the time it takes when iterating over directories
(especially when doing this in htree sort order) in a cold cache case.
With this patch, the time it takes to run "git status" on a kernel
tree after flushing the caches via "echo 3 > /proc/sys/vm/drop_caches"
is reduced by 21%.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 134 |
1 files changed, 64 insertions, 70 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 22fcbb67cd88..ef4ca3d4abc0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3833,41 +3833,6 @@ out_stop: | |||
3833 | ext4_journal_stop(handle); | 3833 | ext4_journal_stop(handle); |
3834 | } | 3834 | } |
3835 | 3835 | ||
3836 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | ||
3837 | unsigned long ino, struct ext4_iloc *iloc) | ||
3838 | { | ||
3839 | ext4_group_t block_group; | ||
3840 | unsigned long offset; | ||
3841 | ext4_fsblk_t block; | ||
3842 | struct ext4_group_desc *gdp; | ||
3843 | |||
3844 | if (!ext4_valid_inum(sb, ino)) { | ||
3845 | /* | ||
3846 | * This error is already checked for in namei.c unless we are | ||
3847 | * looking at an NFS filehandle, in which case no error | ||
3848 | * report is needed | ||
3849 | */ | ||
3850 | return 0; | ||
3851 | } | ||
3852 | |||
3853 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | ||
3854 | gdp = ext4_get_group_desc(sb, block_group, NULL); | ||
3855 | if (!gdp) | ||
3856 | return 0; | ||
3857 | |||
3858 | /* | ||
3859 | * Figure out the offset within the block group inode table | ||
3860 | */ | ||
3861 | offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * | ||
3862 | EXT4_INODE_SIZE(sb); | ||
3863 | block = ext4_inode_table(sb, gdp) + | ||
3864 | (offset >> EXT4_BLOCK_SIZE_BITS(sb)); | ||
3865 | |||
3866 | iloc->block_group = block_group; | ||
3867 | iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); | ||
3868 | return block; | ||
3869 | } | ||
3870 | |||
3871 | /* | 3836 | /* |
3872 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3837 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
3873 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3838 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
@@ -3877,19 +3842,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | |||
3877 | static int __ext4_get_inode_loc(struct inode *inode, | 3842 | static int __ext4_get_inode_loc(struct inode *inode, |
3878 | struct ext4_iloc *iloc, int in_mem) | 3843 | struct ext4_iloc *iloc, int in_mem) |
3879 | { | 3844 | { |
3880 | ext4_fsblk_t block; | 3845 | struct ext4_group_desc *gdp; |
3881 | struct buffer_head *bh; | 3846 | struct buffer_head *bh; |
3847 | struct super_block *sb = inode->i_sb; | ||
3848 | ext4_fsblk_t block; | ||
3849 | int inodes_per_block, inode_offset; | ||
3850 | |||
3851 | iloc->bh = 0; | ||
3852 | if (!ext4_valid_inum(sb, inode->i_ino)) | ||
3853 | return -EIO; | ||
3882 | 3854 | ||
3883 | block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); | 3855 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
3884 | if (!block) | 3856 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
3857 | if (!gdp) | ||
3885 | return -EIO; | 3858 | return -EIO; |
3886 | 3859 | ||
3887 | bh = sb_getblk(inode->i_sb, block); | 3860 | /* |
3861 | * Figure out the offset within the block group inode table | ||
3862 | */ | ||
3863 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | ||
3864 | inode_offset = ((inode->i_ino - 1) % | ||
3865 | EXT4_INODES_PER_GROUP(sb)); | ||
3866 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | ||
3867 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
3868 | |||
3869 | bh = sb_getblk(sb, block); | ||
3888 | if (!bh) { | 3870 | if (!bh) { |
3889 | ext4_error (inode->i_sb, "ext4_get_inode_loc", | 3871 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " |
3890 | "unable to read inode block - " | 3872 | "inode block - inode=%lu, block=%llu", |
3891 | "inode=%lu, block=%llu", | 3873 | inode->i_ino, block); |
3892 | inode->i_ino, block); | ||
3893 | return -EIO; | 3874 | return -EIO; |
3894 | } | 3875 | } |
3895 | if (!buffer_uptodate(bh)) { | 3876 | if (!buffer_uptodate(bh)) { |
@@ -3917,28 +3898,12 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3917 | */ | 3898 | */ |
3918 | if (in_mem) { | 3899 | if (in_mem) { |
3919 | struct buffer_head *bitmap_bh; | 3900 | struct buffer_head *bitmap_bh; |
3920 | struct ext4_group_desc *desc; | 3901 | int i, start; |
3921 | int inodes_per_buffer; | ||
3922 | int inode_offset, i; | ||
3923 | ext4_group_t block_group; | ||
3924 | int start; | ||
3925 | |||
3926 | block_group = (inode->i_ino - 1) / | ||
3927 | EXT4_INODES_PER_GROUP(inode->i_sb); | ||
3928 | inodes_per_buffer = bh->b_size / | ||
3929 | EXT4_INODE_SIZE(inode->i_sb); | ||
3930 | inode_offset = ((inode->i_ino - 1) % | ||
3931 | EXT4_INODES_PER_GROUP(inode->i_sb)); | ||
3932 | start = inode_offset & ~(inodes_per_buffer - 1); | ||
3933 | 3902 | ||
3934 | /* Is the inode bitmap in cache? */ | 3903 | start = inode_offset & ~(inodes_per_block - 1); |
3935 | desc = ext4_get_group_desc(inode->i_sb, | ||
3936 | block_group, NULL); | ||
3937 | if (!desc) | ||
3938 | goto make_io; | ||
3939 | 3904 | ||
3940 | bitmap_bh = sb_getblk(inode->i_sb, | 3905 | /* Is the inode bitmap in cache? */ |
3941 | ext4_inode_bitmap(inode->i_sb, desc)); | 3906 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
3942 | if (!bitmap_bh) | 3907 | if (!bitmap_bh) |
3943 | goto make_io; | 3908 | goto make_io; |
3944 | 3909 | ||
@@ -3951,14 +3916,14 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3951 | brelse(bitmap_bh); | 3916 | brelse(bitmap_bh); |
3952 | goto make_io; | 3917 | goto make_io; |
3953 | } | 3918 | } |
3954 | for (i = start; i < start + inodes_per_buffer; i++) { | 3919 | for (i = start; i < start + inodes_per_block; i++) { |
3955 | if (i == inode_offset) | 3920 | if (i == inode_offset) |
3956 | continue; | 3921 | continue; |
3957 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3922 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
3958 | break; | 3923 | break; |
3959 | } | 3924 | } |
3960 | brelse(bitmap_bh); | 3925 | brelse(bitmap_bh); |
3961 | if (i == start + inodes_per_buffer) { | 3926 | if (i == start + inodes_per_block) { |
3962 | /* all other inodes are free, so skip I/O */ | 3927 | /* all other inodes are free, so skip I/O */ |
3963 | memset(bh->b_data, 0, bh->b_size); | 3928 | memset(bh->b_data, 0, bh->b_size); |
3964 | set_buffer_uptodate(bh); | 3929 | set_buffer_uptodate(bh); |
@@ -3969,6 +3934,36 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3969 | 3934 | ||
3970 | make_io: | 3935 | make_io: |
3971 | /* | 3936 | /* |
3937 | * If we need to do any I/O, try to pre-readahead extra | ||
3938 | * blocks from the inode table. | ||
3939 | */ | ||
3940 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | ||
3941 | ext4_fsblk_t b, end, table; | ||
3942 | unsigned num; | ||
3943 | |||
3944 | table = ext4_inode_table(sb, gdp); | ||
3945 | /* Make sure s_inode_readahead_blks is a power of 2 */ | ||
3946 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3947 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
3948 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
3949 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3950 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
3951 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | ||
3952 | if (table > b) | ||
3953 | b = table; | ||
3954 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | ||
3955 | num = EXT4_INODES_PER_GROUP(sb); | ||
3956 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3957 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3958 | num -= le16_to_cpu(gdp->bg_itable_unused); | ||
3959 | table += num / inodes_per_block; | ||
3960 | if (end > table) | ||
3961 | end = table; | ||
3962 | while (b <= end) | ||
3963 | sb_breadahead(sb, b++); | ||
3964 | } | ||
3965 | |||
3966 | /* | ||
3972 | * There are other valid inodes in the buffer, this inode | 3967 | * There are other valid inodes in the buffer, this inode |
3973 | * has in-inode xattrs, or we don't have this inode in memory. | 3968 | * has in-inode xattrs, or we don't have this inode in memory. |
3974 | * Read the block from disk. | 3969 | * Read the block from disk. |
@@ -3978,10 +3973,9 @@ make_io: | |||
3978 | submit_bh(READ_META, bh); | 3973 | submit_bh(READ_META, bh); |
3979 | wait_on_buffer(bh); | 3974 | wait_on_buffer(bh); |
3980 | if (!buffer_uptodate(bh)) { | 3975 | if (!buffer_uptodate(bh)) { |
3981 | ext4_error(inode->i_sb, "ext4_get_inode_loc", | 3976 | ext4_error(sb, __func__, |
3982 | "unable to read inode block - " | 3977 | "unable to read inode block - inode=%lu, " |
3983 | "inode=%lu, block=%llu", | 3978 | "block=%llu", inode->i_ino, block); |
3984 | inode->i_ino, block); | ||
3985 | brelse(bh); | 3979 | brelse(bh); |
3986 | return -EIO; | 3980 | return -EIO; |
3987 | } | 3981 | } |