about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Theodore Ts'o <tytso@mit.edu> 2008-10-09 23:53:47 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2008-10-09 23:53:47 -0400
commit: 240799cdf22bd789ea6852653c3b879d35ad0a6c (patch)
tree: e696b60cc103f23838b5c14d8d397f692abffbc3 /fs/ext4
parent: 37515facd001942221d68171c81c1f46d54ffdd0 (diff)
ext4: Use readahead when reading an inode from the inode table
With modern hard drives, reading 64k takes roughly the same time as reading a 4k block. So request readahead for adjacent inode table blocks to reduce the time it takes when iterating over directories (especially when doing this in htree sort order) in a cold cache case.

With this patch, the time it takes to run "git status" on a kernel tree after flushing the caches via "echo 3 > /proc/sys/vm/drop_caches" is reduced by 21%.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/ext4.h | 2
-rw-r--r-- fs/ext4/ext4_sb.h | 1
-rw-r--r-- fs/ext4/inode.c | 134
-rw-r--r-- fs/ext4/super.c | 27
4 files changed, 92 insertions, 72 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 163c44527dde..922d18720c9e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -790,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
790#define EXT4_DEF_RESUID 0 790#define EXT4_DEF_RESUID 0
791#define EXT4_DEF_RESGID 0 791#define EXT4_DEF_RESGID 0
792 792
793#define EXT4_DEF_INODE_READAHEAD_BLKS 32
794
793/* 795/*
794 * Default mount options 796 * Default mount options
795 */ 797 */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index f92af01138d4..94e0757522a6 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -52,6 +52,7 @@ struct ext4_sb_info {
52 int s_desc_per_block_bits; 52 int s_desc_per_block_bits;
53 int s_inode_size; 53 int s_inode_size;
54 int s_first_ino; 54 int s_first_ino;
55 unsigned int s_inode_readahead_blks;
55 spinlock_t s_next_gen_lock; 56 spinlock_t s_next_gen_lock;
56 u32 s_next_generation; 57 u32 s_next_generation;
57 u32 s_hash_seed[4]; 58 u32 s_hash_seed[4];
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 22fcbb67cd88..ef4ca3d4abc0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3833,41 +3833,6 @@ out_stop:
3833 ext4_journal_stop(handle); 3833 ext4_journal_stop(handle);
3834} 3834}
3835 3835
3836static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
3837 unsigned long ino, struct ext4_iloc *iloc)
3838{
3839 ext4_group_t block_group;
3840 unsigned long offset;
3841 ext4_fsblk_t block;
3842 struct ext4_group_desc *gdp;
3843
3844 if (!ext4_valid_inum(sb, ino)) {
3845 /*
3846 * This error is already checked for in namei.c unless we are
3847 * looking at an NFS filehandle, in which case no error
3848 * report is needed
3849 */
3850 return 0;
3851 }
3852
3853 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
3854 gdp = ext4_get_group_desc(sb, block_group, NULL);
3855 if (!gdp)
3856 return 0;
3857
3858 /*
3859 * Figure out the offset within the block group inode table
3860 */
3861 offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
3862 EXT4_INODE_SIZE(sb);
3863 block = ext4_inode_table(sb, gdp) +
3864 (offset >> EXT4_BLOCK_SIZE_BITS(sb));
3865
3866 iloc->block_group = block_group;
3867 iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
3868 return block;
3869}
3870
3871/* 3836/*
3872 * ext4_get_inode_loc returns with an extra refcount against the inode's 3837 * ext4_get_inode_loc returns with an extra refcount against the inode's
3873 * underlying buffer_head on success. If 'in_mem' is true, we have all 3838 * underlying buffer_head on success. If 'in_mem' is true, we have all
@@ -3877,19 +3842,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
3877static int __ext4_get_inode_loc(struct inode *inode, 3842static int __ext4_get_inode_loc(struct inode *inode,
3878 struct ext4_iloc *iloc, int in_mem) 3843 struct ext4_iloc *iloc, int in_mem)
3879{ 3844{
3880 ext4_fsblk_t block; 3845 struct ext4_group_desc *gdp;
3881 struct buffer_head *bh; 3846 struct buffer_head *bh;
3847 struct super_block *sb = inode->i_sb;
3848 ext4_fsblk_t block;
3849 int inodes_per_block, inode_offset;
3850
3851 iloc->bh = 0;
3852 if (!ext4_valid_inum(sb, inode->i_ino))
3853 return -EIO;
3882 3854
3883 block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); 3855 iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
3884 if (!block) 3856 gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
3857 if (!gdp)
3885 return -EIO; 3858 return -EIO;
3886 3859
3887 bh = sb_getblk(inode->i_sb, block); 3860 /*
3861 * Figure out the offset within the block group inode table
3862 */
3863 inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
3864 inode_offset = ((inode->i_ino - 1) %
3865 EXT4_INODES_PER_GROUP(sb));
3866 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
3867 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3868
3869 bh = sb_getblk(sb, block);
3888 if (!bh) { 3870 if (!bh) {
3889 ext4_error (inode->i_sb, "ext4_get_inode_loc", 3871 ext4_error(sb, "ext4_get_inode_loc", "unable to read "
3890 "unable to read inode block - " 3872 "inode block - inode=%lu, block=%llu",
3891 "inode=%lu, block=%llu", 3873 inode->i_ino, block);
3892 inode->i_ino, block);
3893 return -EIO; 3874 return -EIO;
3894 } 3875 }
3895 if (!buffer_uptodate(bh)) { 3876 if (!buffer_uptodate(bh)) {
@@ -3917,28 +3898,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
3917 */ 3898 */
3918 if (in_mem) { 3899 if (in_mem) {
3919 struct buffer_head *bitmap_bh; 3900 struct buffer_head *bitmap_bh;
3920 struct ext4_group_desc *desc; 3901 int i, start;
3921 int inodes_per_buffer;
3922 int inode_offset, i;
3923 ext4_group_t block_group;
3924 int start;
3925
3926 block_group = (inode->i_ino - 1) /
3927 EXT4_INODES_PER_GROUP(inode->i_sb);
3928 inodes_per_buffer = bh->b_size /
3929 EXT4_INODE_SIZE(inode->i_sb);
3930 inode_offset = ((inode->i_ino - 1) %
3931 EXT4_INODES_PER_GROUP(inode->i_sb));
3932 start = inode_offset & ~(inodes_per_buffer - 1);
3933 3902
3934 /* Is the inode bitmap in cache? */ 3903 start = inode_offset & ~(inodes_per_block - 1);
3935 desc = ext4_get_group_desc(inode->i_sb,
3936 block_group, NULL);
3937 if (!desc)
3938 goto make_io;
3939 3904
3940 bitmap_bh = sb_getblk(inode->i_sb, 3905 /* Is the inode bitmap in cache? */
3941 ext4_inode_bitmap(inode->i_sb, desc)); 3906 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3942 if (!bitmap_bh) 3907 if (!bitmap_bh)
3943 goto make_io; 3908 goto make_io;
3944 3909
@@ -3951,14 +3916,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
3951 brelse(bitmap_bh); 3916 brelse(bitmap_bh);
3952 goto make_io; 3917 goto make_io;
3953 } 3918 }
3954 for (i = start; i < start + inodes_per_buffer; i++) { 3919 for (i = start; i < start + inodes_per_block; i++) {
3955 if (i == inode_offset) 3920 if (i == inode_offset)
3956 continue; 3921 continue;
3957 if (ext4_test_bit(i, bitmap_bh->b_data)) 3922 if (ext4_test_bit(i, bitmap_bh->b_data))
3958 break; 3923 break;
3959 } 3924 }
3960 brelse(bitmap_bh); 3925 brelse(bitmap_bh);
3961 if (i == start + inodes_per_buffer) { 3926 if (i == start + inodes_per_block) {
3962 /* all other inodes are free, so skip I/O */ 3927 /* all other inodes are free, so skip I/O */
3963 memset(bh->b_data, 0, bh->b_size); 3928 memset(bh->b_data, 0, bh->b_size);
3964 set_buffer_uptodate(bh); 3929 set_buffer_uptodate(bh);
@@ -3969,6 +3934,36 @@ static int __ext4_get_inode_loc(struct inode *inode,
3969 3934
3970make_io: 3935make_io:
3971 /* 3936 /*
3937 * If we need to do any I/O, try to pre-readahead extra
3938 * blocks from the inode table.
3939 */
3940 if (EXT4_SB(sb)->s_inode_readahead_blks) {
3941 ext4_fsblk_t b, end, table;
3942 unsigned num;
3943
3944 table = ext4_inode_table(sb, gdp);
3945 /* Make sure s_inode_readahead_blks is a power of 2 */
3946 while (EXT4_SB(sb)->s_inode_readahead_blks &
3947 (EXT4_SB(sb)->s_inode_readahead_blks-1))
3948 EXT4_SB(sb)->s_inode_readahead_blks =
3949 (EXT4_SB(sb)->s_inode_readahead_blks &
3950 (EXT4_SB(sb)->s_inode_readahead_blks-1));
3951 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
3952 if (table > b)
3953 b = table;
3954 end = b + EXT4_SB(sb)->s_inode_readahead_blks;
3955 num = EXT4_INODES_PER_GROUP(sb);
3956 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3957 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3958 num -= le16_to_cpu(gdp->bg_itable_unused);
3959 table += num / inodes_per_block;
3960 if (end > table)
3961 end = table;
3962 while (b <= end)
3963 sb_breadahead(sb, b++);
3964 }
3965
3966 /*
3972 * There are other valid inodes in the buffer, this inode 3967 * There are other valid inodes in the buffer, this inode
3973 * has in-inode xattrs, or we don't have this inode in memory. 3968 * has in-inode xattrs, or we don't have this inode in memory.
3974 * Read the block from disk. 3969 * Read the block from disk.
@@ -3978,10 +3973,9 @@ make_io:
3978 submit_bh(READ_META, bh); 3973 submit_bh(READ_META, bh);
3979 wait_on_buffer(bh); 3974 wait_on_buffer(bh);
3980 if (!buffer_uptodate(bh)) { 3975 if (!buffer_uptodate(bh)) {
3981 ext4_error(inode->i_sb, "ext4_get_inode_loc", 3976 ext4_error(sb, __func__,
3982 "unable to read inode block - " 3977 "unable to read inode block - inode=%lu, "
3983 "inode=%lu, block=%llu", 3978 "block=%llu", inode->i_ino, block);
3984 inode->i_ino, block);
3985 brelse(bh); 3979 brelse(bh);
3986 return -EIO; 3980 return -EIO;
3987 } 3981 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9f5468fb06da..6583aee5177f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -515,8 +515,10 @@ static void ext4_put_super(struct super_block *sb)
515 mark_buffer_dirty(sbi->s_sbh); 515 mark_buffer_dirty(sbi->s_sbh);
516 ext4_commit_super(sb, es, 1); 516 ext4_commit_super(sb, es, 1);
517 } 517 }
518 if (sbi->s_proc) 518 if (sbi->s_proc) {
519 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
519 remove_proc_entry(sb->s_id, ext4_proc_root); 520 remove_proc_entry(sb->s_id, ext4_proc_root);
521 }
520 522
521 for (i = 0; i < sbi->s_gdb_count; i++) 523 for (i = 0; i < sbi->s_gdb_count; i++)
522 brelse(sbi->s_group_desc[i]); 524 brelse(sbi->s_group_desc[i]);
@@ -779,6 +781,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
779 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 781 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
780 seq_puts(seq, ",data=writeback"); 782 seq_puts(seq, ",data=writeback");
781 783
784 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
785 seq_printf(seq, ",inode_readahead_blks=%u",
786 sbi->s_inode_readahead_blks);
787
782 ext4_show_quota_options(seq, sb); 788 ext4_show_quota_options(seq, sb);
783 return 0; 789 return 0;
784} 790}
@@ -913,6 +919,7 @@ enum {
913 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 919 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
914 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 920 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
915 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, 921 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
922 Opt_inode_readahead_blks
916}; 923};
917 924
918static match_table_t tokens = { 925static match_table_t tokens = {
@@ -973,6 +980,7 @@ static match_table_t tokens = {
973 {Opt_resize, "resize"}, 980 {Opt_resize, "resize"},
974 {Opt_delalloc, "delalloc"}, 981 {Opt_delalloc, "delalloc"},
975 {Opt_nodelalloc, "nodelalloc"}, 982 {Opt_nodelalloc, "nodelalloc"},
983 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
976 {Opt_err, NULL}, 984 {Opt_err, NULL},
977}; 985};
978 986
@@ -1381,6 +1389,13 @@ set_qf_format:
1381 case Opt_delalloc: 1389 case Opt_delalloc:
1382 set_opt(sbi->s_mount_opt, DELALLOC); 1390 set_opt(sbi->s_mount_opt, DELALLOC);
1383 break; 1391 break;
1392 case Opt_inode_readahead_blks:
1393 if (match_int(&args[0], &option))
1394 return 0;
1395 if (option < 0 || option > (1 << 30))
1396 return 0;
1397 sbi->s_inode_readahead_blks = option;
1398 break;
1384 default: 1399 default:
1385 printk(KERN_ERR 1400 printk(KERN_ERR
1386 "EXT4-fs: Unrecognized mount option \"%s\" " 1401 "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1938,6 +1953,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1938 sbi->s_mount_opt = 0; 1953 sbi->s_mount_opt = 0;
1939 sbi->s_resuid = EXT4_DEF_RESUID; 1954 sbi->s_resuid = EXT4_DEF_RESUID;
1940 sbi->s_resgid = EXT4_DEF_RESGID; 1955 sbi->s_resgid = EXT4_DEF_RESGID;
1956 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
1941 sbi->s_sb_block = sb_block; 1957 sbi->s_sb_block = sb_block;
1942 1958
1943 unlock_kernel(); 1959 unlock_kernel();
@@ -2234,6 +2250,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2234 if (ext4_proc_root) 2250 if (ext4_proc_root)
2235 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2251 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2236 2252
2253 if (sbi->s_proc)
2254 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2255 &ext4_ui_proc_fops,
2256 &sbi->s_inode_readahead_blks);
2257
2237 bgl_lock_init(&sbi->s_blockgroup_lock); 2258 bgl_lock_init(&sbi->s_blockgroup_lock);
2238 2259
2239 for (i = 0; i < db_count; i++) { 2260 for (i = 0; i < db_count; i++) {
@@ -2513,8 +2534,10 @@ failed_mount2:
2513 brelse(sbi->s_group_desc[i]); 2534 brelse(sbi->s_group_desc[i]);
2514 kfree(sbi->s_group_desc); 2535 kfree(sbi->s_group_desc);
2515failed_mount: 2536failed_mount:
2516 if (sbi->s_proc) 2537 if (sbi->s_proc) {
2538 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2517 remove_proc_entry(sb->s_id, ext4_proc_root); 2539 remove_proc_entry(sb->s_id, ext4_proc_root);
2540 }
2518#ifdef CONFIG_QUOTA 2541#ifdef CONFIG_QUOTA
2519 for (i = 0; i < MAXQUOTAS; i++) 2542 for (i = 0; i < MAXQUOTAS; i++)
2520 kfree(sbi->s_qf_names[i]); 2543 kfree(sbi->s_qf_names[i]);