diff options
author | Andrew Morton <akpm@osdl.org> | 2006-03-23 06:00:11 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-23 10:38:09 -0500 |
commit | d8733c2956968a01394a4d2a9e97a8b431a78776 (patch) | |
tree | 9743c9020eb5193efa4a0f102b3a7eb1d999c4fd | |
parent | b8e31edc10d160a8bf2159541d9d12f2079a0887 (diff) |
[PATCH] ext3_readdir: use generic readahead
Linus points out that ext3_readdir's readahead only cuts in when
ext3_readdir() is operating at the very start of the directory. So for large
directories we end up performing no readahead at all, and directory reads are slow.
So take it all out and use the core VM's page_cache_readahead(). This means
that ext3 directory reads will use all of readahead's dynamic sizing goop.
Note that we're using the directory's filp->f_ra to hold the readahead state,
but readahead is actually being performed against the underlying blockdev's
address_space. Fortunately the readahead code is all set up to handle this.
Tested with printk. It works. I was struggling to find a real workload which
actually cared.
(The patch also exports page_cache_readahead() to GPL modules.)
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/ext3/dir.c | 52 | ||||
-rw-r--r-- | fs/ext3/inode.c | 2 | ||||
-rw-r--r-- | include/linux/ext3_fs.h | 9 | ||||
-rw-r--r-- | mm/readahead.c | 1 |
4 files changed, 32 insertions, 32 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 832867aef3dc..773459164bb2 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -95,11 +95,10 @@ static int ext3_readdir(struct file * filp, | |||
95 | void * dirent, filldir_t filldir) | 95 | void * dirent, filldir_t filldir) |
96 | { | 96 | { |
97 | int error = 0; | 97 | int error = 0; |
98 | unsigned long offset, blk; | 98 | unsigned long offset; |
99 | int i, num, stored; | 99 | int i, stored; |
100 | struct buffer_head * bh, * tmp, * bha[16]; | 100 | struct ext3_dir_entry_2 *de; |
101 | struct ext3_dir_entry_2 * de; | 101 | struct super_block *sb; |
102 | struct super_block * sb; | ||
103 | int err; | 102 | int err; |
104 | struct inode *inode = filp->f_dentry->d_inode; | 103 | struct inode *inode = filp->f_dentry->d_inode; |
105 | int ret = 0; | 104 | int ret = 0; |
@@ -124,12 +123,29 @@ static int ext3_readdir(struct file * filp, | |||
124 | } | 123 | } |
125 | #endif | 124 | #endif |
126 | stored = 0; | 125 | stored = 0; |
127 | bh = NULL; | ||
128 | offset = filp->f_pos & (sb->s_blocksize - 1); | 126 | offset = filp->f_pos & (sb->s_blocksize - 1); |
129 | 127 | ||
130 | while (!error && !stored && filp->f_pos < inode->i_size) { | 128 | while (!error && !stored && filp->f_pos < inode->i_size) { |
131 | blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb); | 129 | unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb); |
132 | bh = ext3_bread(NULL, inode, blk, 0, &err); | 130 | struct buffer_head map_bh; |
131 | struct buffer_head *bh = NULL; | ||
132 | |||
133 | map_bh.b_state = 0; | ||
134 | err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0); | ||
135 | if (!err) { | ||
136 | page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, | ||
137 | &filp->f_ra, | ||
138 | filp, | ||
139 | map_bh.b_blocknr >> | ||
140 | (PAGE_CACHE_SHIFT - inode->i_blkbits), | ||
141 | 1); | ||
142 | bh = ext3_bread(NULL, inode, blk, 0, &err); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * We ignore I/O errors on directories so users have a chance | ||
147 | * of recovering data when there's a bad sector | ||
148 | */ | ||
133 | if (!bh) { | 149 | if (!bh) { |
134 | ext3_error (sb, "ext3_readdir", | 150 | ext3_error (sb, "ext3_readdir", |
135 | "directory #%lu contains a hole at offset %lu", | 151 | "directory #%lu contains a hole at offset %lu", |
@@ -138,26 +154,6 @@ static int ext3_readdir(struct file * filp, | |||
138 | continue; | 154 | continue; |
139 | } | 155 | } |
140 | 156 | ||
141 | /* | ||
142 | * Do the readahead | ||
143 | */ | ||
144 | if (!offset) { | ||
145 | for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0; | ||
146 | i > 0; i--) { | ||
147 | tmp = ext3_getblk (NULL, inode, ++blk, 0, &err); | ||
148 | if (tmp && !buffer_uptodate(tmp) && | ||
149 | !buffer_locked(tmp)) | ||
150 | bha[num++] = tmp; | ||
151 | else | ||
152 | brelse (tmp); | ||
153 | } | ||
154 | if (num) { | ||
155 | ll_rw_block (READA, num, bha); | ||
156 | for (i = 0; i < num; i++) | ||
157 | brelse (bha[i]); | ||
158 | } | ||
159 | } | ||
160 | |||
161 | revalidate: | 157 | revalidate: |
162 | /* If the dir block has changed since the last call to | 158 | /* If the dir block has changed since the last call to |
163 | * readdir(2), then we might be pointing to an invalid | 159 | * readdir(2), then we might be pointing to an invalid |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 0384e539b88f..d59d5a667b0b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -671,7 +671,7 @@ err_out: | |||
671 | * The BKL may not be held on entry here. Be sure to take it early. | 671 | * The BKL may not be held on entry here. Be sure to take it early. |
672 | */ | 672 | */ |
673 | 673 | ||
674 | static int | 674 | int |
675 | ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, | 675 | ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, |
676 | struct buffer_head *bh_result, int create, int extend_disksize) | 676 | struct buffer_head *bh_result, int create, int extend_disksize) |
677 | { | 677 | { |
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index c0272d73ab20..e7239f2f97a1 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h | |||
@@ -772,9 +772,12 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); | |||
772 | 772 | ||
773 | 773 | ||
774 | /* inode.c */ | 774 | /* inode.c */ |
775 | extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); | 775 | int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); |
776 | extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); | 776 | struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); |
777 | extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); | 777 | struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); |
778 | int ext3_get_block_handle(handle_t *handle, struct inode *inode, | ||
779 | sector_t iblock, struct buffer_head *bh_result, int create, | ||
780 | int extend_disksize); | ||
778 | 781 | ||
779 | extern void ext3_read_inode (struct inode *); | 782 | extern void ext3_read_inode (struct inode *); |
780 | extern int ext3_write_inode (struct inode *, int); | 783 | extern int ext3_write_inode (struct inode *, int); |
diff --git a/mm/readahead.c b/mm/readahead.c index 301b36c4a0ce..0f142a40984b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -555,6 +555,7 @@ recheck: | |||
555 | out: | 555 | out: |
556 | return ra->prev_page + 1; | 556 | return ra->prev_page + 1; |
557 | } | 557 | } |
558 | EXPORT_SYMBOL_GPL(page_cache_readahead); | ||
558 | 559 | ||
559 | /* | 560 | /* |
560 | * handle_ra_miss() is called when it is known that a page which should have | 561 | * handle_ra_miss() is called when it is known that a page which should have |