about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrew Morton <akpm@osdl.org> 2006-03-23 06:00:11 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-03-23 10:38:09 -0500
commit: d8733c2956968a01394a4d2a9e97a8b431a78776 (patch)
tree: 9743c9020eb5193efa4a0f102b3a7eb1d999c4fd
parent: b8e31edc10d160a8bf2159541d9d12f2079a0887 (diff)
[PATCH] ext3_readdir: use generic readahead
Linus points out that ext3_readdir's readahead only cuts in when
ext3_readdir() is operating at the very start of the directory. So for large
directories we end up performing no readahead at all and we suck.

So take it all out and use the core VM's page_cache_readahead(). This means
that ext3 directory reads will use all of readahead's dynamic sizing goop.

Note that we're using the directory's filp->f_ra to hold the readahead state,
but readahead is actually being performed against the underlying blockdev's
address_space. Fortunately the readahead code is all set up to handle this.

Tested with printk. It works. I was struggling to find a real workload which
actually cared.

(The patch also exports page_cache_readahead() to GPL modules)

Cc: "Stephen C. Tweedie" <sct@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  fs/ext3/dir.c            52
-rw-r--r--  fs/ext3/inode.c           2
-rw-r--r--  include/linux/ext3_fs.h   9
-rw-r--r--  mm/readahead.c            1
4 files changed, 32 insertions, 32 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 832867aef3dc..773459164bb2 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -95,11 +95,10 @@ static int ext3_readdir(struct file * filp,
95 void * dirent, filldir_t filldir) 95 void * dirent, filldir_t filldir)
96{ 96{
97 int error = 0; 97 int error = 0;
98 unsigned long offset, blk; 98 unsigned long offset;
99 int i, num, stored; 99 int i, stored;
100 struct buffer_head * bh, * tmp, * bha[16]; 100 struct ext3_dir_entry_2 *de;
101 struct ext3_dir_entry_2 * de; 101 struct super_block *sb;
102 struct super_block * sb;
103 int err; 102 int err;
104 struct inode *inode = filp->f_dentry->d_inode; 103 struct inode *inode = filp->f_dentry->d_inode;
105 int ret = 0; 104 int ret = 0;
@@ -124,12 +123,29 @@ static int ext3_readdir(struct file * filp,
124 } 123 }
125#endif 124#endif
126 stored = 0; 125 stored = 0;
127 bh = NULL;
128 offset = filp->f_pos & (sb->s_blocksize - 1); 126 offset = filp->f_pos & (sb->s_blocksize - 1);
129 127
130 while (!error && !stored && filp->f_pos < inode->i_size) { 128 while (!error && !stored && filp->f_pos < inode->i_size) {
131 blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb); 129 unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
132 bh = ext3_bread(NULL, inode, blk, 0, &err); 130 struct buffer_head map_bh;
131 struct buffer_head *bh = NULL;
132
133 map_bh.b_state = 0;
134 err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0);
135 if (!err) {
136 page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
137 &filp->f_ra,
138 filp,
139 map_bh.b_blocknr >>
140 (PAGE_CACHE_SHIFT - inode->i_blkbits),
141 1);
142 bh = ext3_bread(NULL, inode, blk, 0, &err);
143 }
144
145 /*
146 * We ignore I/O errors on directories so users have a chance
147 * of recovering data when there's a bad sector
148 */
133 if (!bh) { 149 if (!bh) {
134 ext3_error (sb, "ext3_readdir", 150 ext3_error (sb, "ext3_readdir",
135 "directory #%lu contains a hole at offset %lu", 151 "directory #%lu contains a hole at offset %lu",
@@ -138,26 +154,6 @@ static int ext3_readdir(struct file * filp,
138 continue; 154 continue;
139 } 155 }
140 156
141 /*
142 * Do the readahead
143 */
144 if (!offset) {
145 for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0;
146 i > 0; i--) {
147 tmp = ext3_getblk (NULL, inode, ++blk, 0, &err);
148 if (tmp && !buffer_uptodate(tmp) &&
149 !buffer_locked(tmp))
150 bha[num++] = tmp;
151 else
152 brelse (tmp);
153 }
154 if (num) {
155 ll_rw_block (READA, num, bha);
156 for (i = 0; i < num; i++)
157 brelse (bha[i]);
158 }
159 }
160
161revalidate: 157revalidate:
162 /* If the dir block has changed since the last call to 158 /* If the dir block has changed since the last call to
163 * readdir(2), then we might be pointing to an invalid 159 * readdir(2), then we might be pointing to an invalid
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0384e539b88f..d59d5a667b0b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -671,7 +671,7 @@ err_out:
671 * The BKL may not be held on entry here. Be sure to take it early. 671 * The BKL may not be held on entry here. Be sure to take it early.
672 */ 672 */
673 673
674static int 674int
675ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, 675ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
676 struct buffer_head *bh_result, int create, int extend_disksize) 676 struct buffer_head *bh_result, int create, int extend_disksize)
677{ 677{
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index c0272d73ab20..e7239f2f97a1 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -772,9 +772,12 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
772 772
773 773
774/* inode.c */ 774/* inode.c */
775extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); 775int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
776extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); 776struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
777extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); 777struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
778int ext3_get_block_handle(handle_t *handle, struct inode *inode,
779 sector_t iblock, struct buffer_head *bh_result, int create,
780 int extend_disksize);
778 781
779extern void ext3_read_inode (struct inode *); 782extern void ext3_read_inode (struct inode *);
780extern int ext3_write_inode (struct inode *, int); 783extern int ext3_write_inode (struct inode *, int);
diff --git a/mm/readahead.c b/mm/readahead.c
index 301b36c4a0ce..0f142a40984b 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -555,6 +555,7 @@ recheck:
555out: 555out:
556 return ra->prev_page + 1; 556 return ra->prev_page + 1;
557} 557}
558EXPORT_SYMBOL_GPL(page_cache_readahead);
558 559
559/* 560/*
560 * handle_ra_miss() is called when it is known that a page which should have 561 * handle_ra_miss() is called when it is known that a page which should have