diff options
author | Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> | 2008-07-28 18:46:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-28 19:30:21 -0400 |
commit | 8ab22b9abb5c55413802e4adc9aa6223324547c3 (patch) | |
tree | cff3319e1275e8a7c083d492889ec6bd0c7712d3 /fs/ext4 | |
parent | d84a52f62f6a396ed77aa0052da74ca9e760b28a (diff) |
vfs: pagecache usage optimization for pagesize!=blocksize
When we read part of a file through the page cache and a page already
exists at the corresponding index but is not uptodate, read IO is issued
to bring the whole page uptodate.
I think this is fine in a pagesize == blocksize environment, but there is
room for improvement in a pagesize != blocksize environment, because in
that case a page can have multiple buffers, and some of those buffers can
be uptodate even if the page as a whole is not.
So I suggest that when all buffers which correspond to a part of a file
that we want to read are uptodate, use this pagecache and copy data from
this pagecache to user buffer even if a page is not uptodate. This can
reduce read IO and improve system throughput.
I wrote a benchmark program and measured the results with it.
The benchmark does the following:
1: mount and open a test file.
2: create a 512MB file.
3: close a file and umount.
4: mount and again open a test file.
5: pwrite to the test file 300000 times at random offsets; each offset
is aligned to the IO size (1024 bytes).
6: measure time of preading randomly 100000 times on a test file.
The result was:
2.6.26
330 sec
2.6.26-patched
226 sec
Arch:i386
Filesystem:ext3
Blocksize:1024 bytes
Memory: 1GB
On ext3/4, a file is written through buffers, block by block. So mixed
random read/write workloads, and random reads that follow random writes,
are optimized by this patch in a pagesize != blocksize environment. The
test result above demonstrates this.
The benchmark program is as follows:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
/* Size in bytes of each individual write/pwrite/pread issued by the benchmark. */
#define LEN 1024
/*
 * Number of LEN-byte writes used to create the test file.  The expansion is
 * parenthesized so that LOOP behaves as a single value inside any expression
 * (e.g. "x / LOOP" or "x % LOOP").
 */
#define LOOP (1024*512) /* 512MB */
/*
 * Benchmark: random pread after random pwrite on a freshly remounted file.
 *
 * Steps:
 *   1. mount /dev/sda1 on /root/test1/ as ext3 and create the test file by
 *      writing LOOP buffers of LEN zero bytes;
 *   2. close and umount, then mount again and reopen the file;
 *   3. pwrite LEN bytes at 300000 random LEN-aligned offsets;
 *   4. time 100000 preads of LEN bytes at random LEN-aligned offsets and
 *      print the elapsed wall-clock seconds.
 *
 * Returns 0 on success; exits with status 1 as soon as any mount, umount,
 * open, write, pwrite or pread fails, since a timing taken after a failed
 * I/O would be meaningless.
 */
int main(void)
{
	unsigned long i, offset, filesize;
	int fd;
	char buf[LEN];
	time_t t1, t2;

	if (mount("/dev/sda1", "/root/test1/", "ext3", 0, NULL) < 0) {
		/* perror() appends ": <error text>\n" itself, so no newline here. */
		perror("cannot mount");
		exit(1);
	}

	memset(buf, 0, LEN);

	/* O_CREAT requires an explicit mode argument; without it the mode is garbage. */
	fd = open("/root/test1/testfile", O_CREAT|O_RDWR|O_TRUNC, 0644);
	if (fd < 0) {
		perror("cannot open file");
		exit(1);
	}

	for (i = 0; i < LOOP; i++) {
		ssize_t written = write(fd, buf, LEN);

		if (written < 0) {
			perror("cannot write file");
			exit(1);
		}
		if (written != LEN) {
			/* A short write would leave the file smaller than expected. */
			fprintf(stderr, "short write\n");
			exit(1);
		}
	}

	close(fd);

	/* Remount so the reads below do not find the file already cached. */
	if (umount("/root/test1/") < 0) {
		perror("cannot umount");
		exit(1);
	}

	if (mount("/dev/sda1", "/root/test1/", "ext3", 0, NULL) < 0) {
		perror("cannot mount");
		exit(1);
	}

	fd = open("/root/test1/testfile", O_RDWR);
	if (fd < 0) {
		perror("cannot open file");
		exit(1);
	}

	/* Widen before multiplying so the product is not computed in int. */
	filesize = (unsigned long)LEN * LOOP;

	for (i = 0; i < 300000; i++) {
		/* Random offset inside the file, rounded down to a LEN boundary. */
		offset = (random() % filesize) & (~(LEN - 1));
		if (pwrite(fd, buf, LEN, (off_t)offset) < 0) {
			perror("cannot pwrite file");
			exit(1);
		}
	}

	printf("start test\n");
	time(&t1);

	for (i = 0; i < 100000; i++) {
		offset = (random() % filesize) & (~(LEN - 1));
		if (pread(fd, buf, LEN, (off_t)offset) < 0) {
			perror("cannot pread file");
			exit(1);
		}
	}

	time(&t2);
	/* time_t is not guaranteed to be long; cast to match the %ld specifier. */
	printf("%ld sec\n", (long)(t2 - t1));

	close(fd);

	if (umount("/root/test1/") < 0) {
		perror("cannot umount");
		exit(1);
	}

	return 0;
}
Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@ucw.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/inode.c | 92 |
1 files changed, 48 insertions, 44 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8ca2763df091..9843b046c235 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
2806 | } | 2806 | } |
2807 | 2807 | ||
2808 | static const struct address_space_operations ext4_ordered_aops = { | 2808 | static const struct address_space_operations ext4_ordered_aops = { |
2809 | .readpage = ext4_readpage, | 2809 | .readpage = ext4_readpage, |
2810 | .readpages = ext4_readpages, | 2810 | .readpages = ext4_readpages, |
2811 | .writepage = ext4_normal_writepage, | 2811 | .writepage = ext4_normal_writepage, |
2812 | .sync_page = block_sync_page, | 2812 | .sync_page = block_sync_page, |
2813 | .write_begin = ext4_write_begin, | 2813 | .write_begin = ext4_write_begin, |
2814 | .write_end = ext4_ordered_write_end, | 2814 | .write_end = ext4_ordered_write_end, |
2815 | .bmap = ext4_bmap, | 2815 | .bmap = ext4_bmap, |
2816 | .invalidatepage = ext4_invalidatepage, | 2816 | .invalidatepage = ext4_invalidatepage, |
2817 | .releasepage = ext4_releasepage, | 2817 | .releasepage = ext4_releasepage, |
2818 | .direct_IO = ext4_direct_IO, | 2818 | .direct_IO = ext4_direct_IO, |
2819 | .migratepage = buffer_migrate_page, | 2819 | .migratepage = buffer_migrate_page, |
2820 | .is_partially_uptodate = block_is_partially_uptodate, | ||
2820 | }; | 2821 | }; |
2821 | 2822 | ||
2822 | static const struct address_space_operations ext4_writeback_aops = { | 2823 | static const struct address_space_operations ext4_writeback_aops = { |
2823 | .readpage = ext4_readpage, | 2824 | .readpage = ext4_readpage, |
2824 | .readpages = ext4_readpages, | 2825 | .readpages = ext4_readpages, |
2825 | .writepage = ext4_normal_writepage, | 2826 | .writepage = ext4_normal_writepage, |
2826 | .sync_page = block_sync_page, | 2827 | .sync_page = block_sync_page, |
2827 | .write_begin = ext4_write_begin, | 2828 | .write_begin = ext4_write_begin, |
2828 | .write_end = ext4_writeback_write_end, | 2829 | .write_end = ext4_writeback_write_end, |
2829 | .bmap = ext4_bmap, | 2830 | .bmap = ext4_bmap, |
2830 | .invalidatepage = ext4_invalidatepage, | 2831 | .invalidatepage = ext4_invalidatepage, |
2831 | .releasepage = ext4_releasepage, | 2832 | .releasepage = ext4_releasepage, |
2832 | .direct_IO = ext4_direct_IO, | 2833 | .direct_IO = ext4_direct_IO, |
2833 | .migratepage = buffer_migrate_page, | 2834 | .migratepage = buffer_migrate_page, |
2835 | .is_partially_uptodate = block_is_partially_uptodate, | ||
2834 | }; | 2836 | }; |
2835 | 2837 | ||
2836 | static const struct address_space_operations ext4_journalled_aops = { | 2838 | static const struct address_space_operations ext4_journalled_aops = { |
2837 | .readpage = ext4_readpage, | 2839 | .readpage = ext4_readpage, |
2838 | .readpages = ext4_readpages, | 2840 | .readpages = ext4_readpages, |
2839 | .writepage = ext4_journalled_writepage, | 2841 | .writepage = ext4_journalled_writepage, |
2840 | .sync_page = block_sync_page, | 2842 | .sync_page = block_sync_page, |
2841 | .write_begin = ext4_write_begin, | 2843 | .write_begin = ext4_write_begin, |
2842 | .write_end = ext4_journalled_write_end, | 2844 | .write_end = ext4_journalled_write_end, |
2843 | .set_page_dirty = ext4_journalled_set_page_dirty, | 2845 | .set_page_dirty = ext4_journalled_set_page_dirty, |
2844 | .bmap = ext4_bmap, | 2846 | .bmap = ext4_bmap, |
2845 | .invalidatepage = ext4_invalidatepage, | 2847 | .invalidatepage = ext4_invalidatepage, |
2846 | .releasepage = ext4_releasepage, | 2848 | .releasepage = ext4_releasepage, |
2849 | .is_partially_uptodate = block_is_partially_uptodate, | ||
2847 | }; | 2850 | }; |
2848 | 2851 | ||
2849 | static const struct address_space_operations ext4_da_aops = { | 2852 | static const struct address_space_operations ext4_da_aops = { |
2850 | .readpage = ext4_readpage, | 2853 | .readpage = ext4_readpage, |
2851 | .readpages = ext4_readpages, | 2854 | .readpages = ext4_readpages, |
2852 | .writepage = ext4_da_writepage, | 2855 | .writepage = ext4_da_writepage, |
2853 | .writepages = ext4_da_writepages, | 2856 | .writepages = ext4_da_writepages, |
2854 | .sync_page = block_sync_page, | 2857 | .sync_page = block_sync_page, |
2855 | .write_begin = ext4_da_write_begin, | 2858 | .write_begin = ext4_da_write_begin, |
2856 | .write_end = ext4_da_write_end, | 2859 | .write_end = ext4_da_write_end, |
2857 | .bmap = ext4_bmap, | 2860 | .bmap = ext4_bmap, |
2858 | .invalidatepage = ext4_da_invalidatepage, | 2861 | .invalidatepage = ext4_da_invalidatepage, |
2859 | .releasepage = ext4_releasepage, | 2862 | .releasepage = ext4_releasepage, |
2860 | .direct_IO = ext4_direct_IO, | 2863 | .direct_IO = ext4_direct_IO, |
2861 | .migratepage = buffer_migrate_page, | 2864 | .migratepage = buffer_migrate_page, |
2865 | .is_partially_uptodate = block_is_partially_uptodate, | ||
2862 | }; | 2866 | }; |
2863 | 2867 | ||
2864 | void ext4_set_aops(struct inode *inode) | 2868 | void ext4_set_aops(struct inode *inode) |