diff options
author | Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> | 2008-07-28 18:46:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-28 19:30:21 -0400 |
commit | 8ab22b9abb5c55413802e4adc9aa6223324547c3 (patch) | |
tree | cff3319e1275e8a7c083d492889ec6bd0c7712d3 /fs/ext3 | |
parent | d84a52f62f6a396ed77aa0052da74ca9e760b28a (diff) |
vfs: pagecache usage optimization for pagesize!=blocksize
When we read some part of a file through pagecache, if there is a
pagecache of corresponding index but this page is not uptodate, read IO
is issued and this page will be uptodate.
I think this is good for pagesize == blocksize environment but there is
room for improvement on pagesize != blocksize environment. Because in
this case a page can have multiple buffers and even if a page is not
uptodate, some buffers can be uptodate.
So I suggest that when all buffers which correspond to a part of a file
that we want to read are uptodate, use this pagecache and copy data from
this pagecache to user buffer even if a page is not uptodate. This can
reduce read IO and improve system throughput.
I wrote a benchmark program and got result number with this program.
This benchmark do:
1: mount and open a test file.
2: create a 512MB file.
3: close a file and umount.
4: mount and again open a test file.
5: pwrite randomly 300000 times on a test file. offset is aligned
by IO size(1024bytes).
6: measure time of preading randomly 100000 times on a test file.
The result was:
2.6.26
330 sec
2.6.26-patched
226 sec
Arch:i386
Filesystem:ext3
Blocksize:1024 bytes
Memory: 1GB
On ext3/4, a file is written through buffer/block. So random read/write
mixed workloads or random read after random write workloads are optimized
with this patch under pagesize != blocksize environment. This test result
showed this.
The benchmark program is as follows:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#define LEN 1024
#define LOOP 1024*512 /* 512MB */
main(void)
{
unsigned long i, offset, filesize;
int fd;
char buf[LEN];
time_t t1, t2;
if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) {
perror("cannot mount\n");
exit(1);
}
memset(buf, 0, LEN);
fd = open("/root/test1/testfile", O_CREAT|O_RDWR|O_TRUNC);
if (fd < 0) {
perror("cannot open file\n");
exit(1);
}
for (i = 0; i < LOOP; i++)
write(fd, buf, LEN);
close(fd);
if (umount("/root/test1/") < 0) {
perror("cannot umount\n");
exit(1);
}
if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) {
perror("cannot mount\n");
exit(1);
}
fd = open("/root/test1/testfile", O_RDWR);
if (fd < 0) {
perror("cannot open file\n");
exit(1);
}
filesize = LEN * LOOP;
for (i = 0; i < 300000; i++){
offset = (random() % filesize) & (~(LEN - 1));
pwrite(fd, buf, LEN, offset);
}
printf("start test\n");
time(&t1);
for (i = 0; i < 100000; i++){
offset = (random() % filesize) & (~(LEN - 1));
pread(fd, buf, LEN, offset);
}
time(&t2);
printf("%ld sec\n", t2-t1);
close(fd);
if (umount("/root/test1/") < 0) {
perror("cannot umount\n");
exit(1);
}
}
Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@ucw.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ext3')
-rw-r--r-- | fs/ext3/inode.c | 67 |
1 files changed, 35 insertions, 32 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3bf07d70b914..507d8689b111 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page) | |||
1767 | } | 1767 | } |
1768 | 1768 | ||
1769 | static const struct address_space_operations ext3_ordered_aops = { | 1769 | static const struct address_space_operations ext3_ordered_aops = { |
1770 | .readpage = ext3_readpage, | 1770 | .readpage = ext3_readpage, |
1771 | .readpages = ext3_readpages, | 1771 | .readpages = ext3_readpages, |
1772 | .writepage = ext3_ordered_writepage, | 1772 | .writepage = ext3_ordered_writepage, |
1773 | .sync_page = block_sync_page, | 1773 | .sync_page = block_sync_page, |
1774 | .write_begin = ext3_write_begin, | 1774 | .write_begin = ext3_write_begin, |
1775 | .write_end = ext3_ordered_write_end, | 1775 | .write_end = ext3_ordered_write_end, |
1776 | .bmap = ext3_bmap, | 1776 | .bmap = ext3_bmap, |
1777 | .invalidatepage = ext3_invalidatepage, | 1777 | .invalidatepage = ext3_invalidatepage, |
1778 | .releasepage = ext3_releasepage, | 1778 | .releasepage = ext3_releasepage, |
1779 | .direct_IO = ext3_direct_IO, | 1779 | .direct_IO = ext3_direct_IO, |
1780 | .migratepage = buffer_migrate_page, | 1780 | .migratepage = buffer_migrate_page, |
1781 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1781 | }; | 1782 | }; |
1782 | 1783 | ||
1783 | static const struct address_space_operations ext3_writeback_aops = { | 1784 | static const struct address_space_operations ext3_writeback_aops = { |
1784 | .readpage = ext3_readpage, | 1785 | .readpage = ext3_readpage, |
1785 | .readpages = ext3_readpages, | 1786 | .readpages = ext3_readpages, |
1786 | .writepage = ext3_writeback_writepage, | 1787 | .writepage = ext3_writeback_writepage, |
1787 | .sync_page = block_sync_page, | 1788 | .sync_page = block_sync_page, |
1788 | .write_begin = ext3_write_begin, | 1789 | .write_begin = ext3_write_begin, |
1789 | .write_end = ext3_writeback_write_end, | 1790 | .write_end = ext3_writeback_write_end, |
1790 | .bmap = ext3_bmap, | 1791 | .bmap = ext3_bmap, |
1791 | .invalidatepage = ext3_invalidatepage, | 1792 | .invalidatepage = ext3_invalidatepage, |
1792 | .releasepage = ext3_releasepage, | 1793 | .releasepage = ext3_releasepage, |
1793 | .direct_IO = ext3_direct_IO, | 1794 | .direct_IO = ext3_direct_IO, |
1794 | .migratepage = buffer_migrate_page, | 1795 | .migratepage = buffer_migrate_page, |
1796 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1795 | }; | 1797 | }; |
1796 | 1798 | ||
1797 | static const struct address_space_operations ext3_journalled_aops = { | 1799 | static const struct address_space_operations ext3_journalled_aops = { |
1798 | .readpage = ext3_readpage, | 1800 | .readpage = ext3_readpage, |
1799 | .readpages = ext3_readpages, | 1801 | .readpages = ext3_readpages, |
1800 | .writepage = ext3_journalled_writepage, | 1802 | .writepage = ext3_journalled_writepage, |
1801 | .sync_page = block_sync_page, | 1803 | .sync_page = block_sync_page, |
1802 | .write_begin = ext3_write_begin, | 1804 | .write_begin = ext3_write_begin, |
1803 | .write_end = ext3_journalled_write_end, | 1805 | .write_end = ext3_journalled_write_end, |
1804 | .set_page_dirty = ext3_journalled_set_page_dirty, | 1806 | .set_page_dirty = ext3_journalled_set_page_dirty, |
1805 | .bmap = ext3_bmap, | 1807 | .bmap = ext3_bmap, |
1806 | .invalidatepage = ext3_invalidatepage, | 1808 | .invalidatepage = ext3_invalidatepage, |
1807 | .releasepage = ext3_releasepage, | 1809 | .releasepage = ext3_releasepage, |
1810 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1808 | }; | 1811 | }; |
1809 | 1812 | ||
1810 | void ext3_set_aops(struct inode *inode) | 1813 | void ext3_set_aops(struct inode *inode) |