author    Liu Bo <liubo2009@cn.fujitsu.com>    2012-07-20 23:43:09 -0400
committer Chris Mason <chris.mason@fusionio.com>    2012-07-23 16:28:10 -0400
commit    67c9684f48ea9cbc5e9b8a1feb3151800e9dcc22
tree      737434c4bc4654ea00b284c383f0acd60ddec725
parent    df57dbe6bf73cc44305d81c24982a11da49b1f79
Btrfs: improve multi-thread buffer read
While testing with my buffer read fio jobs[1], I find that btrfs does not
perform well enough.

Here is a scenario from the fio jobs: we have 4 threads, "t1 t2 t3 t4", all
starting to buffer read the same file, and all of them will race on
add_to_page_cache_lru(). Whichever thread successfully puts its page into
the page cache takes the responsibility to read that page's data.

What's more, reading a page takes a period of time to finish, during which
the other threads can slide in and process the remaining pages:

     t1          t2          t3          t4
   add Page1
   read Page1  add Page2
     |         read Page2  add Page3
     |            |        read Page3  add Page4
     |            |           |        read Page4
-----|------------|-----------|-----------|--------
     v            v           v           v
    bio          bio         bio         bio

Now we have four bios, each of which holds only one page, since we need to
maintain consecutive pages in a bio. Thus, we can end up with far more bios
than we need.

Here we're going to
a) delay the real read-page section, and
b) try to put more pages into the page cache.

With that done, we can make each bio hold more pages and reduce the number
of bios we need.

Here are some numbers taken from the fio results:

           w/o patch                w/ patch
         -------------  --------  ---------------
   READ:   745MB/s        +25%       934MB/s

[1]:
[global]
group_reporting
thread
numjobs=4
bs=32k
rw=read
ioengine=sync
directory=/mnt/btrfs/

[READ]
filename=foobar
size=2000M
invalidate=1

Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
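The batching idea is simple enough to demonstrate outside the kernel. Below
is a minimal userspace C sketch of the same pool-then-flush pattern the
patch adopts; pool_add(), flush(), and POOL_SIZE are made-up names for
illustration, not kernel APIs. Pages are collected into a fixed pool of 16
and submitted as one batch when the pool fills, with a final drain for the
partial last batch, mirroring the pagepool[16] logic in the diff below.

    #include <stdio.h>

    #define POOL_SIZE 16

    static int pool[POOL_SIZE];
    static int nr;        /* pages currently pooled */
    static int batches;   /* number of batched submissions */

    /* Submit everything pooled so far as one batch. */
    static void flush(void)
    {
            if (nr == 0)
                    return;
            printf("batch of %2d pages: %d..%d\n", nr, pool[0], pool[nr - 1]);
            batches++;
            nr = 0;
    }

    /* Pool one page; submit only when the pool is full. */
    static void pool_add(int page)
    {
            pool[nr++] = page;
            if (nr == POOL_SIZE)
                    flush();
    }

    int main(void)
    {
            for (int page = 0; page < 100; page++)
                    pool_add(page);
            flush();  /* drain the partial final batch */
            printf("100 pages -> %d batches instead of 100 submissions\n",
                   batches);
            return 0;
    }

Consecutive pages that land in the same batch can then share a bio instead
of each triggering one, which is where the throughput win comes from.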
 fs/btrfs/extent_io.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 97efc2f22597..3e7c9ed6505b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3566,19 +3566,38 @@ int extent_readpages(struct extent_io_tree *tree,
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	unsigned long bio_flags = 0;
+	struct page *pagepool[16];
+	struct page *page;
+	int i = 0;
+	int nr = 0;
 
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
-		struct page *page = list_entry(pages->prev, struct page, lru);
+		page = list_entry(pages->prev, struct page, lru);
 
 		prefetchw(&page->flags);
 		list_del(&page->lru);
-		if (!add_to_page_cache_lru(page, mapping,
+		if (add_to_page_cache_lru(page, mapping,
 					page->index, GFP_NOFS)) {
-			__extent_read_full_page(tree, page, get_extent,
-						&bio, 0, &bio_flags);
+			page_cache_release(page);
+			continue;
 		}
-		page_cache_release(page);
+
+		pagepool[nr++] = page;
+		if (nr < ARRAY_SIZE(pagepool))
+			continue;
+		for (i = 0; i < nr; i++) {
+			__extent_read_full_page(tree, pagepool[i], get_extent,
+					&bio, 0, &bio_flags);
+			page_cache_release(pagepool[i]);
+		}
+		nr = 0;
 	}
+	for (i = 0; i < nr; i++) {
+		__extent_read_full_page(tree, pagepool[i], get_extent,
+				&bio, 0, &bio_flags);
+		page_cache_release(pagepool[i]);
+	}
+
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		return submit_one_bio(READ, bio, 0, bio_flags);
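For readability, this is how the loop in extent_readpages() reads once the
hunk above is applied, reconstructed from the diff with explanatory comments
added (the comments are not in the original):

    	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
    		page = list_entry(pages->prev, struct page, lru);
    
    		prefetchw(&page->flags);
    		list_del(&page->lru);
    		if (add_to_page_cache_lru(page, mapping,
    					page->index, GFP_NOFS)) {
    			/* another thread won the race; drop our reference */
    			page_cache_release(page);
    			continue;
    		}
    
    		/* pool the page instead of reading it immediately */
    		pagepool[nr++] = page;
    		if (nr < ARRAY_SIZE(pagepool))
    			continue;
    		/* pool is full: read all 16 pages, merging into fewer bios */
    		for (i = 0; i < nr; i++) {
    			__extent_read_full_page(tree, pagepool[i], get_extent,
    					&bio, 0, &bio_flags);
    			page_cache_release(pagepool[i]);
    		}
    		nr = 0;
    	}
    	/* drain whatever is left in the pool */
    	for (i = 0; i < nr; i++) {
    		__extent_read_full_page(tree, pagepool[i], get_extent,
    				&bio, 0, &bio_flags);
    		page_cache_release(pagepool[i]);
    	}

Because all the pages a thread wins are added to the page cache before any
of them is read, racing readers claim longer consecutive runs of pages, and
each run is then read back to back so __extent_read_full_page() can keep
extending the same bio.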