aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2010-08-06 13:21:20 -0400
committerChris Mason <chris.mason@oracle.com>2012-03-26 16:50:37 -0400
commit727011e07cbdf87772fcc1999cccd15cc915eb62 (patch)
tree05405dc1e9c86d67dbb02ddf063bd0c137ce6707 /fs/btrfs/disk-io.c
parent81c9ad237c604adec79fd4d4034264c6669e0ab3 (diff)
Btrfs: allow metadata blocks larger than the page size
A few years ago the btrfs code to support blocks larger than the page size was disabled to fix a few corner cases in the page cache handling. This fixes the code to properly support large metadata blocks again. Since current kernels will crash early and often with larger metadata blocks, this adds an incompat bit so that older kernels can't mount it. This also does away with different blocksizes for nodes and leaves. You get a single block size for all tree blocks. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c196
1 files changed, 95 insertions, 101 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 534266fe505f..68fc93e18db8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -370,8 +370,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
370 ret = read_extent_buffer_pages(io_tree, eb, start, 370 ret = read_extent_buffer_pages(io_tree, eb, start,
371 WAIT_COMPLETE, 371 WAIT_COMPLETE,
372 btree_get_extent, mirror_num); 372 btree_get_extent, mirror_num);
373 if (!ret && 373 if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
374 !verify_parent_transid(io_tree, eb, parent_transid))
375 return ret; 374 return ret;
376 375
377 /* 376 /*
@@ -406,14 +405,11 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
406 u64 found_start; 405 u64 found_start;
407 unsigned long len; 406 unsigned long len;
408 struct extent_buffer *eb; 407 struct extent_buffer *eb;
409 int ret;
410 408
411 tree = &BTRFS_I(page->mapping->host)->io_tree; 409 tree = &BTRFS_I(page->mapping->host)->io_tree;
412 410
413 if (page->private == EXTENT_PAGE_PRIVATE) { 411 if (page->private == EXTENT_PAGE_PRIVATE)
414 WARN_ON(1);
415 goto out; 412 goto out;
416 }
417 if (!page->private) { 413 if (!page->private) {
418 WARN_ON(1); 414 WARN_ON(1);
419 goto out; 415 goto out;
@@ -421,22 +417,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
421 len = page->private >> 2; 417 len = page->private >> 2;
422 WARN_ON(len == 0); 418 WARN_ON(len == 0);
423 419
424 eb = alloc_extent_buffer(tree, start, len, page); 420 eb = find_extent_buffer(tree, start, len);
425 if (eb == NULL) {
426 WARN_ON(1);
427 goto out;
428 }
429 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
430 btrfs_header_generation(eb));
431 BUG_ON(ret);
432 WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
433 421
434 found_start = btrfs_header_bytenr(eb); 422 found_start = btrfs_header_bytenr(eb);
435 if (found_start != start) { 423 if (found_start != start) {
436 WARN_ON(1); 424 WARN_ON(1);
437 goto err; 425 goto err;
438 } 426 }
439 if (eb->first_page != page) { 427 if (eb->pages[0] != page) {
440 WARN_ON(1); 428 WARN_ON(1);
441 goto err; 429 goto err;
442 } 430 }
@@ -537,6 +525,41 @@ static noinline int check_leaf(struct btrfs_root *root,
537 return 0; 525 return 0;
538} 526}
539 527
528struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
529 struct page *page, int max_walk)
530{
531 struct extent_buffer *eb;
532 u64 start = page_offset(page);
533 u64 target = start;
534 u64 min_start;
535
536 if (start < max_walk)
537 min_start = 0;
538 else
539 min_start = start - max_walk;
540
541 while (start >= min_start) {
542 eb = find_extent_buffer(tree, start, 0);
543 if (eb) {
544 /*
545 * we found an extent buffer and it contains our page
546 * horray!
547 */
548 if (eb->start <= target &&
549 eb->start + eb->len > target)
550 return eb;
551
552 /* we found an extent buffer that wasn't for us */
553 free_extent_buffer(eb);
554 return NULL;
555 }
556 if (start == 0)
557 break;
558 start -= PAGE_CACHE_SIZE;
559 }
560 return NULL;
561}
562
540static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 563static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
541 struct extent_state *state) 564 struct extent_state *state)
542{ 565{
@@ -547,24 +570,25 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
547 struct extent_buffer *eb; 570 struct extent_buffer *eb;
548 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 571 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
549 int ret = 0; 572 int ret = 0;
573 int reads_done;
550 574
551 tree = &BTRFS_I(page->mapping->host)->io_tree;
552 if (page->private == EXTENT_PAGE_PRIVATE)
553 goto out;
554 if (!page->private) 575 if (!page->private)
555 goto out; 576 goto out;
556 577
578 tree = &BTRFS_I(page->mapping->host)->io_tree;
557 len = page->private >> 2; 579 len = page->private >> 2;
558 WARN_ON(len == 0);
559 580
560 eb = alloc_extent_buffer(tree, start, len, page); 581 eb = find_eb_for_page(tree, page, max(root->leafsize, root->nodesize));
561 if (eb == NULL) { 582 if (!eb) {
562 ret = -EIO; 583 ret = -EIO;
563 goto out; 584 goto out;
564 } 585 }
586 reads_done = atomic_dec_and_test(&eb->pages_reading);
587 if (!reads_done)
588 goto err;
565 589
566 found_start = btrfs_header_bytenr(eb); 590 found_start = btrfs_header_bytenr(eb);
567 if (found_start != start) { 591 if (found_start != eb->start) {
568 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 592 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
569 "%llu %llu\n", 593 "%llu %llu\n",
570 (unsigned long long)found_start, 594 (unsigned long long)found_start,
@@ -572,13 +596,6 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
572 ret = -EIO; 596 ret = -EIO;
573 goto err; 597 goto err;
574 } 598 }
575 if (eb->first_page != page) {
576 printk(KERN_INFO "btrfs bad first page %lu %lu\n",
577 eb->first_page->index, page->index);
578 WARN_ON(1);
579 ret = -EIO;
580 goto err;
581 }
582 if (check_tree_block_fsid(root, eb)) { 599 if (check_tree_block_fsid(root, eb)) {
583 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 600 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
584 (unsigned long long)eb->start); 601 (unsigned long long)eb->start);
@@ -606,14 +623,14 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
606 ret = -EIO; 623 ret = -EIO;
607 } 624 }
608 625
609 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
610 end = eb->start + end - 1;
611err: 626err:
612 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { 627 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
613 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); 628 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
614 btree_readahead_hook(root, eb, eb->start, ret); 629 btree_readahead_hook(root, eb, eb->start, ret);
615 } 630 }
616 631
632 if (ret && eb)
633 clear_extent_buffer_uptodate(tree, eb, NULL);
617 free_extent_buffer(eb); 634 free_extent_buffer(eb);
618out: 635out:
619 return ret; 636 return ret;
@@ -637,7 +654,7 @@ static int btree_io_failed_hook(struct bio *failed_bio,
637 len = page->private >> 2; 654 len = page->private >> 2;
638 WARN_ON(len == 0); 655 WARN_ON(len == 0);
639 656
640 eb = alloc_extent_buffer(tree, start, len, page); 657 eb = alloc_extent_buffer(tree, start, len);
641 if (eb == NULL) 658 if (eb == NULL)
642 goto out; 659 goto out;
643 660
@@ -896,28 +913,14 @@ static int btree_migratepage(struct address_space *mapping,
896static int btree_writepage(struct page *page, struct writeback_control *wbc) 913static int btree_writepage(struct page *page, struct writeback_control *wbc)
897{ 914{
898 struct extent_io_tree *tree; 915 struct extent_io_tree *tree;
899 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
900 struct extent_buffer *eb;
901 int was_dirty;
902
903 tree = &BTRFS_I(page->mapping->host)->io_tree; 916 tree = &BTRFS_I(page->mapping->host)->io_tree;
917
904 if (!(current->flags & PF_MEMALLOC)) { 918 if (!(current->flags & PF_MEMALLOC)) {
905 return extent_write_full_page(tree, page, 919 return extent_write_full_page(tree, page,
906 btree_get_extent, wbc); 920 btree_get_extent, wbc);
907 } 921 }
908 922
909 redirty_page_for_writepage(wbc, page); 923 redirty_page_for_writepage(wbc, page);
910 eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
911 WARN_ON(!eb);
912
913 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
914 if (!was_dirty) {
915 spin_lock(&root->fs_info->delalloc_lock);
916 root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
917 spin_unlock(&root->fs_info->delalloc_lock);
918 }
919 free_extent_buffer(eb);
920
921 unlock_page(page); 924 unlock_page(page);
922 return 0; 925 return 0;
923} 926}
@@ -954,6 +957,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
954{ 957{
955 struct extent_io_tree *tree; 958 struct extent_io_tree *tree;
956 struct extent_map_tree *map; 959 struct extent_map_tree *map;
960 struct extent_buffer *eb;
961 struct btrfs_root *root;
957 int ret; 962 int ret;
958 963
959 if (PageWriteback(page) || PageDirty(page)) 964 if (PageWriteback(page) || PageDirty(page))
@@ -962,6 +967,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
962 tree = &BTRFS_I(page->mapping->host)->io_tree; 967 tree = &BTRFS_I(page->mapping->host)->io_tree;
963 map = &BTRFS_I(page->mapping->host)->extent_tree; 968 map = &BTRFS_I(page->mapping->host)->extent_tree;
964 969
970 root = BTRFS_I(page->mapping->host)->root;
971 if (page->private == EXTENT_PAGE_PRIVATE) {
972 eb = find_eb_for_page(tree, page, max(root->leafsize, root->nodesize));
973 free_extent_buffer(eb);
974 if (eb)
975 return 0;
976 }
965 /* 977 /*
966 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing 978 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
967 * slab allocation from alloc_extent_state down the callchain where 979 * slab allocation from alloc_extent_state down the callchain where
@@ -1074,20 +1086,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1074 struct extent_buffer *eb; 1086 struct extent_buffer *eb;
1075 1087
1076 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 1088 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
1077 bytenr, blocksize, NULL); 1089 bytenr, blocksize);
1078 return eb; 1090 return eb;
1079} 1091}
1080 1092
1081 1093
1082int btrfs_write_tree_block(struct extent_buffer *buf) 1094int btrfs_write_tree_block(struct extent_buffer *buf)
1083{ 1095{
1084 return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, 1096 return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
1085 buf->start + buf->len - 1); 1097 buf->start + buf->len - 1);
1086} 1098}
1087 1099
1088int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) 1100int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
1089{ 1101{
1090 return filemap_fdatawait_range(buf->first_page->mapping, 1102 return filemap_fdatawait_range(buf->pages[0]->mapping,
1091 buf->start, buf->start + buf->len - 1); 1103 buf->start, buf->start + buf->len - 1);
1092} 1104}
1093 1105
@@ -1513,41 +1525,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1513 return 0; 1525 return 0;
1514} 1526}
1515 1527
1516static int bio_ready_for_csum(struct bio *bio)
1517{
1518 u64 length = 0;
1519 u64 buf_len = 0;
1520 u64 start = 0;
1521 struct page *page;
1522 struct extent_io_tree *io_tree = NULL;
1523 struct bio_vec *bvec;
1524 int i;
1525 int ret;
1526
1527 bio_for_each_segment(bvec, bio, i) {
1528 page = bvec->bv_page;
1529 if (page->private == EXTENT_PAGE_PRIVATE) {
1530 length += bvec->bv_len;
1531 continue;
1532 }
1533 if (!page->private) {
1534 length += bvec->bv_len;
1535 continue;
1536 }
1537 length = bvec->bv_len;
1538 buf_len = page->private >> 2;
1539 start = page_offset(page) + bvec->bv_offset;
1540 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1541 }
1542 /* are we fully contained in this bio? */
1543 if (buf_len <= length)
1544 return 1;
1545
1546 ret = extent_range_uptodate(io_tree, start + length,
1547 start + buf_len - 1);
1548 return ret;
1549}
1550
1551/* 1528/*
1552 * called by the kthread helper functions to finally call the bio end_io 1529 * called by the kthread helper functions to finally call the bio end_io
1553 * functions. This is where read checksum verification actually happens 1530 * functions. This is where read checksum verification actually happens
@@ -1563,17 +1540,6 @@ static void end_workqueue_fn(struct btrfs_work *work)
1563 bio = end_io_wq->bio; 1540 bio = end_io_wq->bio;
1564 fs_info = end_io_wq->info; 1541 fs_info = end_io_wq->info;
1565 1542
1566 /* metadata bio reads are special because the whole tree block must
1567 * be checksummed at once. This makes sure the entire block is in
1568 * ram and up to date before trying to verify things. For
1569 * blocksize <= pagesize, it is basically a noop
1570 */
1571 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1572 !bio_ready_for_csum(bio)) {
1573 btrfs_queue_worker(&fs_info->endio_meta_workers,
1574 &end_io_wq->work);
1575 return;
1576 }
1577 error = end_io_wq->error; 1543 error = end_io_wq->error;
1578 bio->bi_private = end_io_wq->private; 1544 bio->bi_private = end_io_wq->private;
1579 bio->bi_end_io = end_io_wq->end_io; 1545 bio->bi_end_io = end_io_wq->end_io;
@@ -2135,10 +2101,38 @@ int open_ctree(struct super_block *sb,
2135 goto fail_alloc; 2101 goto fail_alloc;
2136 } 2102 }
2137 2103
2104 if (btrfs_super_leafsize(disk_super) !=
2105 btrfs_super_nodesize(disk_super)) {
2106 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2107 "blocksizes don't match. node %d leaf %d\n",
2108 btrfs_super_nodesize(disk_super),
2109 btrfs_super_leafsize(disk_super));
2110 err = -EINVAL;
2111 goto fail_alloc;
2112 }
2113 if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
2114 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2115 "blocksize (%d) was too large\n",
2116 btrfs_super_leafsize(disk_super));
2117 err = -EINVAL;
2118 goto fail_alloc;
2119 }
2120
2138 features = btrfs_super_incompat_flags(disk_super); 2121 features = btrfs_super_incompat_flags(disk_super);
2139 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2122 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2140 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 2123 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
2141 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2124 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2125
2126 /*
2127 * flag our filesystem as having big metadata blocks if
2128 * they are bigger than the page size
2129 */
2130 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
2131 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
2132 printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
2133 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
2134 }
2135
2142 btrfs_set_super_incompat_flags(disk_super, features); 2136 btrfs_set_super_incompat_flags(disk_super, features);
2143 2137
2144 features = btrfs_super_compat_ro_flags(disk_super) & 2138 features = btrfs_super_compat_ro_flags(disk_super) &
@@ -3122,7 +3116,7 @@ int close_ctree(struct btrfs_root *root)
3122int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) 3116int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3123{ 3117{
3124 int ret; 3118 int ret;
3125 struct inode *btree_inode = buf->first_page->mapping->host; 3119 struct inode *btree_inode = buf->pages[0]->mapping->host;
3126 3120
3127 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, 3121 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
3128 NULL); 3122 NULL);
@@ -3136,14 +3130,14 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3136 3130
3137int btrfs_set_buffer_uptodate(struct extent_buffer *buf) 3131int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
3138{ 3132{
3139 struct inode *btree_inode = buf->first_page->mapping->host; 3133 struct inode *btree_inode = buf->pages[0]->mapping->host;
3140 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, 3134 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
3141 buf); 3135 buf);
3142} 3136}
3143 3137
3144void btrfs_mark_buffer_dirty(struct extent_buffer *buf) 3138void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3145{ 3139{
3146 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3140 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3147 u64 transid = btrfs_header_generation(buf); 3141 u64 transid = btrfs_header_generation(buf);
3148 struct inode *btree_inode = root->fs_info->btree_inode; 3142 struct inode *btree_inode = root->fs_info->btree_inode;
3149 int was_dirty; 3143 int was_dirty;
@@ -3212,7 +3206,7 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
3212 3206
3213int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 3207int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3214{ 3208{
3215 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3209 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3216 int ret; 3210 int ret;
3217 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 3211 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3218 if (ret == 0) 3212 if (ret == 0)