aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/ctree.h24
-rw-r--r--fs/btrfs/disk-io.c196
-rw-r--r--fs/btrfs/extent_io.c144
-rw-r--r--fs/btrfs/extent_io.h12
-rw-r--r--fs/btrfs/inode-item.c1
-rw-r--r--fs/btrfs/volumes.c2
6 files changed, 190 insertions, 189 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index edccc948e877..85ab1c5844a2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -138,6 +138,12 @@ struct btrfs_ordered_sum;
138#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 138#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
139 139
140/* 140/*
141 * the max metadata block size. This limit is somewhat artificial,
142 * but the memmove costs go through the roof for larger blocks.
143 */
144#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
145
146/*
141 * we can actually store much bigger names, but lets not confuse the rest 147 * we can actually store much bigger names, but lets not confuse the rest
142 * of linux 148 * of linux
143 */ 149 */
@@ -461,6 +467,19 @@ struct btrfs_super_block {
461#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 467#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
462#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 468#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
463#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 469#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
470/*
471 * some patches floated around with a second compression method
472 * lets save that incompat here for when they do get in
473 * Note we don't actually support it, we're just reserving the
474 * number
475 */
476#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
477
478/*
479 * older kernels tried to do bigger metadata blocks, but the
480 * code was pretty buggy. Lets not let them try anymore.
481 */
482#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
464 483
465#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 484#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
466#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 485#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -468,6 +487,7 @@ struct btrfs_super_block {
468 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 487 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
469 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 488 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
470 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 489 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
490 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
471 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) 491 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
472 492
473/* 493/*
@@ -1555,14 +1575,14 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
1555#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ 1575#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
1556static inline u##bits btrfs_##name(struct extent_buffer *eb) \ 1576static inline u##bits btrfs_##name(struct extent_buffer *eb) \
1557{ \ 1577{ \
1558 type *p = page_address(eb->first_page); \ 1578 type *p = page_address(eb->pages[0]); \
1559 u##bits res = le##bits##_to_cpu(p->member); \ 1579 u##bits res = le##bits##_to_cpu(p->member); \
1560 return res; \ 1580 return res; \
1561} \ 1581} \
1562static inline void btrfs_set_##name(struct extent_buffer *eb, \ 1582static inline void btrfs_set_##name(struct extent_buffer *eb, \
1563 u##bits val) \ 1583 u##bits val) \
1564{ \ 1584{ \
1565 type *p = page_address(eb->first_page); \ 1585 type *p = page_address(eb->pages[0]); \
1566 p->member = cpu_to_le##bits(val); \ 1586 p->member = cpu_to_le##bits(val); \
1567} 1587}
1568 1588
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 534266fe505f..68fc93e18db8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -370,8 +370,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
370 ret = read_extent_buffer_pages(io_tree, eb, start, 370 ret = read_extent_buffer_pages(io_tree, eb, start,
371 WAIT_COMPLETE, 371 WAIT_COMPLETE,
372 btree_get_extent, mirror_num); 372 btree_get_extent, mirror_num);
373 if (!ret && 373 if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
374 !verify_parent_transid(io_tree, eb, parent_transid))
375 return ret; 374 return ret;
376 375
377 /* 376 /*
@@ -406,14 +405,11 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
406 u64 found_start; 405 u64 found_start;
407 unsigned long len; 406 unsigned long len;
408 struct extent_buffer *eb; 407 struct extent_buffer *eb;
409 int ret;
410 408
411 tree = &BTRFS_I(page->mapping->host)->io_tree; 409 tree = &BTRFS_I(page->mapping->host)->io_tree;
412 410
413 if (page->private == EXTENT_PAGE_PRIVATE) { 411 if (page->private == EXTENT_PAGE_PRIVATE)
414 WARN_ON(1);
415 goto out; 412 goto out;
416 }
417 if (!page->private) { 413 if (!page->private) {
418 WARN_ON(1); 414 WARN_ON(1);
419 goto out; 415 goto out;
@@ -421,22 +417,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
421 len = page->private >> 2; 417 len = page->private >> 2;
422 WARN_ON(len == 0); 418 WARN_ON(len == 0);
423 419
424 eb = alloc_extent_buffer(tree, start, len, page); 420 eb = find_extent_buffer(tree, start, len);
425 if (eb == NULL) {
426 WARN_ON(1);
427 goto out;
428 }
429 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
430 btrfs_header_generation(eb));
431 BUG_ON(ret);
432 WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
433 421
434 found_start = btrfs_header_bytenr(eb); 422 found_start = btrfs_header_bytenr(eb);
435 if (found_start != start) { 423 if (found_start != start) {
436 WARN_ON(1); 424 WARN_ON(1);
437 goto err; 425 goto err;
438 } 426 }
439 if (eb->first_page != page) { 427 if (eb->pages[0] != page) {
440 WARN_ON(1); 428 WARN_ON(1);
441 goto err; 429 goto err;
442 } 430 }
@@ -537,6 +525,41 @@ static noinline int check_leaf(struct btrfs_root *root,
537 return 0; 525 return 0;
538} 526}
539 527
528struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
529 struct page *page, int max_walk)
530{
531 struct extent_buffer *eb;
532 u64 start = page_offset(page);
533 u64 target = start;
534 u64 min_start;
535
536 if (start < max_walk)
537 min_start = 0;
538 else
539 min_start = start - max_walk;
540
541 while (start >= min_start) {
542 eb = find_extent_buffer(tree, start, 0);
543 if (eb) {
544 /*
545 * we found an extent buffer and it contains our page
546 * horray!
547 */
548 if (eb->start <= target &&
549 eb->start + eb->len > target)
550 return eb;
551
552 /* we found an extent buffer that wasn't for us */
553 free_extent_buffer(eb);
554 return NULL;
555 }
556 if (start == 0)
557 break;
558 start -= PAGE_CACHE_SIZE;
559 }
560 return NULL;
561}
562
540static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 563static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
541 struct extent_state *state) 564 struct extent_state *state)
542{ 565{
@@ -547,24 +570,25 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
547 struct extent_buffer *eb; 570 struct extent_buffer *eb;
548 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 571 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
549 int ret = 0; 572 int ret = 0;
573 int reads_done;
550 574
551 tree = &BTRFS_I(page->mapping->host)->io_tree;
552 if (page->private == EXTENT_PAGE_PRIVATE)
553 goto out;
554 if (!page->private) 575 if (!page->private)
555 goto out; 576 goto out;
556 577
578 tree = &BTRFS_I(page->mapping->host)->io_tree;
557 len = page->private >> 2; 579 len = page->private >> 2;
558 WARN_ON(len == 0);
559 580
560 eb = alloc_extent_buffer(tree, start, len, page); 581 eb = find_eb_for_page(tree, page, max(root->leafsize, root->nodesize));
561 if (eb == NULL) { 582 if (!eb) {
562 ret = -EIO; 583 ret = -EIO;
563 goto out; 584 goto out;
564 } 585 }
586 reads_done = atomic_dec_and_test(&eb->pages_reading);
587 if (!reads_done)
588 goto err;
565 589
566 found_start = btrfs_header_bytenr(eb); 590 found_start = btrfs_header_bytenr(eb);
567 if (found_start != start) { 591 if (found_start != eb->start) {
568 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 592 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
569 "%llu %llu\n", 593 "%llu %llu\n",
570 (unsigned long long)found_start, 594 (unsigned long long)found_start,
@@ -572,13 +596,6 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
572 ret = -EIO; 596 ret = -EIO;
573 goto err; 597 goto err;
574 } 598 }
575 if (eb->first_page != page) {
576 printk(KERN_INFO "btrfs bad first page %lu %lu\n",
577 eb->first_page->index, page->index);
578 WARN_ON(1);
579 ret = -EIO;
580 goto err;
581 }
582 if (check_tree_block_fsid(root, eb)) { 599 if (check_tree_block_fsid(root, eb)) {
583 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 600 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
584 (unsigned long long)eb->start); 601 (unsigned long long)eb->start);
@@ -606,14 +623,14 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
606 ret = -EIO; 623 ret = -EIO;
607 } 624 }
608 625
609 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
610 end = eb->start + end - 1;
611err: 626err:
612 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { 627 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
613 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); 628 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
614 btree_readahead_hook(root, eb, eb->start, ret); 629 btree_readahead_hook(root, eb, eb->start, ret);
615 } 630 }
616 631
632 if (ret && eb)
633 clear_extent_buffer_uptodate(tree, eb, NULL);
617 free_extent_buffer(eb); 634 free_extent_buffer(eb);
618out: 635out:
619 return ret; 636 return ret;
@@ -637,7 +654,7 @@ static int btree_io_failed_hook(struct bio *failed_bio,
637 len = page->private >> 2; 654 len = page->private >> 2;
638 WARN_ON(len == 0); 655 WARN_ON(len == 0);
639 656
640 eb = alloc_extent_buffer(tree, start, len, page); 657 eb = alloc_extent_buffer(tree, start, len);
641 if (eb == NULL) 658 if (eb == NULL)
642 goto out; 659 goto out;
643 660
@@ -896,28 +913,14 @@ static int btree_migratepage(struct address_space *mapping,
896static int btree_writepage(struct page *page, struct writeback_control *wbc) 913static int btree_writepage(struct page *page, struct writeback_control *wbc)
897{ 914{
898 struct extent_io_tree *tree; 915 struct extent_io_tree *tree;
899 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
900 struct extent_buffer *eb;
901 int was_dirty;
902
903 tree = &BTRFS_I(page->mapping->host)->io_tree; 916 tree = &BTRFS_I(page->mapping->host)->io_tree;
917
904 if (!(current->flags & PF_MEMALLOC)) { 918 if (!(current->flags & PF_MEMALLOC)) {
905 return extent_write_full_page(tree, page, 919 return extent_write_full_page(tree, page,
906 btree_get_extent, wbc); 920 btree_get_extent, wbc);
907 } 921 }
908 922
909 redirty_page_for_writepage(wbc, page); 923 redirty_page_for_writepage(wbc, page);
910 eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
911 WARN_ON(!eb);
912
913 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
914 if (!was_dirty) {
915 spin_lock(&root->fs_info->delalloc_lock);
916 root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
917 spin_unlock(&root->fs_info->delalloc_lock);
918 }
919 free_extent_buffer(eb);
920
921 unlock_page(page); 924 unlock_page(page);
922 return 0; 925 return 0;
923} 926}
@@ -954,6 +957,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
954{ 957{
955 struct extent_io_tree *tree; 958 struct extent_io_tree *tree;
956 struct extent_map_tree *map; 959 struct extent_map_tree *map;
960 struct extent_buffer *eb;
961 struct btrfs_root *root;
957 int ret; 962 int ret;
958 963
959 if (PageWriteback(page) || PageDirty(page)) 964 if (PageWriteback(page) || PageDirty(page))
@@ -962,6 +967,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
962 tree = &BTRFS_I(page->mapping->host)->io_tree; 967 tree = &BTRFS_I(page->mapping->host)->io_tree;
963 map = &BTRFS_I(page->mapping->host)->extent_tree; 968 map = &BTRFS_I(page->mapping->host)->extent_tree;
964 969
970 root = BTRFS_I(page->mapping->host)->root;
971 if (page->private == EXTENT_PAGE_PRIVATE) {
972 eb = find_eb_for_page(tree, page, max(root->leafsize, root->nodesize));
973 free_extent_buffer(eb);
974 if (eb)
975 return 0;
976 }
965 /* 977 /*
966 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing 978 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
967 * slab allocation from alloc_extent_state down the callchain where 979 * slab allocation from alloc_extent_state down the callchain where
@@ -1074,20 +1086,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1074 struct extent_buffer *eb; 1086 struct extent_buffer *eb;
1075 1087
1076 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 1088 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
1077 bytenr, blocksize, NULL); 1089 bytenr, blocksize);
1078 return eb; 1090 return eb;
1079} 1091}
1080 1092
1081 1093
1082int btrfs_write_tree_block(struct extent_buffer *buf) 1094int btrfs_write_tree_block(struct extent_buffer *buf)
1083{ 1095{
1084 return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, 1096 return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
1085 buf->start + buf->len - 1); 1097 buf->start + buf->len - 1);
1086} 1098}
1087 1099
1088int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) 1100int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
1089{ 1101{
1090 return filemap_fdatawait_range(buf->first_page->mapping, 1102 return filemap_fdatawait_range(buf->pages[0]->mapping,
1091 buf->start, buf->start + buf->len - 1); 1103 buf->start, buf->start + buf->len - 1);
1092} 1104}
1093 1105
@@ -1513,41 +1525,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1513 return 0; 1525 return 0;
1514} 1526}
1515 1527
1516static int bio_ready_for_csum(struct bio *bio)
1517{
1518 u64 length = 0;
1519 u64 buf_len = 0;
1520 u64 start = 0;
1521 struct page *page;
1522 struct extent_io_tree *io_tree = NULL;
1523 struct bio_vec *bvec;
1524 int i;
1525 int ret;
1526
1527 bio_for_each_segment(bvec, bio, i) {
1528 page = bvec->bv_page;
1529 if (page->private == EXTENT_PAGE_PRIVATE) {
1530 length += bvec->bv_len;
1531 continue;
1532 }
1533 if (!page->private) {
1534 length += bvec->bv_len;
1535 continue;
1536 }
1537 length = bvec->bv_len;
1538 buf_len = page->private >> 2;
1539 start = page_offset(page) + bvec->bv_offset;
1540 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1541 }
1542 /* are we fully contained in this bio? */
1543 if (buf_len <= length)
1544 return 1;
1545
1546 ret = extent_range_uptodate(io_tree, start + length,
1547 start + buf_len - 1);
1548 return ret;
1549}
1550
1551/* 1528/*
1552 * called by the kthread helper functions to finally call the bio end_io 1529 * called by the kthread helper functions to finally call the bio end_io
1553 * functions. This is where read checksum verification actually happens 1530 * functions. This is where read checksum verification actually happens
@@ -1563,17 +1540,6 @@ static void end_workqueue_fn(struct btrfs_work *work)
1563 bio = end_io_wq->bio; 1540 bio = end_io_wq->bio;
1564 fs_info = end_io_wq->info; 1541 fs_info = end_io_wq->info;
1565 1542
1566 /* metadata bio reads are special because the whole tree block must
1567 * be checksummed at once. This makes sure the entire block is in
1568 * ram and up to date before trying to verify things. For
1569 * blocksize <= pagesize, it is basically a noop
1570 */
1571 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1572 !bio_ready_for_csum(bio)) {
1573 btrfs_queue_worker(&fs_info->endio_meta_workers,
1574 &end_io_wq->work);
1575 return;
1576 }
1577 error = end_io_wq->error; 1543 error = end_io_wq->error;
1578 bio->bi_private = end_io_wq->private; 1544 bio->bi_private = end_io_wq->private;
1579 bio->bi_end_io = end_io_wq->end_io; 1545 bio->bi_end_io = end_io_wq->end_io;
@@ -2135,10 +2101,38 @@ int open_ctree(struct super_block *sb,
2135 goto fail_alloc; 2101 goto fail_alloc;
2136 } 2102 }
2137 2103
2104 if (btrfs_super_leafsize(disk_super) !=
2105 btrfs_super_nodesize(disk_super)) {
2106 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2107 "blocksizes don't match. node %d leaf %d\n",
2108 btrfs_super_nodesize(disk_super),
2109 btrfs_super_leafsize(disk_super));
2110 err = -EINVAL;
2111 goto fail_alloc;
2112 }
2113 if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
2114 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2115 "blocksize (%d) was too large\n",
2116 btrfs_super_leafsize(disk_super));
2117 err = -EINVAL;
2118 goto fail_alloc;
2119 }
2120
2138 features = btrfs_super_incompat_flags(disk_super); 2121 features = btrfs_super_incompat_flags(disk_super);
2139 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2122 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2140 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 2123 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
2141 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2124 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2125
2126 /*
2127 * flag our filesystem as having big metadata blocks if
2128 * they are bigger than the page size
2129 */
2130 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
2131 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
2132 printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
2133 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
2134 }
2135
2142 btrfs_set_super_incompat_flags(disk_super, features); 2136 btrfs_set_super_incompat_flags(disk_super, features);
2143 2137
2144 features = btrfs_super_compat_ro_flags(disk_super) & 2138 features = btrfs_super_compat_ro_flags(disk_super) &
@@ -3122,7 +3116,7 @@ int close_ctree(struct btrfs_root *root)
3122int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) 3116int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3123{ 3117{
3124 int ret; 3118 int ret;
3125 struct inode *btree_inode = buf->first_page->mapping->host; 3119 struct inode *btree_inode = buf->pages[0]->mapping->host;
3126 3120
3127 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, 3121 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
3128 NULL); 3122 NULL);
@@ -3136,14 +3130,14 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3136 3130
3137int btrfs_set_buffer_uptodate(struct extent_buffer *buf) 3131int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
3138{ 3132{
3139 struct inode *btree_inode = buf->first_page->mapping->host; 3133 struct inode *btree_inode = buf->pages[0]->mapping->host;
3140 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, 3134 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
3141 buf); 3135 buf);
3142} 3136}
3143 3137
3144void btrfs_mark_buffer_dirty(struct extent_buffer *buf) 3138void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3145{ 3139{
3146 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3140 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3147 u64 transid = btrfs_header_generation(buf); 3141 u64 transid = btrfs_header_generation(buf);
3148 struct inode *btree_inode = root->fs_info->btree_inode; 3142 struct inode *btree_inode = root->fs_info->btree_inode;
3149 int was_dirty; 3143 int was_dirty;
@@ -3212,7 +3206,7 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
3212 3206
3213int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 3207int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3214{ 3208{
3215 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3209 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3216 int ret; 3210 int ret;
3217 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 3211 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3218 if (ret == 0) 3212 if (ret == 0)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a55fbe6252de..c6c9ce463c86 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3548,26 +3548,7 @@ out:
3548inline struct page *extent_buffer_page(struct extent_buffer *eb, 3548inline struct page *extent_buffer_page(struct extent_buffer *eb,
3549 unsigned long i) 3549 unsigned long i)
3550{ 3550{
3551 struct page *p; 3551 return eb->pages[i];
3552 struct address_space *mapping;
3553
3554 if (i == 0)
3555 return eb->first_page;
3556 i += eb->start >> PAGE_CACHE_SHIFT;
3557 mapping = eb->first_page->mapping;
3558 if (!mapping)
3559 return NULL;
3560
3561 /*
3562 * extent_buffer_page is only called after pinning the page
3563 * by increasing the reference count. So we know the page must
3564 * be in the radix tree.
3565 */
3566 rcu_read_lock();
3567 p = radix_tree_lookup(&mapping->page_tree, i);
3568 rcu_read_unlock();
3569
3570 return p;
3571} 3552}
3572 3553
3573inline unsigned long num_extent_pages(u64 start, u64 len) 3554inline unsigned long num_extent_pages(u64 start, u64 len)
@@ -3576,6 +3557,19 @@ inline unsigned long num_extent_pages(u64 start, u64 len)
3576 (start >> PAGE_CACHE_SHIFT); 3557 (start >> PAGE_CACHE_SHIFT);
3577} 3558}
3578 3559
3560static void __free_extent_buffer(struct extent_buffer *eb)
3561{
3562#if LEAK_DEBUG
3563 unsigned long flags;
3564 spin_lock_irqsave(&leak_lock, flags);
3565 list_del(&eb->leak_list);
3566 spin_unlock_irqrestore(&leak_lock, flags);
3567#endif
3568 if (eb->pages && eb->pages != eb->inline_pages)
3569 kfree(eb->pages);
3570 kmem_cache_free(extent_buffer_cache, eb);
3571}
3572
3579static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 3573static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3580 u64 start, 3574 u64 start,
3581 unsigned long len, 3575 unsigned long len,
@@ -3608,21 +3602,25 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3608 spin_unlock_irqrestore(&leak_lock, flags); 3602 spin_unlock_irqrestore(&leak_lock, flags);
3609#endif 3603#endif
3610 atomic_set(&eb->refs, 1); 3604 atomic_set(&eb->refs, 1);
3605 atomic_set(&eb->pages_reading, 0);
3606
3607 if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
3608 struct page **pages;
3609 int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
3610 PAGE_CACHE_SHIFT;
3611 pages = kzalloc(num_pages, mask);
3612 if (!pages) {
3613 __free_extent_buffer(eb);
3614 return NULL;
3615 }
3616 eb->pages = pages;
3617 } else {
3618 eb->pages = eb->inline_pages;
3619 }
3611 3620
3612 return eb; 3621 return eb;
3613} 3622}
3614 3623
3615static void __free_extent_buffer(struct extent_buffer *eb)
3616{
3617#if LEAK_DEBUG
3618 unsigned long flags;
3619 spin_lock_irqsave(&leak_lock, flags);
3620 list_del(&eb->leak_list);
3621 spin_unlock_irqrestore(&leak_lock, flags);
3622#endif
3623 kmem_cache_free(extent_buffer_cache, eb);
3624}
3625
3626/* 3624/*
3627 * Helper for releasing extent buffer page. 3625 * Helper for releasing extent buffer page.
3628 */ 3626 */
@@ -3632,9 +3630,6 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3632 unsigned long index; 3630 unsigned long index;
3633 struct page *page; 3631 struct page *page;
3634 3632
3635 if (!eb->first_page)
3636 return;
3637
3638 index = num_extent_pages(eb->start, eb->len); 3633 index = num_extent_pages(eb->start, eb->len);
3639 if (start_idx >= index) 3634 if (start_idx >= index)
3640 return; 3635 return;
@@ -3657,8 +3652,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3657} 3652}
3658 3653
3659struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3654struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3660 u64 start, unsigned long len, 3655 u64 start, unsigned long len)
3661 struct page *page0)
3662{ 3656{
3663 unsigned long num_pages = num_extent_pages(start, len); 3657 unsigned long num_pages = num_extent_pages(start, len);
3664 unsigned long i; 3658 unsigned long i;
@@ -3674,7 +3668,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3674 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3668 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3675 if (eb && atomic_inc_not_zero(&eb->refs)) { 3669 if (eb && atomic_inc_not_zero(&eb->refs)) {
3676 rcu_read_unlock(); 3670 rcu_read_unlock();
3677 mark_page_accessed(eb->first_page); 3671 mark_page_accessed(eb->pages[0]);
3678 return eb; 3672 return eb;
3679 } 3673 }
3680 rcu_read_unlock(); 3674 rcu_read_unlock();
@@ -3683,32 +3677,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3683 if (!eb) 3677 if (!eb)
3684 return NULL; 3678 return NULL;
3685 3679
3686 if (page0) { 3680 for (i = 0; i < num_pages; i++, index++) {
3687 eb->first_page = page0;
3688 i = 1;
3689 index++;
3690 page_cache_get(page0);
3691 mark_page_accessed(page0);
3692 set_page_extent_mapped(page0);
3693 set_page_extent_head(page0, len);
3694 uptodate = PageUptodate(page0);
3695 } else {
3696 i = 0;
3697 }
3698 for (; i < num_pages; i++, index++) {
3699 p = find_or_create_page(mapping, index, GFP_NOFS); 3681 p = find_or_create_page(mapping, index, GFP_NOFS);
3700 if (!p) { 3682 if (!p) {
3701 WARN_ON(1); 3683 WARN_ON(1);
3702 goto free_eb; 3684 goto free_eb;
3703 } 3685 }
3704 set_page_extent_mapped(p);
3705 mark_page_accessed(p); 3686 mark_page_accessed(p);
3706 if (i == 0) { 3687 eb->pages[i] = p;
3707 eb->first_page = p;
3708 set_page_extent_head(p, len);
3709 } else {
3710 set_page_private(p, EXTENT_PAGE_PRIVATE);
3711 }
3712 if (!PageUptodate(p)) 3688 if (!PageUptodate(p))
3713 uptodate = 0; 3689 uptodate = 0;
3714 3690
@@ -3716,8 +3692,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3716 * see below about how we avoid a nasty race with release page 3692 * see below about how we avoid a nasty race with release page
3717 * and why we unlock later 3693 * and why we unlock later
3718 */ 3694 */
3719 if (i != 0)
3720 unlock_page(p);
3721 } 3695 }
3722 if (uptodate) 3696 if (uptodate)
3723 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3697 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3751,15 +3725,23 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3751 * after the extent buffer is in the radix tree so 3725 * after the extent buffer is in the radix tree so
3752 * it doesn't get lost 3726 * it doesn't get lost
3753 */ 3727 */
3754 set_page_extent_mapped(eb->first_page); 3728 set_page_extent_mapped(eb->pages[0]);
3755 set_page_extent_head(eb->first_page, eb->len); 3729 set_page_extent_head(eb->pages[0], eb->len);
3756 if (!page0) 3730 SetPageChecked(eb->pages[0]);
3757 unlock_page(eb->first_page); 3731 for (i = 1; i < num_pages; i++) {
3732 p = extent_buffer_page(eb, i);
3733 set_page_extent_mapped(p);
3734 ClearPageChecked(p);
3735 unlock_page(p);
3736 }
3737 unlock_page(eb->pages[0]);
3758 return eb; 3738 return eb;
3759 3739
3760free_eb: 3740free_eb:
3761 if (eb->first_page && !page0) 3741 for (i = 0; i < num_pages; i++) {
3762 unlock_page(eb->first_page); 3742 if (eb->pages[i])
3743 unlock_page(eb->pages[i]);
3744 }
3763 3745
3764 if (!atomic_dec_and_test(&eb->refs)) 3746 if (!atomic_dec_and_test(&eb->refs))
3765 return exists; 3747 return exists;
@@ -3776,7 +3758,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3776 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3758 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3777 if (eb && atomic_inc_not_zero(&eb->refs)) { 3759 if (eb && atomic_inc_not_zero(&eb->refs)) {
3778 rcu_read_unlock(); 3760 rcu_read_unlock();
3779 mark_page_accessed(eb->first_page); 3761 mark_page_accessed(eb->pages[0]);
3780 return eb; 3762 return eb;
3781 } 3763 }
3782 rcu_read_unlock(); 3764 rcu_read_unlock();
@@ -3981,8 +3963,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3981 int ret = 0; 3963 int ret = 0;
3982 int locked_pages = 0; 3964 int locked_pages = 0;
3983 int all_uptodate = 1; 3965 int all_uptodate = 1;
3984 int inc_all_pages = 0;
3985 unsigned long num_pages; 3966 unsigned long num_pages;
3967 unsigned long num_reads = 0;
3986 struct bio *bio = NULL; 3968 struct bio *bio = NULL;
3987 unsigned long bio_flags = 0; 3969 unsigned long bio_flags = 0;
3988 3970
@@ -4014,8 +3996,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4014 lock_page(page); 3996 lock_page(page);
4015 } 3997 }
4016 locked_pages++; 3998 locked_pages++;
4017 if (!PageUptodate(page)) 3999 if (!PageUptodate(page)) {
4000 num_reads++;
4018 all_uptodate = 0; 4001 all_uptodate = 0;
4002 }
4019 } 4003 }
4020 if (all_uptodate) { 4004 if (all_uptodate) {
4021 if (start_i == 0) 4005 if (start_i == 0)
@@ -4023,20 +4007,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4023 goto unlock_exit; 4007 goto unlock_exit;
4024 } 4008 }
4025 4009
4010 atomic_set(&eb->pages_reading, num_reads);
4026 for (i = start_i; i < num_pages; i++) { 4011 for (i = start_i; i < num_pages; i++) {
4027 page = extent_buffer_page(eb, i); 4012 page = extent_buffer_page(eb, i);
4028
4029 WARN_ON(!PagePrivate(page));
4030
4031 set_page_extent_mapped(page); 4013 set_page_extent_mapped(page);
4032 if (i == 0) 4014 if (i == 0)
4033 set_page_extent_head(page, eb->len); 4015 set_page_extent_head(page, eb->len);
4034
4035 if (inc_all_pages)
4036 page_cache_get(page);
4037 if (!PageUptodate(page)) { 4016 if (!PageUptodate(page)) {
4038 if (start_i == 0)
4039 inc_all_pages = 1;
4040 ClearPageError(page); 4017 ClearPageError(page);
4041 err = __extent_read_full_page(tree, page, 4018 err = __extent_read_full_page(tree, page,
4042 get_extent, &bio, 4019 get_extent, &bio,
@@ -4304,15 +4281,20 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
4304{ 4281{
4305 char *dst_kaddr = page_address(dst_page); 4282 char *dst_kaddr = page_address(dst_page);
4306 char *src_kaddr; 4283 char *src_kaddr;
4284 int must_memmove = 0;
4307 4285
4308 if (dst_page != src_page) { 4286 if (dst_page != src_page) {
4309 src_kaddr = page_address(src_page); 4287 src_kaddr = page_address(src_page);
4310 } else { 4288 } else {
4311 src_kaddr = dst_kaddr; 4289 src_kaddr = dst_kaddr;
4312 BUG_ON(areas_overlap(src_off, dst_off, len)); 4290 if (areas_overlap(src_off, dst_off, len))
4291 must_memmove = 1;
4313 } 4292 }
4314 4293
4315 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 4294 if (must_memmove)
4295 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
4296 else
4297 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
4316} 4298}
4317 4299
4318void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 4300void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
@@ -4382,7 +4364,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4382 "len %lu len %lu\n", dst_offset, len, dst->len); 4364 "len %lu len %lu\n", dst_offset, len, dst->len);
4383 BUG_ON(1); 4365 BUG_ON(1);
4384 } 4366 }
4385 if (!areas_overlap(src_offset, dst_offset, len)) { 4367 if (dst_offset < src_offset) {
4386 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4368 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
4387 return; 4369 return;
4388 } 4370 }
@@ -4429,7 +4411,8 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4429 return ret; 4411 return ret;
4430 } 4412 }
4431 4413
4432 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 4414 if (atomic_read(&eb->refs) > 1 ||
4415 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
4433 ret = 0; 4416 ret = 0;
4434 goto out; 4417 goto out;
4435 } 4418 }
@@ -4442,7 +4425,6 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4442 ret = 0; 4425 ret = 0;
4443 goto out; 4426 goto out;
4444 } 4427 }
4445
4446 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4428 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
4447out: 4429out:
4448 spin_unlock(&tree->buffer_lock); 4430 spin_unlock(&tree->buffer_lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index cecc3518c121..4e38a3d9631a 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -119,16 +119,18 @@ struct extent_state {
119 struct list_head leak_list; 119 struct list_head leak_list;
120}; 120};
121 121
122#define INLINE_EXTENT_BUFFER_PAGES 16
123#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
122struct extent_buffer { 124struct extent_buffer {
123 u64 start; 125 u64 start;
124 unsigned long len; 126 unsigned long len;
125 unsigned long map_start; 127 unsigned long map_start;
126 unsigned long map_len; 128 unsigned long map_len;
127 struct page *first_page;
128 unsigned long bflags; 129 unsigned long bflags;
130 atomic_t refs;
131 atomic_t pages_reading;
129 struct list_head leak_list; 132 struct list_head leak_list;
130 struct rcu_head rcu_head; 133 struct rcu_head rcu_head;
131 atomic_t refs;
132 pid_t lock_owner; 134 pid_t lock_owner;
133 135
134 /* count of read lock holders on the extent buffer */ 136 /* count of read lock holders on the extent buffer */
@@ -152,6 +154,9 @@ struct extent_buffer {
152 * to unlock 154 * to unlock
153 */ 155 */
154 wait_queue_head_t read_lock_wq; 156 wait_queue_head_t read_lock_wq;
157 wait_queue_head_t lock_wq;
158 struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES];
159 struct page **pages;
155}; 160};
156 161
157static inline void extent_set_compress_type(unsigned long *bio_flags, 162static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -251,8 +256,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
251void set_page_extent_mapped(struct page *page); 256void set_page_extent_mapped(struct page *page);
252 257
253struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 258struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
254 u64 start, unsigned long len, 259 u64 start, unsigned long len);
255 struct page *page0);
256struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 260struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
257 u64 start, unsigned long len); 261 u64 start, unsigned long len);
258void free_extent_buffer(struct extent_buffer *eb); 262void free_extent_buffer(struct extent_buffer *eb);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index baa74f3db691..6ea71c60e80a 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -19,6 +19,7 @@
19#include "ctree.h" 19#include "ctree.h"
20#include "disk-io.h" 20#include "disk-io.h"
21#include "transaction.h" 21#include "transaction.h"
22#include "print-tree.h"
22 23
23static int find_name_in_backref(struct btrfs_path *path, const char *name, 24static int find_name_in_backref(struct btrfs_path *path, const char *name,
24 int name_len, struct btrfs_inode_ref **ref_ret) 25 int name_len, struct btrfs_inode_ref **ref_ret)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ef41f285a475..58aad63e1ad3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4384,7 +4384,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4384 * to silence the warning eg. on PowerPC 64. 4384 * to silence the warning eg. on PowerPC 64.
4385 */ 4385 */
4386 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) 4386 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
4387 SetPageUptodate(sb->first_page); 4387 SetPageUptodate(sb->pages[0]);
4388 4388
4389 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 4389 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
4390 array_size = btrfs_super_sys_array_size(super_copy); 4390 array_size = btrfs_super_sys_array_size(super_copy);