author		Chris Mason <chris.mason@oracle.com>	2010-08-06 13:21:20 -0400
committer	Chris Mason <chris.mason@oracle.com>	2012-03-26 16:50:37 -0400
commit		727011e07cbdf87772fcc1999cccd15cc915eb62 (patch)
tree		05405dc1e9c86d67dbb02ddf063bd0c137ce6707 /fs/btrfs/extent_io.c
parent		81c9ad237c604adec79fd4d4034264c6669e0ab3 (diff)
Btrfs: allow metadata blocks larger than the page size
A few years ago the btrfs code to support blocks larger than the page size
was disabled to fix a few corner cases in the page cache handling. This
fixes the code to properly support large metadata blocks again.

Since current kernels will crash early and often with larger metadata
blocks, this adds an incompat bit so that older kernels can't mount it.

This also does away with different blocksizes for nodes and leaves. You
get a single block size for all tree blocks.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
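The core data-structure change is that eb->first_page (plus a radix-tree
lookup for every other page) gives way to an explicit eb->pages array:
embedded storage for ordinary buffers, a separately allocated array once a
metadata block spans more pages than the inline slots. A minimal userspace
sketch of that policy follows; the inline capacity, the helper name
eb_init_pages, and calloc standing in for kzalloc are illustrative
assumptions, not the kernel code.

	#include <stdlib.h>

	#define PAGE_CACHE_SHIFT 12			/* assumed 4K pages */
	#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)
	#define INLINE_EXTENT_BUFFER_PAGES 16		/* assumed capacity; real value is in the header */
	#define MAX_INLINE_EXTENT_BUFFER_SIZE \
		(INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)

	struct page;				/* opaque stand-in for the kernel's struct page */

	struct extent_buffer {
		unsigned long len;
		struct page **pages;		/* points at inline_pages or a heap array */
		struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES];
	};

	/* Mirrors the new branch in __alloc_extent_buffer(): small buffers
	 * use the embedded array, large metadata blocks get their own
	 * allocation, sized in page pointers. */
	static int eb_init_pages(struct extent_buffer *eb, unsigned long len)
	{
		eb->len = len;
		if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
			unsigned long num_pages =
				(len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
			eb->pages = calloc(num_pages, sizeof(struct page *));
			if (!eb->pages)
				return -1;
		} else {
			eb->pages = eb->inline_pages;
		}
		return 0;
	}

	/* extent_buffer_page() collapses from a radix-tree lookup to an index. */
	static struct page *extent_buffer_page(struct extent_buffer *eb,
					       unsigned long i)
	{
		return eb->pages[i];
	}

Indexing an array is also what lets extent_buffer_page() collapse to a
single return statement, as the first hunk below shows.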
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	|	144
1 file changed, 63 insertions(+), 81 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a55fbe6252de..c6c9ce463c86 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3548,26 +3548,7 @@ out:
 inline struct page *extent_buffer_page(struct extent_buffer *eb,
					      unsigned long i)
 {
-	struct page *p;
-	struct address_space *mapping;
-
-	if (i == 0)
-		return eb->first_page;
-	i += eb->start >> PAGE_CACHE_SHIFT;
-	mapping = eb->first_page->mapping;
-	if (!mapping)
-		return NULL;
-
-	/*
-	 * extent_buffer_page is only called after pinning the page
-	 * by increasing the reference count.  So we know the page must
-	 * be in the radix tree.
-	 */
-	rcu_read_lock();
-	p = radix_tree_lookup(&mapping->page_tree, i);
-	rcu_read_unlock();
-
-	return p;
+	return eb->pages[i];
 }
 
 inline unsigned long num_extent_pages(u64 start, u64 len)
@@ -3576,6 +3557,19 @@ inline unsigned long num_extent_pages(u64 start, u64 len)
 		(start >> PAGE_CACHE_SHIFT);
 }
 
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+#if LEAK_DEBUG
+	unsigned long flags;
+	spin_lock_irqsave(&leak_lock, flags);
+	list_del(&eb->leak_list);
+	spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+	if (eb->pages && eb->pages != eb->inline_pages)
+		kfree(eb->pages);
+	kmem_cache_free(extent_buffer_cache, eb);
+}
+
 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 						   u64 start,
 						   unsigned long len,
@@ -3608,21 +3602,25 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 	spin_unlock_irqrestore(&leak_lock, flags);
 #endif
 	atomic_set(&eb->refs, 1);
+	atomic_set(&eb->pages_reading, 0);
+
+	if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
+		struct page **pages;
+		int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
+			PAGE_CACHE_SHIFT;
+		pages = kzalloc(num_pages, mask);
+		if (!pages) {
+			__free_extent_buffer(eb);
+			return NULL;
+		}
+		eb->pages = pages;
+	} else {
+		eb->pages = eb->inline_pages;
+	}
 
 	return eb;
 }
 
-static void __free_extent_buffer(struct extent_buffer *eb)
-{
-#if LEAK_DEBUG
-	unsigned long flags;
-	spin_lock_irqsave(&leak_lock, flags);
-	list_del(&eb->leak_list);
-	spin_unlock_irqrestore(&leak_lock, flags);
-#endif
-	kmem_cache_free(extent_buffer_cache, eb);
-}
-
 /*
  * Helper for releasing extent buffer page.
  */
@@ -3632,9 +3630,6 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
 	unsigned long index;
 	struct page *page;
 
-	if (!eb->first_page)
-		return;
-
 	index = num_extent_pages(eb->start, eb->len);
 	if (start_idx >= index)
 		return;
@@ -3657,8 +3652,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 }
 
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
-					  u64 start, unsigned long len,
-					  struct page *page0)
+					  u64 start, unsigned long len)
 {
 	unsigned long num_pages = num_extent_pages(start, len);
 	unsigned long i;
@@ -3674,7 +3668,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
-		mark_page_accessed(eb->first_page);
+		mark_page_accessed(eb->pages[0]);
 		return eb;
 	}
 	rcu_read_unlock();
@@ -3683,32 +3677,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	if (!eb)
 		return NULL;
 
-	if (page0) {
-		eb->first_page = page0;
-		i = 1;
-		index++;
-		page_cache_get(page0);
-		mark_page_accessed(page0);
-		set_page_extent_mapped(page0);
-		set_page_extent_head(page0, len);
-		uptodate = PageUptodate(page0);
-	} else {
-		i = 0;
-	}
-	for (; i < num_pages; i++, index++) {
+	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS);
 		if (!p) {
 			WARN_ON(1);
 			goto free_eb;
 		}
-		set_page_extent_mapped(p);
 		mark_page_accessed(p);
-		if (i == 0) {
-			eb->first_page = p;
-			set_page_extent_head(p, len);
-		} else {
-			set_page_private(p, EXTENT_PAGE_PRIVATE);
-		}
+		eb->pages[i] = p;
 		if (!PageUptodate(p))
 			uptodate = 0;
 
@@ -3716,8 +3692,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		 * see below about how we avoid a nasty race with release page
 		 * and why we unlock later
 		 */
-		if (i != 0)
-			unlock_page(p);
 	}
 	if (uptodate)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3751,15 +3725,23 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	 * after the extent buffer is in the radix tree so
 	 * it doesn't get lost
 	 */
-	set_page_extent_mapped(eb->first_page);
-	set_page_extent_head(eb->first_page, eb->len);
-	if (!page0)
-		unlock_page(eb->first_page);
+	set_page_extent_mapped(eb->pages[0]);
+	set_page_extent_head(eb->pages[0], eb->len);
+	SetPageChecked(eb->pages[0]);
+	for (i = 1; i < num_pages; i++) {
+		p = extent_buffer_page(eb, i);
+		set_page_extent_mapped(p);
+		ClearPageChecked(p);
+		unlock_page(p);
+	}
+	unlock_page(eb->pages[0]);
 	return eb;
 
 free_eb:
-	if (eb->first_page && !page0)
-		unlock_page(eb->first_page);
+	for (i = 0; i < num_pages; i++) {
+		if (eb->pages[i])
+			unlock_page(eb->pages[i]);
+	}
 
 	if (!atomic_dec_and_test(&eb->refs))
 		return exists;
@@ -3776,7 +3758,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
-		mark_page_accessed(eb->first_page);
+		mark_page_accessed(eb->pages[0]);
 		return eb;
 	}
 	rcu_read_unlock();
@@ -3981,8 +3963,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	int ret = 0;
 	int locked_pages = 0;
 	int all_uptodate = 1;
-	int inc_all_pages = 0;
 	unsigned long num_pages;
+	unsigned long num_reads = 0;
 	struct bio *bio = NULL;
 	unsigned long bio_flags = 0;
 
@@ -4014,8 +3996,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 			lock_page(page);
 		}
 		locked_pages++;
-		if (!PageUptodate(page))
+		if (!PageUptodate(page)) {
+			num_reads++;
 			all_uptodate = 0;
+		}
 	}
 	if (all_uptodate) {
 		if (start_i == 0)
@@ -4023,20 +4007,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 		goto unlock_exit;
 	}
 
+	atomic_set(&eb->pages_reading, num_reads);
 	for (i = start_i; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
-
-		WARN_ON(!PagePrivate(page));
-
 		set_page_extent_mapped(page);
 		if (i == 0)
 			set_page_extent_head(page, eb->len);
-
-		if (inc_all_pages)
-			page_cache_get(page);
 		if (!PageUptodate(page)) {
-			if (start_i == 0)
-				inc_all_pages = 1;
 			ClearPageError(page);
 			err = __extent_read_full_page(tree, page,
 						      get_extent, &bio,
@@ -4304,15 +4281,20 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
 {
 	char *dst_kaddr = page_address(dst_page);
 	char *src_kaddr;
+	int must_memmove = 0;
 
 	if (dst_page != src_page) {
 		src_kaddr = page_address(src_page);
 	} else {
 		src_kaddr = dst_kaddr;
-		BUG_ON(areas_overlap(src_off, dst_off, len));
+		if (areas_overlap(src_off, dst_off, len))
+			must_memmove = 1;
 	}
 
-	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+	if (must_memmove)
+		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
+	else
+		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
 }
 
 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
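The copy_pages() hunk above downgrades overlapping same-page copies from a
hard BUG_ON() to a memmove() fallback. The distinction is standard C
semantics, not btrfs-specific: memcpy() is undefined when source and
destination overlap, while memmove() behaves as if the bytes were staged
through a temporary buffer. A small userspace demonstration:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[16] = "abcdefgh";

		/* Shift eight bytes right by two inside one buffer: source
		 * [0,8) and destination [2,10) overlap, so memcpy() would be
		 * undefined behaviour here; memmove() copies safely. */
		memmove(buf + 2, buf, 8);
		printf("%s\n", buf);	/* prints "ababcdefgh" */
		return 0;
	}

Keeping memcpy() for the disjoint case preserves the cheaper call on the
common path; and with per-page overlap now handled inside copy_pages(),
memmove_extent_buffer() in the next hunk only needs to pick a direction:
forward moves (dst_offset < src_offset) take the memcpy_extent_buffer()
shortcut, everything else walks backwards.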
@@ -4382,7 +4364,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4382 "len %lu len %lu\n", dst_offset, len, dst->len); 4364 "len %lu len %lu\n", dst_offset, len, dst->len);
4383 BUG_ON(1); 4365 BUG_ON(1);
4384 } 4366 }
4385 if (!areas_overlap(src_offset, dst_offset, len)) { 4367 if (dst_offset < src_offset) {
4386 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4368 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
4387 return; 4369 return;
4388 } 4370 }
@@ -4429,7 +4411,8 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 		return ret;
 	}
 
-	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+	if (atomic_read(&eb->refs) > 1 ||
+	    test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
 		ret = 0;
 		goto out;
 	}
@@ -4442,7 +4425,6 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 		ret = 0;
 		goto out;
 	}
-
 	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 out:
 	spin_unlock(&tree->buffer_lock);
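On the read side, the removed inc_all_pages/page_cache_get() juggling gives
way to counting up front how many pages actually need I/O (num_reads) and
parking that number in eb->pages_reading before any bio is submitted. The
completion half is outside this file's hunks, so the sketch below fills it
in with an assumed name (eb_read_done) and C11 atomics standing in for the
kernel's atomic_t; it shows only the counting scheme, not the real end_io
path.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define NUM_PAGES 4

	struct extent_buffer {
		bool page_uptodate[NUM_PAGES];	/* stand-in for per-page PageUptodate */
		atomic_int pages_reading;	/* pages with reads still in flight */
		bool uptodate;			/* stand-in for EXTENT_BUFFER_UPTODATE */
	};

	/* Count the pages that need I/O before submitting anything, as the
	 * patched read_extent_buffer_pages() does with num_reads. */
	static void eb_start_reads(struct extent_buffer *eb)
	{
		int num_reads = 0;
		for (int i = 0; i < NUM_PAGES; i++)
			if (!eb->page_uptodate[i])
				num_reads++;
		atomic_store(&eb->pages_reading, num_reads);
	}

	/* Assumed completion hook: whoever finishes the last outstanding
	 * page flips the whole buffer to uptodate. */
	static void eb_read_done(struct extent_buffer *eb, int i)
	{
		eb->page_uptodate[i] = true;
		if (atomic_fetch_sub(&eb->pages_reading, 1) == 1)
			eb->uptodate = true;
	}

	int main(void)
	{
		struct extent_buffer eb = {
			.page_uptodate = { true, false, false, true },
		};

		eb_start_reads(&eb);			/* two pages need reading */
		eb_read_done(&eb, 1);
		eb_read_done(&eb, 2);
		printf("uptodate: %d\n", eb.uptodate);	/* prints 1 */
		return 0;
	}

Counting once up front means no per-page reference juggling is needed while
reads are in flight, which fits the patch's larger goal of treating the
whole multi-page metadata block as one unit.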