commit     3083ee2e18b701122a3b841db83448543a87a583
author     Josef Bacik <josef@redhat.com>        2012-03-09 16:01:49 -0500
committer  Chris Mason <chris.mason@oracle.com>  2012-03-26 16:51:08 -0400
tree       0265021499da54e5b4667a041f8b82c6d1a8667e /fs/btrfs/extent_io.c
parent     115391d2315239164e400a8259b26392afccf3bd
Btrfs: introduce free_extent_buffer_stale
Because btrfs COWs, we can end up with extent buffers that are no longer
needed just sitting around in memory. Instead of these stale pages being
evicted, we could end up evicting pages we actually care about. Thus we
have free_extent_buffer_stale for use when we are freeing tree blocks. It
drops the ref the eb holds for being in the radix tree as soon as
possible, so the eb is freed once its refcount hits 0 instead of waiting
to be released by releasepage. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
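For orientation before reading the diff: the scheme gives the radix tree its own explicitly tagged reference (EXTENT_BUFFER_TREE_REF), and free_extent_buffer_stale() sheds that reference early for buffers that can no longer be reached. The following is a minimal userspace model of that refcounting pattern; every name is illustrative, and the kernel's refs_lock serialization is approximated with atomics, so treat it as a sketch of the idea rather than kernel code.

/*
 * Illustrative userspace model of the stale-buffer scheme -- not kernel
 * code.  One reference belongs to the cache ("tree ref"); putting a
 * buffer as stale drops that reference early so the last user's put
 * destroys the buffer immediately instead of waiting for page reclaim.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct buffer {
	atomic_int refs;	/* users + (tree ref ? 1 : 0) */
	atomic_bool tree_ref;	/* models EXTENT_BUFFER_TREE_REF */
	atomic_bool stale;	/* models EXTENT_BUFFER_STALE */
};

static struct buffer *buffer_new(void)
{
	struct buffer *b = calloc(1, sizeof(*b));

	atomic_init(&b->refs, 2);	/* one for the caller, one for the tree */
	atomic_init(&b->tree_ref, true);
	atomic_init(&b->stale, false);
	return b;
}

static void buffer_put(struct buffer *b)
{
	bool expected = true;

	/* A stale buffer whose only other ref is the tree's loses that ref now. */
	if (atomic_load(&b->stale) && atomic_load(&b->refs) == 2 &&
	    atomic_compare_exchange_strong(&b->tree_ref, &expected, false))
		atomic_fetch_sub(&b->refs, 1);

	if (atomic_fetch_sub(&b->refs, 1) == 1) {
		printf("buffer freed\n");
		free(b);
	}
}

static void buffer_put_stale(struct buffer *b)
{
	atomic_store(&b->stale, true);	/* models free_extent_buffer_stale() */
	buffer_put(b);
}

int main(void)
{
	struct buffer *b = buffer_new();

	buffer_put_stale(b);	/* drops caller ref and tree ref: freed now */
	return 0;
}

In the real patch the decision to drop the tree ref is made under eb->refs_lock rather than with a compare-and-swap, but the shape is the same: mark stale, conditionally shed the cache's reference, then do a normal put.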
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c | 205
1 file changed, 171 insertions(+), 34 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0f74262911be..0ce14369920c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3607,6 +3607,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 	list_add(&eb->leak_list, &buffers);
 	spin_unlock_irqrestore(&leak_lock, flags);
 #endif
+	spin_lock_init(&eb->refs_lock);
 	atomic_set(&eb->refs, 1);
 	atomic_set(&eb->pages_reading, 0);
 
@@ -3654,6 +3655,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
 			 */
 			if (PagePrivate(page) &&
 			    page->private == (unsigned long)eb) {
+				BUG_ON(PageDirty(page));
+				BUG_ON(PageWriteback(page));
 				/*
 				 * We need to make sure we haven't be attached
 				 * to a new eb.
@@ -3763,7 +3766,6 @@ again:
 		if (!atomic_inc_not_zero(&exists->refs)) {
 			spin_unlock(&tree->buffer_lock);
 			radix_tree_preload_end();
-			synchronize_rcu();
 			exists = NULL;
 			goto again;
 		}
@@ -3772,7 +3774,10 @@ again:
 		goto free_eb;
 	}
 	/* add one reference for the tree */
+	spin_lock(&eb->refs_lock);
 	atomic_inc(&eb->refs);
+	set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+	spin_unlock(&eb->refs_lock);
 	spin_unlock(&tree->buffer_lock);
 	radix_tree_preload_end();
 
@@ -3823,15 +3828,143 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 	return NULL;
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	__free_extent_buffer(eb);
+}
+
+static int extent_buffer_under_io(struct extent_buffer *eb,
+				  struct page *locked_page)
+{
+	unsigned long num_pages, i;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = eb->pages[i];
+		int need_unlock = 0;
+
+		if (!page)
+			continue;
+
+		if (page != locked_page) {
+			if (!trylock_page(page))
+				return 1;
+			need_unlock = 1;
+		}
+
+		if (PageDirty(page) || PageWriteback(page)) {
+			if (need_unlock)
+				unlock_page(page);
+			return 1;
+		}
+		if (need_unlock)
+			unlock_page(page);
+	}
+
+	return 0;
+}
+
+/* Expects to have eb->eb_lock already held */
+static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+{
+	WARN_ON(atomic_read(&eb->refs) == 0);
+	if (atomic_dec_and_test(&eb->refs)) {
+		struct extent_io_tree *tree = eb->tree;
+		int ret;
+
+		spin_unlock(&eb->refs_lock);
+
+		might_sleep_if(mask & __GFP_WAIT);
+		ret = clear_extent_bit(tree, eb->start,
+				       eb->start + eb->len - 1, -1, 0, 0,
+				       NULL, mask);
+		if (ret < 0) {
+			unsigned long num_pages, i;
+
+			num_pages = num_extent_pages(eb->start, eb->len);
+			/*
+			 * We failed to clear the state bits which likely means
+			 * ENOMEM, so just re-up the eb ref and continue, we
+			 * will get freed later on via releasepage or something
+			 * else and will be ok.
+			 */
+			spin_lock(&eb->tree->mapping->private_lock);
+			spin_lock(&eb->refs_lock);
+			set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+			atomic_inc(&eb->refs);
+
+			/*
+			 * We may have started to reclaim the pages for a newly
+			 * allocated eb, make sure we own all of them again.
+			 */
+			for (i = 0; i < num_pages; i++) {
+				struct page *page = eb->pages[i];
+
+				if (!page) {
+					WARN_ON(1);
+					continue;
+				}
+
+				BUG_ON(!PagePrivate(page));
+				if (page->private != (unsigned long)eb) {
+					ClearPagePrivate(page);
+					page_cache_release(page);
+					attach_extent_buffer_page(eb, page);
+				}
+			}
+			spin_unlock(&eb->refs_lock);
+			spin_unlock(&eb->tree->mapping->private_lock);
+			return;
+		}
+
+		spin_lock(&tree->buffer_lock);
+		radix_tree_delete(&tree->buffer,
+				  eb->start >> PAGE_CACHE_SHIFT);
+		spin_unlock(&tree->buffer_lock);
+
+		/* Should be safe to release our pages at this point */
+		btrfs_release_extent_buffer_page(eb, 0);
+
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
+		return;
+	}
+	spin_unlock(&eb->refs_lock);
+}
+
 void free_extent_buffer(struct extent_buffer *eb)
 {
 	if (!eb)
 		return;
 
-	if (!atomic_dec_and_test(&eb->refs))
+	spin_lock(&eb->refs_lock);
+	if (atomic_read(&eb->refs) == 2 &&
+	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
+	    !extent_buffer_under_io(eb, NULL) &&
+	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+		atomic_dec(&eb->refs);
+
+	/*
+	 * I know this is terrible, but it's temporary until we stop tracking
+	 * the uptodate bits and such for the extent buffers.
+	 */
+	release_extent_buffer(eb, GFP_ATOMIC);
+}
+
+void free_extent_buffer_stale(struct extent_buffer *eb)
+{
+	if (!eb)
 		return;
 
-	WARN_ON(1);
+	spin_lock(&eb->refs_lock);
+	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
+
+	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb, NULL) &&
+	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+		atomic_dec(&eb->refs);
+	release_extent_buffer(eb, GFP_NOFS);
 }
 
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
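A note on the extent_buffer_under_io() helper added above: it is called with eb->refs_lock (a spinlock) held, so it must never sleep. It therefore uses trylock_page() and conservatively reports "busy" for any page it cannot lock immediately, as well as for any dirty or writeback page. A small userspace analogue of that non-blocking scan, with pthread mutexes standing in for page locks (all names hypothetical):

/* Userspace analogue of the trylock scan in extent_buffer_under_io():
 * treat any page we cannot lock, or that is dirty/under writeback,
 * as "busy" and bail out without ever blocking. */
#include <pthread.h>
#include <stdbool.h>

struct fake_page {
	pthread_mutex_t lock;
	bool dirty;
	bool writeback;
};

/* Returns true if any page is (possibly) in use; never blocks. */
static bool pages_busy(struct fake_page *pages, int n,
		       struct fake_page *already_locked)
{
	for (int i = 0; i < n; i++) {
		struct fake_page *p = &pages[i];
		bool locked_here = false;
		bool busy;

		if (p != already_locked) {
			if (pthread_mutex_trylock(&p->lock) != 0)
				return true;	/* can't lock it: assume busy */
			locked_here = true;
		}

		busy = p->dirty || p->writeback;
		if (locked_here)
			pthread_mutex_unlock(&p->lock);
		if (busy)
			return true;
	}
	return false;
}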
@@ -3874,6 +4007,7 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
+	WARN_ON(atomic_read(&eb->refs) == 0);
 	for (i = 0; i < num_pages; i++)
 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
 	return was_dirty;
@@ -4440,45 +4574,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
-static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
-{
-	struct extent_buffer *eb =
-			container_of(head, struct extent_buffer, rcu_head);
-
-	__free_extent_buffer(eb);
-}
-
-int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+int try_release_extent_buffer(struct page *page, gfp_t mask)
 {
-	u64 start = page_offset(page);
-	struct extent_buffer *eb = (struct extent_buffer *)page->private;
-	int ret = 1;
+	struct extent_buffer *eb;
 
-	if (!PagePrivate(page) || !eb)
+	/*
+	 * We need to make sure noboody is attaching this page to an eb right
+	 * now.
+	 */
+	spin_lock(&page->mapping->private_lock);
+	if (!PagePrivate(page)) {
+		spin_unlock(&page->mapping->private_lock);
 		return 1;
+	}
 
-	spin_lock(&tree->buffer_lock);
-	if (atomic_read(&eb->refs) > 1 ||
-	    test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
-		ret = 0;
-		goto out;
+	eb = (struct extent_buffer *)page->private;
+	BUG_ON(!eb);
+
+	/*
+	 * This is a little awful but should be ok, we need to make sure that
+	 * the eb doesn't disappear out from under us while we're looking at
+	 * this page.
+	 */
+	spin_lock(&eb->refs_lock);
+	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb, page)) {
+		spin_unlock(&eb->refs_lock);
+		spin_unlock(&page->mapping->private_lock);
+		return 0;
 	}
+	spin_unlock(&page->mapping->private_lock);
+
+	if ((mask & GFP_NOFS) == GFP_NOFS)
+		mask = GFP_NOFS;
 
 	/*
-	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
-	 * Or go back.
+	 * If tree ref isn't set then we know the ref on this eb is a real ref,
+	 * so just return, this page will likely be freed soon anyway.
 	 */
-	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
-		ret = 0;
-		goto out;
+	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+		spin_unlock(&eb->refs_lock);
+		return 0;
 	}
-	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-	btrfs_release_extent_buffer_page(eb, 0);
-out:
-	spin_unlock(&tree->buffer_lock);
+	release_extent_buffer(eb, mask);
 
-	/* at this point we can safely release the extent buffer */
-	if (atomic_read(&eb->refs) == 0)
-		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
-	return ret;
+	return 1;
 }
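One design consequence worth noting: try_release_extent_buffer() now needs only the page, since the eb is reachable through page->private and can be pinned via eb->refs_lock, so callers no longer pass in the extent_io_tree. The matching caller lives in fs/btrfs/disk-io.c and is outside this diffstat-limited view, so the following is only a hedged sketch of the rough shape a releasepage-style caller takes after this patch:

/* Illustrative sketch of a releasepage-style caller after this patch;
 * the real btree_releasepage() in fs/btrfs/disk-io.c is not shown in
 * this diff, so details here are assumptions. */
static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
	if (PageWriteback(page) || PageDirty(page))
		return 0;	/* page still in use, don't release it */
	return try_release_extent_buffer(page, gfp_flags);
}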