aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/ctree.c31
-rw-r--r--fs/btrfs/disk-io.c14
-rw-r--r--fs/btrfs/extent-tree.c5
-rw-r--r--fs/btrfs/extent_io.c205
-rw-r--r--fs/btrfs/extent_io.h6
5 files changed, 201 insertions, 60 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0639a555e16e..74c03fb0ca1d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -156,10 +156,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
156{ 156{
157 struct extent_buffer *eb; 157 struct extent_buffer *eb;
158 158
159 rcu_read_lock(); 159 while (1) {
160 eb = rcu_dereference(root->node); 160 rcu_read_lock();
161 extent_buffer_get(eb); 161 eb = rcu_dereference(root->node);
162 rcu_read_unlock(); 162
163 /*
164 * RCU really hurts here: the old root node may already have been
165 * freed because it was cow'ed, and we may not see the new root node
166 * yet. So do the inc_not_zero dance and, if it doesn't work,
167 * synchronize_rcu and try again.
168 */
169 if (atomic_inc_not_zero(&eb->refs)) {
170 rcu_read_unlock();
171 break;
172 }
173 rcu_read_unlock();
174 synchronize_rcu();
175 }
163 return eb; 176 return eb;
164} 177}
165 178
@@ -504,7 +517,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
504 } 517 }
505 if (unlock_orig) 518 if (unlock_orig)
506 btrfs_tree_unlock(buf); 519 btrfs_tree_unlock(buf);
507 free_extent_buffer(buf); 520 free_extent_buffer_stale(buf);
508 btrfs_mark_buffer_dirty(cow); 521 btrfs_mark_buffer_dirty(cow);
509 *cow_ret = cow; 522 *cow_ret = cow;
510 return 0; 523 return 0;
@@ -959,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
959 root_sub_used(root, mid->len); 972 root_sub_used(root, mid->len);
960 btrfs_free_tree_block(trans, root, mid, 0, 1, 0); 973 btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
961 /* once for the root ptr */ 974 /* once for the root ptr */
962 free_extent_buffer(mid); 975 free_extent_buffer_stale(mid);
963 return 0; 976 return 0;
964 } 977 }
965 if (btrfs_header_nritems(mid) > 978 if (btrfs_header_nritems(mid) >
@@ -1016,7 +1029,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1016 ret = wret; 1029 ret = wret;
1017 root_sub_used(root, right->len); 1030 root_sub_used(root, right->len);
1018 btrfs_free_tree_block(trans, root, right, 0, 1, 0); 1031 btrfs_free_tree_block(trans, root, right, 0, 1, 0);
1019 free_extent_buffer(right); 1032 free_extent_buffer_stale(right);
1020 right = NULL; 1033 right = NULL;
1021 } else { 1034 } else {
1022 struct btrfs_disk_key right_key; 1035 struct btrfs_disk_key right_key;
@@ -1056,7 +1069,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1056 ret = wret; 1069 ret = wret;
1057 root_sub_used(root, mid->len); 1070 root_sub_used(root, mid->len);
1058 btrfs_free_tree_block(trans, root, mid, 0, 1, 0); 1071 btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
1059 free_extent_buffer(mid); 1072 free_extent_buffer_stale(mid);
1060 mid = NULL; 1073 mid = NULL;
1061 } else { 1074 } else {
1062 /* update the parent key to reflect our changes */ 1075 /* update the parent key to reflect our changes */
@@ -3781,7 +3794,9 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3781 3794
3782 root_sub_used(root, leaf->len); 3795 root_sub_used(root, leaf->len);
3783 3796
3797 extent_buffer_get(leaf);
3784 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); 3798 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
3799 free_extent_buffer_stale(leaf);
3785 return 0; 3800 return 0;
3786} 3801}
3787/* 3802/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bc88649cffb7..0ba055e03eb8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -923,16 +923,8 @@ static int btree_readpage(struct file *file, struct page *page)
923 923
924static int btree_releasepage(struct page *page, gfp_t gfp_flags) 924static int btree_releasepage(struct page *page, gfp_t gfp_flags)
925{ 925{
926 struct extent_map_tree *map;
927 struct extent_io_tree *tree;
928 int ret;
929
930 if (PageWriteback(page) || PageDirty(page)) 926 if (PageWriteback(page) || PageDirty(page))
931 return 0; 927 return 0;
932
933 tree = &BTRFS_I(page->mapping->host)->io_tree;
934 map = &BTRFS_I(page->mapping->host)->extent_tree;
935
936 /* 928 /*
937 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing 929 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
938 * slab allocation from alloc_extent_state down the callchain where 930 * slab allocation from alloc_extent_state down the callchain where
@@ -940,11 +932,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
940 */ 932 */
941 gfp_flags &= ~GFP_SLAB_BUG_MASK; 933 gfp_flags &= ~GFP_SLAB_BUG_MASK;
942 934
943 ret = try_release_extent_state(map, tree, page, gfp_flags); 935 return try_release_extent_buffer(page, gfp_flags);
944 if (!ret)
945 return 0;
946
947 return try_release_extent_buffer(tree, page);
948} 936}
949 937
950static void btree_invalidatepage(struct page *page, unsigned long offset) 938static void btree_invalidatepage(struct page *page, unsigned long offset)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9b7e7682fda0..1b831ac4c079 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5018,10 +5018,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5018 if (is_data) { 5018 if (is_data) {
5019 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 5019 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
5020 BUG_ON(ret); 5020 BUG_ON(ret);
5021 } else {
5022 invalidate_mapping_pages(info->btree_inode->i_mapping,
5023 bytenr >> PAGE_CACHE_SHIFT,
5024 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
5025 } 5021 }
5026 5022
5027 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 5023 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
@@ -6022,6 +6018,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
6022 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 6018 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
6023 btrfs_tree_lock(buf); 6019 btrfs_tree_lock(buf);
6024 clean_tree_block(trans, root, buf); 6020 clean_tree_block(trans, root, buf);
6021 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
6025 6022
6026 btrfs_set_lock_blocking(buf); 6023 btrfs_set_lock_blocking(buf);
6027 btrfs_set_buffer_uptodate(buf); 6024 btrfs_set_buffer_uptodate(buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0f74262911be..0ce14369920c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3607,6 +3607,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3607 list_add(&eb->leak_list, &buffers); 3607 list_add(&eb->leak_list, &buffers);
3608 spin_unlock_irqrestore(&leak_lock, flags); 3608 spin_unlock_irqrestore(&leak_lock, flags);
3609#endif 3609#endif
3610 spin_lock_init(&eb->refs_lock);
3610 atomic_set(&eb->refs, 1); 3611 atomic_set(&eb->refs, 1);
3611 atomic_set(&eb->pages_reading, 0); 3612 atomic_set(&eb->pages_reading, 0);
3612 3613
@@ -3654,6 +3655,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3654 */ 3655 */
3655 if (PagePrivate(page) && 3656 if (PagePrivate(page) &&
3656 page->private == (unsigned long)eb) { 3657 page->private == (unsigned long)eb) {
3658 BUG_ON(PageDirty(page));
3659 BUG_ON(PageWriteback(page));
3657 /* 3660 /*
3658 * We need to make sure we haven't been attached 3661 * We need to make sure we haven't been attached
3659 * to a new eb. 3662 * to a new eb.
@@ -3763,7 +3766,6 @@ again:
3763 if (!atomic_inc_not_zero(&exists->refs)) { 3766 if (!atomic_inc_not_zero(&exists->refs)) {
3764 spin_unlock(&tree->buffer_lock); 3767 spin_unlock(&tree->buffer_lock);
3765 radix_tree_preload_end(); 3768 radix_tree_preload_end();
3766 synchronize_rcu();
3767 exists = NULL; 3769 exists = NULL;
3768 goto again; 3770 goto again;
3769 } 3771 }
@@ -3772,7 +3774,10 @@ again:
3772 goto free_eb; 3774 goto free_eb;
3773 } 3775 }
3774 /* add one reference for the tree */ 3776 /* add one reference for the tree */
3777 spin_lock(&eb->refs_lock);
3775 atomic_inc(&eb->refs); 3778 atomic_inc(&eb->refs);
3779 set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
3780 spin_unlock(&eb->refs_lock);
3776 spin_unlock(&tree->buffer_lock); 3781 spin_unlock(&tree->buffer_lock);
3777 radix_tree_preload_end(); 3782 radix_tree_preload_end();
3778 3783
@@ -3823,15 +3828,143 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3823 return NULL; 3828 return NULL;
3824} 3829}
3825 3830
3831static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
3832{
3833 struct extent_buffer *eb =
3834 container_of(head, struct extent_buffer, rcu_head);
3835
3836 __free_extent_buffer(eb);
3837}
3838
3839static int extent_buffer_under_io(struct extent_buffer *eb,
3840 struct page *locked_page)
3841{
3842 unsigned long num_pages, i;
3843
3844 num_pages = num_extent_pages(eb->start, eb->len);
3845 for (i = 0; i < num_pages; i++) {
3846 struct page *page = eb->pages[i];
3847 int need_unlock = 0;
3848
3849 if (!page)
3850 continue;
3851
3852 if (page != locked_page) {
3853 if (!trylock_page(page))
3854 return 1;
3855 need_unlock = 1;
3856 }
3857
3858 if (PageDirty(page) || PageWriteback(page)) {
3859 if (need_unlock)
3860 unlock_page(page);
3861 return 1;
3862 }
3863 if (need_unlock)
3864 unlock_page(page);
3865 }
3866
3867 return 0;
3868}
3869
3870/* Expects to have eb->refs_lock already held */
3871static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
3872{
3873 WARN_ON(atomic_read(&eb->refs) == 0);
3874 if (atomic_dec_and_test(&eb->refs)) {
3875 struct extent_io_tree *tree = eb->tree;
3876 int ret;
3877
3878 spin_unlock(&eb->refs_lock);
3879
3880 might_sleep_if(mask & __GFP_WAIT);
3881 ret = clear_extent_bit(tree, eb->start,
3882 eb->start + eb->len - 1, -1, 0, 0,
3883 NULL, mask);
3884 if (ret < 0) {
3885 unsigned long num_pages, i;
3886
3887 num_pages = num_extent_pages(eb->start, eb->len);
3888 /*
3889 * We failed to clear the state bits which likely means
3890 * ENOMEM, so just re-up the eb ref and continue, we
3891 * will get freed later on via releasepage or something
3892 * else and will be ok.
3893 */
3894 spin_lock(&eb->tree->mapping->private_lock);
3895 spin_lock(&eb->refs_lock);
3896 set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
3897 atomic_inc(&eb->refs);
3898
3899 /*
3900 * We may have started to reclaim the pages for a newly
3901 * allocated eb, make sure we own all of them again.
3902 */
3903 for (i = 0; i < num_pages; i++) {
3904 struct page *page = eb->pages[i];
3905
3906 if (!page) {
3907 WARN_ON(1);
3908 continue;
3909 }
3910
3911 BUG_ON(!PagePrivate(page));
3912 if (page->private != (unsigned long)eb) {
3913 ClearPagePrivate(page);
3914 page_cache_release(page);
3915 attach_extent_buffer_page(eb, page);
3916 }
3917 }
3918 spin_unlock(&eb->refs_lock);
3919 spin_unlock(&eb->tree->mapping->private_lock);
3920 return;
3921 }
3922
3923 spin_lock(&tree->buffer_lock);
3924 radix_tree_delete(&tree->buffer,
3925 eb->start >> PAGE_CACHE_SHIFT);
3926 spin_unlock(&tree->buffer_lock);
3927
3928 /* Should be safe to release our pages at this point */
3929 btrfs_release_extent_buffer_page(eb, 0);
3930
3931 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
3932 return;
3933 }
3934 spin_unlock(&eb->refs_lock);
3935}
3936
3826void free_extent_buffer(struct extent_buffer *eb) 3937void free_extent_buffer(struct extent_buffer *eb)
3827{ 3938{
3828 if (!eb) 3939 if (!eb)
3829 return; 3940 return;
3830 3941
3831 if (!atomic_dec_and_test(&eb->refs)) 3942 spin_lock(&eb->refs_lock);
3943 if (atomic_read(&eb->refs) == 2 &&
3944 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
3945 !extent_buffer_under_io(eb, NULL) &&
3946 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
3947 atomic_dec(&eb->refs);
3948
3949 /*
3950 * I know this is terrible, but it's temporary until we stop tracking
3951 * the uptodate bits and such for the extent buffers.
3952 */
3953 release_extent_buffer(eb, GFP_ATOMIC);
3954}
3955
3956void free_extent_buffer_stale(struct extent_buffer *eb)
3957{
3958 if (!eb)
3832 return; 3959 return;
3833 3960
3834 WARN_ON(1); 3961 spin_lock(&eb->refs_lock);
3962 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
3963
3964 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb, NULL) &&
3965 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
3966 atomic_dec(&eb->refs);
3967 release_extent_buffer(eb, GFP_NOFS);
3835} 3968}
3836 3969
3837int clear_extent_buffer_dirty(struct extent_io_tree *tree, 3970int clear_extent_buffer_dirty(struct extent_io_tree *tree,
@@ -3874,6 +4007,7 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
3874 4007
3875 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 4008 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
3876 num_pages = num_extent_pages(eb->start, eb->len); 4009 num_pages = num_extent_pages(eb->start, eb->len);
4010 WARN_ON(atomic_read(&eb->refs) == 0);
3877 for (i = 0; i < num_pages; i++) 4011 for (i = 0; i < num_pages; i++)
3878 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 4012 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
3879 return was_dirty; 4013 return was_dirty;
@@ -4440,45 +4574,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4440 } 4574 }
4441} 4575}
4442 4576
4443static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) 4577int try_release_extent_buffer(struct page *page, gfp_t mask)
4444{
4445 struct extent_buffer *eb =
4446 container_of(head, struct extent_buffer, rcu_head);
4447
4448 __free_extent_buffer(eb);
4449}
4450
4451int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4452{ 4578{
4453 u64 start = page_offset(page); 4579 struct extent_buffer *eb;
4454 struct extent_buffer *eb = (struct extent_buffer *)page->private;
4455 int ret = 1;
4456 4580
4457 if (!PagePrivate(page) || !eb) 4581 /*
4582 * We need to make sure nobody is attaching this page to an eb right
4583 * now.
4584 */
4585 spin_lock(&page->mapping->private_lock);
4586 if (!PagePrivate(page)) {
4587 spin_unlock(&page->mapping->private_lock);
4458 return 1; 4588 return 1;
4589 }
4459 4590
4460 spin_lock(&tree->buffer_lock); 4591 eb = (struct extent_buffer *)page->private;
4461 if (atomic_read(&eb->refs) > 1 || 4592 BUG_ON(!eb);
4462 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 4593
4463 ret = 0; 4594 /*
4464 goto out; 4595 * This is a little awful but should be ok, we need to make sure that
4596 * the eb doesn't disappear out from under us while we're looking at
4597 * this page.
4598 */
4599 spin_lock(&eb->refs_lock);
4600 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb, page)) {
4601 spin_unlock(&eb->refs_lock);
4602 spin_unlock(&page->mapping->private_lock);
4603 return 0;
4465 } 4604 }
4605 spin_unlock(&page->mapping->private_lock);
4606
4607 if ((mask & GFP_NOFS) == GFP_NOFS)
4608 mask = GFP_NOFS;
4466 4609
4467 /* 4610 /*
4468 * set @eb->refs to 0 if it is already 1, and then release the @eb. 4611 * If tree ref isn't set then we know the ref on this eb is a real ref,
4469 * Or go back. 4612 * so just return, this page will likely be freed soon anyway.
4470 */ 4613 */
4471 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { 4614 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
4472 ret = 0; 4615 spin_unlock(&eb->refs_lock);
4473 goto out; 4616 return 0;
4474 } 4617 }
4475 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4618 release_extent_buffer(eb, mask);
4476 btrfs_release_extent_buffer_page(eb, 0);
4477out:
4478 spin_unlock(&tree->buffer_lock);
4479 4619
4480 /* at this point we can safely release the extent buffer */ 4620 return 1;
4481 if (atomic_read(&eb->refs) == 0)
4482 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4483 return ret;
4484} 4621}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 83e432da2e26..60628341f156 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -35,6 +35,8 @@
35#define EXTENT_BUFFER_DIRTY 2 35#define EXTENT_BUFFER_DIRTY 2
36#define EXTENT_BUFFER_CORRUPT 3 36#define EXTENT_BUFFER_CORRUPT 3
37#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ 37#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
38#define EXTENT_BUFFER_TREE_REF 5
39#define EXTENT_BUFFER_STALE 6
38 40
39/* these are flags for extent_clear_unlock_delalloc */ 41/* these are flags for extent_clear_unlock_delalloc */
40#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 42#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -128,6 +130,7 @@ struct extent_buffer {
128 unsigned long map_len; 130 unsigned long map_len;
129 unsigned long bflags; 131 unsigned long bflags;
130 struct extent_io_tree *tree; 132 struct extent_io_tree *tree;
133 spinlock_t refs_lock;
131 atomic_t refs; 134 atomic_t refs;
132 atomic_t pages_reading; 135 atomic_t pages_reading;
133 struct list_head leak_list; 136 struct list_head leak_list;
@@ -184,7 +187,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
184int try_release_extent_mapping(struct extent_map_tree *map, 187int try_release_extent_mapping(struct extent_map_tree *map,
185 struct extent_io_tree *tree, struct page *page, 188 struct extent_io_tree *tree, struct page *page,
186 gfp_t mask); 189 gfp_t mask);
187int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); 190int try_release_extent_buffer(struct page *page, gfp_t mask);
188int try_release_extent_state(struct extent_map_tree *map, 191int try_release_extent_state(struct extent_map_tree *map,
189 struct extent_io_tree *tree, struct page *page, 192 struct extent_io_tree *tree, struct page *page,
190 gfp_t mask); 193 gfp_t mask);
@@ -261,6 +264,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
261struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 264struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
262 u64 start, unsigned long len); 265 u64 start, unsigned long len);
263void free_extent_buffer(struct extent_buffer *eb); 266void free_extent_buffer(struct extent_buffer *eb);
267void free_extent_buffer_stale(struct extent_buffer *eb);
264#define WAIT_NONE 0 268#define WAIT_NONE 0
265#define WAIT_COMPLETE 1 269#define WAIT_COMPLETE 1
266#define WAIT_PAGE_LOCK 2 270#define WAIT_PAGE_LOCK 2