aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2009-09-02 13:24:36 -0400
committer: Chris Mason <chris.mason@oracle.com> 2009-09-11 13:31:06 -0400
commit: 1edbb734b4e010974c41d2859d22a43d04f5f1cf (patch)
tree: 4f43aea677f7206707540dd8622fa4cac099057a
parent: e48c465bb366c0169f7908bfe62ae7080874ee7d (diff)
Btrfs: reduce CPU usage in the extent_state tree
Btrfs is currently mirroring some of the page state bits into its extent state tree. The goal behind this was to use it in supporting block sizes other than the page size. But we don't currently support that, and we're using quite a lot of CPU on the rb tree and its spin lock.

This commit starts a series of cleanups to reduce the amount of work done in the extent state tree as part of each IO.

This commit:

* Adds the ability to lock an extent in the state tree and also set other bits. The idea is to do locking and delalloc in one call.

* Removes the EXTENT_WRITEBACK and EXTENT_DIRTY bits. Btrfs is using a combination of the page bits and the ordered write code for this instead.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/extent_io.c 75
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/file.c 19
3 files changed, 28 insertions, 68 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8e168a457a37..7c70613eb72c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -654,25 +654,24 @@ static void set_state_bits(struct extent_io_tree *tree,
654} 654}
655 655
656/* 656/*
657 * set some bits on a range in the tree. This may require allocations 657 * set some bits on a range in the tree. This may require allocations or
658 * or sleeping, so the gfp mask is used to indicate what is allowed. 658 * sleeping, so the gfp mask is used to indicate what is allowed.
659 * 659 *
660 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 660 * If any of the exclusive bits are set, this will fail with -EEXIST if some
661 * range already has the desired bits set. The start of the existing 661 * part of the range already has the desired bits set. The start of the
662 * range is returned in failed_start in this case. 662 * existing range is returned in failed_start in this case.
663 * 663 *
664 * [start, end] is inclusive 664 * [start, end] is inclusive This takes the tree lock.
665 * This takes the tree lock.
666 */ 665 */
666
667static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 667static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
668 int bits, int exclusive, u64 *failed_start, 668 int bits, int exclusive_bits, u64 *failed_start,
669 gfp_t mask) 669 gfp_t mask)
670{ 670{
671 struct extent_state *state; 671 struct extent_state *state;
672 struct extent_state *prealloc = NULL; 672 struct extent_state *prealloc = NULL;
673 struct rb_node *node; 673 struct rb_node *node;
674 int err = 0; 674 int err = 0;
675 int set;
676 u64 last_start; 675 u64 last_start;
677 u64 last_end; 676 u64 last_end;
678again: 677again:
@@ -707,8 +706,7 @@ hit_next:
707 */ 706 */
708 if (state->start == start && state->end <= end) { 707 if (state->start == start && state->end <= end) {
709 struct rb_node *next_node; 708 struct rb_node *next_node;
710 set = state->state & bits; 709 if (state->state & exclusive_bits) {
711 if (set && exclusive) {
712 *failed_start = state->start; 710 *failed_start = state->start;
713 err = -EEXIST; 711 err = -EEXIST;
714 goto out; 712 goto out;
@@ -748,8 +746,7 @@ hit_next:
748 * desired bit on it. 746 * desired bit on it.
749 */ 747 */
750 if (state->start < start) { 748 if (state->start < start) {
751 set = state->state & bits; 749 if (state->state & exclusive_bits) {
752 if (exclusive && set) {
753 *failed_start = start; 750 *failed_start = start;
754 err = -EEXIST; 751 err = -EEXIST;
755 goto out; 752 goto out;
@@ -799,8 +796,7 @@ hit_next:
799 * on the first half 796 * on the first half
800 */ 797 */
801 if (state->start <= end && state->end > end) { 798 if (state->start <= end && state->end > end) {
802 set = state->state & bits; 799 if (state->state & exclusive_bits) {
803 if (exclusive && set) {
804 *failed_start = start; 800 *failed_start = start;
805 err = -EEXIST; 801 err = -EEXIST;
806 goto out; 802 goto out;
@@ -906,19 +902,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
906 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 902 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
907} 903}
908 904
909static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
910 gfp_t mask)
911{
912 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
913 0, NULL, mask);
914}
915
916static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
917 u64 end, gfp_t mask)
918{
919 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
920}
921
922int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 905int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
923{ 906{
924 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); 907 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
@@ -928,13 +911,14 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
928 * either insert or lock state struct between start and end use mask to tell 911 * either insert or lock state struct between start and end use mask to tell
929 * us if waiting is desired. 912 * us if waiting is desired.
930 */ 913 */
931int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 914int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
915 int bits, gfp_t mask)
932{ 916{
933 int err; 917 int err;
934 u64 failed_start; 918 u64 failed_start;
935 while (1) { 919 while (1) {
936 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 920 err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
937 &failed_start, mask); 921 EXTENT_LOCKED, &failed_start, mask);
938 if (err == -EEXIST && (mask & __GFP_WAIT)) { 922 if (err == -EEXIST && (mask & __GFP_WAIT)) {
939 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 923 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
940 start = failed_start; 924 start = failed_start;
@@ -946,6 +930,11 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
946 return err; 930 return err;
947} 931}
948 932
933int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
934{
935 return lock_extent_bits(tree, start, end, 0, mask);
936}
937
949int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 938int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
950 gfp_t mask) 939 gfp_t mask)
951{ 940{
@@ -985,7 +974,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
985 page_cache_release(page); 974 page_cache_release(page);
986 index++; 975 index++;
987 } 976 }
988 set_extent_dirty(tree, start, end, GFP_NOFS);
989 return 0; 977 return 0;
990} 978}
991 979
@@ -1005,7 +993,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1005 page_cache_release(page); 993 page_cache_release(page);
1006 index++; 994 index++;
1007 } 995 }
1008 set_extent_writeback(tree, start, end, GFP_NOFS);
1009 return 0; 996 return 0;
1010} 997}
1011 998
@@ -1563,10 +1550,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1563static int check_page_writeback(struct extent_io_tree *tree, 1550static int check_page_writeback(struct extent_io_tree *tree,
1564 struct page *page) 1551 struct page *page)
1565{ 1552{
1566 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1553 end_page_writeback(page);
1567 u64 end = start + PAGE_CACHE_SIZE - 1;
1568 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1569 end_page_writeback(page);
1570 return 0; 1554 return 0;
1571} 1555}
1572 1556
@@ -1624,13 +1608,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1624 } 1608 }
1625 1609
1626 if (!uptodate) { 1610 if (!uptodate) {
1627 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1611 clear_extent_uptodate(tree, start, end, GFP_NOFS);
1628 ClearPageUptodate(page); 1612 ClearPageUptodate(page);
1629 SetPageError(page); 1613 SetPageError(page);
1630 } 1614 }
1631 1615
1632 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1633
1634 if (whole_page) 1616 if (whole_page)
1635 end_page_writeback(page); 1617 end_page_writeback(page);
1636 else 1618 else
@@ -2208,8 +2190,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2208 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); 2190 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
2209 2191
2210 if (last_byte <= start) { 2192 if (last_byte <= start) {
2211 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 2193 clear_extent_bit(tree, start, page_end,
2212 unlock_extent(tree, start, page_end, GFP_NOFS); 2194 EXTENT_LOCKED | EXTENT_DIRTY,
2195 1, 0, GFP_NOFS);
2213 if (tree->ops && tree->ops->writepage_end_io_hook) 2196 if (tree->ops && tree->ops->writepage_end_io_hook)
2214 tree->ops->writepage_end_io_hook(page, start, 2197 tree->ops->writepage_end_io_hook(page, start,
2215 page_end, NULL, 1); 2198 page_end, NULL, 1);
@@ -2217,12 +2200,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2217 goto done; 2200 goto done;
2218 } 2201 }
2219 2202
2220 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2221 blocksize = inode->i_sb->s_blocksize; 2203 blocksize = inode->i_sb->s_blocksize;
2222 2204
2223 while (cur <= end) { 2205 while (cur <= end) {
2224 if (cur >= last_byte) { 2206 if (cur >= last_byte) {
2225 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2226 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2207 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2227 if (tree->ops && tree->ops->writepage_end_io_hook) 2208 if (tree->ops && tree->ops->writepage_end_io_hook)
2228 tree->ops->writepage_end_io_hook(page, cur, 2209 tree->ops->writepage_end_io_hook(page, cur,
@@ -2255,9 +2236,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2255 */ 2236 */
2256 if (compressed || block_start == EXTENT_MAP_HOLE || 2237 if (compressed || block_start == EXTENT_MAP_HOLE ||
2257 block_start == EXTENT_MAP_INLINE) { 2238 block_start == EXTENT_MAP_INLINE) {
2258 clear_extent_dirty(tree, cur,
2259 cur + iosize - 1, GFP_NOFS);
2260
2261 unlock_extent(tree, unlock_start, cur + iosize - 1, 2239 unlock_extent(tree, unlock_start, cur + iosize - 1,
2262 GFP_NOFS); 2240 GFP_NOFS);
2263 2241
@@ -2291,7 +2269,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2291 continue; 2269 continue;
2292 } 2270 }
2293 2271
2294 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2295 if (tree->ops && tree->ops->writepage_io_hook) { 2272 if (tree->ops && tree->ops->writepage_io_hook) {
2296 ret = tree->ops->writepage_io_hook(page, cur, 2273 ret = tree->ops->writepage_io_hook(page, cur,
2297 cur + iosize - 1); 2274 cur + iosize - 1);
@@ -2619,7 +2596,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2619 return 0; 2596 return 0;
2620 2597
2621 lock_extent(tree, start, end, GFP_NOFS); 2598 lock_extent(tree, start, end, GFP_NOFS);
2622 wait_on_extent_writeback(tree, start, end); 2599 wait_on_page_writeback(page);
2623 clear_extent_bit(tree, start, end, 2600 clear_extent_bit(tree, start, end,
2624 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2601 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2625 1, 1, GFP_NOFS); 2602 1, 1, GFP_NOFS);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5bc20abf3f3d..88d134d01fbc 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -142,6 +142,8 @@ int try_release_extent_state(struct extent_map_tree *map,
142 struct extent_io_tree *tree, struct page *page, 142 struct extent_io_tree *tree, struct page *page,
143 gfp_t mask); 143 gfp_t mask);
144int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 144int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
145int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
146 int bits, gfp_t mask);
145int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 147int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
146int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 148int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
147 gfp_t mask); 149 gfp_t mask);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8a9c76aecdf3..ef66c3d989b9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -113,8 +113,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
113 int err = 0; 113 int err = 0;
114 int i; 114 int i;
115 struct inode *inode = fdentry(file)->d_inode; 115 struct inode *inode = fdentry(file)->d_inode;
116 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
117 u64 hint_byte;
118 u64 num_bytes; 116 u64 num_bytes;
119 u64 start_pos; 117 u64 start_pos;
120 u64 end_of_last_block; 118 u64 end_of_last_block;
@@ -126,20 +124,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
126 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 124 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
127 125
128 end_of_last_block = start_pos + num_bytes - 1; 126 end_of_last_block = start_pos + num_bytes - 1;
129
130 lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
131 trans = btrfs_join_transaction(root, 1);
132 if (!trans) {
133 err = -ENOMEM;
134 goto out_unlock;
135 }
136 btrfs_set_trans_block_group(trans, inode);
137 hint_byte = 0;
138
139 /* check for reserved extents on each page, we don't want
140 * to reset the delalloc bit on things that already have
141 * extents reserved.
142 */
143 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 127 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
144 for (i = 0; i < num_pages; i++) { 128 for (i = 0; i < num_pages; i++) {
145 struct page *p = pages[i]; 129 struct page *p = pages[i];
@@ -154,9 +138,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
154 * at this time. 138 * at this time.
155 */ 139 */
156 } 140 }
157 err = btrfs_end_transaction(trans, root);
158out_unlock:
159 unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
160 return err; 141 return err;
161} 142}
162 143