aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-09-02 16:53:46 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-11 13:31:07 -0400
commit8b62b72b26bcd72082c4a69d179dd906bcc22200 (patch)
treeceee20dfebe45654cb3a25d8916c195836cdbabf /fs/btrfs/extent_io.c
parent9655d2982b53fdb38a9e0f2f11315b99b92d66e2 (diff)
Btrfs: Use PagePrivate2 to track pages in the data=ordered code.
Btrfs writes go through delalloc to the data=ordered code. This makes sure that all of the data is on disk before the metadata that references it. The tracking means that we have to make sure each page in an extent is fully written before we add that extent into the on-disk btree. This was done in the past by setting the EXTENT_ORDERED bit for the range of an extent when it was added to the data=ordered code, and then clearing the EXTENT_ORDERED bit in the extent state tree as each page finished IO. One of the reasons we had to do this was because sometimes pages are magically dirtied without page_mkwrite being called. The EXTENT_ORDERED bit is checked at writepage time, and if it isn't there, our page becomes dirty without going through the proper path. These bit operations make for a number of rbtree searches for each page, and can cause considerable lock contention. This commit switches from the EXTENT_ORDERED bit to use PagePrivate2. As pages go into the ordered code, PagePrivate2 is set on each one. This is a cheap operation because we already have all the pages locked and ready to go. As IO finishes, the PagePrivate2 bit is cleared and the ordered accounting is updated for each page. At writepage time, if the PagePrivate2 bit is missing, we go into the writepage fixup code to handle improperly dirtied pages. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c29
1 file changed, 10 insertions, 19 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c9a438d374b6..a102422cd92e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -885,13 +885,6 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
885 NULL, mask); 885 NULL, mask);
886} 886}
887 887
888int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
889 gfp_t mask)
890{
891 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, NULL,
892 mask);
893}
894
895int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 888int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
896 int bits, gfp_t mask) 889 int bits, gfp_t mask)
897{ 890{
@@ -921,13 +914,6 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
921 NULL, mask); 914 NULL, mask);
922} 915}
923 916
924int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
925 gfp_t mask)
926{
927 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0,
928 NULL, mask);
929}
930
931int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 917int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
932 gfp_t mask) 918 gfp_t mask)
933{ 919{
@@ -1373,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1373 int clear_unlock, 1359 int clear_unlock,
1374 int clear_delalloc, int clear_dirty, 1360 int clear_delalloc, int clear_dirty,
1375 int set_writeback, 1361 int set_writeback,
1376 int end_writeback) 1362 int end_writeback,
1363 int set_private2)
1377{ 1364{
1378 int ret; 1365 int ret;
1379 struct page *pages[16]; 1366 struct page *pages[16];
@@ -1392,7 +1379,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1392 clear_bits |= EXTENT_DELALLOC; 1379 clear_bits |= EXTENT_DELALLOC;
1393 1380
1394 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1381 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1395 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) 1382 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
1383 set_private2))
1396 return 0; 1384 return 0;
1397 1385
1398 while (nr_pages > 0) { 1386 while (nr_pages > 0) {
@@ -1400,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1400 min_t(unsigned long, 1388 min_t(unsigned long,
1401 nr_pages, ARRAY_SIZE(pages)), pages); 1389 nr_pages, ARRAY_SIZE(pages)), pages);
1402 for (i = 0; i < ret; i++) { 1390 for (i = 0; i < ret; i++) {
1391
1392 if (set_private2)
1393 SetPagePrivate2(pages[i]);
1394
1403 if (pages[i] == locked_page) { 1395 if (pages[i] == locked_page) {
1404 page_cache_release(pages[i]); 1396 page_cache_release(pages[i]);
1405 continue; 1397 continue;
@@ -2792,7 +2784,7 @@ int try_release_extent_state(struct extent_map_tree *map,
2792 int ret = 1; 2784 int ret = 1;
2793 2785
2794 if (test_range_bit(tree, start, end, 2786 if (test_range_bit(tree, start, end,
2795 EXTENT_IOBITS | EXTENT_ORDERED, 0, NULL)) 2787 EXTENT_IOBITS, 0, NULL))
2796 ret = 0; 2788 ret = 0;
2797 else { 2789 else {
2798 if ((mask & GFP_NOFS) == GFP_NOFS) 2790 if ((mask & GFP_NOFS) == GFP_NOFS)
@@ -2835,8 +2827,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2835 } 2827 }
2836 if (!test_range_bit(tree, em->start, 2828 if (!test_range_bit(tree, em->start,
2837 extent_map_end(em) - 1, 2829 extent_map_end(em) - 1,
2838 EXTENT_LOCKED | EXTENT_WRITEBACK | 2830 EXTENT_LOCKED | EXTENT_WRITEBACK,
2839 EXTENT_ORDERED,
2840 0, NULL)) { 2831 0, NULL)) {
2841 remove_extent_mapping(map, em); 2832 remove_extent_mapping(map, em);
2842 /* once for the rb tree */ 2833 /* once for the rb tree */