author	Chris Mason <chris.mason@oracle.com>	2008-07-17 12:53:50 -0400
committer	Chris Mason <chris.mason@oracle.com>	2008-09-25 11:04:04 -0400
commit	e6dcd2dc9c489108648e2ed543315dd134d50a9a (patch)
tree	cddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs/btrfs/extent_io.c
parent	77a41afb7d0dd0f27b6f2f1a5bc701929c7034de (diff)
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until all the data
extents from the transaction were fully on disk. This introduced large
latencies into the commit and stalled new writers in the transaction for
a long time.

The new code changes the way data allocations and extents work:

* When delayed allocation is filled, data extents are reserved, and the
  extent bit EXTENT_ORDERED is set on the entire range of the extent. A
  struct btrfs_ordered_extent is allocated and inserted into a per-inode
  rbtree to track the pending extents.

* As each page is written, EXTENT_ORDERED is cleared on the bytes
  corresponding to that page.

* When all of the bytes corresponding to a single struct
  btrfs_ordered_extent are written, the previously reserved extent is
  inserted into the FS btree and into the extent allocation trees. The
  checksums for the file data are also updated.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
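[Editorial note: for readers new to the scheme, here is a minimal standalone
C sketch of the ordered-extent accounting the message describes. It is
illustrative only and not the kernel code: the real implementation tracks
struct btrfs_ordered_extent in a per-inode rbtree and uses the EXTENT_ORDERED
bit in the extent_io tree, while this model substitutes a linked list and a
bytes_left counter; the helper names (ordered_extent_new,
ordered_extent_page_done) are hypothetical.]

/*
 * Illustrative userspace model of the ordered-extent lifecycle:
 * reserve -> write pages -> last page completes -> commit metadata.
 */
#include <stdio.h>
#include <stdlib.h>

struct ordered_extent {
	unsigned long long start;      /* byte offset of the reserved extent */
	unsigned long long len;        /* reserved length in bytes */
	unsigned long long bytes_left; /* bytes still pending writeback */
	struct ordered_extent *next;
};

/* Reserve an extent and track it (kernel: insert into the per-inode
 * rbtree with EXTENT_ORDERED set on the whole range). */
static struct ordered_extent *ordered_extent_new(struct ordered_extent **head,
						 unsigned long long start,
						 unsigned long long len)
{
	struct ordered_extent *oe = malloc(sizeof(*oe));

	if (!oe)
		return NULL;
	oe->start = start;
	oe->len = len;
	oe->bytes_left = len;
	oe->next = *head;
	*head = oe;
	return oe;
}

/* Called as each page finishes writeback (kernel: clear EXTENT_ORDERED on
 * that page's bytes). Returns 1 once the whole extent is on disk, which is
 * the point where the kernel inserts the file extent item and checksums
 * into the btrees. */
static int ordered_extent_page_done(struct ordered_extent *oe,
				    unsigned long long bytes)
{
	oe->bytes_left -= bytes;
	if (oe->bytes_left)
		return 0;
	printf("extent [%llu, %llu) fully written: commit metadata\n",
	       oe->start, oe->start + oe->len);
	return 1;
}

int main(void)
{
	struct ordered_extent *head = NULL;
	struct ordered_extent *oe = ordered_extent_new(&head, 0, 3 * 4096ULL);
	int i;

	/* three 4K pages of the extent complete one at a time */
	for (i = 0; i < 3 && oe; i++)
		ordered_extent_page_done(oe, 4096);
	free(oe);
	return 0;
}

The bytes_left counter captures the ordering guarantee the patch implements:
metadata for an extent is committed only once every byte of that extent is
known to be on disk, so a crash can never leave the btree pointing at
unwritten data.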
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	52
1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 40a5f53cb040..3f82a6e9ca4f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(set_extent_dirty);
 
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_ordered);
+
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 		    int bits, gfp_t mask)
 {
@@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			gfp_t mask)
 {
 	return set_extent_bit(tree, start, end,
-			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
-			      mask);
+			      EXTENT_DELALLOC | EXTENT_DIRTY,
+			      0, NULL, mask);
 }
 EXPORT_SYMBOL(set_extent_delalloc);
 
@@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(clear_extent_dirty);
 
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_ordered);
+
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 		   gfp_t mask)
 {
@@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio,
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
-
 		if (tree->ops && tree->ops->writepage_end_io_hook) {
 			ret = tree->ops->writepage_end_io_hook(page, start,
-						 end, state);
+						 end, state, uptodate);
 			if (ret)
 				uptodate = 0;
 		}
@@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			unlock_extent(tree, cur, end, GFP_NOFS);
 			break;
 		}
-
 		extent_offset = cur - em->start;
+		if (extent_map_end(em) <= cur) {
+printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
+		}
 		BUG_ON(extent_map_end(em) <= cur);
+		if (end < cur) {
+printk("2bad mapping end %Lu cur %Lu\n", end, cur);
+		}
 		BUG_ON(end < cur);
 
 		iosize = min(extent_map_end(em) - cur, end - cur + 1);
@@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
+	u64 unlock_start;
 	sector_t sector;
 	struct extent_map *em;
 	struct block_device *bdev;
@@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 nr_delalloc;
 	u64 delalloc_end;
 
-
 	WARN_ON(!PageLocked(page));
 	page_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
@@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		delalloc_start = delalloc_end + 1;
 	}
 	lock_extent(tree, start, page_end, GFP_NOFS);
+	unlock_start = start;
 
 	end = page_end;
 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
@@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
 	if (last_byte <= start) {
 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+		unlock_extent(tree, start, page_end, GFP_NOFS);
+		if (tree->ops && tree->ops->writepage_end_io_hook)
+			tree->ops->writepage_end_io_hook(page, start,
+							 page_end, NULL, 1);
+		unlock_start = page_end + 1;
 		goto done;
 	}
 
@@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	while (cur <= end) {
 		if (cur >= last_byte) {
 			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+			unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, cur,
+							 page_end, NULL, 1);
+			unlock_start = page_end + 1;
 			break;
 		}
 		em = epd->get_extent(inode, page, page_offset, cur,
@@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		    block_start == EXTENT_MAP_INLINE) {
 			clear_extent_dirty(tree, cur,
 					   cur + iosize - 1, GFP_NOFS);
+
+			unlock_extent(tree, unlock_start, cur + iosize - 1,
+				      GFP_NOFS);
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, cur,
+							 cur + iosize - 1,
+							 NULL, 1);
 			cur = cur + iosize;
 			page_offset += iosize;
+			unlock_start = cur;
 			continue;
 		}
 
@@ -2119,7 +2156,8 @@ done:
 		set_page_writeback(page);
 		end_page_writeback(page);
 	}
-	unlock_extent(tree, start, page_end, GFP_NOFS);
+	if (unlock_start <= page_end)
+		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
 	unlock_page(page);
 	return 0;
 }
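
[Editorial note: taken together, the __extent_writepage hunks above maintain
one invariant: unlock_start always names the first byte of the locked range
[start, page_end] that has not yet been unlocked. Early-completion paths
(pages past i_size, holes, inline extents) can then unlock and report their
bytes immediately, and the final unlock at done: covers only what remains.
A minimal userspace sketch of that pattern follows; unlock_range() stands in
for unlock_extent(), chunk_is_hole() is a made-up predicate, and CHUNK
replaces the per-extent iosize.]

#include <stdio.h>

#define CHUNK 4096ULL

static void unlock_range(unsigned long long start, unsigned long long end)
{
	printf("unlock [%llu, %llu]\n", start, end);
}

static int chunk_is_hole(unsigned long long cur)
{
	return cur < CHUNK;	/* pretend the first chunk needs no I/O */
}

static void writepage_sketch(unsigned long long start,
			     unsigned long long page_end,
			     unsigned long long last_byte)
{
	unsigned long long cur = start;
	unsigned long long unlock_start = start; /* whole range is locked */

	while (cur <= page_end) {
		if (cur >= last_byte) {
			/* rest of the page is past EOF: unlock it all now */
			unlock_range(unlock_start, page_end);
			unlock_start = page_end + 1;
			break;
		}
		if (chunk_is_hole(cur)) {
			/* no I/O needed, so nothing keeps this locked */
			unlock_range(unlock_start, cur + CHUNK - 1);
			cur += CHUNK;
			unlock_start = cur;
			continue;
		}
		/* real data: I/O would be submitted here; stays locked */
		cur += CHUNK;
	}
	/* done: unlock only what the loop left locked */
	if (unlock_start <= page_end)
		unlock_range(unlock_start, page_end);
}

int main(void)
{
	/* 16K range whose last 4K chunk sits past i_size: the EOF path
	 * unlocks the tail, so the final unlock is skipped */
	writepage_sketch(0, 4 * CHUNK - 1, 3 * CHUNK);
	/* fully written range: only the hole path unlocks early, and the
	 * final unlock releases the rest */
	writepage_sketch(0, 4 * CHUNK - 1, 5 * CHUNK);
	return 0;
}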