path: root/fs/btrfs/extent_io.c
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	411
1 file changed, 323 insertions(+), 88 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 563b2d12f4f2..314041fdfa43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -30,6 +30,7 @@ static struct kmem_cache *extent_buffer_cache;
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
+#define LEAK_DEBUG 1
 #ifdef LEAK_DEBUG
 static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
 #endif
@@ -1067,8 +1068,8 @@ EXPORT_SYMBOL(find_first_extent_bit_state);
  *
  * 1 is returned if we find something, 0 if nothing was in the tree
  */
-static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
+static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
 					u64 *start, u64 *end, u64 max_bytes)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1077,11 +1078,11 @@ static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
 	u64 total_bytes = 0;
 
 	spin_lock_irq(&tree->lock);
+
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
 	 */
-search_again:
 	node = tree_search(tree, cur_start);
 	if (!node) {
 		if (!found)
@@ -1100,40 +1101,6 @@ search_again:
 				*end = state->end;
 			goto out;
 		}
-		if (!found && !(state->state & EXTENT_BOUNDARY)) {
-			struct extent_state *prev_state;
-			struct rb_node *prev_node = node;
-			while(1) {
-				prev_node = rb_prev(prev_node);
-				if (!prev_node)
-					break;
-				prev_state = rb_entry(prev_node,
-						      struct extent_state,
-						      rb_node);
-				if ((prev_state->end + 1 != state->start) ||
-				    !(prev_state->state & EXTENT_DELALLOC))
-					break;
-				if ((cur_start - prev_state->start) * 2 >
-				     max_bytes)
-					break;
-				state = prev_state;
-				node = prev_node;
-			}
-		}
-		if (state->state & EXTENT_LOCKED) {
-			DEFINE_WAIT(wait);
-			atomic_inc(&state->refs);
-			prepare_to_wait(&state->wq, &wait,
-					TASK_UNINTERRUPTIBLE);
-			spin_unlock_irq(&tree->lock);
-			schedule();
-			spin_lock_irq(&tree->lock);
-			finish_wait(&state->wq, &wait);
-			free_extent_state(state);
-			goto search_again;
-		}
-		set_state_cb(tree, state, EXTENT_LOCKED);
-		state->state |= EXTENT_LOCKED;
 		if (!found)
 			*start = state->start;
 		found++;
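
With the page locking and the backwards walk removed, find_delalloc_range() only scans the extent state tree under tree->lock and accumulates contiguous delalloc extents until it hits a gap or max_bytes worth of data. A rough userspace model of that accumulation loop, with a plain array standing in for the rbtree of extent_state records (names and the exact stop conditions are illustrative, not the kernel code):

#include <stdio.h>
#include <stdint.h>

struct state { uint64_t start, end; int delalloc; };

static uint64_t find_delalloc(struct state *s, int nr, uint64_t *start,
			      uint64_t *end, uint64_t max_bytes)
{
	uint64_t cur_start = *start, total = 0, found = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (s[i].end < cur_start)
			continue;	/* this extent ends before our range starts */
		if (!found && !s[i].delalloc)
			break;		/* nothing delalloc at the start */
		if (found && (!s[i].delalloc || s[i].start != cur_start))
			break;		/* hit a gap or a non-delalloc extent */
		if (!found)
			*start = s[i].start;
		found++;
		*end = s[i].end;
		cur_start = s[i].end + 1;
		total += s[i].end + 1 - s[i].start;
		if (total >= max_bytes)
			break;
	}
	return found;
}

int main(void)
{
	struct state s[] = {
		{ 0,     4095,  1 },
		{ 4096,  16383, 1 },
		{ 16384, 20479, 0 },
	};
	uint64_t start = 0, end = 0;
	uint64_t found = find_delalloc(s, 3, &start, &end, 128 * 1024);

	printf("found=%llu range=[%llu, %llu]\n", (unsigned long long)found,
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}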
@@ -1151,6 +1118,208 @@ out:
 	return found;
 }
 
+static noinline int __unlock_for_delalloc(struct inode *inode,
+					  struct page *locked_page,
+					  u64 start, u64 end)
+{
+	int ret;
+	struct page *pages[16];
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+
+	if (index == locked_page->index && end_index == index)
+		return 0;
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		for (i = 0; i < ret; i++) {
+			if (pages[i] != locked_page)
+				unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	return 0;
+}
+
+static noinline int lock_delalloc_pages(struct inode *inode,
+					struct page *locked_page,
+					u64 delalloc_start,
+					u64 delalloc_end)
+{
+	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+	unsigned long start_index = index;
+	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+	unsigned long pages_locked = 0;
+	struct page *pages[16];
+	unsigned long nrpages;
+	int ret;
+	int i;
+
+	/* the caller is responsible for locking the start index */
+	if (index == locked_page->index && index == end_index)
+		return 0;
+
+	/* skip the page at the start index */
+	nrpages = end_index - index + 1;
+	while(nrpages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nrpages, ARRAY_SIZE(pages)), pages);
+		if (ret == 0) {
+			ret = -EAGAIN;
+			goto done;
+		}
+		/* now we have an array of pages, lock them all */
+		for (i = 0; i < ret; i++) {
+			/*
+			 * the caller is taking responsibility for
+			 * locked_page
+			 */
+			if (pages[i] != locked_page)
+				lock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		pages_locked += ret;
+		nrpages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	ret = 0;
+done:
+	if (ret && pages_locked) {
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start,
+			      ((u64)(start_index + pages_locked - 1)) <<
+			      PAGE_CACHE_SHIFT);
+	}
+	return ret;
+}
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'. start and end are used to return the range,
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+					     struct extent_io_tree *tree,
+					     struct page *locked_page,
+					     u64 *start, u64 *end,
+					     u64 max_bytes)
+{
+	u64 delalloc_start;
+	u64 delalloc_end;
+	u64 found;
+	int ret;
+	int loops = 0;
+
+again:
+	/* step one, find a bunch of delalloc bytes starting at start */
+	delalloc_start = *start;
+	delalloc_end = 0;
+	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
+				    max_bytes);
+	if (!found) {
+		*start = delalloc_start;
+		*end = delalloc_end;
+		return found;
+	}
+
+	/*
+	 * make sure to limit the number of pages we try to lock down
+	 * if we're looping.
+	 */
+	if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
+		delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) &
+			~((u64)PAGE_CACHE_SIZE - 1);
+	}
+	/* step two, lock all the pages after the page that has start */
+	ret = lock_delalloc_pages(inode, locked_page,
+				  delalloc_start, delalloc_end);
+	if (ret == -EAGAIN) {
+		/* some of the pages are gone, lets avoid looping by
+		 * shortening the size of the delalloc range we're searching
+		 */
+		if (!loops) {
+			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
+			max_bytes = PAGE_CACHE_SIZE - offset;
+			loops = 1;
+			goto again;
+		} else {
+			found = 0;
+			goto out_failed;
+		}
+	}
+	BUG_ON(ret);
+
+	/* step three, lock the state bits for the whole range */
+	lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+
+	/* then test to make sure it is all still delalloc */
+	ret = test_range_bit(tree, delalloc_start, delalloc_end,
+			     EXTENT_DELALLOC, 1);
+	if (!ret) {
+		unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start, delalloc_end);
+		cond_resched();
+		goto again;
+	}
+	*start = delalloc_start;
+	*end = delalloc_end;
+out_failed:
+	return found;
+}
+
+int extent_clear_unlock_delalloc(struct inode *inode,
+				 struct extent_io_tree *tree,
+				 u64 start, u64 end, struct page *locked_page,
+				 int clear_dirty, int set_writeback,
+				 int end_writeback)
+{
+	int ret;
+	struct page *pages[16];
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+	int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+
+	if (clear_dirty)
+		clear_bits |= EXTENT_DIRTY;
+
+	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		for (i = 0; i < ret; i++) {
+			if (pages[i] == locked_page) {
+				page_cache_release(pages[i]);
+				continue;
+			}
+			if (clear_dirty)
+				clear_page_dirty_for_io(pages[i]);
+			if (set_writeback)
+				set_page_writeback(pages[i]);
+			if (end_writeback)
+				end_page_writeback(pages[i]);
+			unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(extent_clear_unlock_delalloc);
+
 /*
  * count the number of bytes in the tree that have a given bit(s)
  * set. This can be fairly slow, except for EXTENT_DIRTY which is
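
The helpers added above split "find" from "lock": find_delalloc_range() scans the state tree, lock_delalloc_pages() and __unlock_for_delalloc() walk the page cache sixteen pages at a time with find_get_pages_contig(), and find_lock_delalloc_range() ties them together with a retry that shrinks max_bytes to a single page when some of the pages have vanished. The sketch below shows only the index bookkeeping they share, as a standalone userspace program with an assumed 4K page size; the kernel's page-cache calls are left out:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define BATCH		16UL

int main(void)
{
	uint64_t start = 5000;			/* delalloc_start, in bytes */
	uint64_t end = 200000;			/* delalloc_end, in bytes   */
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long start_index = index;
	unsigned long end_index = end >> PAGE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	unsigned long pages_locked = 0;

	while (nr_pages > 0) {
		unsigned long got = nr_pages < BATCH ? nr_pages : BATCH;

		/* find_get_pages_contig() may return fewer in the kernel */
		pages_locked += got;
		nr_pages -= got;
		index += got;
	}

	/* on -EAGAIN, unlock from delalloc_start up to the last locked page */
	printf("locked %lu pages, partial-unlock end would be %llu\n",
	       pages_locked,
	       (unsigned long long)(start_index + pages_locked - 1) << PAGE_SHIFT);
	return 0;
}

The partial-unlock offset is the byte address of the last page actually locked, which matches what lock_delalloc_pages() hands to __unlock_for_delalloc() in its -EAGAIN cleanup.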
@@ -1631,38 +1800,26 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 	return bio;
 }
 
-static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
+			  unsigned long bio_flags)
 {
 	int ret = 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
-	struct rb_node *node;
-	struct extent_state *state;
 	u64 start;
 	u64 end;
 
 	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
 	end = start + bvec->bv_len - 1;
 
-	spin_lock_irq(&tree->lock);
-	node = __etree_search(tree, start, NULL, NULL);
-	BUG_ON(!node);
-	state = rb_entry(node, struct extent_state, rb_node);
-	while(state->end < end) {
-		node = rb_next(node);
-		state = rb_entry(node, struct extent_state, rb_node);
-	}
-	BUG_ON(state->end != end);
-	spin_unlock_irq(&tree->lock);
-
 	bio->bi_private = NULL;
 
 	bio_get(bio);
 
 	if (tree->ops && tree->ops->submit_bio_hook)
 		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
-					   mirror_num);
+					   mirror_num, bio_flags);
 	else
 		submit_bio(rw, bio);
 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
@@ -1678,39 +1835,56 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 			  struct bio **bio_ret,
 			  unsigned long max_pages,
 			  bio_end_io_t end_io_func,
-			  int mirror_num)
+			  int mirror_num,
+			  unsigned long prev_bio_flags,
+			  unsigned long bio_flags)
 {
 	int ret = 0;
 	struct bio *bio;
 	int nr;
+	int contig = 0;
+	int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
+	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
+	size_t page_size = min(size, PAGE_CACHE_SIZE);
 
 	if (bio_ret && *bio_ret) {
 		bio = *bio_ret;
-		if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+		if (old_compressed)
+			contig = bio->bi_sector == sector;
+		else
+			contig = bio->bi_sector + (bio->bi_size >> 9) ==
+				sector;
+
+		if (prev_bio_flags != bio_flags || !contig ||
 		    (tree->ops && tree->ops->merge_bio_hook &&
-		     tree->ops->merge_bio_hook(page, offset, size, bio)) ||
-		    bio_add_page(bio, page, size, offset) < size) {
-			ret = submit_one_bio(rw, bio, mirror_num);
+		     tree->ops->merge_bio_hook(page, offset, page_size, bio,
+					       bio_flags)) ||
+		    bio_add_page(bio, page, page_size, offset) < page_size) {
+			ret = submit_one_bio(rw, bio, mirror_num,
+					     prev_bio_flags);
 			bio = NULL;
 		} else {
 			return 0;
 		}
 	}
-	nr = bio_get_nr_vecs(bdev);
+	if (this_compressed)
+		nr = BIO_MAX_PAGES;
+	else
+		nr = bio_get_nr_vecs(bdev);
+
 	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
 	if (!bio) {
 		printk("failed to allocate bio nr %d\n", nr);
 	}
 
-
-	bio_add_page(bio, page, size, offset);
+	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
 	bio->bi_private = tree;
 
 	if (bio_ret) {
 		*bio_ret = bio;
 	} else {
-		ret = submit_one_bio(rw, bio, mirror_num);
+		ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
 	}
 
 	return ret;
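
The merge test in submit_extent_page() now depends on whether the bio carries compressed data: every page of a compressed extent points at the extent's starting sector, while an ordinary bio only accepts a page that begins where the bio currently ends, and mismatched flag words always force a submit. A small model of that decision (the struct below is a stand-in, not the kernel's struct bio, and the flag value is assumed):

#include <stdio.h>
#include <stdint.h>

#define EXTENT_BIO_COMPRESSED	1	/* assumed flag bit */

struct fake_bio {
	uint64_t bi_sector;	/* first sector of the bio */
	uint32_t bi_size;	/* bytes queued so far */
};

static int can_merge(struct fake_bio *bio, unsigned long prev_flags,
		     unsigned long flags, uint64_t sector)
{
	int contig;

	if (prev_flags & EXTENT_BIO_COMPRESSED)
		contig = bio->bi_sector == sector;
	else
		contig = bio->bi_sector + (bio->bi_size >> 9) == sector;

	return contig && prev_flags == flags;
}

int main(void)
{
	struct fake_bio bio = { .bi_sector = 1024, .bi_size = 8192 };

	printf("plain append at 1040: %d\n", can_merge(&bio, 0, 0, 1040));
	printf("plain append at 2048: %d\n", can_merge(&bio, 0, 0, 2048));
	printf("compressed, same start: %d\n",
	       can_merge(&bio, EXTENT_BIO_COMPRESSED,
			 EXTENT_BIO_COMPRESSED, 1024));
	return 0;
}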
@@ -1738,7 +1912,8 @@ void set_page_extent_head(struct page *page, unsigned long len)
 static int __extent_read_full_page(struct extent_io_tree *tree,
 				   struct page *page,
 				   get_extent_t *get_extent,
-				   struct bio **bio, int mirror_num)
+				   struct bio **bio, int mirror_num,
+				   unsigned long *bio_flags)
 {
 	struct inode *inode = page->mapping->host;
 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
@@ -1756,13 +1931,27 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	int nr = 0;
 	size_t page_offset = 0;
 	size_t iosize;
+	size_t disk_io_size;
 	size_t blocksize = inode->i_sb->s_blocksize;
+	unsigned long this_bio_flag = 0;
 
 	set_page_extent_mapped(page);
 
 	end = page_end;
 	lock_extent(tree, start, end, GFP_NOFS);
 
+	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+		char *userpage;
+		size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+
+		if (zero_offset) {
+			iosize = PAGE_CACHE_SIZE - zero_offset;
+			userpage = kmap_atomic(page, KM_USER0);
+			memset(userpage + zero_offset, 0, iosize);
+			flush_dcache_page(page);
+			kunmap_atomic(userpage, KM_USER0);
+		}
+	}
 	while (cur <= end) {
 		if (cur >= last_byte) {
 			char *userpage;
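
Before any blocks are mapped, the page that contains i_size gets its tail zeroed, so stale data past EOF never reaches userspace regardless of how the extents covering the page are read. The same arithmetic in a plain userspace buffer, assuming a 4K page:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int main(void)
{
	unsigned char page[PAGE_SIZE];
	uint64_t last_byte = 3 * PAGE_SIZE + 1000;	/* i_size */
	uint64_t page_index = 3;			/* last page of the file */

	memset(page, 0xaa, sizeof(page));		/* pretend stale data */

	if (page_index == last_byte >> PAGE_SHIFT) {
		size_t zero_offset = last_byte & (PAGE_SIZE - 1);

		if (zero_offset)
			memset(page + zero_offset, 0, PAGE_SIZE - zero_offset);
	}

	printf("byte 999 = %#x, byte 1000 = %#x\n", page[999], page[1000]);
	return 0;
}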
@@ -1793,10 +1982,19 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
 		}
 		BUG_ON(end < cur);
 
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+			this_bio_flag = EXTENT_BIO_COMPRESSED;
+
 		iosize = min(extent_map_end(em) - cur, end - cur + 1);
 		cur_end = min(extent_map_end(em) - 1, end);
 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
-		sector = (em->block_start + extent_offset) >> 9;
+		if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
+			disk_io_size = em->block_len;
+			sector = em->block_start >> 9;
+		} else {
+			sector = (em->block_start + extent_offset) >> 9;
+			disk_io_size = iosize;
+		}
 		bdev = em->bdev;
 		block_start = em->block_start;
 		free_extent_map(em);
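
For a compressed extent the read has to cover the whole compressed blob: it starts at the extent's first disk sector and spans block_len bytes, rather than the page-sized slice at block_start + extent_offset used for regular extents. A standalone illustration of the two cases, with made-up numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t block_start = 1 << 20;	/* extent's first byte on disk */
	uint64_t block_len = 16384;	/* compressed length on disk */
	uint64_t extent_offset = 8192;	/* offset into the logical extent */
	uint64_t iosize = 4096;		/* bytes of this page being read */
	int compressed = 1;
	uint64_t sector, disk_io_size;

	if (compressed) {
		sector = block_start >> 9;
		disk_io_size = block_len;
	} else {
		sector = (block_start + extent_offset) >> 9;
		disk_io_size = iosize;
	}
	printf("sector=%llu disk_io_size=%llu\n",
	       (unsigned long long)sector, (unsigned long long)disk_io_size);
	return 0;
}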
@@ -1845,10 +2043,13 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
 			unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
 			pnr -= page->index;
 			ret = submit_extent_page(READ, tree, page,
-					 sector, iosize, page_offset,
+					 sector, disk_io_size, page_offset,
 					 bdev, bio, pnr,
-					 end_bio_extent_readpage, mirror_num);
+					 end_bio_extent_readpage, mirror_num,
+					 *bio_flags,
+					 this_bio_flag);
 			nr++;
+			*bio_flags = this_bio_flag;
 		}
 		if (ret)
 			SetPageError(page);
@@ -1867,11 +2068,13 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 			    get_extent_t *get_extent)
 {
 	struct bio *bio = NULL;
+	unsigned long bio_flags = 0;
 	int ret;
 
-	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
+	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
+				      &bio_flags);
 	if (bio)
-		submit_one_bio(READ, bio, 0);
+		submit_one_bio(READ, bio, 0, bio_flags);
 	return ret;
 }
 EXPORT_SYMBOL(extent_read_full_page);
@@ -1909,6 +2112,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
 	u64 nr_delalloc;
 	u64 delalloc_end;
+	int page_started;
+	int compressed;
 
 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
@@ -1934,27 +2139,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
 	delalloc_start = start;
 	delalloc_end = 0;
+	page_started = 0;
 	while(delalloc_end < page_end) {
-		nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+						       page,
+						       &delalloc_start,
 						       &delalloc_end,
 						       128 * 1024 * 1024);
 		if (nr_delalloc == 0) {
 			delalloc_start = delalloc_end + 1;
 			continue;
 		}
-		tree->ops->fill_delalloc(inode, delalloc_start,
-					 delalloc_end);
-		clear_extent_bit(tree, delalloc_start,
-				 delalloc_end,
-				 EXTENT_LOCKED | EXTENT_DELALLOC,
-				 1, 0, GFP_NOFS);
+		tree->ops->fill_delalloc(inode, page, delalloc_start,
+					 delalloc_end, &page_started);
 		delalloc_start = delalloc_end + 1;
 	}
+
+	/* did the fill delalloc function already unlock and start the IO? */
+	if (page_started) {
+		return 0;
+	}
+
 	lock_extent(tree, start, page_end, GFP_NOFS);
 	unlock_start = start;
 
 	if (tree->ops && tree->ops->writepage_start_hook) {
-		ret = tree->ops->writepage_start_hook(page, start, page_end);
+		ret = tree->ops->writepage_start_hook(page, start,
+						      page_end);
 		if (ret == -EAGAIN) {
 			unlock_extent(tree, start, page_end, GFP_NOFS);
 			redirty_page_for_writepage(wbc, page);
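
fill_delalloc() now receives the locked page and reports, through page_started, whether it already unlocked the page and queued the IO itself (the compressed path does); in that case __extent_writepage() is finished and returns immediately. The control flow, reduced to a userspace skeleton with hypothetical stand-ins for the two calls:

#include <stdio.h>
#include <stdint.h>

/* hypothetical stand-in for find_lock_delalloc_range(): hand out 64K chunks */
static uint64_t find_range(uint64_t *start, uint64_t *end, uint64_t page_end)
{
	if (*start > page_end)
		return 0;
	*end = *start + 65536 - 1;
	return 1;
}

/* hypothetical stand-in for the fill_delalloc hook */
static void fill_delalloc(uint64_t start, uint64_t end, int *page_started)
{
	if (end - start + 1 >= 65536)
		*page_started = 1;	/* pretend this range went to the async path */
}

int main(void)
{
	uint64_t page_start = 0, page_end = 4095;
	uint64_t delalloc_start = page_start, delalloc_end = 0;
	int page_started = 0;

	while (delalloc_end < page_end) {
		if (!find_range(&delalloc_start, &delalloc_end, page_end))
			break;	/* the kernel advances and retries; break keeps the demo simple */
		fill_delalloc(delalloc_start, delalloc_end, &page_started);
		delalloc_start = delalloc_end + 1;
	}

	if (page_started) {
		printf("fill_delalloc started the IO; __extent_writepage would return 0\n");
		return 0;
	}
	printf("falling through to the normal write path\n");
	return 0;
}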
@@ -2006,10 +2217,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		sector = (em->block_start + extent_offset) >> 9;
 		bdev = em->bdev;
 		block_start = em->block_start;
+		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		free_extent_map(em);
 		em = NULL;
 
-		if (block_start == EXTENT_MAP_HOLE ||
+		/*
+		 * compressed and inline extents are written through other
+		 * paths in the FS
+		 */
+		if (compressed || block_start == EXTENT_MAP_HOLE ||
 		    block_start == EXTENT_MAP_INLINE) {
 			clear_extent_dirty(tree, cur,
 					   cur + iosize - 1, GFP_NOFS);
@@ -2017,16 +2233,28 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			unlock_extent(tree, unlock_start, cur + iosize -1,
 				      GFP_NOFS);
 
-			if (tree->ops && tree->ops->writepage_end_io_hook)
+			/*
+			 * end_io notification does not happen here for
+			 * compressed extents
+			 */
+			if (!compressed && tree->ops &&
+			    tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 cur + iosize - 1,
 							 NULL, 1);
-			cur = cur + iosize;
+			else if (compressed) {
+				/* we don't want to end_page_writeback on
+				 * a compressed extent. this happens
+				 * elsewhere
+				 */
+				nr++;
+			}
+
+			cur += iosize;
 			pg_offset += iosize;
 			unlock_start = cur;
 			continue;
 		}
-
 		/* leave this out until we have a page_mkwrite call */
 		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
 					 EXTENT_DIRTY, 0)) {
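
Compressed extents are skipped here just like holes and inline extents, but they still bump nr: at the end of __extent_writepage() a page with nr == 0 has its writeback ended on the spot, and for a compressed range that has to be left to the async compression path instead. A minimal model of that bookkeeping (end_page_writeback() below is a stub, not the kernel function):

#include <stdio.h>

static void end_page_writeback(const char *who)
{
	printf("writeback ended by %s\n", who);
}

static void writepage(int compressed, int submitted_bio)
{
	int nr = 0;

	if (compressed)
		nr++;	/* the async compression path owns the writeback */
	if (submitted_bio)
		nr++;	/* a bio from this page will end it in its end_io */

	if (nr == 0)
		end_page_writeback("__extent_writepage (hole/inline page)");
	else
		printf("writeback left to %s\n",
		       compressed ? "the compression worker" : "bio end_io");
}

int main(void)
{
	writepage(0, 1);	/* ordinary data extent */
	writepage(1, 0);	/* compressed extent */
	writepage(0, 0);	/* hole or inline extent */
	return 0;
}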
@@ -2034,6 +2262,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			pg_offset += iosize;
 			continue;
 		}
+
 		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
 		if (tree->ops && tree->ops->writepage_io_hook) {
 			ret = tree->ops->writepage_io_hook(page, cur,
@@ -2057,7 +2286,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			ret = submit_extent_page(WRITE, tree, page, sector,
 						 iosize, pg_offset, bdev,
 						 &epd->bio, max_nr,
-						 end_bio_extent_writepage, 0);
+						 end_bio_extent_writepage,
+						 0, 0, 0);
 			if (ret)
 				SetPageError(page);
 		}
@@ -2226,7 +2456,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 	extent_write_cache_pages(tree, mapping, &wbc_writepages,
 				 __extent_writepage, &epd);
 	if (epd.bio) {
-		submit_one_bio(WRITE, epd.bio, 0);
+		submit_one_bio(WRITE, epd.bio, 0, 0);
 	}
 	return ret;
 }
@@ -2248,7 +2478,7 @@ int extent_writepages(struct extent_io_tree *tree,
 	ret = extent_write_cache_pages(tree, mapping, wbc,
 				       __extent_writepage, &epd);
 	if (epd.bio) {
-		submit_one_bio(WRITE, epd.bio, 0);
+		submit_one_bio(WRITE, epd.bio, 0, 0);
 	}
 	return ret;
 }
@@ -2262,6 +2492,7 @@ int extent_readpages(struct extent_io_tree *tree,
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	struct pagevec pvec;
+	unsigned long bio_flags = 0;
 
 	pagevec_init(&pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -2281,7 +2512,7 @@ int extent_readpages(struct extent_io_tree *tree,
 			if (!pagevec_add(&pvec, page))
 				__pagevec_lru_add(&pvec);
 			__extent_read_full_page(tree, page, get_extent,
-						&bio, 0);
+						&bio, 0, &bio_flags);
 		}
 		page_cache_release(page);
 	}
@@ -2289,7 +2520,7 @@ int extent_readpages(struct extent_io_tree *tree,
 		__pagevec_lru_add(&pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
-		submit_one_bio(READ, bio, 0);
+		submit_one_bio(READ, bio, 0, bio_flags);
 	return 0;
 }
 EXPORT_SYMBOL(extent_readpages);
@@ -2414,7 +2645,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
 			ret = submit_extent_page(READ, tree, page,
 					sector, iosize, page_offset, em->bdev,
 					NULL, 1,
-					end_bio_extent_preparewrite, 0);
+					end_bio_extent_preparewrite, 0,
+					0, 0);
 			iocount++;
 			block_start = block_start + iosize;
 		} else {
@@ -2495,7 +2727,9 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 			}
 			if (!test_range_bit(tree, em->start,
 					    extent_map_end(em) - 1,
-					    EXTENT_LOCKED, 0)) {
+					    EXTENT_LOCKED | EXTENT_WRITEBACK |
+					    EXTENT_ORDERED,
+					    0)) {
 				remove_extent_mapping(map, em);
 				/* once for the rb tree */
 				free_extent_map(em);
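
try_release_extent_mapping() now refuses to drop an extent map while any part of its range is locked, under writeback, or still has an ordered extent pending, rather than only while it is locked. The test is just a wider bit mask; the bit values below are assumptions for the demo, not the ones from extent_io.h:

#include <stdio.h>

#define EXTENT_LOCKED		(1 << 0)
#define EXTENT_WRITEBACK	(1 << 1)
#define EXTENT_ORDERED		(1 << 2)

static int busy(unsigned long range_bits)
{
	return range_bits & (EXTENT_LOCKED | EXTENT_WRITEBACK | EXTENT_ORDERED);
}

int main(void)
{
	printf("clean range: %s\n", busy(0) ? "keep" : "drop");
	printf("ordered range: %s\n", busy(EXTENT_ORDERED) ? "keep" : "drop");
	return 0;
}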
@@ -2923,6 +3157,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	int inc_all_pages = 0;
 	unsigned long num_pages;
 	struct bio *bio = NULL;
+	unsigned long bio_flags = 0;
 
 	if (eb->flags & EXTENT_UPTODATE)
 		return 0;
@@ -2973,7 +3208,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 			ClearPageError(page);
 			err = __extent_read_full_page(tree, page,
 						      get_extent, &bio,
-						      mirror_num);
+						      mirror_num, &bio_flags);
 			if (err) {
 				ret = err;
 				printk("err %d from __extent_read_full_page\n", ret);
@@ -2984,7 +3219,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	}
 
 	if (bio)
-		submit_one_bio(READ, bio, mirror_num);
+		submit_one_bio(READ, bio, mirror_num, bio_flags);
 
 	if (ret || !wait) {
 		if (ret)