Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c | 411
1 files changed, 323 insertions, 88 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 563b2d12f4f2..314041fdfa43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -30,6 +30,7 @@ static struct kmem_cache *extent_buffer_cache;
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
+#define LEAK_DEBUG 1
 #ifdef LEAK_DEBUG
 static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
 #endif
@@ -1067,8 +1068,8 @@ EXPORT_SYMBOL(find_first_extent_bit_state);
  *
  * 1 is returned if we find something, 0 if nothing was in the tree
  */
-static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
+static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
					u64 *start, u64 *end, u64 max_bytes)
 {
	struct rb_node *node;
	struct extent_state *state;
@@ -1077,11 +1078,11 @@ static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
	u64 total_bytes = 0;
 
	spin_lock_irq(&tree->lock);
+
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
-search_again:
	node = tree_search(tree, cur_start);
	if (!node) {
		if (!found)
@@ -1100,40 +1101,6 @@ search_again:
			*end = state->end;
			goto out;
		}
-		if (!found && !(state->state & EXTENT_BOUNDARY)) {
-			struct extent_state *prev_state;
-			struct rb_node *prev_node = node;
-			while(1) {
-				prev_node = rb_prev(prev_node);
-				if (!prev_node)
-					break;
-				prev_state = rb_entry(prev_node,
-						      struct extent_state,
-						      rb_node);
-				if ((prev_state->end + 1 != state->start) ||
-				    !(prev_state->state & EXTENT_DELALLOC))
-					break;
-				if ((cur_start - prev_state->start) * 2 >
-				     max_bytes)
-					break;
-				state = prev_state;
-				node = prev_node;
-			}
-		}
-		if (state->state & EXTENT_LOCKED) {
-			DEFINE_WAIT(wait);
-			atomic_inc(&state->refs);
-			prepare_to_wait(&state->wq, &wait,
-					TASK_UNINTERRUPTIBLE);
-			spin_unlock_irq(&tree->lock);
-			schedule();
-			spin_lock_irq(&tree->lock);
-			finish_wait(&state->wq, &wait);
-			free_extent_state(state);
-			goto search_again;
-		}
-		set_state_cb(tree, state, EXTENT_LOCKED);
-		state->state |= EXTENT_LOCKED;
		if (!found)
			*start = state->start;
		found++;
@@ -1151,6 +1118,208 @@ out:
	return found;
 }
 
+static noinline int __unlock_for_delalloc(struct inode *inode,
+					  struct page *locked_page,
+					  u64 start, u64 end)
+{
+	int ret;
+	struct page *pages[16];
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+
+	if (index == locked_page->index && end_index == index)
+		return 0;
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		for (i = 0; i < ret; i++) {
+			if (pages[i] != locked_page)
+				unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	return 0;
+}
+
+static noinline int lock_delalloc_pages(struct inode *inode,
+					struct page *locked_page,
+					u64 delalloc_start,
+					u64 delalloc_end)
+{
+	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+	unsigned long start_index = index;
+	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+	unsigned long pages_locked = 0;
+	struct page *pages[16];
+	unsigned long nrpages;
+	int ret;
+	int i;
+
+	/* the caller is responsible for locking the start index */
+	if (index == locked_page->index && index == end_index)
+		return 0;
+
+	/* skip the page at the start index */
+	nrpages = end_index - index + 1;
+	while(nrpages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nrpages, ARRAY_SIZE(pages)), pages);
+		if (ret == 0) {
+			ret = -EAGAIN;
+			goto done;
+		}
+		/* now we have an array of pages, lock them all */
+		for (i = 0; i < ret; i++) {
+			/*
+			 * the caller is taking responsibility for
+			 * locked_page
+			 */
+			if (pages[i] != locked_page)
+				lock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		pages_locked += ret;
+		nrpages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	ret = 0;
+done:
+	if (ret && pages_locked) {
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start,
+			      ((u64)(start_index + pages_locked - 1)) <<
+			      PAGE_CACHE_SHIFT);
+	}
+	return ret;
+}
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'.  start and end are used to return the range,
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+					     struct extent_io_tree *tree,
+					     struct page *locked_page,
+					     u64 *start, u64 *end,
+					     u64 max_bytes)
+{
+	u64 delalloc_start;
+	u64 delalloc_end;
+	u64 found;
+	int ret;
+	int loops = 0;
+
+again:
+	/* step one, find a bunch of delalloc bytes starting at start */
+	delalloc_start = *start;
+	delalloc_end = 0;
+	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
+				    max_bytes);
+	if (!found) {
+		*start = delalloc_start;
+		*end = delalloc_end;
+		return found;
+	}
+
+	/*
+	 * make sure to limit the number of pages we try to lock down
+	 * if we're looping.
+	 */
+	if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
+		delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) &
+			~((u64)PAGE_CACHE_SIZE - 1);
+	}
+	/* step two, lock all the pages after the page that has start */
+	ret = lock_delalloc_pages(inode, locked_page,
+				  delalloc_start, delalloc_end);
+	if (ret == -EAGAIN) {
+		/* some of the pages are gone, lets avoid looping by
+		 * shortening the size of the delalloc range we're searching
+		 */
+		if (!loops) {
+			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
+			max_bytes = PAGE_CACHE_SIZE - offset;
+			loops = 1;
+			goto again;
+		} else {
+			found = 0;
+			goto out_failed;
+		}
+	}
+	BUG_ON(ret);
+
+	/* step three, lock the state bits for the whole range */
+	lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+
+	/* then test to make sure it is all still delalloc */
+	ret = test_range_bit(tree, delalloc_start, delalloc_end,
+			     EXTENT_DELALLOC, 1);
+	if (!ret) {
+		unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start, delalloc_end);
+		cond_resched();
+		goto again;
+	}
+	*start = delalloc_start;
+	*end = delalloc_end;
+out_failed:
+	return found;
+}
+
+int extent_clear_unlock_delalloc(struct inode *inode,
+				struct extent_io_tree *tree,
+				u64 start, u64 end, struct page *locked_page,
+				int clear_dirty, int set_writeback,
+				int end_writeback)
+{
+	int ret;
+	struct page *pages[16];
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+	int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+
+	if (clear_dirty)
+		clear_bits |= EXTENT_DIRTY;
+
+	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		for (i = 0; i < ret; i++) {
+			if (pages[i] == locked_page) {
+				page_cache_release(pages[i]);
+				continue;
+			}
+			if (clear_dirty)
+				clear_page_dirty_for_io(pages[i]);
+			if (set_writeback)
+				set_page_writeback(pages[i]);
+			if (end_writeback)
+				end_page_writeback(pages[i]);
+			unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(extent_clear_unlock_delalloc);
+
 /*
  * count the number of bytes in the tree that have a given bit(s)
  * set. This can be fairly slow, except for EXTENT_DIRTY which is
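
All of the new delalloc helpers above walk a byte range as a page-index range and batch the page-cache lookups sixteen pages at a time. The sketch below is a minimal standalone illustration of that index math, including the end offset lock_delalloc_pages hands to __unlock_for_delalloc when it has to back out after locking only part of the range. It is plain userspace C, not kernel code; the 4 KiB page size and the sample byte offsets are assumptions for illustration.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12                     /* assumes 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	uint64_t start = 8192;            /* delalloc_start, byte offset */
	uint64_t end = 70000;             /* delalloc_end, inclusive byte offset */

	/* same math as __unlock_for_delalloc() / lock_delalloc_pages() */
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;

	/* suppose only the first 5 pages were locked before -EAGAIN */
	unsigned long pages_locked = 5;
	uint64_t unlock_end = ((uint64_t)(index + pages_locked - 1)) << PAGE_SHIFT;

	printf("pages %lu..%lu (%lu pages)\n", index, end_index, nr_pages);
	printf("partial failure unlocks bytes %llu..%llu\n",
	       (unsigned long long)start, (unsigned long long)unlock_end);
	return 0;
}

With these numbers the range spans pages 2 through 17, and a failure after five locked pages unwinds only bytes 8192 through 24576, i.e. the pages that were actually locked.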
@@ -1631,38 +1800,26 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
	return bio;
 }
 
-static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
+			  unsigned long bio_flags)
 {
	int ret = 0;
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct page *page = bvec->bv_page;
	struct extent_io_tree *tree = bio->bi_private;
-	struct rb_node *node;
-	struct extent_state *state;
	u64 start;
	u64 end;
 
	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
	end = start + bvec->bv_len - 1;
 
-	spin_lock_irq(&tree->lock);
-	node = __etree_search(tree, start, NULL, NULL);
-	BUG_ON(!node);
-	state = rb_entry(node, struct extent_state, rb_node);
-	while(state->end < end) {
-		node = rb_next(node);
-		state = rb_entry(node, struct extent_state, rb_node);
-	}
-	BUG_ON(state->end != end);
-	spin_unlock_irq(&tree->lock);
-
	bio->bi_private = NULL;
 
	bio_get(bio);
 
	if (tree->ops && tree->ops->submit_bio_hook)
		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
-					   mirror_num);
+					   mirror_num, bio_flags);
	else
		submit_bio(rw, bio);
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
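
After this change submit_one_bio no longer walks the extent state tree; the byte range it reports to the submit_bio_hook comes straight from the last bio_vec. A standalone sketch of that arithmetic, with invented sample values and an assumed 4 KiB page size:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12                     /* assumes 4 KiB pages */

int main(void)
{
	unsigned long page_index = 10;    /* bvec->bv_page->index */
	unsigned int bv_offset = 512;     /* bvec->bv_offset */
	unsigned int bv_len = 3072;       /* bvec->bv_len */

	/* same computation submit_one_bio() does on the last bio_vec */
	uint64_t start = ((uint64_t)page_index << PAGE_SHIFT) + bv_offset;
	uint64_t end = start + bv_len - 1;

	printf("last bio_vec covers bytes %llu..%llu\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}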
@@ -1678,39 +1835,56 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
			      struct bio **bio_ret,
			      unsigned long max_pages,
			      bio_end_io_t end_io_func,
-			      int mirror_num)
+			      int mirror_num,
+			      unsigned long prev_bio_flags,
+			      unsigned long bio_flags)
 {
	int ret = 0;
	struct bio *bio;
	int nr;
+	int contig = 0;
+	int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
+	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
+	size_t page_size = min(size, PAGE_CACHE_SIZE);
 
	if (bio_ret && *bio_ret) {
		bio = *bio_ret;
-		if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+		if (old_compressed)
+			contig = bio->bi_sector == sector;
+		else
+			contig = bio->bi_sector + (bio->bi_size >> 9) ==
+				sector;
+
+		if (prev_bio_flags != bio_flags || !contig ||
		    (tree->ops && tree->ops->merge_bio_hook &&
-		     tree->ops->merge_bio_hook(page, offset, size, bio)) ||
-		    bio_add_page(bio, page, size, offset) < size) {
-			ret = submit_one_bio(rw, bio, mirror_num);
+		     tree->ops->merge_bio_hook(page, offset, page_size, bio,
+					       bio_flags)) ||
+		    bio_add_page(bio, page, page_size, offset) < page_size) {
+			ret = submit_one_bio(rw, bio, mirror_num,
+					     prev_bio_flags);
			bio = NULL;
		} else {
			return 0;
		}
	}
-	nr = bio_get_nr_vecs(bdev);
+	if (this_compressed)
+		nr = BIO_MAX_PAGES;
+	else
+		nr = bio_get_nr_vecs(bdev);
+
	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
	if (!bio) {
		printk("failed to allocate bio nr %d\n", nr);
	}
 
-
-	bio_add_page(bio, page, size, offset);
+	bio_add_page(bio, page, page_size, offset);
	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;
 
	if (bio_ret) {
		*bio_ret = bio;
	} else {
		ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
	}
 
	return ret;
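
The new merge test in submit_extent_page treats compressed and regular bios differently: a compressed bio only keeps growing while every page points at the same starting sector (all pages of a compressed extent are backed by the same on-disk bytes), a regular bio needs physical contiguity, and a change in bio flags always forces a submit. Below is a standalone sketch of that decision; can_merge is a hypothetical helper written for illustration, not a function from this patch, and it leaves out the merge_bio_hook and bio_add_page checks the real code also applies.

#include <stdio.h>
#include <stdint.h>

#define EXTENT_BIO_COMPRESSED 1           /* flag bit, mirroring the patch */

/* hypothetical helper: can a page at 'sector' join the current bio? */
static int can_merge(uint64_t bio_sector, unsigned int bio_size,
		     unsigned long prev_flags, unsigned long new_flags,
		     uint64_t sector)
{
	int contig;

	if (prev_flags & EXTENT_BIO_COMPRESSED)
		/* compressed bios all point at the extent's first sector */
		contig = (bio_sector == sector);
	else
		/* regular bios must stay physically contiguous */
		contig = (bio_sector + (bio_size >> 9) == sector);

	return contig && prev_flags == new_flags;
}

int main(void)
{
	/* a 4 KiB bio starting at sector 1000 */
	printf("%d\n", can_merge(1000, 4096, 0, 0, 1008));  /* contiguous: 1 */
	printf("%d\n", can_merge(1000, 4096, 0, 0, 1016));  /* gap: 0 */
	printf("%d\n", can_merge(1000, 4096, EXTENT_BIO_COMPRESSED,
				 EXTENT_BIO_COMPRESSED, 1000)); /* same start: 1 */
	return 0;
}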
@@ -1738,7 +1912,8 @@ void set_page_extent_head(struct page *page, unsigned long len)
 static int __extent_read_full_page(struct extent_io_tree *tree,
				   struct page *page,
				   get_extent_t *get_extent,
-				   struct bio **bio, int mirror_num)
+				   struct bio **bio, int mirror_num,
+				   unsigned long *bio_flags)
 {
	struct inode *inode = page->mapping->host;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
@@ -1756,13 +1931,27 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
+	size_t disk_io_size;
	size_t blocksize = inode->i_sb->s_blocksize;
+	unsigned long this_bio_flag = 0;
 
	set_page_extent_mapped(page);
 
	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);
 
+	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+		char *userpage;
+		size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+
+		if (zero_offset) {
+			iosize = PAGE_CACHE_SIZE - zero_offset;
+			userpage = kmap_atomic(page, KM_USER0);
+			memset(userpage + zero_offset, 0, iosize);
+			flush_dcache_page(page);
+			kunmap_atomic(userpage, KM_USER0);
+		}
+	}
	while (cur <= end) {
		if (cur >= last_byte) {
			char *userpage;
@@ -1793,10 +1982,19 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
		}
		BUG_ON(end < cur);
 
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+			this_bio_flag = EXTENT_BIO_COMPRESSED;
+
		iosize = min(extent_map_end(em) - cur, end - cur + 1);
		cur_end = min(extent_map_end(em) - 1, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
-		sector = (em->block_start + extent_offset) >> 9;
+		if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
+			disk_io_size = em->block_len;
+			sector = em->block_start >> 9;
+		} else {
+			sector = (em->block_start + extent_offset) >> 9;
+			disk_io_size = iosize;
+		}
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
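
For a compressed mapping the read is sized for the whole on-disk extent: the sector comes from em->block_start alone and disk_io_size is em->block_len, while an uncompressed extent reads just iosize bytes at block_start + extent_offset. A standalone sketch of both computations with invented numbers (plain C, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t block_start = 1048576;   /* em->block_start, bytes on disk */
	uint64_t block_len = 8192;        /* em->block_len, compressed size */
	uint64_t extent_offset = 12288;   /* cur - em->start */
	uint64_t iosize = 4096;           /* logical bytes wanted from this page */
	int compressed = 1;

	uint64_t sector, disk_io_size;

	if (compressed) {
		/* read the whole compressed extent, starting at its first sector */
		sector = block_start >> 9;
		disk_io_size = block_len;
	} else {
		/* read just the bytes backing this page */
		sector = (block_start + extent_offset) >> 9;
		disk_io_size = iosize;
	}

	printf("sector %llu, disk_io_size %llu\n",
	       (unsigned long long)sector, (unsigned long long)disk_io_size);
	return 0;
}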
@@ -1845,10 +2043,13 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
			unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
			pnr -= page->index;
			ret = submit_extent_page(READ, tree, page,
-					 sector, iosize, page_offset,
+					 sector, disk_io_size, page_offset,
					 bdev, bio, pnr,
-					 end_bio_extent_readpage, mirror_num);
+					 end_bio_extent_readpage, mirror_num,
+					 *bio_flags,
+					 this_bio_flag);
			nr++;
+			*bio_flags = this_bio_flag;
		}
		if (ret)
			SetPageError(page);
@@ -1867,11 +2068,13 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
			    get_extent_t *get_extent)
 {
	struct bio *bio = NULL;
+	unsigned long bio_flags = 0;
	int ret;
 
-	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
+	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
+				      &bio_flags);
	if (bio)
-		submit_one_bio(READ, bio, 0);
+		submit_one_bio(READ, bio, 0, bio_flags);
	return ret;
 }
 EXPORT_SYMBOL(extent_read_full_page);
@@ -1909,6 +2112,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
	u64 nr_delalloc;
	u64 delalloc_end;
+	int page_started;
+	int compressed;
 
	WARN_ON(!PageLocked(page));
	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
@@ -1934,27 +2139,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
	delalloc_start = start;
	delalloc_end = 0;
+	page_started = 0;
	while(delalloc_end < page_end) {
-		nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+						       page,
+						       &delalloc_start,
						       &delalloc_end,
						       128 * 1024 * 1024);
		if (nr_delalloc == 0) {
			delalloc_start = delalloc_end + 1;
			continue;
		}
-		tree->ops->fill_delalloc(inode, delalloc_start,
-					 delalloc_end);
-		clear_extent_bit(tree, delalloc_start,
-				 delalloc_end,
-				 EXTENT_LOCKED | EXTENT_DELALLOC,
-				 1, 0, GFP_NOFS);
+		tree->ops->fill_delalloc(inode, page, delalloc_start,
+					 delalloc_end, &page_started);
		delalloc_start = delalloc_end + 1;
	}
+
+	/* did the fill delalloc function already unlock and start the IO? */
+	if (page_started) {
+		return 0;
+	}
+
	lock_extent(tree, start, page_end, GFP_NOFS);
	unlock_start = start;
 
	if (tree->ops && tree->ops->writepage_start_hook) {
-		ret = tree->ops->writepage_start_hook(page, start, page_end);
+		ret = tree->ops->writepage_start_hook(page, start,
+						      page_end);
		if (ret == -EAGAIN) {
			unlock_extent(tree, start, page_end, GFP_NOFS);
			redirty_page_for_writepage(wbc, page);
@@ -2006,10 +2217,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
+		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		free_extent_map(em);
		em = NULL;
 
-		if (block_start == EXTENT_MAP_HOLE ||
+		/*
+		 * compressed and inline extents are written through other
+		 * paths in the FS
+		 */
+		if (compressed || block_start == EXTENT_MAP_HOLE ||
		    block_start == EXTENT_MAP_INLINE) {
			clear_extent_dirty(tree, cur,
					   cur + iosize - 1, GFP_NOFS);
@@ -2017,16 +2233,28 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			unlock_extent(tree, unlock_start, cur + iosize -1,
				      GFP_NOFS);
 
-			if (tree->ops && tree->ops->writepage_end_io_hook)
+			/*
+			 * end_io notification does not happen here for
+			 * compressed extents
+			 */
+			if (!compressed && tree->ops &&
+			    tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,
							 cur + iosize - 1,
							 NULL, 1);
-			cur = cur + iosize;
+			else if (compressed) {
+				/* we don't want to end_page_writeback on
+				 * a compressed extent.  this happens
+				 * elsewhere
+				 */
+				nr++;
+			}
+
+			cur += iosize;
			pg_offset += iosize;
			unlock_start = cur;
			continue;
		}
-
		/* leave this out until we have a page_mkwrite call */
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
					 EXTENT_DIRTY, 0)) {
@@ -2034,6 +2262,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			pg_offset += iosize;
			continue;
		}
+
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,
@@ -2057,7 +2286,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, pg_offset, bdev,
						 &epd->bio, max_nr,
-						 end_bio_extent_writepage, 0);
+						 end_bio_extent_writepage,
+						 0, 0, 0);
			if (ret)
				SetPageError(page);
		}
@@ -2226,7 +2456,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
	extent_write_cache_pages(tree, mapping, &wbc_writepages,
				 __extent_writepage, &epd);
	if (epd.bio) {
-		submit_one_bio(WRITE, epd.bio, 0);
+		submit_one_bio(WRITE, epd.bio, 0, 0);
	}
	return ret;
 }
@@ -2248,7 +2478,7 @@ int extent_writepages(struct extent_io_tree *tree,
	ret = extent_write_cache_pages(tree, mapping, wbc,
				       __extent_writepage, &epd);
	if (epd.bio) {
-		submit_one_bio(WRITE, epd.bio, 0);
+		submit_one_bio(WRITE, epd.bio, 0, 0);
	}
	return ret;
 }
@@ -2262,6 +2492,7 @@ int extent_readpages(struct extent_io_tree *tree,
	struct bio *bio = NULL;
	unsigned page_idx;
	struct pagevec pvec;
+	unsigned long bio_flags = 0;
 
	pagevec_init(&pvec, 0);
	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -2281,7 +2512,7 @@ int extent_readpages(struct extent_io_tree *tree,
			if (!pagevec_add(&pvec, page))
				__pagevec_lru_add(&pvec);
			__extent_read_full_page(tree, page, get_extent,
-						&bio, 0);
+						&bio, 0, &bio_flags);
		}
		page_cache_release(page);
	}
@@ -2289,7 +2520,7 @@ int extent_readpages(struct extent_io_tree *tree,
		__pagevec_lru_add(&pvec);
	BUG_ON(!list_empty(pages));
	if (bio)
-		submit_one_bio(READ, bio, 0);
+		submit_one_bio(READ, bio, 0, bio_flags);
	return 0;
 }
 EXPORT_SYMBOL(extent_readpages);
@@ -2414,7 +2645,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
			ret = submit_extent_page(READ, tree, page,
					 sector, iosize, page_offset, em->bdev,
					 NULL, 1,
-					 end_bio_extent_preparewrite, 0);
+					 end_bio_extent_preparewrite, 0,
+					 0, 0);
			iocount++;
			block_start = block_start + iosize;
		} else {
@@ -2495,7 +2727,9 @@ int try_release_extent_mapping(struct extent_map_tree *map,
		}
		if (!test_range_bit(tree, em->start,
				    extent_map_end(em) - 1,
-				    EXTENT_LOCKED, 0)) {
+				    EXTENT_LOCKED | EXTENT_WRITEBACK |
+				    EXTENT_ORDERED,
+				    0)) {
			remove_extent_mapping(map, em);
			/* once for the rb tree */
			free_extent_map(em);
@@ -2923,6 +3157,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
	int inc_all_pages = 0;
	unsigned long num_pages;
	struct bio *bio = NULL;
+	unsigned long bio_flags = 0;
 
	if (eb->flags & EXTENT_UPTODATE)
		return 0;
@@ -2973,7 +3208,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
			ClearPageError(page);
			err = __extent_read_full_page(tree, page,
						      get_extent, &bio,
-						      mirror_num);
+						      mirror_num, &bio_flags);
			if (err) {
				ret = err;
				printk("err %d from __extent_read_full_page\n", ret);
@@ -2984,7 +3219,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
	}
 
	if (bio)
-		submit_one_bio(READ, bio, mirror_num);
+		submit_one_bio(READ, bio, mirror_num, bio_flags);
 
	if (ret || !wait) {
		if (ret)