author     Linus Torvalds <torvalds@linux-foundation.org>  2013-09-12 12:58:51 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-12 12:58:51 -0400
commit     b7c09ad4014e3678e8cc01fdf663c9f43b272dc6 (patch)
tree       1edb073b0a76ce1530cb31c113f9e741e33ece0e /fs/btrfs/extent_io.c
parent     1812997720ab90d029548778c55d7315555e1fef (diff)
parent     d7396f07358a7c6e22c238d36d1d85f9d652a414 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This is against 3.11-rc7, but was pulled and tested against your tree
  as of yesterday. We do have two small incrementals queued up, but I
  wanted to get this bunch out the door before I hop on an airplane.

  This is a fairly large batch of fixes, performance improvements, and
  cleanups from the usual Btrfs suspects.

  We've included Stefan Behrens' work to index subvolume UUIDs, which is
  targeted at speeding up send/receive with many subvolumes or snapshots
  in place. It closes a long standing performance issue that was built
  in to the disk format.

  Mark Fasheh's offline dedup work is also here. In this case offline
  means the FS is mounted and active, but the dedup work is not done
  inline during file IO. This is a building block where utilities are
  able to ask the FS to dedup a series of extents. The kernel takes
  care of verifying the data involved really is the same. Today this
  involves reading both extents, but we'll continue to evolve the
  patches"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits)
  Btrfs: optimize key searches in btrfs_search_slot
  Btrfs: don't use an async starter for most of our workers
  Btrfs: only update disk_i_size as we remove extents
  Btrfs: fix deadlock in uuid scan kthread
  Btrfs: stop refusing the relocation of chunk 0
  Btrfs: fix memory leak of uuid_root in free_fs_info
  btrfs: reuse kbasename helper
  btrfs: return btrfs error code for dev excl ops err
  Btrfs: allow partial ordered extent completion
  Btrfs: convert all bug_ons in free-space-cache.c
  Btrfs: add support for asserts
  Btrfs: adjust the fs_devices->missing count on unmount
  Btrf: cleanup: don't check for root_refs == 0 twice
  Btrfs: fix for patch "cleanup: don't check the same thing twice"
  Btrfs: get rid of one BUG() in write_all_supers()
  Btrfs: allocate prelim_ref with a slab allocater
  Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC
  Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl
  Btrfs: fix race between removing a dev and writing sbs
  Btrfs: remove ourselves from the cluster list under lock
  ...
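For readers curious about the userspace side of the dedup building block described above, a minimal sketch follows. It is not part of this merge's diff; it assumes the BTRFS_IOC_FILE_EXTENT_SAME ioctl and struct btrfs_ioctl_same_args that the dedup series adds to <linux/btrfs.h>, and it asks the kernel to share one range of a source file with one destination file.

/*
 * Minimal sketch (not part of this commit): ask btrfs to dedup one
 * range of <src> into <dst>.  Assumes the BTRFS_IOC_FILE_EXTENT_SAME
 * ioctl and struct btrfs_ioctl_same_args from the offline dedup series.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	struct btrfs_ioctl_same_args *args;
	size_t sz = sizeof(*args) + sizeof(struct btrfs_ioctl_same_extent_info);
	int src, dst, ret;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
		return 1;
	}
	src = open(argv[1], O_RDONLY);
	dst = open(argv[2], O_RDWR);
	if (src < 0 || dst < 0) {
		perror("open");
		return 1;
	}

	args = calloc(1, sz);			/* one source range, one destination */
	args->logical_offset = 0;		/* start of range in src */
	args->length = 128 * 1024;		/* length to dedup, block aligned */
	args->dest_count = 1;
	args->info[0].fd = dst;
	args->info[0].logical_offset = 0;	/* start of range in dst */

	/* the kernel reads and compares both ranges before sharing them */
	ret = ioctl(src, BTRFS_IOC_FILE_EXTENT_SAME, args);
	if (ret < 0)
		perror("BTRFS_IOC_FILE_EXTENT_SAME");
	else
		printf("status %d, %llu bytes deduped\n",
		       args->info[0].status,
		       (unsigned long long)args->info[0].bytes_deduped);

	free(args);
	close(src);
	close(dst);
	return ret < 0;
}

Because the kernel re-reads and compares both ranges itself, a userspace tool only proposes candidate extents; per-destination results come back in info[].status and info[].bytes_deduped, and offsets and lengths generally need to be aligned to the filesystem block size.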
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  664
1 file changed, 383 insertions, 281 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe443fece851..09582b81640c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
61 state = list_entry(states.next, struct extent_state, leak_list); 61 state = list_entry(states.next, struct extent_state, leak_list);
62 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 62 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
63 "state %lu in tree %p refs %d\n", 63 "state %lu in tree %p refs %d\n",
64 (unsigned long long)state->start, 64 state->start, state->end, state->state, state->tree,
65 (unsigned long long)state->end, 65 atomic_read(&state->refs));
66 state->state, state->tree, atomic_read(&state->refs));
67 list_del(&state->leak_list); 66 list_del(&state->leak_list);
68 kmem_cache_free(extent_state_cache, state); 67 kmem_cache_free(extent_state_cache, state);
69 } 68 }
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
71 while (!list_empty(&buffers)) { 70 while (!list_empty(&buffers)) {
72 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
73 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
74 "refs %d\n", (unsigned long long)eb->start, 73 "refs %d\n",
75 eb->len, atomic_read(&eb->refs)); 74 eb->start, eb->len, atomic_read(&eb->refs));
76 list_del(&eb->leak_list); 75 list_del(&eb->leak_list);
77 kmem_cache_free(extent_buffer_cache, eb); 76 kmem_cache_free(extent_buffer_cache, eb);
78 } 77 }
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 87 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
89 printk_ratelimited(KERN_DEBUG 88 printk_ratelimited(KERN_DEBUG
90 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", 89 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
91 caller, 90 caller, btrfs_ino(inode), isize, start, end);
92 (unsigned long long)btrfs_ino(inode),
93 (unsigned long long)isize,
94 (unsigned long long)start,
95 (unsigned long long)end);
96 } 91 }
97} 92}
98#else 93#else
@@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree,
388 383
389 if (end < start) 384 if (end < start)
390 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", 385 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
391 (unsigned long long)end, 386 end, start);
392 (unsigned long long)start);
393 state->start = start; 387 state->start = start;
394 state->end = end; 388 state->end = end;
395 389
@@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree,
400 struct extent_state *found; 394 struct extent_state *found;
401 found = rb_entry(node, struct extent_state, rb_node); 395 found = rb_entry(node, struct extent_state, rb_node);
402 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 396 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
403 "%llu %llu\n", (unsigned long long)found->start, 397 "%llu %llu\n",
404 (unsigned long long)found->end, 398 found->start, found->end, start, end);
405 (unsigned long long)start, (unsigned long long)end);
406 return -EEXIST; 399 return -EEXIST;
407 } 400 }
408 state->tree = tree; 401 state->tree = tree;
@@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state,
762 } 755 }
763} 756}
764 757
765static void uncache_state(struct extent_state **cached_ptr)
766{
767 if (cached_ptr && (*cached_ptr)) {
768 struct extent_state *state = *cached_ptr;
769 *cached_ptr = NULL;
770 free_extent_state(state);
771 }
772}
773
774/* 758/*
775 * set some bits on a range in the tree. This may require allocations or 759 * set some bits on a range in the tree. This may require allocations or
776 * sleeping, so the gfp mask is used to indicate what is allowed. 760 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1687,31 +1671,21 @@ out_failed:
1687 return found; 1671 return found;
1688} 1672}
1689 1673
1690int extent_clear_unlock_delalloc(struct inode *inode, 1674int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1691 struct extent_io_tree *tree, 1675 struct page *locked_page,
1692 u64 start, u64 end, struct page *locked_page, 1676 unsigned long clear_bits,
1693 unsigned long op) 1677 unsigned long page_ops)
1694{ 1678{
1679 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1695 int ret; 1680 int ret;
1696 struct page *pages[16]; 1681 struct page *pages[16];
1697 unsigned long index = start >> PAGE_CACHE_SHIFT; 1682 unsigned long index = start >> PAGE_CACHE_SHIFT;
1698 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1683 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1699 unsigned long nr_pages = end_index - index + 1; 1684 unsigned long nr_pages = end_index - index + 1;
1700 int i; 1685 int i;
1701 unsigned long clear_bits = 0;
1702
1703 if (op & EXTENT_CLEAR_UNLOCK)
1704 clear_bits |= EXTENT_LOCKED;
1705 if (op & EXTENT_CLEAR_DIRTY)
1706 clear_bits |= EXTENT_DIRTY;
1707
1708 if (op & EXTENT_CLEAR_DELALLOC)
1709 clear_bits |= EXTENT_DELALLOC;
1710 1686
1711 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1687 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1712 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 1688 if (page_ops == 0)
1713 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
1714 EXTENT_SET_PRIVATE2)))
1715 return 0; 1689 return 0;
1716 1690
1717 while (nr_pages > 0) { 1691 while (nr_pages > 0) {
@@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1720 nr_pages, ARRAY_SIZE(pages)), pages); 1694 nr_pages, ARRAY_SIZE(pages)), pages);
1721 for (i = 0; i < ret; i++) { 1695 for (i = 0; i < ret; i++) {
1722 1696
1723 if (op & EXTENT_SET_PRIVATE2) 1697 if (page_ops & PAGE_SET_PRIVATE2)
1724 SetPagePrivate2(pages[i]); 1698 SetPagePrivate2(pages[i]);
1725 1699
1726 if (pages[i] == locked_page) { 1700 if (pages[i] == locked_page) {
1727 page_cache_release(pages[i]); 1701 page_cache_release(pages[i]);
1728 continue; 1702 continue;
1729 } 1703 }
1730 if (op & EXTENT_CLEAR_DIRTY) 1704 if (page_ops & PAGE_CLEAR_DIRTY)
1731 clear_page_dirty_for_io(pages[i]); 1705 clear_page_dirty_for_io(pages[i]);
1732 if (op & EXTENT_SET_WRITEBACK) 1706 if (page_ops & PAGE_SET_WRITEBACK)
1733 set_page_writeback(pages[i]); 1707 set_page_writeback(pages[i]);
1734 if (op & EXTENT_END_WRITEBACK) 1708 if (page_ops & PAGE_END_WRITEBACK)
1735 end_page_writeback(pages[i]); 1709 end_page_writeback(pages[i]);
1736 if (op & EXTENT_CLEAR_UNLOCK_PAGE) 1710 if (page_ops & PAGE_UNLOCK)
1737 unlock_page(pages[i]); 1711 unlock_page(pages[i]);
1738 page_cache_release(pages[i]); 1712 page_cache_release(pages[i]);
1739 } 1713 }
@@ -1810,7 +1784,7 @@ out:
1810 * set the private field for a given byte offset in the tree. If there isn't 1784 * set the private field for a given byte offset in the tree. If there isn't
1811 * an extent_state there already, this does nothing. 1785 * an extent_state there already, this does nothing.
1812 */ 1786 */
1813int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1787static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1814{ 1788{
1815 struct rb_node *node; 1789 struct rb_node *node;
1816 struct extent_state *state; 1790 struct extent_state *state;
@@ -1837,64 +1811,6 @@ out:
1837 return ret; 1811 return ret;
1838} 1812}
1839 1813
1840void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1841 int count)
1842{
1843 struct rb_node *node;
1844 struct extent_state *state;
1845
1846 spin_lock(&tree->lock);
1847 /*
1848 * this search will find all the extents that end after
1849 * our range starts.
1850 */
1851 node = tree_search(tree, start);
1852 BUG_ON(!node);
1853
1854 state = rb_entry(node, struct extent_state, rb_node);
1855 BUG_ON(state->start != start);
1856
1857 while (count) {
1858 state->private = *csums++;
1859 count--;
1860 state = next_state(state);
1861 }
1862 spin_unlock(&tree->lock);
1863}
1864
1865static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1866{
1867 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1868
1869 return page_offset(bvec->bv_page) + bvec->bv_offset;
1870}
1871
1872void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1873 u32 csums[], int count)
1874{
1875 struct rb_node *node;
1876 struct extent_state *state = NULL;
1877 u64 start;
1878
1879 spin_lock(&tree->lock);
1880 do {
1881 start = __btrfs_get_bio_offset(bio, bio_index);
1882 if (state == NULL || state->start != start) {
1883 node = tree_search(tree, start);
1884 BUG_ON(!node);
1885
1886 state = rb_entry(node, struct extent_state, rb_node);
1887 BUG_ON(state->start != start);
1888 }
1889 state->private = *csums++;
1890 count--;
1891 bio_index++;
1892
1893 state = next_state(state);
1894 } while (count);
1895 spin_unlock(&tree->lock);
1896}
1897
1898int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1814int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1899{ 1815{
1900 struct rb_node *node; 1816 struct rb_node *node;
@@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
2173 EXTENT_LOCKED); 2089 EXTENT_LOCKED);
2174 spin_unlock(&BTRFS_I(inode)->io_tree.lock); 2090 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2175 2091
2176 if (state && state->start == failrec->start) { 2092 if (state && state->start <= failrec->start &&
2093 state->end >= failrec->start + failrec->len - 1) {
2177 fs_info = BTRFS_I(inode)->root->fs_info; 2094 fs_info = BTRFS_I(inode)->root->fs_info;
2178 num_copies = btrfs_num_copies(fs_info, failrec->logical, 2095 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2179 failrec->len); 2096 failrec->len);
@@ -2201,9 +2118,9 @@ out:
2201 * needed 2118 * needed
2202 */ 2119 */
2203 2120
2204static int bio_readpage_error(struct bio *failed_bio, struct page *page, 2121static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2205 u64 start, u64 end, int failed_mirror, 2122 struct page *page, u64 start, u64 end,
2206 struct extent_state *state) 2123 int failed_mirror)
2207{ 2124{
2208 struct io_failure_record *failrec = NULL; 2125 struct io_failure_record *failrec = NULL;
2209 u64 private; 2126 u64 private;
@@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2213 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2130 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2214 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2131 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2215 struct bio *bio; 2132 struct bio *bio;
2133 struct btrfs_io_bio *btrfs_failed_bio;
2134 struct btrfs_io_bio *btrfs_bio;
2216 int num_copies; 2135 int num_copies;
2217 int ret; 2136 int ret;
2218 int read_mode; 2137 int read_mode;
@@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2296 * all the retry and error correction code that follows. no 2215 * all the retry and error correction code that follows. no
2297 * matter what the error is, it is very likely to persist. 2216 * matter what the error is, it is very likely to persist.
2298 */ 2217 */
2299 pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " 2218 pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2300 "state=%p, num_copies=%d, next_mirror %d, " 2219 num_copies, failrec->this_mirror, failed_mirror);
2301 "failed_mirror %d\n", state, num_copies,
2302 failrec->this_mirror, failed_mirror);
2303 free_io_failure(inode, failrec, 0); 2220 free_io_failure(inode, failrec, 0);
2304 return -EIO; 2221 return -EIO;
2305 } 2222 }
2306 2223
2307 if (!state) {
2308 spin_lock(&tree->lock);
2309 state = find_first_extent_bit_state(tree, failrec->start,
2310 EXTENT_LOCKED);
2311 if (state && state->start != failrec->start)
2312 state = NULL;
2313 spin_unlock(&tree->lock);
2314 }
2315
2316 /* 2224 /*
2317 * there are two premises: 2225 * there are two premises:
2318 * a) deliver good data to the caller 2226 * a) deliver good data to the caller
@@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2349 read_mode = READ_SYNC; 2257 read_mode = READ_SYNC;
2350 } 2258 }
2351 2259
2352 if (!state || failrec->this_mirror > num_copies) { 2260 if (failrec->this_mirror > num_copies) {
2353 pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " 2261 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2354 "next_mirror %d, failed_mirror %d\n", state,
2355 num_copies, failrec->this_mirror, failed_mirror); 2262 num_copies, failrec->this_mirror, failed_mirror);
2356 free_io_failure(inode, failrec, 0); 2263 free_io_failure(inode, failrec, 0);
2357 return -EIO; 2264 return -EIO;
@@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2362 free_io_failure(inode, failrec, 0); 2269 free_io_failure(inode, failrec, 0);
2363 return -EIO; 2270 return -EIO;
2364 } 2271 }
2365 bio->bi_private = state;
2366 bio->bi_end_io = failed_bio->bi_end_io; 2272 bio->bi_end_io = failed_bio->bi_end_io;
2367 bio->bi_sector = failrec->logical >> 9; 2273 bio->bi_sector = failrec->logical >> 9;
2368 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2274 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2369 bio->bi_size = 0; 2275 bio->bi_size = 0;
2370 2276
2277 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2278 if (btrfs_failed_bio->csum) {
2279 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2280 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2281
2282 btrfs_bio = btrfs_io_bio(bio);
2283 btrfs_bio->csum = btrfs_bio->csum_inline;
2284 phy_offset >>= inode->i_sb->s_blocksize_bits;
2285 phy_offset *= csum_size;
2286 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
2287 csum_size);
2288 }
2289
2371 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 2290 bio_add_page(bio, page, failrec->len, start - page_offset(page));
2372 2291
2373 pr_debug("bio_readpage_error: submitting new read[%#x] to " 2292 pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2450 bio_put(bio); 2369 bio_put(bio);
2451} 2370}
2452 2371
2372static void
2373endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2374 int uptodate)
2375{
2376 struct extent_state *cached = NULL;
2377 u64 end = start + len - 1;
2378
2379 if (uptodate && tree->track_uptodate)
2380 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2381 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2382}
2383
2453/* 2384/*
2454 * after a readpage IO is done, we need to: 2385 * after a readpage IO is done, we need to:
2455 * clear the uptodate bits on error 2386 * clear the uptodate bits on error
@@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2466 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2397 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2467 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 2398 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2468 struct bio_vec *bvec = bio->bi_io_vec; 2399 struct bio_vec *bvec = bio->bi_io_vec;
2400 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2469 struct extent_io_tree *tree; 2401 struct extent_io_tree *tree;
2402 u64 offset = 0;
2470 u64 start; 2403 u64 start;
2471 u64 end; 2404 u64 end;
2405 u64 len;
2406 u64 extent_start = 0;
2407 u64 extent_len = 0;
2472 int mirror; 2408 int mirror;
2473 int ret; 2409 int ret;
2474 2410
@@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2477 2413
2478 do { 2414 do {
2479 struct page *page = bvec->bv_page; 2415 struct page *page = bvec->bv_page;
2480 struct extent_state *cached = NULL;
2481 struct extent_state *state;
2482 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2483 struct inode *inode = page->mapping->host; 2416 struct inode *inode = page->mapping->host;
2484 2417
2485 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2418 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2500 2433
2501 start = page_offset(page); 2434 start = page_offset(page);
2502 end = start + bvec->bv_offset + bvec->bv_len - 1; 2435 end = start + bvec->bv_offset + bvec->bv_len - 1;
2436 len = bvec->bv_len;
2503 2437
2504 if (++bvec <= bvec_end) 2438 if (++bvec <= bvec_end)
2505 prefetchw(&bvec->bv_page->flags); 2439 prefetchw(&bvec->bv_page->flags);
2506 2440
2507 spin_lock(&tree->lock);
2508 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
2509 if (state && state->start == start) {
2510 /*
2511 * take a reference on the state, unlock will drop
2512 * the ref
2513 */
2514 cache_state(state, &cached);
2515 }
2516 spin_unlock(&tree->lock);
2517
2518 mirror = io_bio->mirror_num; 2441 mirror = io_bio->mirror_num;
2519 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2442 if (likely(uptodate && tree->ops &&
2520 ret = tree->ops->readpage_end_io_hook(page, start, end, 2443 tree->ops->readpage_end_io_hook)) {
2521 state, mirror); 2444 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2445 page, start, end,
2446 mirror);
2522 if (ret) 2447 if (ret)
2523 uptodate = 0; 2448 uptodate = 0;
2524 else 2449 else
2525 clean_io_failure(start, page); 2450 clean_io_failure(start, page);
2526 } 2451 }
2527 2452
2528 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2453 if (likely(uptodate))
2454 goto readpage_ok;
2455
2456 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2529 ret = tree->ops->readpage_io_failed_hook(page, mirror); 2457 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2530 if (!ret && !err && 2458 if (!ret && !err &&
2531 test_bit(BIO_UPTODATE, &bio->bi_flags)) 2459 test_bit(BIO_UPTODATE, &bio->bi_flags))
2532 uptodate = 1; 2460 uptodate = 1;
2533 } else if (!uptodate) { 2461 } else {
2534 /* 2462 /*
2535 * The generic bio_readpage_error handles errors the 2463 * The generic bio_readpage_error handles errors the
2536 * following way: If possible, new read requests are 2464 * following way: If possible, new read requests are
@@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2541 * can't handle the error it will return -EIO and we 2469 * can't handle the error it will return -EIO and we
2542 * remain responsible for that page. 2470 * remain responsible for that page.
2543 */ 2471 */
2544 ret = bio_readpage_error(bio, page, start, end, mirror, NULL); 2472 ret = bio_readpage_error(bio, offset, page, start, end,
2473 mirror);
2545 if (ret == 0) { 2474 if (ret == 0) {
2546 uptodate = 2475 uptodate =
2547 test_bit(BIO_UPTODATE, &bio->bi_flags); 2476 test_bit(BIO_UPTODATE, &bio->bi_flags);
2548 if (err) 2477 if (err)
2549 uptodate = 0; 2478 uptodate = 0;
2550 uncache_state(&cached);
2551 continue; 2479 continue;
2552 } 2480 }
2553 } 2481 }
2554 2482readpage_ok:
2555 if (uptodate && tree->track_uptodate) { 2483 if (likely(uptodate)) {
2556 set_extent_uptodate(tree, start, end, &cached,
2557 GFP_ATOMIC);
2558 }
2559 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2560
2561 if (uptodate) {
2562 loff_t i_size = i_size_read(inode); 2484 loff_t i_size = i_size_read(inode);
2563 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 2485 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2564 unsigned offset; 2486 unsigned offset;
@@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2573 SetPageError(page); 2495 SetPageError(page);
2574 } 2496 }
2575 unlock_page(page); 2497 unlock_page(page);
2498 offset += len;
2499
2500 if (unlikely(!uptodate)) {
2501 if (extent_len) {
2502 endio_readpage_release_extent(tree,
2503 extent_start,
2504 extent_len, 1);
2505 extent_start = 0;
2506 extent_len = 0;
2507 }
2508 endio_readpage_release_extent(tree, start,
2509 end - start + 1, 0);
2510 } else if (!extent_len) {
2511 extent_start = start;
2512 extent_len = end + 1 - start;
2513 } else if (extent_start + extent_len == start) {
2514 extent_len += end + 1 - start;
2515 } else {
2516 endio_readpage_release_extent(tree, extent_start,
2517 extent_len, uptodate);
2518 extent_start = start;
2519 extent_len = end + 1 - start;
2520 }
2576 } while (bvec <= bvec_end); 2521 } while (bvec <= bvec_end);
2577 2522
2523 if (extent_len)
2524 endio_readpage_release_extent(tree, extent_start, extent_len,
2525 uptodate);
2526 if (io_bio->end_io)
2527 io_bio->end_io(io_bio, err);
2578 bio_put(bio); 2528 bio_put(bio);
2579} 2529}
2580 2530
@@ -2586,6 +2536,7 @@ struct bio *
2586btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2536btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2587 gfp_t gfp_flags) 2537 gfp_t gfp_flags)
2588{ 2538{
2539 struct btrfs_io_bio *btrfs_bio;
2589 struct bio *bio; 2540 struct bio *bio;
2590 2541
2591 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); 2542 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2601 bio->bi_size = 0; 2552 bio->bi_size = 0;
2602 bio->bi_bdev = bdev; 2553 bio->bi_bdev = bdev;
2603 bio->bi_sector = first_sector; 2554 bio->bi_sector = first_sector;
2555 btrfs_bio = btrfs_io_bio(bio);
2556 btrfs_bio->csum = NULL;
2557 btrfs_bio->csum_allocated = NULL;
2558 btrfs_bio->end_io = NULL;
2604 } 2559 }
2605 return bio; 2560 return bio;
2606} 2561}
@@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2614/* this also allocates from the btrfs_bioset */ 2569/* this also allocates from the btrfs_bioset */
2615struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 2570struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2616{ 2571{
2617 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); 2572 struct btrfs_io_bio *btrfs_bio;
2573 struct bio *bio;
2574
2575 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2576 if (bio) {
2577 btrfs_bio = btrfs_io_bio(bio);
2578 btrfs_bio->csum = NULL;
2579 btrfs_bio->csum_allocated = NULL;
2580 btrfs_bio->end_io = NULL;
2581 }
2582 return bio;
2618} 2583}
2619 2584
2620 2585
@@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page)
2738 } 2703 }
2739} 2704}
2740 2705
2706static struct extent_map *
2707__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2708 u64 start, u64 len, get_extent_t *get_extent,
2709 struct extent_map **em_cached)
2710{
2711 struct extent_map *em;
2712
2713 if (em_cached && *em_cached) {
2714 em = *em_cached;
2715 if (em->in_tree && start >= em->start &&
2716 start < extent_map_end(em)) {
2717 atomic_inc(&em->refs);
2718 return em;
2719 }
2720
2721 free_extent_map(em);
2722 *em_cached = NULL;
2723 }
2724
2725 em = get_extent(inode, page, pg_offset, start, len, 0);
2726 if (em_cached && !IS_ERR_OR_NULL(em)) {
2727 BUG_ON(*em_cached);
2728 atomic_inc(&em->refs);
2729 *em_cached = em;
2730 }
2731 return em;
2732}
2741/* 2733/*
2742 * basic readpage implementation. Locked extent state structs are inserted 2734 * basic readpage implementation. Locked extent state structs are inserted
2743 * into the tree that are removed when the IO is done (by the end_io 2735 * into the tree that are removed when the IO is done (by the end_io
2744 * handlers) 2736 * handlers)
2745 * XXX JDM: This needs looking at to ensure proper page locking 2737 * XXX JDM: This needs looking at to ensure proper page locking
2746 */ 2738 */
2747static int __extent_read_full_page(struct extent_io_tree *tree, 2739static int __do_readpage(struct extent_io_tree *tree,
2748 struct page *page, 2740 struct page *page,
2749 get_extent_t *get_extent, 2741 get_extent_t *get_extent,
2750 struct bio **bio, int mirror_num, 2742 struct extent_map **em_cached,
2751 unsigned long *bio_flags, int rw) 2743 struct bio **bio, int mirror_num,
2744 unsigned long *bio_flags, int rw)
2752{ 2745{
2753 struct inode *inode = page->mapping->host; 2746 struct inode *inode = page->mapping->host;
2754 u64 start = page_offset(page); 2747 u64 start = page_offset(page);
@@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2762 sector_t sector; 2755 sector_t sector;
2763 struct extent_map *em; 2756 struct extent_map *em;
2764 struct block_device *bdev; 2757 struct block_device *bdev;
2765 struct btrfs_ordered_extent *ordered;
2766 int ret; 2758 int ret;
2767 int nr = 0; 2759 int nr = 0;
2760 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2768 size_t pg_offset = 0; 2761 size_t pg_offset = 0;
2769 size_t iosize; 2762 size_t iosize;
2770 size_t disk_io_size; 2763 size_t disk_io_size;
2771 size_t blocksize = inode->i_sb->s_blocksize; 2764 size_t blocksize = inode->i_sb->s_blocksize;
2772 unsigned long this_bio_flag = 0; 2765 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2773 2766
2774 set_page_extent_mapped(page); 2767 set_page_extent_mapped(page);
2775 2768
2769 end = page_end;
2776 if (!PageUptodate(page)) { 2770 if (!PageUptodate(page)) {
2777 if (cleancache_get_page(page) == 0) { 2771 if (cleancache_get_page(page) == 0) {
2778 BUG_ON(blocksize != PAGE_SIZE); 2772 BUG_ON(blocksize != PAGE_SIZE);
2773 unlock_extent(tree, start, end);
2779 goto out; 2774 goto out;
2780 } 2775 }
2781 } 2776 }
2782 2777
2783 end = page_end;
2784 while (1) {
2785 lock_extent(tree, start, end);
2786 ordered = btrfs_lookup_ordered_extent(inode, start);
2787 if (!ordered)
2788 break;
2789 unlock_extent(tree, start, end);
2790 btrfs_start_ordered_extent(inode, ordered, 1);
2791 btrfs_put_ordered_extent(ordered);
2792 }
2793
2794 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 2778 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2795 char *userpage; 2779 char *userpage;
2796 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); 2780 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2817 kunmap_atomic(userpage); 2801 kunmap_atomic(userpage);
2818 set_extent_uptodate(tree, cur, cur + iosize - 1, 2802 set_extent_uptodate(tree, cur, cur + iosize - 1,
2819 &cached, GFP_NOFS); 2803 &cached, GFP_NOFS);
2820 unlock_extent_cached(tree, cur, cur + iosize - 1, 2804 if (!parent_locked)
2821 &cached, GFP_NOFS); 2805 unlock_extent_cached(tree, cur,
2806 cur + iosize - 1,
2807 &cached, GFP_NOFS);
2822 break; 2808 break;
2823 } 2809 }
2824 em = get_extent(inode, page, pg_offset, cur, 2810 em = __get_extent_map(inode, page, pg_offset, cur,
2825 end - cur + 1, 0); 2811 end - cur + 1, get_extent, em_cached);
2826 if (IS_ERR_OR_NULL(em)) { 2812 if (IS_ERR_OR_NULL(em)) {
2827 SetPageError(page); 2813 SetPageError(page);
2828 unlock_extent(tree, cur, end); 2814 if (!parent_locked)
2815 unlock_extent(tree, cur, end);
2829 break; 2816 break;
2830 } 2817 }
2831 extent_offset = cur - em->start; 2818 extent_offset = cur - em->start;
@@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2833 BUG_ON(end < cur); 2820 BUG_ON(end < cur);
2834 2821
2835 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 2822 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2836 this_bio_flag = EXTENT_BIO_COMPRESSED; 2823 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2837 extent_set_compress_type(&this_bio_flag, 2824 extent_set_compress_type(&this_bio_flag,
2838 em->compress_type); 2825 em->compress_type);
2839 } 2826 }
@@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2877 if (test_range_bit(tree, cur, cur_end, 2864 if (test_range_bit(tree, cur, cur_end,
2878 EXTENT_UPTODATE, 1, NULL)) { 2865 EXTENT_UPTODATE, 1, NULL)) {
2879 check_page_uptodate(tree, page); 2866 check_page_uptodate(tree, page);
2880 unlock_extent(tree, cur, cur + iosize - 1); 2867 if (!parent_locked)
2868 unlock_extent(tree, cur, cur + iosize - 1);
2881 cur = cur + iosize; 2869 cur = cur + iosize;
2882 pg_offset += iosize; 2870 pg_offset += iosize;
2883 continue; 2871 continue;
@@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2887 */ 2875 */
2888 if (block_start == EXTENT_MAP_INLINE) { 2876 if (block_start == EXTENT_MAP_INLINE) {
2889 SetPageError(page); 2877 SetPageError(page);
2890 unlock_extent(tree, cur, cur + iosize - 1); 2878 if (!parent_locked)
2879 unlock_extent(tree, cur, cur + iosize - 1);
2891 cur = cur + iosize; 2880 cur = cur + iosize;
2892 pg_offset += iosize; 2881 pg_offset += iosize;
2893 continue; 2882 continue;
@@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2905 *bio_flags = this_bio_flag; 2894 *bio_flags = this_bio_flag;
2906 } else { 2895 } else {
2907 SetPageError(page); 2896 SetPageError(page);
2908 unlock_extent(tree, cur, cur + iosize - 1); 2897 if (!parent_locked)
2898 unlock_extent(tree, cur, cur + iosize - 1);
2909 } 2899 }
2910 cur = cur + iosize; 2900 cur = cur + iosize;
2911 pg_offset += iosize; 2901 pg_offset += iosize;
@@ -2919,6 +2909,104 @@ out:
2919 return 0; 2909 return 0;
2920} 2910}
2921 2911
2912static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
2913 struct page *pages[], int nr_pages,
2914 u64 start, u64 end,
2915 get_extent_t *get_extent,
2916 struct extent_map **em_cached,
2917 struct bio **bio, int mirror_num,
2918 unsigned long *bio_flags, int rw)
2919{
2920 struct inode *inode;
2921 struct btrfs_ordered_extent *ordered;
2922 int index;
2923
2924 inode = pages[0]->mapping->host;
2925 while (1) {
2926 lock_extent(tree, start, end);
2927 ordered = btrfs_lookup_ordered_range(inode, start,
2928 end - start + 1);
2929 if (!ordered)
2930 break;
2931 unlock_extent(tree, start, end);
2932 btrfs_start_ordered_extent(inode, ordered, 1);
2933 btrfs_put_ordered_extent(ordered);
2934 }
2935
2936 for (index = 0; index < nr_pages; index++) {
2937 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
2938 mirror_num, bio_flags, rw);
2939 page_cache_release(pages[index]);
2940 }
2941}
2942
2943static void __extent_readpages(struct extent_io_tree *tree,
2944 struct page *pages[],
2945 int nr_pages, get_extent_t *get_extent,
2946 struct extent_map **em_cached,
2947 struct bio **bio, int mirror_num,
2948 unsigned long *bio_flags, int rw)
2949{
2950 u64 start = 0;
2951 u64 end = 0;
2952 u64 page_start;
2953 int index;
2954 int first_index = 0;
2955
2956 for (index = 0; index < nr_pages; index++) {
2957 page_start = page_offset(pages[index]);
2958 if (!end) {
2959 start = page_start;
2960 end = start + PAGE_CACHE_SIZE - 1;
2961 first_index = index;
2962 } else if (end + 1 == page_start) {
2963 end += PAGE_CACHE_SIZE;
2964 } else {
2965 __do_contiguous_readpages(tree, &pages[first_index],
2966 index - first_index, start,
2967 end, get_extent, em_cached,
2968 bio, mirror_num, bio_flags,
2969 rw);
2970 start = page_start;
2971 end = start + PAGE_CACHE_SIZE - 1;
2972 first_index = index;
2973 }
2974 }
2975
2976 if (end)
2977 __do_contiguous_readpages(tree, &pages[first_index],
2978 index - first_index, start,
2979 end, get_extent, em_cached, bio,
2980 mirror_num, bio_flags, rw);
2981}
2982
2983static int __extent_read_full_page(struct extent_io_tree *tree,
2984 struct page *page,
2985 get_extent_t *get_extent,
2986 struct bio **bio, int mirror_num,
2987 unsigned long *bio_flags, int rw)
2988{
2989 struct inode *inode = page->mapping->host;
2990 struct btrfs_ordered_extent *ordered;
2991 u64 start = page_offset(page);
2992 u64 end = start + PAGE_CACHE_SIZE - 1;
2993 int ret;
2994
2995 while (1) {
2996 lock_extent(tree, start, end);
2997 ordered = btrfs_lookup_ordered_extent(inode, start);
2998 if (!ordered)
2999 break;
3000 unlock_extent(tree, start, end);
3001 btrfs_start_ordered_extent(inode, ordered, 1);
3002 btrfs_put_ordered_extent(ordered);
3003 }
3004
3005 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3006 bio_flags, rw);
3007 return ret;
3008}
3009
2922int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 3010int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2923 get_extent_t *get_extent, int mirror_num) 3011 get_extent_t *get_extent, int mirror_num)
2924{ 3012{
@@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2933 return ret; 3021 return ret;
2934} 3022}
2935 3023
3024int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3025 get_extent_t *get_extent, int mirror_num)
3026{
3027 struct bio *bio = NULL;
3028 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3029 int ret;
3030
3031 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3032 &bio_flags, READ);
3033 if (bio)
3034 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3035 return ret;
3036}
3037
2936static noinline void update_nr_written(struct page *page, 3038static noinline void update_nr_written(struct page *page,
2937 struct writeback_control *wbc, 3039 struct writeback_control *wbc,
2938 unsigned long nr_written) 3040 unsigned long nr_written)
@@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3189 if (!PageWriteback(page)) { 3291 if (!PageWriteback(page)) {
3190 printk(KERN_ERR "btrfs warning page %lu not " 3292 printk(KERN_ERR "btrfs warning page %lu not "
3191 "writeback, cur %llu end %llu\n", 3293 "writeback, cur %llu end %llu\n",
3192 page->index, (unsigned long long)cur, 3294 page->index, cur, end);
3193 (unsigned long long)end);
3194 } 3295 }
3195 3296
3196 ret = submit_extent_page(write_flags, tree, page, 3297 ret = submit_extent_page(write_flags, tree, page,
@@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree,
3769 unsigned long bio_flags = 0; 3870 unsigned long bio_flags = 0;
3770 struct page *pagepool[16]; 3871 struct page *pagepool[16];
3771 struct page *page; 3872 struct page *page;
3772 int i = 0; 3873 struct extent_map *em_cached = NULL;
3773 int nr = 0; 3874 int nr = 0;
3774 3875
3775 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 3876 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree,
3786 pagepool[nr++] = page; 3887 pagepool[nr++] = page;
3787 if (nr < ARRAY_SIZE(pagepool)) 3888 if (nr < ARRAY_SIZE(pagepool))
3788 continue; 3889 continue;
3789 for (i = 0; i < nr; i++) { 3890 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3790 __extent_read_full_page(tree, pagepool[i], get_extent, 3891 &bio, 0, &bio_flags, READ);
3791 &bio, 0, &bio_flags, READ);
3792 page_cache_release(pagepool[i]);
3793 }
3794 nr = 0; 3892 nr = 0;
3795 } 3893 }
3796 for (i = 0; i < nr; i++) { 3894 if (nr)
3797 __extent_read_full_page(tree, pagepool[i], get_extent, 3895 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3798 &bio, 0, &bio_flags, READ); 3896 &bio, 0, &bio_flags, READ);
3799 page_cache_release(pagepool[i]); 3897
3800 } 3898 if (em_cached)
3899 free_extent_map(em_cached);
3801 3900
3802 BUG_ON(!list_empty(pages)); 3901 BUG_ON(!list_empty(pages));
3803 if (bio) 3902 if (bio)
@@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
4136 kmem_cache_free(extent_buffer_cache, eb); 4235 kmem_cache_free(extent_buffer_cache, eb);
4137} 4236}
4138 4237
4238static int extent_buffer_under_io(struct extent_buffer *eb)
4239{
4240 return (atomic_read(&eb->io_pages) ||
4241 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4242 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4243}
4244
4245/*
4246 * Helper for releasing extent buffer page.
4247 */
4248static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4249 unsigned long start_idx)
4250{
4251 unsigned long index;
4252 unsigned long num_pages;
4253 struct page *page;
4254 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4255
4256 BUG_ON(extent_buffer_under_io(eb));
4257
4258 num_pages = num_extent_pages(eb->start, eb->len);
4259 index = start_idx + num_pages;
4260 if (start_idx >= index)
4261 return;
4262
4263 do {
4264 index--;
4265 page = extent_buffer_page(eb, index);
4266 if (page && mapped) {
4267 spin_lock(&page->mapping->private_lock);
4268 /*
4269 * We do this since we'll remove the pages after we've
4270 * removed the eb from the radix tree, so we could race
4271 * and have this page now attached to the new eb. So
4272 * only clear page_private if it's still connected to
4273 * this eb.
4274 */
4275 if (PagePrivate(page) &&
4276 page->private == (unsigned long)eb) {
4277 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4278 BUG_ON(PageDirty(page));
4279 BUG_ON(PageWriteback(page));
4280 /*
4281 * We need to make sure we haven't be attached
4282 * to a new eb.
4283 */
4284 ClearPagePrivate(page);
4285 set_page_private(page, 0);
4286 /* One for the page private */
4287 page_cache_release(page);
4288 }
4289 spin_unlock(&page->mapping->private_lock);
4290
4291 }
4292 if (page) {
4293 /* One for when we alloced the page */
4294 page_cache_release(page);
4295 }
4296 } while (index != start_idx);
4297}
4298
4299/*
4300 * Helper for releasing the extent buffer.
4301 */
4302static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4303{
4304 btrfs_release_extent_buffer_page(eb, 0);
4305 __free_extent_buffer(eb);
4306}
4307
4139static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 4308static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4140 u64 start, 4309 u64 start,
4141 unsigned long len, 4310 unsigned long len,
@@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4184 struct extent_buffer *new; 4353 struct extent_buffer *new;
4185 unsigned long num_pages = num_extent_pages(src->start, src->len); 4354 unsigned long num_pages = num_extent_pages(src->start, src->len);
4186 4355
4187 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); 4356 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
4188 if (new == NULL) 4357 if (new == NULL)
4189 return NULL; 4358 return NULL;
4190 4359
4191 for (i = 0; i < num_pages; i++) { 4360 for (i = 0; i < num_pages; i++) {
4192 p = alloc_page(GFP_ATOMIC); 4361 p = alloc_page(GFP_NOFS);
4193 BUG_ON(!p); 4362 if (!p) {
4363 btrfs_release_extent_buffer(new);
4364 return NULL;
4365 }
4194 attach_extent_buffer_page(new, p); 4366 attach_extent_buffer_page(new, p);
4195 WARN_ON(PageDirty(p)); 4367 WARN_ON(PageDirty(p));
4196 SetPageUptodate(p); 4368 SetPageUptodate(p);
@@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
4210 unsigned long num_pages = num_extent_pages(0, len); 4382 unsigned long num_pages = num_extent_pages(0, len);
4211 unsigned long i; 4383 unsigned long i;
4212 4384
4213 eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); 4385 eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
4214 if (!eb) 4386 if (!eb)
4215 return NULL; 4387 return NULL;
4216 4388
4217 for (i = 0; i < num_pages; i++) { 4389 for (i = 0; i < num_pages; i++) {
4218 eb->pages[i] = alloc_page(GFP_ATOMIC); 4390 eb->pages[i] = alloc_page(GFP_NOFS);
4219 if (!eb->pages[i]) 4391 if (!eb->pages[i])
4220 goto err; 4392 goto err;
4221 } 4393 }
@@ -4231,76 +4403,6 @@ err:
4231 return NULL; 4403 return NULL;
4232} 4404}
4233 4405
4234static int extent_buffer_under_io(struct extent_buffer *eb)
4235{
4236 return (atomic_read(&eb->io_pages) ||
4237 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4238 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4239}
4240
4241/*
4242 * Helper for releasing extent buffer page.
4243 */
4244static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4245 unsigned long start_idx)
4246{
4247 unsigned long index;
4248 unsigned long num_pages;
4249 struct page *page;
4250 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4251
4252 BUG_ON(extent_buffer_under_io(eb));
4253
4254 num_pages = num_extent_pages(eb->start, eb->len);
4255 index = start_idx + num_pages;
4256 if (start_idx >= index)
4257 return;
4258
4259 do {
4260 index--;
4261 page = extent_buffer_page(eb, index);
4262 if (page && mapped) {
4263 spin_lock(&page->mapping->private_lock);
4264 /*
4265 * We do this since we'll remove the pages after we've
4266 * removed the eb from the radix tree, so we could race
4267 * and have this page now attached to the new eb. So
4268 * only clear page_private if it's still connected to
4269 * this eb.
4270 */
4271 if (PagePrivate(page) &&
4272 page->private == (unsigned long)eb) {
4273 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4274 BUG_ON(PageDirty(page));
4275 BUG_ON(PageWriteback(page));
4276 /*
4277 * We need to make sure we haven't be attached
4278 * to a new eb.
4279 */
4280 ClearPagePrivate(page);
4281 set_page_private(page, 0);
4282 /* One for the page private */
4283 page_cache_release(page);
4284 }
4285 spin_unlock(&page->mapping->private_lock);
4286
4287 }
4288 if (page) {
4289 /* One for when we alloced the page */
4290 page_cache_release(page);
4291 }
4292 } while (index != start_idx);
4293}
4294
4295/*
4296 * Helper for releasing the extent buffer.
4297 */
4298static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4299{
4300 btrfs_release_extent_buffer_page(eb, 0);
4301 __free_extent_buffer(eb);
4302}
4303
4304static void check_buffer_tree_ref(struct extent_buffer *eb) 4406static void check_buffer_tree_ref(struct extent_buffer *eb)
4305{ 4407{
4306 int refs; 4408 int refs;
@@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
4771 WARN_ON(start > eb->len); 4873 WARN_ON(start > eb->len);
4772 WARN_ON(start + len > eb->start + eb->len); 4874 WARN_ON(start + len > eb->start + eb->len);
4773 4875
4774 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4876 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4775 4877
4776 while (len > 0) { 4878 while (len > 0) {
4777 page = extent_buffer_page(eb, i); 4879 page = extent_buffer_page(eb, i);
@@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
4813 4915
4814 if (start + min_len > eb->len) { 4916 if (start + min_len > eb->len) {
4815 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 4917 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
4816 "wanted %lu %lu\n", (unsigned long long)eb->start, 4918 "wanted %lu %lu\n",
4817 eb->len, start, min_len); 4919 eb->start, eb->len, start, min_len);
4818 return -EINVAL; 4920 return -EINVAL;
4819 } 4921 }
4820 4922
@@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
4841 WARN_ON(start > eb->len); 4943 WARN_ON(start > eb->len);
4842 WARN_ON(start + len > eb->start + eb->len); 4944 WARN_ON(start + len > eb->start + eb->len);
4843 4945
4844 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4946 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4845 4947
4846 while (len > 0) { 4948 while (len > 0) {
4847 page = extent_buffer_page(eb, i); 4949 page = extent_buffer_page(eb, i);
@@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
4875 WARN_ON(start > eb->len); 4977 WARN_ON(start > eb->len);
4876 WARN_ON(start + len > eb->start + eb->len); 4978 WARN_ON(start + len > eb->start + eb->len);
4877 4979
4878 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4980 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4879 4981
4880 while (len > 0) { 4982 while (len > 0) {
4881 page = extent_buffer_page(eb, i); 4983 page = extent_buffer_page(eb, i);
@@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
4905 WARN_ON(start > eb->len); 5007 WARN_ON(start > eb->len);
4906 WARN_ON(start + len > eb->start + eb->len); 5008 WARN_ON(start + len > eb->start + eb->len);
4907 5009
4908 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 5010 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4909 5011
4910 while (len > 0) { 5012 while (len > 0) {
4911 page = extent_buffer_page(eb, i); 5013 page = extent_buffer_page(eb, i);
@@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
4936 WARN_ON(src->len != dst_len); 5038 WARN_ON(src->len != dst_len);
4937 5039
4938 offset = (start_offset + dst_offset) & 5040 offset = (start_offset + dst_offset) &
4939 ((unsigned long)PAGE_CACHE_SIZE - 1); 5041 (PAGE_CACHE_SIZE - 1);
4940 5042
4941 while (len > 0) { 5043 while (len > 0) {
4942 page = extent_buffer_page(dst, i); 5044 page = extent_buffer_page(dst, i);
@@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5022 5124
5023 while (len > 0) { 5125 while (len > 0) {
5024 dst_off_in_page = (start_offset + dst_offset) & 5126 dst_off_in_page = (start_offset + dst_offset) &
5025 ((unsigned long)PAGE_CACHE_SIZE - 1); 5127 (PAGE_CACHE_SIZE - 1);
5026 src_off_in_page = (start_offset + src_offset) & 5128 src_off_in_page = (start_offset + src_offset) &
5027 ((unsigned long)PAGE_CACHE_SIZE - 1); 5129 (PAGE_CACHE_SIZE - 1);
5028 5130
5029 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 5131 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5030 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 5132 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5075 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 5177 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5076 5178
5077 dst_off_in_page = (start_offset + dst_end) & 5179 dst_off_in_page = (start_offset + dst_end) &
5078 ((unsigned long)PAGE_CACHE_SIZE - 1); 5180 (PAGE_CACHE_SIZE - 1);
5079 src_off_in_page = (start_offset + src_end) & 5181 src_off_in_page = (start_offset + src_end) &
5080 ((unsigned long)PAGE_CACHE_SIZE - 1); 5182 (PAGE_CACHE_SIZE - 1);
5081 5183
5082 cur = min_t(unsigned long, len, src_off_in_page + 1); 5184 cur = min_t(unsigned long, len, src_off_in_page + 1);
5083 cur = min(cur, dst_off_in_page + 1); 5185 cur = min(cur, dst_off_in_page + 1);