Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  664
1 file changed, 383 insertions(+), 281 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe443fece851..09582b81640c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
61 state = list_entry(states.next, struct extent_state, leak_list); 61 state = list_entry(states.next, struct extent_state, leak_list);
62 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 62 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
63 "state %lu in tree %p refs %d\n", 63 "state %lu in tree %p refs %d\n",
64 (unsigned long long)state->start, 64 state->start, state->end, state->state, state->tree,
65 (unsigned long long)state->end, 65 atomic_read(&state->refs));
66 state->state, state->tree, atomic_read(&state->refs));
67 list_del(&state->leak_list); 66 list_del(&state->leak_list);
68 kmem_cache_free(extent_state_cache, state); 67 kmem_cache_free(extent_state_cache, state);
69 } 68 }
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
71 while (!list_empty(&buffers)) { 70 while (!list_empty(&buffers)) {
72 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
73 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
74 "refs %d\n", (unsigned long long)eb->start, 73 "refs %d\n",
75 eb->len, atomic_read(&eb->refs)); 74 eb->start, eb->len, atomic_read(&eb->refs));
76 list_del(&eb->leak_list); 75 list_del(&eb->leak_list);
77 kmem_cache_free(extent_buffer_cache, eb); 76 kmem_cache_free(extent_buffer_cache, eb);
78 } 77 }
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 87 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
89 printk_ratelimited(KERN_DEBUG 88 printk_ratelimited(KERN_DEBUG
90 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", 89 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
91 caller, 90 caller, btrfs_ino(inode), isize, start, end);
92 (unsigned long long)btrfs_ino(inode),
93 (unsigned long long)isize,
94 (unsigned long long)start,
95 (unsigned long long)end);
96 } 91 }
97} 92}
98#else 93#else
@@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree,
388 383
389 if (end < start) 384 if (end < start)
390 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", 385 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
391 (unsigned long long)end, 386 end, start);
392 (unsigned long long)start);
393 state->start = start; 387 state->start = start;
394 state->end = end; 388 state->end = end;
395 389
@@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree,
400 struct extent_state *found; 394 struct extent_state *found;
401 found = rb_entry(node, struct extent_state, rb_node); 395 found = rb_entry(node, struct extent_state, rb_node);
402 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 396 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
403 "%llu %llu\n", (unsigned long long)found->start, 397 "%llu %llu\n",
404 (unsigned long long)found->end, 398 found->start, found->end, start, end);
405 (unsigned long long)start, (unsigned long long)end);
406 return -EEXIST; 399 return -EEXIST;
407 } 400 }
408 state->tree = tree; 401 state->tree = tree;
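Note on the printk cleanups in the hunks above: the (unsigned long long) casts can go because u64 resolves to unsigned long long (asm-generic/int-ll64.h), so %llu already matches the type. A minimal userspace illustration, with the typedef standing in for the kernel's:

#include <stdio.h>

typedef unsigned long long u64;	/* stand-in for the kernel typedef */

int main(void)
{
	u64 start = 12345, end = 67890;

	/* no (unsigned long long) cast needed when the type is already ull */
	printf("btrfs end < start %llu %llu\n", end, start);
	return 0;
}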
@@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state,
762 } 755 }
763} 756}
764 757
765static void uncache_state(struct extent_state **cached_ptr)
766{
767 if (cached_ptr && (*cached_ptr)) {
768 struct extent_state *state = *cached_ptr;
769 *cached_ptr = NULL;
770 free_extent_state(state);
771 }
772}
773
774/* 758/*
775 * set some bits on a range in the tree. This may require allocations or 759 * set some bits on a range in the tree. This may require allocations or
776 * sleeping, so the gfp mask is used to indicate what is allowed. 760 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1687,31 +1671,21 @@ out_failed:
1687 return found; 1671 return found;
1688} 1672}
1689 1673
1690int extent_clear_unlock_delalloc(struct inode *inode, 1674int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1691 struct extent_io_tree *tree, 1675 struct page *locked_page,
1692 u64 start, u64 end, struct page *locked_page, 1676 unsigned long clear_bits,
1693 unsigned long op) 1677 unsigned long page_ops)
1694{ 1678{
1679 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1695 int ret; 1680 int ret;
1696 struct page *pages[16]; 1681 struct page *pages[16];
1697 unsigned long index = start >> PAGE_CACHE_SHIFT; 1682 unsigned long index = start >> PAGE_CACHE_SHIFT;
1698 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1683 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1699 unsigned long nr_pages = end_index - index + 1; 1684 unsigned long nr_pages = end_index - index + 1;
1700 int i; 1685 int i;
1701 unsigned long clear_bits = 0;
1702
1703 if (op & EXTENT_CLEAR_UNLOCK)
1704 clear_bits |= EXTENT_LOCKED;
1705 if (op & EXTENT_CLEAR_DIRTY)
1706 clear_bits |= EXTENT_DIRTY;
1707
1708 if (op & EXTENT_CLEAR_DELALLOC)
1709 clear_bits |= EXTENT_DELALLOC;
1710 1686
1711 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1687 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1712 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 1688 if (page_ops == 0)
1713 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
1714 EXTENT_SET_PRIVATE2)))
1715 return 0; 1689 return 0;
1716 1690
1717 while (nr_pages > 0) { 1691 while (nr_pages > 0) {
@@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1720 nr_pages, ARRAY_SIZE(pages)), pages); 1694 nr_pages, ARRAY_SIZE(pages)), pages);
1721 for (i = 0; i < ret; i++) { 1695 for (i = 0; i < ret; i++) {
1722 1696
1723 if (op & EXTENT_SET_PRIVATE2) 1697 if (page_ops & PAGE_SET_PRIVATE2)
1724 SetPagePrivate2(pages[i]); 1698 SetPagePrivate2(pages[i]);
1725 1699
1726 if (pages[i] == locked_page) { 1700 if (pages[i] == locked_page) {
1727 page_cache_release(pages[i]); 1701 page_cache_release(pages[i]);
1728 continue; 1702 continue;
1729 } 1703 }
1730 if (op & EXTENT_CLEAR_DIRTY) 1704 if (page_ops & PAGE_CLEAR_DIRTY)
1731 clear_page_dirty_for_io(pages[i]); 1705 clear_page_dirty_for_io(pages[i]);
1732 if (op & EXTENT_SET_WRITEBACK) 1706 if (page_ops & PAGE_SET_WRITEBACK)
1733 set_page_writeback(pages[i]); 1707 set_page_writeback(pages[i]);
1734 if (op & EXTENT_END_WRITEBACK) 1708 if (page_ops & PAGE_END_WRITEBACK)
1735 end_page_writeback(pages[i]); 1709 end_page_writeback(pages[i]);
1736 if (op & EXTENT_CLEAR_UNLOCK_PAGE) 1710 if (page_ops & PAGE_UNLOCK)
1737 unlock_page(pages[i]); 1711 unlock_page(pages[i]);
1738 page_cache_release(pages[i]); 1712 page_cache_release(pages[i]);
1739 } 1713 }
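The two hunks above replace the single EXTENT_CLEAR_* op mask with separate clear_bits (extent-state bits) and page_ops (per-page actions) arguments, dropping the translation table. A simplified userspace model of the page_ops loop follows; the PAGE_* bit values and the struct page fields here are illustrative assumptions, not the kernel definitions:

#include <stdio.h>

#define PAGE_UNLOCK		(1 << 0)
#define PAGE_CLEAR_DIRTY	(1 << 1)
#define PAGE_SET_WRITEBACK	(1 << 2)
#define PAGE_END_WRITEBACK	(1 << 3)
#define PAGE_SET_PRIVATE2	(1 << 4)

struct page { int dirty, writeback, locked, private2; };

/* apply the requested per-page actions to every page in the batch */
static void apply_page_ops(struct page *pages, int nr, unsigned long page_ops)
{
	int i;

	if (page_ops == 0)	/* nothing page-related was requested */
		return;

	for (i = 0; i < nr; i++) {
		if (page_ops & PAGE_SET_PRIVATE2)
			pages[i].private2 = 1;
		if (page_ops & PAGE_CLEAR_DIRTY)
			pages[i].dirty = 0;
		if (page_ops & PAGE_SET_WRITEBACK)
			pages[i].writeback = 1;
		if (page_ops & PAGE_END_WRITEBACK)
			pages[i].writeback = 0;
		if (page_ops & PAGE_UNLOCK)
			pages[i].locked = 0;
	}
}

int main(void)
{
	struct page p[2] = { { .dirty = 1, .locked = 1 },
			     { .dirty = 1, .locked = 1 } };

	apply_page_ops(p, 2, PAGE_CLEAR_DIRTY | PAGE_UNLOCK);
	printf("page0: dirty=%d locked=%d\n", p[0].dirty, p[0].locked);
	return 0;
}

Callers now express the two concerns independently, so the extent bits can be cleared with no page work at all (page_ops == 0 returns early, as in the hunk).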
@@ -1810,7 +1784,7 @@ out:
1810 * set the private field for a given byte offset in the tree. If there isn't 1784 * set the private field for a given byte offset in the tree. If there isn't
1811 * an extent_state there already, this does nothing. 1785 * an extent_state there already, this does nothing.
1812 */ 1786 */
1813int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1787static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1814{ 1788{
1815 struct rb_node *node; 1789 struct rb_node *node;
1816 struct extent_state *state; 1790 struct extent_state *state;
@@ -1837,64 +1811,6 @@ out:
1837 return ret; 1811 return ret;
1838} 1812}
1839 1813
1840void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1841 int count)
1842{
1843 struct rb_node *node;
1844 struct extent_state *state;
1845
1846 spin_lock(&tree->lock);
1847 /*
1848 * this search will find all the extents that end after
1849 * our range starts.
1850 */
1851 node = tree_search(tree, start);
1852 BUG_ON(!node);
1853
1854 state = rb_entry(node, struct extent_state, rb_node);
1855 BUG_ON(state->start != start);
1856
1857 while (count) {
1858 state->private = *csums++;
1859 count--;
1860 state = next_state(state);
1861 }
1862 spin_unlock(&tree->lock);
1863}
1864
1865static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1866{
1867 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1868
1869 return page_offset(bvec->bv_page) + bvec->bv_offset;
1870}
1871
1872void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1873 u32 csums[], int count)
1874{
1875 struct rb_node *node;
1876 struct extent_state *state = NULL;
1877 u64 start;
1878
1879 spin_lock(&tree->lock);
1880 do {
1881 start = __btrfs_get_bio_offset(bio, bio_index);
1882 if (state == NULL || state->start != start) {
1883 node = tree_search(tree, start);
1884 BUG_ON(!node);
1885
1886 state = rb_entry(node, struct extent_state, rb_node);
1887 BUG_ON(state->start != start);
1888 }
1889 state->private = *csums++;
1890 count--;
1891 bio_index++;
1892
1893 state = next_state(state);
1894 } while (count);
1895 spin_unlock(&tree->lock);
1896}
1897
1898int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1814int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1899{ 1815{
1900 struct rb_node *node; 1816 struct rb_node *node;
@@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
2173 EXTENT_LOCKED); 2089 EXTENT_LOCKED);
2174 spin_unlock(&BTRFS_I(inode)->io_tree.lock); 2090 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2175 2091
2176 if (state && state->start == failrec->start) { 2092 if (state && state->start <= failrec->start &&
2093 state->end >= failrec->start + failrec->len - 1) {
2177 fs_info = BTRFS_I(inode)->root->fs_info; 2094 fs_info = BTRFS_I(inode)->root->fs_info;
2178 num_copies = btrfs_num_copies(fs_info, failrec->logical, 2095 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2179 failrec->len); 2096 failrec->len);
@@ -2201,9 +2118,9 @@ out:
2201 * needed 2118 * needed
2202 */ 2119 */
2203 2120
2204static int bio_readpage_error(struct bio *failed_bio, struct page *page, 2121static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2205 u64 start, u64 end, int failed_mirror, 2122 struct page *page, u64 start, u64 end,
2206 struct extent_state *state) 2123 int failed_mirror)
2207{ 2124{
2208 struct io_failure_record *failrec = NULL; 2125 struct io_failure_record *failrec = NULL;
2209 u64 private; 2126 u64 private;
@@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2213 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2130 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2214 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2131 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2215 struct bio *bio; 2132 struct bio *bio;
2133 struct btrfs_io_bio *btrfs_failed_bio;
2134 struct btrfs_io_bio *btrfs_bio;
2216 int num_copies; 2135 int num_copies;
2217 int ret; 2136 int ret;
2218 int read_mode; 2137 int read_mode;
@@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2296 * all the retry and error correction code that follows. no 2215 * all the retry and error correction code that follows. no
2297 * matter what the error is, it is very likely to persist. 2216 * matter what the error is, it is very likely to persist.
2298 */ 2217 */
2299 pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " 2218 pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2300 "state=%p, num_copies=%d, next_mirror %d, " 2219 num_copies, failrec->this_mirror, failed_mirror);
2301 "failed_mirror %d\n", state, num_copies,
2302 failrec->this_mirror, failed_mirror);
2303 free_io_failure(inode, failrec, 0); 2220 free_io_failure(inode, failrec, 0);
2304 return -EIO; 2221 return -EIO;
2305 } 2222 }
2306 2223
2307 if (!state) {
2308 spin_lock(&tree->lock);
2309 state = find_first_extent_bit_state(tree, failrec->start,
2310 EXTENT_LOCKED);
2311 if (state && state->start != failrec->start)
2312 state = NULL;
2313 spin_unlock(&tree->lock);
2314 }
2315
2316 /* 2224 /*
2317 * there are two premises: 2225 * there are two premises:
2318 * a) deliver good data to the caller 2226 * a) deliver good data to the caller
@@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2349 read_mode = READ_SYNC; 2257 read_mode = READ_SYNC;
2350 } 2258 }
2351 2259
2352 if (!state || failrec->this_mirror > num_copies) { 2260 if (failrec->this_mirror > num_copies) {
2353 pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " 2261 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2354 "next_mirror %d, failed_mirror %d\n", state,
2355 num_copies, failrec->this_mirror, failed_mirror); 2262 num_copies, failrec->this_mirror, failed_mirror);
2356 free_io_failure(inode, failrec, 0); 2263 free_io_failure(inode, failrec, 0);
2357 return -EIO; 2264 return -EIO;
@@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2362 free_io_failure(inode, failrec, 0); 2269 free_io_failure(inode, failrec, 0);
2363 return -EIO; 2270 return -EIO;
2364 } 2271 }
2365 bio->bi_private = state;
2366 bio->bi_end_io = failed_bio->bi_end_io; 2272 bio->bi_end_io = failed_bio->bi_end_io;
2367 bio->bi_sector = failrec->logical >> 9; 2273 bio->bi_sector = failrec->logical >> 9;
2368 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2274 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2369 bio->bi_size = 0; 2275 bio->bi_size = 0;
2370 2276
2277 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2278 if (btrfs_failed_bio->csum) {
2279 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2280 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2281
2282 btrfs_bio = btrfs_io_bio(bio);
2283 btrfs_bio->csum = btrfs_bio->csum_inline;
2284 phy_offset >>= inode->i_sb->s_blocksize_bits;
2285 phy_offset *= csum_size;
2286 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
2287 csum_size);
2288 }
2289
2371 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 2290 bio_add_page(bio, page, failrec->len, start - page_offset(page));
2372 2291
2373 pr_debug("bio_readpage_error: submitting new read[%#x] to " 2292 pr_debug("bio_readpage_error: submitting new read[%#x] to "
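The csum handoff above lets the repair bio reuse the per-block checksums already attached to the failed bio: the byte offset into the failed bio (phy_offset) becomes a block index, then a byte offset into the csum array. A small standalone sketch of that arithmetic, using illustrative values (4K blocks, 4-byte crc32c csums):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	const unsigned int blocksize_bits = 12;	/* 4096-byte blocks */
	const unsigned int csum_size = 4;	/* crc32c */
	uint8_t failed_csums[8 * 4] = { 0 };	/* csums for an 8-block bio */
	uint8_t repair_csum[4];
	uint64_t phy_offset = 3 * 4096;		/* third block is being retried */

	/* pretend block 3 had the csum 0xabababab */
	memset(failed_csums + 3 * csum_size, 0xab, csum_size);

	phy_offset >>= blocksize_bits;		/* byte offset -> block index */
	phy_offset *= csum_size;		/* block index -> csum offset */
	memcpy(repair_csum, failed_csums + phy_offset, csum_size);

	printf("csum offset %llu, first byte 0x%02x\n",
	       (unsigned long long)phy_offset, repair_csum[0]);
	return 0;
}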
@@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2450 bio_put(bio); 2369 bio_put(bio);
2451} 2370}
2452 2371
2372static void
2373endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2374 int uptodate)
2375{
2376 struct extent_state *cached = NULL;
2377 u64 end = start + len - 1;
2378
2379 if (uptodate && tree->track_uptodate)
2380 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2381 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2382}
2383
2453/* 2384/*
2454 * after a readpage IO is done, we need to: 2385 * after a readpage IO is done, we need to:
2455 * clear the uptodate bits on error 2386 * clear the uptodate bits on error
@@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2466 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2397 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2467 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 2398 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2468 struct bio_vec *bvec = bio->bi_io_vec; 2399 struct bio_vec *bvec = bio->bi_io_vec;
2400 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2469 struct extent_io_tree *tree; 2401 struct extent_io_tree *tree;
2402 u64 offset = 0;
2470 u64 start; 2403 u64 start;
2471 u64 end; 2404 u64 end;
2405 u64 len;
2406 u64 extent_start = 0;
2407 u64 extent_len = 0;
2472 int mirror; 2408 int mirror;
2473 int ret; 2409 int ret;
2474 2410
@@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2477 2413
2478 do { 2414 do {
2479 struct page *page = bvec->bv_page; 2415 struct page *page = bvec->bv_page;
2480 struct extent_state *cached = NULL;
2481 struct extent_state *state;
2482 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2483 struct inode *inode = page->mapping->host; 2416 struct inode *inode = page->mapping->host;
2484 2417
2485 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2418 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2500 2433
2501 start = page_offset(page); 2434 start = page_offset(page);
2502 end = start + bvec->bv_offset + bvec->bv_len - 1; 2435 end = start + bvec->bv_offset + bvec->bv_len - 1;
2436 len = bvec->bv_len;
2503 2437
2504 if (++bvec <= bvec_end) 2438 if (++bvec <= bvec_end)
2505 prefetchw(&bvec->bv_page->flags); 2439 prefetchw(&bvec->bv_page->flags);
2506 2440
2507 spin_lock(&tree->lock);
2508 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
2509 if (state && state->start == start) {
2510 /*
2511 * take a reference on the state, unlock will drop
2512 * the ref
2513 */
2514 cache_state(state, &cached);
2515 }
2516 spin_unlock(&tree->lock);
2517
2518 mirror = io_bio->mirror_num; 2441 mirror = io_bio->mirror_num;
2519 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2442 if (likely(uptodate && tree->ops &&
2520 ret = tree->ops->readpage_end_io_hook(page, start, end, 2443 tree->ops->readpage_end_io_hook)) {
2521 state, mirror); 2444 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2445 page, start, end,
2446 mirror);
2522 if (ret) 2447 if (ret)
2523 uptodate = 0; 2448 uptodate = 0;
2524 else 2449 else
2525 clean_io_failure(start, page); 2450 clean_io_failure(start, page);
2526 } 2451 }
2527 2452
2528 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2453 if (likely(uptodate))
2454 goto readpage_ok;
2455
2456 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2529 ret = tree->ops->readpage_io_failed_hook(page, mirror); 2457 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2530 if (!ret && !err && 2458 if (!ret && !err &&
2531 test_bit(BIO_UPTODATE, &bio->bi_flags)) 2459 test_bit(BIO_UPTODATE, &bio->bi_flags))
2532 uptodate = 1; 2460 uptodate = 1;
2533 } else if (!uptodate) { 2461 } else {
2534 /* 2462 /*
2535 * The generic bio_readpage_error handles errors the 2463 * The generic bio_readpage_error handles errors the
2536 * following way: If possible, new read requests are 2464 * following way: If possible, new read requests are
@@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2541 * can't handle the error it will return -EIO and we 2469 * can't handle the error it will return -EIO and we
2542 * remain responsible for that page. 2470 * remain responsible for that page.
2543 */ 2471 */
2544 ret = bio_readpage_error(bio, page, start, end, mirror, NULL); 2472 ret = bio_readpage_error(bio, offset, page, start, end,
2473 mirror);
2545 if (ret == 0) { 2474 if (ret == 0) {
2546 uptodate = 2475 uptodate =
2547 test_bit(BIO_UPTODATE, &bio->bi_flags); 2476 test_bit(BIO_UPTODATE, &bio->bi_flags);
2548 if (err) 2477 if (err)
2549 uptodate = 0; 2478 uptodate = 0;
2550 uncache_state(&cached);
2551 continue; 2479 continue;
2552 } 2480 }
2553 } 2481 }
2554 2482readpage_ok:
2555 if (uptodate && tree->track_uptodate) { 2483 if (likely(uptodate)) {
2556 set_extent_uptodate(tree, start, end, &cached,
2557 GFP_ATOMIC);
2558 }
2559 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2560
2561 if (uptodate) {
2562 loff_t i_size = i_size_read(inode); 2484 loff_t i_size = i_size_read(inode);
2563 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 2485 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2564 unsigned offset; 2486 unsigned offset;
@@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2573 SetPageError(page); 2495 SetPageError(page);
2574 } 2496 }
2575 unlock_page(page); 2497 unlock_page(page);
2498 offset += len;
2499
2500 if (unlikely(!uptodate)) {
2501 if (extent_len) {
2502 endio_readpage_release_extent(tree,
2503 extent_start,
2504 extent_len, 1);
2505 extent_start = 0;
2506 extent_len = 0;
2507 }
2508 endio_readpage_release_extent(tree, start,
2509 end - start + 1, 0);
2510 } else if (!extent_len) {
2511 extent_start = start;
2512 extent_len = end + 1 - start;
2513 } else if (extent_start + extent_len == start) {
2514 extent_len += end + 1 - start;
2515 } else {
2516 endio_readpage_release_extent(tree, extent_start,
2517 extent_len, uptodate);
2518 extent_start = start;
2519 extent_len = end + 1 - start;
2520 }
2576 } while (bvec <= bvec_end); 2521 } while (bvec <= bvec_end);
2577 2522
2523 if (extent_len)
2524 endio_readpage_release_extent(tree, extent_start, extent_len,
2525 uptodate);
2526 if (io_bio->end_io)
2527 io_bio->end_io(io_bio, err);
2578 bio_put(bio); 2528 bio_put(bio);
2579} 2529}
2580 2530
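The new endio_readpage_release_extent() plus the extent_start/extent_len bookkeeping above coalesce contiguous pages from one bio so the extent tree is unlocked once per run instead of once per page. A userspace model of just the coalescing logic (page size and offsets are illustrative):

#include <stdio.h>
#include <stdint.h>

/* stand-in for endio_readpage_release_extent() */
static void release_extent(uint64_t start, uint64_t len)
{
	printf("release [%llu, %llu)\n",
	       (unsigned long long)start, (unsigned long long)(start + len));
}

int main(void)
{
	/* page-sized ranges; the third one is not contiguous with the second */
	uint64_t starts[] = { 0, 4096, 16384, 20480 };
	uint64_t extent_start = 0, extent_len = 0;
	int i;

	for (i = 0; i < 4; i++) {
		uint64_t start = starts[i], end = start + 4096 - 1;

		if (!extent_len) {			/* first range of a run */
			extent_start = start;
			extent_len = end + 1 - start;
		} else if (extent_start + extent_len == start) {
			extent_len += end + 1 - start;	/* extend the run */
		} else {				/* gap: flush, restart */
			release_extent(extent_start, extent_len);
			extent_start = start;
			extent_len = end + 1 - start;
		}
	}
	if (extent_len)
		release_extent(extent_start, extent_len);	/* tail flush */
	return 0;
}

This prints two releases, [0, 8192) and [16384, 24576), mirroring how the end_io handler batches the unlock_extent_cached() calls.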
@@ -2586,6 +2536,7 @@ struct bio *
2586btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2536btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2587 gfp_t gfp_flags) 2537 gfp_t gfp_flags)
2588{ 2538{
2539 struct btrfs_io_bio *btrfs_bio;
2589 struct bio *bio; 2540 struct bio *bio;
2590 2541
2591 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); 2542 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2601 bio->bi_size = 0; 2552 bio->bi_size = 0;
2602 bio->bi_bdev = bdev; 2553 bio->bi_bdev = bdev;
2603 bio->bi_sector = first_sector; 2554 bio->bi_sector = first_sector;
2555 btrfs_bio = btrfs_io_bio(bio);
2556 btrfs_bio->csum = NULL;
2557 btrfs_bio->csum_allocated = NULL;
2558 btrfs_bio->end_io = NULL;
2604 } 2559 }
2605 return bio; 2560 return bio;
2606} 2561}
@@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2614/* this also allocates from the btrfs_bioset */ 2569/* this also allocates from the btrfs_bioset */
2615struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 2570struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2616{ 2571{
2617 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); 2572 struct btrfs_io_bio *btrfs_bio;
2573 struct bio *bio;
2574
2575 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2576 if (bio) {
2577 btrfs_bio = btrfs_io_bio(bio);
2578 btrfs_bio->csum = NULL;
2579 btrfs_bio->csum_allocated = NULL;
2580 btrfs_bio->end_io = NULL;
2581 }
2582 return bio;
2618} 2583}
2619 2584
2620 2585
@@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page)
2738 } 2703 }
2739} 2704}
2740 2705
2706static struct extent_map *
2707__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2708 u64 start, u64 len, get_extent_t *get_extent,
2709 struct extent_map **em_cached)
2710{
2711 struct extent_map *em;
2712
2713 if (em_cached && *em_cached) {
2714 em = *em_cached;
2715 if (em->in_tree && start >= em->start &&
2716 start < extent_map_end(em)) {
2717 atomic_inc(&em->refs);
2718 return em;
2719 }
2720
2721 free_extent_map(em);
2722 *em_cached = NULL;
2723 }
2724
2725 em = get_extent(inode, page, pg_offset, start, len, 0);
2726 if (em_cached && !IS_ERR_OR_NULL(em)) {
2727 BUG_ON(*em_cached);
2728 atomic_inc(&em->refs);
2729 *em_cached = em;
2730 }
2731 return em;
2732}
2741/* 2733/*
2742 * basic readpage implementation. Locked extent state structs are inserted 2734 * basic readpage implementation. Locked extent state structs are inserted
2743 * into the tree that are removed when the IO is done (by the end_io 2735 * into the tree that are removed when the IO is done (by the end_io
2744 * handlers) 2736 * handlers)
2745 * XXX JDM: This needs looking at to ensure proper page locking 2737 * XXX JDM: This needs looking at to ensure proper page locking
2746 */ 2738 */
2747static int __extent_read_full_page(struct extent_io_tree *tree, 2739static int __do_readpage(struct extent_io_tree *tree,
2748 struct page *page, 2740 struct page *page,
2749 get_extent_t *get_extent, 2741 get_extent_t *get_extent,
2750 struct bio **bio, int mirror_num, 2742 struct extent_map **em_cached,
2751 unsigned long *bio_flags, int rw) 2743 struct bio **bio, int mirror_num,
2744 unsigned long *bio_flags, int rw)
2752{ 2745{
2753 struct inode *inode = page->mapping->host; 2746 struct inode *inode = page->mapping->host;
2754 u64 start = page_offset(page); 2747 u64 start = page_offset(page);
@@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2762 sector_t sector; 2755 sector_t sector;
2763 struct extent_map *em; 2756 struct extent_map *em;
2764 struct block_device *bdev; 2757 struct block_device *bdev;
2765 struct btrfs_ordered_extent *ordered;
2766 int ret; 2758 int ret;
2767 int nr = 0; 2759 int nr = 0;
2760 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2768 size_t pg_offset = 0; 2761 size_t pg_offset = 0;
2769 size_t iosize; 2762 size_t iosize;
2770 size_t disk_io_size; 2763 size_t disk_io_size;
2771 size_t blocksize = inode->i_sb->s_blocksize; 2764 size_t blocksize = inode->i_sb->s_blocksize;
2772 unsigned long this_bio_flag = 0; 2765 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2773 2766
2774 set_page_extent_mapped(page); 2767 set_page_extent_mapped(page);
2775 2768
2769 end = page_end;
2776 if (!PageUptodate(page)) { 2770 if (!PageUptodate(page)) {
2777 if (cleancache_get_page(page) == 0) { 2771 if (cleancache_get_page(page) == 0) {
2778 BUG_ON(blocksize != PAGE_SIZE); 2772 BUG_ON(blocksize != PAGE_SIZE);
2773 unlock_extent(tree, start, end);
2779 goto out; 2774 goto out;
2780 } 2775 }
2781 } 2776 }
2782 2777
2783 end = page_end;
2784 while (1) {
2785 lock_extent(tree, start, end);
2786 ordered = btrfs_lookup_ordered_extent(inode, start);
2787 if (!ordered)
2788 break;
2789 unlock_extent(tree, start, end);
2790 btrfs_start_ordered_extent(inode, ordered, 1);
2791 btrfs_put_ordered_extent(ordered);
2792 }
2793
2794 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 2778 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2795 char *userpage; 2779 char *userpage;
2796 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); 2780 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2817 kunmap_atomic(userpage); 2801 kunmap_atomic(userpage);
2818 set_extent_uptodate(tree, cur, cur + iosize - 1, 2802 set_extent_uptodate(tree, cur, cur + iosize - 1,
2819 &cached, GFP_NOFS); 2803 &cached, GFP_NOFS);
2820 unlock_extent_cached(tree, cur, cur + iosize - 1, 2804 if (!parent_locked)
2821 &cached, GFP_NOFS); 2805 unlock_extent_cached(tree, cur,
2806 cur + iosize - 1,
2807 &cached, GFP_NOFS);
2822 break; 2808 break;
2823 } 2809 }
2824 em = get_extent(inode, page, pg_offset, cur, 2810 em = __get_extent_map(inode, page, pg_offset, cur,
2825 end - cur + 1, 0); 2811 end - cur + 1, get_extent, em_cached);
2826 if (IS_ERR_OR_NULL(em)) { 2812 if (IS_ERR_OR_NULL(em)) {
2827 SetPageError(page); 2813 SetPageError(page);
2828 unlock_extent(tree, cur, end); 2814 if (!parent_locked)
2815 unlock_extent(tree, cur, end);
2829 break; 2816 break;
2830 } 2817 }
2831 extent_offset = cur - em->start; 2818 extent_offset = cur - em->start;
@@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2833 BUG_ON(end < cur); 2820 BUG_ON(end < cur);
2834 2821
2835 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 2822 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2836 this_bio_flag = EXTENT_BIO_COMPRESSED; 2823 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2837 extent_set_compress_type(&this_bio_flag, 2824 extent_set_compress_type(&this_bio_flag,
2838 em->compress_type); 2825 em->compress_type);
2839 } 2826 }
@@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2877 if (test_range_bit(tree, cur, cur_end, 2864 if (test_range_bit(tree, cur, cur_end,
2878 EXTENT_UPTODATE, 1, NULL)) { 2865 EXTENT_UPTODATE, 1, NULL)) {
2879 check_page_uptodate(tree, page); 2866 check_page_uptodate(tree, page);
2880 unlock_extent(tree, cur, cur + iosize - 1); 2867 if (!parent_locked)
2868 unlock_extent(tree, cur, cur + iosize - 1);
2881 cur = cur + iosize; 2869 cur = cur + iosize;
2882 pg_offset += iosize; 2870 pg_offset += iosize;
2883 continue; 2871 continue;
@@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2887 */ 2875 */
2888 if (block_start == EXTENT_MAP_INLINE) { 2876 if (block_start == EXTENT_MAP_INLINE) {
2889 SetPageError(page); 2877 SetPageError(page);
2890 unlock_extent(tree, cur, cur + iosize - 1); 2878 if (!parent_locked)
2879 unlock_extent(tree, cur, cur + iosize - 1);
2891 cur = cur + iosize; 2880 cur = cur + iosize;
2892 pg_offset += iosize; 2881 pg_offset += iosize;
2893 continue; 2882 continue;
@@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2905 *bio_flags = this_bio_flag; 2894 *bio_flags = this_bio_flag;
2906 } else { 2895 } else {
2907 SetPageError(page); 2896 SetPageError(page);
2908 unlock_extent(tree, cur, cur + iosize - 1); 2897 if (!parent_locked)
2898 unlock_extent(tree, cur, cur + iosize - 1);
2909 } 2899 }
2910 cur = cur + iosize; 2900 cur = cur + iosize;
2911 pg_offset += iosize; 2901 pg_offset += iosize;
@@ -2919,6 +2909,104 @@ out:
2919 return 0; 2909 return 0;
2920} 2910}
2921 2911
2912static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
2913 struct page *pages[], int nr_pages,
2914 u64 start, u64 end,
2915 get_extent_t *get_extent,
2916 struct extent_map **em_cached,
2917 struct bio **bio, int mirror_num,
2918 unsigned long *bio_flags, int rw)
2919{
2920 struct inode *inode;
2921 struct btrfs_ordered_extent *ordered;
2922 int index;
2923
2924 inode = pages[0]->mapping->host;
2925 while (1) {
2926 lock_extent(tree, start, end);
2927 ordered = btrfs_lookup_ordered_range(inode, start,
2928 end - start + 1);
2929 if (!ordered)
2930 break;
2931 unlock_extent(tree, start, end);
2932 btrfs_start_ordered_extent(inode, ordered, 1);
2933 btrfs_put_ordered_extent(ordered);
2934 }
2935
2936 for (index = 0; index < nr_pages; index++) {
2937 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
2938 mirror_num, bio_flags, rw);
2939 page_cache_release(pages[index]);
2940 }
2941}
2942
2943static void __extent_readpages(struct extent_io_tree *tree,
2944 struct page *pages[],
2945 int nr_pages, get_extent_t *get_extent,
2946 struct extent_map **em_cached,
2947 struct bio **bio, int mirror_num,
2948 unsigned long *bio_flags, int rw)
2949{
2950 u64 start = 0;
2951 u64 end = 0;
2952 u64 page_start;
2953 int index;
2954 int first_index = 0;
2955
2956 for (index = 0; index < nr_pages; index++) {
2957 page_start = page_offset(pages[index]);
2958 if (!end) {
2959 start = page_start;
2960 end = start + PAGE_CACHE_SIZE - 1;
2961 first_index = index;
2962 } else if (end + 1 == page_start) {
2963 end += PAGE_CACHE_SIZE;
2964 } else {
2965 __do_contiguous_readpages(tree, &pages[first_index],
2966 index - first_index, start,
2967 end, get_extent, em_cached,
2968 bio, mirror_num, bio_flags,
2969 rw);
2970 start = page_start;
2971 end = start + PAGE_CACHE_SIZE - 1;
2972 first_index = index;
2973 }
2974 }
2975
2976 if (end)
2977 __do_contiguous_readpages(tree, &pages[first_index],
2978 index - first_index, start,
2979 end, get_extent, em_cached, bio,
2980 mirror_num, bio_flags, rw);
2981}
2982
2983static int __extent_read_full_page(struct extent_io_tree *tree,
2984 struct page *page,
2985 get_extent_t *get_extent,
2986 struct bio **bio, int mirror_num,
2987 unsigned long *bio_flags, int rw)
2988{
2989 struct inode *inode = page->mapping->host;
2990 struct btrfs_ordered_extent *ordered;
2991 u64 start = page_offset(page);
2992 u64 end = start + PAGE_CACHE_SIZE - 1;
2993 int ret;
2994
2995 while (1) {
2996 lock_extent(tree, start, end);
2997 ordered = btrfs_lookup_ordered_extent(inode, start);
2998 if (!ordered)
2999 break;
3000 unlock_extent(tree, start, end);
3001 btrfs_start_ordered_extent(inode, ordered, 1);
3002 btrfs_put_ordered_extent(ordered);
3003 }
3004
3005 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3006 bio_flags, rw);
3007 return ret;
3008}
3009
2922int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 3010int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2923 get_extent_t *get_extent, int mirror_num) 3011 get_extent_t *get_extent, int mirror_num)
2924{ 3012{
@@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2933 return ret; 3021 return ret;
2934} 3022}
2935 3023
3024int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3025 get_extent_t *get_extent, int mirror_num)
3026{
3027 struct bio *bio = NULL;
3028 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3029 int ret;
3030
3031 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3032 &bio_flags, READ);
3033 if (bio)
3034 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3035 return ret;
3036}
3037
2936static noinline void update_nr_written(struct page *page, 3038static noinline void update_nr_written(struct page *page,
2937 struct writeback_control *wbc, 3039 struct writeback_control *wbc,
2938 unsigned long nr_written) 3040 unsigned long nr_written)
@@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3189 if (!PageWriteback(page)) { 3291 if (!PageWriteback(page)) {
3190 printk(KERN_ERR "btrfs warning page %lu not " 3292 printk(KERN_ERR "btrfs warning page %lu not "
3191 "writeback, cur %llu end %llu\n", 3293 "writeback, cur %llu end %llu\n",
3192 page->index, (unsigned long long)cur, 3294 page->index, cur, end);
3193 (unsigned long long)end);
3194 } 3295 }
3195 3296
3196 ret = submit_extent_page(write_flags, tree, page, 3297 ret = submit_extent_page(write_flags, tree, page,
@@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree,
3769 unsigned long bio_flags = 0; 3870 unsigned long bio_flags = 0;
3770 struct page *pagepool[16]; 3871 struct page *pagepool[16];
3771 struct page *page; 3872 struct page *page;
3772 int i = 0; 3873 struct extent_map *em_cached = NULL;
3773 int nr = 0; 3874 int nr = 0;
3774 3875
3775 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 3876 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree,
3786 pagepool[nr++] = page; 3887 pagepool[nr++] = page;
3787 if (nr < ARRAY_SIZE(pagepool)) 3888 if (nr < ARRAY_SIZE(pagepool))
3788 continue; 3889 continue;
3789 for (i = 0; i < nr; i++) { 3890 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3790 __extent_read_full_page(tree, pagepool[i], get_extent, 3891 &bio, 0, &bio_flags, READ);
3791 &bio, 0, &bio_flags, READ);
3792 page_cache_release(pagepool[i]);
3793 }
3794 nr = 0; 3892 nr = 0;
3795 } 3893 }
3796 for (i = 0; i < nr; i++) { 3894 if (nr)
3797 __extent_read_full_page(tree, pagepool[i], get_extent, 3895 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3798 &bio, 0, &bio_flags, READ); 3896 &bio, 0, &bio_flags, READ);
3799 page_cache_release(pagepool[i]); 3897
3800 } 3898 if (em_cached)
3899 free_extent_map(em_cached);
3801 3900
3802 BUG_ON(!list_empty(pages)); 3901 BUG_ON(!list_empty(pages));
3803 if (bio) 3902 if (bio)
@@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
4136 kmem_cache_free(extent_buffer_cache, eb); 4235 kmem_cache_free(extent_buffer_cache, eb);
4137} 4236}
4138 4237
4238static int extent_buffer_under_io(struct extent_buffer *eb)
4239{
4240 return (atomic_read(&eb->io_pages) ||
4241 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4242 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4243}
4244
4245/*
4246 * Helper for releasing extent buffer page.
4247 */
4248static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4249 unsigned long start_idx)
4250{
4251 unsigned long index;
4252 unsigned long num_pages;
4253 struct page *page;
4254 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4255
4256 BUG_ON(extent_buffer_under_io(eb));
4257
4258 num_pages = num_extent_pages(eb->start, eb->len);
4259 index = start_idx + num_pages;
4260 if (start_idx >= index)
4261 return;
4262
4263 do {
4264 index--;
4265 page = extent_buffer_page(eb, index);
4266 if (page && mapped) {
4267 spin_lock(&page->mapping->private_lock);
4268 /*
4269 * We do this since we'll remove the pages after we've
4270 * removed the eb from the radix tree, so we could race
4271 * and have this page now attached to the new eb. So
4272 * only clear page_private if it's still connected to
4273 * this eb.
4274 */
4275 if (PagePrivate(page) &&
4276 page->private == (unsigned long)eb) {
4277 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4278 BUG_ON(PageDirty(page));
4279 BUG_ON(PageWriteback(page));
4280 /*
4281 * We need to make sure we haven't be attached
4282 * to a new eb.
4283 */
4284 ClearPagePrivate(page);
4285 set_page_private(page, 0);
4286 /* One for the page private */
4287 page_cache_release(page);
4288 }
4289 spin_unlock(&page->mapping->private_lock);
4290
4291 }
4292 if (page) {
4293 /* One for when we alloced the page */
4294 page_cache_release(page);
4295 }
4296 } while (index != start_idx);
4297}
4298
4299/*
4300 * Helper for releasing the extent buffer.
4301 */
4302static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4303{
4304 btrfs_release_extent_buffer_page(eb, 0);
4305 __free_extent_buffer(eb);
4306}
4307
4139static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 4308static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4140 u64 start, 4309 u64 start,
4141 unsigned long len, 4310 unsigned long len,
@@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4184 struct extent_buffer *new; 4353 struct extent_buffer *new;
4185 unsigned long num_pages = num_extent_pages(src->start, src->len); 4354 unsigned long num_pages = num_extent_pages(src->start, src->len);
4186 4355
4187 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); 4356 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
4188 if (new == NULL) 4357 if (new == NULL)
4189 return NULL; 4358 return NULL;
4190 4359
4191 for (i = 0; i < num_pages; i++) { 4360 for (i = 0; i < num_pages; i++) {
4192 p = alloc_page(GFP_ATOMIC); 4361 p = alloc_page(GFP_NOFS);
4193 BUG_ON(!p); 4362 if (!p) {
4363 btrfs_release_extent_buffer(new);
4364 return NULL;
4365 }
4194 attach_extent_buffer_page(new, p); 4366 attach_extent_buffer_page(new, p);
4195 WARN_ON(PageDirty(p)); 4367 WARN_ON(PageDirty(p));
4196 SetPageUptodate(p); 4368 SetPageUptodate(p);
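Besides switching to GFP_NOFS, the hunk above replaces BUG_ON(!p) with an unwind path: on allocation failure the partially built clone is released and NULL is returned. A compact userspace model of that cleanup-on-partial-failure pattern (types and sizes are illustrative):

#include <stdlib.h>

struct buffer { int nr; void *pages[16]; };

static void release_buffer(struct buffer *b)
{
	int i;

	for (i = 0; i < b->nr; i++)
		free(b->pages[i]);
	free(b);
}

static struct buffer *clone_buffer(int num_pages)
{
	struct buffer *new = calloc(1, sizeof(*new));
	int i;

	if (!new)
		return NULL;

	for (i = 0; i < num_pages; i++) {
		void *p = malloc(4096);

		if (!p) {			/* was a BUG_ON(); now unwind */
			release_buffer(new);
			return NULL;
		}
		new->pages[i] = p;
		new->nr++;
	}
	return new;
}

int main(void)
{
	struct buffer *b = clone_buffer(4);

	if (b)
		release_buffer(b);
	return 0;
}

This new error path is also why btrfs_release_extent_buffer() and its helpers are moved earlier in the file: the paired hunks below add them before __alloc_extent_buffer() and delete the old copies further down.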
@@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
4210 unsigned long num_pages = num_extent_pages(0, len); 4382 unsigned long num_pages = num_extent_pages(0, len);
4211 unsigned long i; 4383 unsigned long i;
4212 4384
4213 eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); 4385 eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
4214 if (!eb) 4386 if (!eb)
4215 return NULL; 4387 return NULL;
4216 4388
4217 for (i = 0; i < num_pages; i++) { 4389 for (i = 0; i < num_pages; i++) {
4218 eb->pages[i] = alloc_page(GFP_ATOMIC); 4390 eb->pages[i] = alloc_page(GFP_NOFS);
4219 if (!eb->pages[i]) 4391 if (!eb->pages[i])
4220 goto err; 4392 goto err;
4221 } 4393 }
@@ -4231,76 +4403,6 @@ err:
4231 return NULL; 4403 return NULL;
4232} 4404}
4233 4405
4234static int extent_buffer_under_io(struct extent_buffer *eb)
4235{
4236 return (atomic_read(&eb->io_pages) ||
4237 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4238 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4239}
4240
4241/*
4242 * Helper for releasing extent buffer page.
4243 */
4244static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4245 unsigned long start_idx)
4246{
4247 unsigned long index;
4248 unsigned long num_pages;
4249 struct page *page;
4250 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4251
4252 BUG_ON(extent_buffer_under_io(eb));
4253
4254 num_pages = num_extent_pages(eb->start, eb->len);
4255 index = start_idx + num_pages;
4256 if (start_idx >= index)
4257 return;
4258
4259 do {
4260 index--;
4261 page = extent_buffer_page(eb, index);
4262 if (page && mapped) {
4263 spin_lock(&page->mapping->private_lock);
4264 /*
4265 * We do this since we'll remove the pages after we've
4266 * removed the eb from the radix tree, so we could race
4267 * and have this page now attached to the new eb. So
4268 * only clear page_private if it's still connected to
4269 * this eb.
4270 */
4271 if (PagePrivate(page) &&
4272 page->private == (unsigned long)eb) {
4273 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4274 BUG_ON(PageDirty(page));
4275 BUG_ON(PageWriteback(page));
4276 /*
4277 * We need to make sure we haven't be attached
4278 * to a new eb.
4279 */
4280 ClearPagePrivate(page);
4281 set_page_private(page, 0);
4282 /* One for the page private */
4283 page_cache_release(page);
4284 }
4285 spin_unlock(&page->mapping->private_lock);
4286
4287 }
4288 if (page) {
4289 /* One for when we alloced the page */
4290 page_cache_release(page);
4291 }
4292 } while (index != start_idx);
4293}
4294
4295/*
4296 * Helper for releasing the extent buffer.
4297 */
4298static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4299{
4300 btrfs_release_extent_buffer_page(eb, 0);
4301 __free_extent_buffer(eb);
4302}
4303
4304static void check_buffer_tree_ref(struct extent_buffer *eb) 4406static void check_buffer_tree_ref(struct extent_buffer *eb)
4305{ 4407{
4306 int refs; 4408 int refs;
@@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
4771 WARN_ON(start > eb->len); 4873 WARN_ON(start > eb->len);
4772 WARN_ON(start + len > eb->start + eb->len); 4874 WARN_ON(start + len > eb->start + eb->len);
4773 4875
4774 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4876 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4775 4877
4776 while (len > 0) { 4878 while (len > 0) {
4777 page = extent_buffer_page(eb, i); 4879 page = extent_buffer_page(eb, i);
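The (unsigned long) cast dropped above was redundant: PAGE_CACHE_SIZE is already an unsigned long, and masking with PAGE_CACHE_SIZE - 1 simply extracts the byte offset within a page. A quick arithmetic check with illustrative values:

#include <stdio.h>
#include <stdint.h>

#define PAGE_CACHE_SIZE		4096UL
#define PAGE_CACHE_SHIFT	12

int main(void)
{
	uint64_t start_offset = 1024, start = 10000;
	unsigned long offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
	unsigned long index = (start_offset + start) >> PAGE_CACHE_SHIFT;

	printf("byte %lu of page %lu\n", offset, index);
	return 0;
}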
@@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
4813 4915
4814 if (start + min_len > eb->len) { 4916 if (start + min_len > eb->len) {
4815 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 4917 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
4816 "wanted %lu %lu\n", (unsigned long long)eb->start, 4918 "wanted %lu %lu\n",
4817 eb->len, start, min_len); 4919 eb->start, eb->len, start, min_len);
4818 return -EINVAL; 4920 return -EINVAL;
4819 } 4921 }
4820 4922
@@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
4841 WARN_ON(start > eb->len); 4943 WARN_ON(start > eb->len);
4842 WARN_ON(start + len > eb->start + eb->len); 4944 WARN_ON(start + len > eb->start + eb->len);
4843 4945
4844 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4946 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4845 4947
4846 while (len > 0) { 4948 while (len > 0) {
4847 page = extent_buffer_page(eb, i); 4949 page = extent_buffer_page(eb, i);
@@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
4875 WARN_ON(start > eb->len); 4977 WARN_ON(start > eb->len);
4876 WARN_ON(start + len > eb->start + eb->len); 4978 WARN_ON(start + len > eb->start + eb->len);
4877 4979
4878 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4980 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4879 4981
4880 while (len > 0) { 4982 while (len > 0) {
4881 page = extent_buffer_page(eb, i); 4983 page = extent_buffer_page(eb, i);
@@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
4905 WARN_ON(start > eb->len); 5007 WARN_ON(start > eb->len);
4906 WARN_ON(start + len > eb->start + eb->len); 5008 WARN_ON(start + len > eb->start + eb->len);
4907 5009
4908 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 5010 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4909 5011
4910 while (len > 0) { 5012 while (len > 0) {
4911 page = extent_buffer_page(eb, i); 5013 page = extent_buffer_page(eb, i);
@@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
4936 WARN_ON(src->len != dst_len); 5038 WARN_ON(src->len != dst_len);
4937 5039
4938 offset = (start_offset + dst_offset) & 5040 offset = (start_offset + dst_offset) &
4939 ((unsigned long)PAGE_CACHE_SIZE - 1); 5041 (PAGE_CACHE_SIZE - 1);
4940 5042
4941 while (len > 0) { 5043 while (len > 0) {
4942 page = extent_buffer_page(dst, i); 5044 page = extent_buffer_page(dst, i);
@@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5022 5124
5023 while (len > 0) { 5125 while (len > 0) {
5024 dst_off_in_page = (start_offset + dst_offset) & 5126 dst_off_in_page = (start_offset + dst_offset) &
5025 ((unsigned long)PAGE_CACHE_SIZE - 1); 5127 (PAGE_CACHE_SIZE - 1);
5026 src_off_in_page = (start_offset + src_offset) & 5128 src_off_in_page = (start_offset + src_offset) &
5027 ((unsigned long)PAGE_CACHE_SIZE - 1); 5129 (PAGE_CACHE_SIZE - 1);
5028 5130
5029 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 5131 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5030 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 5132 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5075 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 5177 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5076 5178
5077 dst_off_in_page = (start_offset + dst_end) & 5179 dst_off_in_page = (start_offset + dst_end) &
5078 ((unsigned long)PAGE_CACHE_SIZE - 1); 5180 (PAGE_CACHE_SIZE - 1);
5079 src_off_in_page = (start_offset + src_end) & 5181 src_off_in_page = (start_offset + src_end) &
5080 ((unsigned long)PAGE_CACHE_SIZE - 1); 5182 (PAGE_CACHE_SIZE - 1);
5081 5183
5082 cur = min_t(unsigned long, len, src_off_in_page + 1); 5184 cur = min_t(unsigned long, len, src_off_in_page + 1);
5083 cur = min(cur, dst_off_in_page + 1); 5185 cur = min(cur, dst_off_in_page + 1);