author    Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/btrfs/extent_io.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--    fs/btrfs/extent_io.c    806
1 file changed, 415 insertions(+), 391 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..7055d11c1efd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,8 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
+#include <linux/cleancache.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -101,10 +103,10 @@ void extent_io_exit(void)
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
-			 struct address_space *mapping, gfp_t mask)
+			 struct address_space *mapping)
 {
	tree->state = RB_ROOT;
-	tree->buffer = RB_ROOT;
+	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
@@ -235,50 +237,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
	return ret;
 }
 
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
-						u64 offset, struct rb_node *node)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct extent_buffer *eb;
-
-	while (*p) {
-		parent = *p;
-		eb = rb_entry(parent, struct extent_buffer, rb_node);
-
-		if (offset < eb->start)
-			p = &(*p)->rb_left;
-		else if (offset > eb->start)
-			p = &(*p)->rb_right;
-		else
-			return eb;
-	}
-
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-	return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
-					   u64 offset)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node *n = root->rb_node;
-	struct extent_buffer *eb;
-
-	while (n) {
-		eb = rb_entry(n, struct extent_buffer, rb_node);
-		if (offset < eb->start)
-			n = n->rb_left;
-		else if (offset > eb->start)
-			n = n->rb_right;
-		else
-			return eb;
-	}
-	return NULL;
-}
-
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
		     struct extent_state *other)
 {
@@ -483,6 +441,15 @@ static int clear_state_bit(struct extent_io_tree *tree,
	return ret;
 }
 
+static struct extent_state *
+alloc_extent_state_atomic(struct extent_state *prealloc)
+{
+	if (!prealloc)
+		prealloc = alloc_extent_state(GFP_ATOMIC);
+
+	return prealloc;
+}
+
 /*
  * clear some bits on a range in the tree. This may require splitting
  * or inserting elements in the tree, so the gfp mask is used to
@@ -573,8 +540,8 @@ hit_next:
	 */
 
	if (state->start < start) {
-		if (!prealloc)
-			prealloc = alloc_extent_state(GFP_ATOMIC);
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
@@ -595,8 +562,8 @@ hit_next:
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
-		if (!prealloc)
-			prealloc = alloc_extent_state(GFP_ATOMIC);
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);
		if (wake)
@@ -734,6 +701,15 @@ static void cache_state(struct extent_state *state,
	}
 }
 
+static void uncache_state(struct extent_state **cached_ptr)
+{
+	if (cached_ptr && (*cached_ptr)) {
+		struct extent_state *state = *cached_ptr;
+		*cached_ptr = NULL;
+		free_extent_state(state);
+	}
+}
+
 /*
  * set some bits on a range in the tree. This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -760,8 +736,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
-		if (!prealloc)
-			return -ENOMEM;
+		BUG_ON(!prealloc);
	}
 
	spin_lock(&tree->lock);
@@ -778,6 +753,8 @@ again:
	 */
	node = tree_search(tree, start);
	if (!node) {
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, end, &bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
@@ -806,20 +783,18 @@ hit_next:
		if (err)
			goto out;
 
+		next_node = rb_next(node);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
 
		start = last_end + 1;
-		if (start < end && prealloc && !need_resched()) {
-			next_node = rb_next(node);
-			if (next_node) {
-				state = rb_entry(next_node, struct extent_state,
-						 rb_node);
-				if (state->start == start)
-					goto hit_next;
-			}
+		if (next_node && start < end && prealloc && !need_resched()) {
+			state = rb_entry(next_node, struct extent_state,
+					 rb_node);
+			if (state->start == start)
+				goto hit_next;
		}
		goto search_again;
	}
@@ -846,6 +821,9 @@ hit_next:
			err = -EEXIST;
			goto out;
		}
+
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
@@ -876,14 +854,25 @@ hit_next:
			this_end = end;
		else
			this_end = last_start - 1;
+
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
+
+		/*
+		 * Avoid to free 'prealloc' if it can be merged with
+		 * the later extent.
+		 */
+		atomic_inc(&prealloc->refs);
		err = insert_state(tree, prealloc, start, this_end,
				   &bits);
		BUG_ON(err == -EEXIST);
		if (err) {
+			free_extent_state(prealloc);
			prealloc = NULL;
			goto out;
		}
		cache_state(prealloc, cached_state);
+		free_extent_state(prealloc);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
@@ -900,6 +889,9 @@ hit_next:
			err = -EEXIST;
			goto out;
		}
+
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);
 
@@ -976,18 +968,11 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
			      NULL, mask);
 }
 
-static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask)
-{
-	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
-				NULL, mask);
-}
-
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask)
+			struct extent_state **cached_state, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-			      NULL, mask);
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+			      NULL, cached_state, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -998,11 +983,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				cached_state, mask);
 }
 
-int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
-}
-
 /*
  * either insert or lock state struct between start and end use mask to tell
  * us if waiting is desired.
@@ -1056,33 +1036,13 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
				mask);
 }
 
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-		  gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
				mask);
 }
 
 /*
- * helper function to set pages and extents in the tree dirty
- */
-int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	struct page *page;
-
-	while (index <= end_index) {
-		page = find_get_page(tree->mapping, index);
-		BUG_ON(!page);
-		__set_page_dirty_nobuffers(page);
-		page_cache_release(page);
-		index++;
-	}
-	return 0;
-}
-
-/*
  * helper function to set both pages and extents in the tree writeback
  */
 static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1477,12 +1437,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
  */
 u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
-		     unsigned long bits)
+		     unsigned long bits, int contig)
 {
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
+	u64 last = 0;
	int found = 0;
 
	if (search_end <= cur_start) {
@@ -1507,15 +1468,20 @@ u64 count_range_bits(struct extent_io_tree *tree,
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
-		if (state->end >= cur_start && (state->state & bits)) {
+		if (contig && found && state->start > last + 1)
+			break;
+		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
-				*start = state->start;
+				*start = max(cur_start, state->start);
				found = 1;
			}
+			last = state->end;
+		} else if (contig && found) {
+			break;
		}
		node = rb_next(node);
		if (!node)
@@ -1773,6 +1739,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
	do {
		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
+		struct extent_state *state;
+
		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1787,9 +1756,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
		if (++bvec <= bvec_end)
			prefetchw(&bvec->bv_page->flags);
 
+		spin_lock(&tree->lock);
+		state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+		if (state && state->start == start) {
+			/*
+			 * take a reference on the state, unlock will drop
+			 * the ref
+			 */
+			cache_state(state, &cached);
+		}
+		spin_unlock(&tree->lock);
+
		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      NULL);
+							      state);
			if (ret)
				uptodate = 0;
		}
@@ -1802,15 +1782,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
					test_bit(BIO_UPTODATE, &bio->bi_flags);
				if (err)
					uptodate = 0;
+				uncache_state(&cached);
				continue;
			}
		}
 
		if (uptodate) {
-			set_extent_uptodate(tree, start, end,
+			set_extent_uptodate(tree, start, end, &cached,
					    GFP_ATOMIC);
		}
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
		if (whole_page) {
			if (uptodate) {
@@ -1834,47 +1815,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
	bio_put(bio);
 }
 
-/*
- * IO done from prepare_write is pretty simple, we just unlock
- * the structs in the extent tree when done, and set the uptodate bits
- * as appropriate.
- */
-static void end_bio_extent_preparewrite(struct bio *bio, int err)
-{
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct extent_io_tree *tree;
-	u64 start;
-	u64 end;
-
-	do {
-		struct page *page = bvec->bv_page;
-		tree = &BTRFS_I(page->mapping->host)->io_tree;
-
-		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-			bvec->bv_offset;
-		end = start + bvec->bv_len - 1;
-
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
-		if (uptodate) {
-			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
-		} else {
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
-
-		unlock_extent(tree, start, end, GFP_ATOMIC);
-
-	} while (bvec >= bio->bi_io_vec);
-
-	bio_put(bio);
-}
-
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
-		 gfp_t gfp_flags)
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		gfp_t gfp_flags)
 {
	struct bio *bio;
 
@@ -1901,17 +1844,15 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
	struct page *page = bvec->bv_page;
	struct extent_io_tree *tree = bio->bi_private;
	u64 start;
-	u64 end;
 
	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
-	end = start + bvec->bv_len - 1;
 
	bio->bi_private = NULL;
 
	bio_get(bio);
 
	if (tree->ops && tree->ops->submit_bio_hook)
-		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+		ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
					   mirror_num, bio_flags, start);
	else
		submit_bio(rw, bio);
@@ -1965,7 +1906,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
	else
		nr = bio_get_nr_vecs(bdev);
 
-	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	if (!bio)
+		return -ENOMEM;
 
	bio_add_page(bio, page, page_size, offset);
	bio->bi_end_io = end_io_func;
@@ -1990,6 +1933,7 @@ void set_page_extent_mapped(struct page *page)
 
 static void set_page_extent_head(struct page *page, unsigned long len)
 {
+	WARN_ON(!PagePrivate(page));
	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
 }
 
@@ -2019,7 +1963,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
	struct btrfs_ordered_extent *ordered;
	int ret;
	int nr = 0;
-	size_t page_offset = 0;
+	size_t pg_offset = 0;
	size_t iosize;
	size_t disk_io_size;
	size_t blocksize = inode->i_sb->s_blocksize;
@@ -2027,6 +1971,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 
	set_page_extent_mapped(page);
 
+	if (!PageUptodate(page)) {
+		if (cleancache_get_page(page) == 0) {
+			BUG_ON(blocksize != PAGE_SIZE);
+			goto out;
+		}
+	}
+
	end = page_end;
	while (1) {
		lock_extent(tree, start, end, GFP_NOFS);
@@ -2053,19 +2004,22 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
	while (cur <= end) {
		if (cur >= last_byte) {
			char *userpage;
-			iosize = PAGE_CACHE_SIZE - page_offset;
+			struct extent_state *cached = NULL;
+
+			iosize = PAGE_CACHE_SIZE - pg_offset;
			userpage = kmap_atomic(page, KM_USER0);
-			memset(userpage + page_offset, 0, iosize);
+			memset(userpage + pg_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
			break;
		}
-		em = get_extent(inode, page, page_offset, cur,
+		em = get_extent(inode, page, pg_offset, cur,
				end - cur + 1, 0);
-		if (IS_ERR(em) || !em) {
+		if (IS_ERR_OR_NULL(em)) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
@@ -2074,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
		BUG_ON(extent_map_end(em) <= cur);
		BUG_ON(end < cur);
 
-		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
			this_bio_flag = EXTENT_BIO_COMPRESSED;
+			extent_set_compress_type(&this_bio_flag,
+						 em->compress_type);
+		}
 
		iosize = min(extent_map_end(em) - cur, end - cur + 1);
		cur_end = min(extent_map_end(em) - 1, end);
@@ -2097,16 +2054,19 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
		/* we've found a hole, just zero and go on */
		if (block_start == EXTENT_MAP_HOLE) {
			char *userpage;
+			struct extent_state *cached = NULL;
+
			userpage = kmap_atomic(page, KM_USER0);
-			memset(userpage + page_offset, 0, iosize);
+			memset(userpage + pg_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);
 
			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
			continue;
		}
		/* the get_extent function already copied into the page */
@@ -2115,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
			check_page_uptodate(tree, page);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
			continue;
		}
		/* we have an inline extent but it didn't get marked up
@@ -2125,7 +2085,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
			SetPageError(page);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
			continue;
		}
 
@@ -2138,7 +2098,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
			unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
			pnr -= page->index;
			ret = submit_extent_page(READ, tree, page,
-					 sector, disk_io_size, page_offset,
+					 sector, disk_io_size, pg_offset,
					 bdev, bio, pnr,
					 end_bio_extent_readpage, mirror_num,
					 *bio_flags,
@@ -2149,8 +2109,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
-		page_offset += iosize;
+		pg_offset += iosize;
	}
+out:
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
@@ -2169,7 +2130,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
				      &bio_flags);
	if (bio)
-		submit_one_bio(READ, bio, 0, bio_flags);
+		ret = submit_one_bio(READ, bio, 0, bio_flags);
	return ret;
 }
 
@@ -2204,7 +2165,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 iosize;
-	u64 unlock_start;
	sector_t sector;
	struct extent_state *cached_state = NULL;
	struct extent_map *em;
@@ -2223,10 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	unsigned long nr_written = 0;
 
	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC_PLUG;
+		write_flags = WRITE_SYNC;
	else
		write_flags = WRITE;
 
+	trace___extent_writepage(page, inode, wbc);
+
	WARN_ON(!PageLocked(page));
	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
	if (page->index > end_index ||
@@ -2329,7 +2291,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start,
							 page_end, NULL, 1);
-		unlock_start = page_end + 1;
		goto done;
	}
 
@@ -2340,12 +2301,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			if (tree->ops && tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,
							 page_end, NULL, 1);
-			unlock_start = page_end + 1;
			break;
		}
		em = epd->get_extent(inode, page, pg_offset, cur,
				     end - cur + 1, 1);
-		if (IS_ERR(em) || !em) {
+		if (IS_ERR_OR_NULL(em)) {
			SetPageError(page);
			break;
		}
@@ -2387,7 +2347,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
			cur += iosize;
			pg_offset += iosize;
-			unlock_start = cur;
			continue;
		}
		/* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2432,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	int scanned = 0;
-	int range_whole = 0;
 
	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
@@ -2482,8 +2440,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
		scanned = 1;
	}
 retry:
@@ -2689,7 +2645,7 @@ int extent_readpages(struct extent_io_tree *tree,
		prefetchw(&page->flags);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping,
-					page->index, GFP_KERNEL)) {
+					page->index, GFP_NOFS)) {
			__extent_read_full_page(tree, page, get_extent,
						&bio, 0, &bio_flags);
		}
@@ -2728,123 +2684,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
 }
 
 /*
- * simple commit_write call, set_range_dirty is used to mark both
- * the pages and the extent records as dirty
- */
-int extent_commit_write(struct extent_io_tree *tree,
-			struct inode *inode, struct page *page,
-			unsigned from, unsigned to)
-{
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
-	set_page_extent_mapped(page);
-	set_page_dirty(page);
-
-	if (pos > inode->i_size) {
-		i_size_write(inode, pos);
-		mark_inode_dirty(inode);
-	}
-	return 0;
-}
-
-int extent_prepare_write(struct extent_io_tree *tree,
-			 struct inode *inode, struct page *page,
-			 unsigned from, unsigned to, get_extent_t *get_extent)
-{
-	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
-	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
-	u64 block_start;
-	u64 orig_block_start;
-	u64 block_end;
-	u64 cur_end;
-	struct extent_map *em;
-	unsigned blocksize = 1 << inode->i_blkbits;
-	size_t page_offset = 0;
-	size_t block_off_start;
-	size_t block_off_end;
-	int err = 0;
-	int iocount = 0;
-	int ret = 0;
-	int isnew;
-
-	set_page_extent_mapped(page);
-
-	block_start = (page_start + from) & ~((u64)blocksize - 1);
-	block_end = (page_start + to - 1) | (blocksize - 1);
-	orig_block_start = block_start;
-
-	lock_extent(tree, page_start, page_end, GFP_NOFS);
-	while (block_start <= block_end) {
-		em = get_extent(inode, page, page_offset, block_start,
-				block_end - block_start + 1, 1);
-		if (IS_ERR(em) || !em)
-			goto err;
-
-		cur_end = min(block_end, extent_map_end(em) - 1);
-		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
-		block_off_end = block_off_start + blocksize;
-		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-
-		if (!PageUptodate(page) && isnew &&
-		    (block_off_end > to || block_off_start < from)) {
-			void *kaddr;
-
-			kaddr = kmap_atomic(page, KM_USER0);
-			if (block_off_end > to)
-				memset(kaddr + to, 0, block_off_end - to);
-			if (block_off_start < from)
-				memset(kaddr + block_off_start, 0,
-				       from - block_off_start);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
-		}
-		if ((em->block_start != EXTENT_MAP_HOLE &&
-		     em->block_start != EXTENT_MAP_INLINE) &&
-		    !isnew && !PageUptodate(page) &&
-		    (block_off_end > to || block_off_start < from) &&
-		    !test_range_bit(tree, block_start, cur_end,
-				    EXTENT_UPTODATE, 1, NULL)) {
-			u64 sector;
-			u64 extent_offset = block_start - em->start;
-			size_t iosize;
-			sector = (em->block_start + extent_offset) >> 9;
-			iosize = (cur_end - block_start + blocksize) &
-				~((u64)blocksize - 1);
-			/*
-			 * we've already got the extent locked, but we
-			 * need to split the state such that our end_bio
-			 * handler can clear the lock.
-			 */
-			set_extent_bit(tree, block_start,
-				       block_start + iosize - 1,
-				       EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
-			ret = submit_extent_page(READ, tree, page,
-					 sector, iosize, page_offset, em->bdev,
-					 NULL, 1,
-					 end_bio_extent_preparewrite, 0,
-					 0, 0);
-			iocount++;
-			block_start = block_start + iosize;
-		} else {
-			set_extent_uptodate(tree, block_start, cur_end,
-					    GFP_NOFS);
-			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
-			block_start = cur_end + 1;
-		}
-		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
-		free_extent_map(em);
-	}
-	if (iocount) {
-		wait_extent_bit(tree, orig_block_start,
-				block_end, EXTENT_LOCKED);
-	}
-	check_page_uptodate(tree, page);
-err:
-	/* FIXME, zero out newly allocated blocks on error */
-	return err;
-}
-
-/*
  * a helper for releasepage, this tests for areas of the page that
  * are locked or under IO and drops the related state bits if it is safe
  * to drop the page.
@@ -2867,9 +2706,17 @@ int try_release_extent_state(struct extent_map_tree *map,
		 * at this point we can safely clear everything except the
		 * locked bit and the nodatasum bit
		 */
-		clear_extent_bit(tree, start, end,
+		ret = clear_extent_bit(tree, start, end,
				 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
				 0, 0, NULL, mask);
+
+		/* if clear_extent_bit failed for enomem reasons,
+		 * we can't allow the release to continue.
+		 */
+		if (ret < 0)
+			ret = 0;
+		else
+			ret = 1;
	}
	return ret;
 }
@@ -2894,7 +2741,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
		len = end - start + 1;
		write_lock(&map->lock);
		em = lookup_extent_mapping(map, start, len);
-		if (!em || IS_ERR(em)) {
+		if (IS_ERR_OR_NULL(em)) {
			write_unlock(&map->lock);
			break;
		}
@@ -2922,76 +2769,169 @@ int try_release_extent_mapping(struct extent_map_tree *map,
	return try_release_extent_state(map, tree, page, mask);
 }
 
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
-		     get_extent_t *get_extent)
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+						u64 offset,
+						u64 last,
+						get_extent_t *get_extent)
 {
-	struct inode *inode = mapping->host;
-	struct extent_state *cached_state = NULL;
-	u64 start = iblock << inode->i_blkbits;
-	sector_t sector = 0;
-	size_t blksize = (1 << inode->i_blkbits);
+	u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
	struct extent_map *em;
+	u64 len;
 
-	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
-			 0, &cached_state, GFP_NOFS);
-	em = get_extent(inode, NULL, 0, start, blksize, 0);
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
-			     start + blksize - 1, &cached_state, GFP_NOFS);
-	if (!em || IS_ERR(em))
-		return 0;
+	if (offset >= last)
+		return NULL;
 
-	if (em->block_start > EXTENT_MAP_LAST_BYTE)
-		goto out;
+	while(1) {
+		len = last - offset;
+		if (len == 0)
+			break;
+		len = (len + sectorsize - 1) & ~(sectorsize - 1);
+		em = get_extent(inode, NULL, 0, offset, len, 0);
+		if (IS_ERR_OR_NULL(em))
+			return em;
 
-	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
-out:
-	free_extent_map(em);
-	return sector;
+		/* if this isn't a hole return it */
+		if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+		    em->block_start != EXTENT_MAP_HOLE) {
+			return em;
+		}
+
+		/* this is a hole, advance to the next extent */
+		offset = extent_map_end(em);
+		free_extent_map(em);
+		if (offset >= last)
+			break;
+	}
+	return NULL;
 }
 
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len, get_extent_t *get_extent)
 {
-	int ret;
+	int ret = 0;
	u64 off = start;
	u64 max = start + len;
	u32 flags = 0;
+	u32 found_type;
+	u64 last;
+	u64 last_for_get_extent = 0;
	u64 disko = 0;
+	u64 isize = i_size_read(inode);
+	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *item;
	int end = 0;
-	u64 em_start = 0, em_len = 0;
+	u64 em_start = 0;
+	u64 em_len = 0;
+	u64 em_end = 0;
	unsigned long emflags;
-	ret = 0;
 
	if (len == 0)
		return -EINVAL;
 
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	/*
+	 * lookup the last file extent. We're not using i_size here
+	 * because there might be preallocation past i_size
+	 */
+	ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
+				       path, btrfs_ino(inode), -1, 0);
+	if (ret < 0) {
+		btrfs_free_path(path);
+		return ret;
+	}
+	WARN_ON(!ret);
+	path->slots[0]--;
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_file_extent_item);
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+	found_type = btrfs_key_type(&found_key);
+
+	/* No extents, but there might be delalloc bits */
+	if (found_key.objectid != btrfs_ino(inode) ||
+	    found_type != BTRFS_EXTENT_DATA_KEY) {
+		/* have to trust i_size as the end */
+		last = (u64)-1;
+		last_for_get_extent = isize;
+	} else {
+		/*
+		 * remember the start of the last extent. There are a
+		 * bunch of different factors that go into the length of the
+		 * extent, so its much less complex to remember where it started
+		 */
+		last = found_key.offset;
+		last_for_get_extent = last + 1;
+	}
+	btrfs_free_path(path);
+
+	/*
+	 * we might have some extents allocated but more delalloc past those
+	 * extents. so, we trust isize unless the start of the last extent is
+	 * beyond isize
+	 */
+	if (last < isize) {
+		last = (u64)-1;
+		last_for_get_extent = isize;
+	}
+
	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
			 &cached_state, GFP_NOFS);
-	em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+	em = get_extent_skip_holes(inode, off, last_for_get_extent,
				   get_extent);
	if (!em)
		goto out;
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}
+
	while (!end) {
-		off = em->start + em->len;
-		if (off >= max)
-			end = 1;
+		u64 offset_in_extent;
+
+		/* break if the extent we found is outside the range */
+		if (em->start >= max || extent_map_end(em) < off)
+			break;
 
-		em_start = em->start;
-		em_len = em->len;
+		/*
+		 * get_extent may return an extent that starts before our
+		 * requested range. We have to make sure the ranges
+		 * we return to fiemap always move forward and don't
+		 * overlap, so adjust the offsets here
+		 */
+		em_start = max(em->start, off);
 
+		/*
+		 * record the offset from the start of the extent
+		 * for adjusting the disk offset below
+		 */
+		offset_in_extent = em_start - em->start;
+		em_end = extent_map_end(em);
+		em_len = em_end - em_start;
+		emflags = em->flags;
		disko = 0;
		flags = 0;
 
+		/*
+		 * bump off for our next call to get_extent
+		 */
+		off = extent_map_end(em);
+		if (off >= max)
+			end = 1;
+
		if (em->block_start == EXTENT_MAP_LAST_BYTE) {
			end = 1;
			flags |= FIEMAP_EXTENT_LAST;
-		} else if (em->block_start == EXTENT_MAP_HOLE) {
-			flags |= FIEMAP_EXTENT_UNWRITTEN;
		} else if (em->block_start == EXTENT_MAP_INLINE) {
			flags |= (FIEMAP_EXTENT_DATA_INLINE |
				  FIEMAP_EXTENT_NOT_ALIGNED);
@@ -2999,32 +2939,32 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			flags |= (FIEMAP_EXTENT_DELALLOC |
				  FIEMAP_EXTENT_UNKNOWN);
		} else {
-			disko = em->block_start;
+			disko = em->block_start + offset_in_extent;
		}
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			flags |= FIEMAP_EXTENT_ENCODED;
 
-		emflags = em->flags;
		free_extent_map(em);
		em = NULL;
+		if ((em_start >= last) || em_len == (u64)-1 ||
+		   (last == (u64)-1 && isize <= em_end)) {
+			flags |= FIEMAP_EXTENT_LAST;
+			end = 1;
+		}
 
-		if (!end) {
-			em = get_extent(inode, NULL, 0, off, max - off, 0);
-			if (!em)
-				goto out;
-			if (IS_ERR(em)) {
-				ret = PTR_ERR(em);
-				goto out;
-			}
-			emflags = em->flags;
+		/* now scan forward to see if this is really the last extent. */
+		em = get_extent_skip_holes(inode, off, last_for_get_extent,
+					   get_extent);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
		}
-		if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+		if (!em) {
			flags |= FIEMAP_EXTENT_LAST;
			end = 1;
		}
-
		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
					      em_len, flags);
		if (ret)
			goto out;
	}
@@ -3078,6 +3018,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 #endif
 
	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	if (eb == NULL)
+		return NULL;
	eb->start = start;
	eb->len = len;
	spin_lock_init(&eb->lock);
@@ -3104,10 +3046,42 @@ static void __free_extent_buffer(struct extent_buffer *eb)
	kmem_cache_free(extent_buffer_cache, eb);
 }
 
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+						unsigned long start_idx)
+{
+	unsigned long index;
+	struct page *page;
+
+	if (!eb->first_page)
+		return;
+
+	index = num_extent_pages(eb->start, eb->len);
+	if (start_idx >= index)
+		return;
+
+	do {
+		index--;
+		page = extent_buffer_page(eb, index);
+		if (page)
+			page_cache_release(page);
+	} while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+	btrfs_release_extent_buffer_page(eb, 0);
+	__free_extent_buffer(eb);
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 start, unsigned long len,
-					  struct page *page0,
-					  gfp_t mask)
+					  struct page *page0)
 {
	unsigned long num_pages = num_extent_pages(start, len);
	unsigned long i;
@@ -3117,18 +3091,18 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
	struct page *p;
	struct address_space *mapping = tree->mapping;
	int uptodate = 1;
+	int ret;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb) {
-		atomic_inc(&eb->refs);
-		spin_unlock(&tree->buffer_lock);
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
		mark_page_accessed(eb->first_page);
		return eb;
	}
-	spin_unlock(&tree->buffer_lock);
+	rcu_read_unlock();
 
-	eb = __alloc_extent_buffer(tree, start, len, mask);
+	eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
	if (!eb)
		return NULL;
 
@@ -3145,7 +3119,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
		i = 0;
	}
	for (; i < num_pages; i++, index++) {
-		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+		p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
		if (!p) {
			WARN_ON(1);
			goto free_eb;
@@ -3160,50 +3134,77 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
		}
		if (!PageUptodate(p))
			uptodate = 0;
-		unlock_page(p);
+
+		/*
+		 * see below about how we avoid a nasty race with release page
+		 * and why we unlock later
+		 */
+		if (i != 0)
+			unlock_page(p);
	}
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+
	spin_lock(&tree->buffer_lock);
-	exists = buffer_tree_insert(tree, start, &eb->rb_node);
-	if (exists) {
+	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+	if (ret == -EEXIST) {
+		exists = radix_tree_lookup(&tree->buffer,
+						start >> PAGE_CACHE_SHIFT);
		/* add one reference for the caller */
		atomic_inc(&exists->refs);
		spin_unlock(&tree->buffer_lock);
+		radix_tree_preload_end();
		goto free_eb;
	}
	/* add one reference for the tree */
	atomic_inc(&eb->refs);
	spin_unlock(&tree->buffer_lock);
+	radix_tree_preload_end();
+
+	/*
+	 * there is a race where release page may have
+	 * tried to find this extent buffer in the radix
+	 * but failed.  It will tell the VM it is safe to
+	 * reclaim the, and it will clear the page private bit.
+	 * We must make sure to set the page private bit properly
+	 * after the extent buffer is in the radix tree so
+	 * it doesn't get lost
+	 */
+	set_page_extent_mapped(eb->first_page);
+	set_page_extent_head(eb->first_page, eb->len);
+	if (!page0)
+		unlock_page(eb->first_page);
	return eb;
 
 free_eb:
+	if (eb->first_page && !page0)
+		unlock_page(eb->first_page);
+
	if (!atomic_dec_and_test(&eb->refs))
		return exists;
-	for (index = 1; index < i; index++)
-		page_cache_release(extent_buffer_page(eb, index));
-	page_cache_release(extent_buffer_page(eb, 0));
-	__free_extent_buffer(eb);
+	btrfs_release_extent_buffer(eb);
	return exists;
 }
 
 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-					 u64 start, unsigned long len,
-					  gfp_t mask)
+					 u64 start, unsigned long len)
 {
	struct extent_buffer *eb;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb)
-		atomic_inc(&eb->refs);
-	spin_unlock(&tree->buffer_lock);
-
-	if (eb)
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
		mark_page_accessed(eb->first_page);
+		return eb;
+	}
+	rcu_read_unlock();
 
-	return eb;
+	return NULL;
 }
 
 void free_extent_buffer(struct extent_buffer *eb)
@@ -3232,10 +3233,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
			continue;
 
		lock_page(page);
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
		if (i == 0)
			set_page_extent_head(page, eb->len);
-		else
-			set_page_private(page, EXTENT_PAGE_PRIVATE);
 
		clear_page_dirty_for_io(page);
		spin_lock_irq(&page->mapping->tree_lock);
@@ -3250,13 +3252,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
	return 0;
 }
 
-int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
-				    struct extent_buffer *eb)
-{
-	return wait_on_extent_writeback(tree, eb->start,
-					eb->start + eb->len - 1);
-}
-
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
			     struct extent_buffer *eb)
 {
@@ -3302,7 +3297,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
	num_pages = num_extent_pages(eb->start, eb->len);
 
	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    GFP_NOFS);
+			    NULL, GFP_NOFS);
	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3425,6 +3420,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 
	for (i = start_i; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
+
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
+		if (i == 0)
+			set_page_extent_head(page, eb->len);
+
		if (inc_all_pages)
			page_cache_get(page);
		if (!PageUptodate(page)) {
@@ -3530,6 +3532,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
		       "wanted %lu %lu\n", (unsigned long long)eb->start,
		       eb->len, start, min_len);
		WARN_ON(1);
+		return -EINVAL;
	}
 
	p = extent_buffer_page(eb, i);
@@ -3722,6 +3725,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
	kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+	unsigned long distance = (src > dst) ? src - dst : dst - src;
+	return distance < len;
+}
+
 static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
@@ -3729,10 +3738,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	char *src_kaddr;
 
-	if (dst_page != src_page)
+	if (dst_page != src_page) {
		src_kaddr = kmap_atomic(src_page, KM_USER1);
-	else
+	} else {
		src_kaddr = dst_kaddr;
+		BUG_ON(areas_overlap(src_off, dst_off, len));
+	}
 
	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
	kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3807,7 +3818,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
		       "len %lu len %lu\n", dst_offset, len, dst->len);
		BUG_ON(1);
	}
-	if (dst_offset < src_offset) {
+	if (!areas_overlap(src_offset, dst_offset, len)) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
@@ -3833,34 +3844,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
	}
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	btrfs_release_extent_buffer(eb);
+}
+
 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 {
	u64 start = page_offset(page);
	struct extent_buffer *eb;
	int ret = 1;
-	unsigned long i;
-	unsigned long num_pages;
 
	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (!eb)
-		goto out;
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (!eb) {
+		spin_unlock(&tree->buffer_lock);
+		return ret;
+	}
 
-	if (atomic_read(&eb->refs) > 1) {
+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
		ret = 0;
		goto out;
	}
-	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+	/*
+	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
+	 * Or go back.
+	 */
+	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
		ret = 0;
		goto out;
	}
-	/* at this point we can safely release the extent buffer */
-	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = 0; i < num_pages; i++)
-		page_cache_release(extent_buffer_page(eb, i));
-	rb_erase(&eb->rb_node, &tree->buffer);
-	__free_extent_buffer(eb);
+
+	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 out:
	spin_unlock(&tree->buffer_lock);
+
+	/* at this point we can safely release the extent buffer */
+	if (atomic_read(&eb->refs) == 0)
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
	return ret;
 }