author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/btrfs/extent_io.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	806
1 files changed, 415 insertions, 391 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..7055d11c1efd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,8 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
+#include <linux/cleancache.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -101,10 +103,10 @@ void extent_io_exit(void)
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
-			 struct address_space *mapping, gfp_t mask)
+			 struct address_space *mapping)
 {
 	tree->state = RB_ROOT;
-	tree->buffer = RB_ROOT;
+	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
 	tree->ops = NULL;
 	tree->dirty_bytes = 0;
 	spin_lock_init(&tree->lock);
@@ -235,50 +237,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
 	return ret;
 }
 
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
-						u64 offset, struct rb_node *node)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct extent_buffer *eb;
-
-	while (*p) {
-		parent = *p;
-		eb = rb_entry(parent, struct extent_buffer, rb_node);
-
-		if (offset < eb->start)
-			p = &(*p)->rb_left;
-		else if (offset > eb->start)
-			p = &(*p)->rb_right;
-		else
-			return eb;
-	}
-
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-	return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
-					   u64 offset)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node *n = root->rb_node;
-	struct extent_buffer *eb;
-
-	while (n) {
-		eb = rb_entry(n, struct extent_buffer, rb_node);
-		if (offset < eb->start)
-			n = n->rb_left;
-		else if (offset > eb->start)
-			n = n->rb_right;
-		else
-			return eb;
-	}
-	return NULL;
-}
-
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
 		     struct extent_state *other)
 {
@@ -483,6 +441,15 @@ static int clear_state_bit(struct extent_io_tree *tree,
 	return ret;
 }
 
+static struct extent_state *
+alloc_extent_state_atomic(struct extent_state *prealloc)
+{
+	if (!prealloc)
+		prealloc = alloc_extent_state(GFP_ATOMIC);
+
+	return prealloc;
+}
+
 /*
  * clear some bits on a range in the tree.  This may require splitting
  * or inserting elements in the tree, so the gfp mask is used to
@@ -573,8 +540,8 @@ hit_next:
 	 */
 
 	if (state->start < start) {
-		if (!prealloc)
-			prealloc = alloc_extent_state(GFP_ATOMIC);
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
 		err = split_state(tree, state, prealloc, start);
 		BUG_ON(err == -EEXIST);
 		prealloc = NULL;
@@ -595,8 +562,8 @@ hit_next:
 	 * on the first half
 	 */
 	if (state->start <= end && state->end > end) {
-		if (!prealloc)
-			prealloc = alloc_extent_state(GFP_ATOMIC);
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
 		err = split_state(tree, state, prealloc, end + 1);
 		BUG_ON(err == -EEXIST);
 		if (wake)
@@ -734,6 +701,15 @@ static void cache_state(struct extent_state *state,
 	}
 }
 
+static void uncache_state(struct extent_state **cached_ptr)
+{
+	if (cached_ptr && (*cached_ptr)) {
+		struct extent_state *state = *cached_ptr;
+		*cached_ptr = NULL;
+		free_extent_state(state);
+	}
+}
+
 /*
  * set some bits on a range in the tree.  This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -760,8 +736,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 again:
 	if (!prealloc && (mask & __GFP_WAIT)) {
 		prealloc = alloc_extent_state(mask);
-		if (!prealloc)
-			return -ENOMEM;
+		BUG_ON(!prealloc);
 	}
 
 	spin_lock(&tree->lock);
@@ -778,6 +753,8 @@ again:
 	 */
 	node = tree_search(tree, start);
 	if (!node) {
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
 		err = insert_state(tree, prealloc, start, end, &bits);
 		prealloc = NULL;
 		BUG_ON(err == -EEXIST);
@@ -806,20 +783,18 @@ hit_next:
 		if (err)
 			goto out;
 
+		next_node = rb_next(node);
 		cache_state(state, cached_state);
 		merge_state(tree, state);
 		if (last_end == (u64)-1)
 			goto out;
 
 		start = last_end + 1;
-		if (start < end && prealloc && !need_resched()) {
-			next_node = rb_next(node);
-			if (next_node) {
-				state = rb_entry(next_node, struct extent_state,
-						 rb_node);
-				if (state->start == start)
-					goto hit_next;
-			}
+		if (next_node && start < end && prealloc && !need_resched()) {
+			state = rb_entry(next_node, struct extent_state,
+					 rb_node);
+			if (state->start == start)
+				goto hit_next;
 		}
 		goto search_again;
 	}
@@ -846,6 +821,9 @@ hit_next:
 			err = -EEXIST;
 			goto out;
 		}
+
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
 		err = split_state(tree, state, prealloc, start);
 		BUG_ON(err == -EEXIST);
 		prealloc = NULL;
@@ -876,14 +854,25 @@ hit_next:
 			this_end = end;
 		else
 			this_end = last_start - 1;
+
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
+
+		/*
+		 * Avoid to free 'prealloc' if it can be merged with
+		 * the later extent.
+		 */
+		atomic_inc(&prealloc->refs);
 		err = insert_state(tree, prealloc, start, this_end,
 				   &bits);
 		BUG_ON(err == -EEXIST);
 		if (err) {
+			free_extent_state(prealloc);
 			prealloc = NULL;
 			goto out;
 		}
 		cache_state(prealloc, cached_state);
+		free_extent_state(prealloc);
 		prealloc = NULL;
 		start = this_end + 1;
 		goto search_again;
@@ -900,6 +889,9 @@ hit_next:
 			err = -EEXIST;
 			goto out;
 		}
+
+		prealloc = alloc_extent_state_atomic(prealloc);
+		BUG_ON(!prealloc);
 		err = split_state(tree, state, prealloc, end + 1);
 		BUG_ON(err == -EEXIST);
 
@@ -976,18 +968,11 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 			      NULL, mask);
 }
 
-static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
-			    gfp_t mask)
-{
-	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
-				NULL, mask);
-}
-
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask)
+			struct extent_state **cached_state, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-			      NULL, mask);
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+			      NULL, cached_state, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -998,11 +983,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
 				cached_state, mask);
 }
 
-int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
-}
-
 /*
  * either insert or lock state struct between start and end use mask to tell
  * us if waiting is desired.
@@ -1056,33 +1036,13 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
 				mask);
 }
 
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-		  gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
 	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
 				mask);
 }
 
 /*
- * helper function to set pages and extents in the tree dirty
- */
-int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	struct page *page;
-
-	while (index <= end_index) {
-		page = find_get_page(tree->mapping, index);
-		BUG_ON(!page);
-		__set_page_dirty_nobuffers(page);
-		page_cache_release(page);
-		index++;
-	}
-	return 0;
-}
-
-/*
  * helper function to set both pages and extents in the tree writeback
  */
 static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1477,12 +1437,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
  */
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end, u64 max_bytes,
-		     unsigned long bits)
+		     unsigned long bits, int contig)
 {
 	struct rb_node *node;
 	struct extent_state *state;
 	u64 cur_start = *start;
 	u64 total_bytes = 0;
+	u64 last = 0;
 	int found = 0;
 
 	if (search_end <= cur_start) {
@@ -1507,15 +1468,20 @@ u64 count_range_bits(struct extent_io_tree *tree,
 		state = rb_entry(node, struct extent_state, rb_node);
 		if (state->start > search_end)
 			break;
-		if (state->end >= cur_start && (state->state & bits)) {
+		if (contig && found && state->start > last + 1)
+			break;
+		if (state->end >= cur_start && (state->state & bits) == bits) {
 			total_bytes += min(search_end, state->end) + 1 -
 				       max(cur_start, state->start);
 			if (total_bytes >= max_bytes)
 				break;
 			if (!found) {
-				*start = state->start;
+				*start = max(cur_start, state->start);
 				found = 1;
 			}
+			last = state->end;
+		} else if (contig && found) {
+			break;
 		}
 		node = rb_next(node);
 		if (!node)
@@ -1773,6 +1739,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
 	do {
 		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
+		struct extent_state *state;
+
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1787,9 +1756,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
 
+		spin_lock(&tree->lock);
+		state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+		if (state && state->start == start) {
+			/*
+			 * take a reference on the state, unlock will drop
+			 * the ref
+			 */
+			cache_state(state, &cached);
+		}
+		spin_unlock(&tree->lock);
+
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      NULL);
+							      state);
 			if (ret)
 				uptodate = 0;
 		}
@@ -1802,15 +1782,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
 				if (err)
 					uptodate = 0;
+				uncache_state(&cached);
 				continue;
 			}
 		}
 
 		if (uptodate) {
-			set_extent_uptodate(tree, start, end,
+			set_extent_uptodate(tree, start, end, &cached,
 					    GFP_ATOMIC);
 		}
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
 		if (whole_page) {
 			if (uptodate) {
@@ -1834,47 +1815,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	bio_put(bio);
 }
 
-/*
- * IO done from prepare_write is pretty simple, we just unlock
- * the structs in the extent tree when done, and set the uptodate bits
- * as appropriate.
- */
-static void end_bio_extent_preparewrite(struct bio *bio, int err)
-{
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct extent_io_tree *tree;
-	u64 start;
-	u64 end;
-
-	do {
-		struct page *page = bvec->bv_page;
-		tree = &BTRFS_I(page->mapping->host)->io_tree;
-
-		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-			bvec->bv_offset;
-		end = start + bvec->bv_len - 1;
-
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
-		if (uptodate) {
-			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
-		} else {
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
-
-		unlock_extent(tree, start, end, GFP_ATOMIC);
-
-	} while (bvec >= bio->bi_io_vec);
-
-	bio_put(bio);
-}
-
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
-		 gfp_t gfp_flags)
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		gfp_t gfp_flags)
 {
 	struct bio *bio;
 
@@ -1901,17 +1844,15 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
 	u64 start;
-	u64 end;
 
 	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
-	end = start + bvec->bv_len - 1;
 
 	bio->bi_private = NULL;
 
 	bio_get(bio);
 
 	if (tree->ops && tree->ops->submit_bio_hook)
-		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+		ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
 					   mirror_num, bio_flags, start);
 	else
 		submit_bio(rw, bio);
@@ -1965,7 +1906,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	else
 		nr = bio_get_nr_vecs(bdev);
 
-	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	if (!bio)
+		return -ENOMEM;
 
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
@@ -1990,6 +1933,7 @@ void set_page_extent_mapped(struct page *page)
 
 static void set_page_extent_head(struct page *page, unsigned long len)
 {
+	WARN_ON(!PagePrivate(page));
 	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
 }
 
@@ -2019,7 +1963,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	struct btrfs_ordered_extent *ordered;
 	int ret;
 	int nr = 0;
-	size_t page_offset = 0;
+	size_t pg_offset = 0;
 	size_t iosize;
 	size_t disk_io_size;
 	size_t blocksize = inode->i_sb->s_blocksize;
@@ -2027,6 +1971,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 
 	set_page_extent_mapped(page);
 
+	if (!PageUptodate(page)) {
+		if (cleancache_get_page(page) == 0) {
+			BUG_ON(blocksize != PAGE_SIZE);
+			goto out;
+		}
+	}
+
 	end = page_end;
 	while (1) {
 		lock_extent(tree, start, end, GFP_NOFS);
@@ -2053,19 +2004,22 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	while (cur <= end) {
 		if (cur >= last_byte) {
 			char *userpage;
-			iosize = PAGE_CACHE_SIZE - page_offset;
+			struct extent_state *cached = NULL;
+
+			iosize = PAGE_CACHE_SIZE - pg_offset;
 			userpage = kmap_atomic(page, KM_USER0);
-			memset(userpage + page_offset, 0, iosize);
+			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
 			kunmap_atomic(userpage, KM_USER0);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			break;
 		}
-		em = get_extent(inode, page, page_offset, cur,
+		em = get_extent(inode, page, pg_offset, cur,
 				end - cur + 1, 0);
-		if (IS_ERR(em) || !em) {
+		if (IS_ERR_OR_NULL(em)) {
 			SetPageError(page);
 			unlock_extent(tree, cur, end, GFP_NOFS);
 			break;
@@ -2074,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		BUG_ON(extent_map_end(em) <= cur);
 		BUG_ON(end < cur);
 
-		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
 			this_bio_flag = EXTENT_BIO_COMPRESSED;
+			extent_set_compress_type(&this_bio_flag,
+						 em->compress_type);
+		}
 
 		iosize = min(extent_map_end(em) - cur, end - cur + 1);
 		cur_end = min(extent_map_end(em) - 1, end);
@@ -2097,16 +2054,19 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		/* we've found a hole, just zero and go on */
 		if (block_start == EXTENT_MAP_HOLE) {
 			char *userpage;
+			struct extent_state *cached = NULL;
+
 			userpage = kmap_atomic(page, KM_USER0);
-			memset(userpage + page_offset, 0, iosize);
+			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
 			kunmap_atomic(userpage, KM_USER0);
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
 			continue;
 		}
 		/* the get_extent function already copied into the page */
@@ -2115,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			check_page_uptodate(tree, page);
 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
 			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
 			continue;
 		}
 		/* we have an inline extent but it didn't get marked up
@@ -2125,7 +2085,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			SetPageError(page);
 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
 			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
 			continue;
 		}
 
@@ -2138,7 +2098,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
 			pnr -= page->index;
 			ret = submit_extent_page(READ, tree, page,
-					 sector, disk_io_size, page_offset,
+					 sector, disk_io_size, pg_offset,
 					 bdev, bio, pnr,
 					 end_bio_extent_readpage, mirror_num,
 					 *bio_flags,
@@ -2149,8 +2109,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		if (ret)
 			SetPageError(page);
 		cur = cur + iosize;
-		page_offset += iosize;
+		pg_offset += iosize;
 	}
+out:
 	if (!nr) {
 		if (!PageError(page))
 			SetPageUptodate(page);
@@ -2169,7 +2130,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
 				      &bio_flags);
 	if (bio)
-		submit_one_bio(READ, bio, 0, bio_flags);
+		ret = submit_one_bio(READ, bio, 0, bio_flags);
 	return ret;
 }
 
@@ -2204,7 +2165,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
-	u64 unlock_start;
 	sector_t sector;
 	struct extent_state *cached_state = NULL;
 	struct extent_map *em;
@@ -2223,10 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	unsigned long nr_written = 0;
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC_PLUG;
+		write_flags = WRITE_SYNC;
 	else
 		write_flags = WRITE;
 
+	trace___extent_writepage(page, inode, wbc);
+
 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
@@ -2329,7 +2291,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		if (tree->ops && tree->ops->writepage_end_io_hook)
 			tree->ops->writepage_end_io_hook(page, start,
 							 page_end, NULL, 1);
-		unlock_start = page_end + 1;
 		goto done;
 	}
 
@@ -2340,12 +2301,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 page_end, NULL, 1);
-			unlock_start = page_end + 1;
 			break;
 		}
 		em = epd->get_extent(inode, page, pg_offset, cur,
 				     end - cur + 1, 1);
-		if (IS_ERR(em) || !em) {
+		if (IS_ERR_OR_NULL(em)) {
 			SetPageError(page);
 			break;
 		}
@@ -2387,7 +2347,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
 			cur += iosize;
 			pg_offset += iosize;
-			unlock_start = cur;
 			continue;
 		}
 		/* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2432,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
-	int range_whole = 0;
 
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
@@ -2482,8 +2440,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	} else {
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
 		scanned = 1;
 	}
 retry:
@@ -2689,7 +2645,7 @@ int extent_readpages(struct extent_io_tree *tree,
 		prefetchw(&page->flags);
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping,
-					page->index, GFP_KERNEL)) {
+					page->index, GFP_NOFS)) {
 			__extent_read_full_page(tree, page, get_extent,
 						&bio, 0, &bio_flags);
 		}
@@ -2728,123 +2684,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
 }
 
 /*
- * simple commit_write call, set_range_dirty is used to mark both
- * the pages and the extent records as dirty
- */
-int extent_commit_write(struct extent_io_tree *tree,
-			struct inode *inode, struct page *page,
-			unsigned from, unsigned to)
-{
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
-	set_page_extent_mapped(page);
-	set_page_dirty(page);
-
-	if (pos > inode->i_size) {
-		i_size_write(inode, pos);
-		mark_inode_dirty(inode);
-	}
-	return 0;
-}
-
-int extent_prepare_write(struct extent_io_tree *tree,
-			 struct inode *inode, struct page *page,
-			 unsigned from, unsigned to, get_extent_t *get_extent)
-{
-	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
-	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
-	u64 block_start;
-	u64 orig_block_start;
-	u64 block_end;
-	u64 cur_end;
-	struct extent_map *em;
-	unsigned blocksize = 1 << inode->i_blkbits;
-	size_t page_offset = 0;
-	size_t block_off_start;
-	size_t block_off_end;
-	int err = 0;
-	int iocount = 0;
-	int ret = 0;
-	int isnew;
-
-	set_page_extent_mapped(page);
-
-	block_start = (page_start + from) & ~((u64)blocksize - 1);
-	block_end = (page_start + to - 1) | (blocksize - 1);
-	orig_block_start = block_start;
-
-	lock_extent(tree, page_start, page_end, GFP_NOFS);
-	while (block_start <= block_end) {
-		em = get_extent(inode, page, page_offset, block_start,
-				block_end - block_start + 1, 1);
-		if (IS_ERR(em) || !em)
-			goto err;
-
-		cur_end = min(block_end, extent_map_end(em) - 1);
-		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
-		block_off_end = block_off_start + blocksize;
-		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-
-		if (!PageUptodate(page) && isnew &&
-		    (block_off_end > to || block_off_start < from)) {
-			void *kaddr;
-
-			kaddr = kmap_atomic(page, KM_USER0);
-			if (block_off_end > to)
-				memset(kaddr + to, 0, block_off_end - to);
-			if (block_off_start < from)
-				memset(kaddr + block_off_start, 0,
-				       from - block_off_start);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
-		}
-		if ((em->block_start != EXTENT_MAP_HOLE &&
-		     em->block_start != EXTENT_MAP_INLINE) &&
-		    !isnew && !PageUptodate(page) &&
-		    (block_off_end > to || block_off_start < from) &&
-		    !test_range_bit(tree, block_start, cur_end,
-				    EXTENT_UPTODATE, 1, NULL)) {
-			u64 sector;
-			u64 extent_offset = block_start - em->start;
-			size_t iosize;
-			sector = (em->block_start + extent_offset) >> 9;
-			iosize = (cur_end - block_start + blocksize) &
-				~((u64)blocksize - 1);
-			/*
-			 * we've already got the extent locked, but we
-			 * need to split the state such that our end_bio
-			 * handler can clear the lock.
-			 */
-			set_extent_bit(tree, block_start,
-				       block_start + iosize - 1,
-				       EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
-			ret = submit_extent_page(READ, tree, page,
-					 sector, iosize, page_offset, em->bdev,
-					 NULL, 1,
-					 end_bio_extent_preparewrite, 0,
-					 0, 0);
-			iocount++;
-			block_start = block_start + iosize;
-		} else {
-			set_extent_uptodate(tree, block_start, cur_end,
-					    GFP_NOFS);
-			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
-			block_start = cur_end + 1;
-		}
-		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
-		free_extent_map(em);
-	}
-	if (iocount) {
-		wait_extent_bit(tree, orig_block_start,
-				block_end, EXTENT_LOCKED);
-	}
-	check_page_uptodate(tree, page);
-err:
-	/* FIXME, zero out newly allocated blocks on error */
-	return err;
-}
-
-/*
  * a helper for releasepage, this tests for areas of the page that
  * are locked or under IO and drops the related state bits if it is safe
  * to drop the page.
@@ -2867,9 +2706,17 @@ int try_release_extent_state(struct extent_map_tree *map,
 		 * at this point we can safely clear everything except the
 		 * locked bit and the nodatasum bit
 		 */
-		clear_extent_bit(tree, start, end,
+		ret = clear_extent_bit(tree, start, end,
 				 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
 				 0, 0, NULL, mask);
+
+		/* if clear_extent_bit failed for enomem reasons,
+		 * we can't allow the release to continue.
+		 */
+		if (ret < 0)
+			ret = 0;
+		else
+			ret = 1;
 	}
 	return ret;
 }
@@ -2894,7 +2741,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 			len = end - start + 1;
 			write_lock(&map->lock);
 			em = lookup_extent_mapping(map, start, len);
-			if (!em || IS_ERR(em)) {
+			if (IS_ERR_OR_NULL(em)) {
 				write_unlock(&map->lock);
 				break;
 			}
@@ -2922,76 +2769,169 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 	return try_release_extent_state(map, tree, page, mask);
 }
 
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
-		get_extent_t *get_extent)
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+						u64 offset,
+						u64 last,
+						get_extent_t *get_extent)
 {
-	struct inode *inode = mapping->host;
-	struct extent_state *cached_state = NULL;
-	u64 start = iblock << inode->i_blkbits;
-	sector_t sector = 0;
-	size_t blksize = (1 << inode->i_blkbits);
+	u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
 	struct extent_map *em;
+	u64 len;
 
-	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
-			 0, &cached_state, GFP_NOFS);
-	em = get_extent(inode, NULL, 0, start, blksize, 0);
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
-			     start + blksize - 1, &cached_state, GFP_NOFS);
-	if (!em || IS_ERR(em))
-		return 0;
+	if (offset >= last)
+		return NULL;
 
-	if (em->block_start > EXTENT_MAP_LAST_BYTE)
-		goto out;
+	while(1) {
+		len = last - offset;
+		if (len == 0)
+			break;
+		len = (len + sectorsize - 1) & ~(sectorsize - 1);
+		em = get_extent(inode, NULL, 0, offset, len, 0);
+		if (IS_ERR_OR_NULL(em))
+			return em;
 
-	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
-out:
-	free_extent_map(em);
-	return sector;
+		/* if this isn't a hole return it */
+		if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+		    em->block_start != EXTENT_MAP_HOLE) {
+			return em;
+		}
+
+		/* this is a hole, advance to the next extent */
+		offset = extent_map_end(em);
+		free_extent_map(em);
+		if (offset >= last)
+			break;
+	}
+	return NULL;
 }
 
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent)
 {
-	int ret;
+	int ret = 0;
 	u64 off = start;
 	u64 max = start + len;
 	u32 flags = 0;
+	u32 found_type;
+	u64 last;
+	u64 last_for_get_extent = 0;
 	u64 disko = 0;
+	u64 isize = i_size_read(inode);
+	struct btrfs_key found_key;
 	struct extent_map *em = NULL;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *item;
 	int end = 0;
-	u64 em_start = 0, em_len = 0;
+	u64 em_start = 0;
+	u64 em_len = 0;
+	u64 em_end = 0;
 	unsigned long emflags;
-	ret = 0;
 
 	if (len == 0)
 		return -EINVAL;
 
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	/*
+	 * lookup the last file extent.  We're not using i_size here
+	 * because there might be preallocation past i_size
+	 */
+	ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
+				       path, btrfs_ino(inode), -1, 0);
+	if (ret < 0) {
+		btrfs_free_path(path);
+		return ret;
+	}
+	WARN_ON(!ret);
+	path->slots[0]--;
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_file_extent_item);
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+	found_type = btrfs_key_type(&found_key);
+
+	/* No extents, but there might be delalloc bits */
+	if (found_key.objectid != btrfs_ino(inode) ||
+	    found_type != BTRFS_EXTENT_DATA_KEY) {
+		/* have to trust i_size as the end */
+		last = (u64)-1;
+		last_for_get_extent = isize;
+	} else {
+		/*
+		 * remember the start of the last extent.  There are a
+		 * bunch of different factors that go into the length of the
+		 * extent, so its much less complex to remember where it started
+		 */
+		last = found_key.offset;
+		last_for_get_extent = last + 1;
+	}
+	btrfs_free_path(path);
+
+	/*
+	 * we might have some extents allocated but more delalloc past those
+	 * extents.  so, we trust isize unless the start of the last extent is
+	 * beyond isize
+	 */
+	if (last < isize) {
+		last = (u64)-1;
+		last_for_get_extent = isize;
+	}
+
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
 			 &cached_state, GFP_NOFS);
-	em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+	em = get_extent_skip_holes(inode, off, last_for_get_extent,
+				   get_extent);
 	if (!em)
 		goto out;
 	if (IS_ERR(em)) {
 		ret = PTR_ERR(em);
 		goto out;
 	}
+
 	while (!end) {
-		off = em->start + em->len;
-		if (off >= max)
-			end = 1;
+		u64 offset_in_extent;
+
+		/* break if the extent we found is outside the range */
+		if (em->start >= max || extent_map_end(em) < off)
+			break;
 
-		em_start = em->start;
-		em_len = em->len;
+		/*
+		 * get_extent may return an extent that starts before our
+		 * requested range.  We have to make sure the ranges
+		 * we return to fiemap always move forward and don't
+		 * overlap, so adjust the offsets here
+		 */
+		em_start = max(em->start, off);
 
+		/*
+		 * record the offset from the start of the extent
+		 * for adjusting the disk offset below
+		 */
+		offset_in_extent = em_start - em->start;
+		em_end = extent_map_end(em);
+		em_len = em_end - em_start;
+		emflags = em->flags;
 		disko = 0;
 		flags = 0;
 
+		/*
+		 * bump off for our next call to get_extent
+		 */
+		off = extent_map_end(em);
+		if (off >= max)
+			end = 1;
+
 		if (em->block_start == EXTENT_MAP_LAST_BYTE) {
 			end = 1;
 			flags |= FIEMAP_EXTENT_LAST;
-		} else if (em->block_start == EXTENT_MAP_HOLE) {
-			flags |= FIEMAP_EXTENT_UNWRITTEN;
 		} else if (em->block_start == EXTENT_MAP_INLINE) {
 			flags |= (FIEMAP_EXTENT_DATA_INLINE |
 				  FIEMAP_EXTENT_NOT_ALIGNED);
@@ -2999,32 +2939,32 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			flags |= (FIEMAP_EXTENT_DELALLOC |
 				  FIEMAP_EXTENT_UNKNOWN);
 		} else {
-			disko = em->block_start;
+			disko = em->block_start + offset_in_extent;
 		}
 		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
 			flags |= FIEMAP_EXTENT_ENCODED;
 
-		emflags = em->flags;
 		free_extent_map(em);
 		em = NULL;
+		if ((em_start >= last) || em_len == (u64)-1 ||
+		   (last == (u64)-1 && isize <= em_end)) {
+			flags |= FIEMAP_EXTENT_LAST;
+			end = 1;
+		}
 
-		if (!end) {
-			em = get_extent(inode, NULL, 0, off, max - off, 0);
-			if (!em)
-				goto out;
-			if (IS_ERR(em)) {
-				ret = PTR_ERR(em);
-				goto out;
-			}
-			emflags = em->flags;
+		/* now scan forward to see if this is really the last extent. */
+		em = get_extent_skip_holes(inode, off, last_for_get_extent,
+					   get_extent);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
 		}
-		if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+		if (!em) {
 			flags |= FIEMAP_EXTENT_LAST;
 			end = 1;
 		}
-
 		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
 					      em_len, flags);
 		if (ret)
 			goto out_free;
 	}
@@ -3078,6 +3018,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 #endif
 
 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	if (eb == NULL)
+		return NULL;
 	eb->start = start;
 	eb->len = len;
 	spin_lock_init(&eb->lock);
@@ -3104,10 +3046,42 @@ static void __free_extent_buffer(struct extent_buffer *eb)
 	kmem_cache_free(extent_buffer_cache, eb);
 }
 
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+						unsigned long start_idx)
+{
+	unsigned long index;
+	struct page *page;
+
+	if (!eb->first_page)
+		return;
+
+	index = num_extent_pages(eb->start, eb->len);
+	if (start_idx >= index)
+		return;
+
+	do {
+		index--;
+		page = extent_buffer_page(eb, index);
+		if (page)
+			page_cache_release(page);
+	} while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+	btrfs_release_extent_buffer_page(eb, 0);
+	__free_extent_buffer(eb);
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 					  u64 start, unsigned long len,
-					  struct page *page0,
-					  gfp_t mask)
+					  struct page *page0)
 {
 	unsigned long num_pages = num_extent_pages(start, len);
 	unsigned long i;
@@ -3117,18 +3091,18 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	struct page *p;
 	struct address_space *mapping = tree->mapping;
 	int uptodate = 1;
+	int ret;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb) {
-		atomic_inc(&eb->refs);
-		spin_unlock(&tree->buffer_lock);
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
 		mark_page_accessed(eb->first_page);
 		return eb;
 	}
-	spin_unlock(&tree->buffer_lock);
+	rcu_read_unlock();
 
-	eb = __alloc_extent_buffer(tree, start, len, mask);
+	eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
 	if (!eb)
 		return NULL;
 
@@ -3145,7 +3119,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		i = 0;
 	}
 	for (; i < num_pages; i++, index++) {
-		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+		p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
 		if (!p) {
 			WARN_ON(1);
 			goto free_eb;
@@ -3160,50 +3134,77 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3160 | } | 3134 | } |
3161 | if (!PageUptodate(p)) | 3135 | if (!PageUptodate(p)) |
3162 | uptodate = 0; | 3136 | uptodate = 0; |
3163 | unlock_page(p); | 3137 | |
3138 | /* | ||
3139 | * see below about how we avoid a nasty race with release page | ||
3140 | * and why we unlock later | ||
3141 | */ | ||
3142 | if (i != 0) | ||
3143 | unlock_page(p); | ||
3164 | } | 3144 | } |
3165 | if (uptodate) | 3145 | if (uptodate) |
3166 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3146 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3167 | 3147 | ||
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+
 	spin_lock(&tree->buffer_lock);
-	exists = buffer_tree_insert(tree, start, &eb->rb_node);
-	if (exists) {
+	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+	if (ret == -EEXIST) {
+		exists = radix_tree_lookup(&tree->buffer,
+						start >> PAGE_CACHE_SHIFT);
 		/* add one reference for the caller */
 		atomic_inc(&exists->refs);
 		spin_unlock(&tree->buffer_lock);
+		radix_tree_preload_end();
 		goto free_eb;
 	}
 	/* add one reference for the tree */
 	atomic_inc(&eb->refs);
 	spin_unlock(&tree->buffer_lock);
+	radix_tree_preload_end();
+
+	/*
+	 * there is a race where release page may have tried to find this
+	 * extent buffer in the radix tree but failed.  It will tell the VM
+	 * it is safe to reclaim the page, and it will clear the page
+	 * private bit.  We must make sure to set the page private bit
+	 * properly after the extent buffer is in the radix tree so it
+	 * doesn't get lost.
+	 */
+	set_page_extent_mapped(eb->first_page);
+	set_page_extent_head(eb->first_page, eb->len);
+	if (!page0)
+		unlock_page(eb->first_page);
 	return eb;
 
 free_eb:
+	if (eb->first_page && !page0)
+		unlock_page(eb->first_page);
+
 	if (!atomic_dec_and_test(&eb->refs))
 		return exists;
-	for (index = 1; index < i; index++)
-		page_cache_release(extent_buffer_page(eb, index));
-	page_cache_release(extent_buffer_page(eb, 0));
-	__free_extent_buffer(eb);
+	btrfs_release_extent_buffer(eb);
 	return exists;
 }
 
 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-					 u64 start, unsigned long len,
-					 gfp_t mask)
+					 u64 start, unsigned long len)
 {
 	struct extent_buffer *eb;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb)
-		atomic_inc(&eb->refs);
-	spin_unlock(&tree->buffer_lock);
-
-	if (eb)
-		mark_page_accessed(eb->first_page);
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
+		mark_page_accessed(eb->first_page);
+		return eb;
+	}
+	rcu_read_unlock();
 
-	return eb;
+	return NULL;
 }
 
 void free_extent_buffer(struct extent_buffer *eb)
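Insertion, by contrast, cannot be lockless: it still takes buffer_lock, and radix_tree_insert() may need to allocate internal tree nodes, which must not happen under a spinlock. radix_tree_preload() handles that by stashing pre-allocated nodes per CPU; it returns with preemption disabled, which radix_tree_preload_end() re-enables. The insert-or-adopt-existing pattern from alloc_extent_buffer() above, in isolation (names follow the function):

	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); /* may sleep; fills per-CPU node pool */
	if (ret)
		goto free_eb;
	spin_lock(&tree->buffer_lock);
	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
	if (ret == -EEXIST) {
		/* lost the race: adopt the buffer that won and give the caller a ref */
		exists = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
		atomic_inc(&exists->refs);
	}
	spin_unlock(&tree->buffer_lock);
	radix_tree_preload_end();                            /* drop pool, re-enable preemption */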
@@ -3232,10 +3233,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			continue;
 
 		lock_page(page);
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
 		if (i == 0)
 			set_page_extent_head(page, eb->len);
-		else
-			set_page_private(page, EXTENT_PAGE_PRIVATE);
 
 		clear_page_dirty_for_io(page);
 		spin_lock_irq(&page->mapping->tree_lock);
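This hunk, like the matching one in read_extent_buffer_pages() below, re-asserts the page-to-extent-buffer linkage before doing IO, closing the releasepage race documented in alloc_extent_buffer(). For reference, the two helpers involved are defined earlier in this file approximately as follows (a reconstruction for this kernel generation, not part of the patch):

	static void set_page_extent_mapped(struct page *page)
	{
		if (!PagePrivate(page)) {
			SetPagePrivate(page);
			page_cache_get(page);  /* a private page pins one extra reference */
			set_page_private(page, EXTENT_PAGE_PRIVATE);
		}
	}

	static void set_page_extent_head(struct page *page, unsigned long len)
	{
		/* the first page of a buffer encodes the buffer length in page->private */
		set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
	}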
@@ -3250,13 +3252,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 	return 0;
 }
 
-int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
-				    struct extent_buffer *eb)
-{
-	return wait_on_extent_writeback(tree, eb->start,
-					eb->start + eb->len - 1);
-}
-
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
 			    struct extent_buffer *eb)
 {
@@ -3302,7 +3297,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 	num_pages = num_extent_pages(eb->start, eb->len);
 
 	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    GFP_NOFS);
+			    NULL, GFP_NOFS);
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
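The new NULL argument reflects a signature change elsewhere in this file: set_extent_uptodate() now accepts a cached extent_state pointer, so callers that already hold the relevant state record can skip the rbtree walk inside the extent state tree. A plausible shape of the updated helper (the parameter name is an assumption):

	static int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
				       struct extent_state **cached_state, gfp_t mask);

	/* a caller holding state from a prior locking call can pass it back: */
	set_extent_uptodate(tree, start, end, &cached, GFP_NOFS);

This caller has nothing cached, hence the NULL.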
@@ -3425,6 +3420,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 
 	for (i = start_i; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
+
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
+		if (i == 0)
+			set_page_extent_head(page, eb->len);
+
 		if (inc_all_pages)
 			page_cache_get(page);
 		if (!PageUptodate(page)) {
@@ -3530,6 +3532,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 		       "wanted %lu %lu\n", (unsigned long long)eb->start,
 		       eb->len, start, min_len);
 		WARN_ON(1);
+		return -EINVAL;
 	}
 
 	p = extent_buffer_page(eb, i);
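Previously a request for a range that does not fit within one page only triggered the WARN_ON() and then fell through to map whatever happened to be there; it now fails cleanly. Callers are expected to check the result, roughly as below (a hypothetical caller; the argument list follows my reading of this kernel generation's signature and may not match exactly):

	err = map_private_extent_buffer(eb, start, min_len, &token, &kaddr,
					&map_start, &map_len, KM_USER0);
	if (err)
		return err;	/* requested range straddles a page boundary */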
@@ -3722,6 +3725,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
 	kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+	unsigned long distance = (src > dst) ? src - dst : dst - src;
+	return distance < len;
+}
+
 static void copy_pages(struct page *dst_page, struct page *src_page,
 		       unsigned long dst_off, unsigned long src_off,
 		       unsigned long len)
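areas_overlap() reads: two ranges of length len starting at src and dst overlap exactly when the distance between their start offsets is less than len. A quick worked check (values are illustrative):

	areas_overlap(0, 100, 50);   /* distance 100 >= 50  -> false: [0,50) and [100,150) are disjoint */
	areas_overlap(0, 100, 150);  /* distance 100 < 150  -> true:  [0,150) and [100,250) share 100..149 */
	areas_overlap(40, 40, 1);    /* distance 0 < 1      -> true:  the same single byte */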
@@ -3729,10 +3738,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
 	char *src_kaddr;
 
-	if (dst_page != src_page)
+	if (dst_page != src_page) {
 		src_kaddr = kmap_atomic(src_page, KM_USER1);
-	else
+	} else {
 		src_kaddr = dst_kaddr;
+		BUG_ON(areas_overlap(src_off, dst_off, len));
+	}
 
 	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
 	kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3807,7 +3818,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 		       "len %lu len %lu\n", dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
-	if (dst_offset < src_offset) {
+	if (!areas_overlap(src_offset, dst_offset, len)) {
 		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
 		return;
 	}
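This fixes a subtle bug: dst_offset < src_offset alone does not make the fast path safe, because copy_pages() uses plain memcpy(), which is undefined for overlapping ranges in either direction (hence the new BUG_ON above). With the new test, memcpy_extent_buffer() is used only for genuinely disjoint ranges, and overlapping moves always take the careful backwards path below. The same rule in plain userspace C:

	#include <string.h>

	char buf[8] = "abcdef";
	/* memcpy(buf + 1, buf, 5) would be undefined: [0,5) and [1,6) overlap */
	memmove(buf + 1, buf, 5);   /* well-defined: buf becomes "aabcde" */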
@@ -3833,34 +3844,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	btrfs_release_extent_buffer(eb);
+}
+
 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 {
 	u64 start = page_offset(page);
 	struct extent_buffer *eb;
 	int ret = 1;
-	unsigned long i;
-	unsigned long num_pages;
 
 	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (!eb)
-		goto out;
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (!eb) {
+		spin_unlock(&tree->buffer_lock);
+		return ret;
+	}
 
-	if (atomic_read(&eb->refs) > 1) {
+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
 		ret = 0;
 		goto out;
 	}
-	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+	/*
+	 * if @eb->refs is exactly 1 (only the tree holds it), drop it to 0
+	 * and release the @eb; otherwise someone else still uses it, so
+	 * back off and report the page as busy.
+	 */
+	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
 		ret = 0;
 		goto out;
 	}
-	/* at this point we can safely release the extent buffer */
-	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = 0; i < num_pages; i++)
-		page_cache_release(extent_buffer_page(eb, i));
-	rb_erase(&eb->rb_node, &tree->buffer);
-	__free_extent_buffer(eb);
+
+	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 out:
 	spin_unlock(&tree->buffer_lock);
+
+	/* at this point we can safely release the extent buffer */
+	if (atomic_read(&eb->refs) == 0)
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
 	return ret;
 }
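This is the writer half of the RCU scheme used by the lockless lookups above: claim the last reference atomically, unpublish the buffer from the radix tree under the lock, and defer the actual free past a grace period so a concurrent reader that already fetched the pointer never touches freed memory. Stripped of btrfs specifics, the skeleton looks like this (names are generic placeholders, not code from the patch):

	spin_lock(&tree->lock);
	obj = radix_tree_lookup(&tree->root, index);
	if (obj && atomic_cmpxchg(&obj->refs, 1, 0) == 1)
		radix_tree_delete(&tree->root, index);	/* took the last ref: unpublish */
	else
		obj = NULL;				/* absent, or still in use */
	spin_unlock(&tree->lock);

	if (obj)
		call_rcu(&obj->rcu_head, free_obj_rcu);	/* free once all readers drain */

A racing reader that still holds the stale pointer sees refs == 0, so its atomic_inc_not_zero() fails and the lookup reports a miss; call_rcu() guarantees the memory stays valid until that reader leaves its rcu_read_lock() section.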