aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c330
1 files changed, 198 insertions, 132 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68260180f587..0cb88f8146ea 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree,
367 } 367 }
368 if (bits & EXTENT_DIRTY) 368 if (bits & EXTENT_DIRTY)
369 tree->dirty_bytes += end - start + 1; 369 tree->dirty_bytes += end - start + 1;
370 set_state_cb(tree, state, bits);
371 state->state |= bits;
372 state->start = start; 370 state->start = start;
373 state->end = end; 371 state->end = end;
372 set_state_cb(tree, state, bits);
373 state->state |= bits;
374 node = tree_insert(&tree->state, end, &state->rb_node); 374 node = tree_insert(&tree->state, end, &state->rb_node);
375 if (node) { 375 if (node) {
376 struct extent_state *found; 376 struct extent_state *found;
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
471 * bits were already set, or zero if none of the bits were already set. 471 * bits were already set, or zero if none of the bits were already set.
472 */ 472 */
473int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 473int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
474 int bits, int wake, int delete, gfp_t mask) 474 int bits, int wake, int delete,
475 struct extent_state **cached_state,
476 gfp_t mask)
475{ 477{
476 struct extent_state *state; 478 struct extent_state *state;
479 struct extent_state *cached;
477 struct extent_state *prealloc = NULL; 480 struct extent_state *prealloc = NULL;
481 struct rb_node *next_node;
478 struct rb_node *node; 482 struct rb_node *node;
479 u64 last_end; 483 u64 last_end;
480 int err; 484 int err;
@@ -488,6 +492,17 @@ again:
488 } 492 }
489 493
490 spin_lock(&tree->lock); 494 spin_lock(&tree->lock);
495 if (cached_state) {
496 cached = *cached_state;
497 *cached_state = NULL;
498 cached_state = NULL;
499 if (cached && cached->tree && cached->start == start) {
500 atomic_dec(&cached->refs);
501 state = cached;
502 goto hit_next;
503 }
504 free_extent_state(cached);
505 }
491 /* 506 /*
492 * this search will find the extents that end after 507 * this search will find the extents that end after
493 * our range starts 508 * our range starts
@@ -496,6 +511,7 @@ again:
496 if (!node) 511 if (!node)
497 goto out; 512 goto out;
498 state = rb_entry(node, struct extent_state, rb_node); 513 state = rb_entry(node, struct extent_state, rb_node);
514hit_next:
499 if (state->start > end) 515 if (state->start > end)
500 goto out; 516 goto out;
501 WARN_ON(state->end < start); 517 WARN_ON(state->end < start);
@@ -531,8 +547,6 @@ again:
531 if (last_end == (u64)-1) 547 if (last_end == (u64)-1)
532 goto out; 548 goto out;
533 start = last_end + 1; 549 start = last_end + 1;
534 } else {
535 start = state->start;
536 } 550 }
537 goto search_again; 551 goto search_again;
538 } 552 }
@@ -550,16 +564,28 @@ again:
550 564
551 if (wake) 565 if (wake)
552 wake_up(&state->wq); 566 wake_up(&state->wq);
567
553 set |= clear_state_bit(tree, prealloc, bits, 568 set |= clear_state_bit(tree, prealloc, bits,
554 wake, delete); 569 wake, delete);
555 prealloc = NULL; 570 prealloc = NULL;
556 goto out; 571 goto out;
557 } 572 }
558 573
574 if (state->end < end && prealloc && !need_resched())
575 next_node = rb_next(&state->rb_node);
576 else
577 next_node = NULL;
578
559 set |= clear_state_bit(tree, state, bits, wake, delete); 579 set |= clear_state_bit(tree, state, bits, wake, delete);
560 if (last_end == (u64)-1) 580 if (last_end == (u64)-1)
561 goto out; 581 goto out;
562 start = last_end + 1; 582 start = last_end + 1;
583 if (start <= end && next_node) {
584 state = rb_entry(next_node, struct extent_state,
585 rb_node);
586 if (state->start == start)
587 goto hit_next;
588 }
563 goto search_again; 589 goto search_again;
564 590
565out: 591out:
@@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree,
653 state->state |= bits; 679 state->state |= bits;
654} 680}
655 681
682static void cache_state(struct extent_state *state,
683 struct extent_state **cached_ptr)
684{
685 if (cached_ptr && !(*cached_ptr)) {
686 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
687 *cached_ptr = state;
688 atomic_inc(&state->refs);
689 }
690 }
691}
692
656/* 693/*
657 * set some bits on a range in the tree. This may require allocations 694 * set some bits on a range in the tree. This may require allocations or
658 * or sleeping, so the gfp mask is used to indicate what is allowed. 695 * sleeping, so the gfp mask is used to indicate what is allowed.
659 * 696 *
660 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 697 * If any of the exclusive bits are set, this will fail with -EEXIST if some
661 * range already has the desired bits set. The start of the existing 698 * part of the range already has the desired bits set. The start of the
662 * range is returned in failed_start in this case. 699 * existing range is returned in failed_start in this case.
663 * 700 *
664 * [start, end] is inclusive 701 * [start, end] is inclusive This takes the tree lock.
665 * This takes the tree lock.
666 */ 702 */
703
667static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 704static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
668 int bits, int exclusive, u64 *failed_start, 705 int bits, int exclusive_bits, u64 *failed_start,
706 struct extent_state **cached_state,
669 gfp_t mask) 707 gfp_t mask)
670{ 708{
671 struct extent_state *state; 709 struct extent_state *state;
672 struct extent_state *prealloc = NULL; 710 struct extent_state *prealloc = NULL;
673 struct rb_node *node; 711 struct rb_node *node;
674 int err = 0; 712 int err = 0;
675 int set;
676 u64 last_start; 713 u64 last_start;
677 u64 last_end; 714 u64 last_end;
715
678again: 716again:
679 if (!prealloc && (mask & __GFP_WAIT)) { 717 if (!prealloc && (mask & __GFP_WAIT)) {
680 prealloc = alloc_extent_state(mask); 718 prealloc = alloc_extent_state(mask);
@@ -683,6 +721,13 @@ again:
683 } 721 }
684 722
685 spin_lock(&tree->lock); 723 spin_lock(&tree->lock);
724 if (cached_state && *cached_state) {
725 state = *cached_state;
726 if (state->start == start && state->tree) {
727 node = &state->rb_node;
728 goto hit_next;
729 }
730 }
686 /* 731 /*
687 * this search will find all the extents that end after 732 * this search will find all the extents that end after
688 * our range starts. 733 * our range starts.
@@ -694,8 +739,8 @@ again:
694 BUG_ON(err == -EEXIST); 739 BUG_ON(err == -EEXIST);
695 goto out; 740 goto out;
696 } 741 }
697
698 state = rb_entry(node, struct extent_state, rb_node); 742 state = rb_entry(node, struct extent_state, rb_node);
743hit_next:
699 last_start = state->start; 744 last_start = state->start;
700 last_end = state->end; 745 last_end = state->end;
701 746
@@ -706,17 +751,29 @@ again:
706 * Just lock what we found and keep going 751 * Just lock what we found and keep going
707 */ 752 */
708 if (state->start == start && state->end <= end) { 753 if (state->start == start && state->end <= end) {
709 set = state->state & bits; 754 struct rb_node *next_node;
710 if (set && exclusive) { 755 if (state->state & exclusive_bits) {
711 *failed_start = state->start; 756 *failed_start = state->start;
712 err = -EEXIST; 757 err = -EEXIST;
713 goto out; 758 goto out;
714 } 759 }
760
715 set_state_bits(tree, state, bits); 761 set_state_bits(tree, state, bits);
762 cache_state(state, cached_state);
716 merge_state(tree, state); 763 merge_state(tree, state);
717 if (last_end == (u64)-1) 764 if (last_end == (u64)-1)
718 goto out; 765 goto out;
766
719 start = last_end + 1; 767 start = last_end + 1;
768 if (start < end && prealloc && !need_resched()) {
769 next_node = rb_next(node);
770 if (next_node) {
771 state = rb_entry(next_node, struct extent_state,
772 rb_node);
773 if (state->start == start)
774 goto hit_next;
775 }
776 }
720 goto search_again; 777 goto search_again;
721 } 778 }
722 779
@@ -737,8 +794,7 @@ again:
737 * desired bit on it. 794 * desired bit on it.
738 */ 795 */
739 if (state->start < start) { 796 if (state->start < start) {
740 set = state->state & bits; 797 if (state->state & exclusive_bits) {
741 if (exclusive && set) {
742 *failed_start = start; 798 *failed_start = start;
743 err = -EEXIST; 799 err = -EEXIST;
744 goto out; 800 goto out;
@@ -750,12 +806,11 @@ again:
750 goto out; 806 goto out;
751 if (state->end <= end) { 807 if (state->end <= end) {
752 set_state_bits(tree, state, bits); 808 set_state_bits(tree, state, bits);
809 cache_state(state, cached_state);
753 merge_state(tree, state); 810 merge_state(tree, state);
754 if (last_end == (u64)-1) 811 if (last_end == (u64)-1)
755 goto out; 812 goto out;
756 start = last_end + 1; 813 start = last_end + 1;
757 } else {
758 start = state->start;
759 } 814 }
760 goto search_again; 815 goto search_again;
761 } 816 }
@@ -774,6 +829,7 @@ again:
774 this_end = last_start - 1; 829 this_end = last_start - 1;
775 err = insert_state(tree, prealloc, start, this_end, 830 err = insert_state(tree, prealloc, start, this_end,
776 bits); 831 bits);
832 cache_state(prealloc, cached_state);
777 prealloc = NULL; 833 prealloc = NULL;
778 BUG_ON(err == -EEXIST); 834 BUG_ON(err == -EEXIST);
779 if (err) 835 if (err)
@@ -788,8 +844,7 @@ again:
788 * on the first half 844 * on the first half
789 */ 845 */
790 if (state->start <= end && state->end > end) { 846 if (state->start <= end && state->end > end) {
791 set = state->state & bits; 847 if (state->state & exclusive_bits) {
792 if (exclusive && set) {
793 *failed_start = start; 848 *failed_start = start;
794 err = -EEXIST; 849 err = -EEXIST;
795 goto out; 850 goto out;
@@ -798,6 +853,7 @@ again:
798 BUG_ON(err == -EEXIST); 853 BUG_ON(err == -EEXIST);
799 854
800 set_state_bits(tree, prealloc, bits); 855 set_state_bits(tree, prealloc, bits);
856 cache_state(prealloc, cached_state);
801 merge_state(tree, prealloc); 857 merge_state(tree, prealloc);
802 prealloc = NULL; 858 prealloc = NULL;
803 goto out; 859 goto out;
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
826 gfp_t mask) 882 gfp_t mask)
827{ 883{
828 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 884 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
829 mask); 885 NULL, mask);
830}
831
832int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
833 gfp_t mask)
834{
835 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
836} 886}
837 887
838int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 888int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
839 int bits, gfp_t mask) 889 int bits, gfp_t mask)
840{ 890{
841 return set_extent_bit(tree, start, end, bits, 0, NULL, 891 return set_extent_bit(tree, start, end, bits, 0, NULL,
842 mask); 892 NULL, mask);
843} 893}
844 894
845int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 895int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
846 int bits, gfp_t mask) 896 int bits, gfp_t mask)
847{ 897{
848 return clear_extent_bit(tree, start, end, bits, 0, 0, mask); 898 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
849} 899}
850 900
851int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 901int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
852 gfp_t mask) 902 gfp_t mask)
853{ 903{
854 return set_extent_bit(tree, start, end, 904 return set_extent_bit(tree, start, end,
855 EXTENT_DELALLOC | EXTENT_DIRTY, 905 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
856 0, NULL, mask); 906 0, NULL, NULL, mask);
857} 907}
858 908
859int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 909int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
860 gfp_t mask) 910 gfp_t mask)
861{ 911{
862 return clear_extent_bit(tree, start, end, 912 return clear_extent_bit(tree, start, end,
863 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); 913 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
864} 914 NULL, mask);
865
866int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
867 gfp_t mask)
868{
869 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
870} 915}
871 916
872int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 917int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
873 gfp_t mask) 918 gfp_t mask)
874{ 919{
875 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 920 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
876 mask); 921 NULL, mask);
877} 922}
878 923
879static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 924static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
880 gfp_t mask) 925 gfp_t mask)
881{ 926{
882 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); 927 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
928 NULL, mask);
883} 929}
884 930
885int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 931int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
886 gfp_t mask) 932 gfp_t mask)
887{ 933{
888 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 934 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
889 mask); 935 NULL, mask);
890} 936}
891 937
892static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 938static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
893 u64 end, gfp_t mask) 939 u64 end, gfp_t mask)
894{ 940{
895 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 941 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
896} 942 NULL, mask);
897
898static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
899 gfp_t mask)
900{
901 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
902 0, NULL, mask);
903}
904
905static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
906 u64 end, gfp_t mask)
907{
908 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
909} 943}
910 944
911int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 945int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
917 * either insert or lock state struct between start and end use mask to tell 951 * either insert or lock state struct between start and end use mask to tell
918 * us if waiting is desired. 952 * us if waiting is desired.
919 */ 953 */
920int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 954int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
955 int bits, struct extent_state **cached_state, gfp_t mask)
921{ 956{
922 int err; 957 int err;
923 u64 failed_start; 958 u64 failed_start;
924 while (1) { 959 while (1) {
925 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 960 err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
926 &failed_start, mask); 961 EXTENT_LOCKED, &failed_start,
962 cached_state, mask);
927 if (err == -EEXIST && (mask & __GFP_WAIT)) { 963 if (err == -EEXIST && (mask & __GFP_WAIT)) {
928 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 964 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
929 start = failed_start; 965 start = failed_start;
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
935 return err; 971 return err;
936} 972}
937 973
974int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
975{
976 return lock_extent_bits(tree, start, end, 0, NULL, mask);
977}
978
938int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 979int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
939 gfp_t mask) 980 gfp_t mask)
940{ 981{
941 int err; 982 int err;
942 u64 failed_start; 983 u64 failed_start;
943 984
944 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 985 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
945 &failed_start, mask); 986 &failed_start, NULL, mask);
946 if (err == -EEXIST) { 987 if (err == -EEXIST) {
947 if (failed_start > start) 988 if (failed_start > start)
948 clear_extent_bit(tree, start, failed_start - 1, 989 clear_extent_bit(tree, start, failed_start - 1,
949 EXTENT_LOCKED, 1, 0, mask); 990 EXTENT_LOCKED, 1, 0, NULL, mask);
950 return 0; 991 return 0;
951 } 992 }
952 return 1; 993 return 1;
953} 994}
954 995
996int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
997 struct extent_state **cached, gfp_t mask)
998{
999 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
1000 mask);
1001}
1002
955int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1003int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
956 gfp_t mask) 1004 gfp_t mask)
957{ 1005{
958 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); 1006 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1007 mask);
959} 1008}
960 1009
961/* 1010/*
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
974 page_cache_release(page); 1023 page_cache_release(page);
975 index++; 1024 index++;
976 } 1025 }
977 set_extent_dirty(tree, start, end, GFP_NOFS);
978 return 0; 1026 return 0;
979} 1027}
980 1028
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
994 page_cache_release(page); 1042 page_cache_release(page);
995 index++; 1043 index++;
996 } 1044 }
997 set_extent_writeback(tree, start, end, GFP_NOFS);
998 return 0; 1045 return 0;
999} 1046}
1000 1047
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
1232 u64 delalloc_start; 1279 u64 delalloc_start;
1233 u64 delalloc_end; 1280 u64 delalloc_end;
1234 u64 found; 1281 u64 found;
1282 struct extent_state *cached_state = NULL;
1235 int ret; 1283 int ret;
1236 int loops = 0; 1284 int loops = 0;
1237 1285
@@ -1269,6 +1317,7 @@ again:
1269 /* some of the pages are gone, lets avoid looping by 1317 /* some of the pages are gone, lets avoid looping by
1270 * shortening the size of the delalloc range we're searching 1318 * shortening the size of the delalloc range we're searching
1271 */ 1319 */
1320 free_extent_state(cached_state);
1272 if (!loops) { 1321 if (!loops) {
1273 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); 1322 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
1274 max_bytes = PAGE_CACHE_SIZE - offset; 1323 max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1331,21 @@ again:
1282 BUG_ON(ret); 1331 BUG_ON(ret);
1283 1332
1284 /* step three, lock the state bits for the whole range */ 1333 /* step three, lock the state bits for the whole range */
1285 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1334 lock_extent_bits(tree, delalloc_start, delalloc_end,
1335 0, &cached_state, GFP_NOFS);
1286 1336
1287 /* then test to make sure it is all still delalloc */ 1337 /* then test to make sure it is all still delalloc */
1288 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1338 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1289 EXTENT_DELALLOC, 1); 1339 EXTENT_DELALLOC, 1, cached_state);
1290 if (!ret) { 1340 if (!ret) {
1291 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1341 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1342 &cached_state, GFP_NOFS);
1292 __unlock_for_delalloc(inode, locked_page, 1343 __unlock_for_delalloc(inode, locked_page,
1293 delalloc_start, delalloc_end); 1344 delalloc_start, delalloc_end);
1294 cond_resched(); 1345 cond_resched();
1295 goto again; 1346 goto again;
1296 } 1347 }
1348 free_extent_state(cached_state);
1297 *start = delalloc_start; 1349 *start = delalloc_start;
1298 *end = delalloc_end; 1350 *end = delalloc_end;
1299out_failed: 1351out_failed:
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1307 int clear_unlock, 1359 int clear_unlock,
1308 int clear_delalloc, int clear_dirty, 1360 int clear_delalloc, int clear_dirty,
1309 int set_writeback, 1361 int set_writeback,
1310 int end_writeback) 1362 int end_writeback,
1363 int set_private2)
1311{ 1364{
1312 int ret; 1365 int ret;
1313 struct page *pages[16]; 1366 struct page *pages[16];
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1325 if (clear_delalloc) 1378 if (clear_delalloc)
1326 clear_bits |= EXTENT_DELALLOC; 1379 clear_bits |= EXTENT_DELALLOC;
1327 1380
1328 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); 1381 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1329 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) 1382 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
1383 set_private2))
1330 return 0; 1384 return 0;
1331 1385
1332 while (nr_pages > 0) { 1386 while (nr_pages > 0) {
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1334 min_t(unsigned long, 1388 min_t(unsigned long,
1335 nr_pages, ARRAY_SIZE(pages)), pages); 1389 nr_pages, ARRAY_SIZE(pages)), pages);
1336 for (i = 0; i < ret; i++) { 1390 for (i = 0; i < ret; i++) {
1391
1392 if (set_private2)
1393 SetPagePrivate2(pages[i]);
1394
1337 if (pages[i] == locked_page) { 1395 if (pages[i] == locked_page) {
1338 page_cache_release(pages[i]); 1396 page_cache_release(pages[i]);
1339 continue; 1397 continue;
@@ -1476,14 +1534,17 @@ out:
1476 * range is found set. 1534 * range is found set.
1477 */ 1535 */
1478int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1536int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1479 int bits, int filled) 1537 int bits, int filled, struct extent_state *cached)
1480{ 1538{
1481 struct extent_state *state = NULL; 1539 struct extent_state *state = NULL;
1482 struct rb_node *node; 1540 struct rb_node *node;
1483 int bitset = 0; 1541 int bitset = 0;
1484 1542
1485 spin_lock(&tree->lock); 1543 spin_lock(&tree->lock);
1486 node = tree_search(tree, start); 1544 if (cached && cached->tree && cached->start == start)
1545 node = &cached->rb_node;
1546 else
1547 node = tree_search(tree, start);
1487 while (node && start <= end) { 1548 while (node && start <= end) {
1488 state = rb_entry(node, struct extent_state, rb_node); 1549 state = rb_entry(node, struct extent_state, rb_node);
1489 1550
@@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1503 bitset = 0; 1564 bitset = 0;
1504 break; 1565 break;
1505 } 1566 }
1567
1568 if (state->end == (u64)-1)
1569 break;
1570
1506 start = state->end + 1; 1571 start = state->end + 1;
1507 if (start > end) 1572 if (start > end)
1508 break; 1573 break;
@@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
1526{ 1591{
1527 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1592 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1528 u64 end = start + PAGE_CACHE_SIZE - 1; 1593 u64 end = start + PAGE_CACHE_SIZE - 1;
1529 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) 1594 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1530 SetPageUptodate(page); 1595 SetPageUptodate(page);
1531 return 0; 1596 return 0;
1532} 1597}
@@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1540{ 1605{
1541 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1606 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1542 u64 end = start + PAGE_CACHE_SIZE - 1; 1607 u64 end = start + PAGE_CACHE_SIZE - 1;
1543 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) 1608 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1544 unlock_page(page); 1609 unlock_page(page);
1545 return 0; 1610 return 0;
1546} 1611}
@@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1552static int check_page_writeback(struct extent_io_tree *tree, 1617static int check_page_writeback(struct extent_io_tree *tree,
1553 struct page *page) 1618 struct page *page)
1554{ 1619{
1555 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1620 end_page_writeback(page);
1556 u64 end = start + PAGE_CACHE_SIZE - 1;
1557 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1558 end_page_writeback(page);
1559 return 0; 1621 return 0;
1560} 1622}
1561 1623
@@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1613 } 1675 }
1614 1676
1615 if (!uptodate) { 1677 if (!uptodate) {
1616 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1678 clear_extent_uptodate(tree, start, end, GFP_NOFS);
1617 ClearPageUptodate(page); 1679 ClearPageUptodate(page);
1618 SetPageError(page); 1680 SetPageError(page);
1619 } 1681 }
1620 1682
1621 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1622
1623 if (whole_page) 1683 if (whole_page)
1624 end_page_writeback(page); 1684 end_page_writeback(page);
1625 else 1685 else
@@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
1983 continue; 2043 continue;
1984 } 2044 }
1985 /* the get_extent function already copied into the page */ 2045 /* the get_extent function already copied into the page */
1986 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 2046 if (test_range_bit(tree, cur, cur_end,
2047 EXTENT_UPTODATE, 1, NULL)) {
1987 check_page_uptodate(tree, page); 2048 check_page_uptodate(tree, page);
1988 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2049 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1989 cur = cur + iosize; 2050 cur = cur + iosize;
@@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2078 u64 iosize; 2139 u64 iosize;
2079 u64 unlock_start; 2140 u64 unlock_start;
2080 sector_t sector; 2141 sector_t sector;
2142 struct extent_state *cached_state = NULL;
2081 struct extent_map *em; 2143 struct extent_map *em;
2082 struct block_device *bdev; 2144 struct block_device *bdev;
2083 int ret; 2145 int ret;
@@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2124 delalloc_end = 0; 2186 delalloc_end = 0;
2125 page_started = 0; 2187 page_started = 0;
2126 if (!epd->extent_locked) { 2188 if (!epd->extent_locked) {
2189 u64 delalloc_to_write = 0;
2127 /* 2190 /*
2128 * make sure the wbc mapping index is at least updated 2191 * make sure the wbc mapping index is at least updated
2129 * to this page. 2192 * to this page.
@@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2143 tree->ops->fill_delalloc(inode, page, delalloc_start, 2206 tree->ops->fill_delalloc(inode, page, delalloc_start,
2144 delalloc_end, &page_started, 2207 delalloc_end, &page_started,
2145 &nr_written); 2208 &nr_written);
2209 /*
2210 * delalloc_end is already one less than the total
2211 * length, so we don't subtract one from
2212 * PAGE_CACHE_SIZE
2213 */
2214 delalloc_to_write += (delalloc_end - delalloc_start +
2215 PAGE_CACHE_SIZE) >>
2216 PAGE_CACHE_SHIFT;
2146 delalloc_start = delalloc_end + 1; 2217 delalloc_start = delalloc_end + 1;
2147 } 2218 }
2219 if (wbc->nr_to_write < delalloc_to_write) {
2220 int thresh = 8192;
2221
2222 if (delalloc_to_write < thresh * 2)
2223 thresh = delalloc_to_write;
2224 wbc->nr_to_write = min_t(u64, delalloc_to_write,
2225 thresh);
2226 }
2148 2227
2149 /* did the fill delalloc function already unlock and start 2228 /* did the fill delalloc function already unlock and start
2150 * the IO? 2229 * the IO?
@@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2160 goto done_unlocked; 2239 goto done_unlocked;
2161 } 2240 }
2162 } 2241 }
2163 lock_extent(tree, start, page_end, GFP_NOFS);
2164
2165 unlock_start = start;
2166
2167 if (tree->ops && tree->ops->writepage_start_hook) { 2242 if (tree->ops && tree->ops->writepage_start_hook) {
2168 ret = tree->ops->writepage_start_hook(page, start, 2243 ret = tree->ops->writepage_start_hook(page, start,
2169 page_end); 2244 page_end);
2170 if (ret == -EAGAIN) { 2245 if (ret == -EAGAIN) {
2171 unlock_extent(tree, start, page_end, GFP_NOFS);
2172 redirty_page_for_writepage(wbc, page); 2246 redirty_page_for_writepage(wbc, page);
2173 update_nr_written(page, wbc, nr_written); 2247 update_nr_written(page, wbc, nr_written);
2174 unlock_page(page); 2248 unlock_page(page);
@@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2184 update_nr_written(page, wbc, nr_written + 1); 2258 update_nr_written(page, wbc, nr_written + 1);
2185 2259
2186 end = page_end; 2260 end = page_end;
2187 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
2188 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
2189
2190 if (last_byte <= start) { 2261 if (last_byte <= start) {
2191 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2192 unlock_extent(tree, start, page_end, GFP_NOFS);
2193 if (tree->ops && tree->ops->writepage_end_io_hook) 2262 if (tree->ops && tree->ops->writepage_end_io_hook)
2194 tree->ops->writepage_end_io_hook(page, start, 2263 tree->ops->writepage_end_io_hook(page, start,
2195 page_end, NULL, 1); 2264 page_end, NULL, 1);
@@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2197 goto done; 2266 goto done;
2198 } 2267 }
2199 2268
2200 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2201 blocksize = inode->i_sb->s_blocksize; 2269 blocksize = inode->i_sb->s_blocksize;
2202 2270
2203 while (cur <= end) { 2271 while (cur <= end) {
2204 if (cur >= last_byte) { 2272 if (cur >= last_byte) {
2205 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2206 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2207 if (tree->ops && tree->ops->writepage_end_io_hook) 2273 if (tree->ops && tree->ops->writepage_end_io_hook)
2208 tree->ops->writepage_end_io_hook(page, cur, 2274 tree->ops->writepage_end_io_hook(page, cur,
2209 page_end, NULL, 1); 2275 page_end, NULL, 1);
@@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2235 */ 2301 */
2236 if (compressed || block_start == EXTENT_MAP_HOLE || 2302 if (compressed || block_start == EXTENT_MAP_HOLE ||
2237 block_start == EXTENT_MAP_INLINE) { 2303 block_start == EXTENT_MAP_INLINE) {
2238 clear_extent_dirty(tree, cur,
2239 cur + iosize - 1, GFP_NOFS);
2240
2241 unlock_extent(tree, unlock_start, cur + iosize - 1,
2242 GFP_NOFS);
2243
2244 /* 2304 /*
2245 * end_io notification does not happen here for 2305 * end_io notification does not happen here for
2246 * compressed extents 2306 * compressed extents
@@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2265 } 2325 }
2266 /* leave this out until we have a page_mkwrite call */ 2326 /* leave this out until we have a page_mkwrite call */
2267 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 2327 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2268 EXTENT_DIRTY, 0)) { 2328 EXTENT_DIRTY, 0, NULL)) {
2269 cur = cur + iosize; 2329 cur = cur + iosize;
2270 pg_offset += iosize; 2330 pg_offset += iosize;
2271 continue; 2331 continue;
2272 } 2332 }
2273 2333
2274 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2275 if (tree->ops && tree->ops->writepage_io_hook) { 2334 if (tree->ops && tree->ops->writepage_io_hook) {
2276 ret = tree->ops->writepage_io_hook(page, cur, 2335 ret = tree->ops->writepage_io_hook(page, cur,
2277 cur + iosize - 1); 2336 cur + iosize - 1);
@@ -2309,12 +2368,12 @@ done:
2309 set_page_writeback(page); 2368 set_page_writeback(page);
2310 end_page_writeback(page); 2369 end_page_writeback(page);
2311 } 2370 }
2312 if (unlock_start <= page_end)
2313 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2314 unlock_page(page); 2371 unlock_page(page);
2315 2372
2316done_unlocked: 2373done_unlocked:
2317 2374
2375 /* drop our reference on any cached states */
2376 free_extent_state(cached_state);
2318 return 0; 2377 return 0;
2319} 2378}
2320 2379
@@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2339 writepage_t writepage, void *data, 2398 writepage_t writepage, void *data,
2340 void (*flush_fn)(void *)) 2399 void (*flush_fn)(void *))
2341{ 2400{
2342 struct backing_dev_info *bdi = mapping->backing_dev_info;
2343 int ret = 0; 2401 int ret = 0;
2344 int done = 0; 2402 int done = 0;
2403 int nr_to_write_done = 0;
2345 struct pagevec pvec; 2404 struct pagevec pvec;
2346 int nr_pages; 2405 int nr_pages;
2347 pgoff_t index; 2406 pgoff_t index;
@@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2361 scanned = 1; 2420 scanned = 1;
2362 } 2421 }
2363retry: 2422retry:
2364 while (!done && (index <= end) && 2423 while (!done && !nr_to_write_done && (index <= end) &&
2365 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2424 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2366 PAGECACHE_TAG_DIRTY, min(end - index, 2425 PAGECACHE_TAG_DIRTY, min(end - index,
2367 (pgoff_t)PAGEVEC_SIZE-1) + 1))) { 2426 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2412,12 +2471,15 @@ retry:
2412 unlock_page(page); 2471 unlock_page(page);
2413 ret = 0; 2472 ret = 0;
2414 } 2473 }
2415 if (ret || wbc->nr_to_write <= 0) 2474 if (ret)
2416 done = 1;
2417 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2418 wbc->encountered_congestion = 1;
2419 done = 1; 2475 done = 1;
2420 } 2476
2477 /*
2478 * the filesystem may choose to bump up nr_to_write.
2479 * We have to make sure to honor the new nr_to_write
2480 * at any time
2481 */
2482 nr_to_write_done = wbc->nr_to_write <= 0;
2421 } 2483 }
2422 pagevec_release(&pvec); 2484 pagevec_release(&pvec);
2423 cond_resched(); 2485 cond_resched();
@@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2604 return 0; 2666 return 0;
2605 2667
2606 lock_extent(tree, start, end, GFP_NOFS); 2668 lock_extent(tree, start, end, GFP_NOFS);
2607 wait_on_extent_writeback(tree, start, end); 2669 wait_on_page_writeback(page);
2608 clear_extent_bit(tree, start, end, 2670 clear_extent_bit(tree, start, end,
2609 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2671 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2610 1, 1, GFP_NOFS); 2672 1, 1, NULL, GFP_NOFS);
2611 return 0; 2673 return 0;
2612} 2674}
2613 2675
@@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
2687 !isnew && !PageUptodate(page) && 2749 !isnew && !PageUptodate(page) &&
2688 (block_off_end > to || block_off_start < from) && 2750 (block_off_end > to || block_off_start < from) &&
2689 !test_range_bit(tree, block_start, cur_end, 2751 !test_range_bit(tree, block_start, cur_end,
2690 EXTENT_UPTODATE, 1)) { 2752 EXTENT_UPTODATE, 1, NULL)) {
2691 u64 sector; 2753 u64 sector;
2692 u64 extent_offset = block_start - em->start; 2754 u64 extent_offset = block_start - em->start;
2693 size_t iosize; 2755 size_t iosize;
@@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
2701 */ 2763 */
2702 set_extent_bit(tree, block_start, 2764 set_extent_bit(tree, block_start,
2703 block_start + iosize - 1, 2765 block_start + iosize - 1,
2704 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 2766 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
2705 ret = submit_extent_page(READ, tree, page, 2767 ret = submit_extent_page(READ, tree, page,
2706 sector, iosize, page_offset, em->bdev, 2768 sector, iosize, page_offset, em->bdev,
2707 NULL, 1, 2769 NULL, 1,
@@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map,
2742 int ret = 1; 2804 int ret = 1;
2743 2805
2744 if (test_range_bit(tree, start, end, 2806 if (test_range_bit(tree, start, end,
2745 EXTENT_IOBITS | EXTENT_ORDERED, 0)) 2807 EXTENT_IOBITS, 0, NULL))
2746 ret = 0; 2808 ret = 0;
2747 else { 2809 else {
2748 if ((mask & GFP_NOFS) == GFP_NOFS) 2810 if ((mask & GFP_NOFS) == GFP_NOFS)
2749 mask = GFP_NOFS; 2811 mask = GFP_NOFS;
2750 clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 2812 /*
2751 1, 1, mask); 2813 * at this point we can safely clear everything except the
2814 * locked bit and the nodatasum bit
2815 */
2816 clear_extent_bit(tree, start, end,
2817 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2818 0, 0, NULL, mask);
2752 } 2819 }
2753 return ret; 2820 return ret;
2754} 2821}
@@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2771 u64 len; 2838 u64 len;
2772 while (start <= end) { 2839 while (start <= end) {
2773 len = end - start + 1; 2840 len = end - start + 1;
2774 spin_lock(&map->lock); 2841 write_lock(&map->lock);
2775 em = lookup_extent_mapping(map, start, len); 2842 em = lookup_extent_mapping(map, start, len);
2776 if (!em || IS_ERR(em)) { 2843 if (!em || IS_ERR(em)) {
2777 spin_unlock(&map->lock); 2844 write_unlock(&map->lock);
2778 break; 2845 break;
2779 } 2846 }
2780 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || 2847 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2781 em->start != start) { 2848 em->start != start) {
2782 spin_unlock(&map->lock); 2849 write_unlock(&map->lock);
2783 free_extent_map(em); 2850 free_extent_map(em);
2784 break; 2851 break;
2785 } 2852 }
2786 if (!test_range_bit(tree, em->start, 2853 if (!test_range_bit(tree, em->start,
2787 extent_map_end(em) - 1, 2854 extent_map_end(em) - 1,
2788 EXTENT_LOCKED | EXTENT_WRITEBACK | 2855 EXTENT_LOCKED | EXTENT_WRITEBACK,
2789 EXTENT_ORDERED, 2856 0, NULL)) {
2790 0)) {
2791 remove_extent_mapping(map, em); 2857 remove_extent_mapping(map, em);
2792 /* once for the rb tree */ 2858 /* once for the rb tree */
2793 free_extent_map(em); 2859 free_extent_map(em);
2794 } 2860 }
2795 start = extent_map_end(em); 2861 start = extent_map_end(em);
2796 spin_unlock(&map->lock); 2862 write_unlock(&map->lock);
2797 2863
2798 /* once for us */ 2864 /* once for us */
2799 free_extent_map(em); 2865 free_extent_map(em);
@@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3203 int uptodate; 3269 int uptodate;
3204 unsigned long index; 3270 unsigned long index;
3205 3271
3206 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); 3272 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
3207 if (ret) 3273 if (ret)
3208 return 1; 3274 return 1;
3209 while (start <= end) { 3275 while (start <= end) {
@@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3233 return 1; 3299 return 1;
3234 3300
3235 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3301 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3236 EXTENT_UPTODATE, 1); 3302 EXTENT_UPTODATE, 1, NULL);
3237 if (ret) 3303 if (ret)
3238 return ret; 3304 return ret;
3239 3305
@@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3269 return 0; 3335 return 0;
3270 3336
3271 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3337 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3272 EXTENT_UPTODATE, 1)) { 3338 EXTENT_UPTODATE, 1, NULL)) {
3273 return 0; 3339 return 0;
3274 } 3340 }
3275 3341