Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  446
1 file changed, 280 insertions(+), 166 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68260180f587..96577e8bf9fd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
         return NULL;
 }
 
+static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
+                     struct extent_state *other)
+{
+        if (tree->ops && tree->ops->merge_extent_hook)
+                tree->ops->merge_extent_hook(tree->mapping->host, new,
+                                             other);
+}
+
 /*
  * utility function to look for merge candidates inside a given range.
  * Any extents with matching state are merged together into a single
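The hunk above adds merge_cb(), which forwards a state merge to an optional per-tree hook. A minimal userspace sketch of that NULL-checked ops-table pattern follows; the types are simplified stand-ins, not the kernel's, and merge_hook is a hypothetical name modeled on merge_extent_hook.

#include <stdio.h>

struct state { long start, end; };

/* optional hook table: any member may be NULL */
struct ops {
        void (*merge_hook)(struct state *new, struct state *other);
};

struct tree {
        const struct ops *ops;      /* may itself be NULL */
};

/* forward to the hook only when both the table and the member exist */
static void merge_cb(struct tree *tree, struct state *new, struct state *other)
{
        if (tree->ops && tree->ops->merge_hook)
                tree->ops->merge_hook(new, other);
}

static void print_merge(struct state *new, struct state *other)
{
        printf("merging [%ld,%ld] into [%ld,%ld]\n",
               other->start, other->end, new->start, new->end);
}

int main(void)
{
        const struct ops ops = { .merge_hook = print_merge };
        struct tree with_hook = { .ops = &ops };
        struct tree without_hook = { .ops = NULL };
        struct state a = { 0, 4095 }, b = { 4096, 8191 };

        merge_cb(&with_hook, &b, &a);     /* fires the hook */
        merge_cb(&without_hook, &b, &a);  /* silently does nothing */
        return 0;
}

The double NULL check is the design point: callers never need to know whether a hook is registered.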
@@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree,
                other = rb_entry(other_node, struct extent_state, rb_node);
                if (other->end == state->start - 1 &&
                    other->state == state->state) {
+                       merge_cb(tree, state, other);
                        state->start = other->start;
                        other->tree = NULL;
                        rb_erase(&other->rb_node, &tree->state);
@@ -314,33 +323,37 @@
                other = rb_entry(other_node, struct extent_state, rb_node);
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
+                       merge_cb(tree, state, other);
                        other->start = state->start;
                        state->tree = NULL;
                        rb_erase(&state->rb_node, &tree->state);
                        free_extent_state(state);
+                       state = NULL;
                }
        }
+
        return 0;
 }
 
-static void set_state_cb(struct extent_io_tree *tree,
-                         struct extent_state *state,
-                         unsigned long bits)
+static int set_state_cb(struct extent_io_tree *tree,
+                        struct extent_state *state,
+                        unsigned long bits)
 {
        if (tree->ops && tree->ops->set_bit_hook) {
-               tree->ops->set_bit_hook(tree->mapping->host, state->start,
-                                       state->end, state->state, bits);
+               return tree->ops->set_bit_hook(tree->mapping->host,
+                                              state->start, state->end,
+                                              state->state, bits);
        }
+
+       return 0;
 }
 
 static void clear_state_cb(struct extent_io_tree *tree,
                            struct extent_state *state,
                            unsigned long bits)
 {
-       if (tree->ops && tree->ops->clear_bit_hook) {
-               tree->ops->clear_bit_hook(tree->mapping->host, state->start,
-                                         state->end, state->state, bits);
-       }
+       if (tree->ops && tree->ops->clear_bit_hook)
+               tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
 }
 
 /*
@@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree,
                        int bits)
 {
        struct rb_node *node;
+       int ret;
 
        if (end < start) {
                printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -365,12 +379,15 @@ static int insert_state(struct extent_io_tree *tree,
                       (unsigned long long)start);
                WARN_ON(1);
        }
+       state->start = start;
+       state->end = end;
+       ret = set_state_cb(tree, state, bits);
+       if (ret)
+               return ret;
+
        if (bits & EXTENT_DIRTY)
                tree->dirty_bytes += end - start + 1;
-       set_state_cb(tree, state, bits);
        state->state |= bits;
-       state->start = start;
-       state->end = end;
        node = tree_insert(&tree->state, end, &state->rb_node);
        if (node) {
                struct extent_state *found;
@@ -387,6 +404,15 @@
        return 0;
 }
 
+static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
+                   u64 split)
+{
+       if (tree->ops && tree->ops->split_extent_hook)
+               return tree->ops->split_extent_hook(tree->mapping->host,
+                                                   orig, split);
+       return 0;
+}
+
 /*
  * split a given extent state struct in two, inserting the preallocated
  * struct 'prealloc' as the newly created second half.  'split' indicates an
@@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
                       struct extent_state *prealloc, u64 split)
 {
        struct rb_node *node;
+
+       split_cb(tree, orig, split);
+
       prealloc->start = orig->start;
        prealloc->end = split - 1;
        prealloc->state = orig->state;
@@ -431,7 +460,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
                           struct extent_state *state, int bits, int wake,
                           int delete)
 {
-       int ret = state->state & bits;
+       int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
+       int ret = state->state & bits_to_clear;
 
        if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
@@ -439,7 +469,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
                tree->dirty_bytes -= range;
        }
        clear_state_cb(tree, state, bits);
-       state->state &= ~bits;
+       state->state &= ~bits_to_clear;
        if (wake)
                wake_up(&state->wq);
        if (delete || state->state == 0) {
@@ -471,10 +501,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
  * bits were already set, or zero if none of the bits were already set.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete, gfp_t mask)
+                    int bits, int wake, int delete,
+                    struct extent_state **cached_state,
+                    gfp_t mask)
 {
        struct extent_state *state;
+       struct extent_state *cached;
        struct extent_state *prealloc = NULL;
+       struct rb_node *next_node;
        struct rb_node *node;
        u64 last_end;
        int err;
@@ -488,6 +522,17 @@ again:
        }
 
        spin_lock(&tree->lock);
+       if (cached_state) {
+               cached = *cached_state;
+               *cached_state = NULL;
+               cached_state = NULL;
+               if (cached && cached->tree && cached->start == start) {
+                       atomic_dec(&cached->refs);
+                       state = cached;
+                       goto hit_next;
+               }
+               free_extent_state(cached);
+       }
        /*
         * this search will find the extents that end after
        * our range starts
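The hunk above lets clear_extent_bit() consume a caller-supplied cached extent_state, skipping the rb-tree search when the cached node still matches. A simplified sketch of that "use the cache once, then NULL it out" contract, with refcounting reduced to a plain counter and tree membership to a flag; all names are illustrative:

#include <stdio.h>
#include <stdlib.h>

struct state {
        long start, end;
        int refs;          /* stand-in for atomic_t refs */
        int in_tree;       /* stand-in for state->tree != NULL */
};

static struct state *slow_search(long start)
{
        printf("falling back to full tree search at %ld\n", start);
        return NULL; /* elided in this sketch */
}

/* consume *cached once: either it matches and we reuse it, or we drop it */
static struct state *lookup(struct state **cached, long start)
{
        if (cached) {
                struct state *c = *cached;
                *cached = NULL;         /* caller's reference is consumed */
                if (c && c->in_tree && c->start == start) {
                        c->refs--;      /* we inherit the caller's ref */
                        return c;       /* fast path: no tree search */
                }
                if (c && --c->refs == 0)
                        free(c);
        }
        return slow_search(start);
}

int main(void)
{
        struct state *s = malloc(sizeof(*s));
        s->start = 4096; s->end = 8191; s->refs = 2; s->in_tree = 1;

        struct state *hit = lookup(&s, 4096);   /* fast path */
        printf("hit: [%ld,%ld] refs=%d cached=%p\n",
               hit->start, hit->end, hit->refs, (void *)s);
        free(hit);
        lookup(NULL, 0);                        /* slow path */
        return 0;
}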
@@ -496,6 +541,7 @@ again:
        if (!node)
                goto out;
        state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
        if (state->start > end)
                goto out;
        WARN_ON(state->end < start);
@@ -526,13 +572,11 @@ again:
                if (err)
                        goto out;
                if (state->end <= end) {
-                       set |= clear_state_bit(tree, state, bits,
-                                       wake, delete);
+                       set |= clear_state_bit(tree, state, bits, wake,
+                                              delete);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
-               } else {
-                       start = state->start;
                }
                goto search_again;
        }
@@ -547,19 +591,30 @@ again:
                prealloc = alloc_extent_state(GFP_ATOMIC);
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
-
                if (wake)
                        wake_up(&state->wq);
-               set |= clear_state_bit(tree, prealloc, bits,
-                                      wake, delete);
+
+               set |= clear_state_bit(tree, prealloc, bits, wake, delete);
+
                prealloc = NULL;
                goto out;
        }
 
+       if (state->end < end && prealloc && !need_resched())
+               next_node = rb_next(&state->rb_node);
+       else
+               next_node = NULL;
+
        set |= clear_state_bit(tree, state, bits, wake, delete);
        if (last_end == (u64)-1)
                goto out;
        start = last_end + 1;
+       if (start <= end && next_node) {
+               state = rb_entry(next_node, struct extent_state,
+                                rb_node);
+               if (state->start == start)
+                       goto hit_next;
+       }
        goto search_again;
 
 out:
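The hunk above adds a fast path: after clearing bits in one state, peek at rb_next() and jump straight to the neighbor when it begins exactly where the cleared range ended, instead of restarting the search from the tree root. A sketch over a sorted singly linked list standing in for the rb-tree, with need_resched() replaced by a simple work budget; names are illustrative:

#include <stdio.h>
#include <stddef.h>

struct state {
        long start, end;
        struct state *next;   /* stand-in for rb_next() */
};

static int budget = 2;        /* stand-in for !need_resched() */

static void clear_range(struct state *head, long start, long end)
{
        struct state *s = head; /* pretend this came from a tree search */

        while (s && s->start <= end) {
                struct state *next_node = NULL;

                /* only chase the neighbor while we still have budget */
                if (s->end < end && budget-- > 0)
                        next_node = s->next;

                printf("clearing [%ld,%ld]\n", s->start, s->end);
                start = s->end + 1;

                /* fast path: neighbor is contiguous, skip the re-search */
                if (start <= end && next_node && next_node->start == start) {
                        s = next_node;
                        continue;
                }
                break; /* would go back to a full search here */
        }
}

int main(void)
{
        struct state c = { 8192, 12287, NULL };
        struct state b = { 4096, 8191, &c };
        struct state a = { 0, 4095, &b };

        clear_range(&a, 0, 12287);
        return 0;
}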
@@ -641,40 +696,59 @@ out:
        return 0;
 }
 
-static void set_state_bits(struct extent_io_tree *tree,
-                           struct extent_state *state,
-                           int bits)
+static int set_state_bits(struct extent_io_tree *tree,
+                          struct extent_state *state,
+                          int bits)
 {
+       int ret;
+
+       ret = set_state_cb(tree, state, bits);
+       if (ret)
+               return ret;
+
        if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
                tree->dirty_bytes += range;
        }
-       set_state_cb(tree, state, bits);
        state->state |= bits;
+
+       return 0;
+}
+
+static void cache_state(struct extent_state *state,
+                       struct extent_state **cached_ptr)
+{
+       if (cached_ptr && !(*cached_ptr)) {
+               if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
+                       *cached_ptr = state;
+                       atomic_inc(&state->refs);
+               }
+       }
 }
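cache_state() above hands the caller a referenced pointer, but only for states that are expensive to refind (those carrying I/O-related bits). A compact sketch of that "take a ref only when the cache slot is empty and the bits qualify" rule; plain ints stand in for the atomics, and the bit names are placeholders:

#include <stdio.h>

#define BIT_LOCKED    (1 << 0)
#define BIT_WRITEBACK (1 << 1)
#define BIT_UPTODATE  (1 << 2)
#define CACHE_WORTHY  (BIT_LOCKED | BIT_WRITEBACK)  /* ~EXTENT_IOBITS */

struct state {
        int bits;
        int refs;
};

static void cache_state(struct state *state, struct state **cached_ptr)
{
        /* fill an empty slot only, and only for interesting states */
        if (cached_ptr && !*cached_ptr && (state->bits & CACHE_WORTHY)) {
                *cached_ptr = state;
                state->refs++;   /* atomic_inc(&state->refs) in the kernel */
        }
}

int main(void)
{
        struct state locked = { BIT_LOCKED, 1 };
        struct state boring = { BIT_UPTODATE, 1 };
        struct state *cached = NULL;

        cache_state(&boring, &cached);  /* not cached: no IO bits */
        printf("after boring: cached=%p\n", (void *)cached);
        cache_state(&locked, &cached);  /* cached, ref taken */
        printf("after locked: cached=%p refs=%d\n", (void *)cached, locked.refs);
        cache_state(&boring, &cached);  /* slot already full: ignored */
        printf("slot kept:    cached=%p\n", (void *)cached);
        return 0;
}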
 
 /*
- * set some bits on a range in the tree.  This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
+ * set some bits on a range in the tree.  This may require allocations or
+ * sleeping, so the gfp mask is used to indicate what is allowed.
  *
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set.  The start of the existing
- * range is returned in failed_start in this case.
+ * If any of the exclusive bits are set, this will fail with -EEXIST if some
+ * part of the range already has the desired bits set.  The start of the
+ * existing range is returned in failed_start in this case.
  *
- * [start, end] is inclusive
- * This takes the tree lock.
+ * [start, end] is inclusive This takes the tree lock.
  */
+
 static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                         int bits, int exclusive, u64 *failed_start,
+                         int bits, int exclusive_bits, u64 *failed_start,
+                         struct extent_state **cached_state,
                          gfp_t mask)
 {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
        int err = 0;
-       int set;
        u64 last_start;
        u64 last_end;
+
 again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
@@ -683,6 +757,13 @@ again:
        }
 
        spin_lock(&tree->lock);
+       if (cached_state && *cached_state) {
+               state = *cached_state;
+               if (state->start == start && state->tree) {
+                       node = &state->rb_node;
+                       goto hit_next;
+               }
+       }
        /*
         * this search will find all the extents that end after
         * our range starts.
@@ -694,8 +775,8 @@ again:
                BUG_ON(err == -EEXIST);
                goto out;
        }
-
        state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
        last_start = state->start;
        last_end = state->end;
 
@@ -706,17 +787,32 @@ again:
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
-               set = state->state & bits;
-               if (set && exclusive) {
+               struct rb_node *next_node;
+               if (state->state & exclusive_bits) {
                        *failed_start = state->start;
                        err = -EEXIST;
                        goto out;
                }
-               set_state_bits(tree, state, bits);
+
+               err = set_state_bits(tree, state, bits);
+               if (err)
+                       goto out;
+
+               cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
+
                start = last_end + 1;
+               if (start < end && prealloc && !need_resched()) {
+                       next_node = rb_next(node);
+                       if (next_node) {
+                               state = rb_entry(next_node, struct extent_state,
+                                                rb_node);
+                               if (state->start == start)
+                                       goto hit_next;
+                       }
+               }
                goto search_again;
        }
 
@@ -737,8 +833,7 @@ again:
         * desired bit on it.
         */
        if (state->start < start) {
-               set = state->state & bits;
-               if (exclusive && set) {
+               if (state->state & exclusive_bits) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
@@ -749,13 +844,14 @@ again:
                if (err)
                        goto out;
                if (state->end <= end) {
-                       set_state_bits(tree, state, bits);
+                       err = set_state_bits(tree, state, bits);
+                       if (err)
+                               goto out;
+                       cache_state(state, cached_state);
                        merge_state(tree, state);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
-               } else {
-                       start = state->start;
                }
                goto search_again;
        }
@@ -774,10 +870,13 @@ again:
                this_end = last_start - 1;
                err = insert_state(tree, prealloc, start, this_end,
                                   bits);
-               prealloc = NULL;
                BUG_ON(err == -EEXIST);
-               if (err)
+               if (err) {
+                       prealloc = NULL;
                        goto out;
+               }
+               cache_state(prealloc, cached_state);
+               prealloc = NULL;
                start = this_end + 1;
                goto search_again;
        }
@@ -788,8 +887,7 @@ again:
         * on the first half
         */
        if (state->start <= end && state->end > end) {
-               set = state->state & bits;
-               if (exclusive && set) {
+               if (state->state & exclusive_bits) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
@@ -797,7 +895,12 @@ again:
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
 
-               set_state_bits(tree, prealloc, bits);
+               err = set_state_bits(tree, prealloc, bits);
+               if (err) {
+                       prealloc = NULL;
+                       goto out;
+               }
+               cache_state(prealloc, cached_state);
                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
@@ -826,86 +929,65 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
-                             mask);
-}
-
-int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+                             NULL, mask);
 }
 
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask)
 {
        return set_extent_bit(tree, start, end, bits, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      int bits, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+       return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
 }
 
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
                        gfp_t mask)
 {
        return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_DIRTY,
-                             0, NULL, mask);
+                             EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
+                             0, NULL, NULL, mask);
 }
 
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
        return clear_extent_bit(tree, start, end,
-                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
-}
-
-int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+                               EXTENT_DIRTY | EXTENT_DELALLOC |
+                               EXTENT_DO_ACCOUNTING, 0, 0,
+                               NULL, mask);
 }
 
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                   gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                            gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
+                               NULL, mask);
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
                                 u64 end, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-
-static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
-                               gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
-                             0, NULL, mask);
-}
-
-static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
-                                 u64 end, gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+                               NULL, mask);
 }
 
 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +999,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
  * either insert or lock state struct between start and end use mask to tell
  * us if waiting is desired.
 */
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                    int bits, struct extent_state **cached_state, gfp_t mask)
 {
        int err;
        u64 failed_start;
        while (1) {
-               err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                                    &failed_start, mask);
+               err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+                                    EXTENT_LOCKED, &failed_start,
+                                    cached_state, mask);
                if (err == -EEXIST && (mask & __GFP_WAIT)) {
                        wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
                        start = failed_start;
@@ -935,27 +1019,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
        return err;
 }
 
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+       return lock_extent_bits(tree, start, end, 0, NULL, mask);
+}
+
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                    gfp_t mask)
 {
        int err;
        u64 failed_start;
 
-       err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                            &failed_start, mask);
+       err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+                            &failed_start, NULL, mask);
        if (err == -EEXIST) {
                if (failed_start > start)
                        clear_extent_bit(tree, start, failed_start - 1,
-                                        EXTENT_LOCKED, 1, 0, mask);
+                                        EXTENT_LOCKED, 1, 0, NULL, mask);
                return 0;
        }
        return 1;
 }
 
+int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
+                        struct extent_state **cached, gfp_t mask)
+{
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+                               mask);
+}
+
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                  gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+                               mask);
 }
 
 /*
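lock_extent() now survives only as a thin wrapper over lock_extent_bits(), the usual C substitute for default arguments: one full-featured function plus wrappers that pin the extra parameters to neutral values. A sketch of the shape, with a toy body and names modeled on the hunk:

#include <stdio.h>

struct tree { const char *name; };
struct state;

/* the full-featured version: extra bits plus an optional cache slot */
static int lock_extent_bits(struct tree *tree, long start, long end,
                            int bits, struct state **cached)
{
        printf("%s: lock [%ld,%ld] bits=0x%x cached=%s\n",
               tree->name, start, end, bits, cached ? "yes" : "no");
        return 0;
}

/* the old entry point becomes a wrapper with neutral defaults */
static int lock_extent(struct tree *tree, long start, long end)
{
        return lock_extent_bits(tree, start, end, 0, NULL);
}

int main(void)
{
        struct tree t = { "io_tree" };
        struct state *cached = NULL;

        lock_extent(&t, 0, 4095);                       /* default path */
        lock_extent_bits(&t, 4096, 8191, 0x8, &cached); /* extended path */
        return 0;
}

Existing callers keep compiling unchanged while new callers opt into the cache.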
@@ -974,7 +1071,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       set_extent_dirty(tree, start, end, GFP_NOFS);
        return 0;
 }
 
@@ -994,7 +1090,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       set_extent_writeback(tree, start, end, GFP_NOFS);
        return 0;
 }
 
@@ -1232,6 +1327,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
        u64 delalloc_start;
        u64 delalloc_end;
        u64 found;
+       struct extent_state *cached_state = NULL;
        int ret;
        int loops = 0;
 
@@ -1269,6 +1365,7 @@ again:
        /* some of the pages are gone, lets avoid looping by
         * shortening the size of the delalloc range we're searching
         */
+       free_extent_state(cached_state);
        if (!loops) {
                unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
                max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1379,21 @@ again:
        BUG_ON(ret);
 
        /* step three, lock the state bits for the whole range */
-       lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+       lock_extent_bits(tree, delalloc_start, delalloc_end,
+                        0, &cached_state, GFP_NOFS);
 
        /* then test to make sure it is all still delalloc */
        ret = test_range_bit(tree, delalloc_start, delalloc_end,
-                            EXTENT_DELALLOC, 1);
+                            EXTENT_DELALLOC, 1, cached_state);
        if (!ret) {
-               unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+               unlock_extent_cached(tree, delalloc_start, delalloc_end,
+                                    &cached_state, GFP_NOFS);
                __unlock_for_delalloc(inode, locked_page,
                                      delalloc_start, delalloc_end);
                cond_resched();
                goto again;
        }
+       free_extent_state(cached_state);
        *start = delalloc_start;
        *end = delalloc_end;
 out_failed:
@@ -1303,11 +1403,7 @@ out_failed:
 int extent_clear_unlock_delalloc(struct inode *inode,
                                 struct extent_io_tree *tree,
                                 u64 start, u64 end, struct page *locked_page,
-                                int unlock_pages,
-                                int clear_unlock,
-                                int clear_delalloc, int clear_dirty,
-                                int set_writeback,
-                                int end_writeback)
+                                unsigned long op)
 {
        int ret;
        struct page *pages[16];
@@ -1317,16 +1413,21 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        int i;
        int clear_bits = 0;
 
-       if (clear_unlock)
+       if (op & EXTENT_CLEAR_UNLOCK)
                clear_bits |= EXTENT_LOCKED;
-       if (clear_dirty)
+       if (op & EXTENT_CLEAR_DIRTY)
                clear_bits |= EXTENT_DIRTY;
 
-       if (clear_delalloc)
+       if (op & EXTENT_CLEAR_DELALLOC)
                clear_bits |= EXTENT_DELALLOC;
 
-       clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
-       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+       if (op & EXTENT_CLEAR_ACCOUNTING)
+               clear_bits |= EXTENT_DO_ACCOUNTING;
+
+       clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
+       if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
+                   EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
+                   EXTENT_SET_PRIVATE2)))
                return 0;
 
        while (nr_pages > 0) {
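The two hunks above collapse five int parameters into a single unsigned long op bitmask, so call sites compose behavior with | instead of a brittle positional argument list. A sketch of the conversion; the flag names are modeled on the EXTENT_CLEAR_* constants the hunk references, and the body just prints what it would do:

#include <stdio.h>

#define OP_UNLOCK_PAGE   (1UL << 0)
#define OP_CLEAR_DIRTY   (1UL << 1)
#define OP_SET_WRITEBACK (1UL << 2)
#define OP_END_WRITEBACK (1UL << 3)
#define OP_CLEAR_UNLOCK  (1UL << 4)

/* before: foo(int unlock_pages, int clear_unlock, int clear_dirty, ...) */
static int clear_unlock(long start, long end, unsigned long op)
{
        if (op & OP_CLEAR_UNLOCK)
                printf("[%ld,%ld] clear locked bit\n", start, end);
        if (op & OP_CLEAR_DIRTY)
                printf("[%ld,%ld] clear dirty bit\n", start, end);

        /* bail out early when no per-page work was requested */
        if (!(op & (OP_UNLOCK_PAGE | OP_SET_WRITEBACK | OP_END_WRITEBACK)))
                return 0;

        if (op & OP_SET_WRITEBACK)
                printf("[%ld,%ld] set page writeback\n", start, end);
        if (op & OP_UNLOCK_PAGE)
                printf("[%ld,%ld] unlock pages\n", start, end);
        return 0;
}

int main(void)
{
        /* call sites read as a recipe instead of a row of 0s and 1s */
        clear_unlock(0, 4095, OP_CLEAR_UNLOCK | OP_CLEAR_DIRTY |
                              OP_SET_WRITEBACK | OP_UNLOCK_PAGE);
        clear_unlock(4096, 8191, OP_CLEAR_UNLOCK); /* early return */
        return 0;
}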
@@ -1334,17 +1435,21 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                        min_t(unsigned long,
                              nr_pages, ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
+
+                       if (op & EXTENT_SET_PRIVATE2)
+                               SetPagePrivate2(pages[i]);
+
                        if (pages[i] == locked_page) {
                                page_cache_release(pages[i]);
                                continue;
                        }
-                       if (clear_dirty)
+                       if (op & EXTENT_CLEAR_DIRTY)
                                clear_page_dirty_for_io(pages[i]);
-                       if (set_writeback)
+                       if (op & EXTENT_SET_WRITEBACK)
                                set_page_writeback(pages[i]);
-                       if (end_writeback)
+                       if (op & EXTENT_END_WRITEBACK)
                                end_page_writeback(pages[i]);
-                       if (unlock_pages)
+                       if (op & EXTENT_CLEAR_UNLOCK_PAGE)
                                unlock_page(pages[i]);
                        page_cache_release(pages[i]);
                }
@@ -1476,14 +1581,17 @@ out:
 * range is found set.
 */
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled)
+                  int bits, int filled, struct extent_state *cached)
 {
        struct extent_state *state = NULL;
        struct rb_node *node;
        int bitset = 0;
 
        spin_lock(&tree->lock);
-       node = tree_search(tree, start);
+       if (cached && cached->tree && cached->start == start)
+               node = &cached->rb_node;
+       else
+               node = tree_search(tree, start);
        while (node && start <= end) {
                state = rb_entry(node, struct extent_state, rb_node);
 
@@ -1503,6 +1611,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
                        bitset = 0;
                        break;
                }
+
+               if (state->end == (u64)-1)
+                       break;
+
                start = state->end + 1;
                if (start > end)
                        break;
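The small hunk above guards the advance start = state->end + 1: when a state ends at (u64)-1, adding one wraps to 0 and the while loop would never terminate. A two-line demonstration of the wraparound the break avoids:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t end = UINT64_MAX;      /* a state ending at (u64)-1 */
        uint64_t next = end + 1;        /* wraps to 0, restarting the walk */

        printf("end     = %llu\n", (unsigned long long)end);
        printf("end + 1 = %llu (wrapped)\n", (unsigned long long)next);

        /* hence the guard: if (state->end == (u64)-1) break; */
        return 0;
}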
@@ -1526,7 +1638,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
 {
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+       if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
                SetPageUptodate(page);
        return 0;
 }
@@ -1540,7 +1652,7 @@ static int check_page_locked(struct extent_io_tree *tree,
 {
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
                unlock_page(page);
        return 0;
 }
@@ -1552,10 +1664,7 @@ static int check_page_locked(struct extent_io_tree *tree,
 static int check_page_writeback(struct extent_io_tree *tree,
                                struct page *page)
 {
-       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
-               end_page_writeback(page);
+       end_page_writeback(page);
        return 0;
 }
 
@@ -1613,13 +1722,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
        }
 
        if (!uptodate) {
-               clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+               clear_extent_uptodate(tree, start, end, GFP_NOFS);
                ClearPageUptodate(page);
                SetPageError(page);
        }
 
-       clear_extent_writeback(tree, start, end, GFP_ATOMIC);
-
        if (whole_page)
                end_page_writeback(page);
        else
@@ -1983,7 +2090,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        continue;
                }
                /* the get_extent function already copied into the page */
-               if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+               if (test_range_bit(tree, cur, cur_end,
+                                  EXTENT_UPTODATE, 1, NULL)) {
                        check_page_uptodate(tree, page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
@@ -2078,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 iosize;
        u64 unlock_start;
        sector_t sector;
+       struct extent_state *cached_state = NULL;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
@@ -2124,6 +2233,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        delalloc_end = 0;
        page_started = 0;
        if (!epd->extent_locked) {
+               u64 delalloc_to_write = 0;
                /*
                 * make sure the wbc mapping index is at least updated
                 * to this page.
@@ -2143,8 +2253,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        tree->ops->fill_delalloc(inode, page, delalloc_start,
                                                 delalloc_end, &page_started,
                                                 &nr_written);
+                       /*
+                        * delalloc_end is already one less than the total
+                        * length, so we don't subtract one from
+                        * PAGE_CACHE_SIZE
+                        */
+                       delalloc_to_write += (delalloc_end - delalloc_start +
+                                             PAGE_CACHE_SIZE) >>
+                                             PAGE_CACHE_SHIFT;
                        delalloc_start = delalloc_end + 1;
                }
+               if (wbc->nr_to_write < delalloc_to_write) {
+                       int thresh = 8192;
+
+                       if (delalloc_to_write < thresh * 2)
+                               thresh = delalloc_to_write;
+                       wbc->nr_to_write = min_t(u64, delalloc_to_write,
+                                                thresh);
+               }
 
                /* did the fill delalloc function already unlock and start
                 * the IO?
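The hunk above counts how many pages of delalloc this call turned into I/O and then raises wbc->nr_to_write toward that figure, capped at a threshold, so writeback does not stop mid-extent. A standalone sketch of the clamping arithmetic, with min_t expanded by hand and the numbers invented:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static long clamp_nr_to_write(long nr_to_write, uint64_t delalloc_to_write)
{
        if ((uint64_t)nr_to_write < delalloc_to_write) {
                long thresh = 8192;

                if (delalloc_to_write < (uint64_t)thresh * 2)
                        thresh = (long)delalloc_to_write;
                /* min_t(u64, delalloc_to_write, thresh) */
                nr_to_write = delalloc_to_write < (uint64_t)thresh ?
                              (long)delalloc_to_write : thresh;
        }
        return nr_to_write;
}

int main(void)
{
        /* the range is inclusive, so no extra "- 1" before the shift */
        uint64_t start = 0, end = 10 * PAGE_SIZE - 1;
        uint64_t pages = (end - start + PAGE_SIZE) >> PAGE_SHIFT;

        printf("delalloc pages: %llu\n", (unsigned long long)pages);
        printf("nr_to_write 4 -> %ld\n", clamp_nr_to_write(4, pages));
        printf("nr_to_write 4 -> %ld (huge range)\n",
               clamp_nr_to_write(4, 100000));
        return 0;
}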
@@ -2160,15 +2286,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        goto done_unlocked;
                }
        }
-       lock_extent(tree, start, page_end, GFP_NOFS);
-
-       unlock_start = start;
-
        if (tree->ops && tree->ops->writepage_start_hook) {
                ret = tree->ops->writepage_start_hook(page, start,
                                                      page_end);
                if (ret == -EAGAIN) {
-                       unlock_extent(tree, start, page_end, GFP_NOFS);
                        redirty_page_for_writepage(wbc, page);
                        update_nr_written(page, wbc, nr_written);
                        unlock_page(page);
@@ -2184,12 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        update_nr_written(page, wbc, nr_written + 1);
 
        end = page_end;
-       if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
-               printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
-
        if (last_byte <= start) {
-               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
-               unlock_extent(tree, start, page_end, GFP_NOFS);
                if (tree->ops && tree->ops->writepage_end_io_hook)
                        tree->ops->writepage_end_io_hook(page, start,
                                                         page_end, NULL, 1);
@@ -2197,13 +2313,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                goto done;
        }
 
-       set_extent_uptodate(tree, start, page_end, GFP_NOFS);
        blocksize = inode->i_sb->s_blocksize;
 
        while (cur <= end) {
                if (cur >= last_byte) {
-                       clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
-                       unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
                        if (tree->ops && tree->ops->writepage_end_io_hook)
                                tree->ops->writepage_end_io_hook(page, cur,
                                                                 page_end, NULL, 1);
@@ -2235,12 +2348,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                 */
                if (compressed || block_start == EXTENT_MAP_HOLE ||
                    block_start == EXTENT_MAP_INLINE) {
-                       clear_extent_dirty(tree, cur,
-                                          cur + iosize - 1, GFP_NOFS);
-
-                       unlock_extent(tree, unlock_start, cur + iosize - 1,
-                                     GFP_NOFS);
-
                        /*
                         * end_io notification does not happen here for
                         * compressed extents
@@ -2265,13 +2372,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                }
                /* leave this out until we have a page_mkwrite call */
                if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-                                  EXTENT_DIRTY, 0)) {
+                                  EXTENT_DIRTY, 0, NULL)) {
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
                }
 
-               clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
                if (tree->ops && tree->ops->writepage_io_hook) {
                        ret = tree->ops->writepage_io_hook(page, cur,
                                                           cur + iosize - 1);
@@ -2309,12 +2415,12 @@ done:
                set_page_writeback(page);
                end_page_writeback(page);
        }
-       if (unlock_start <= page_end)
-               unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
        unlock_page(page);
 
 done_unlocked:
 
+       /* drop our reference on any cached states */
+       free_extent_state(cached_state);
        return 0;
 }
 
@@ -2339,9 +2445,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
 {
-       struct backing_dev_info *bdi = mapping->backing_dev_info;
        int ret = 0;
        int done = 0;
+       int nr_to_write_done = 0;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t index;
@@ -2361,7 +2467,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                scanned = 1;
        }
 retry:
-       while (!done && (index <= end) &&
+       while (!done && !nr_to_write_done && (index <= end) &&
              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                        PAGECACHE_TAG_DIRTY, min(end - index,
                                (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2412,12 +2518,15 @@ retry:
                                unlock_page(page);
                                ret = 0;
                        }
-                       if (ret || wbc->nr_to_write <= 0)
-                               done = 1;
-                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-                               wbc->encountered_congestion = 1;
+                       if (ret)
                                done = 1;
-                       }
+
+                       /*
+                        * the filesystem may choose to bump up nr_to_write.
+                        * We have to make sure to honor the new nr_to_write
+                        * at any time
+                        */
+                       nr_to_write_done = wbc->nr_to_write <= 0;
                }
                pagevec_release(&pvec);
                cond_resched();
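Rather than setting done the moment nr_to_write hits zero mid-batch, the loop above now recomputes a nr_to_write_done flag after every page and tests it in the while condition, so a filesystem that bumps the budget back up mid-scan (as the delalloc path now does) is honored. A sketch of that control flow with a toy page source:

#include <stdio.h>

struct wbc { long nr_to_write; };

/* the "filesystem" may refill the budget while a page is written */
static void write_one_page(struct wbc *wbc, int page)
{
        printf("writing page %d (budget before: %ld)\n", page, wbc->nr_to_write);
        wbc->nr_to_write--;
        if (page == 2)
                wbc->nr_to_write += 3;  /* e.g. delalloc bumped nr_to_write */
}

int main(void)
{
        struct wbc wbc = { 3 };
        int done = 0, nr_to_write_done = 0;
        int page = 0, last_page = 9;

        while (!done && !nr_to_write_done && page <= last_page) {
                write_one_page(&wbc, page++);
                /*
                 * re-evaluate after every page: honor the *current*
                 * nr_to_write, not the value the batch started with
                 */
                nr_to_write_done = wbc.nr_to_write <= 0;
        }
        printf("stopped after page %d\n", page - 1);
        return 0;
}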
@@ -2604,10 +2713,11 @@ int extent_invalidatepage(struct extent_io_tree *tree,
                return 0;
 
        lock_extent(tree, start, end, GFP_NOFS);
-       wait_on_extent_writeback(tree, start, end);
+       wait_on_page_writeback(page);
        clear_extent_bit(tree, start, end,
-                        EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
-                        1, 1, GFP_NOFS);
+                        EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+                        EXTENT_DO_ACCOUNTING,
+                        1, 1, NULL, GFP_NOFS);
        return 0;
 }
 
@@ -2687,7 +2797,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                    !isnew && !PageUptodate(page) &&
                    (block_off_end > to || block_off_start < from) &&
                    !test_range_bit(tree, block_start, cur_end,
-                                   EXTENT_UPTODATE, 1)) {
+                                   EXTENT_UPTODATE, 1, NULL)) {
                        u64 sector;
                        u64 extent_offset = block_start - em->start;
                        size_t iosize;
@@ -2701,7 +2811,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                         */
                        set_extent_bit(tree, block_start,
                                       block_start + iosize - 1,
-                                      EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+                                      EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, em->bdev,
                                         NULL, 1,
@@ -2742,13 +2852,18 @@ int try_release_extent_state(struct extent_map_tree *map,
        int ret = 1;
 
        if (test_range_bit(tree, start, end,
-                          EXTENT_IOBITS | EXTENT_ORDERED, 0))
+                          EXTENT_IOBITS, 0, NULL))
                ret = 0;
        else {
                if ((mask & GFP_NOFS) == GFP_NOFS)
                        mask = GFP_NOFS;
-               clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
-                                1, 1, mask);
+               /*
+                * at this point we can safely clear everything except the
+                * locked bit and the nodatasum bit
+                */
+               clear_extent_bit(tree, start, end,
+                                ~(EXTENT_LOCKED | EXTENT_NODATASUM),
+                                0, 0, NULL, mask);
        }
        return ret;
 }
@@ -2771,29 +2886,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                u64 len;
                while (start <= end) {
                        len = end - start + 1;
-                       spin_lock(&map->lock);
+                       write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
                        if (!em || IS_ERR(em)) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                break;
                        }
                        if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
                            em->start != start) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                free_extent_map(em);
                                break;
                        }
                        if (!test_range_bit(tree, em->start,
                                            extent_map_end(em) - 1,
-                                           EXTENT_LOCKED | EXTENT_WRITEBACK |
-                                           EXTENT_ORDERED,
-                                           0)) {
+                                           EXTENT_LOCKED | EXTENT_WRITEBACK,
+                                           0, NULL)) {
                                remove_extent_mapping(map, em);
                                /* once for the rb tree */
                                free_extent_map(em);
                        }
                        start = extent_map_end(em);
-                       spin_unlock(&map->lock);
+                       write_unlock(&map->lock);
 
                        /* once for us */
                        free_extent_map(em);
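The hunk above converts the extent-map tree's spinlock to an rwlock so lookups can proceed concurrently; this call site mutates the tree, so it takes the write side. A userspace sketch of the read/write split, with pthread_rwlock_t standing in for the kernel's rwlock_t (compile with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;
static int extent_count = 1;

/* many readers may hold the lock at once */
static void *reader(void *arg)
{
        (void)arg;
        pthread_rwlock_rdlock(&map_lock);
        printf("lookup sees %d extent(s)\n", extent_count);
        pthread_rwlock_unlock(&map_lock);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, reader, NULL);
        pthread_create(&t2, NULL, reader, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);

        /* removal mutates the tree: exclusive write side, as in the hunk */
        pthread_rwlock_wrlock(&map_lock);
        extent_count--;
        pthread_rwlock_unlock(&map_lock);

        printf("after removal: %d extent(s)\n", extent_count);
        return 0;
}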
@@ -3203,7 +3317,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
        int uptodate;
        unsigned long index;
 
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
        if (ret)
                return 1;
        while (start <= end) {
@@ -3233,7 +3347,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
                return 1;
 
        ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1);
+                          EXTENT_UPTODATE, 1, NULL);
        if (ret)
                return ret;
 
@@ -3269,7 +3383,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                return 0;
 
        if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1)) {
+                          EXTENT_UPTODATE, 1, NULL)) {
                return 0;
        }
 