Diffstat (limited to 'fs/btrfs/extent_io.c')
| -rw-r--r-- | fs/btrfs/extent_io.c | 935 |
1 file changed, 735 insertions, 200 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7055d11c1efd..be1bf627a14b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "compat.h" | 17 | #include "compat.h" |
| 18 | #include "ctree.h" | 18 | #include "ctree.h" |
| 19 | #include "btrfs_inode.h" | 19 | #include "btrfs_inode.h" |
| 20 | #include "volumes.h" | ||
| 20 | 21 | ||
| 21 | static struct kmem_cache *extent_state_cache; | 22 | static struct kmem_cache *extent_state_cache; |
| 22 | static struct kmem_cache *extent_buffer_cache; | 23 | static struct kmem_cache *extent_buffer_cache; |
| @@ -254,14 +255,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | |||
| 254 | * | 255 | * |
| 255 | * This should be called with the tree lock held. | 256 | * This should be called with the tree lock held. |
| 256 | */ | 257 | */ |
| 257 | static int merge_state(struct extent_io_tree *tree, | 258 | static void merge_state(struct extent_io_tree *tree, |
| 258 | struct extent_state *state) | 259 | struct extent_state *state) |
| 259 | { | 260 | { |
| 260 | struct extent_state *other; | 261 | struct extent_state *other; |
| 261 | struct rb_node *other_node; | 262 | struct rb_node *other_node; |
| 262 | 263 | ||
| 263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 264 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
| 264 | return 0; | 265 | return; |
| 265 | 266 | ||
| 266 | other_node = rb_prev(&state->rb_node); | 267 | other_node = rb_prev(&state->rb_node); |
| 267 | if (other_node) { | 268 | if (other_node) { |
| @@ -281,26 +282,19 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 281 | if (other->start == state->end + 1 && | 282 | if (other->start == state->end + 1 && |
| 282 | other->state == state->state) { | 283 | other->state == state->state) { |
| 283 | merge_cb(tree, state, other); | 284 | merge_cb(tree, state, other); |
| 284 | other->start = state->start; | 285 | state->end = other->end; |
| 285 | state->tree = NULL; | 286 | other->tree = NULL; |
| 286 | rb_erase(&state->rb_node, &tree->state); | 287 | rb_erase(&other->rb_node, &tree->state); |
| 287 | free_extent_state(state); | 288 | free_extent_state(other); |
| 288 | state = NULL; | ||
| 289 | } | 289 | } |
| 290 | } | 290 | } |
| 291 | |||
| 292 | return 0; | ||
| 293 | } | 291 | } |
| 294 | 292 | ||
| 295 | static int set_state_cb(struct extent_io_tree *tree, | 293 | static void set_state_cb(struct extent_io_tree *tree, |
| 296 | struct extent_state *state, int *bits) | 294 | struct extent_state *state, int *bits) |
| 297 | { | 295 | { |
| 298 | if (tree->ops && tree->ops->set_bit_hook) { | 296 | if (tree->ops && tree->ops->set_bit_hook) |
| 299 | return tree->ops->set_bit_hook(tree->mapping->host, | 297 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); |
| 300 | state, bits); | ||
| 301 | } | ||
| 302 | |||
| 303 | return 0; | ||
| 304 | } | 298 | } |
| 305 | 299 | ||
| 306 | static void clear_state_cb(struct extent_io_tree *tree, | 300 | static void clear_state_cb(struct extent_io_tree *tree, |
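
Besides dropping the unused int return, the merge_state() rework above flips the merge direction for the next-neighbor case: instead of folding the current state into its neighbor and freeing it, the current state now absorbs the neighbor (state->end = other->end) and the neighbor is erased, so the caller's pointer stays valid across the call. A minimal user-space model of that forward merge (toy ranges, not kernel code):

    #include <stdio.h>

    struct range { unsigned long long start, end; };

    /* Merge b into a when adjacent; returns 1 if merged. */
    static int merge_forward(struct range *a, const struct range *b)
    {
            if (b->start != a->end + 1)
                    return 0;
            a->end = b->end;        /* 'a' grows; 'b' would be freed */
            return 1;
    }

    int main(void)
    {
            struct range a = { 0, 4095 }, b = { 4096, 8191 };

            if (merge_forward(&a, &b))
                    printf("merged: [%llu, %llu]\n", a.start, a.end);
            return 0;
    }
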
| @@ -310,6 +304,9 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
| 310 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 304 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| 311 | } | 305 | } |
| 312 | 306 | ||
| 307 | static void set_state_bits(struct extent_io_tree *tree, | ||
| 308 | struct extent_state *state, int *bits); | ||
| 309 | |||
| 313 | /* | 310 | /* |
| 314 | * insert an extent_state struct into the tree. 'bits' are set on the | 311 | * insert an extent_state struct into the tree. 'bits' are set on the |
| 315 | * struct before it is inserted. | 312 | * struct before it is inserted. |
| @@ -325,8 +322,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 325 | int *bits) | 322 | int *bits) |
| 326 | { | 323 | { |
| 327 | struct rb_node *node; | 324 | struct rb_node *node; |
| 328 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
| 329 | int ret; | ||
| 330 | 325 | ||
| 331 | if (end < start) { | 326 | if (end < start) { |
| 332 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 327 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
| @@ -336,13 +331,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 336 | } | 331 | } |
| 337 | state->start = start; | 332 | state->start = start; |
| 338 | state->end = end; | 333 | state->end = end; |
| 339 | ret = set_state_cb(tree, state, bits); | ||
| 340 | if (ret) | ||
| 341 | return ret; | ||
| 342 | 334 | ||
| 343 | if (bits_to_set & EXTENT_DIRTY) | 335 | set_state_bits(tree, state, bits); |
| 344 | tree->dirty_bytes += end - start + 1; | 336 | |
| 345 | state->state |= bits_to_set; | ||
| 346 | node = tree_insert(&tree->state, end, &state->rb_node); | 337 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 347 | if (node) { | 338 | if (node) { |
| 348 | struct extent_state *found; | 339 | struct extent_state *found; |
| @@ -351,7 +342,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 351 | "%llu %llu\n", (unsigned long long)found->start, | 342 | "%llu %llu\n", (unsigned long long)found->start, |
| 352 | (unsigned long long)found->end, | 343 | (unsigned long long)found->end, |
| 353 | (unsigned long long)start, (unsigned long long)end); | 344 | (unsigned long long)start, (unsigned long long)end); |
| 354 | free_extent_state(state); | ||
| 355 | return -EEXIST; | 345 | return -EEXIST; |
| 356 | } | 346 | } |
| 357 | state->tree = tree; | 347 | state->tree = tree; |
| @@ -359,13 +349,11 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 359 | return 0; | 349 | return 0; |
| 360 | } | 350 | } |
| 361 | 351 | ||
| 362 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | 352 | static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, |
| 363 | u64 split) | 353 | u64 split) |
| 364 | { | 354 | { |
| 365 | if (tree->ops && tree->ops->split_extent_hook) | 355 | if (tree->ops && tree->ops->split_extent_hook) |
| 366 | return tree->ops->split_extent_hook(tree->mapping->host, | 356 | tree->ops->split_extent_hook(tree->mapping->host, orig, split); |
| 367 | orig, split); | ||
| 368 | return 0; | ||
| 369 | } | 357 | } |
| 370 | 358 | ||
| 371 | /* | 359 | /* |
| @@ -500,7 +488,8 @@ again: | |||
| 500 | cached_state = NULL; | 488 | cached_state = NULL; |
| 501 | } | 489 | } |
| 502 | 490 | ||
| 503 | if (cached && cached->tree && cached->start == start) { | 491 | if (cached && cached->tree && cached->start <= start && |
| 492 | cached->end > start) { | ||
| 504 | if (clear) | 493 | if (clear) |
| 505 | atomic_dec(&cached->refs); | 494 | atomic_dec(&cached->refs); |
| 506 | state = cached; | 495 | state = cached; |
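
The cached-state fast path above is widened from an exact start match to a containment test, so a cached state that merely covers 'start' (for example, one that grew through a merge) still short-circuits the tree search; the same change recurs below in set_extent_bit() and test_range_bit(). The new predicate restated stand-alone, with made-up offsets:

    #include <stdio.h>

    typedef unsigned long long u64;

    /* Does the cached state [cstart, cend] cover 'start'? */
    static int cached_covers(u64 cstart, u64 cend, u64 start)
    {
            return cstart <= start && cend > start;
    }

    int main(void)
    {
            /* the old (cstart == start) check would miss this hit */
            printf("%d\n", cached_covers(0, 8191, 4096));     /* 1 */
            printf("%d\n", cached_covers(8192, 12287, 4096)); /* 0 */
            return 0;
    }
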
| @@ -660,34 +649,25 @@ again: | |||
| 660 | if (start > end) | 649 | if (start > end) |
| 661 | break; | 650 | break; |
| 662 | 651 | ||
| 663 | if (need_resched()) { | 652 | cond_resched_lock(&tree->lock); |
| 664 | spin_unlock(&tree->lock); | ||
| 665 | cond_resched(); | ||
| 666 | spin_lock(&tree->lock); | ||
| 667 | } | ||
| 668 | } | 653 | } |
| 669 | out: | 654 | out: |
| 670 | spin_unlock(&tree->lock); | 655 | spin_unlock(&tree->lock); |
| 671 | return 0; | 656 | return 0; |
| 672 | } | 657 | } |
| 673 | 658 | ||
| 674 | static int set_state_bits(struct extent_io_tree *tree, | 659 | static void set_state_bits(struct extent_io_tree *tree, |
| 675 | struct extent_state *state, | 660 | struct extent_state *state, |
| 676 | int *bits) | 661 | int *bits) |
| 677 | { | 662 | { |
| 678 | int ret; | ||
| 679 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | 663 | int bits_to_set = *bits & ~EXTENT_CTLBITS; |
| 680 | 664 | ||
| 681 | ret = set_state_cb(tree, state, bits); | 665 | set_state_cb(tree, state, bits); |
| 682 | if (ret) | ||
| 683 | return ret; | ||
| 684 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 666 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
| 685 | u64 range = state->end - state->start + 1; | 667 | u64 range = state->end - state->start + 1; |
| 686 | tree->dirty_bytes += range; | 668 | tree->dirty_bytes += range; |
| 687 | } | 669 | } |
| 688 | state->state |= bits_to_set; | 670 | state->state |= bits_to_set; |
| 689 | |||
| 690 | return 0; | ||
| 691 | } | 671 | } |
| 692 | 672 | ||
| 693 | static void cache_state(struct extent_state *state, | 673 | static void cache_state(struct extent_state *state, |
| @@ -742,7 +722,8 @@ again: | |||
| 742 | spin_lock(&tree->lock); | 722 | spin_lock(&tree->lock); |
| 743 | if (cached_state && *cached_state) { | 723 | if (cached_state && *cached_state) { |
| 744 | state = *cached_state; | 724 | state = *cached_state; |
| 745 | if (state->start == start && state->tree) { | 725 | if (state->start <= start && state->end > start && |
| 726 | state->tree) { | ||
| 746 | node = &state->rb_node; | 727 | node = &state->rb_node; |
| 747 | goto hit_next; | 728 | goto hit_next; |
| 748 | } | 729 | } |
| @@ -779,17 +760,15 @@ hit_next: | |||
| 779 | goto out; | 760 | goto out; |
| 780 | } | 761 | } |
| 781 | 762 | ||
| 782 | err = set_state_bits(tree, state, &bits); | 763 | set_state_bits(tree, state, &bits); |
| 783 | if (err) | ||
| 784 | goto out; | ||
| 785 | 764 | ||
| 786 | next_node = rb_next(node); | ||
| 787 | cache_state(state, cached_state); | 765 | cache_state(state, cached_state); |
| 788 | merge_state(tree, state); | 766 | merge_state(tree, state); |
| 789 | if (last_end == (u64)-1) | 767 | if (last_end == (u64)-1) |
| 790 | goto out; | 768 | goto out; |
| 791 | 769 | ||
| 792 | start = last_end + 1; | 770 | start = last_end + 1; |
| 771 | next_node = rb_next(&state->rb_node); | ||
| 793 | if (next_node && start < end && prealloc && !need_resched()) { | 772 | if (next_node && start < end && prealloc && !need_resched()) { |
| 794 | state = rb_entry(next_node, struct extent_state, | 773 | state = rb_entry(next_node, struct extent_state, |
| 795 | rb_node); | 774 | rb_node); |
| @@ -830,9 +809,7 @@ hit_next: | |||
| 830 | if (err) | 809 | if (err) |
| 831 | goto out; | 810 | goto out; |
| 832 | if (state->end <= end) { | 811 | if (state->end <= end) { |
| 833 | err = set_state_bits(tree, state, &bits); | 812 | set_state_bits(tree, state, &bits); |
| 834 | if (err) | ||
| 835 | goto out; | ||
| 836 | cache_state(state, cached_state); | 813 | cache_state(state, cached_state); |
| 837 | merge_state(tree, state); | 814 | merge_state(tree, state); |
| 838 | if (last_end == (u64)-1) | 815 | if (last_end == (u64)-1) |
| @@ -862,7 +839,6 @@ hit_next: | |||
| 862 | * Avoid freeing 'prealloc' if it can be merged with | 839 | * Avoid freeing 'prealloc' if it can be merged with |
| 863 | * the later extent. | 840 | * the later extent. |
| 864 | */ | 841 | */ |
| 865 | atomic_inc(&prealloc->refs); | ||
| 866 | err = insert_state(tree, prealloc, start, this_end, | 842 | err = insert_state(tree, prealloc, start, this_end, |
| 867 | &bits); | 843 | &bits); |
| 868 | BUG_ON(err == -EEXIST); | 844 | BUG_ON(err == -EEXIST); |
| @@ -872,7 +848,6 @@ hit_next: | |||
| 872 | goto out; | 848 | goto out; |
| 873 | } | 849 | } |
| 874 | cache_state(prealloc, cached_state); | 850 | cache_state(prealloc, cached_state); |
| 875 | free_extent_state(prealloc); | ||
| 876 | prealloc = NULL; | 851 | prealloc = NULL; |
| 877 | start = this_end + 1; | 852 | start = this_end + 1; |
| 878 | goto search_again; | 853 | goto search_again; |
| @@ -895,12 +870,196 @@ hit_next: | |||
| 895 | err = split_state(tree, state, prealloc, end + 1); | 870 | err = split_state(tree, state, prealloc, end + 1); |
| 896 | BUG_ON(err == -EEXIST); | 871 | BUG_ON(err == -EEXIST); |
| 897 | 872 | ||
| 898 | err = set_state_bits(tree, prealloc, &bits); | 873 | set_state_bits(tree, prealloc, &bits); |
| 874 | cache_state(prealloc, cached_state); | ||
| 875 | merge_state(tree, prealloc); | ||
| 876 | prealloc = NULL; | ||
| 877 | goto out; | ||
| 878 | } | ||
| 879 | |||
| 880 | goto search_again; | ||
| 881 | |||
| 882 | out: | ||
| 883 | spin_unlock(&tree->lock); | ||
| 884 | if (prealloc) | ||
| 885 | free_extent_state(prealloc); | ||
| 886 | |||
| 887 | return err; | ||
| 888 | |||
| 889 | search_again: | ||
| 890 | if (start > end) | ||
| 891 | goto out; | ||
| 892 | spin_unlock(&tree->lock); | ||
| 893 | if (mask & __GFP_WAIT) | ||
| 894 | cond_resched(); | ||
| 895 | goto again; | ||
| 896 | } | ||
| 897 | |||
| 898 | /** | ||
| 899 | * convert_extent - convert all bits in a given range from one bit to another | ||
| 900 | * @tree: the io tree to search | ||
| 901 | * @start: the start offset in bytes | ||
| 902 | * @end: the end offset in bytes (inclusive) | ||
| 903 | * @bits: the bits to set in this range | ||
| 904 | * @clear_bits: the bits to clear in this range | ||
| 905 | * @mask: the allocation mask | ||
| 906 | * | ||
| 907 | * This will go through and set bits for the given range. If any states exist | ||
| 908 | * already in this range they are set with the given bit and cleared of the | ||
| 909 | * clear_bits. This is only meant to be used by things that are mergeable, ie | ||
| 910 | * converting from say DELALLOC to DIRTY. This is not meant to be used with | ||
| 911 | * boundary bits like LOCK. | ||
| 912 | */ | ||
| 913 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 914 | int bits, int clear_bits, gfp_t mask) | ||
| 915 | { | ||
| 916 | struct extent_state *state; | ||
| 917 | struct extent_state *prealloc = NULL; | ||
| 918 | struct rb_node *node; | ||
| 919 | int err = 0; | ||
| 920 | u64 last_start; | ||
| 921 | u64 last_end; | ||
| 922 | |||
| 923 | again: | ||
| 924 | if (!prealloc && (mask & __GFP_WAIT)) { | ||
| 925 | prealloc = alloc_extent_state(mask); | ||
| 926 | if (!prealloc) | ||
| 927 | return -ENOMEM; | ||
| 928 | } | ||
| 929 | |||
| 930 | spin_lock(&tree->lock); | ||
| 931 | /* | ||
| 932 | * this search will find all the extents that end after | ||
| 933 | * our range starts. | ||
| 934 | */ | ||
| 935 | node = tree_search(tree, start); | ||
| 936 | if (!node) { | ||
| 937 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 938 | if (!prealloc) | ||
| 939 | return -ENOMEM; | ||
| 940 | err = insert_state(tree, prealloc, start, end, &bits); | ||
| 941 | prealloc = NULL; | ||
| 942 | BUG_ON(err == -EEXIST); | ||
| 943 | goto out; | ||
| 944 | } | ||
| 945 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 946 | hit_next: | ||
| 947 | last_start = state->start; | ||
| 948 | last_end = state->end; | ||
| 949 | |||
| 950 | /* | ||
| 951 | * | ---- desired range ---- | | ||
| 952 | * | state | | ||
| 953 | * | ||
| 954 | * Just lock what we found and keep going | ||
| 955 | */ | ||
| 956 | if (state->start == start && state->end <= end) { | ||
| 957 | struct rb_node *next_node; | ||
| 958 | |||
| 959 | set_state_bits(tree, state, &bits); | ||
| 960 | clear_state_bit(tree, state, &clear_bits, 0); | ||
| 961 | |||
| 962 | merge_state(tree, state); | ||
| 963 | if (last_end == (u64)-1) | ||
| 964 | goto out; | ||
| 965 | |||
| 966 | start = last_end + 1; | ||
| 967 | next_node = rb_next(&state->rb_node); | ||
| 968 | if (next_node && start < end && prealloc && !need_resched()) { | ||
| 969 | state = rb_entry(next_node, struct extent_state, | ||
| 970 | rb_node); | ||
| 971 | if (state->start == start) | ||
| 972 | goto hit_next; | ||
| 973 | } | ||
| 974 | goto search_again; | ||
| 975 | } | ||
| 976 | |||
| 977 | /* | ||
| 978 | * | ---- desired range ---- | | ||
| 979 | * | state | | ||
| 980 | * or | ||
| 981 | * | ------------- state -------------- | | ||
| 982 | * | ||
| 983 | * We need to split the extent we found, and may flip bits on | ||
| 984 | * second half. | ||
| 985 | * | ||
| 986 | * If the extent we found extends past our | ||
| 987 | * range, we just split and search again. It'll get split | ||
| 988 | * again the next time though. | ||
| 989 | * | ||
| 990 | * If the extent we found is inside our range, we set the | ||
| 991 | * desired bit on it. | ||
| 992 | */ | ||
| 993 | if (state->start < start) { | ||
| 994 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 995 | if (!prealloc) | ||
| 996 | return -ENOMEM; | ||
| 997 | err = split_state(tree, state, prealloc, start); | ||
| 998 | BUG_ON(err == -EEXIST); | ||
| 999 | prealloc = NULL; | ||
| 1000 | if (err) | ||
| 1001 | goto out; | ||
| 1002 | if (state->end <= end) { | ||
| 1003 | set_state_bits(tree, state, &bits); | ||
| 1004 | clear_state_bit(tree, state, &clear_bits, 0); | ||
| 1005 | merge_state(tree, state); | ||
| 1006 | if (last_end == (u64)-1) | ||
| 1007 | goto out; | ||
| 1008 | start = last_end + 1; | ||
| 1009 | } | ||
| 1010 | goto search_again; | ||
| 1011 | } | ||
| 1012 | /* | ||
| 1013 | * | ---- desired range ---- | | ||
| 1014 | * | state | or | state | | ||
| 1015 | * | ||
| 1016 | * There's a hole, we need to insert something in it and | ||
| 1017 | * ignore the extent we found. | ||
| 1018 | */ | ||
| 1019 | if (state->start > start) { | ||
| 1020 | u64 this_end; | ||
| 1021 | if (end < last_start) | ||
| 1022 | this_end = end; | ||
| 1023 | else | ||
| 1024 | this_end = last_start - 1; | ||
| 1025 | |||
| 1026 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 1027 | if (!prealloc) | ||
| 1028 | return -ENOMEM; | ||
| 1029 | |||
| 1030 | /* | ||
| 1031 | * Avoid freeing 'prealloc' if it can be merged with | ||
| 1032 | * the later extent. | ||
| 1033 | */ | ||
| 1034 | err = insert_state(tree, prealloc, start, this_end, | ||
| 1035 | &bits); | ||
| 1036 | BUG_ON(err == -EEXIST); | ||
| 899 | if (err) { | 1037 | if (err) { |
| 1038 | free_extent_state(prealloc); | ||
| 900 | prealloc = NULL; | 1039 | prealloc = NULL; |
| 901 | goto out; | 1040 | goto out; |
| 902 | } | 1041 | } |
| 903 | cache_state(prealloc, cached_state); | 1042 | prealloc = NULL; |
| 1043 | start = this_end + 1; | ||
| 1044 | goto search_again; | ||
| 1045 | } | ||
| 1046 | /* | ||
| 1047 | * | ---- desired range ---- | | ||
| 1048 | * | state | | ||
| 1049 | * We need to split the extent, and set the bit | ||
| 1050 | * on the first half | ||
| 1051 | */ | ||
| 1052 | if (state->start <= end && state->end > end) { | ||
| 1053 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 1054 | if (!prealloc) | ||
| 1055 | return -ENOMEM; | ||
| 1056 | |||
| 1057 | err = split_state(tree, state, prealloc, end + 1); | ||
| 1058 | BUG_ON(err == -EEXIST); | ||
| 1059 | |||
| 1060 | set_state_bits(tree, prealloc, &bits); | ||
| 1061 | clear_state_bit(tree, prealloc, &clear_bits, 0); | ||
| 1062 | |||
| 904 | merge_state(tree, prealloc); | 1063 | merge_state(tree, prealloc); |
| 905 | prealloc = NULL; | 1064 | prealloc = NULL; |
| 906 | goto out; | 1065 | goto out; |
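
The new convert_extent_bit() above reuses the skeleton of set_extent_bit(): preallocate outside the lock when __GFP_WAIT allows, split or insert states as needed, and retry through search_again. The difference is that each state it touches gets 'bits' set and 'clear_bits' cleared in one pass. A toy user-space model of just those conversion semantics, one flag byte per block (names and values are illustrative):

    #include <stdio.h>

    #define DELALLOC 0x1
    #define DIRTY    0x2

    /* Set 'bits' and clear 'clear_bits' across [start, end] inclusive. */
    static void convert_range(unsigned char *flags, int start, int end,
                              unsigned char bits, unsigned char clear_bits)
    {
            int i;

            for (i = start; i <= end; i++) {
                    flags[i] |= bits;
                    flags[i] &= ~clear_bits;
            }
    }

    int main(void)
    {
            unsigned char flags[8];
            int i;

            for (i = 0; i < 8; i++)
                    flags[i] = DELALLOC;
            convert_range(flags, 2, 5, DIRTY, DELALLOC);
            for (i = 0; i < 8; i++)
                    printf("block %d: %#x\n", i, flags[i]);
            return 0;
    }
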
| @@ -949,7 +1108,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 949 | struct extent_state **cached_state, gfp_t mask) | 1108 | struct extent_state **cached_state, gfp_t mask) |
| 950 | { | 1109 | { |
| 951 | return set_extent_bit(tree, start, end, | 1110 | return set_extent_bit(tree, start, end, |
| 952 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, | 1111 | EXTENT_DELALLOC | EXTENT_UPTODATE, |
| 953 | 0, NULL, cached_state, mask); | 1112 | 0, NULL, cached_state, mask); |
| 954 | } | 1113 | } |
| 955 | 1114 | ||
| @@ -1061,46 +1220,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 1061 | return 0; | 1220 | return 0; |
| 1062 | } | 1221 | } |
| 1063 | 1222 | ||
| 1064 | /* | ||
| 1065 | * find the first offset in the io tree with 'bits' set. zero is | ||
| 1066 | * returned if we find something, and *start_ret and *end_ret are | ||
| 1067 | * set to reflect the state struct that was found. | ||
| 1068 | * | ||
| 1069 | * If nothing was found, 1 is returned, < 0 on error | ||
| 1070 | */ | ||
| 1071 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
| 1072 | u64 *start_ret, u64 *end_ret, int bits) | ||
| 1073 | { | ||
| 1074 | struct rb_node *node; | ||
| 1075 | struct extent_state *state; | ||
| 1076 | int ret = 1; | ||
| 1077 | |||
| 1078 | spin_lock(&tree->lock); | ||
| 1079 | /* | ||
| 1080 | * this search will find all the extents that end after | ||
| 1081 | * our range starts. | ||
| 1082 | */ | ||
| 1083 | node = tree_search(tree, start); | ||
| 1084 | if (!node) | ||
| 1085 | goto out; | ||
| 1086 | |||
| 1087 | while (1) { | ||
| 1088 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1089 | if (state->end >= start && (state->state & bits)) { | ||
| 1090 | *start_ret = state->start; | ||
| 1091 | *end_ret = state->end; | ||
| 1092 | ret = 0; | ||
| 1093 | break; | ||
| 1094 | } | ||
| 1095 | node = rb_next(node); | ||
| 1096 | if (!node) | ||
| 1097 | break; | ||
| 1098 | } | ||
| 1099 | out: | ||
| 1100 | spin_unlock(&tree->lock); | ||
| 1101 | return ret; | ||
| 1102 | } | ||
| 1103 | |||
| 1104 | /* find the first state struct with 'bits' set after 'start', and | 1223 | /* find the first state struct with 'bits' set after 'start', and |
| 1105 | * return it. tree->lock must be held. NULL will be returned if | 1224 | * return it. tree->lock must be held. NULL will be returned if |
| 1106 | * nothing was found after 'start' | 1225 | * nothing was found after 'start' |
| @@ -1133,6 +1252,30 @@ out: | |||
| 1133 | } | 1252 | } |
| 1134 | 1253 | ||
| 1135 | /* | 1254 | /* |
| 1255 | * find the first offset in the io tree with 'bits' set. zero is | ||
| 1256 | * returned if we find something, and *start_ret and *end_ret are | ||
| 1257 | * set to reflect the state struct that was found. | ||
| 1258 | * | ||
| 1259 | * If nothing was found, 1 is returned, < 0 on error | ||
| 1260 | */ | ||
| 1261 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
| 1262 | u64 *start_ret, u64 *end_ret, int bits) | ||
| 1263 | { | ||
| 1264 | struct extent_state *state; | ||
| 1265 | int ret = 1; | ||
| 1266 | |||
| 1267 | spin_lock(&tree->lock); | ||
| 1268 | state = find_first_extent_bit_state(tree, start, bits); | ||
| 1269 | if (state) { | ||
| 1270 | *start_ret = state->start; | ||
| 1271 | *end_ret = state->end; | ||
| 1272 | ret = 0; | ||
| 1273 | } | ||
| 1274 | spin_unlock(&tree->lock); | ||
| 1275 | return ret; | ||
| 1276 | } | ||
| 1277 | |||
| 1278 | /* | ||
| 1136 | * find a contiguous range of bytes in the file marked as delalloc, not | 1279 | * find a contiguous range of bytes in the file marked as delalloc, not |
| 1137 | * more than 'max_bytes'. start and end are used to return the range, | 1280 | * more than 'max_bytes'. start and end are used to return the range, |
| 1138 | * | 1281 | * |
| @@ -1564,7 +1707,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 1564 | int bitset = 0; | 1707 | int bitset = 0; |
| 1565 | 1708 | ||
| 1566 | spin_lock(&tree->lock); | 1709 | spin_lock(&tree->lock); |
| 1567 | if (cached && cached->tree && cached->start == start) | 1710 | if (cached && cached->tree && cached->start <= start && |
| 1711 | cached->end > start) | ||
| 1568 | node = &cached->rb_node; | 1712 | node = &cached->rb_node; |
| 1569 | else | 1713 | else |
| 1570 | node = tree_search(tree, start); | 1714 | node = tree_search(tree, start); |
| @@ -1644,6 +1788,368 @@ static int check_page_writeback(struct extent_io_tree *tree, | |||
| 1644 | return 0; | 1788 | return 0; |
| 1645 | } | 1789 | } |
| 1646 | 1790 | ||
| 1791 | /* | ||
| 1792 | * When IO fails, either with EIO or csum verification fails, we | ||
| 1793 | * try other mirrors that might have a good copy of the data. This | ||
| 1794 | * io_failure_record is used to record state as we go through all the | ||
| 1795 | * mirrors. If another mirror has good data, the page is set up to date | ||
| 1796 | * and things continue. If a good mirror can't be found, the original | ||
| 1797 | * bio end_io callback is called to indicate things have failed. | ||
| 1798 | */ | ||
| 1799 | struct io_failure_record { | ||
| 1800 | struct page *page; | ||
| 1801 | u64 start; | ||
| 1802 | u64 len; | ||
| 1803 | u64 logical; | ||
| 1804 | unsigned long bio_flags; | ||
| 1805 | int this_mirror; | ||
| 1806 | int failed_mirror; | ||
| 1807 | int in_validation; | ||
| 1808 | }; | ||
| 1809 | |||
| 1810 | static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | ||
| 1811 | int did_repair) | ||
| 1812 | { | ||
| 1813 | int ret; | ||
| 1814 | int err = 0; | ||
| 1815 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
| 1816 | |||
| 1817 | set_state_private(failure_tree, rec->start, 0); | ||
| 1818 | ret = clear_extent_bits(failure_tree, rec->start, | ||
| 1819 | rec->start + rec->len - 1, | ||
| 1820 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
| 1821 | if (ret) | ||
| 1822 | err = ret; | ||
| 1823 | |||
| 1824 | if (did_repair) { | ||
| 1825 | ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, | ||
| 1826 | rec->start + rec->len - 1, | ||
| 1827 | EXTENT_DAMAGED, GFP_NOFS); | ||
| 1828 | if (ret && !err) | ||
| 1829 | err = ret; | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | kfree(rec); | ||
| 1833 | return err; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | static void repair_io_failure_callback(struct bio *bio, int err) | ||
| 1837 | { | ||
| 1838 | complete(bio->bi_private); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * this bypasses the standard btrfs submit functions deliberately, as | ||
| 1843 | * the standard behavior is to write all copies in a raid setup. here we only | ||
| 1844 | * want to write the one bad copy. so we do the mapping for ourselves and issue | ||
| 1845 | * submit_bio directly. | ||
| 1846 | * to avoid any synchronization issues, wait for the data after writing, which | ||
| 1847 | * actually prevents the read that triggered the error from finishing. | ||
| 1848 | * currently, there can be no more than two copies of every data bit. thus, | ||
| 1849 | * exactly one rewrite is required. | ||
| 1850 | */ | ||
| 1851 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | ||
| 1852 | u64 length, u64 logical, struct page *page, | ||
| 1853 | int mirror_num) | ||
| 1854 | { | ||
| 1855 | struct bio *bio; | ||
| 1856 | struct btrfs_device *dev; | ||
| 1857 | DECLARE_COMPLETION_ONSTACK(compl); | ||
| 1858 | u64 map_length = 0; | ||
| 1859 | u64 sector; | ||
| 1860 | struct btrfs_bio *bbio = NULL; | ||
| 1861 | int ret; | ||
| 1862 | |||
| 1863 | BUG_ON(!mirror_num); | ||
| 1864 | |||
| 1865 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 1866 | if (!bio) | ||
| 1867 | return -EIO; | ||
| 1868 | bio->bi_private = &compl; | ||
| 1869 | bio->bi_end_io = repair_io_failure_callback; | ||
| 1870 | bio->bi_size = 0; | ||
| 1871 | map_length = length; | ||
| 1872 | |||
| 1873 | ret = btrfs_map_block(map_tree, WRITE, logical, | ||
| 1874 | &map_length, &bbio, mirror_num); | ||
| 1875 | if (ret) { | ||
| 1876 | bio_put(bio); | ||
| 1877 | return -EIO; | ||
| 1878 | } | ||
| 1879 | BUG_ON(mirror_num != bbio->mirror_num); | ||
| 1880 | sector = bbio->stripes[mirror_num-1].physical >> 9; | ||
| 1881 | bio->bi_sector = sector; | ||
| 1882 | dev = bbio->stripes[mirror_num-1].dev; | ||
| 1883 | kfree(bbio); | ||
| 1884 | if (!dev || !dev->bdev || !dev->writeable) { | ||
| 1885 | bio_put(bio); | ||
| 1886 | return -EIO; | ||
| 1887 | } | ||
| 1888 | bio->bi_bdev = dev->bdev; | ||
| 1889 | bio_add_page(bio, page, length, start-page_offset(page)); | ||
| 1890 | submit_bio(WRITE_SYNC, bio); | ||
| 1891 | wait_for_completion(&compl); | ||
| 1892 | |||
| 1893 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
| 1894 | /* try to remap that extent elsewhere? */ | ||
| 1895 | bio_put(bio); | ||
| 1896 | return -EIO; | ||
| 1897 | } | ||
| 1898 | |||
| 1899 | printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " | ||
| 1900 | "sector %llu)\n", page->mapping->host->i_ino, start, | ||
| 1901 | dev->name, sector); | ||
| 1902 | |||
| 1903 | bio_put(bio); | ||
| 1904 | return 0; | ||
| 1905 | } | ||
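
repair_io_failure() maps the logical address itself and calls submit_bio() directly so that only the one bad mirror is rewritten; the normal submit path would write every copy in the RAID set. A toy model of the stripe selection and the 'physical >> 9' byte-to-sector step (device names and offsets are invented):

    #include <stdio.h>

    struct stripe { unsigned long long physical; const char *dev; };

    int main(void)
    {
            struct stripe stripes[] = {
                    { 1ULL << 30, "/dev/sdb" },     /* mirror 1 */
                    { 5ULL << 30, "/dev/sdc" },     /* mirror 2 */
            };
            int mirror_num = 2;     /* rewrite only the bad copy */
            struct stripe *s = &stripes[mirror_num - 1];

            printf("rewrite %s at sector %llu\n", s->dev, s->physical >> 9);
            return 0;
    }
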
| 1906 | |||
| 1907 | /* | ||
| 1908 | * each time an IO finishes, we do a fast check in the IO failure tree | ||
| 1909 | * to see if we need to process or clean up an io_failure_record | ||
| 1910 | */ | ||
| 1911 | static int clean_io_failure(u64 start, struct page *page) | ||
| 1912 | { | ||
| 1913 | u64 private; | ||
| 1914 | u64 private_failure; | ||
| 1915 | struct io_failure_record *failrec; | ||
| 1916 | struct btrfs_mapping_tree *map_tree; | ||
| 1917 | struct extent_state *state; | ||
| 1918 | int num_copies; | ||
| 1919 | int did_repair = 0; | ||
| 1920 | int ret; | ||
| 1921 | struct inode *inode = page->mapping->host; | ||
| 1922 | |||
| 1923 | private = 0; | ||
| 1924 | ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
| 1925 | (u64)-1, 1, EXTENT_DIRTY, 0); | ||
| 1926 | if (!ret) | ||
| 1927 | return 0; | ||
| 1928 | |||
| 1929 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, | ||
| 1930 | &private_failure); | ||
| 1931 | if (ret) | ||
| 1932 | return 0; | ||
| 1933 | |||
| 1934 | failrec = (struct io_failure_record *)(unsigned long) private_failure; | ||
| 1935 | BUG_ON(!failrec->this_mirror); | ||
| 1936 | |||
| 1937 | if (failrec->in_validation) { | ||
| 1938 | /* there was no real error, just free the record */ | ||
| 1939 | pr_debug("clean_io_failure: freeing dummy error at %llu\n", | ||
| 1940 | failrec->start); | ||
| 1941 | did_repair = 1; | ||
| 1942 | goto out; | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | spin_lock(&BTRFS_I(inode)->io_tree.lock); | ||
| 1946 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | ||
| 1947 | failrec->start, | ||
| 1948 | EXTENT_LOCKED); | ||
| 1949 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); | ||
| 1950 | |||
| 1951 | if (state && state->start == failrec->start) { | ||
| 1952 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | ||
| 1953 | num_copies = btrfs_num_copies(map_tree, failrec->logical, | ||
| 1954 | failrec->len); | ||
| 1955 | if (num_copies > 1) { | ||
| 1956 | ret = repair_io_failure(map_tree, start, failrec->len, | ||
| 1957 | failrec->logical, page, | ||
| 1958 | failrec->failed_mirror); | ||
| 1959 | did_repair = !ret; | ||
| 1960 | } | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | out: | ||
| 1964 | if (!ret) | ||
| 1965 | ret = free_io_failure(inode, failrec, did_repair); | ||
| 1966 | |||
| 1967 | return ret; | ||
| 1968 | } | ||
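
Both clean_io_failure() here and bio_readpage_error() below pass the io_failure_record through the failure tree's 64-bit private value, casting the pointer in for set_state_private() and back out after get_state_private(). A stand-alone sketch of that round trip (the record is reduced to two fields):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct io_failure_record { uint64_t start, len; };

    int main(void)
    {
            struct io_failure_record *rec = malloc(sizeof(*rec));
            uint64_t private;

            rec->start = 4096;
            rec->len = 4096;
            /* store: pointer -> u64, as done for set_state_private() */
            private = (uint64_t)(uintptr_t)rec;
            /* load: u64 -> pointer, as done after get_state_private() */
            rec = (struct io_failure_record *)(uintptr_t)private;
            printf("start=%" PRIu64 " len=%" PRIu64 "\n",
                   rec->start, rec->len);
            free(rec);
            return 0;
    }
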
| 1969 | |||
| 1970 | /* | ||
| 1971 | * this is a generic handler for readpage errors (default | ||
| 1972 | * readpage_io_failed_hook). if other copies exist, read those and write back | ||
| 1973 | * good data to the failed position. does not attempt to remap the | ||
| 1974 | * failed extent elsewhere, hoping the device will be smart enough to do this as | ||
| 1975 | * needed | ||
| 1976 | */ | ||
| 1977 | |||
| 1978 | static int bio_readpage_error(struct bio *failed_bio, struct page *page, | ||
| 1979 | u64 start, u64 end, int failed_mirror, | ||
| 1980 | struct extent_state *state) | ||
| 1981 | { | ||
| 1982 | struct io_failure_record *failrec = NULL; | ||
| 1983 | u64 private; | ||
| 1984 | struct extent_map *em; | ||
| 1985 | struct inode *inode = page->mapping->host; | ||
| 1986 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
| 1987 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
| 1988 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1989 | struct bio *bio; | ||
| 1990 | int num_copies; | ||
| 1991 | int ret; | ||
| 1992 | int read_mode; | ||
| 1993 | u64 logical; | ||
| 1994 | |||
| 1995 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
| 1996 | |||
| 1997 | ret = get_state_private(failure_tree, start, &private); | ||
| 1998 | if (ret) { | ||
| 1999 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); | ||
| 2000 | if (!failrec) | ||
| 2001 | return -ENOMEM; | ||
| 2002 | failrec->start = start; | ||
| 2003 | failrec->len = end - start + 1; | ||
| 2004 | failrec->this_mirror = 0; | ||
| 2005 | failrec->bio_flags = 0; | ||
| 2006 | failrec->in_validation = 0; | ||
| 2007 | |||
| 2008 | read_lock(&em_tree->lock); | ||
| 2009 | em = lookup_extent_mapping(em_tree, start, failrec->len); | ||
| 2010 | if (!em) { | ||
| 2011 | read_unlock(&em_tree->lock); | ||
| 2012 | kfree(failrec); | ||
| 2013 | return -EIO; | ||
| 2014 | } | ||
| 2015 | |||
| 2016 | if (em->start > start || em->start + em->len < start) { | ||
| 2017 | free_extent_map(em); | ||
| 2018 | em = NULL; | ||
| 2019 | } | ||
| 2020 | read_unlock(&em_tree->lock); | ||
| 2021 | |||
| 2022 | if (!em || IS_ERR(em)) { | ||
| 2023 | kfree(failrec); | ||
| 2024 | return -EIO; | ||
| 2025 | } | ||
| 2026 | logical = start - em->start; | ||
| 2027 | logical = em->block_start + logical; | ||
| 2028 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | ||
| 2029 | logical = em->block_start; | ||
| 2030 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | ||
| 2031 | extent_set_compress_type(&failrec->bio_flags, | ||
| 2032 | em->compress_type); | ||
| 2033 | } | ||
| 2034 | pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " | ||
| 2035 | "len=%llu\n", logical, start, failrec->len); | ||
| 2036 | failrec->logical = logical; | ||
| 2037 | free_extent_map(em); | ||
| 2038 | |||
| 2039 | /* set the bits in the private failure tree */ | ||
| 2040 | ret = set_extent_bits(failure_tree, start, end, | ||
| 2041 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
| 2042 | if (ret >= 0) | ||
| 2043 | ret = set_state_private(failure_tree, start, | ||
| 2044 | (u64)(unsigned long)failrec); | ||
| 2045 | /* set the bits in the inode's tree */ | ||
| 2046 | if (ret >= 0) | ||
| 2047 | ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, | ||
| 2048 | GFP_NOFS); | ||
| 2049 | if (ret < 0) { | ||
| 2050 | kfree(failrec); | ||
| 2051 | return ret; | ||
| 2052 | } | ||
| 2053 | } else { | ||
| 2054 | failrec = (struct io_failure_record *)(unsigned long)private; | ||
| 2055 | pr_debug("bio_readpage_error: (found) logical=%llu, " | ||
| 2056 | "start=%llu, len=%llu, validation=%d\n", | ||
| 2057 | failrec->logical, failrec->start, failrec->len, | ||
| 2058 | failrec->in_validation); | ||
| 2059 | /* | ||
| 2060 | * when data can be on disk more than twice, add to failrec here | ||
| 2061 | * (e.g. with a list for failed_mirror) to make | ||
| 2062 | * clean_io_failure() clean all those errors at once. | ||
| 2063 | */ | ||
| 2064 | } | ||
| 2065 | num_copies = btrfs_num_copies( | ||
| 2066 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
| 2067 | failrec->logical, failrec->len); | ||
| 2068 | if (num_copies == 1) { | ||
| 2069 | /* | ||
| 2070 | * we only have a single copy of the data, so don't bother with | ||
| 2071 | * all the retry and error correction code that follows. no | ||
| 2072 | * matter what the error is, it is very likely to persist. | ||
| 2073 | */ | ||
| 2074 | pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " | ||
| 2075 | "state=%p, num_copies=%d, next_mirror %d, " | ||
| 2076 | "failed_mirror %d\n", state, num_copies, | ||
| 2077 | failrec->this_mirror, failed_mirror); | ||
| 2078 | free_io_failure(inode, failrec, 0); | ||
| 2079 | return -EIO; | ||
| 2080 | } | ||
| 2081 | |||
| 2082 | if (!state) { | ||
| 2083 | spin_lock(&tree->lock); | ||
| 2084 | state = find_first_extent_bit_state(tree, failrec->start, | ||
| 2085 | EXTENT_LOCKED); | ||
| 2086 | if (state && state->start != failrec->start) | ||
| 2087 | state = NULL; | ||
| 2088 | spin_unlock(&tree->lock); | ||
| 2089 | } | ||
| 2090 | |||
| 2091 | /* | ||
| 2092 | * there are two premises: | ||
| 2093 | * a) deliver good data to the caller | ||
| 2094 | * b) correct the bad sectors on disk | ||
| 2095 | */ | ||
| 2096 | if (failed_bio->bi_vcnt > 1) { | ||
| 2097 | /* | ||
| 2098 | * to fulfill b), we need to know the exact failing sectors, as | ||
| 2099 | * we don't want to rewrite any more than the failed ones. thus, | ||
| 2100 | * we need separate read requests for the failed bio | ||
| 2101 | * | ||
| 2102 | * if the following BUG_ON triggers, our validation request got | ||
| 2103 | * merged. we need separate requests for our algorithm to work. | ||
| 2104 | */ | ||
| 2105 | BUG_ON(failrec->in_validation); | ||
| 2106 | failrec->in_validation = 1; | ||
| 2107 | failrec->this_mirror = failed_mirror; | ||
| 2108 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
| 2109 | } else { | ||
| 2110 | /* | ||
| 2111 | * we're ready to fulfill a) and b) at the same time. get a good copy | ||
| 2112 | * of the failed sector and if we succeed, we have setup | ||
| 2113 | * everything for repair_io_failure to do the rest for us. | ||
| 2114 | */ | ||
| 2115 | if (failrec->in_validation) { | ||
| 2116 | BUG_ON(failrec->this_mirror != failed_mirror); | ||
| 2117 | failrec->in_validation = 0; | ||
| 2118 | failrec->this_mirror = 0; | ||
| 2119 | } | ||
| 2120 | failrec->failed_mirror = failed_mirror; | ||
| 2121 | failrec->this_mirror++; | ||
| 2122 | if (failrec->this_mirror == failed_mirror) | ||
| 2123 | failrec->this_mirror++; | ||
| 2124 | read_mode = READ_SYNC; | ||
| 2125 | } | ||
| 2126 | |||
| 2127 | if (!state || failrec->this_mirror > num_copies) { | ||
| 2128 | pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " | ||
| 2129 | "next_mirror %d, failed_mirror %d\n", state, | ||
| 2130 | num_copies, failrec->this_mirror, failed_mirror); | ||
| 2131 | free_io_failure(inode, failrec, 0); | ||
| 2132 | return -EIO; | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 2136 | bio->bi_private = state; | ||
| 2137 | bio->bi_end_io = failed_bio->bi_end_io; | ||
| 2138 | bio->bi_sector = failrec->logical >> 9; | ||
| 2139 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 2140 | bio->bi_size = 0; | ||
| 2141 | |||
| 2142 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | ||
| 2143 | |||
| 2144 | pr_debug("bio_readpage_error: submitting new read[%#x] to " | ||
| 2145 | "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, | ||
| 2146 | failrec->this_mirror, num_copies, failrec->in_validation); | ||
| 2147 | |||
| 2148 | tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, | ||
| 2149 | failrec->bio_flags, 0); | ||
| 2150 | return 0; | ||
| 2151 | } | ||
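
For the single-page case, the retry loop above advances this_mirror from zero, skips the mirror that already failed, and gives up once it steps past num_copies. That stepping, isolated into a runnable model (the in_validation branch for multi-page bios is not modeled):

    #include <stdio.h>

    /* Advance this_mirror, skipping failed_mirror; -1 means give up. */
    static int next_mirror(int this_mirror, int failed_mirror, int num_copies)
    {
            this_mirror++;
            if (this_mirror == failed_mirror)
                    this_mirror++;
            return this_mirror > num_copies ? -1 : this_mirror;
    }

    int main(void)
    {
            int m = 0;      /* failrec->this_mirror starts at 0 */

            while ((m = next_mirror(m, 2, 2)) > 0)
                    printf("retry with mirror %d\n", m);
            return 0;
    }
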
| 2152 | |||
| 1647 | /* lots and lots of room for performance fixes in the end_bio funcs */ | 2153 | /* lots and lots of room for performance fixes in the end_bio funcs */ |
| 1648 | 2154 | ||
| 1649 | /* | 2155 | /* |
| @@ -1742,6 +2248,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 1742 | struct extent_state *cached = NULL; | 2248 | struct extent_state *cached = NULL; |
| 1743 | struct extent_state *state; | 2249 | struct extent_state *state; |
| 1744 | 2250 | ||
| 2251 | pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " | ||
| 2252 | "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, | ||
| 2253 | (long int)bio->bi_bdev); | ||
| 1745 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2254 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 1746 | 2255 | ||
| 1747 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 2256 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
| @@ -1772,12 +2281,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 1772 | state); | 2281 | state); |
| 1773 | if (ret) | 2282 | if (ret) |
| 1774 | uptodate = 0; | 2283 | uptodate = 0; |
| 2284 | else | ||
| 2285 | clean_io_failure(start, page); | ||
| 1775 | } | 2286 | } |
| 1776 | if (!uptodate && tree->ops && | 2287 | if (!uptodate) { |
| 1777 | tree->ops->readpage_io_failed_hook) { | 2288 | int failed_mirror; |
| 1778 | ret = tree->ops->readpage_io_failed_hook(bio, page, | 2289 | failed_mirror = (int)(unsigned long)bio->bi_bdev; |
| 1779 | start, end, NULL); | 2290 | /* |
| 2291 | * The generic bio_readpage_error handles errors the | ||
| 2292 | * following way: If possible, new read requests are | ||
| 2293 | * created and submitted and will end up in | ||
| 2294 | * end_bio_extent_readpage as well (if we're lucky, not | ||
| 2295 | * in the !uptodate case). In that case it returns 0 and | ||
| 2296 | * we just go on with the next page in our bio. If it | ||
| 2297 | * can't handle the error it will return -EIO and we | ||
| 2298 | * remain responsible for that page. | ||
| 2299 | */ | ||
| 2300 | ret = bio_readpage_error(bio, page, start, end, | ||
| 2301 | failed_mirror, NULL); | ||
| 1780 | if (ret == 0) { | 2302 | if (ret == 0) { |
| 2303 | error_handled: | ||
| 1781 | uptodate = | 2304 | uptodate = |
| 1782 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 2305 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 1783 | if (err) | 2306 | if (err) |
| @@ -1785,6 +2308,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 1785 | uncache_state(&cached); | 2308 | uncache_state(&cached); |
| 1786 | continue; | 2309 | continue; |
| 1787 | } | 2310 | } |
| 2311 | if (tree->ops && tree->ops->readpage_io_failed_hook) { | ||
| 2312 | ret = tree->ops->readpage_io_failed_hook( | ||
| 2313 | bio, page, start, end, | ||
| 2314 | failed_mirror, state); | ||
| 2315 | if (ret == 0) | ||
| 2316 | goto error_handled; | ||
| 2317 | } | ||
| 1788 | } | 2318 | } |
| 1789 | 2319 | ||
| 1790 | if (uptodate) { | 2320 | if (uptodate) { |
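
The failed mirror number is recovered here by casting bio->bi_bdev: the read completion path in volumes.c (outside this hunk) repurposes that field to carry the mirror number until end-io, which is also why the pr_debug above prints it. A stand-alone illustration of the int-through-pointer round trip (the struct bio here is a stub, not the kernel's):

    #include <stdio.h>

    struct bio { void *bi_bdev; };

    int main(void)
    {
            struct bio bio;
            int failed_mirror;

            /* submit side: stash the mirror number in the pointer field */
            bio.bi_bdev = (void *)(unsigned long)2;
            /* end-io side: recover it, as end_bio_extent_readpage() does */
            failed_mirror = (int)(unsigned long)bio.bi_bdev;
            printf("failed_mirror=%d\n", failed_mirror);
            return 0;
    }
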
| @@ -1856,6 +2386,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
| 1856 | mirror_num, bio_flags, start); | 2386 | mirror_num, bio_flags, start); |
| 1857 | else | 2387 | else |
| 1858 | submit_bio(rw, bio); | 2388 | submit_bio(rw, bio); |
| 2389 | |||
| 1859 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 2390 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
| 1860 | ret = -EOPNOTSUPP; | 2391 | ret = -EOPNOTSUPP; |
| 1861 | bio_put(bio); | 2392 | bio_put(bio); |
| @@ -2121,16 +2652,16 @@ out: | |||
| 2121 | } | 2652 | } |
| 2122 | 2653 | ||
| 2123 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | 2654 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
| 2124 | get_extent_t *get_extent) | 2655 | get_extent_t *get_extent, int mirror_num) |
| 2125 | { | 2656 | { |
| 2126 | struct bio *bio = NULL; | 2657 | struct bio *bio = NULL; |
| 2127 | unsigned long bio_flags = 0; | 2658 | unsigned long bio_flags = 0; |
| 2128 | int ret; | 2659 | int ret; |
| 2129 | 2660 | ||
| 2130 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, | 2661 | ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, |
| 2131 | &bio_flags); | 2662 | &bio_flags); |
| 2132 | if (bio) | 2663 | if (bio) |
| 2133 | ret = submit_one_bio(READ, bio, 0, bio_flags); | 2664 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); |
| 2134 | return ret; | 2665 | return ret; |
| 2135 | } | 2666 | } |
| 2136 | 2667 | ||
| @@ -2181,6 +2712,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2181 | int compressed; | 2712 | int compressed; |
| 2182 | int write_flags; | 2713 | int write_flags; |
| 2183 | unsigned long nr_written = 0; | 2714 | unsigned long nr_written = 0; |
| 2715 | bool fill_delalloc = true; | ||
| 2184 | 2716 | ||
| 2185 | if (wbc->sync_mode == WB_SYNC_ALL) | 2717 | if (wbc->sync_mode == WB_SYNC_ALL) |
| 2186 | write_flags = WRITE_SYNC; | 2718 | write_flags = WRITE_SYNC; |
| @@ -2190,6 +2722,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2190 | trace___extent_writepage(page, inode, wbc); | 2722 | trace___extent_writepage(page, inode, wbc); |
| 2191 | 2723 | ||
| 2192 | WARN_ON(!PageLocked(page)); | 2724 | WARN_ON(!PageLocked(page)); |
| 2725 | |||
| 2726 | ClearPageError(page); | ||
| 2727 | |||
| 2193 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2728 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
| 2194 | if (page->index > end_index || | 2729 | if (page->index > end_index || |
| 2195 | (page->index == end_index && !pg_offset)) { | 2730 | (page->index == end_index && !pg_offset)) { |
| @@ -2211,10 +2746,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2211 | 2746 | ||
| 2212 | set_page_extent_mapped(page); | 2747 | set_page_extent_mapped(page); |
| 2213 | 2748 | ||
| 2749 | if (!tree->ops || !tree->ops->fill_delalloc) | ||
| 2750 | fill_delalloc = false; | ||
| 2751 | |||
| 2214 | delalloc_start = start; | 2752 | delalloc_start = start; |
| 2215 | delalloc_end = 0; | 2753 | delalloc_end = 0; |
| 2216 | page_started = 0; | 2754 | page_started = 0; |
| 2217 | if (!epd->extent_locked) { | 2755 | if (!epd->extent_locked && fill_delalloc) { |
| 2218 | u64 delalloc_to_write = 0; | 2756 | u64 delalloc_to_write = 0; |
| 2219 | /* | 2757 | /* |
| 2220 | * make sure the wbc mapping index is at least updated | 2758 | * make sure the wbc mapping index is at least updated |
| @@ -2432,6 +2970,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2432 | pgoff_t index; | 2970 | pgoff_t index; |
| 2433 | pgoff_t end; /* Inclusive */ | 2971 | pgoff_t end; /* Inclusive */ |
| 2434 | int scanned = 0; | 2972 | int scanned = 0; |
| 2973 | int tag; | ||
| 2435 | 2974 | ||
| 2436 | pagevec_init(&pvec, 0); | 2975 | pagevec_init(&pvec, 0); |
| 2437 | if (wbc->range_cyclic) { | 2976 | if (wbc->range_cyclic) { |
| @@ -2442,11 +2981,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2981 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
| 2443 | scanned = 1; | 2982 | scanned = 1; |
| 2444 | } | 2983 | } |
| 2984 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
| 2985 | tag = PAGECACHE_TAG_TOWRITE; | ||
| 2986 | else | ||
| 2987 | tag = PAGECACHE_TAG_DIRTY; | ||
| 2445 | retry: | 2988 | retry: |
| 2989 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
| 2990 | tag_pages_for_writeback(mapping, index, end); | ||
| 2446 | while (!done && !nr_to_write_done && (index <= end) && | 2991 | while (!done && !nr_to_write_done && (index <= end) && |
| 2447 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2992 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
| 2448 | PAGECACHE_TAG_DIRTY, min(end - index, | 2993 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
| 2449 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
| 2450 | unsigned i; | 2994 | unsigned i; |
| 2451 | 2995 | ||
| 2452 | scanned = 1; | 2996 | scanned = 1; |
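
For WB_SYNC_ALL, the lookup above now walks PAGECACHE_TAG_TOWRITE, a snapshot that tag_pages_for_writeback() takes before the loop starts, so pages dirtied while the sync runs cannot keep extending the scan. A toy model of why the snapshot prevents that livelock (flag values are invented):

    #include <stdio.h>

    #define DIRTY   0x1
    #define TOWRITE 0x2

    int main(void)
    {
            unsigned char tag[6] = { DIRTY, DIRTY, 0, DIRTY, 0, 0 };
            int i;

            /* tag_pages_for_writeback(): snapshot the dirty set */
            for (i = 0; i < 6; i++)
                    if (tag[i] & DIRTY)
                            tag[i] |= TOWRITE;

            tag[4] |= DIRTY;        /* page dirtied during the sync */

            /* the sync walks TOWRITE only, so page 4 waits its turn */
            for (i = 0; i < 6; i++)
                    if (tag[i] & TOWRITE)
                            printf("writing page %d\n", i);
            return 0;
    }
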
| @@ -2460,10 +3004,16 @@ retry: | |||
| 2460 | * swizzled back from swapper_space to tmpfs file | 3004 | * swizzled back from swapper_space to tmpfs file |
| 2461 | * mapping | 3005 | * mapping |
| 2462 | */ | 3006 | */ |
| 2463 | if (tree->ops && tree->ops->write_cache_pages_lock_hook) | 3007 | if (tree->ops && |
| 2464 | tree->ops->write_cache_pages_lock_hook(page); | 3008 | tree->ops->write_cache_pages_lock_hook) { |
| 2465 | else | 3009 | tree->ops->write_cache_pages_lock_hook(page, |
| 2466 | lock_page(page); | 3010 | data, flush_fn); |
| 3011 | } else { | ||
| 3012 | if (!trylock_page(page)) { | ||
| 3013 | flush_fn(data); | ||
| 3014 | lock_page(page); | ||
| 3015 | } | ||
| 3016 | } | ||
| 2467 | 3017 | ||
| 2468 | if (unlikely(page->mapping != mapping)) { | 3018 | if (unlikely(page->mapping != mapping)) { |
| 2469 | unlock_page(page); | 3019 | unlock_page(page); |
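
The locking above now tries the page lock first and, on contention, flushes the bio being assembled before sleeping: the queued bio may hold the very page we are about to wait on. A pthread-based model of the trylock-then-flush shape (illustrative only; lock_page() is a page bit, not a mutex):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

    static void flush_fn(void *data)
    {
            printf("flushing pending writes before blocking\n");
    }

    static void lock_page_or_flush(void *data)
    {
            /* trylock returns nonzero on contention */
            if (pthread_mutex_trylock(&page_lock)) {
                    flush_fn(data);
                    pthread_mutex_lock(&page_lock);
            }
    }

    int main(void)
    {
            lock_page_or_flush(NULL);   /* uncontended here, no flush */
            pthread_mutex_unlock(&page_lock);
            return 0;
    }
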
| @@ -2541,7 +3091,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 2541 | struct writeback_control *wbc) | 3091 | struct writeback_control *wbc) |
| 2542 | { | 3092 | { |
| 2543 | int ret; | 3093 | int ret; |
| 2544 | struct address_space *mapping = page->mapping; | ||
| 2545 | struct extent_page_data epd = { | 3094 | struct extent_page_data epd = { |
| 2546 | .bio = NULL, | 3095 | .bio = NULL, |
| 2547 | .tree = tree, | 3096 | .tree = tree, |
| @@ -2549,18 +3098,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 2549 | .extent_locked = 0, | 3098 | .extent_locked = 0, |
| 2550 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3099 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 2551 | }; | 3100 | }; |
| 2552 | struct writeback_control wbc_writepages = { | ||
| 2553 | .sync_mode = wbc->sync_mode, | ||
| 2554 | .older_than_this = NULL, | ||
| 2555 | .nr_to_write = 64, | ||
| 2556 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | ||
| 2557 | .range_end = (loff_t)-1, | ||
| 2558 | }; | ||
| 2559 | 3101 | ||
| 2560 | ret = __extent_writepage(page, wbc, &epd); | 3102 | ret = __extent_writepage(page, wbc, &epd); |
| 2561 | 3103 | ||
| 2562 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | ||
| 2563 | __extent_writepage, &epd, flush_write_bio); | ||
| 2564 | flush_epd_write_bio(&epd); | 3104 | flush_epd_write_bio(&epd); |
| 2565 | return ret; | 3105 | return ret; |
| 2566 | } | 3106 | } |
| @@ -2584,7 +3124,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
| 2584 | }; | 3124 | }; |
| 2585 | struct writeback_control wbc_writepages = { | 3125 | struct writeback_control wbc_writepages = { |
| 2586 | .sync_mode = mode, | 3126 | .sync_mode = mode, |
| 2587 | .older_than_this = NULL, | ||
| 2588 | .nr_to_write = nr_pages * 2, | 3127 | .nr_to_write = nr_pages * 2, |
| 2589 | .range_start = start, | 3128 | .range_start = start, |
| 2590 | .range_end = end + 1, | 3129 | .range_end = end + 1, |
| @@ -2840,6 +3379,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
| 2840 | return -ENOMEM; | 3379 | return -ENOMEM; |
| 2841 | path->leave_spinning = 1; | 3380 | path->leave_spinning = 1; |
| 2842 | 3381 | ||
| 3382 | start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); | ||
| 3383 | len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); | ||
| 3384 | |||
| 2843 | /* | 3385 | /* |
| 2844 | * lookup the last file extent. We're not using i_size here | 3386 | * lookup the last file extent. We're not using i_size here |
| 2845 | * because there might be preallocation past i_size | 3387 | * because there might be preallocation past i_size |
| @@ -2887,7 +3429,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
| 2887 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 3429 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
| 2888 | &cached_state, GFP_NOFS); | 3430 | &cached_state, GFP_NOFS); |
| 2889 | 3431 | ||
| 2890 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | 3432 | em = get_extent_skip_holes(inode, start, last_for_get_extent, |
| 2891 | get_extent); | 3433 | get_extent); |
| 2892 | if (!em) | 3434 | if (!em) |
| 2893 | goto out; | 3435 | goto out; |
| @@ -2976,7 +3518,7 @@ out: | |||
| 2976 | return ret; | 3518 | return ret; |
| 2977 | } | 3519 | } |
| 2978 | 3520 | ||
| 2979 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | 3521 | inline struct page *extent_buffer_page(struct extent_buffer *eb, |
| 2980 | unsigned long i) | 3522 | unsigned long i) |
| 2981 | { | 3523 | { |
| 2982 | struct page *p; | 3524 | struct page *p; |
| @@ -3001,7 +3543,7 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, | |||
| 3001 | return p; | 3543 | return p; |
| 3002 | } | 3544 | } |
| 3003 | 3545 | ||
| 3004 | static inline unsigned long num_extent_pages(u64 start, u64 len) | 3546 | inline unsigned long num_extent_pages(u64 start, u64 len) |
| 3005 | { | 3547 | { |
| 3006 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | 3548 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - |
| 3007 | (start >> PAGE_CACHE_SHIFT); | 3549 | (start >> PAGE_CACHE_SHIFT); |
| @@ -3022,8 +3564,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 3022 | return NULL; | 3564 | return NULL; |
| 3023 | eb->start = start; | 3565 | eb->start = start; |
| 3024 | eb->len = len; | 3566 | eb->len = len; |
| 3025 | spin_lock_init(&eb->lock); | 3567 | rwlock_init(&eb->lock); |
| 3026 | init_waitqueue_head(&eb->lock_wq); | 3568 | atomic_set(&eb->write_locks, 0); |
| 3569 | atomic_set(&eb->read_locks, 0); | ||
| 3570 | atomic_set(&eb->blocking_readers, 0); | ||
| 3571 | atomic_set(&eb->blocking_writers, 0); | ||
| 3572 | atomic_set(&eb->spinning_readers, 0); | ||
| 3573 | atomic_set(&eb->spinning_writers, 0); | ||
| 3574 | init_waitqueue_head(&eb->write_lock_wq); | ||
| 3575 | init_waitqueue_head(&eb->read_lock_wq); | ||
| 3027 | 3576 | ||
| 3028 | #if LEAK_DEBUG | 3577 | #if LEAK_DEBUG |
| 3029 | spin_lock_irqsave(&leak_lock, flags); | 3578 | spin_lock_irqsave(&leak_lock, flags); |
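
The extent_buffer lock above grows from one spinlock plus a single waitqueue into an rwlock with per-kind counters and waitqueues, groundwork for btrfs's spinning/blocking tree locks in which readers share the buffer and either side can drop into a blocking wait. As a rough analogy only (this does not model the spinning/blocking handoff), readers share and writers exclude much like a pthread rwlock:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t eb_lock = PTHREAD_RWLOCK_INITIALIZER;

    int main(void)
    {
            pthread_rwlock_rdlock(&eb_lock);    /* readers can share */
            printf("reader holds the lock\n");
            pthread_rwlock_unlock(&eb_lock);

            pthread_rwlock_wrlock(&eb_lock);    /* writer excludes all */
            printf("writer holds the lock\n");
            pthread_rwlock_unlock(&eb_lock);
            return 0;
    }
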
| @@ -3119,7 +3668,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 3119 | i = 0; | 3668 | i = 0; |
| 3120 | } | 3669 | } |
| 3121 | for (; i < num_pages; i++, index++) { | 3670 | for (; i < num_pages; i++, index++) { |
| 3122 | p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); | 3671 | p = find_or_create_page(mapping, index, GFP_NOFS); |
| 3123 | if (!p) { | 3672 | if (!p) { |
| 3124 | WARN_ON(1); | 3673 | WARN_ON(1); |
| 3125 | goto free_eb; | 3674 | goto free_eb; |
| @@ -3247,6 +3796,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
| 3247 | PAGECACHE_TAG_DIRTY); | 3796 | PAGECACHE_TAG_DIRTY); |
| 3248 | } | 3797 | } |
| 3249 | spin_unlock_irq(&page->mapping->tree_lock); | 3798 | spin_unlock_irq(&page->mapping->tree_lock); |
| 3799 | ClearPageError(page); | ||
| 3250 | unlock_page(page); | 3800 | unlock_page(page); |
| 3251 | } | 3801 | } |
| 3252 | return 0; | 3802 | return 0; |
| @@ -3266,6 +3816,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
| 3266 | return was_dirty; | 3816 | return was_dirty; |
| 3267 | } | 3817 | } |
| 3268 | 3818 | ||
| 3819 | static int __eb_straddles_pages(u64 start, u64 len) | ||
| 3820 | { | ||
| 3821 | if (len < PAGE_CACHE_SIZE) | ||
| 3822 | return 1; | ||
| 3823 | if (start & (PAGE_CACHE_SIZE - 1)) | ||
| 3824 | return 1; | ||
| 3825 | if ((start + len) & (PAGE_CACHE_SIZE - 1)) | ||
| 3826 | return 1; | ||
| 3827 | return 0; | ||
| 3828 | } | ||
| 3829 | |||
| 3830 | static int eb_straddles_pages(struct extent_buffer *eb) | ||
| 3831 | { | ||
| 3832 | return __eb_straddles_pages(eb->start, eb->len); | ||
| 3833 | } | ||
| 3834 | |||
| 3269 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3835 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
| 3270 | struct extent_buffer *eb, | 3836 | struct extent_buffer *eb, |
| 3271 | struct extent_state **cached_state) | 3837 | struct extent_state **cached_state) |
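
__eb_straddles_pages() above reports whether a buffer shares a page with anything else: sub-page buffers, or buffers whose start or end is not page aligned. Only straddling buffers still need the per-range EXTENT_UPTODATE tracking in the io tree; fully page-aligned ones can rely on per-page flags alone, which is what the checks added below exploit. The same test, runnable with sample values (4K pages assumed):

    #include <stdio.h>

    #define PAGE_CACHE_SIZE 4096ULL

    /* Same test as __eb_straddles_pages() in the hunk above. */
    static int straddles(unsigned long long start, unsigned long long len)
    {
            if (len < PAGE_CACHE_SIZE)
                    return 1;
            if (start & (PAGE_CACHE_SIZE - 1))
                    return 1;
            if ((start + len) & (PAGE_CACHE_SIZE - 1))
                    return 1;
            return 0;
    }

    int main(void)
    {
            /* page-aligned 16K buffer: covers its pages exactly */
            printf("%d\n", straddles(16384, 16384));    /* prints 0 */
            /* 2K buffer: shares its page with neighbors */
            printf("%d\n", straddles(16384, 2048));     /* prints 1 */
            return 0;
    }
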
| @@ -3277,8 +3843,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3277 | num_pages = num_extent_pages(eb->start, eb->len); | 3843 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3278 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3844 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
| 3279 | 3845 | ||
| 3280 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3846 | if (eb_straddles_pages(eb)) { |
| 3281 | cached_state, GFP_NOFS); | 3847 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
| 3848 | cached_state, GFP_NOFS); | ||
| 3849 | } | ||
| 3282 | for (i = 0; i < num_pages; i++) { | 3850 | for (i = 0; i < num_pages; i++) { |
| 3283 | page = extent_buffer_page(eb, i); | 3851 | page = extent_buffer_page(eb, i); |
| 3284 | if (page) | 3852 | if (page) |
| @@ -3296,8 +3864,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3296 | 3864 | ||
| 3297 | num_pages = num_extent_pages(eb->start, eb->len); | 3865 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3298 | 3866 | ||
| 3299 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3867 | if (eb_straddles_pages(eb)) { |
| 3300 | NULL, GFP_NOFS); | 3868 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
| 3869 | NULL, GFP_NOFS); | ||
| 3870 | } | ||
| 3301 | for (i = 0; i < num_pages; i++) { | 3871 | for (i = 0; i < num_pages; i++) { |
| 3302 | page = extent_buffer_page(eb, i); | 3872 | page = extent_buffer_page(eb, i); |
| 3303 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3873 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
| @@ -3320,9 +3890,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
| 3320 | int uptodate; | 3890 | int uptodate; |
| 3321 | unsigned long index; | 3891 | unsigned long index; |
| 3322 | 3892 | ||
| 3323 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); | 3893 | if (__eb_straddles_pages(start, end - start + 1)) { |
| 3324 | if (ret) | 3894 | ret = test_range_bit(tree, start, end, |
| 3325 | return 1; | 3895 | EXTENT_UPTODATE, 1, NULL); |
| 3896 | if (ret) | ||
| 3897 | return 1; | ||
| 3898 | } | ||
| 3326 | while (start <= end) { | 3899 | while (start <= end) { |
| 3327 | index = start >> PAGE_CACHE_SHIFT; | 3900 | index = start >> PAGE_CACHE_SHIFT; |
| 3328 | page = find_get_page(tree->mapping, index); | 3901 | page = find_get_page(tree->mapping, index); |
| @@ -3350,10 +3923,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3350 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3923 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
| 3351 | return 1; | 3924 | return 1; |
| 3352 | 3925 | ||
| 3353 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3926 | if (eb_straddles_pages(eb)) { |
| 3354 | EXTENT_UPTODATE, 1, cached_state); | 3927 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3355 | if (ret) | 3928 | EXTENT_UPTODATE, 1, cached_state); |
| 3356 | return ret; | 3929 | if (ret) |
| 3930 | return ret; | ||
| 3931 | } | ||
| 3357 | 3932 | ||
| 3358 | num_pages = num_extent_pages(eb->start, eb->len); | 3933 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3359 | for (i = 0; i < num_pages; i++) { | 3934 | for (i = 0; i < num_pages; i++) { |
| @@ -3367,8 +3942,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3367 | } | 3942 | } |
| 3368 | 3943 | ||
| 3369 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 3944 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
| 3370 | struct extent_buffer *eb, | 3945 | struct extent_buffer *eb, u64 start, int wait, |
| 3371 | u64 start, int wait, | ||
| 3372 | get_extent_t *get_extent, int mirror_num) | 3946 | get_extent_t *get_extent, int mirror_num) |
| 3373 | { | 3947 | { |
| 3374 | unsigned long i; | 3948 | unsigned long i; |
| @@ -3386,9 +3960,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3386 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3960 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
| 3387 | return 0; | 3961 | return 0; |
| 3388 | 3962 | ||
| 3389 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3963 | if (eb_straddles_pages(eb)) { |
| 3390 | EXTENT_UPTODATE, 1, NULL)) { | 3964 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3391 | return 0; | 3965 | EXTENT_UPTODATE, 1, NULL)) { |
| 3966 | return 0; | ||
| 3967 | } | ||
| 3392 | } | 3968 | } |
| 3393 | 3969 | ||
| 3394 | if (start) { | 3970 | if (start) { |
| @@ -3402,7 +3978,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3402 | num_pages = num_extent_pages(eb->start, eb->len); | 3978 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3403 | for (i = start_i; i < num_pages; i++) { | 3979 | for (i = start_i; i < num_pages; i++) { |
| 3404 | page = extent_buffer_page(eb, i); | 3980 | page = extent_buffer_page(eb, i); |
| 3405 | if (!wait) { | 3981 | if (wait == WAIT_NONE) { |
| 3406 | if (!trylock_page(page)) | 3982 | if (!trylock_page(page)) |
| 3407 | goto unlock_exit; | 3983 | goto unlock_exit; |
| 3408 | } else { | 3984 | } else { |
| @@ -3446,7 +4022,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3446 | if (bio) | 4022 | if (bio) |
| 3447 | submit_one_bio(READ, bio, mirror_num, bio_flags); | 4023 | submit_one_bio(READ, bio, mirror_num, bio_flags); |
| 3448 | 4024 | ||
| 3449 | if (ret || !wait) | 4025 | if (ret || wait != WAIT_COMPLETE) |
| 3450 | return ret; | 4026 | return ret; |
| 3451 | 4027 | ||
| 3452 | for (i = start_i; i < num_pages; i++) { | 4028 | for (i = start_i; i < num_pages; i++) { |
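These hunks turn read_extent_buffer_pages()'s old boolean wait into named levels: WAIT_NONE trylocks pages and bails out on contention, while only WAIT_COMPLETE goes on to wait for the submitted IO to finish. Just those two names appear in this file; the enum itself, and any middle level such as WAIT_PAGE_LOCK, is presumably defined elsewhere in the patch, so the values below are an assumption. A control-flow sketch:

#include <stdio.h>

enum read_wait {
	WAIT_NONE,        /* trylock pages, never block, return early */
	WAIT_PAGE_LOCK,   /* assumed middle level: block on locks only */
	WAIT_COMPLETE,    /* block until every page finishes its read */
};

static int read_pages(enum read_wait wait)
{
	int ret = 0;

	if (wait == WAIT_NONE)
		puts("trylock each page; give up on contention");
	else
		puts("lock_page(): sleep until each page is available");

	puts("submit the read bio");

	if (ret || wait != WAIT_COMPLETE)
		return ret;   /* caller did not ask to see the IO finish */

	puts("wait_on_page_locked + uptodate check per page");
	return ret;
}

int main(void)
{
	read_pages(WAIT_NONE);
	read_pages(WAIT_COMPLETE);
	return 0;
}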
| @@ -3492,9 +4068,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
| 3492 | page = extent_buffer_page(eb, i); | 4068 | page = extent_buffer_page(eb, i); |
| 3493 | 4069 | ||
| 3494 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 4070 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
| 3495 | kaddr = kmap_atomic(page, KM_USER1); | 4071 | kaddr = page_address(page); |
| 3496 | memcpy(dst, kaddr + offset, cur); | 4072 | memcpy(dst, kaddr + offset, cur); |
| 3497 | kunmap_atomic(kaddr, KM_USER1); | ||
| 3498 | 4073 | ||
| 3499 | dst += cur; | 4074 | dst += cur; |
| 3500 | len -= cur; | 4075 | len -= cur; |
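This is the first of several identical conversions in the file: a kmap_atomic(..., KM_USERx)/kunmap_atomic() pair becomes a bare page_address() call. That is only valid if the pages always have a kernel mapping, i.e. they were allocated without __GFP_HIGHMEM; the allocation side is not shown in these hunks, so treat that as the patch's working assumption. The shape of the change, as a standalone stub (page_stub/page_addr are illustrative, not kernel API):

#include <stdio.h>
#include <string.h>

struct page_stub { char data[4096]; };

static char *page_addr(struct page_stub *p)
{
	return p->data;          /* lowmem page: mapping always exists */
}

static void write_chunk(struct page_stub *p, size_t off,
			const void *src, size_t len)
{
	/* old: kaddr = kmap_atomic(page, KM_USER1); ... kunmap_atomic() */
	char *kaddr = page_addr(p);   /* new: no pin, no unmap, no KM slot */

	memcpy(kaddr + off, src, len);
}

int main(void)
{
	struct page_stub pg;

	write_chunk(&pg, 10, "btrfs", 5);
	printf("%.5s\n", pg.data + 10);
	return 0;
}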
| @@ -3504,9 +4079,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
| 3504 | } | 4079 | } |
| 3505 | 4080 | ||
| 3506 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 4081 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
| 3507 | unsigned long min_len, char **token, char **map, | 4082 | unsigned long min_len, char **map, |
| 3508 | unsigned long *map_start, | 4083 | unsigned long *map_start, |
| 3509 | unsigned long *map_len, int km) | 4084 | unsigned long *map_len) |
| 3510 | { | 4085 | { |
| 3511 | size_t offset = start & (PAGE_CACHE_SIZE - 1); | 4086 | size_t offset = start & (PAGE_CACHE_SIZE - 1); |
| 3512 | char *kaddr; | 4087 | char *kaddr; |
| @@ -3536,42 +4111,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
| 3536 | } | 4111 | } |
| 3537 | 4112 | ||
| 3538 | p = extent_buffer_page(eb, i); | 4113 | p = extent_buffer_page(eb, i); |
| 3539 | kaddr = kmap_atomic(p, km); | 4114 | kaddr = page_address(p); |
| 3540 | *token = kaddr; | ||
| 3541 | *map = kaddr + offset; | 4115 | *map = kaddr + offset; |
| 3542 | *map_len = PAGE_CACHE_SIZE - offset; | 4116 | *map_len = PAGE_CACHE_SIZE - offset; |
| 3543 | return 0; | 4117 | return 0; |
| 3544 | } | 4118 | } |
| 3545 | 4119 | ||
| 3546 | int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
| 3547 | unsigned long min_len, | ||
| 3548 | char **token, char **map, | ||
| 3549 | unsigned long *map_start, | ||
| 3550 | unsigned long *map_len, int km) | ||
| 3551 | { | ||
| 3552 | int err; | ||
| 3553 | int save = 0; | ||
| 3554 | if (eb->map_token) { | ||
| 3555 | unmap_extent_buffer(eb, eb->map_token, km); | ||
| 3556 | eb->map_token = NULL; | ||
| 3557 | save = 1; | ||
| 3558 | } | ||
| 3559 | err = map_private_extent_buffer(eb, start, min_len, token, map, | ||
| 3560 | map_start, map_len, km); | ||
| 3561 | if (!err && save) { | ||
| 3562 | eb->map_token = *token; | ||
| 3563 | eb->kaddr = *map; | ||
| 3564 | eb->map_start = *map_start; | ||
| 3565 | eb->map_len = *map_len; | ||
| 3566 | } | ||
| 3567 | return err; | ||
| 3568 | } | ||
| 3569 | |||
| 3570 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) | ||
| 3571 | { | ||
| 3572 | kunmap_atomic(token, km); | ||
| 3573 | } | ||
| 3574 | |||
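With nothing left to kunmap, the token and KM-slot plumbing loses its purpose: map_private_extent_buffer() drops the token/km parameters, and the map_extent_buffer()/unmap_extent_buffer() wrappers that cached a mapping on the eb are deleted outright. What remains is offset arithmetic: hand the caller a pointer plus how many bytes stay valid on the same page. An illustrative userspace sketch, assuming PAGE_SHIFT == 12 and with fake_pages[] standing in for extent_buffer_page() (none of this is the kernel API):

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 4096UL

static char fake_pages[4][PAGE_SZ];   /* pretend 16K extent buffer */

static int map_private(unsigned long start, unsigned long min_len,
		       char **map, unsigned long *map_start,
		       unsigned long *map_len)
{
	unsigned long offset = start & (PAGE_SZ - 1);
	unsigned long i = start >> 12;   /* page index */

	if (offset + min_len > PAGE_SZ)
		return -1;               /* request spans a page boundary */

	*map = fake_pages[i] + offset;   /* page_address() equivalent */
	*map_start = start - offset;     /* first byte the mapping covers */
	*map_len = PAGE_SZ - offset;     /* bytes valid past *map */
	return 0;
}

int main(void)
{
	char *p;
	unsigned long ms, ml;

	strcpy(fake_pages[1] + 904, "hello");    /* buffer offset 5000 */
	if (!map_private(5000, 5, &p, &ms, &ml))
		printf("%s map_start=%lu map_len=%lu\n", p, ms, ml);
	return 0;
}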
| 3575 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | 4120 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, |
| 3576 | unsigned long start, | 4121 | unsigned long start, |
| 3577 | unsigned long len) | 4122 | unsigned long len) |
| @@ -3595,9 +4140,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
| 3595 | 4140 | ||
| 3596 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 4141 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
| 3597 | 4142 | ||
| 3598 | kaddr = kmap_atomic(page, KM_USER0); | 4143 | kaddr = page_address(page); |
| 3599 | ret = memcmp(ptr, kaddr + offset, cur); | 4144 | ret = memcmp(ptr, kaddr + offset, cur); |
| 3600 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3601 | if (ret) | 4145 | if (ret) |
| 3602 | break; | 4146 | break; |
| 3603 | 4147 | ||
| @@ -3630,9 +4174,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
| 3630 | WARN_ON(!PageUptodate(page)); | 4174 | WARN_ON(!PageUptodate(page)); |
| 3631 | 4175 | ||
| 3632 | cur = min(len, PAGE_CACHE_SIZE - offset); | 4176 | cur = min(len, PAGE_CACHE_SIZE - offset); |
| 3633 | kaddr = kmap_atomic(page, KM_USER1); | 4177 | kaddr = page_address(page); |
| 3634 | memcpy(kaddr + offset, src, cur); | 4178 | memcpy(kaddr + offset, src, cur); |
| 3635 | kunmap_atomic(kaddr, KM_USER1); | ||
| 3636 | 4179 | ||
| 3637 | src += cur; | 4180 | src += cur; |
| 3638 | len -= cur; | 4181 | len -= cur; |
| @@ -3661,9 +4204,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
| 3661 | WARN_ON(!PageUptodate(page)); | 4204 | WARN_ON(!PageUptodate(page)); |
| 3662 | 4205 | ||
| 3663 | cur = min(len, PAGE_CACHE_SIZE - offset); | 4206 | cur = min(len, PAGE_CACHE_SIZE - offset); |
| 3664 | kaddr = kmap_atomic(page, KM_USER0); | 4207 | kaddr = page_address(page); |
| 3665 | memset(kaddr + offset, c, cur); | 4208 | memset(kaddr + offset, c, cur); |
| 3666 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3667 | 4209 | ||
| 3668 | len -= cur; | 4210 | len -= cur; |
| 3669 | offset = 0; | 4211 | offset = 0; |
| @@ -3694,9 +4236,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
| 3694 | 4236 | ||
| 3695 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 4237 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
| 3696 | 4238 | ||
| 3697 | kaddr = kmap_atomic(page, KM_USER0); | 4239 | kaddr = page_address(page); |
| 3698 | read_extent_buffer(src, kaddr + offset, src_offset, cur); | 4240 | read_extent_buffer(src, kaddr + offset, src_offset, cur); |
| 3699 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3700 | 4241 | ||
| 3701 | src_offset += cur; | 4242 | src_offset += cur; |
| 3702 | len -= cur; | 4243 | len -= cur; |
| @@ -3709,20 +4250,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
| 3709 | unsigned long dst_off, unsigned long src_off, | 4250 | unsigned long dst_off, unsigned long src_off, |
| 3710 | unsigned long len) | 4251 | unsigned long len) |
| 3711 | { | 4252 | { |
| 3712 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 4253 | char *dst_kaddr = page_address(dst_page); |
| 3713 | if (dst_page == src_page) { | 4254 | if (dst_page == src_page) { |
| 3714 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); | 4255 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); |
| 3715 | } else { | 4256 | } else { |
| 3716 | char *src_kaddr = kmap_atomic(src_page, KM_USER1); | 4257 | char *src_kaddr = page_address(src_page); |
| 3717 | char *p = dst_kaddr + dst_off + len; | 4258 | char *p = dst_kaddr + dst_off + len; |
| 3718 | char *s = src_kaddr + src_off + len; | 4259 | char *s = src_kaddr + src_off + len; |
| 3719 | 4260 | ||
| 3720 | while (len--) | 4261 | while (len--) |
| 3721 | *--p = *--s; | 4262 | *--p = *--s; |
| 3722 | |||
| 3723 | kunmap_atomic(src_kaddr, KM_USER1); | ||
| 3724 | } | 4263 | } |
| 3725 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
| 3726 | } | 4264 | } |
| 3727 | 4265 | ||
| 3728 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | 4266 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) |
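move_pages() above lets memmove() handle overlap when both offsets land on the same page; across two distinct pages it keeps the tail-first byte loop, which is the direction memmove_extent_buffer() depends on when it walks an overlapping range from the end. Copying tail-first is safe exactly when the destination sits above the source; for the opposite layout a forward copy would be the safe direction. The loop, lifted into a standalone function:

#include <stdio.h>

static void move_backwards(char *dst, const char *src, unsigned long len)
{
	char *p = dst + len;
	const char *s = src + len;

	while (len--)
		*--p = *--s;     /* tail first, so src bytes aren't clobbered */
}

int main(void)
{
	char buf[16] = "abcdefgh";

	move_backwards(buf + 2, buf, 8);   /* overlapping, dst > src */
	printf("%.10s\n", buf);            /* "ababcdefgh" */
	return 0;
}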
| @@ -3735,20 +4273,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
| 3735 | unsigned long dst_off, unsigned long src_off, | 4273 | unsigned long dst_off, unsigned long src_off, |
| 3736 | unsigned long len) | 4274 | unsigned long len) |
| 3737 | { | 4275 | { |
| 3738 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 4276 | char *dst_kaddr = page_address(dst_page); |
| 3739 | char *src_kaddr; | 4277 | char *src_kaddr; |
| 3740 | 4278 | ||
| 3741 | if (dst_page != src_page) { | 4279 | if (dst_page != src_page) { |
| 3742 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 4280 | src_kaddr = page_address(src_page); |
| 3743 | } else { | 4281 | } else { |
| 3744 | src_kaddr = dst_kaddr; | 4282 | src_kaddr = dst_kaddr; |
| 3745 | BUG_ON(areas_overlap(src_off, dst_off, len)); | 4283 | BUG_ON(areas_overlap(src_off, dst_off, len)); |
| 3746 | } | 4284 | } |
| 3747 | 4285 | ||
| 3748 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 4286 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
| 3749 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
| 3750 | if (dst_page != src_page) | ||
| 3751 | kunmap_atomic(src_kaddr, KM_USER1); | ||
| 3752 | } | 4287 | } |
| 3753 | 4288 | ||
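copy_pages() uses plain memcpy(), so when source and destination share a page it asserts the ranges are disjoint: memcpy() on overlapping memory is undefined. The body of areas_overlap() is not visible in these hunks; the distance test below is the conventional form and is an assumption here:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool areas_overlap(unsigned long src, unsigned long dst,
			  unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;

	return distance < len;   /* ranges share at least one byte */
}

int main(void)
{
	char page[64] = "0123456789";

	assert(!areas_overlap(0, 32, 10));   /* disjoint: memcpy is fine */
	memcpy(page + 32, page, 10);
	printf("%.10s\n", page + 32);

	assert(areas_overlap(0, 4, 10));     /* would trip the BUG_ON */
	return 0;
}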
| 3754 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | 4289 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
