author     Chris Mason <chris.mason@oracle.com>    2008-01-29 09:59:12 -0500
committer  Chris Mason <chris.mason@oracle.com>    2008-09-25 11:03:59 -0400
commit     70dec8079d78691e476cc6c7cede40656078ad30 (patch)
tree       19ab80f247fc026c3f5a5713381e98372737227b /fs/btrfs/extent_io.c
parent     aadfeb6e39ad6bde080cb3ab23f4da57ccb25f4a (diff)
Btrfs: extent_io and extent_state optimizations
The end_bio routines are changed to take a pointer to the extent state
struct, and the state tree is walked in order to set/clear appropriate
bits as IO completes. This greatly reduces the number of rbtree searches
done by the end_bio handlers, and reduces lock contention.
The extent_io releasepage function is changed to avoid expensive searches
for locked state.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
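The core of the end_bio change in the diff below is that bio->bi_private now carries the extent_state covering the last byte of the I/O, and the completion handler walks backwards through the state tree from that cached state, clearing bits as it goes, instead of issuing a fresh rbtree search for every page. A minimal sketch of that walk, assuming the helpers, types and tree->lock defined in fs/btrfs/extent_io.c (the function name here is invented for illustration; the real handlers also fall back to __tree_search() when the cached state does not match the bio's range):

```c
/*
 * Sketch of the backwards walk done by the end_io handlers in this
 * patch.  Caller holds tree->lock and has already verified that
 * 'state' ends exactly at the last byte of the completed range and
 * still has 'bits' (EXTENT_WRITEBACK or EXTENT_LOCKED) set.
 */
static void end_io_clear_range(struct extent_io_tree *tree,
			       struct extent_state *state,
			       u64 start, int bits)
{
	struct rb_node *node;
	u64 cur;

	while (1) {
		struct extent_state *clear = state;

		cur = state->start;
		/* step to the previous extent before clearing, because
		 * clear_state_bit() may erase and free 'clear' */
		node = rb_prev(&state->rb_node);
		state = node ? rb_entry(node, struct extent_state, rb_node)
			     : NULL;

		clear_state_bit(tree, clear, bits, 1, 0);
		if (cur <= start || !node)
			break;
	}
}
```

In the real handlers this walk runs once per bio vector, and a state pointer that survives the walk is reused for the next vector when its range is contiguous, which is what keeps the number of tree searches low even when bios are merged.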
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  358
1 file changed, 263 insertions(+), 95 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 15cc158a0498..1f734c34dc24 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -27,13 +27,11 @@ static struct kmem_cache *extent_buffer_cache; | |||
27 | static LIST_HEAD(buffers); | 27 | static LIST_HEAD(buffers); |
28 | static LIST_HEAD(states); | 28 | static LIST_HEAD(states); |
29 | 29 | ||
30 | static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; | ||
31 | #define BUFFER_LRU_MAX 64 | 30 | #define BUFFER_LRU_MAX 64 |
32 | 31 | ||
33 | struct tree_entry { | 32 | struct tree_entry { |
34 | u64 start; | 33 | u64 start; |
35 | u64 end; | 34 | u64 end; |
36 | int in_tree; | ||
37 | struct rb_node rb_node; | 35 | struct rb_node rb_node; |
38 | }; | 36 | }; |
39 | 37 | ||
@@ -69,7 +67,7 @@ void extent_io_exit(void) | |||
69 | 67 | ||
70 | while (!list_empty(&states)) { | 68 | while (!list_empty(&states)) { |
71 | state = list_entry(states.next, struct extent_state, list); | 69 | state = list_entry(states.next, struct extent_state, list); |
72 | printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); | 70 | printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs)); |
73 | list_del(&state->list); | 71 | list_del(&state->list); |
74 | kmem_cache_free(extent_state_cache, state); | 72 | kmem_cache_free(extent_state_cache, state); |
75 | 73 | ||
@@ -87,7 +85,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, | |||
87 | tree->state.rb_node = NULL; | 85 | tree->state.rb_node = NULL; |
88 | tree->ops = NULL; | 86 | tree->ops = NULL; |
89 | tree->dirty_bytes = 0; | 87 | tree->dirty_bytes = 0; |
90 | rwlock_init(&tree->lock); | 88 | spin_lock_init(&tree->lock); |
91 | spin_lock_init(&tree->lru_lock); | 89 | spin_lock_init(&tree->lru_lock); |
92 | tree->mapping = mapping; | 90 | tree->mapping = mapping; |
93 | INIT_LIST_HEAD(&tree->buffer_lru); | 91 | INIT_LIST_HEAD(&tree->buffer_lru); |
@@ -110,18 +108,13 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru); | |||
110 | struct extent_state *alloc_extent_state(gfp_t mask) | 108 | struct extent_state *alloc_extent_state(gfp_t mask) |
111 | { | 109 | { |
112 | struct extent_state *state; | 110 | struct extent_state *state; |
113 | unsigned long flags; | ||
114 | 111 | ||
115 | state = kmem_cache_alloc(extent_state_cache, mask); | 112 | state = kmem_cache_alloc(extent_state_cache, mask); |
116 | if (!state || IS_ERR(state)) | 113 | if (!state || IS_ERR(state)) |
117 | return state; | 114 | return state; |
118 | state->state = 0; | 115 | state->state = 0; |
119 | state->in_tree = 0; | ||
120 | state->private = 0; | 116 | state->private = 0; |
121 | 117 | state->tree = NULL; | |
122 | spin_lock_irqsave(&state_lock, flags); | ||
123 | list_add(&state->list, &states); | ||
124 | spin_unlock_irqrestore(&state_lock, flags); | ||
125 | 118 | ||
126 | atomic_set(&state->refs, 1); | 119 | atomic_set(&state->refs, 1); |
127 | init_waitqueue_head(&state->wq); | 120 | init_waitqueue_head(&state->wq); |
@@ -131,14 +124,10 @@ EXPORT_SYMBOL(alloc_extent_state); | |||
131 | 124 | ||
132 | void free_extent_state(struct extent_state *state) | 125 | void free_extent_state(struct extent_state *state) |
133 | { | 126 | { |
134 | unsigned long flags; | ||
135 | if (!state) | 127 | if (!state) |
136 | return; | 128 | return; |
137 | if (atomic_dec_and_test(&state->refs)) { | 129 | if (atomic_dec_and_test(&state->refs)) { |
138 | WARN_ON(state->in_tree); | 130 | WARN_ON(state->tree); |
139 | spin_lock_irqsave(&state_lock, flags); | ||
140 | list_del(&state->list); | ||
141 | spin_unlock_irqrestore(&state_lock, flags); | ||
142 | kmem_cache_free(extent_state_cache, state); | 131 | kmem_cache_free(extent_state_cache, state); |
143 | } | 132 | } |
144 | } | 133 | } |
@@ -164,7 +153,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
164 | } | 153 | } |
165 | 154 | ||
166 | entry = rb_entry(node, struct tree_entry, rb_node); | 155 | entry = rb_entry(node, struct tree_entry, rb_node); |
167 | entry->in_tree = 1; | ||
168 | rb_link_node(node, parent, p); | 156 | rb_link_node(node, parent, p); |
169 | rb_insert_color(node, root); | 157 | rb_insert_color(node, root); |
170 | return NULL; | 158 | return NULL; |
@@ -216,8 +204,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
216 | 204 | ||
217 | static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) | 205 | static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) |
218 | { | 206 | { |
219 | struct rb_node *prev; | 207 | struct rb_node *prev = NULL; |
220 | struct rb_node *ret; | 208 | struct rb_node *ret; |
209 | |||
221 | ret = __tree_search(root, offset, &prev, NULL); | 210 | ret = __tree_search(root, offset, &prev, NULL); |
222 | if (!ret) | 211 | if (!ret) |
223 | return prev; | 212 | return prev; |
@@ -248,7 +237,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
248 | if (other->end == state->start - 1 && | 237 | if (other->end == state->start - 1 && |
249 | other->state == state->state) { | 238 | other->state == state->state) { |
250 | state->start = other->start; | 239 | state->start = other->start; |
251 | other->in_tree = 0; | 240 | other->tree = NULL; |
252 | rb_erase(&other->rb_node, &tree->state); | 241 | rb_erase(&other->rb_node, &tree->state); |
253 | free_extent_state(other); | 242 | free_extent_state(other); |
254 | } | 243 | } |
@@ -259,7 +248,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
259 | if (other->start == state->end + 1 && | 248 | if (other->start == state->end + 1 && |
260 | other->state == state->state) { | 249 | other->state == state->state) { |
261 | other->start = state->start; | 250 | other->start = state->start; |
262 | state->in_tree = 0; | 251 | state->tree = NULL; |
263 | rb_erase(&state->rb_node, &tree->state); | 252 | rb_erase(&state->rb_node, &tree->state); |
264 | free_extent_state(state); | 253 | free_extent_state(state); |
265 | } | 254 | } |
@@ -300,6 +289,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
300 | free_extent_state(state); | 289 | free_extent_state(state); |
301 | return -EEXIST; | 290 | return -EEXIST; |
302 | } | 291 | } |
292 | state->tree = tree; | ||
303 | merge_state(tree, state); | 293 | merge_state(tree, state); |
304 | return 0; | 294 | return 0; |
305 | } | 295 | } |
@@ -335,6 +325,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
335 | free_extent_state(prealloc); | 325 | free_extent_state(prealloc); |
336 | return -EEXIST; | 326 | return -EEXIST; |
337 | } | 327 | } |
328 | prealloc->tree = tree; | ||
338 | return 0; | 329 | return 0; |
339 | } | 330 | } |
340 | 331 | ||
@@ -361,9 +352,9 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
361 | if (wake) | 352 | if (wake) |
362 | wake_up(&state->wq); | 353 | wake_up(&state->wq); |
363 | if (delete || state->state == 0) { | 354 | if (delete || state->state == 0) { |
364 | if (state->in_tree) { | 355 | if (state->tree) { |
365 | rb_erase(&state->rb_node, &tree->state); | 356 | rb_erase(&state->rb_node, &tree->state); |
366 | state->in_tree = 0; | 357 | state->tree = NULL; |
367 | free_extent_state(state); | 358 | free_extent_state(state); |
368 | } else { | 359 | } else { |
369 | WARN_ON(1); | 360 | WARN_ON(1); |
@@ -404,7 +395,7 @@ again: | |||
404 | return -ENOMEM; | 395 | return -ENOMEM; |
405 | } | 396 | } |
406 | 397 | ||
407 | write_lock_irqsave(&tree->lock, flags); | 398 | spin_lock_irqsave(&tree->lock, flags); |
408 | /* | 399 | /* |
409 | * this search will find the extents that end after | 400 | * this search will find the extents that end after |
410 | * our range starts | 401 | * our range starts |
@@ -434,6 +425,8 @@ again: | |||
434 | */ | 425 | */ |
435 | 426 | ||
436 | if (state->start < start) { | 427 | if (state->start < start) { |
428 | if (!prealloc) | ||
429 | prealloc = alloc_extent_state(GFP_ATOMIC); | ||
437 | err = split_state(tree, state, prealloc, start); | 430 | err = split_state(tree, state, prealloc, start); |
438 | BUG_ON(err == -EEXIST); | 431 | BUG_ON(err == -EEXIST); |
439 | prealloc = NULL; | 432 | prealloc = NULL; |
@@ -455,6 +448,8 @@ again: | |||
455 | * on the first half | 448 | * on the first half |
456 | */ | 449 | */ |
457 | if (state->start <= end && state->end > end) { | 450 | if (state->start <= end && state->end > end) { |
451 | if (!prealloc) | ||
452 | prealloc = alloc_extent_state(GFP_ATOMIC); | ||
458 | err = split_state(tree, state, prealloc, end + 1); | 453 | err = split_state(tree, state, prealloc, end + 1); |
459 | BUG_ON(err == -EEXIST); | 454 | BUG_ON(err == -EEXIST); |
460 | 455 | ||
@@ -471,7 +466,7 @@ again: | |||
471 | goto search_again; | 466 | goto search_again; |
472 | 467 | ||
473 | out: | 468 | out: |
474 | write_unlock_irqrestore(&tree->lock, flags); | 469 | spin_unlock_irqrestore(&tree->lock, flags); |
475 | if (prealloc) | 470 | if (prealloc) |
476 | free_extent_state(prealloc); | 471 | free_extent_state(prealloc); |
477 | 472 | ||
@@ -480,7 +475,7 @@ out: | |||
480 | search_again: | 475 | search_again: |
481 | if (start > end) | 476 | if (start > end) |
482 | goto out; | 477 | goto out; |
483 | write_unlock_irqrestore(&tree->lock, flags); | 478 | spin_unlock_irqrestore(&tree->lock, flags); |
484 | if (mask & __GFP_WAIT) | 479 | if (mask & __GFP_WAIT) |
485 | cond_resched(); | 480 | cond_resched(); |
486 | goto again; | 481 | goto again; |
@@ -492,9 +487,9 @@ static int wait_on_state(struct extent_io_tree *tree, | |||
492 | { | 487 | { |
493 | DEFINE_WAIT(wait); | 488 | DEFINE_WAIT(wait); |
494 | prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); | 489 | prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); |
495 | read_unlock_irq(&tree->lock); | 490 | spin_unlock_irq(&tree->lock); |
496 | schedule(); | 491 | schedule(); |
497 | read_lock_irq(&tree->lock); | 492 | spin_lock_irq(&tree->lock); |
498 | finish_wait(&state->wq, &wait); | 493 | finish_wait(&state->wq, &wait); |
499 | return 0; | 494 | return 0; |
500 | } | 495 | } |
@@ -509,7 +504,7 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) | |||
509 | struct extent_state *state; | 504 | struct extent_state *state; |
510 | struct rb_node *node; | 505 | struct rb_node *node; |
511 | 506 | ||
512 | read_lock_irq(&tree->lock); | 507 | spin_lock_irq(&tree->lock); |
513 | again: | 508 | again: |
514 | while (1) { | 509 | while (1) { |
515 | /* | 510 | /* |
@@ -538,13 +533,13 @@ again: | |||
538 | break; | 533 | break; |
539 | 534 | ||
540 | if (need_resched()) { | 535 | if (need_resched()) { |
541 | read_unlock_irq(&tree->lock); | 536 | spin_unlock_irq(&tree->lock); |
542 | cond_resched(); | 537 | cond_resched(); |
543 | read_lock_irq(&tree->lock); | 538 | spin_lock_irq(&tree->lock); |
544 | } | 539 | } |
545 | } | 540 | } |
546 | out: | 541 | out: |
547 | read_unlock_irq(&tree->lock); | 542 | spin_unlock_irq(&tree->lock); |
548 | return 0; | 543 | return 0; |
549 | } | 544 | } |
550 | EXPORT_SYMBOL(wait_extent_bit); | 545 | EXPORT_SYMBOL(wait_extent_bit); |
@@ -589,7 +584,7 @@ again: | |||
589 | return -ENOMEM; | 584 | return -ENOMEM; |
590 | } | 585 | } |
591 | 586 | ||
592 | write_lock_irqsave(&tree->lock, flags); | 587 | spin_lock_irqsave(&tree->lock, flags); |
593 | /* | 588 | /* |
594 | * this search will find all the extents that end after | 589 | * this search will find all the extents that end after |
595 | * our range starts. | 590 | * our range starts. |
@@ -709,7 +704,7 @@ again: | |||
709 | goto search_again; | 704 | goto search_again; |
710 | 705 | ||
711 | out: | 706 | out: |
712 | write_unlock_irqrestore(&tree->lock, flags); | 707 | spin_unlock_irqrestore(&tree->lock, flags); |
713 | if (prealloc) | 708 | if (prealloc) |
714 | free_extent_state(prealloc); | 709 | free_extent_state(prealloc); |
715 | 710 | ||
@@ -718,7 +713,7 @@ out: | |||
718 | search_again: | 713 | search_again: |
719 | if (start > end) | 714 | if (start > end) |
720 | goto out; | 715 | goto out; |
721 | write_unlock_irqrestore(&tree->lock, flags); | 716 | spin_unlock_irqrestore(&tree->lock, flags); |
722 | if (mask & __GFP_WAIT) | 717 | if (mask & __GFP_WAIT) |
723 | cond_resched(); | 718 | cond_resched(); |
724 | goto again; | 719 | goto again; |
@@ -817,10 +812,6 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
817 | } | 812 | } |
818 | EXPORT_SYMBOL(wait_on_extent_writeback); | 813 | EXPORT_SYMBOL(wait_on_extent_writeback); |
819 | 814 | ||
820 | /* | ||
821 | * locks a range in ascending order, waiting for any locked regions | ||
822 | * it hits on the way. [start,end] are inclusive, and this will sleep. | ||
823 | */ | ||
824 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 815 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) |
825 | { | 816 | { |
826 | int err; | 817 | int err; |
@@ -896,7 +887,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | |||
896 | struct extent_state *state; | 887 | struct extent_state *state; |
897 | int ret = 1; | 888 | int ret = 1; |
898 | 889 | ||
899 | read_lock_irq(&tree->lock); | 890 | spin_lock_irq(&tree->lock); |
900 | /* | 891 | /* |
901 | * this search will find all the extents that end after | 892 | * this search will find all the extents that end after |
902 | * our range starts. | 893 | * our range starts. |
@@ -919,7 +910,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | |||
919 | break; | 910 | break; |
920 | } | 911 | } |
921 | out: | 912 | out: |
922 | read_unlock_irq(&tree->lock); | 913 | spin_unlock_irq(&tree->lock); |
923 | return ret; | 914 | return ret; |
924 | } | 915 | } |
925 | EXPORT_SYMBOL(find_first_extent_bit); | 916 | EXPORT_SYMBOL(find_first_extent_bit); |
@@ -933,7 +924,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, | |||
933 | u64 found = 0; | 924 | u64 found = 0; |
934 | u64 total_bytes = 0; | 925 | u64 total_bytes = 0; |
935 | 926 | ||
936 | write_lock_irq(&tree->lock); | 927 | spin_lock_irq(&tree->lock); |
937 | /* | 928 | /* |
938 | * this search will find all the extents that end after | 929 | * this search will find all the extents that end after |
939 | * our range starts. | 930 | * our range starts. |
@@ -976,9 +967,9 @@ search_again: | |||
976 | atomic_inc(&state->refs); | 967 | atomic_inc(&state->refs); |
977 | prepare_to_wait(&state->wq, &wait, | 968 | prepare_to_wait(&state->wq, &wait, |
978 | TASK_UNINTERRUPTIBLE); | 969 | TASK_UNINTERRUPTIBLE); |
979 | write_unlock_irq(&tree->lock); | 970 | spin_unlock_irq(&tree->lock); |
980 | schedule(); | 971 | schedule(); |
981 | write_lock_irq(&tree->lock); | 972 | spin_lock_irq(&tree->lock); |
982 | finish_wait(&state->wq, &wait); | 973 | finish_wait(&state->wq, &wait); |
983 | free_extent_state(state); | 974 | free_extent_state(state); |
984 | goto search_again; | 975 | goto search_again; |
@@ -997,7 +988,7 @@ search_again: | |||
997 | break; | 988 | break; |
998 | } | 989 | } |
999 | out: | 990 | out: |
1000 | write_unlock_irq(&tree->lock); | 991 | spin_unlock_irq(&tree->lock); |
1001 | return found; | 992 | return found; |
1002 | } | 993 | } |
1003 | 994 | ||
@@ -1017,7 +1008,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1017 | return 0; | 1008 | return 0; |
1018 | } | 1009 | } |
1019 | 1010 | ||
1020 | write_lock_irq(&tree->lock); | 1011 | spin_lock_irq(&tree->lock); |
1021 | if (cur_start == 0 && bits == EXTENT_DIRTY) { | 1012 | if (cur_start == 0 && bits == EXTENT_DIRTY) { |
1022 | total_bytes = tree->dirty_bytes; | 1013 | total_bytes = tree->dirty_bytes; |
1023 | goto out; | 1014 | goto out; |
@@ -1050,7 +1041,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1050 | break; | 1041 | break; |
1051 | } | 1042 | } |
1052 | out: | 1043 | out: |
1053 | write_unlock_irq(&tree->lock); | 1044 | spin_unlock_irq(&tree->lock); |
1054 | return total_bytes; | 1045 | return total_bytes; |
1055 | } | 1046 | } |
1056 | /* | 1047 | /* |
@@ -1122,7 +1113,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) | |||
1122 | struct extent_state *state; | 1113 | struct extent_state *state; |
1123 | int ret = 0; | 1114 | int ret = 0; |
1124 | 1115 | ||
1125 | write_lock_irq(&tree->lock); | 1116 | spin_lock_irq(&tree->lock); |
1126 | /* | 1117 | /* |
1127 | * this search will find all the extents that end after | 1118 | * this search will find all the extents that end after |
1128 | * our range starts. | 1119 | * our range starts. |
@@ -1139,7 +1130,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) | |||
1139 | } | 1130 | } |
1140 | state->private = private; | 1131 | state->private = private; |
1141 | out: | 1132 | out: |
1142 | write_unlock_irq(&tree->lock); | 1133 | spin_unlock_irq(&tree->lock); |
1143 | return ret; | 1134 | return ret; |
1144 | } | 1135 | } |
1145 | 1136 | ||
@@ -1149,7 +1140,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) | |||
1149 | struct extent_state *state; | 1140 | struct extent_state *state; |
1150 | int ret = 0; | 1141 | int ret = 0; |
1151 | 1142 | ||
1152 | read_lock_irq(&tree->lock); | 1143 | spin_lock_irq(&tree->lock); |
1153 | /* | 1144 | /* |
1154 | * this search will find all the extents that end after | 1145 | * this search will find all the extents that end after |
1155 | * our range starts. | 1146 | * our range starts. |
@@ -1166,13 +1157,13 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) | |||
1166 | } | 1157 | } |
1167 | *private = state->private; | 1158 | *private = state->private; |
1168 | out: | 1159 | out: |
1169 | read_unlock_irq(&tree->lock); | 1160 | spin_unlock_irq(&tree->lock); |
1170 | return ret; | 1161 | return ret; |
1171 | } | 1162 | } |
1172 | 1163 | ||
1173 | /* | 1164 | /* |
1174 | * searches a range in the state tree for a given mask. | 1165 | * searches a range in the state tree for a given mask. |
1175 | * If 'filled' == 1, this returns 1 only if ever extent in the tree | 1166 | * If 'filled' == 1, this returns 1 only if every extent in the tree |
1176 | * has the bits set. Otherwise, 1 is returned if any bit in the | 1167 | * has the bits set. Otherwise, 1 is returned if any bit in the |
1177 | * range is found set. | 1168 | * range is found set. |
1178 | */ | 1169 | */ |
@@ -1184,7 +1175,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1184 | int bitset = 0; | 1175 | int bitset = 0; |
1185 | unsigned long flags; | 1176 | unsigned long flags; |
1186 | 1177 | ||
1187 | read_lock_irqsave(&tree->lock, flags); | 1178 | spin_lock_irqsave(&tree->lock, flags); |
1188 | node = tree_search(&tree->state, start); | 1179 | node = tree_search(&tree->state, start); |
1189 | while (node && start <= end) { | 1180 | while (node && start <= end) { |
1190 | state = rb_entry(node, struct extent_state, rb_node); | 1181 | state = rb_entry(node, struct extent_state, rb_node); |
@@ -1215,7 +1206,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1215 | break; | 1206 | break; |
1216 | } | 1207 | } |
1217 | } | 1208 | } |
1218 | read_unlock_irqrestore(&tree->lock, flags); | 1209 | spin_unlock_irqrestore(&tree->lock, flags); |
1219 | return bitset; | 1210 | return bitset; |
1220 | } | 1211 | } |
1221 | EXPORT_SYMBOL(test_range_bit); | 1212 | EXPORT_SYMBOL(test_range_bit); |
@@ -1282,16 +1273,19 @@ static int end_bio_extent_writepage(struct bio *bio, | |||
1282 | { | 1273 | { |
1283 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1274 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
1284 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 1275 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
1285 | struct extent_io_tree *tree = bio->bi_private; | 1276 | struct extent_state *state = bio->bi_private; |
1277 | struct extent_io_tree *tree = state->tree; | ||
1278 | struct rb_node *node; | ||
1286 | u64 start; | 1279 | u64 start; |
1287 | u64 end; | 1280 | u64 end; |
1281 | u64 cur; | ||
1288 | int whole_page; | 1282 | int whole_page; |
1283 | unsigned long flags; | ||
1289 | 1284 | ||
1290 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) | 1285 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) |
1291 | if (bio->bi_size) | 1286 | if (bio->bi_size) |
1292 | return 1; | 1287 | return 1; |
1293 | #endif | 1288 | #endif |
1294 | |||
1295 | do { | 1289 | do { |
1296 | struct page *page = bvec->bv_page; | 1290 | struct page *page = bvec->bv_page; |
1297 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 1291 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
@@ -1311,16 +1305,80 @@ static int end_bio_extent_writepage(struct bio *bio, | |||
1311 | ClearPageUptodate(page); | 1305 | ClearPageUptodate(page); |
1312 | SetPageError(page); | 1306 | SetPageError(page); |
1313 | } | 1307 | } |
1314 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | 1308 | |
1309 | if (tree->ops && tree->ops->writepage_end_io_hook) { | ||
1310 | tree->ops->writepage_end_io_hook(page, start, end, | ||
1311 | state); | ||
1312 | } | ||
1313 | |||
1314 | /* | ||
1315 | * bios can get merged in funny ways, and so we need to | ||
1316 | * be careful with the state variable. We know the | ||
1317 | * state won't be merged with others because it has | ||
1318 | * WRITEBACK set, but we can't be sure each biovec is | ||
1319 | * sequential in the file. So, if our cached state | ||
1320 | * doesn't match the expected end, search the tree | ||
1321 | * for the correct one. | ||
1322 | */ | ||
1323 | |||
1324 | spin_lock_irqsave(&tree->lock, flags); | ||
1325 | if (!state || state->end != end) { | ||
1326 | state = NULL; | ||
1327 | node = __tree_search(&tree->state, start, NULL, NULL); | ||
1328 | if (node) { | ||
1329 | state = rb_entry(node, struct extent_state, | ||
1330 | rb_node); | ||
1331 | if (state->end != end || | ||
1332 | !(state->state & EXTENT_WRITEBACK)) | ||
1333 | state = NULL; | ||
1334 | } | ||
1335 | if (!state) { | ||
1336 | spin_unlock_irqrestore(&tree->lock, flags); | ||
1337 | clear_extent_writeback(tree, start, | ||
1338 | end, GFP_ATOMIC); | ||
1339 | goto next_io; | ||
1340 | } | ||
1341 | } | ||
1342 | cur = end; | ||
1343 | while(1) { | ||
1344 | struct extent_state *clear = state; | ||
1345 | cur = state->start; | ||
1346 | node = rb_prev(&state->rb_node); | ||
1347 | if (node) { | ||
1348 | state = rb_entry(node, | ||
1349 | struct extent_state, | ||
1350 | rb_node); | ||
1351 | } else { | ||
1352 | state = NULL; | ||
1353 | } | ||
1354 | |||
1355 | clear_state_bit(tree, clear, EXTENT_WRITEBACK, | ||
1356 | 1, 0); | ||
1357 | if (cur == start) | ||
1358 | break; | ||
1359 | if (cur < start) { | ||
1360 | WARN_ON(1); | ||
1361 | break; | ||
1362 | } | ||
1363 | if (!node) | ||
1364 | break; | ||
1365 | } | ||
1366 | /* before releasing the lock, make sure the next state | ||
1367 | * variable has the expected bits set and corresponds | ||
1368 | * to the correct offsets in the file | ||
1369 | */ | ||
1370 | if (state && (state->end + 1 != start || | ||
1371 | !state->state & EXTENT_WRITEBACK)) { | ||
1372 | state = NULL; | ||
1373 | } | ||
1374 | spin_unlock_irqrestore(&tree->lock, flags); | ||
1375 | next_io: | ||
1315 | 1376 | ||
1316 | if (whole_page) | 1377 | if (whole_page) |
1317 | end_page_writeback(page); | 1378 | end_page_writeback(page); |
1318 | else | 1379 | else |
1319 | check_page_writeback(tree, page); | 1380 | check_page_writeback(tree, page); |
1320 | if (tree->ops && tree->ops->writepage_end_io_hook) | ||
1321 | tree->ops->writepage_end_io_hook(page, start, end); | ||
1322 | } while (bvec >= bio->bi_io_vec); | 1381 | } while (bvec >= bio->bi_io_vec); |
1323 | |||
1324 | bio_put(bio); | 1382 | bio_put(bio); |
1325 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) | 1383 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) |
1326 | return 0; | 1384 | return 0; |
@@ -1347,9 +1405,13 @@ static int end_bio_extent_readpage(struct bio *bio, | |||
1347 | { | 1405 | { |
1348 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1406 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
1349 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 1407 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
1350 | struct extent_io_tree *tree = bio->bi_private; | 1408 | struct extent_state *state = bio->bi_private; |
1409 | struct extent_io_tree *tree = state->tree; | ||
1410 | struct rb_node *node; | ||
1351 | u64 start; | 1411 | u64 start; |
1352 | u64 end; | 1412 | u64 end; |
1413 | u64 cur; | ||
1414 | unsigned long flags; | ||
1353 | int whole_page; | 1415 | int whole_page; |
1354 | int ret; | 1416 | int ret; |
1355 | 1417 | ||
@@ -1373,27 +1435,83 @@ static int end_bio_extent_readpage(struct bio *bio, | |||
1373 | prefetchw(&bvec->bv_page->flags); | 1435 | prefetchw(&bvec->bv_page->flags); |
1374 | 1436 | ||
1375 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 1437 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
1376 | ret = tree->ops->readpage_end_io_hook(page, start, end); | 1438 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
1439 | state); | ||
1377 | if (ret) | 1440 | if (ret) |
1378 | uptodate = 0; | 1441 | uptodate = 0; |
1379 | } | 1442 | } |
1380 | if (uptodate) { | ||
1381 | set_extent_uptodate(tree, start, end, GFP_ATOMIC); | ||
1382 | if (whole_page) | ||
1383 | SetPageUptodate(page); | ||
1384 | else | ||
1385 | check_page_uptodate(tree, page); | ||
1386 | } else { | ||
1387 | ClearPageUptodate(page); | ||
1388 | SetPageError(page); | ||
1389 | } | ||
1390 | 1443 | ||
1391 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1444 | spin_lock_irqsave(&tree->lock, flags); |
1445 | if (!state || state->end != end) { | ||
1446 | state = NULL; | ||
1447 | node = __tree_search(&tree->state, start, NULL, NULL); | ||
1448 | if (node) { | ||
1449 | state = rb_entry(node, struct extent_state, | ||
1450 | rb_node); | ||
1451 | if (state->end != end || | ||
1452 | !(state->state & EXTENT_LOCKED)) | ||
1453 | state = NULL; | ||
1454 | } | ||
1455 | if (!state) { | ||
1456 | spin_unlock_irqrestore(&tree->lock, flags); | ||
1457 | set_extent_uptodate(tree, start, end, | ||
1458 | GFP_ATOMIC); | ||
1459 | unlock_extent(tree, start, end, GFP_ATOMIC); | ||
1460 | goto next_io; | ||
1461 | } | ||
1462 | } | ||
1392 | 1463 | ||
1393 | if (whole_page) | 1464 | cur = end; |
1465 | while(1) { | ||
1466 | struct extent_state *clear = state; | ||
1467 | cur = state->start; | ||
1468 | node = rb_prev(&state->rb_node); | ||
1469 | if (node) { | ||
1470 | state = rb_entry(node, | ||
1471 | struct extent_state, | ||
1472 | rb_node); | ||
1473 | } else { | ||
1474 | state = NULL; | ||
1475 | } | ||
1476 | clear->state |= EXTENT_UPTODATE; | ||
1477 | clear_state_bit(tree, clear, EXTENT_LOCKED, | ||
1478 | 1, 0); | ||
1479 | if (cur == start) | ||
1480 | break; | ||
1481 | if (cur < start) { | ||
1482 | WARN_ON(1); | ||
1483 | break; | ||
1484 | } | ||
1485 | if (!node) | ||
1486 | break; | ||
1487 | } | ||
1488 | /* before releasing the lock, make sure the next state | ||
1489 | * variable has the expected bits set and corresponds | ||
1490 | * to the correct offsets in the file | ||
1491 | */ | ||
1492 | if (state && (state->end + 1 != start || | ||
1493 | !state->state & EXTENT_WRITEBACK)) { | ||
1494 | state = NULL; | ||
1495 | } | ||
1496 | spin_unlock_irqrestore(&tree->lock, flags); | ||
1497 | next_io: | ||
1498 | if (whole_page) { | ||
1499 | if (uptodate) { | ||
1500 | SetPageUptodate(page); | ||
1501 | } else { | ||
1502 | ClearPageUptodate(page); | ||
1503 | SetPageError(page); | ||
1504 | } | ||
1394 | unlock_page(page); | 1505 | unlock_page(page); |
1395 | else | 1506 | } else { |
1507 | if (uptodate) { | ||
1508 | check_page_uptodate(tree, page); | ||
1509 | } else { | ||
1510 | ClearPageUptodate(page); | ||
1511 | SetPageError(page); | ||
1512 | } | ||
1396 | check_page_locked(tree, page); | 1513 | check_page_locked(tree, page); |
1514 | } | ||
1397 | } while (bvec >= bio->bi_io_vec); | 1515 | } while (bvec >= bio->bi_io_vec); |
1398 | 1516 | ||
1399 | bio_put(bio); | 1517 | bio_put(bio); |
@@ -1416,7 +1534,8 @@ static int end_bio_extent_preparewrite(struct bio *bio, | |||
1416 | { | 1534 | { |
1417 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1535 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
1418 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 1536 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
1419 | struct extent_io_tree *tree = bio->bi_private; | 1537 | struct extent_state *state = bio->bi_private; |
1538 | struct extent_io_tree *tree = state->tree; | ||
1420 | u64 start; | 1539 | u64 start; |
1421 | u64 end; | 1540 | u64 end; |
1422 | 1541 | ||
@@ -1475,6 +1594,29 @@ static int submit_one_bio(int rw, struct bio *bio) | |||
1475 | { | 1594 | { |
1476 | u64 maxsector; | 1595 | u64 maxsector; |
1477 | int ret = 0; | 1596 | int ret = 0; |
1597 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
1598 | struct page *page = bvec->bv_page; | ||
1599 | struct extent_io_tree *tree = bio->bi_private; | ||
1600 | struct rb_node *node; | ||
1601 | struct extent_state *state; | ||
1602 | u64 start; | ||
1603 | u64 end; | ||
1604 | |||
1605 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; | ||
1606 | end = start + bvec->bv_len - 1; | ||
1607 | |||
1608 | spin_lock_irq(&tree->lock); | ||
1609 | node = __tree_search(&tree->state, start, NULL, NULL); | ||
1610 | BUG_ON(!node); | ||
1611 | state = rb_entry(node, struct extent_state, rb_node); | ||
1612 | while(state->end < end) { | ||
1613 | node = rb_next(node); | ||
1614 | state = rb_entry(node, struct extent_state, rb_node); | ||
1615 | } | ||
1616 | BUG_ON(state->end != end); | ||
1617 | spin_unlock_irq(&tree->lock); | ||
1618 | |||
1619 | bio->bi_private = state; | ||
1478 | 1620 | ||
1479 | bio_get(bio); | 1621 | bio_get(bio); |
1480 | 1622 | ||
@@ -1519,9 +1661,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1519 | if (!bio) { | 1661 | if (!bio) { |
1520 | printk("failed to allocate bio nr %d\n", nr); | 1662 | printk("failed to allocate bio nr %d\n", nr); |
1521 | } | 1663 | } |
1664 | |||
1665 | |||
1522 | bio_add_page(bio, page, size, offset); | 1666 | bio_add_page(bio, page, size, offset); |
1523 | bio->bi_end_io = end_io_func; | 1667 | bio->bi_end_io = end_io_func; |
1524 | bio->bi_private = tree; | 1668 | bio->bi_private = tree; |
1669 | |||
1525 | if (bio_ret) { | 1670 | if (bio_ret) { |
1526 | *bio_ret = bio; | 1671 | *bio_ret = bio; |
1527 | } else { | 1672 | } else { |
@@ -1635,6 +1780,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
1635 | page_offset += iosize; | 1780 | page_offset += iosize; |
1636 | continue; | 1781 | continue; |
1637 | } | 1782 | } |
1783 | /* we have an inline extent but it didn't get marked up | ||
1784 | * to date. Error out | ||
1785 | */ | ||
1786 | if (block_start == EXTENT_MAP_INLINE) { | ||
1787 | SetPageError(page); | ||
1788 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
1789 | cur = cur + iosize; | ||
1790 | page_offset += iosize; | ||
1791 | continue; | ||
1792 | } | ||
1638 | 1793 | ||
1639 | ret = 0; | 1794 | ret = 0; |
1640 | if (tree->ops && tree->ops->readpage_io_hook) { | 1795 | if (tree->ops && tree->ops->readpage_io_hook) { |
@@ -2205,7 +2360,8 @@ EXPORT_SYMBOL(extent_prepare_write); | |||
2205 | * map records are removed | 2360 | * map records are removed |
2206 | */ | 2361 | */ |
2207 | int try_release_extent_mapping(struct extent_map_tree *map, | 2362 | int try_release_extent_mapping(struct extent_map_tree *map, |
2208 | struct extent_io_tree *tree, struct page *page) | 2363 | struct extent_io_tree *tree, struct page *page, |
2364 | gfp_t mask) | ||
2209 | { | 2365 | { |
2210 | struct extent_map *em; | 2366 | struct extent_map *em; |
2211 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 2367 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
@@ -2213,30 +2369,42 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
2213 | u64 orig_start = start; | 2369 | u64 orig_start = start; |
2214 | int ret = 1; | 2370 | int ret = 1; |
2215 | 2371 | ||
2216 | while (start <= end) { | 2372 | if ((mask & __GFP_WAIT) && |
2217 | spin_lock(&map->lock); | 2373 | page->mapping->host->i_size > 16 * 1024 * 1024) { |
2218 | em = lookup_extent_mapping(map, start, end); | 2374 | while (start <= end) { |
2219 | if (!em || IS_ERR(em)) { | 2375 | spin_lock(&map->lock); |
2376 | em = lookup_extent_mapping(map, start, end); | ||
2377 | if (!em || IS_ERR(em)) { | ||
2378 | spin_unlock(&map->lock); | ||
2379 | break; | ||
2380 | } | ||
2381 | if (em->start != start) { | ||
2382 | spin_unlock(&map->lock); | ||
2383 | free_extent_map(em); | ||
2384 | break; | ||
2385 | } | ||
2386 | if (!test_range_bit(tree, em->start, | ||
2387 | extent_map_end(em) - 1, | ||
2388 | EXTENT_LOCKED, 0)) { | ||
2389 | remove_extent_mapping(map, em); | ||
2390 | /* once for the rb tree */ | ||
2391 | free_extent_map(em); | ||
2392 | } | ||
2393 | start = extent_map_end(em); | ||
2220 | spin_unlock(&map->lock); | 2394 | spin_unlock(&map->lock); |
2221 | break; | 2395 | |
2222 | } | 2396 | /* once for us */ |
2223 | if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, | ||
2224 | EXTENT_LOCKED, 0)) { | ||
2225 | remove_extent_mapping(map, em); | ||
2226 | /* once for the rb tree */ | ||
2227 | free_extent_map(em); | 2397 | free_extent_map(em); |
2228 | } | 2398 | } |
2229 | start = extent_map_end(em); | ||
2230 | spin_unlock(&map->lock); | ||
2231 | |||
2232 | /* once for us */ | ||
2233 | free_extent_map(em); | ||
2234 | } | 2399 | } |
2235 | if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) | 2400 | if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0)) |
2236 | ret = 0; | 2401 | ret = 0; |
2237 | else | 2402 | else { |
2403 | if ((mask & GFP_NOFS) == GFP_NOFS) | ||
2404 | mask = GFP_NOFS; | ||
2238 | clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, | 2405 | clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, |
2239 | 1, 1, GFP_NOFS); | 2406 | 1, 1, mask); |
2407 | } | ||
2240 | return ret; | 2408 | return ret; |
2241 | } | 2409 | } |
2242 | EXPORT_SYMBOL(try_release_extent_mapping); | 2410 | EXPORT_SYMBOL(try_release_extent_mapping); |
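The hunk above is the releasepage half of the optimization: the extent-map scan now only runs when the allocator may sleep and the inode is larger than 16MB, and a single EXTENT_IOBITS range test replaces the old search for locked state. A condensed sketch of the resulting control flow, paraphrased from the hunk (the function name is invented here; the helpers and constants are the ones used in extent_io.c, and the real code also adjusts the gfp mask before the final clear):

```c
/* Condensed shape of try_release_extent_mapping() after this patch;
 * not a drop-in copy of the real function above. */
static int releasepage_sketch(struct extent_map_tree *map,
			      struct extent_io_tree *tree,
			      struct page *page, gfp_t mask)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	/* only pay for dropping cached extent maps when the caller can
	 * sleep and the file is big enough for it to matter */
	if ((mask & __GFP_WAIT) &&
	    page->mapping->host->i_size > 16 * 1024 * 1024) {
		/* walk [start, end] in 'map' and remove any extent map
		 * whose range is not EXTENT_LOCKED, as in the loop above */
	}

	/* one cheap range test over the IO-related bits decides whether
	 * the page can be released at all */
	if (test_range_bit(tree, start, end, EXTENT_IOBITS, 0))
		return 0;

	clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 1, 1, mask);
	return 1;
}
```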
@@ -2553,13 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
2553 | } | 2721 | } |
2554 | } | 2722 | } |
2555 | clear_page_dirty_for_io(page); | 2723 | clear_page_dirty_for_io(page); |
2556 | write_lock_irq(&page->mapping->tree_lock); | 2724 | read_lock_irq(&page->mapping->tree_lock); |
2557 | if (!PageDirty(page)) { | 2725 | if (!PageDirty(page)) { |
2558 | radix_tree_tag_clear(&page->mapping->page_tree, | 2726 | radix_tree_tag_clear(&page->mapping->page_tree, |
2559 | page_index(page), | 2727 | page_index(page), |
2560 | PAGECACHE_TAG_DIRTY); | 2728 | PAGECACHE_TAG_DIRTY); |
2561 | } | 2729 | } |
2562 | write_unlock_irq(&page->mapping->tree_lock); | 2730 | read_unlock_irq(&page->mapping->tree_lock); |
2563 | unlock_page(page); | 2731 | unlock_page(page); |
2564 | } | 2732 | } |
2565 | return 0; | 2733 | return 0; |