author		Chris Mason <chris.mason@oracle.com>	2008-01-29 09:59:12 -0500
committer	Chris Mason <chris.mason@oracle.com>	2008-09-25 11:03:59 -0400
commit		70dec8079d78691e476cc6c7cede40656078ad30 (patch)
tree		19ab80f247fc026c3f5a5713381e98372737227b /fs/btrfs/extent_io.c
parent		aadfeb6e39ad6bde080cb3ab23f4da57ccb25f4a (diff)
Btrfs: extent_io and extent_state optimizations
The end_bio routines are changed to take a pointer to the extent state struct, and the state tree is walked in order to set/clear appropriate bits as IO completes.  This greatly reduces the number of rbtree searches done by the end_bio handlers, and reduces lock contention.

The extent_io releasepage function is changed to avoid expensive searches for locked state.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	358
1 file changed, 263 insertions(+), 95 deletions(-)
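The core of the change is visible in the end_bio handlers below: submit_one_bio() looks up the extent_state that covers the bio once and stashes it in bio->bi_private, and the completion handlers start from that cached state and walk rb_prev() to clear bits, rather than issuing a fresh rbtree search for every completed range. A condensed, illustrative sketch of that fast path follows; clear_writeback_range() is a hypothetical helper name, not a function from this patch, and the real handler also re-validates the next state and copes with bios whose biovecs are not sequential in the file.

/*
 * Hypothetical helper sketching the cached-state fast path this patch
 * adds to end_bio_extent_writepage().  'cached' is the extent_state
 * that submit_one_bio() stored in bio->bi_private.
 */
static void clear_writeback_range(struct extent_io_tree *tree,
				  struct extent_state *cached,
				  u64 start, u64 end)
{
	struct extent_state *state = cached;
	struct rb_node *node;
	unsigned long flags;
	u64 cur;

	spin_lock_irqsave(&tree->lock, flags);
	if (!state || state->end != end ||
	    !(state->state & EXTENT_WRITEBACK)) {
		/* cached state doesn't match, fall back to the old slow path */
		spin_unlock_irqrestore(&tree->lock, flags);
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
		return;
	}
	/* walk backwards from the cached state until [start, end] is covered */
	while (state) {
		struct extent_state *clear = state;

		cur = clear->start;
		node = rb_prev(&clear->rb_node);
		state = node ? rb_entry(node, struct extent_state, rb_node)
			     : NULL;
		clear_state_bit(tree, clear, EXTENT_WRITEBACK, 1, 0);
		if (cur <= start)
			break;
	}
	spin_unlock_irqrestore(&tree->lock, flags);
}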
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 15cc158a0498..1f734c34dc24 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -27,13 +27,11 @@ static struct kmem_cache *extent_buffer_cache;
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
 #define BUFFER_LRU_MAX 64
 
 struct tree_entry {
 	u64 start;
 	u64 end;
-	int in_tree;
 	struct rb_node rb_node;
 };
 
@@ -69,7 +67,7 @@ void extent_io_exit(void)
 
 	while (!list_empty(&states)) {
 		state = list_entry(states.next, struct extent_state, list);
-		printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
+		printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
 		list_del(&state->list);
 		kmem_cache_free(extent_state_cache, state);
 
@@ -87,7 +85,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 	tree->state.rb_node = NULL;
 	tree->ops = NULL;
 	tree->dirty_bytes = 0;
-	rwlock_init(&tree->lock);
+	spin_lock_init(&tree->lock);
 	spin_lock_init(&tree->lru_lock);
 	tree->mapping = mapping;
 	INIT_LIST_HEAD(&tree->buffer_lru);
@@ -110,18 +108,13 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru);
 struct extent_state *alloc_extent_state(gfp_t mask)
 {
 	struct extent_state *state;
-	unsigned long flags;
 
 	state = kmem_cache_alloc(extent_state_cache, mask);
 	if (!state || IS_ERR(state))
 		return state;
 	state->state = 0;
-	state->in_tree = 0;
 	state->private = 0;
-
-	spin_lock_irqsave(&state_lock, flags);
-	list_add(&state->list, &states);
-	spin_unlock_irqrestore(&state_lock, flags);
+	state->tree = NULL;
 
 	atomic_set(&state->refs, 1);
 	init_waitqueue_head(&state->wq);
@@ -131,14 +124,10 @@ EXPORT_SYMBOL(alloc_extent_state);
 
 void free_extent_state(struct extent_state *state)
 {
-	unsigned long flags;
 	if (!state)
 		return;
 	if (atomic_dec_and_test(&state->refs)) {
-		WARN_ON(state->in_tree);
-		spin_lock_irqsave(&state_lock, flags);
-		list_del(&state->list);
-		spin_unlock_irqrestore(&state_lock, flags);
+		WARN_ON(state->tree);
 		kmem_cache_free(extent_state_cache, state);
 	}
 }
@@ -164,7 +153,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
 	}
 
 	entry = rb_entry(node, struct tree_entry, rb_node);
-	entry->in_tree = 1;
 	rb_link_node(node, parent, p);
 	rb_insert_color(node, root);
 	return NULL;
@@ -216,8 +204,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 
 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
 {
-	struct rb_node *prev;
+	struct rb_node *prev = NULL;
 	struct rb_node *ret;
+
 	ret = __tree_search(root, offset, &prev, NULL);
 	if (!ret)
 		return prev;
@@ -248,7 +237,7 @@ static int merge_state(struct extent_io_tree *tree,
 		if (other->end == state->start - 1 &&
 		    other->state == state->state) {
 			state->start = other->start;
-			other->in_tree = 0;
+			other->tree = NULL;
 			rb_erase(&other->rb_node, &tree->state);
 			free_extent_state(other);
 		}
@@ -259,7 +248,7 @@ static int merge_state(struct extent_io_tree *tree,
 		if (other->start == state->end + 1 &&
 		    other->state == state->state) {
 			other->start = state->start;
-			state->in_tree = 0;
+			state->tree = NULL;
 			rb_erase(&state->rb_node, &tree->state);
 			free_extent_state(state);
 		}
@@ -300,6 +289,7 @@ static int insert_state(struct extent_io_tree *tree,
 		free_extent_state(state);
 		return -EEXIST;
 	}
+	state->tree = tree;
 	merge_state(tree, state);
 	return 0;
 }
@@ -335,6 +325,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
 		free_extent_state(prealloc);
 		return -EEXIST;
 	}
+	prealloc->tree = tree;
 	return 0;
 }
 
@@ -361,9 +352,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
 	if (wake)
 		wake_up(&state->wq);
 	if (delete || state->state == 0) {
-		if (state->in_tree) {
+		if (state->tree) {
 			rb_erase(&state->rb_node, &tree->state);
-			state->in_tree = 0;
+			state->tree = NULL;
 			free_extent_state(state);
 		} else {
 			WARN_ON(1);
@@ -404,7 +395,7 @@ again:
 		return -ENOMEM;
 	}
 
-	write_lock_irqsave(&tree->lock, flags);
+	spin_lock_irqsave(&tree->lock, flags);
 	/*
 	 * this search will find the extents that end after
 	 * our range starts
@@ -434,6 +425,8 @@ again:
 	 */
 
 	if (state->start < start) {
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
 		err = split_state(tree, state, prealloc, start);
 		BUG_ON(err == -EEXIST);
 		prealloc = NULL;
@@ -455,6 +448,8 @@ again:
 	 * on the first half
 	 */
 	if (state->start <= end && state->end > end) {
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
 		err = split_state(tree, state, prealloc, end + 1);
 		BUG_ON(err == -EEXIST);
 
@@ -471,7 +466,7 @@ again:
 		goto search_again;
 
 out:
-	write_unlock_irqrestore(&tree->lock, flags);
+	spin_unlock_irqrestore(&tree->lock, flags);
 	if (prealloc)
 		free_extent_state(prealloc);
 
@@ -480,7 +475,7 @@ out:
 search_again:
 	if (start > end)
 		goto out;
-	write_unlock_irqrestore(&tree->lock, flags);
+	spin_unlock_irqrestore(&tree->lock, flags);
 	if (mask & __GFP_WAIT)
 		cond_resched();
 	goto again;
@@ -492,9 +487,9 @@ static int wait_on_state(struct extent_io_tree *tree,
 {
 	DEFINE_WAIT(wait);
 	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
-	read_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	schedule();
-	read_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 	finish_wait(&state->wq, &wait);
 	return 0;
 }
@@ -509,7 +504,7 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
 	struct extent_state *state;
 	struct rb_node *node;
 
-	read_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 again:
 	while (1) {
 		/*
@@ -538,13 +533,13 @@ again:
 			break;
 
 		if (need_resched()) {
-			read_unlock_irq(&tree->lock);
+			spin_unlock_irq(&tree->lock);
 			cond_resched();
-			read_lock_irq(&tree->lock);
+			spin_lock_irq(&tree->lock);
 		}
 	}
 out:
-	read_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	return 0;
 }
 EXPORT_SYMBOL(wait_extent_bit);
@@ -589,7 +584,7 @@ again:
 		return -ENOMEM;
 	}
 
-	write_lock_irqsave(&tree->lock, flags);
+	spin_lock_irqsave(&tree->lock, flags);
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -709,7 +704,7 @@ again:
 		goto search_again;
 
 out:
-	write_unlock_irqrestore(&tree->lock, flags);
+	spin_unlock_irqrestore(&tree->lock, flags);
 	if (prealloc)
 		free_extent_state(prealloc);
 
@@ -718,7 +713,7 @@ out:
 search_again:
 	if (start > end)
 		goto out;
-	write_unlock_irqrestore(&tree->lock, flags);
+	spin_unlock_irqrestore(&tree->lock, flags);
 	if (mask & __GFP_WAIT)
 		cond_resched();
 	goto again;
@@ -817,10 +812,6 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 }
 EXPORT_SYMBOL(wait_on_extent_writeback);
 
-/*
- * locks a range in ascending order, waiting for any locked regions
- * it hits on the way.  [start,end] are inclusive, and this will sleep.
- */
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
 	int err;
@@ -896,7 +887,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
 	struct extent_state *state;
 	int ret = 1;
 
-	read_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -919,7 +910,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
 			break;
 	}
 out:
-	read_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	return ret;
 }
 EXPORT_SYMBOL(find_first_extent_bit);
@@ -933,7 +924,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
 	u64 found = 0;
 	u64 total_bytes = 0;
 
-	write_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -976,9 +967,9 @@ search_again:
 			atomic_inc(&state->refs);
 			prepare_to_wait(&state->wq, &wait,
 					TASK_UNINTERRUPTIBLE);
-			write_unlock_irq(&tree->lock);
+			spin_unlock_irq(&tree->lock);
 			schedule();
-			write_lock_irq(&tree->lock);
+			spin_lock_irq(&tree->lock);
 			finish_wait(&state->wq, &wait);
 			free_extent_state(state);
 			goto search_again;
@@ -997,7 +988,7 @@ search_again:
 			break;
 	}
 out:
-	write_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	return found;
 }
 
@@ -1017,7 +1008,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
 		return 0;
 	}
 
-	write_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 	if (cur_start == 0 && bits == EXTENT_DIRTY) {
 		total_bytes = tree->dirty_bytes;
 		goto out;
@@ -1050,7 +1041,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
 			break;
 	}
 out:
-	write_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	return total_bytes;
 }
 /*
@@ -1122,7 +1113,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
 	struct extent_state *state;
 	int ret = 0;
 
-	write_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -1139,7 +1130,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
 	}
 	state->private = private;
 out:
-	write_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	return ret;
 }
 
@@ -1149,7 +1140,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 	struct extent_state *state;
 	int ret = 0;
 
-	read_lock_irq(&tree->lock);
+	spin_lock_irq(&tree->lock);
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -1166,13 +1157,13 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 	}
 	*private = state->private;
 out:
-	read_unlock_irq(&tree->lock);
+	spin_unlock_irq(&tree->lock);
 	return ret;
 }
 
 /*
  * searches a range in the state tree for a given mask.
- * If 'filled' == 1, this returns 1 only if ever extent in the tree
+ * If 'filled' == 1, this returns 1 only if every extent in the tree
  * has the bits set.  Otherwise, 1 is returned if any bit in the
  * range is found set.
  */
@@ -1184,7 +1175,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	int bitset = 0;
 	unsigned long flags;
 
-	read_lock_irqsave(&tree->lock, flags);
+	spin_lock_irqsave(&tree->lock, flags);
 	node = tree_search(&tree->state, start);
 	while (node && start <= end) {
 		state = rb_entry(node, struct extent_state, rb_node);
@@ -1215,7 +1206,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 			break;
 		}
 	}
-	read_unlock_irqrestore(&tree->lock, flags);
+	spin_unlock_irqrestore(&tree->lock, flags);
 	return bitset;
 }
 EXPORT_SYMBOL(test_range_bit);
@@ -1282,16 +1273,19 @@ static int end_bio_extent_writepage(struct bio *bio,
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct extent_io_tree *tree = bio->bi_private;
+	struct extent_state *state = bio->bi_private;
+	struct extent_io_tree *tree = state->tree;
+	struct rb_node *node;
 	u64 start;
 	u64 end;
+	u64 cur;
 	int whole_page;
+	unsigned long flags;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
 	if (bio->bi_size)
 		return 1;
 #endif
-
 	do {
 		struct page *page = bvec->bv_page;
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1311,16 +1305,80 @@ static int end_bio_extent_writepage(struct bio *bio,
 			ClearPageUptodate(page);
 			SetPageError(page);
 		}
-		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
+
+		if (tree->ops && tree->ops->writepage_end_io_hook) {
+			tree->ops->writepage_end_io_hook(page, start, end,
+							 state);
+		}
+
+		/*
+		 * bios can get merged in funny ways, and so we need to
+		 * be careful with the state variable.  We know the
+		 * state won't be merged with others because it has
+		 * WRITEBACK set, but we can't be sure each biovec is
+		 * sequential in the file.  So, if our cached state
+		 * doesn't match the expected end, search the tree
+		 * for the correct one.
+		 */
+
+		spin_lock_irqsave(&tree->lock, flags);
+		if (!state || state->end != end) {
+			state = NULL;
+			node = __tree_search(&tree->state, start, NULL, NULL);
+			if (node) {
+				state = rb_entry(node, struct extent_state,
+						 rb_node);
+				if (state->end != end ||
+				    !(state->state & EXTENT_WRITEBACK))
+					state = NULL;
+			}
+			if (!state) {
+				spin_unlock_irqrestore(&tree->lock, flags);
+				clear_extent_writeback(tree, start,
+						       end, GFP_ATOMIC);
+				goto next_io;
+			}
+		}
+		cur = end;
+		while(1) {
+			struct extent_state *clear = state;
+			cur = state->start;
+			node = rb_prev(&state->rb_node);
+			if (node) {
+				state = rb_entry(node,
+						 struct extent_state,
+						 rb_node);
+			} else {
+				state = NULL;
+			}
+
+			clear_state_bit(tree, clear, EXTENT_WRITEBACK,
+					1, 0);
+			if (cur == start)
+				break;
+			if (cur < start) {
+				WARN_ON(1);
+				break;
+			}
+			if (!node)
+				break;
+		}
+		/* before releasing the lock, make sure the next state
+		 * variable has the expected bits set and corresponds
+		 * to the correct offsets in the file
+		 */
+		if (state && (state->end + 1 != start ||
+		    !state->state & EXTENT_WRITEBACK)) {
+			state = NULL;
+		}
+		spin_unlock_irqrestore(&tree->lock, flags);
+next_io:
 
 		if (whole_page)
 			end_page_writeback(page);
 		else
 			check_page_writeback(tree, page);
-		if (tree->ops && tree->ops->writepage_end_io_hook)
-			tree->ops->writepage_end_io_hook(page, start, end);
 	} while (bvec >= bio->bi_io_vec);
-
 	bio_put(bio);
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
 	return 0;
@@ -1347,9 +1405,13 @@ static int end_bio_extent_readpage(struct bio *bio,
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct extent_io_tree *tree = bio->bi_private;
+	struct extent_state *state = bio->bi_private;
+	struct extent_io_tree *tree = state->tree;
+	struct rb_node *node;
 	u64 start;
 	u64 end;
+	u64 cur;
+	unsigned long flags;
 	int whole_page;
 	int ret;
 
@@ -1373,27 +1435,83 @@ static int end_bio_extent_readpage(struct bio *bio,
 		prefetchw(&bvec->bv_page->flags);
 
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
-			ret = tree->ops->readpage_end_io_hook(page, start, end);
+			ret = tree->ops->readpage_end_io_hook(page, start, end,
+							      state);
 			if (ret)
 				uptodate = 0;
 		}
-		if (uptodate) {
-			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
-			if (whole_page)
-				SetPageUptodate(page);
-			else
-				check_page_uptodate(tree, page);
-		} else {
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
 
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		spin_lock_irqsave(&tree->lock, flags);
+		if (!state || state->end != end) {
+			state = NULL;
+			node = __tree_search(&tree->state, start, NULL, NULL);
+			if (node) {
+				state = rb_entry(node, struct extent_state,
+						 rb_node);
+				if (state->end != end ||
+				    !(state->state & EXTENT_LOCKED))
+					state = NULL;
+			}
+			if (!state) {
+				spin_unlock_irqrestore(&tree->lock, flags);
+				set_extent_uptodate(tree, start, end,
+						    GFP_ATOMIC);
+				unlock_extent(tree, start, end, GFP_ATOMIC);
+				goto next_io;
+			}
+		}
 
-		if (whole_page)
+		cur = end;
+		while(1) {
+			struct extent_state *clear = state;
+			cur = state->start;
+			node = rb_prev(&state->rb_node);
+			if (node) {
+				state = rb_entry(node,
+						 struct extent_state,
+						 rb_node);
+			} else {
+				state = NULL;
+			}
+			clear->state |= EXTENT_UPTODATE;
+			clear_state_bit(tree, clear, EXTENT_LOCKED,
+					1, 0);
+			if (cur == start)
+				break;
+			if (cur < start) {
+				WARN_ON(1);
+				break;
+			}
+			if (!node)
+				break;
+		}
+		/* before releasing the lock, make sure the next state
+		 * variable has the expected bits set and corresponds
+		 * to the correct offsets in the file
+		 */
+		if (state && (state->end + 1 != start ||
+		    !state->state & EXTENT_WRITEBACK)) {
+			state = NULL;
+		}
+		spin_unlock_irqrestore(&tree->lock, flags);
+next_io:
+		if (whole_page) {
+			if (uptodate) {
+				SetPageUptodate(page);
+			} else {
+				ClearPageUptodate(page);
+				SetPageError(page);
+			}
 			unlock_page(page);
-		else
+		} else {
+			if (uptodate) {
+				check_page_uptodate(tree, page);
+			} else {
+				ClearPageUptodate(page);
+				SetPageError(page);
+			}
 			check_page_locked(tree, page);
+		}
 	} while (bvec >= bio->bi_io_vec);
 
 	bio_put(bio);
@@ -1416,7 +1534,8 @@ static int end_bio_extent_preparewrite(struct bio *bio,
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct extent_io_tree *tree = bio->bi_private;
+	struct extent_state *state = bio->bi_private;
+	struct extent_io_tree *tree = state->tree;
 	u64 start;
 	u64 end;
 
@@ -1475,6 +1594,29 @@ static int submit_one_bio(int rw, struct bio *bio)
 {
 	u64 maxsector;
 	int ret = 0;
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct page *page = bvec->bv_page;
+	struct extent_io_tree *tree = bio->bi_private;
+	struct rb_node *node;
+	struct extent_state *state;
+	u64 start;
+	u64 end;
+
+	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+	end = start + bvec->bv_len - 1;
+
+	spin_lock_irq(&tree->lock);
+	node = __tree_search(&tree->state, start, NULL, NULL);
+	BUG_ON(!node);
+	state = rb_entry(node, struct extent_state, rb_node);
+	while(state->end < end) {
+		node = rb_next(node);
+		state = rb_entry(node, struct extent_state, rb_node);
+	}
+	BUG_ON(state->end != end);
+	spin_unlock_irq(&tree->lock);
+
+	bio->bi_private = state;
 
 	bio_get(bio);
 
@@ -1519,9 +1661,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	if (!bio) {
 		printk("failed to allocate bio nr %d\n", nr);
 	}
+
+
 	bio_add_page(bio, page, size, offset);
 	bio->bi_end_io = end_io_func;
 	bio->bi_private = tree;
+
 	if (bio_ret) {
 		*bio_ret = bio;
 	} else {
@@ -1635,6 +1780,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			page_offset += iosize;
 			continue;
 		}
+		/* we have an inline extent but it didn't get marked up
+		 * to date.  Error out
+		 */
+		if (block_start == EXTENT_MAP_INLINE) {
+			SetPageError(page);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
 
 		ret = 0;
 		if (tree->ops && tree->ops->readpage_io_hook) {
@@ -2205,7 +2360,8 @@ EXPORT_SYMBOL(extent_prepare_write);
  * map records are removed
  */
 int try_release_extent_mapping(struct extent_map_tree *map,
-			       struct extent_io_tree *tree, struct page *page)
+			       struct extent_io_tree *tree, struct page *page,
+			       gfp_t mask)
 {
 	struct extent_map *em;
 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
@@ -2213,30 +2369,42 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 	u64 orig_start = start;
 	int ret = 1;
 
-	while (start <= end) {
-		spin_lock(&map->lock);
-		em = lookup_extent_mapping(map, start, end);
-		if (!em || IS_ERR(em)) {
+	if ((mask & __GFP_WAIT) &&
+	    page->mapping->host->i_size > 16 * 1024 * 1024) {
+		while (start <= end) {
+			spin_lock(&map->lock);
+			em = lookup_extent_mapping(map, start, end);
+			if (!em || IS_ERR(em)) {
+				spin_unlock(&map->lock);
+				break;
+			}
+			if (em->start != start) {
+				spin_unlock(&map->lock);
+				free_extent_map(em);
+				break;
+			}
+			if (!test_range_bit(tree, em->start,
+					    extent_map_end(em) - 1,
+					    EXTENT_LOCKED, 0)) {
+				remove_extent_mapping(map, em);
+				/* once for the rb tree */
+				free_extent_map(em);
+			}
+			start = extent_map_end(em);
 			spin_unlock(&map->lock);
-			break;
-		}
-		if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
-				    EXTENT_LOCKED, 0)) {
-			remove_extent_mapping(map, em);
-			/* once for the rb tree */
+
+			/* once for us */
 			free_extent_map(em);
 		}
-		start = extent_map_end(em);
-		spin_unlock(&map->lock);
-
-		/* once for us */
-		free_extent_map(em);
 	}
-	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
+	if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0))
 		ret = 0;
-	else
+	else {
+		if ((mask & GFP_NOFS) == GFP_NOFS)
+			mask = GFP_NOFS;
 		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
-			 1, 1, GFP_NOFS);
+				 1, 1, mask);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(try_release_extent_mapping);
@@ -2553,13 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			}
 		}
 		clear_page_dirty_for_io(page);
-		write_lock_irq(&page->mapping->tree_lock);
+		read_lock_irq(&page->mapping->tree_lock);
 		if (!PageDirty(page)) {
 			radix_tree_tag_clear(&page->mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		}
-		write_unlock_irq(&page->mapping->tree_lock);
+		read_unlock_irq(&page->mapping->tree_lock);
 		unlock_page(page);
 	}
 	return 0;