aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2009-09-11 16:12:44 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-28 16:29:42 -0400
commit9ed74f2dba6ebf9f30b80554290bfc73cc3ef083 (patch)
tree763d58a4a11ceca26dcdaedefb1fd662c4e2fa8b /fs/btrfs/extent_io.c
parentc65ddb52dc412c9b67681b1aa16cd1bac8434e24 (diff)
Btrfs: proper -ENOSPC handling
At the start of a transaction we do a btrfs_reserve_metadata_space() and specify how many items we plan on modifying. Then once we've done our modifications and such, just call btrfs_unreserve_metadata_space() for the same number of items we reserved. For keeping track of metadata needed for data I've had to add an extent_io op for when we merge extents. This lets us track space properly when we are doing sequential writes, so we don't end up reserving way more metadata space than what we need. The only place where the metadata space accounting is not done is in the relocation code. This is because Yan is going to be reworking that code in the near future, so running btrfs-vol -b could still possibly result in a ENOSPC related panic. This patch also turns off the metadata_ratio stuff in order to allow users to more efficiently use their disk space. This patch makes it so we track how much metadata we need for an inode's delayed allocation extents by tracking how many extents are currently waiting for allocation. It introduces two new callbacks for the extent_io tree's, merge_extent_hook and split_extent_hook. These help us keep track of when we merge delalloc extents together and split them up. Reservations are handled prior to any actually dirty'ing occurs, and then we unreserve after we dirty. btrfs_unreserve_metadata_for_delalloc() will make the appropriate unreservations as needed based on the number of reservations we currently have and the number of extents we currently have. Doing the reservation outside of doing any of the actual dirty'ing lets us do things like filemap_flush() the inode to try and force delalloc to happen, or as a last resort actually start allocation on all delalloc inodes in the fs. This has survived dbench, fs_mark and an fsx torture test. Signed-off-by: Josef Bacik <jbacik@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c92
1 files changed, 69 insertions, 23 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0cb88f8146ea..de1793ba004a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
280 return NULL; 280 return NULL;
281} 281}
282 282
283static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
284 struct extent_state *other)
285{
286 if (tree->ops && tree->ops->merge_extent_hook)
287 tree->ops->merge_extent_hook(tree->mapping->host, new,
288 other);
289}
290
283/* 291/*
284 * utility function to look for merge candidates inside a given range. 292 * utility function to look for merge candidates inside a given range.
285 * Any extents with matching state are merged together into a single 293 * Any extents with matching state are merged together into a single
@@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree,
303 other = rb_entry(other_node, struct extent_state, rb_node); 311 other = rb_entry(other_node, struct extent_state, rb_node);
304 if (other->end == state->start - 1 && 312 if (other->end == state->start - 1 &&
305 other->state == state->state) { 313 other->state == state->state) {
314 merge_cb(tree, state, other);
306 state->start = other->start; 315 state->start = other->start;
307 other->tree = NULL; 316 other->tree = NULL;
308 rb_erase(&other->rb_node, &tree->state); 317 rb_erase(&other->rb_node, &tree->state);
@@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree,
314 other = rb_entry(other_node, struct extent_state, rb_node); 323 other = rb_entry(other_node, struct extent_state, rb_node);
315 if (other->start == state->end + 1 && 324 if (other->start == state->end + 1 &&
316 other->state == state->state) { 325 other->state == state->state) {
326 merge_cb(tree, state, other);
317 other->start = state->start; 327 other->start = state->start;
318 state->tree = NULL; 328 state->tree = NULL;
319 rb_erase(&state->rb_node, &tree->state); 329 rb_erase(&state->rb_node, &tree->state);
320 free_extent_state(state); 330 free_extent_state(state);
331 state = NULL;
321 } 332 }
322 } 333 }
334
323 return 0; 335 return 0;
324} 336}
325 337
326static void set_state_cb(struct extent_io_tree *tree, 338static int set_state_cb(struct extent_io_tree *tree,
327 struct extent_state *state, 339 struct extent_state *state,
328 unsigned long bits) 340 unsigned long bits)
329{ 341{
330 if (tree->ops && tree->ops->set_bit_hook) { 342 if (tree->ops && tree->ops->set_bit_hook) {
331 tree->ops->set_bit_hook(tree->mapping->host, state->start, 343 return tree->ops->set_bit_hook(tree->mapping->host,
332 state->end, state->state, bits); 344 state->start, state->end,
345 state->state, bits);
333 } 346 }
347
348 return 0;
334} 349}
335 350
336static void clear_state_cb(struct extent_io_tree *tree, 351static void clear_state_cb(struct extent_io_tree *tree,
337 struct extent_state *state, 352 struct extent_state *state,
338 unsigned long bits) 353 unsigned long bits)
339{ 354{
340 if (tree->ops && tree->ops->clear_bit_hook) { 355 if (tree->ops && tree->ops->clear_bit_hook)
341 tree->ops->clear_bit_hook(tree->mapping->host, state->start, 356 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
342 state->end, state->state, bits);
343 }
344} 357}
345 358
346/* 359/*
@@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree,
358 int bits) 371 int bits)
359{ 372{
360 struct rb_node *node; 373 struct rb_node *node;
374 int ret;
361 375
362 if (end < start) { 376 if (end < start) {
363 printk(KERN_ERR "btrfs end < start %llu %llu\n", 377 printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -365,11 +379,14 @@ static int insert_state(struct extent_io_tree *tree,
365 (unsigned long long)start); 379 (unsigned long long)start);
366 WARN_ON(1); 380 WARN_ON(1);
367 } 381 }
368 if (bits & EXTENT_DIRTY)
369 tree->dirty_bytes += end - start + 1;
370 state->start = start; 382 state->start = start;
371 state->end = end; 383 state->end = end;
372 set_state_cb(tree, state, bits); 384 ret = set_state_cb(tree, state, bits);
385 if (ret)
386 return ret;
387
388 if (bits & EXTENT_DIRTY)
389 tree->dirty_bytes += end - start + 1;
373 state->state |= bits; 390 state->state |= bits;
374 node = tree_insert(&tree->state, end, &state->rb_node); 391 node = tree_insert(&tree->state, end, &state->rb_node);
375 if (node) { 392 if (node) {
@@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree,
387 return 0; 404 return 0;
388} 405}
389 406
407static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
408 u64 split)
409{
410 if (tree->ops && tree->ops->split_extent_hook)
411 return tree->ops->split_extent_hook(tree->mapping->host,
412 orig, split);
413 return 0;
414}
415
390/* 416/*
391 * split a given extent state struct in two, inserting the preallocated 417 * split a given extent state struct in two, inserting the preallocated
392 * struct 'prealloc' as the newly created second half. 'split' indicates an 418 * struct 'prealloc' as the newly created second half. 'split' indicates an
@@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
405 struct extent_state *prealloc, u64 split) 431 struct extent_state *prealloc, u64 split)
406{ 432{
407 struct rb_node *node; 433 struct rb_node *node;
434
435 split_cb(tree, orig, split);
436
408 prealloc->start = orig->start; 437 prealloc->start = orig->start;
409 prealloc->end = split - 1; 438 prealloc->end = split - 1;
410 prealloc->state = orig->state; 439 prealloc->state = orig->state;
@@ -542,8 +571,8 @@ hit_next:
542 if (err) 571 if (err)
543 goto out; 572 goto out;
544 if (state->end <= end) { 573 if (state->end <= end) {
545 set |= clear_state_bit(tree, state, bits, 574 set |= clear_state_bit(tree, state, bits, wake,
546 wake, delete); 575 delete);
547 if (last_end == (u64)-1) 576 if (last_end == (u64)-1)
548 goto out; 577 goto out;
549 start = last_end + 1; 578 start = last_end + 1;
@@ -561,12 +590,11 @@ hit_next:
561 prealloc = alloc_extent_state(GFP_ATOMIC); 590 prealloc = alloc_extent_state(GFP_ATOMIC);
562 err = split_state(tree, state, prealloc, end + 1); 591 err = split_state(tree, state, prealloc, end + 1);
563 BUG_ON(err == -EEXIST); 592 BUG_ON(err == -EEXIST);
564
565 if (wake) 593 if (wake)
566 wake_up(&state->wq); 594 wake_up(&state->wq);
567 595
568 set |= clear_state_bit(tree, prealloc, bits, 596 set |= clear_state_bit(tree, prealloc, bits, wake, delete);
569 wake, delete); 597
570 prealloc = NULL; 598 prealloc = NULL;
571 goto out; 599 goto out;
572 } 600 }
@@ -667,16 +695,23 @@ out:
667 return 0; 695 return 0;
668} 696}
669 697
670static void set_state_bits(struct extent_io_tree *tree, 698static int set_state_bits(struct extent_io_tree *tree,
671 struct extent_state *state, 699 struct extent_state *state,
672 int bits) 700 int bits)
673{ 701{
702 int ret;
703
704 ret = set_state_cb(tree, state, bits);
705 if (ret)
706 return ret;
707
674 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 708 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
675 u64 range = state->end - state->start + 1; 709 u64 range = state->end - state->start + 1;
676 tree->dirty_bytes += range; 710 tree->dirty_bytes += range;
677 } 711 }
678 set_state_cb(tree, state, bits);
679 state->state |= bits; 712 state->state |= bits;
713
714 return 0;
680} 715}
681 716
682static void cache_state(struct extent_state *state, 717static void cache_state(struct extent_state *state,
@@ -758,7 +793,10 @@ hit_next:
758 goto out; 793 goto out;
759 } 794 }
760 795
761 set_state_bits(tree, state, bits); 796 err = set_state_bits(tree, state, bits);
797 if (err)
798 goto out;
799
762 cache_state(state, cached_state); 800 cache_state(state, cached_state);
763 merge_state(tree, state); 801 merge_state(tree, state);
764 if (last_end == (u64)-1) 802 if (last_end == (u64)-1)
@@ -805,7 +843,9 @@ hit_next:
805 if (err) 843 if (err)
806 goto out; 844 goto out;
807 if (state->end <= end) { 845 if (state->end <= end) {
808 set_state_bits(tree, state, bits); 846 err = set_state_bits(tree, state, bits);
847 if (err)
848 goto out;
809 cache_state(state, cached_state); 849 cache_state(state, cached_state);
810 merge_state(tree, state); 850 merge_state(tree, state);
811 if (last_end == (u64)-1) 851 if (last_end == (u64)-1)
@@ -829,11 +869,13 @@ hit_next:
829 this_end = last_start - 1; 869 this_end = last_start - 1;
830 err = insert_state(tree, prealloc, start, this_end, 870 err = insert_state(tree, prealloc, start, this_end,
831 bits); 871 bits);
832 cache_state(prealloc, cached_state);
833 prealloc = NULL;
834 BUG_ON(err == -EEXIST); 872 BUG_ON(err == -EEXIST);
835 if (err) 873 if (err) {
874 prealloc = NULL;
836 goto out; 875 goto out;
876 }
877 cache_state(prealloc, cached_state);
878 prealloc = NULL;
837 start = this_end + 1; 879 start = this_end + 1;
838 goto search_again; 880 goto search_again;
839 } 881 }
@@ -852,7 +894,11 @@ hit_next:
852 err = split_state(tree, state, prealloc, end + 1); 894 err = split_state(tree, state, prealloc, end + 1);
853 BUG_ON(err == -EEXIST); 895 BUG_ON(err == -EEXIST);
854 896
855 set_state_bits(tree, prealloc, bits); 897 err = set_state_bits(tree, prealloc, bits);
898 if (err) {
899 prealloc = NULL;
900 goto out;
901 }
856 cache_state(prealloc, cached_state); 902 cache_state(prealloc, cached_state);
857 merge_state(tree, prealloc); 903 merge_state(tree, prealloc);
858 prealloc = NULL; 904 prealloc = NULL;