Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r--   fs/btrfs/free-space-cache.c   297
1 file changed, 297 insertions(+), 0 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index df19b60eef61..3fdadd28e935 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,15 @@
 
 #include <linux/sched.h>
 #include "ctree.h"
+#include "free-space-cache.h"
+#include "transaction.h"
+
+struct btrfs_free_space {
+        struct rb_node bytes_index;
+        struct rb_node offset_index;
+        u64 offset;
+        u64 bytes;
+};
 
 static int tree_insert_offset(struct rb_root *root, u64 offset,
                               struct rb_node *node)
@@ -371,12 +380,58 @@ u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
         return ret;
 }
 
+/*
+ * for a given cluster, put all of its extents back into the free
+ * space cache. If the block group passed doesn't match the block group
+ * pointed to by the cluster, someone else raced in and freed the
+ * cluster already. In that case, we just return without changing anything
+ */
+static int
+__btrfs_return_cluster_to_free_space(
+                             struct btrfs_block_group_cache *block_group,
+                             struct btrfs_free_cluster *cluster)
+{
+        struct btrfs_free_space *entry;
+        struct rb_node *node;
+
+        spin_lock(&cluster->lock);
+        if (cluster->block_group != block_group)
+                goto out;
+
+        cluster->window_start = 0;
+        node = rb_first(&cluster->root);
+        while (node) {
+                entry = rb_entry(node, struct btrfs_free_space, offset_index);
+                node = rb_next(&entry->offset_index);
+                rb_erase(&entry->offset_index, &cluster->root);
+                link_free_space(block_group, entry);
+        }
+        list_del_init(&cluster->block_group_list);
+
+        btrfs_put_block_group(cluster->block_group);
+        cluster->block_group = NULL;
+        cluster->root.rb_node = NULL;
+out:
+        spin_unlock(&cluster->lock);
+        return 0;
+}
+
 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
 {
         struct btrfs_free_space *info;
         struct rb_node *node;
+        struct btrfs_free_cluster *cluster;
+        struct btrfs_free_cluster *safe;
 
         spin_lock(&block_group->tree_lock);
+
+        list_for_each_entry_safe(cluster, safe, &block_group->cluster_list,
+                                 block_group_list) {
+
+                WARN_ON(cluster->block_group != block_group);
+                __btrfs_return_cluster_to_free_space(block_group, cluster);
+        }
+
         while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
                 info = rb_entry(node, struct btrfs_free_space, bytes_index);
                 unlink_free_space(block_group, info);
@@ -417,3 +472,245 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 
         return ret;
 }
+
+/*
+ * given a cluster, put all of its extents back into the free space
+ * cache. If a block group is passed, this function will only free
+ * a cluster that belongs to the passed block group.
+ *
+ * Otherwise, it'll get a reference on the block group pointed to by the
+ * cluster and remove the cluster from it.
+ */
+int btrfs_return_cluster_to_free_space(
+                               struct btrfs_block_group_cache *block_group,
+                               struct btrfs_free_cluster *cluster)
+{
+        int ret;
+
+        /* first, get a safe pointer to the block group */
+        spin_lock(&cluster->lock);
+        if (!block_group) {
+                block_group = cluster->block_group;
+                if (!block_group) {
+                        spin_unlock(&cluster->lock);
+                        return 0;
+                }
+        } else if (cluster->block_group != block_group) {
+                /* someone else has already freed it, don't redo their work */
+                spin_unlock(&cluster->lock);
+                return 0;
+        }
+        atomic_inc(&block_group->count);
+        spin_unlock(&cluster->lock);
+
+        /* now return any extents the cluster had on it */
+        spin_lock(&block_group->tree_lock);
+        ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
+        spin_unlock(&block_group->tree_lock);
+
+        /* finally drop our ref */
+        btrfs_put_block_group(block_group);
+        return ret;
+}
+
+/*
+ * given a cluster, try to allocate 'bytes' from it, returns 0
+ * if it couldn't find anything suitably large, or a logical disk offset
+ * if things worked out
+ */
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
+                             struct btrfs_free_cluster *cluster, u64 bytes,
+                             u64 min_start)
+{
+        struct btrfs_free_space *entry = NULL;
+        struct rb_node *node;
+        u64 ret = 0;
+
+        spin_lock(&cluster->lock);
+        if (bytes > cluster->max_size)
+                goto out;
+
+        if (cluster->block_group != block_group)
+                goto out;
+
+        node = rb_first(&cluster->root);
+        if (!node)
+                goto out;
+
+        entry = rb_entry(node, struct btrfs_free_space, offset_index);
+
+        while (1) {
+                if (entry->bytes < bytes || entry->offset < min_start) {
+                        struct rb_node *node;
+
+                        node = rb_next(&entry->offset_index);
+                        if (!node)
+                                break;
+                        entry = rb_entry(node, struct btrfs_free_space,
+                                         offset_index);
+                        continue;
+                }
+                ret = entry->offset;
+
+                entry->offset += bytes;
+                entry->bytes -= bytes;
+
+                if (entry->bytes == 0) {
+                        rb_erase(&entry->offset_index, &cluster->root);
+                        kfree(entry);
+                }
+                break;
+        }
+out:
+        spin_unlock(&cluster->lock);
+        return ret;
+}
+
+/*
+ * here we try to find a cluster of blocks in a block group. The goal
+ * is to find at least bytes free and up to empty_size + bytes free.
+ * We might not find them all in one contiguous area.
+ *
+ * returns zero and sets up cluster if things worked out, otherwise
+ * it returns -ENOSPC
+ */
+int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+                             struct btrfs_block_group_cache *block_group,
+                             struct btrfs_free_cluster *cluster,
+                             u64 offset, u64 bytes, u64 empty_size)
+{
+        struct btrfs_free_space *entry = NULL;
+        struct rb_node *node;
+        struct btrfs_free_space *next;
+        struct btrfs_free_space *last;
+        u64 min_bytes;
+        u64 window_start;
+        u64 window_free;
+        u64 max_extent = 0;
+        int total_retries = 0;
+        int ret;
+
+        /* for metadata, allow allocations with more holes */
+        if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+                /*
+                 * we want to do larger allocations when we are
+                 * flushing out the delayed refs, it helps prevent
+                 * making more work as we go along.
+                 */
+                if (trans->transaction->delayed_refs.flushing)
+                        min_bytes = max(bytes, (bytes + empty_size) >> 1);
+                else
+                        min_bytes = max(bytes, (bytes + empty_size) >> 4);
+        } else
+                min_bytes = max(bytes, (bytes + empty_size) >> 2);
+
+        spin_lock(&block_group->tree_lock);
+        spin_lock(&cluster->lock);
+
+        /* someone already found a cluster, hooray */
+        if (cluster->block_group) {
+                ret = 0;
+                goto out;
+        }
+again:
+        min_bytes = min(min_bytes, bytes + empty_size);
+        entry = tree_search_bytes(&block_group->free_space_bytes,
+                                  offset, min_bytes);
+        if (!entry) {
+                ret = -ENOSPC;
+                goto out;
+        }
+        window_start = entry->offset;
+        window_free = entry->bytes;
+        last = entry;
+        max_extent = entry->bytes;
+
+        while (1) {
+                /* our window is just right, let's fill it */
+                if (window_free >= bytes + empty_size)
+                        break;
+
+                node = rb_next(&last->offset_index);
+                if (!node) {
+                        ret = -ENOSPC;
+                        goto out;
+                }
+                next = rb_entry(node, struct btrfs_free_space, offset_index);
+
+                /*
+                 * we haven't filled the empty size and the window is
+                 * very large. reset and try again
+                 */
+                if (next->offset - window_start > (bytes + empty_size) * 2) {
+                        entry = next;
+                        window_start = entry->offset;
+                        window_free = entry->bytes;
+                        last = entry;
+                        max_extent = 0;
+                        total_retries++;
+                        if (total_retries % 256 == 0) {
+                                if (min_bytes >= (bytes + empty_size)) {
+                                        ret = -ENOSPC;
+                                        goto out;
+                                }
+                                /*
+                                 * grow our allocation a bit, we're not having
+                                 * much luck
+                                 */
+                                min_bytes *= 2;
+                                goto again;
+                        }
+                } else {
+                        last = next;
+                        window_free += next->bytes;
+                        if (entry->bytes > max_extent)
+                                max_extent = entry->bytes;
+                }
+        }
+
+        cluster->window_start = entry->offset;
+
+        /*
+         * now we've found our entries, pull them out of the free space
+         * cache and put them into the cluster rbtree
+         *
+         * The cluster includes an rbtree, but only uses the offset index
+         * of each free space cache entry.
+         */
+        while (1) {
+                node = rb_next(&entry->offset_index);
+                unlink_free_space(block_group, entry);
+                ret = tree_insert_offset(&cluster->root, entry->offset,
+                                         &entry->offset_index);
+                BUG_ON(ret);
+
+                if (!node || entry == last)
+                        break;
+
+                entry = rb_entry(node, struct btrfs_free_space, offset_index);
+        }
+        ret = 0;
+        cluster->max_size = max_extent;
+        atomic_inc(&block_group->count);
+        list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
+        cluster->block_group = block_group;
+out:
+        spin_unlock(&cluster->lock);
+        spin_unlock(&block_group->tree_lock);
+
+        return ret;
+}
+
+/*
+ * simple code to zero out a cluster
+ */
+void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
+{
+        spin_lock_init(&cluster->lock);
+        spin_lock_init(&cluster->refill_lock);
+        cluster->root.rb_node = NULL;
+        cluster->max_size = 0;
+        INIT_LIST_HEAD(&cluster->block_group_list);
+        cluster->block_group = NULL;
+}
+
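
The hunks above add the cluster helpers but show no caller. As a rough sketch of how the four new entry points are meant to fit together, the fragment below strings them into a single allocation attempt. It is illustrative only: the helper name example_cluster_alloc(), the variable names, and the simple error handling are assumptions made for this example (only the prototypes come from the diff, presumably exported via free-space-cache.h), and real callers would additionally serialize refills, most likely via the refill_lock initialized in btrfs_init_free_cluster().

/* Illustrative sketch only; not part of the commit above. */
static u64 example_cluster_alloc(struct btrfs_trans_handle *trans,
                                 struct btrfs_block_group_cache *block_group,
                                 struct btrfs_free_cluster *cluster,
                                 u64 search_start, u64 num_bytes,
                                 u64 empty_size)
{
        u64 offset;
        int ret;

        /* start from an empty cluster (normally done once at setup time) */
        btrfs_init_free_cluster(cluster);

        /*
         * gather free space entries covering at least num_bytes and up to
         * num_bytes + empty_size; returns -ENOSPC if no window is found
         */
        ret = btrfs_find_space_cluster(trans, block_group, cluster,
                                       search_start, num_bytes, empty_size);
        if (ret)
                return 0;

        /* carve num_bytes out of the cluster at or above search_start */
        offset = btrfs_alloc_from_cluster(block_group, cluster, num_bytes,
                                          search_start);
        if (!offset)
                /* nothing fit; hand the cluster's extents back to the cache */
                btrfs_return_cluster_to_free_space(block_group, cluster);

        return offset;
}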