 fs/btrfs/ctree.h            |  54
 fs/btrfs/disk-io.c          |   5
 fs/btrfs/extent-tree.c      | 181
 fs/btrfs/free-space-cache.c | 297
 fs/btrfs/free-space-cache.h |  44
 fs/btrfs/transaction.c      |   2
 6 files changed, 509 insertions(+), 74 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index aaa049b8e134..b82931f97ef3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -633,11 +633,29 @@ struct btrfs_space_info {
 	struct rw_semaphore groups_sem;
 };
 
-struct btrfs_free_space {
-	struct rb_node bytes_index;
-	struct rb_node offset_index;
-	u64 offset;
-	u64 bytes;
+/*
+ * free clusters are used to claim free space in relatively large chunks,
+ * allowing us to do less seeky writes.  They are used for all metadata
+ * allocations and data allocations in ssd mode.
+ */
+struct btrfs_free_cluster {
+	spinlock_t lock;
+	spinlock_t refill_lock;
+	struct rb_root root;
+
+	/* largest extent in this cluster */
+	u64 max_size;
+
+	/* first extent starting offset */
+	u64 window_start;
+
+	struct btrfs_block_group_cache *block_group;
+	/*
+	 * when a cluster is allocated from a block group, we put the
+	 * cluster onto a list in the block group so that it can
+	 * be freed before the block group is freed.
+	 */
+	struct list_head block_group_list;
 };
 
 struct btrfs_block_group_cache {
@@ -667,6 +685,11 @@ struct btrfs_block_group_cache {
 
 	/* usage count */
 	atomic_t count;
+
+	/* List of struct btrfs_free_clusters for this block group.
+	 * Today it will only have one thing on it, but that may change
+	 */
+	struct list_head cluster_list;
 };
 
 struct btrfs_leaf_ref_tree {
@@ -838,8 +861,12 @@ struct btrfs_fs_info {
 	spinlock_t delalloc_lock;
 	spinlock_t new_trans_lock;
 	u64 delalloc_bytes;
-	u64 last_alloc;
-	u64 last_data_alloc;
+
+	/* data_alloc_cluster is only used in ssd mode */
+	struct btrfs_free_cluster data_alloc_cluster;
+
+	/* all metadata allocations go through this cluster */
+	struct btrfs_free_cluster meta_alloc_cluster;
 
 	spinlock_t ref_cache_lock;
 	u64 total_ref_cache_size;
@@ -1747,6 +1774,7 @@ static inline struct dentry *fdentry(struct file *file)
 }
 
 /* extent-tree.c */
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
@@ -2173,16 +2201,4 @@ int btrfs_check_acl(struct inode *inode, int mask);
 int btrfs_init_acl(struct inode *inode, struct inode *dir);
 int btrfs_acl_chmod(struct inode *inode);
 
-/* free-space-cache.c */
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
-			 u64 bytenr, u64 size);
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
-			    u64 bytenr, u64 size);
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
-				   *block_group);
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
-			       u64 offset, u64 bytes, u64 empty_size);
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
-			   u64 bytes);
-u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
 #endif
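The ctree.h changes above swap the racy last_alloc/last_data_alloc byte hints for struct btrfs_free_cluster objects with their own locking, and tie each cluster to its block group in both directions: the cluster pins the group through the group's usage count, and the group's cluster_list lets clusters be detached before the group is freed. Below is a minimal userspace model of that ownership protocol; the types and helpers (bg_get, bg_put, cluster_attach, cluster_detach) are simplified stand-ins invented for illustration, not the kernel code.

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for btrfs_block_group_cache */
struct block_group {
	atomic_int count;			/* usage count */
};

/* simplified stand-in for btrfs_free_cluster */
struct free_cluster {
	struct block_group *block_group;	/* NULL while the cluster is idle */
};

static struct block_group *bg_get(struct block_group *bg)
{
	atomic_fetch_add(&bg->count, 1);
	return bg;
}

/* mirrors btrfs_put_block_group(): free on the last reference */
static void bg_put(struct block_group *bg)
{
	if (atomic_fetch_sub(&bg->count, 1) == 1)
		free(bg);
}

/* attaching takes a reference, as btrfs_find_space_cluster() does */
static void cluster_attach(struct free_cluster *c, struct block_group *bg)
{
	assert(c->block_group == NULL);
	c->block_group = bg_get(bg);
}

/* detaching drops it, as __btrfs_return_cluster_to_free_space() does */
static void cluster_detach(struct free_cluster *c)
{
	if (c->block_group) {
		bg_put(c->block_group);
		c->block_group = NULL;
	}
}

int main(void)
{
	struct block_group *bg = malloc(sizeof(*bg));
	struct free_cluster cluster = { NULL };

	atomic_init(&bg->count, 1);	/* the creator's reference */
	cluster_attach(&cluster, bg);	/* the cluster now pins the group */
	cluster_detach(&cluster);	/* detach before the group goes away */
	bg_put(bg);			/* last ref: the group is freed */
	printf("cluster/block-group ownership model ran cleanly\n");
	return 0;
}

The kernel gets the same effect under its spinlocks; the point is simply that a cluster can never outlive the block group it points into.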
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ea59ebfa505d..e68ef7b456b0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -38,6 +38,7 @@
 #include "locking.h"
 #include "ref-cache.h"
 #include "tree-log.h"
+#include "free-space-cache.h"
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -1652,6 +1653,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->cleaner_mutex);
 	mutex_init(&fs_info->volume_mutex);
 	mutex_init(&fs_info->tree_reloc_mutex);
+
+	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
+	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
+
 	init_waitqueue_head(&fs_info->transaction_throttle);
 	init_waitqueue_head(&fs_info->transaction_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 15d62a9214b7..178df4c67de4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -31,6 +31,7 @@
 #include "volumes.h"
 #include "locking.h"
 #include "ref-cache.h"
+#include "free-space-cache.h"
 
 #define PENDING_EXTENT_INSERT 0
 #define PENDING_EXTENT_DELETE 1
@@ -324,7 +325,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
 	return cache;
 }
 
-static inline void put_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
 {
 	if (atomic_dec_and_test(&cache->count))
 		kfree(cache);
@@ -397,12 +398,12 @@ again:
 			    div_factor(cache->key.offset, factor)) {
 				group_start = cache->key.objectid;
 				spin_unlock(&cache->lock);
-				put_block_group(cache);
+				btrfs_put_block_group(cache);
 				goto found;
 			}
 		}
 		spin_unlock(&cache->lock);
-		put_block_group(cache);
+		btrfs_put_block_group(cache);
 		cond_resched();
 	}
 	if (!wrapped) {
@@ -1592,7 +1593,7 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
 	if (!block_group || block_group->ro)
 		readonly = 1;
 	if (block_group)
-		put_block_group(block_group);
+		btrfs_put_block_group(block_group);
 	return readonly;
 }
 
@@ -2016,7 +2017,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 				WARN_ON(ret);
 			}
 		}
-		put_block_group(cache);
+		btrfs_put_block_group(cache);
 		total -= num_bytes;
 		bytenr += num_bytes;
 	}
@@ -2033,7 +2034,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 		return 0;
 
 	bytenr = cache->key.objectid;
-	put_block_group(cache);
+	btrfs_put_block_group(cache);
 
 	return bytenr;
 }
@@ -2077,7 +2078,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 			if (cache->cached)
 				btrfs_add_free_space(cache, bytenr, len);
 		}
-		put_block_group(cache);
+		btrfs_put_block_group(cache);
 		bytenr += len;
 		num -= len;
 	}
@@ -2108,7 +2109,7 @@ static int update_reserved_extents(struct btrfs_root *root,
 		}
 		spin_unlock(&cache->lock);
 		spin_unlock(&cache->space_info->lock);
-		put_block_group(cache);
+		btrfs_put_block_group(cache);
 		bytenr += len;
 		num -= len;
 	}
@@ -2543,12 +2544,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	struct btrfs_root *root = orig_root->fs_info->extent_root;
-	u64 *last_ptr = NULL;
+	struct btrfs_free_cluster *last_ptr = NULL;
 	struct btrfs_block_group_cache *block_group = NULL;
 	int empty_cluster = 2 * 1024 * 1024;
 	int allowed_chunk_alloc = 0;
-	int using_hint = 0;
 	struct btrfs_space_info *space_info;
+	int last_ptr_loop = 0;
+	int loop = 0;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -2561,38 +2563,39 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		allowed_chunk_alloc = 1;
 
 	if (data & BTRFS_BLOCK_GROUP_METADATA) {
-		last_ptr = &root->fs_info->last_alloc;
+		last_ptr = &root->fs_info->meta_alloc_cluster;
 		if (!btrfs_test_opt(root, SSD))
 			empty_cluster = 64 * 1024;
 	}
 
-	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
-		last_ptr = &root->fs_info->last_data_alloc;
+	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+		last_ptr = &root->fs_info->data_alloc_cluster;
+	}
 
 	if (last_ptr) {
-		if (*last_ptr)
-			hint_byte = *last_ptr;
-		else
-			empty_size += empty_cluster;
-	} else {
-		empty_cluster = 0;
+		spin_lock(&last_ptr->lock);
+		if (last_ptr->block_group)
+			hint_byte = last_ptr->window_start;
+		spin_unlock(&last_ptr->lock);
 	}
+
 	search_start = max(search_start, first_logical_byte(root, 0));
 	search_start = max(search_start, hint_byte);
 
+	if (!last_ptr) {
+		empty_cluster = 0;
+		loop = 1;
+	}
+
 	if (search_start == hint_byte) {
-		using_hint = 1;
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
 		if (block_group && block_group_bits(block_group, data)) {
			down_read(&space_info->groups_sem);
 			goto have_block_group;
 		} else if (block_group) {
-			put_block_group(block_group);
+			btrfs_put_block_group(block_group);
 		}
-
-		empty_size += empty_cluster;
-		using_hint = 0;
 	}
 
 search:
@@ -2609,7 +2612,7 @@ have_block_group:
 			ret = cache_block_group(root, block_group);
 			mutex_unlock(&block_group->cache_mutex);
 			if (ret) {
-				put_block_group(block_group);
+				btrfs_put_block_group(block_group);
 				break;
 			}
 		}
@@ -2617,11 +2620,88 @@ have_block_group:
 		if (unlikely(block_group->ro))
 			goto loop;
 
+		if (last_ptr) {
+			/*
+			 * the refill lock keeps out other
+			 * people trying to start a new cluster
+			 */
+			spin_lock(&last_ptr->refill_lock);
+			offset = btrfs_alloc_from_cluster(block_group, last_ptr,
+						 num_bytes, search_start);
+			if (offset) {
+				/* we have a block, we're done */
+				spin_unlock(&last_ptr->refill_lock);
+				goto checks;
+			}
+
+			spin_lock(&last_ptr->lock);
+			/*
+			 * whoops, this cluster doesn't actually point to
+			 * this block group.  Get a ref on the block
+			 * group it does point to and try again
+			 */
+			if (!last_ptr_loop && last_ptr->block_group &&
+			    last_ptr->block_group != block_group) {
+
+				btrfs_put_block_group(block_group);
+				block_group = last_ptr->block_group;
+				atomic_inc(&block_group->count);
+				spin_unlock(&last_ptr->lock);
+				spin_unlock(&last_ptr->refill_lock);
+
+				last_ptr_loop = 1;
+				search_start = block_group->key.objectid;
+				goto have_block_group;
+			}
+			spin_unlock(&last_ptr->lock);
+
+			/*
+			 * this cluster didn't work out, free it and
+			 * start over
+			 */
+			btrfs_return_cluster_to_free_space(NULL, last_ptr);
+
+			last_ptr_loop = 0;
+
+			/* allocate a cluster in this block group */
+			ret = btrfs_find_space_cluster(trans,
+					       block_group, last_ptr,
+					       offset, num_bytes,
+					       empty_cluster + empty_size);
+			if (ret == 0) {
+				/*
+				 * now pull our allocation out of this
+				 * cluster
+				 */
+				offset = btrfs_alloc_from_cluster(block_group,
+						  last_ptr, num_bytes,
+						  search_start);
+				if (offset) {
+					/* we found one, proceed */
+					spin_unlock(&last_ptr->refill_lock);
+					goto checks;
+				}
+			}
+			/*
+			 * at this point we either didn't find a cluster
+			 * or we weren't able to allocate a block from our
+			 * cluster.  Free the cluster we've been trying
+			 * to use, and go to the next block group
+			 */
+			if (loop < 2) {
+				btrfs_return_cluster_to_free_space(NULL,
+								   last_ptr);
+				spin_unlock(&last_ptr->refill_lock);
+				goto loop;
+			}
+			spin_unlock(&last_ptr->refill_lock);
+		}
+
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
 		if (!offset)
 			goto loop;
-
+checks:
 		search_start = stripe_align(root, offset);
 
 		/* move on to the next group */
@@ -2637,11 +2717,6 @@ have_block_group:
 			goto loop;
 		}
 
-		if (using_hint && search_start > hint_byte) {
-			btrfs_add_free_space(block_group, offset, num_bytes);
-			goto loop;
-		}
-
 		if (exclude_nr > 0 &&
 		    (search_start + num_bytes > exclude_start &&
 		     search_start < exclude_start + exclude_nr)) {
@@ -2670,33 +2745,33 @@ have_block_group:
 		/* we are all good, lets return */
 		break;
loop:
-		put_block_group(block_group);
-		if (using_hint) {
-			empty_size += empty_cluster;
-			using_hint = 0;
-			up_read(&space_info->groups_sem);
-			goto search;
-		}
+		btrfs_put_block_group(block_group);
 	}
 	up_read(&space_info->groups_sem);
 
-	if (!ins->objectid && (empty_size || allowed_chunk_alloc)) {
-		int try_again = empty_size;
-
-		empty_size = 0;
+	/* loop == 0, try to find a clustered alloc in every block group
+	 * loop == 1, try again after forcing a chunk allocation
+	 * loop == 2, set empty_size and empty_cluster to 0 and try again
+	 */
+	if (!ins->objectid && loop < 3 &&
+	    (empty_size || empty_cluster || allowed_chunk_alloc)) {
+		if (loop >= 2) {
+			empty_size = 0;
+			empty_cluster = 0;
+		}
 
 		if (allowed_chunk_alloc) {
 			ret = do_chunk_alloc(trans, root, num_bytes +
 					     2 * 1024 * 1024, data, 1);
-			if (!ret)
-				try_again = 1;
 			allowed_chunk_alloc = 0;
 		} else {
 			space_info->force_alloc = 1;
 		}
 
-		if (try_again)
+		if (loop < 3) {
+			loop++;
 			goto search;
+		}
 		ret = -ENOSPC;
 	} else if (!ins->objectid) {
 		ret = -ENOSPC;
@@ -2707,9 +2782,7 @@ loop:
 		if (!(data & BTRFS_BLOCK_GROUP_DATA))
 			trans->block_group = block_group->key.objectid;
 
-		if (last_ptr)
-			*last_ptr = ins->objectid + ins->offset;
-		put_block_group(block_group);
+		btrfs_put_block_group(block_group);
 		ret = 0;
 	}
 
@@ -2817,7 +2890,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
 	ret = btrfs_discard_extent(root, start, len);
 
 	btrfs_add_free_space(cache, start, len);
-	put_block_group(cache);
+	btrfs_put_block_group(cache);
 	update_reserved_extents(root, start, len, 0);
 
 	return ret;
@@ -2955,7 +3028,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
 	ret = btrfs_remove_free_space(block_group, ins->objectid,
 				      ins->offset);
 	BUG_ON(ret);
-	put_block_group(block_group);
+	btrfs_put_block_group(block_group);
 	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
 					    ref_generation, owner, ins, 1);
 	return ret;
@@ -5644,7 +5717,7 @@ next:
 	WARN_ON(block_group->reserved > 0);
 	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
 	spin_unlock(&block_group->lock);
-	put_block_group(block_group);
+	btrfs_put_block_group(block_group);
 	ret = 0;
out:
 	btrfs_free_path(path);
@@ -5774,6 +5847,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		spin_lock_init(&cache->tree_lock);
 		mutex_init(&cache->cache_mutex);
 		INIT_LIST_HEAD(&cache->list);
+		INIT_LIST_HEAD(&cache->cluster_list);
 		read_extent_buffer(leaf, &cache->item,
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
 				   sizeof(cache->item));
@@ -5830,6 +5904,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	spin_lock_init(&cache->tree_lock);
 	mutex_init(&cache->cache_mutex);
 	INIT_LIST_HEAD(&cache->list);
+	INIT_LIST_HEAD(&cache->cluster_list);
 
 	btrfs_set_block_group_used(&cache->item, bytes_used);
 	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
@@ -5889,8 +5964,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_unlock(&block_group->space_info->lock);
 	block_group->space_info->full = 0;
 
-	put_block_group(block_group);
-	put_block_group(block_group);
+	btrfs_put_block_group(block_group);
+	btrfs_put_block_group(block_group);
 
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0)
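The find_free_extent() rework above replaces the single try_again flag with the numbered ladder documented in the added comment: pass 0 tries clustered allocation in every block group, pass 1 retries after forcing a chunk allocation, and the later passes drop the empty_size/empty_cluster padding entirely before giving up with -ENOSPC. A compilable sketch of just that control flow follows; try_alloc_fn, alloc_with_retries, and the demo callback are stand-ins invented for illustration, and the real function also manages clusters, locking, and block group iteration that are elided here.

#include <stdbool.h>
#include <stdio.h>

typedef bool (*try_alloc_fn)(unsigned long empty_size,
			     unsigned long empty_cluster);

/* models the retry ladder at the bottom of find_free_extent() */
static bool alloc_with_retries(try_alloc_fn try_alloc, bool allowed_chunk_alloc)
{
	unsigned long empty_size = 0;		/* caller-supplied padding */
	unsigned long empty_cluster = 2 * 1024 * 1024;
	int loop = 0;

search:
	if (try_alloc(empty_size, empty_cluster))
		return true;			/* ins->objectid was set */

	if (loop < 3 && (empty_size || empty_cluster || allowed_chunk_alloc)) {
		if (loop >= 2) {
			/* final passes stop asking for extra padding */
			empty_size = 0;
			empty_cluster = 0;
		}
		/* the kernel would force a chunk allocation here, once */
		allowed_chunk_alloc = false;
		loop++;
		goto search;
	}
	return false;				/* -ENOSPC */
}

/* demo callback: only succeeds once the padding has been dropped */
static bool demo_alloc(unsigned long empty_size, unsigned long empty_cluster)
{
	return empty_size == 0 && empty_cluster == 0;
}

int main(void)
{
	printf("allocated: %s\n",
	       alloc_with_retries(demo_alloc, true) ? "yes" : "no");
	return 0;
}

In the kernel the ladder starts at loop 1 when no cluster is in play (last_ptr is NULL), skipping the clustered pass entirely.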
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index df19b60eef61..3fdadd28e935 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,15 @@
 
 #include <linux/sched.h>
 #include "ctree.h"
+#include "free-space-cache.h"
+#include "transaction.h"
+
+struct btrfs_free_space {
+	struct rb_node bytes_index;
+	struct rb_node offset_index;
+	u64 offset;
+	u64 bytes;
+};
 
 static int tree_insert_offset(struct rb_root *root, u64 offset,
 			      struct rb_node *node)
@@ -371,12 +380,58 @@ u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
 	return ret;
 }
 
+/*
+ * for a given cluster, put all of its extents back into the free
+ * space cache.  If the block group passed doesn't match the block group
+ * pointed to by the cluster, someone else raced in and freed the
+ * cluster already.  In that case, we just return without changing anything
+ */
+static int
+__btrfs_return_cluster_to_free_space(
+			     struct btrfs_block_group_cache *block_group,
+			     struct btrfs_free_cluster *cluster)
+{
+	struct btrfs_free_space *entry;
+	struct rb_node *node;
+
+	spin_lock(&cluster->lock);
+	if (cluster->block_group != block_group)
+		goto out;
+
+	cluster->window_start = 0;
+	node = rb_first(&cluster->root);
+	while (node) {
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
+		node = rb_next(&entry->offset_index);
+		rb_erase(&entry->offset_index, &cluster->root);
+		link_free_space(block_group, entry);
+	}
+	list_del_init(&cluster->block_group_list);
+
+	btrfs_put_block_group(cluster->block_group);
+	cluster->block_group = NULL;
+	cluster->root.rb_node = NULL;
+out:
+	spin_unlock(&cluster->lock);
+	return 0;
+}
+
 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
 {
 	struct btrfs_free_space *info;
 	struct rb_node *node;
+	struct btrfs_free_cluster *cluster;
+	struct btrfs_free_cluster *safe;
 
 	spin_lock(&block_group->tree_lock);
+
+	list_for_each_entry_safe(cluster, safe, &block_group->cluster_list,
+				 block_group_list) {
+
+		WARN_ON(cluster->block_group != block_group);
+		__btrfs_return_cluster_to_free_space(block_group, cluster);
+	}
+
 	while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
 		info = rb_entry(node, struct btrfs_free_space, bytes_index);
 		unlink_free_space(block_group, info);
@@ -417,3 +472,245 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 
 	return ret;
 }
+
+/*
+ * given a cluster, put all of its extents back into the free space
+ * cache.  If a block group is passed, this function will only free
+ * a cluster that belongs to the passed block group.
+ *
+ * Otherwise, it'll get a reference on the block group pointed to by the
+ * cluster and remove the cluster from it.
+ */
+int btrfs_return_cluster_to_free_space(
+			       struct btrfs_block_group_cache *block_group,
+			       struct btrfs_free_cluster *cluster)
+{
+	int ret;
+
+	/* first, get a safe pointer to the block group */
+	spin_lock(&cluster->lock);
+	if (!block_group) {
+		block_group = cluster->block_group;
+		if (!block_group) {
+			spin_unlock(&cluster->lock);
+			return 0;
+		}
+	} else if (cluster->block_group != block_group) {
+		/* someone else has already freed it, don't redo their work */
+		spin_unlock(&cluster->lock);
+		return 0;
+	}
+	atomic_inc(&block_group->count);
+	spin_unlock(&cluster->lock);
+
+	/* now return any extents the cluster had on it */
+	spin_lock(&block_group->tree_lock);
+	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
+	spin_unlock(&block_group->tree_lock);
+
+	/* finally drop our ref */
+	btrfs_put_block_group(block_group);
+	return ret;
+}
+
+/*
+ * given a cluster, try to allocate 'bytes' from it, returns 0
+ * if it couldn't find anything suitably large, or a logical disk offset
+ * if things worked out
+ */
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
+			     struct btrfs_free_cluster *cluster, u64 bytes,
+			     u64 min_start)
+{
+	struct btrfs_free_space *entry = NULL;
+	struct rb_node *node;
+	u64 ret = 0;
+
+	spin_lock(&cluster->lock);
+	if (bytes > cluster->max_size)
+		goto out;
+
+	if (cluster->block_group != block_group)
+		goto out;
+
+	node = rb_first(&cluster->root);
+	if (!node)
+		goto out;
+
+	entry = rb_entry(node, struct btrfs_free_space, offset_index);
+
+	while (1) {
+		if (entry->bytes < bytes || entry->offset < min_start) {
+			struct rb_node *node;
+
+			node = rb_next(&entry->offset_index);
+			if (!node)
+				break;
+			entry = rb_entry(node, struct btrfs_free_space,
+					 offset_index);
+			continue;
+		}
+		ret = entry->offset;
+
+		entry->offset += bytes;
+		entry->bytes -= bytes;
+
+		if (entry->bytes == 0) {
+			rb_erase(&entry->offset_index, &cluster->root);
+			kfree(entry);
+		}
+		break;
+	}
+out:
+	spin_unlock(&cluster->lock);
+	return ret;
+}
+
+/*
+ * here we try to find a cluster of blocks in a block group.  The goal
+ * is to find at least bytes free and up to empty_size + bytes free.
+ * We might not find them all in one contiguous area.
+ *
+ * returns zero and sets up cluster if things worked out, otherwise
+ * it returns -ENOSPC
+ */
+int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_block_group_cache *block_group,
+			     struct btrfs_free_cluster *cluster,
+			     u64 offset, u64 bytes, u64 empty_size)
+{
+	struct btrfs_free_space *entry = NULL;
+	struct rb_node *node;
+	struct btrfs_free_space *next;
+	struct btrfs_free_space *last;
+	u64 min_bytes;
+	u64 window_start;
+	u64 window_free;
+	u64 max_extent = 0;
+	int total_retries = 0;
+	int ret;
+
+	/* for metadata, allow allocations with more holes */
+	if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+		/*
+		 * we want to do larger allocations when we are
+		 * flushing out the delayed refs, it helps prevent
+		 * making more work as we go along.
+		 */
+		if (trans->transaction->delayed_refs.flushing)
+			min_bytes = max(bytes, (bytes + empty_size) >> 1);
+		else
+			min_bytes = max(bytes, (bytes + empty_size) >> 4);
+	} else
+		min_bytes = max(bytes, (bytes + empty_size) >> 2);
+
+	spin_lock(&block_group->tree_lock);
+	spin_lock(&cluster->lock);
+
+	/* someone already found a cluster, hooray */
+	if (cluster->block_group) {
+		ret = 0;
+		goto out;
+	}
+again:
+	min_bytes = min(min_bytes, bytes + empty_size);
+	entry = tree_search_bytes(&block_group->free_space_bytes,
+				  offset, min_bytes);
+	if (!entry) {
+		ret = -ENOSPC;
+		goto out;
+	}
+	window_start = entry->offset;
+	window_free = entry->bytes;
+	last = entry;
+	max_extent = entry->bytes;
+
+	while (1) {
+		/* our window is just right, let's fill it */
+		if (window_free >= bytes + empty_size)
+			break;
+
+		node = rb_next(&last->offset_index);
+		if (!node) {
+			ret = -ENOSPC;
+			goto out;
+		}
+		next = rb_entry(node, struct btrfs_free_space, offset_index);
+
+		/*
+		 * we haven't filled the empty size and the window is
+		 * very large.  reset and try again
+		 */
+		if (next->offset - window_start > (bytes + empty_size) * 2) {
+			entry = next;
+			window_start = entry->offset;
+			window_free = entry->bytes;
+			last = entry;
+			max_extent = 0;
+			total_retries++;
+			if (total_retries % 256 == 0) {
+				if (min_bytes >= (bytes + empty_size)) {
+					ret = -ENOSPC;
+					goto out;
+				}
+				/*
+				 * grow our allocation a bit, we're not having
+				 * much luck
+				 */
+				min_bytes *= 2;
+				goto again;
+			}
+		} else {
+			last = next;
+			window_free += next->bytes;
+			if (entry->bytes > max_extent)
+				max_extent = entry->bytes;
+		}
+	}
+
+	cluster->window_start = entry->offset;
+
+	/*
+	 * now we've found our entries, pull them out of the free space
+	 * cache and put them into the cluster rbtree
+	 *
+	 * The cluster includes an rbtree, but only uses the offset index
+	 * of each free space cache entry.
+	 */
+	while (1) {
+		node = rb_next(&entry->offset_index);
+		unlink_free_space(block_group, entry);
+		ret = tree_insert_offset(&cluster->root, entry->offset,
+					 &entry->offset_index);
+		BUG_ON(ret);
+
+		if (!node || entry == last)
+			break;
+
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
+	}
+	ret = 0;
+	cluster->max_size = max_extent;
+	atomic_inc(&block_group->count);
+	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
+	cluster->block_group = block_group;
+out:
+	spin_unlock(&cluster->lock);
+	spin_unlock(&block_group->tree_lock);
+
+	return ret;
+}
+
+/*
+ * simple code to zero out a cluster
+ */
+void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
+{
+	spin_lock_init(&cluster->lock);
+	spin_lock_init(&cluster->refill_lock);
+	cluster->root.rb_node = NULL;
+	cluster->max_size = 0;
+	INIT_LIST_HEAD(&cluster->block_group_list);
+	cluster->block_group = NULL;
+}
+
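The heart of the new file is btrfs_find_space_cluster(): starting from the first free extent at or past the target offset, it slides a window across the offset-sorted free space entries, accumulating window_free until at least bytes + empty_size is covered, and restarting the window whenever the span grows past twice that target. The self-contained sketch below models the window search over a plain array instead of the rbtree; find_window, struct free_extent, and the example extents are invented for illustration.

#include <stdint.h>
#include <stdio.h>

struct free_extent {
	uint64_t offset;
	uint64_t bytes;
};

/* returns the index where a big-enough window starts, or -1 (-ENOSPC) */
static int find_window(const struct free_extent *e, int nr, uint64_t target)
{
	int start = 0;
	uint64_t window_free;

	if (nr == 0)
		return -1;
	window_free = e[0].bytes;

	for (int i = 1; ; i++) {
		if (window_free >= target)
			return start;	/* our window is just right */
		if (i >= nr)
			return -1;	/* ran out of free extents */

		/* window grew too sparse: restart it at the next extent */
		if (e[i].offset - e[start].offset > target * 2) {
			start = i;
			window_free = e[i].bytes;
		} else {
			window_free += e[i].bytes;
		}
	}
}

int main(void)
{
	struct free_extent map[] = {
		{ 0, 4096 },
		{ 8192, 4096 },
		{ 1 << 20, 65536 },
		{ (1 << 20) + 65536, 65536 },
	};

	/* the two small extents get abandoned; the window restarts at index 2 */
	printf("window starts at extent %d\n", find_window(map, 4, 100000));
	return 0;
}

The kernel version additionally enforces a minimum entry size (min_bytes), doubling it every 256 window resets before retrying; that refinement is omitted here.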
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
new file mode 100644
index 000000000000..ab0bdc0a63ce
--- /dev/null
+++ b/fs/btrfs/free-space-cache.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_FREE_SPACE_CACHE
+#define __BTRFS_FREE_SPACE_CACHE
+
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+			 u64 bytenr, u64 size);
+int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+			    u64 bytenr, u64 size);
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+				   *block_group);
+u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
+			       u64 offset, u64 bytes, u64 empty_size);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+			   u64 bytes);
+u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
+int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_block_group_cache *block_group,
+			     struct btrfs_free_cluster *cluster,
+			     u64 offset, u64 bytes, u64 empty_size);
+void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
+			     struct btrfs_free_cluster *cluster, u64 bytes,
+			     u64 min_start);
+int btrfs_return_cluster_to_free_space(
+			       struct btrfs_block_group_cache *block_group,
+			       struct btrfs_free_cluster *cluster);
+#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 664782c6a2df..3e8225de4e9d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -53,8 +53,6 @@ static noinline int join_transaction(struct btrfs_root *root)
 					     GFP_NOFS);
 	BUG_ON(!cur_trans);
 	root->fs_info->generation++;
-	root->fs_info->last_alloc = 0;
-	root->fs_info->last_data_alloc = 0;
 	cur_trans->num_writers = 1;
 	cur_trans->num_joined = 0;
 	cur_trans->transid = root->fs_info->generation;