aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-06-25 16:01:31 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit: 3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 (patch)
tree: df9421e7b1d0c06d5efb8659f4317438d3d511d7
parent: 1b1e2135dc1e4efbcf25ac9ac9979316d4e1193e (diff)
Btrfs: Online btree defragmentation fixes
The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function.

This also disables the automatic btree defrag, because it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--  fs/btrfs/ctree.c        170
-rw-r--r--  fs/btrfs/ctree.h          7
-rw-r--r--  fs/btrfs/disk-io.c       61
-rw-r--r--  fs/btrfs/disk-io.h        6
-rw-r--r--  fs/btrfs/extent-tree.c    2
-rw-r--r--  fs/btrfs/super.c          1
-rw-r--r--  fs/btrfs/transaction.c   35
-rw-r--r--  fs/btrfs/transaction.h    1
-rw-r--r--  fs/btrfs/tree-defrag.c   36
9 files changed, 190 insertions, 129 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7f4cc2b88d09..0cb80f32a9c7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -63,10 +63,9 @@ void btrfs_free_path(struct btrfs_path *p)
63void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 63void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64{ 64{
65 int i; 65 int i;
66 int keep = p->keep_locks;
67 int skip = p->skip_locking;
68 66
69 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 67 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
68 p->slots[i] = 0;
70 if (!p->nodes[i]) 69 if (!p->nodes[i])
71 continue; 70 continue;
72 if (p->locks[i]) { 71 if (p->locks[i]) {
@@ -74,10 +73,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
74 p->locks[i] = 0; 73 p->locks[i] = 0;
75 } 74 }
76 free_extent_buffer(p->nodes[i]); 75 free_extent_buffer(p->nodes[i]);
76 p->nodes[i] = NULL;
77 } 77 }
78 memset(p, 0, sizeof(*p));
79 p->keep_locks = keep;
80 p->skip_locking = skip;
81} 78}
82 79
83struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 80struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
@@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
463 search_start = cur->start; 460 search_start = cur->start;
464 last_block = cur->start; 461 last_block = cur->start;
465 *last_ret = search_start; 462 *last_ret = search_start;
466 if (parent_level == 1)
467 btrfs_clear_buffer_defrag(cur);
468 btrfs_tree_unlock(cur); 463 btrfs_tree_unlock(cur);
469 free_extent_buffer(cur); 464 free_extent_buffer(cur);
470 } 465 }
@@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2969 return 1; 2964 return 1;
2970} 2965}
2971 2966
2967/*
2968 * A helper function to walk down the tree starting at min_key, and looking
2969 * for nodes or leaves that are either in cache or have a minimum
2970 * transaction id. This is used by the btree defrag code, but could
2971 * also be used to search for blocks that have changed since a given
2972 * transaction id.
2973 *
2974 * This does not cow, but it does stuff the starting key it finds back
2975 * into min_key, so you can call btrfs_search_slot with cow=1 on the
2976 * key and get a writable path.
2977 *
2978 * This does lock as it descends, and path->keep_locks should be set
2979 * to 1 by the caller.
2980 *
2981 * This honors path->lowest_level to prevent descent past a given level
2982 * of the tree.
2983 *
2984 * returns zero if something useful was found, < 0 on error and 1 if there
2985 * was nothing in the tree that matched the search criteria.
2986 */
2987int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
2988 struct btrfs_path *path, int cache_only,
2989 u64 min_trans)
2990{
2991 struct extent_buffer *cur;
2992 struct btrfs_key found_key;
2993 int slot;
2994 u32 nritems;
2995 int level;
2996 int ret = 1;
2997
2998again:
2999 cur = btrfs_lock_root_node(root);
3000 level = btrfs_header_level(cur);
3001 path->nodes[level] = cur;
3002 path->locks[level] = 1;
3003
3004 if (btrfs_header_generation(cur) < min_trans) {
3005 ret = 1;
3006 goto out;
3007 }
3008 while(1) {
3009 nritems = btrfs_header_nritems(cur);
3010 level = btrfs_header_level(cur);
3011 bin_search(cur, min_key, level, &slot);
3012
3013 /* at level = 0, we're done, setup the path and exit */
3014 if (level == 0) {
3015 ret = 0;
3016 path->slots[level] = slot;
3017 btrfs_item_key_to_cpu(cur, &found_key, slot);
3018 goto out;
3019 }
3020 /*
3021 * check this node pointer against the cache_only and
3022 * min_trans parameters. If it isn't in cache or is too
3023 * old, skip to the next one.
3024 */
3025 while(slot < nritems) {
3026 u64 blockptr;
3027 u64 gen;
3028 struct extent_buffer *tmp;
3029 blockptr = btrfs_node_blockptr(cur, slot);
3030 gen = btrfs_node_ptr_generation(cur, slot);
3031 if (gen < min_trans) {
3032 slot++;
3033 continue;
3034 }
3035 if (!cache_only)
3036 break;
3037
3038 tmp = btrfs_find_tree_block(root, blockptr,
3039 btrfs_level_size(root, level - 1));
3040
3041 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3042 free_extent_buffer(tmp);
3043 break;
3044 }
3045 if (tmp)
3046 free_extent_buffer(tmp);
3047 slot++;
3048 }
3049 /*
3050 * we didn't find a candidate key in this node, walk forward
3051 * and find another one
3052 */
3053 if (slot >= nritems) {
3054 ret = btrfs_find_next_key(root, path, min_key, level,
3055 cache_only, min_trans);
3056 if (ret == 0) {
3057 btrfs_release_path(root, path);
3058 goto again;
3059 } else {
3060 goto out;
3061 }
3062 }
3063 /* save our key for returning back */
3064 btrfs_node_key_to_cpu(cur, &found_key, slot);
3065 path->slots[level] = slot;
3066 if (level == path->lowest_level) {
3067 ret = 0;
3068 unlock_up(path, level, 1);
3069 goto out;
3070 }
3071 cur = read_node_slot(root, cur, slot);
3072
3073 btrfs_tree_lock(cur);
3074 path->locks[level - 1] = 1;
3075 path->nodes[level - 1] = cur;
3076 unlock_up(path, level, 1);
3077 }
3078out:
3079 if (ret == 0)
3080 memcpy(min_key, &found_key, sizeof(found_key));
3081 return ret;
3082}
3083
3084/*
3085 * this is similar to btrfs_next_leaf, but does not try to preserve
3086 * and fixup the path. It looks for and returns the next key in the
3087 * tree based on the current path and the cache_only and min_trans
3088 * parameters.
3089 *
3090 * 0 is returned if another key is found, < 0 if there are any errors
3091 * and 1 is returned if there are no higher keys in the tree
3092 *
3093 * path->keep_locks should be set to 1 on the search made before
3094 * calling this function.
3095 */
2972int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 3096int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
2973 struct btrfs_key *key, int lowest_level) 3097 struct btrfs_key *key, int lowest_level,
3098 int cache_only, u64 min_trans)
2974{ 3099{
2975 int level = lowest_level; 3100 int level = lowest_level;
2976 int slot; 3101 int slot;
@@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
2982 3107
2983 slot = path->slots[level] + 1; 3108 slot = path->slots[level] + 1;
2984 c = path->nodes[level]; 3109 c = path->nodes[level];
3110next:
2985 if (slot >= btrfs_header_nritems(c)) { 3111 if (slot >= btrfs_header_nritems(c)) {
2986 level++; 3112 level++;
2987 if (level == BTRFS_MAX_LEVEL) { 3113 if (level == BTRFS_MAX_LEVEL) {
@@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
2991 } 3117 }
2992 if (level == 0) 3118 if (level == 0)
2993 btrfs_item_key_to_cpu(c, key, slot); 3119 btrfs_item_key_to_cpu(c, key, slot);
2994 else 3120 else {
3121 u64 blockptr = btrfs_node_blockptr(c, slot);
3122 u64 gen = btrfs_node_ptr_generation(c, slot);
3123
3124 if (cache_only) {
3125 struct extent_buffer *cur;
3126 cur = btrfs_find_tree_block(root, blockptr,
3127 btrfs_level_size(root, level - 1));
3128 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
3129 slot++;
3130 if (cur)
3131 free_extent_buffer(cur);
3132 goto next;
3133 }
3134 free_extent_buffer(cur);
3135 }
3136 if (gen < min_trans) {
3137 slot++;
3138 goto next;
3139 }
2995 btrfs_node_key_to_cpu(c, key, slot); 3140 btrfs_node_key_to_cpu(c, key, slot);
3141 }
2996 return 0; 3142 return 0;
2997 } 3143 }
2998 return 1; 3144 return 1;
@@ -3095,6 +3241,12 @@ done:
3095 return 0; 3241 return 0;
3096} 3242}
3097 3243
3244/*
3245 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
3246 * searching until it gets past min_objectid or finds an item of 'type'
3247 *
3248 * returns 0 if something is found, 1 if nothing was found and < 0 on error
3249 */
3098int btrfs_previous_item(struct btrfs_root *root, 3250int btrfs_previous_item(struct btrfs_root *root,
3099 struct btrfs_path *path, u64 min_objectid, 3251 struct btrfs_path *path, u64 min_objectid,
3100 int type) 3252 int type)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ca8e6f15859e..a28796482b4a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -609,6 +609,7 @@ struct btrfs_root {
609 u64 last_inode_alloc; 609 u64 last_inode_alloc;
610 int ref_cows; 610 int ref_cows;
611 int track_dirty; 611 int track_dirty;
612 u64 defrag_trans_start;
612 struct btrfs_key defrag_progress; 613 struct btrfs_key defrag_progress;
613 struct btrfs_key defrag_max; 614 struct btrfs_key defrag_max;
614 int defrag_running; 615 int defrag_running;
@@ -1412,7 +1413,11 @@ int btrfs_previous_item(struct btrfs_root *root,
1412struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 1413struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
1413struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 1414struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
1414int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 1415int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
1415 struct btrfs_key *key, int lowest_level); 1416 struct btrfs_key *key, int lowest_level,
1417 int cache_only, u64 min_trans);
1418int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
1419 struct btrfs_path *path, int cache_only,
1420 u64 min_trans);
1416 1421
1417int btrfs_cow_block(struct btrfs_trans_handle *trans, 1422int btrfs_cow_block(struct btrfs_trans_handle *trans,
1418 struct btrfs_root *root, struct extent_buffer *buf, 1423 struct btrfs_root *root, struct extent_buffer *buf,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 31ca9f89388d..4cdc0b6a2672 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
295 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, 295 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
296 btrfs_header_generation(eb)); 296 btrfs_header_generation(eb));
297 BUG_ON(ret); 297 BUG_ON(ret);
298 btrfs_clear_buffer_defrag(eb);
299 found_start = btrfs_header_bytenr(eb); 298 found_start = btrfs_header_bytenr(eb);
300 if (found_start != start) { 299 if (found_start != start) {
301 printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", 300 printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
@@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
355 } 354 }
356 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 355 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
357 356
358 btrfs_clear_buffer_defrag(eb);
359 found_start = btrfs_header_bytenr(eb); 357 found_start = btrfs_header_bytenr(eb);
360 if (found_start != start) { 358 if (found_start != start) {
361 ret = -EIO; 359 ret = -EIO;
@@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
736 memset(&root->root_item, 0, sizeof(root->root_item)); 734 memset(&root->root_item, 0, sizeof(root->root_item));
737 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 735 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
738 memset(&root->root_kobj, 0, sizeof(root->root_kobj)); 736 memset(&root->root_kobj, 0, sizeof(root->root_kobj));
737 root->defrag_trans_start = fs_info->generation;
739 init_completion(&root->kobj_unregister); 738 init_completion(&root->kobj_unregister);
740 root->defrag_running = 0; 739 root->defrag_running = 0;
741 root->defrag_level = 0; 740 root->defrag_level = 0;
@@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg)
1168 goto sleep; 1167 goto sleep;
1169 } 1168 }
1170 mutex_unlock(&root->fs_info->trans_mutex); 1169 mutex_unlock(&root->fs_info->trans_mutex);
1171 btrfs_defrag_dirty_roots(root->fs_info);
1172 trans = btrfs_start_transaction(root, 1); 1170 trans = btrfs_start_transaction(root, 1);
1173 ret = btrfs_commit_transaction(trans, root); 1171 ret = btrfs_commit_transaction(trans, root);
1174sleep: 1172sleep:
@@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1434 tree_root, 1432 tree_root,
1435 "btrfs-transaction"); 1433 "btrfs-transaction");
1436 if (!fs_info->transaction_kthread) 1434 if (!fs_info->transaction_kthread)
1437 goto fail_trans_kthread; 1435 goto fail_cleaner;
1438 1436
1439 1437
1440 return tree_root; 1438 return tree_root;
1441 1439
1442fail_trans_kthread: 1440fail_cleaner:
1443 kthread_stop(fs_info->cleaner_kthread); 1441 kthread_stop(fs_info->cleaner_kthread);
1444fail_extent_root: 1442fail_extent_root:
1445 free_extent_buffer(extent_root->node); 1443 free_extent_buffer(extent_root->node);
@@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root)
1662 kthread_stop(root->fs_info->transaction_kthread); 1660 kthread_stop(root->fs_info->transaction_kthread);
1663 kthread_stop(root->fs_info->cleaner_kthread); 1661 kthread_stop(root->fs_info->cleaner_kthread);
1664 1662
1665 btrfs_defrag_dirty_roots(root->fs_info);
1666 btrfs_clean_old_snapshots(root); 1663 btrfs_clean_old_snapshots(root);
1667 trans = btrfs_start_transaction(root, 1); 1664 trans = btrfs_start_transaction(root, 1);
1668 ret = btrfs_commit_transaction(trans, root); 1665 ret = btrfs_commit_transaction(trans, root);
@@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
1794 return; 1791 return;
1795} 1792}
1796 1793
1797void btrfs_set_buffer_defrag(struct extent_buffer *buf)
1798{
1799 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
1800 struct inode *btree_inode = root->fs_info->btree_inode;
1801 set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
1802 buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
1803}
1804
1805void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
1806{
1807 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
1808 struct inode *btree_inode = root->fs_info->btree_inode;
1809 set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
1810 buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
1811 GFP_NOFS);
1812}
1813
1814int btrfs_buffer_defrag(struct extent_buffer *buf)
1815{
1816 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
1817 struct inode *btree_inode = root->fs_info->btree_inode;
1818 return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
1819 buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
1820}
1821
1822int btrfs_buffer_defrag_done(struct extent_buffer *buf)
1823{
1824 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
1825 struct inode *btree_inode = root->fs_info->btree_inode;
1826 return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
1827 buf->start, buf->start + buf->len - 1,
1828 EXTENT_DEFRAG_DONE, 0);
1829}
1830
1831int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
1832{
1833 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
1834 struct inode *btree_inode = root->fs_info->btree_inode;
1835 return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
1836 buf->start, buf->start + buf->len - 1,
1837 EXTENT_DEFRAG_DONE, GFP_NOFS);
1838}
1839
1840int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
1841{
1842 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
1843 struct inode *btree_inode = root->fs_info->btree_inode;
1844 return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
1845 buf->start, buf->start + buf->len - 1,
1846 EXTENT_DEFRAG, GFP_NOFS);
1847}
1848
1849int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 1794int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
1850{ 1795{
1851 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 1796 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index deff6b4815a7..353c3c50c957 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,12 +61,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
61int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 61int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
62int wait_on_tree_block_writeback(struct btrfs_root *root, 62int wait_on_tree_block_writeback(struct btrfs_root *root,
63 struct extent_buffer *buf); 63 struct extent_buffer *buf);
64void btrfs_set_buffer_defrag(struct extent_buffer *buf);
65void btrfs_set_buffer_defrag_done(struct extent_buffer *buf);
66int btrfs_buffer_defrag(struct extent_buffer *buf);
67int btrfs_buffer_defrag_done(struct extent_buffer *buf);
68int btrfs_clear_buffer_defrag(struct extent_buffer *buf);
69int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf);
70int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 64int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
71u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 65u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
72void btrfs_csum_final(u32 crc, char *result); 66void btrfs_csum_final(u32 crc, char *result);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dc3c03c6612d..5e0857ffbc35 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2095,8 +2095,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2095 2095
2096 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 2096 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2097 buf->start + buf->len - 1, GFP_NOFS); 2097 buf->start + buf->len - 1, GFP_NOFS);
2098 if (!btrfs_test_opt(root, SSD))
2099 btrfs_set_buffer_defrag(buf);
2100 trans->blocks_used++; 2098 trans->blocks_used++;
2101 return buf; 2099 return buf;
2102} 2100}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 726d6871fa13..5e28cf5c2e85 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -365,7 +365,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
365 return 0; 365 return 0;
366 } 366 }
367 btrfs_clean_old_snapshots(root); 367 btrfs_clean_old_snapshots(root);
368 btrfs_defrag_dirty_roots(root->fs_info);
369 trans = btrfs_start_transaction(root, 1); 368 trans = btrfs_start_transaction(root, 1);
370 ret = btrfs_commit_transaction(trans, root); 369 ret = btrfs_commit_transaction(trans, root);
371 sb->s_dirt = 0; 370 sb->s_dirt = 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8e909cb97c6d..98f422d9ab07 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -30,7 +30,6 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
30extern struct kmem_cache *btrfs_transaction_cachep; 30extern struct kmem_cache *btrfs_transaction_cachep;
31 31
32#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
33#define BTRFS_ROOT_DEFRAG_TAG 1
34 33
35static noinline void put_transaction(struct btrfs_transaction *transaction) 34static noinline void put_transaction(struct btrfs_transaction *transaction)
36{ 35{
@@ -92,9 +91,6 @@ static noinline int record_root_in_trans(struct btrfs_root *root)
92 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 91 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
93 (unsigned long)root->root_key.objectid, 92 (unsigned long)root->root_key.objectid,
94 BTRFS_ROOT_TRANS_TAG); 93 BTRFS_ROOT_TRANS_TAG);
95 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
96 (unsigned long)root->root_key.objectid,
97 BTRFS_ROOT_DEFRAG_TAG);
98 root->commit_root = btrfs_root_node(root); 94 root->commit_root = btrfs_root_node(root);
99 } else { 95 } else {
100 WARN_ON(1); 96 WARN_ON(1);
@@ -403,44 +399,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
403 cond_resched(); 399 cond_resched();
404 400
405 trans = btrfs_start_transaction(root, 1); 401 trans = btrfs_start_transaction(root, 1);
406 if (ret != -EAGAIN) 402 if (root->fs_info->closing || ret != -EAGAIN)
407 break; 403 break;
408 } 404 }
409 root->defrag_running = 0; 405 root->defrag_running = 0;
410 smp_mb(); 406 smp_mb();
411 radix_tree_tag_clear(&info->fs_roots_radix,
412 (unsigned long)root->root_key.objectid,
413 BTRFS_ROOT_DEFRAG_TAG);
414 btrfs_end_transaction(trans, root); 407 btrfs_end_transaction(trans, root);
415 return 0; 408 return 0;
416} 409}
417 410
418int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
419{
420 struct btrfs_root *gang[1];
421 struct btrfs_root *root;
422 int i;
423 int ret;
424 int err = 0;
425 u64 last = 0;
426
427 while(1) {
428 ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
429 (void **)gang, last,
430 ARRAY_SIZE(gang),
431 BTRFS_ROOT_DEFRAG_TAG);
432 if (ret == 0)
433 break;
434 for (i = 0; i < ret; i++) {
435 root = gang[i];
436 last = root->root_key.objectid + 1;
437 btrfs_defrag_root(root, 1);
438 }
439 }
440 btrfs_defrag_root(info->extent_root, 1);
441 return err;
442}
443
444static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 411static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
445 struct list_head *list) 412 struct list_head *list)
446{ 413{
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e1e5a06b65f4..9ccd5a5b170f 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -84,7 +84,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
84 84
85int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, 85int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest,
86 struct list_head *dead_list); 86 struct list_head *dead_list);
87int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
88int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 87int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
89int btrfs_clean_old_snapshots(struct btrfs_root *root); 88int btrfs_clean_old_snapshots(struct btrfs_root *root);
90int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 89int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index b17693f61fbc..cc2650b06952 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -32,10 +32,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
32 int wret; 32 int wret;
33 int level; 33 int level;
34 int orig_level; 34 int orig_level;
35 int i;
36 int is_extent = 0; 35 int is_extent = 0;
37 int next_key_ret = 0; 36 int next_key_ret = 0;
38 u64 last_ret = 0; 37 u64 last_ret = 0;
38 u64 min_trans = 0;
39
40 if (cache_only)
41 goto out;
39 42
40 if (root->fs_info->extent_root == root) { 43 if (root->fs_info->extent_root == root) {
41 /* 44 /*
@@ -43,10 +46,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
43 * we can't defrag the extent root without deadlock 46 * we can't defrag the extent root without deadlock
44 */ 47 */
45 goto out; 48 goto out;
46#if 0
47 mutex_lock(&root->fs_info->alloc_mutex);
48 is_extent = 1;
49#endif
50 } 49 }
51 50
52 if (root->ref_cows == 0 && !is_extent) 51 if (root->ref_cows == 0 && !is_extent)
@@ -84,6 +83,17 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
84 83
85 path->lowest_level = 1; 84 path->lowest_level = 1;
86 path->keep_locks = 1; 85 path->keep_locks = 1;
86 if (cache_only)
87 min_trans = root->defrag_trans_start;
88
89 ret = btrfs_search_forward(root, &key, path, cache_only, min_trans);
90 if (ret < 0)
91 goto out;
92 if (ret > 0) {
93 ret = 0;
94 goto out;
95 }
96 btrfs_release_path(root, path);
87 wret = btrfs_search_slot(trans, root, &key, path, 0, 1); 97 wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
88 98
89 if (wret < 0) { 99 if (wret < 0) {
@@ -95,7 +105,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
95 goto out; 105 goto out;
96 } 106 }
97 path->slots[1] = btrfs_header_nritems(path->nodes[1]); 107 path->slots[1] = btrfs_header_nritems(path->nodes[1]);
98 next_key_ret = btrfs_find_next_key(root, path, &key, 1); 108 next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only,
109 min_trans);
99 ret = btrfs_realloc_node(trans, root, 110 ret = btrfs_realloc_node(trans, root,
100 path->nodes[1], 0, 111 path->nodes[1], 0,
101 cache_only, &last_ret, 112 cache_only, &last_ret,
@@ -106,19 +117,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
106 ret = -EAGAIN; 117 ret = -EAGAIN;
107 } 118 }
108 119
109 for (i = 1; i < BTRFS_MAX_LEVEL; i++) { 120 btrfs_release_path(root, path);
110 if (path->locks[i]) {
111 btrfs_tree_unlock(path->nodes[i]);
112 path->locks[i] = 0;
113 }
114 if (path->nodes[i]) {
115 free_extent_buffer(path->nodes[i]);
116 path->nodes[i] = NULL;
117 }
118 }
119 if (is_extent) 121 if (is_extent)
120 btrfs_extent_post_op(trans, root); 122 btrfs_extent_post_op(trans, root);
121
122out: 123out:
123 if (is_extent) 124 if (is_extent)
124 mutex_unlock(&root->fs_info->alloc_mutex); 125 mutex_unlock(&root->fs_info->alloc_mutex);
@@ -138,6 +139,7 @@ done:
138 if (ret != -EAGAIN) { 139 if (ret != -EAGAIN) {
139 memset(&root->defrag_progress, 0, 140 memset(&root->defrag_progress, 0,
140 sizeof(root->defrag_progress)); 141 sizeof(root->defrag_progress));
142 root->defrag_trans_start = trans->transid;
141 } 143 }
142 return ret; 144 return ret;
143} 145}