aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2007-05-09 20:13:14 -0400
committerDavid Woodhouse <dwmw2@hera.kernel.org>2007-05-09 20:13:14 -0400
commite37c9e6921207cf503634b06bee37ecb7904408d (patch)
tree6f04d434ce9407b01b1fa36139ae8ffc7957fd1c
parent3e1ad54fe2839319c1aa66b954da0753f5b1f906 (diff)
Btrfs: many allocator fixes, pretty solid
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/TODO1
-rw-r--r--fs/btrfs/bit-radix.c10
-rw-r--r--fs/btrfs/bit-radix.h2
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c375
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/transaction.c3
8 files changed, 340 insertions, 59 deletions
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO
index 6a8c8cd03ca4..f6df246f26c3 100644
--- a/fs/btrfs/TODO
+++ b/fs/btrfs/TODO
@@ -7,6 +7,7 @@
7* Get rid of struct ctree_path, limiting tree levels held at one time 7* Get rid of struct ctree_path, limiting tree levels held at one time
8* Add generation number to key pointer in nodes 8* Add generation number to key pointer in nodes
9* Add generation number to inode 9* Add generation number to inode
10* Add ability to switch a block group from data to metadata or vice versa
10* Release 11* Release
11* Do real tree locking 12* Do real tree locking
12* Add extent mirroring (backup copies of blocks) 13* Add extent mirroring (backup copies of blocks)
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c
index 1a0271445dfb..8f9cd4277231 100644
--- a/fs/btrfs/bit-radix.c
+++ b/fs/btrfs/bit-radix.c
@@ -77,7 +77,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
77} 77}
78 78
79int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, 79int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
80 int nr) 80 unsigned long start, int nr)
81{ 81{
82 unsigned long *bits; 82 unsigned long *bits;
83 unsigned long *gang[4]; 83 unsigned long *gang[4];
@@ -85,10 +85,13 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
85 int ret; 85 int ret;
86 int i; 86 int i;
87 int total_found = 0; 87 int total_found = 0;
88 unsigned long slot;
88 89
89 ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); 90 slot = start / BIT_RADIX_BITS_PER_ARRAY;
91 ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
92 ARRAY_SIZE(gang));
93 found = start % BIT_RADIX_BITS_PER_ARRAY;
90 for (i = 0; i < ret && nr > 0; i++) { 94 for (i = 0; i < ret && nr > 0; i++) {
91 found = 0;
92 bits = gang[i]; 95 bits = gang[i];
93 while(nr > 0) { 96 while(nr > 0) {
94 found = find_next_bit(bits + 1, 97 found = find_next_bit(bits + 1,
@@ -104,6 +107,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
104 } else 107 } else
105 break; 108 break;
106 } 109 }
110 found = 0;
107 } 111 }
108 return total_found; 112 return total_found;
109} 113}
diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h
index 56aad4c7d7f7..4e717e30db4f 100644
--- a/fs/btrfs/bit-radix.h
+++ b/fs/btrfs/bit-radix.h
@@ -6,7 +6,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
6int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); 6int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
7int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); 7int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
8int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, 8int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
9 int nr); 9 unsigned long start, int nr);
10 10
11static inline void init_bit_radix(struct radix_tree_root *radix) 11static inline void init_bit_radix(struct radix_tree_root *radix)
12{ 12{
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cdb7c23c41f9..92a6078de827 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -259,7 +259,9 @@ struct btrfs_block_group_cache {
259 u64 first_free; 259 u64 first_free;
260 u64 last_alloc; 260 u64 last_alloc;
261 u64 pinned; 261 u64 pinned;
262 u64 last_prealloc;
262 int data; 263 int data;
264 int cached;
263}; 265};
264 266
265struct crypto_hash; 267struct crypto_hash;
@@ -273,6 +275,7 @@ struct btrfs_fs_info {
273 struct radix_tree_root dev_radix; 275 struct radix_tree_root dev_radix;
274 struct radix_tree_root block_group_radix; 276 struct radix_tree_root block_group_radix;
275 struct radix_tree_root block_group_data_radix; 277 struct radix_tree_root block_group_data_radix;
278 struct radix_tree_root extent_map_radix;
276 279
277 u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; 280 u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3];
278 int extent_tree_insert_nr; 281 int extent_tree_insert_nr;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7930458c227e..2dbf422a2b9a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -551,6 +551,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
551 551
552 init_bit_radix(&fs_info->pinned_radix); 552 init_bit_radix(&fs_info->pinned_radix);
553 init_bit_radix(&fs_info->pending_del_radix); 553 init_bit_radix(&fs_info->pending_del_radix);
554 init_bit_radix(&fs_info->extent_map_radix);
554 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); 555 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
555 INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); 556 INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS);
556 INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); 557 INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3edfc300289f..3ac9da453472 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -12,6 +12,97 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
12static int del_pending_extents(struct btrfs_trans_handle *trans, struct 12static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13 btrfs_root *extent_root); 13 btrfs_root *extent_root);
14 14
15static int cache_block_group(struct btrfs_root *root,
16 struct btrfs_block_group_cache *block_group)
17{
18 struct btrfs_path *path;
19 int ret;
20 struct btrfs_key key;
21 struct btrfs_leaf *leaf;
22 struct radix_tree_root *extent_radix;
23 int slot;
24 u64 i;
25 u64 last = 0;
26 u64 hole_size;
27 int found = 0;
28
29 root = root->fs_info->extent_root;
30 extent_radix = &root->fs_info->extent_map_radix;
31
32 if (block_group->cached)
33 return 0;
34 if (block_group->data)
35 return 0;
36 path = btrfs_alloc_path();
37 if (!path)
38 return -ENOMEM;
39printk("cache block group %Lu\n", block_group->key.objectid);
40 key.objectid = block_group->key.objectid;
41 key.flags = 0;
42 key.offset = 0;
43 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
44 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
45 if (ret < 0)
46 return ret;
47 if (ret && path->slots[0] > 0)
48 path->slots[0]--;
49 while(1) {
50 leaf = btrfs_buffer_leaf(path->nodes[0]);
51 slot = path->slots[0];
52 if (slot >= btrfs_header_nritems(&leaf->header)) {
53 ret = btrfs_next_leaf(root, path);
54 if (ret == 0)
55 continue;
56 else {
57 if (found) {
58 hole_size = block_group->key.objectid +
59 block_group->key.offset - last;
60 } else {
61 last = block_group->key.objectid;
62 hole_size = block_group->key.offset;
63 }
64 for (i = 0; i < hole_size; i++) {
65 set_radix_bit(extent_radix,
66 last + i);
67 }
68 break;
69 }
70 }
71 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
72 if (key.objectid >= block_group->key.objectid +
73 block_group->key.offset) {
74 if (found) {
75 hole_size = block_group->key.objectid +
76 block_group->key.offset - last;
77 } else {
78 last = block_group->key.objectid;
79 hole_size = block_group->key.offset;
80 }
81 for (i = 0; i < hole_size; i++) {
82 set_radix_bit(extent_radix, last + i);
83 }
84 break;
85 }
86 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
87 if (!found) {
88 last = key.objectid + key.offset;
89 found = 1;
90 } else {
91 hole_size = key.objectid - last;
92 for (i = 0; i < hole_size; i++) {
93 set_radix_bit(extent_radix, last + i);
94 }
95 last = key.objectid + key.offset;
96 }
97 }
98 path->slots[0]++;
99 }
100
101 block_group->cached = 1;
102 btrfs_free_path(path);
103 return 0;
104}
105
15static struct btrfs_block_group_cache *lookup_block_group(struct 106static struct btrfs_block_group_cache *lookup_block_group(struct
16 btrfs_fs_info *info, 107 btrfs_fs_info *info,
17 u64 blocknr) 108 u64 blocknr)
@@ -44,6 +135,63 @@ static struct btrfs_block_group_cache *lookup_block_group(struct
44 return NULL; 135 return NULL;
45} 136}
46 137
138static u64 leaf_range(struct btrfs_root *root)
139{
140 u64 size = BTRFS_LEAF_DATA_SIZE(root);
141 size = size / (sizeof(struct btrfs_extent_item) +
142 sizeof(struct btrfs_item));
143 return size;
144}
145
146static u64 find_search_start(struct btrfs_root *root,
147 struct btrfs_block_group_cache **cache_ret,
148 u64 search_start, int num)
149{
150 unsigned long gang[8];
151 int ret;
152 struct btrfs_block_group_cache *cache = *cache_ret;
153 u64 last = max(search_start, cache->key.objectid);
154
155 if (cache->data)
156 goto out;
157 if (num > 1) {
158 last = max(last, cache->last_prealloc);
159 }
160again:
161 cache_block_group(root, cache);
162 while(1) {
163 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
164 gang, last, ARRAY_SIZE(gang));
165 if (!ret)
166 goto out;
167 last = gang[ret-1] + 1;
168 if (num > 1) {
169 if (ret != ARRAY_SIZE(gang)) {
170 goto new_group;
171 }
172 if (gang[ret-1] - gang[0] > leaf_range(root)) {
173 continue;
174 }
175 }
176 if (gang[0] >= cache->key.objectid + cache->key.offset) {
177 goto new_group;
178 }
179 return gang[0];
180 }
181out:
182 return max(cache->last_alloc, search_start);
183
184new_group:
185 cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1);
186 if (!cache) {
187 return max((*cache_ret)->last_alloc, search_start);
188 }
189 cache = btrfs_find_block_group(root, cache,
190 last + cache->key.offset - 1, 0);
191 *cache_ret = cache;
192 goto again;
193}
194
47struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, 195struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
48 struct btrfs_block_group_cache 196 struct btrfs_block_group_cache
49 *hint, u64 search_start, 197 *hint, u64 search_start,
@@ -89,13 +237,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
89 } 237 }
90 last = hint->key.offset * 2; 238 last = hint->key.offset * 2;
91 if (hint->key.objectid >= last) 239 if (hint->key.objectid >= last)
92 last = max(search_start, hint->key.objectid - last); 240 last = max(search_start + hint->key.offset - 1,
241 hint->key.objectid - last);
93 else 242 else
94 last = hint->key.objectid + hint->key.offset; 243 last = hint->key.objectid + hint->key.offset;
95 hint_last = last; 244 hint_last = last;
96 } else { 245 } else {
97 hint_last = search_start; 246 if (hint)
98 last = search_start; 247 hint_last = max(hint->key.objectid, search_start);
248 else
249 hint_last = search_start;
250
251 last = hint_last;
99 } 252 }
100 while(1) { 253 while(1) {
101 ret = radix_tree_gang_lookup_tag(radix, (void **)cache, 254 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
@@ -357,13 +510,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
357 510
358static int update_block_group(struct btrfs_trans_handle *trans, 511static int update_block_group(struct btrfs_trans_handle *trans,
359 struct btrfs_root *root, 512 struct btrfs_root *root,
360 u64 blocknr, u64 num, int alloc) 513 u64 blocknr, u64 num, int alloc, int mark_free)
361{ 514{
362 struct btrfs_block_group_cache *cache; 515 struct btrfs_block_group_cache *cache;
363 struct btrfs_fs_info *info = root->fs_info; 516 struct btrfs_fs_info *info = root->fs_info;
364 u64 total = num; 517 u64 total = num;
365 u64 old_val; 518 u64 old_val;
366 u64 block_in_group; 519 u64 block_in_group;
520 u64 i;
367 521
368 while(total) { 522 while(total) {
369 cache = lookup_block_group(info, blocknr); 523 cache = lookup_block_group(info, blocknr);
@@ -380,18 +534,38 @@ static int update_block_group(struct btrfs_trans_handle *trans,
380 534
381 old_val = btrfs_block_group_used(&cache->item); 535 old_val = btrfs_block_group_used(&cache->item);
382 num = min(total, cache->key.offset - block_in_group); 536 num = min(total, cache->key.offset - block_in_group);
383 total -= num;
384 blocknr += num;
385 if (alloc) { 537 if (alloc) {
386 old_val += num; 538 old_val += num;
387 if (blocknr > cache->last_alloc) 539 if (blocknr > cache->last_alloc)
388 cache->last_alloc = blocknr; 540 cache->last_alloc = blocknr;
541 if (!cache->data) {
542 for (i = 0; i < num; i++) {
543 clear_radix_bit(&info->extent_map_radix,
544 blocknr + i);
545 }
546 }
389 } else { 547 } else {
390 old_val -= num; 548 old_val -= num;
391 if (blocknr < cache->first_free) 549 if (blocknr < cache->first_free)
392 cache->first_free = blocknr; 550 cache->first_free = blocknr;
551 if (!cache->data && mark_free) {
552 for (i = 0; i < num; i++) {
553 set_radix_bit(&info->extent_map_radix,
554 blocknr + i);
555 }
556 }
557 if (old_val < (cache->key.offset * 8) / 10 &&
558 old_val + num >= (cache->key.offset * 8) / 10) {
559printk("group %Lu now available\n", cache->key.objectid);
560 radix_tree_tag_set(cache->radix,
561 cache->key.objectid +
562 cache->key.offset - 1,
563 BTRFS_BLOCK_GROUP_AVAIL);
564 }
393 } 565 }
394 btrfs_set_block_group_used(&cache->item, old_val); 566 btrfs_set_block_group_used(&cache->item, old_val);
567 total -= num;
568 blocknr += num;
395 } 569 }
396 return 0; 570 return 0;
397} 571}
@@ -413,9 +587,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
413 int ret; 587 int ret;
414 int i; 588 int i;
415 struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; 589 struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
590 struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
416 591
417 while(1) { 592 while(1) {
418 ret = find_first_radix_bit(pinned_radix, gang, 593 ret = find_first_radix_bit(pinned_radix, gang, 0,
419 ARRAY_SIZE(gang)); 594 ARRAY_SIZE(gang));
420 if (!ret) 595 if (!ret)
421 break; 596 break;
@@ -430,6 +605,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
430 block_group->pinned--; 605 block_group->pinned--;
431 if (gang[i] < block_group->last_alloc) 606 if (gang[i] < block_group->last_alloc)
432 block_group->last_alloc = gang[i]; 607 block_group->last_alloc = gang[i];
608 if (gang[i] < block_group->last_prealloc)
609 block_group->last_prealloc = gang[i];
610 if (!block_group->data)
611 set_radix_bit(extent_radix, gang[i]);
433 } 612 }
434 try_remove_page(btree_inode->i_mapping, 613 try_remove_page(btree_inode->i_mapping,
435 gang[i] << (PAGE_CACHE_SHIFT - 614 gang[i] << (PAGE_CACHE_SHIFT -
@@ -508,7 +687,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
508 * remove an extent from the root, returns 0 on success 687 * remove an extent from the root, returns 0 on success
509 */ 688 */
510static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 689static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
511 *root, u64 blocknr, u64 num_blocks, int pin) 690 *root, u64 blocknr, u64 num_blocks, int pin,
691 int mark_free)
512{ 692{
513 struct btrfs_path *path; 693 struct btrfs_path *path;
514 struct btrfs_key key; 694 struct btrfs_key key;
@@ -556,10 +736,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
556 ret = btrfs_del_item(trans, extent_root, path); 736 ret = btrfs_del_item(trans, extent_root, path);
557 if (ret) 737 if (ret)
558 BUG(); 738 BUG();
559 ret = update_block_group(trans, root, blocknr, num_blocks, 0); 739 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
740 mark_free);
560 BUG_ON(ret); 741 BUG_ON(ret);
561 } 742 }
562 btrfs_release_path(extent_root, path);
563 btrfs_free_path(path); 743 btrfs_free_path(path);
564 finish_current_insert(trans, extent_root); 744 finish_current_insert(trans, extent_root);
565 return ret; 745 return ret;
@@ -585,7 +765,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
585 pinned_radix = &extent_root->fs_info->pinned_radix; 765 pinned_radix = &extent_root->fs_info->pinned_radix;
586 766
587 while(1) { 767 while(1) {
588 ret = find_first_radix_bit(pending_radix, gang, 768 ret = find_first_radix_bit(pending_radix, gang, 0,
589 ARRAY_SIZE(gang)); 769 ARRAY_SIZE(gang));
590 if (!ret) 770 if (!ret)
591 break; 771 break;
@@ -605,7 +785,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
605 wret = clear_radix_bit(pending_radix, gang[i]); 785 wret = clear_radix_bit(pending_radix, gang[i]);
606 BUG_ON(wret); 786 BUG_ON(wret);
607 wret = __free_extent(trans, extent_root, 787 wret = __free_extent(trans, extent_root,
608 gang[i], 1, 0); 788 gang[i], 1, 0, 0);
609 if (wret) 789 if (wret)
610 err = wret; 790 err = wret;
611 } 791 }
@@ -627,7 +807,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
627 pin_down_block(root, blocknr, 1); 807 pin_down_block(root, blocknr, 1);
628 return 0; 808 return 0;
629 } 809 }
630 ret = __free_extent(trans, root, blocknr, num_blocks, pin); 810 ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
631 pending_ret = del_pending_extents(trans, root->fs_info->extent_root); 811 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
632 return ret ? ret : pending_ret; 812 return ret ? ret : pending_ret;
633} 813}
@@ -688,18 +868,45 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
688check_failed: 868check_failed:
689 if (!full_scan && block_group->data != data) 869 if (!full_scan && block_group->data != data)
690 WARN_ON(1); 870 WARN_ON(1);
691 if (block_group->last_alloc > search_start) 871
692 search_start = block_group->last_alloc; 872 if (!data)
873 search_start = find_search_start(root, &block_group,
874 search_start, total_needed);
875 else
876 search_start = max(block_group->last_alloc, search_start);
877
693 btrfs_init_path(path); 878 btrfs_init_path(path);
694 ins->objectid = search_start; 879 ins->objectid = search_start;
695 ins->offset = 0; 880 ins->offset = 0;
696 start_found = 0; 881 start_found = 0;
882
697 ret = btrfs_search_slot(trans, root, ins, path, 0, 0); 883 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
698 if (ret < 0) 884 if (ret < 0)
699 goto error; 885 goto error;
700 886
701 if (path->slots[0] > 0) 887 if (path->slots[0] > 0) {
702 path->slots[0]--; 888 path->slots[0]--;
889 }
890
891 l = btrfs_buffer_leaf(path->nodes[0]);
892 btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
893 /*
894 * a rare case, go back one key if we hit a block group item
895 * instead of an extent item
896 */
897 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
898 key.objectid + key.offset >= search_start) {
899 ins->objectid = key.objectid;
900 ins->offset = key.offset - 1;
901 btrfs_release_path(root, path);
902 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
903 if (ret < 0)
904 goto error;
905
906 if (path->slots[0] > 0) {
907 path->slots[0]--;
908 }
909 }
703 910
704 while (1) { 911 while (1) {
705 l = btrfs_buffer_leaf(path->nodes[0]); 912 l = btrfs_buffer_leaf(path->nodes[0]);
@@ -725,21 +932,23 @@ check_failed:
725 ins->offset = search_end - ins->objectid; 932 ins->offset = search_end - ins->objectid;
726 goto check_pending; 933 goto check_pending;
727 } 934 }
935
728 btrfs_disk_key_to_cpu(&key, &l->items[slot].key); 936 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
729 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) 937 if (key.objectid >= search_start && key.objectid > last_block &&
730 goto next; 938 start_found) {
731 if (key.objectid >= search_start) { 939 if (last_block < search_start)
732 if (start_found) { 940 last_block = search_start;
733 if (last_block < search_start) 941 hole_size = key.objectid - last_block;
734 last_block = search_start; 942 if (hole_size >= num_blocks) {
735 hole_size = key.objectid - last_block; 943 ins->objectid = last_block;
736 if (hole_size >= num_blocks) { 944 ins->offset = hole_size;
737 ins->objectid = last_block; 945 goto check_pending;
738 ins->offset = hole_size;
739 goto check_pending;
740 }
741 } 946 }
742 } 947 }
948
949 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
950 goto next;
951
743 start_found = 1; 952 start_found = 1;
744 last_block = key.objectid + key.offset; 953 last_block = key.objectid + key.offset;
745 if (last_block >= block_group->key.objectid + 954 if (last_block >= block_group->key.objectid +
@@ -759,6 +968,7 @@ check_pending:
759 */ 968 */
760 btrfs_release_path(root, path); 969 btrfs_release_path(root, path);
761 BUG_ON(ins->objectid < search_start); 970 BUG_ON(ins->objectid < search_start);
971
762 if (ins->objectid + num_blocks >= search_end) { 972 if (ins->objectid + num_blocks >= search_end) {
763 if (full_scan) 973 if (full_scan)
764 return -ENOSPC; 974 return -ENOSPC;
@@ -780,7 +990,7 @@ check_pending:
780 info->extent_tree_insert[0] && 990 info->extent_tree_insert[0] &&
781 ins->objectid <= last) { 991 ins->objectid <= last) {
782 search_start = last + 1; 992 search_start = last + 1;
783 WARN_ON(1); 993 WARN_ON(!full_scan);
784 goto new_group; 994 goto new_group;
785 } 995 }
786 } 996 }
@@ -790,13 +1000,18 @@ check_pending:
790 if (ins->objectid + num_blocks > first && 1000 if (ins->objectid + num_blocks > first &&
791 ins->objectid <= info->extent_tree_prealloc[0]) { 1001 ins->objectid <= info->extent_tree_prealloc[0]) {
792 search_start = info->extent_tree_prealloc[0] + 1; 1002 search_start = info->extent_tree_prealloc[0] + 1;
793 WARN_ON(1); 1003 WARN_ON(!full_scan);
794 goto new_group; 1004 goto new_group;
795 } 1005 }
796 } 1006 }
797 if (fill_prealloc) { 1007 if (fill_prealloc) {
798 int nr; 1008 int nr;
799 test_block = ins->objectid; 1009 test_block = ins->objectid;
1010 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1011 leaf_range(root)) {
1012 total_found = 0;
1013 info->extent_tree_prealloc_nr = total_found;
1014 }
800 while(test_block < ins->objectid + ins->offset && 1015 while(test_block < ins->objectid + ins->offset &&
801 total_found < total_needed) { 1016 total_found < total_needed) {
802 nr = total_needed - total_found - 1; 1017 nr = total_needed - total_found - 1;
@@ -811,11 +1026,15 @@ check_pending:
811 } 1026 }
812 info->extent_tree_prealloc_nr = total_found; 1027 info->extent_tree_prealloc_nr = total_found;
813 } 1028 }
814 block_group = lookup_block_group(info, ins->objectid); 1029 if (!data) {
815 if (block_group) { 1030 block_group = lookup_block_group(info, ins->objectid);
816 block_group->last_alloc = ins->objectid; 1031 if (block_group) {
817 if (!data) 1032 if (fill_prealloc)
818 trans->block_group = block_group; 1033 block_group->last_prealloc =
1034 info->extent_tree_prealloc[total_needed-1];
1035 else
1036 trans->block_group = block_group;
1037 }
819 } 1038 }
820 ins->offset = num_blocks; 1039 ins->offset = num_blocks;
821 btrfs_free_path(path); 1040 btrfs_free_path(path);
@@ -824,6 +1043,7 @@ check_pending:
824new_group: 1043new_group:
825 if (search_start + num_blocks >= search_end) { 1044 if (search_start + num_blocks >= search_end) {
826 search_start = orig_search_start; 1045 search_start = orig_search_start;
1046printk("doing full scan!\n");
827 full_scan = 1; 1047 full_scan = 1;
828 } 1048 }
829 block_group = lookup_block_group(info, search_start); 1049 block_group = lookup_block_group(info, search_start);
@@ -871,26 +1091,57 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
871 info->extent_tree_insert[info->extent_tree_insert_nr++] = 1091 info->extent_tree_insert[info->extent_tree_insert_nr++] =
872 ins->objectid; 1092 ins->objectid;
873 ret = update_block_group(trans, root, 1093 ret = update_block_group(trans, root,
874 ins->objectid, ins->offset, 1); 1094 ins->objectid, ins->offset, 1, 0);
875 BUG_ON(ret); 1095 BUG_ON(ret);
876 return 0; 1096 return 0;
877 } 1097 }
1098
1099 /*
1100 * if we're doing a data allocation, preallocate room in the
1101 * extent tree first. This way the extent tree blocks end up
1102 * in the correct block group.
1103 */
1104 if (data) {
1105 ret = find_free_extent(trans, root, 0, search_start,
1106 search_end, &prealloc_key, 0);
1107 if (ret) {
1108 return ret;
1109 }
1110 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1111 int nr = info->extent_tree_prealloc_nr;
1112 search_end = info->extent_tree_prealloc[nr - 1] - 1;
1113 } else {
1114 search_start = info->extent_tree_prealloc[0] + 1;
1115 }
1116 }
878 /* do the real allocation */ 1117 /* do the real allocation */
879 ret = find_free_extent(trans, root, num_blocks, search_start, 1118 ret = find_free_extent(trans, root, num_blocks, search_start,
880 search_end, ins, data); 1119 search_end, ins, data);
881 if (ret) 1120 if (ret) {
882 return ret; 1121 return ret;
1122 }
883 1123
884 /* then do prealloc for the extent tree */ 1124 /*
885 if (ins->objectid + ins->offset >= search_end) 1125 * if we're doing a metadata allocation, preallocate space in the
886 search_end = ins->objectid - 1; 1126 * extent tree second. This way, we don't create a tiny hole
887 else 1127 * in the allocation map between any unused preallocation blocks
888 search_start = ins->objectid + ins->offset; 1128 * and the metadata block we're actually allocating. On disk,
1129 * it'll go:
1130 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1131 * The unused prealloc will get reused the next time around.
1132 */
1133 if (!data) {
1134 if (ins->objectid + ins->offset >= search_end)
1135 search_end = ins->objectid - 1;
1136 else
1137 search_start = ins->objectid + ins->offset;
889 1138
890 ret = find_free_extent(trans, root, 0, search_start, 1139 ret = find_free_extent(trans, root, 0, search_start,
891 search_end, &prealloc_key, 0); 1140 search_end, &prealloc_key, 0);
892 if (ret) 1141 if (ret) {
893 return ret; 1142 return ret;
1143 }
1144 }
894 1145
895 super_blocks_used = btrfs_super_blocks_used(info->disk_super); 1146 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
896 btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + 1147 btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
@@ -900,11 +1151,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
900 1151
901 finish_current_insert(trans, extent_root); 1152 finish_current_insert(trans, extent_root);
902 pending_ret = del_pending_extents(trans, extent_root); 1153 pending_ret = del_pending_extents(trans, extent_root);
903 if (ret) 1154 if (ret) {
904 return ret; 1155 return ret;
905 if (pending_ret) 1156 }
1157 if (pending_ret) {
906 return pending_ret; 1158 return pending_ret;
907 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 1159 }
1160 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
908 return 0; 1161 return 0;
909} 1162}
910 1163
@@ -920,7 +1173,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
920 struct buffer_head *buf; 1173 struct buffer_head *buf;
921 1174
922 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1175 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
923 1, hint, (unsigned long)-1, &ins, 0); 1176 1, 0, (unsigned long)-1, &ins, 0);
924 if (ret) { 1177 if (ret) {
925 BUG(); 1178 BUG();
926 return NULL; 1179 return NULL;
@@ -1134,6 +1387,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
1134{ 1387{
1135 int ret; 1388 int ret;
1136 int ret2; 1389 int ret2;
1390 unsigned long gang[16];
1391 int i;
1137 1392
1138 ret = free_block_group_radix(&info->block_group_radix); 1393 ret = free_block_group_radix(&info->block_group_radix);
1139 ret2 = free_block_group_radix(&info->block_group_data_radix); 1394 ret2 = free_block_group_radix(&info->block_group_data_radix);
@@ -1141,6 +1396,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
1141 return ret; 1396 return ret;
1142 if (ret2) 1397 if (ret2)
1143 return ret2; 1398 return ret2;
1399
1400 while(1) {
1401 ret = find_first_radix_bit(&info->extent_map_radix,
1402 gang, 0, ARRAY_SIZE(gang));
1403 if (!ret)
1404 break;
1405 for (i = 0; i < ret; i++) {
1406 clear_radix_bit(&info->extent_map_radix, gang[i]);
1407 }
1408 }
1144 return 0; 1409 return 0;
1145} 1410}
1146 1411
@@ -1186,7 +1451,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
1186 break; 1451 break;
1187 } 1452 }
1188 1453
1189 if (nr & 1) 1454 if (nr % 3)
1190 radix = &info->block_group_data_radix; 1455 radix = &info->block_group_data_radix;
1191 else 1456 else
1192 radix = &info->block_group_radix; 1457 radix = &info->block_group_radix;
@@ -1197,8 +1462,14 @@ int btrfs_read_block_groups(struct btrfs_root *root)
1197 memcpy(&cache->key, &found_key, sizeof(found_key)); 1462 memcpy(&cache->key, &found_key, sizeof(found_key));
1198 cache->last_alloc = cache->key.objectid; 1463 cache->last_alloc = cache->key.objectid;
1199 cache->first_free = cache->key.objectid; 1464 cache->first_free = cache->key.objectid;
1465 cache->last_prealloc = cache->key.objectid;
1200 cache->pinned = 0; 1466 cache->pinned = 0;
1201 cache->data = (nr & 1); 1467 cache->cached = 0;
1468
1469 if (nr % 3)
1470 cache->data = 1;
1471 else
1472 cache->data = 0;
1202 cache->radix = radix; 1473 cache->radix = radix;
1203 1474
1204 key.objectid = found_key.objectid + found_key.offset; 1475 key.objectid = found_key.objectid + found_key.offset;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d5ac0d8ee3c9..4c2870e2d12f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1539,7 +1539,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1539 kunmap(pages[i]); 1539 kunmap(pages[i]);
1540 } 1540 }
1541 SetPageChecked(pages[i]); 1541 SetPageChecked(pages[i]);
1542 btrfs_update_inode_block_group(trans, inode); 1542 // btrfs_update_inode_block_group(trans, inode);
1543 ret = btrfs_end_transaction(trans, root); 1543 ret = btrfs_end_transaction(trans, root);
1544 BUG_ON(ret); 1544 BUG_ON(ret);
1545 mutex_unlock(&root->fs_info->fs_mutex); 1545 mutex_unlock(&root->fs_info->fs_mutex);
@@ -1914,7 +1914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1914 } 1914 }
1915 BUG_ON(ret); 1915 BUG_ON(ret);
1916 alloc_extent_start = ins.objectid; 1916 alloc_extent_start = ins.objectid;
1917 btrfs_update_inode_block_group(trans, inode); 1917 // btrfs_update_inode_block_group(trans, inode);
1918 ret = btrfs_end_transaction(trans, root); 1918 ret = btrfs_end_transaction(trans, root);
1919 mutex_unlock(&root->fs_info->fs_mutex); 1919 mutex_unlock(&root->fs_info->fs_mutex);
1920 1920
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8bbe9107ff7e..f0f03121b7b2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -122,7 +122,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
122 } 122 }
123 dirty_pages = &trans->transaction->dirty_pages; 123 dirty_pages = &trans->transaction->dirty_pages;
124 while(1) { 124 while(1) {
125 ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); 125 ret = find_first_radix_bit(dirty_pages, gang,
126 0, ARRAY_SIZE(gang));
126 if (!ret) 127 if (!ret)
127 break; 128 break;
128 for (i = 0; i < ret; i++) { 129 for (i = 0; i < ret; i++) {