diff options
author | Chris Mason <chris.mason@oracle.com> | 2007-05-09 20:13:14 -0400 |
---|---|---|
committer | David Woodhouse <dwmw2@hera.kernel.org> | 2007-05-09 20:13:14 -0400 |
commit | e37c9e6921207cf503634b06bee37ecb7904408d (patch) | |
tree | 6f04d434ce9407b01b1fa36139ae8ffc7957fd1c | |
parent | 3e1ad54fe2839319c1aa66b954da0753f5b1f906 (diff) |
Btrfs: many allocator fixes, pretty solid
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/TODO | 1 | ||||
-rw-r--r-- | fs/btrfs/bit-radix.c | 10 | ||||
-rw-r--r-- | fs/btrfs/bit-radix.h | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 3 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 375 | ||||
-rw-r--r-- | fs/btrfs/super.c | 4 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 3 |
8 files changed, 340 insertions, 59 deletions
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 6a8c8cd03ca4..f6df246f26c3 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO | |||
@@ -7,6 +7,7 @@ | |||
7 | * Get rid of struct ctree_path, limiting tree levels held at one time | 7 | * Get rid of struct ctree_path, limiting tree levels held at one time |
8 | * Add generation number to key pointer in nodes | 8 | * Add generation number to key pointer in nodes |
9 | * Add generation number to inode | 9 | * Add generation number to inode |
10 | * Add ability to switch a block group from data to metadata or vice versa | ||
10 | * Release | 11 | * Release |
11 | * Do real tree locking | 12 | * Do real tree locking |
12 | * Add extent mirroring (backup copies of blocks) | 13 | * Add extent mirroring (backup copies of blocks) |
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 1a0271445dfb..8f9cd4277231 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c | |||
@@ -77,7 +77,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) | |||
77 | } | 77 | } |
78 | 78 | ||
79 | int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, | 79 | int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, |
80 | int nr) | 80 | unsigned long start, int nr) |
81 | { | 81 | { |
82 | unsigned long *bits; | 82 | unsigned long *bits; |
83 | unsigned long *gang[4]; | 83 | unsigned long *gang[4]; |
@@ -85,10 +85,13 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, | |||
85 | int ret; | 85 | int ret; |
86 | int i; | 86 | int i; |
87 | int total_found = 0; | 87 | int total_found = 0; |
88 | unsigned long slot; | ||
88 | 89 | ||
89 | ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); | 90 | slot = start / BIT_RADIX_BITS_PER_ARRAY; |
91 | ret = radix_tree_gang_lookup(radix, (void **)gang, slot, | ||
92 | ARRAY_SIZE(gang)); | ||
93 | found = start % BIT_RADIX_BITS_PER_ARRAY; | ||
90 | for (i = 0; i < ret && nr > 0; i++) { | 94 | for (i = 0; i < ret && nr > 0; i++) { |
91 | found = 0; | ||
92 | bits = gang[i]; | 95 | bits = gang[i]; |
93 | while(nr > 0) { | 96 | while(nr > 0) { |
94 | found = find_next_bit(bits + 1, | 97 | found = find_next_bit(bits + 1, |
@@ -104,6 +107,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, | |||
104 | } else | 107 | } else |
105 | break; | 108 | break; |
106 | } | 109 | } |
110 | found = 0; | ||
107 | } | 111 | } |
108 | return total_found; | 112 | return total_found; |
109 | } | 113 | } |
diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 56aad4c7d7f7..4e717e30db4f 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h | |||
@@ -6,7 +6,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); | |||
6 | int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); | 6 | int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); |
7 | int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); | 7 | int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); |
8 | int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, | 8 | int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, |
9 | int nr); | 9 | unsigned long start, int nr); |
10 | 10 | ||
11 | static inline void init_bit_radix(struct radix_tree_root *radix) | 11 | static inline void init_bit_radix(struct radix_tree_root *radix) |
12 | { | 12 | { |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cdb7c23c41f9..92a6078de827 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -259,7 +259,9 @@ struct btrfs_block_group_cache { | |||
259 | u64 first_free; | 259 | u64 first_free; |
260 | u64 last_alloc; | 260 | u64 last_alloc; |
261 | u64 pinned; | 261 | u64 pinned; |
262 | u64 last_prealloc; | ||
262 | int data; | 263 | int data; |
264 | int cached; | ||
263 | }; | 265 | }; |
264 | 266 | ||
265 | struct crypto_hash; | 267 | struct crypto_hash; |
@@ -273,6 +275,7 @@ struct btrfs_fs_info { | |||
273 | struct radix_tree_root dev_radix; | 275 | struct radix_tree_root dev_radix; |
274 | struct radix_tree_root block_group_radix; | 276 | struct radix_tree_root block_group_radix; |
275 | struct radix_tree_root block_group_data_radix; | 277 | struct radix_tree_root block_group_data_radix; |
278 | struct radix_tree_root extent_map_radix; | ||
276 | 279 | ||
277 | u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; | 280 | u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; |
278 | int extent_tree_insert_nr; | 281 | int extent_tree_insert_nr; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7930458c227e..2dbf422a2b9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -551,6 +551,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) | |||
551 | 551 | ||
552 | init_bit_radix(&fs_info->pinned_radix); | 552 | init_bit_radix(&fs_info->pinned_radix); |
553 | init_bit_radix(&fs_info->pending_del_radix); | 553 | init_bit_radix(&fs_info->pending_del_radix); |
554 | init_bit_radix(&fs_info->extent_map_radix); | ||
554 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | 555 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); |
555 | INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); | 556 | INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); |
556 | INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); | 557 | INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3edfc300289f..3ac9da453472 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -12,6 +12,97 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct | |||
12 | static int del_pending_extents(struct btrfs_trans_handle *trans, struct | 12 | static int del_pending_extents(struct btrfs_trans_handle *trans, struct |
13 | btrfs_root *extent_root); | 13 | btrfs_root *extent_root); |
14 | 14 | ||
15 | static int cache_block_group(struct btrfs_root *root, | ||
16 | struct btrfs_block_group_cache *block_group) | ||
17 | { | ||
18 | struct btrfs_path *path; | ||
19 | int ret; | ||
20 | struct btrfs_key key; | ||
21 | struct btrfs_leaf *leaf; | ||
22 | struct radix_tree_root *extent_radix; | ||
23 | int slot; | ||
24 | u64 i; | ||
25 | u64 last = 0; | ||
26 | u64 hole_size; | ||
27 | int found = 0; | ||
28 | |||
29 | root = root->fs_info->extent_root; | ||
30 | extent_radix = &root->fs_info->extent_map_radix; | ||
31 | |||
32 | if (block_group->cached) | ||
33 | return 0; | ||
34 | if (block_group->data) | ||
35 | return 0; | ||
36 | path = btrfs_alloc_path(); | ||
37 | if (!path) | ||
38 | return -ENOMEM; | ||
39 | printk("cache block group %Lu\n", block_group->key.objectid); | ||
40 | key.objectid = block_group->key.objectid; | ||
41 | key.flags = 0; | ||
42 | key.offset = 0; | ||
43 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
44 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
45 | if (ret < 0) | ||
46 | return ret; | ||
47 | if (ret && path->slots[0] > 0) | ||
48 | path->slots[0]--; | ||
49 | while(1) { | ||
50 | leaf = btrfs_buffer_leaf(path->nodes[0]); | ||
51 | slot = path->slots[0]; | ||
52 | if (slot >= btrfs_header_nritems(&leaf->header)) { | ||
53 | ret = btrfs_next_leaf(root, path); | ||
54 | if (ret == 0) | ||
55 | continue; | ||
56 | else { | ||
57 | if (found) { | ||
58 | hole_size = block_group->key.objectid + | ||
59 | block_group->key.offset - last; | ||
60 | } else { | ||
61 | last = block_group->key.objectid; | ||
62 | hole_size = block_group->key.offset; | ||
63 | } | ||
64 | for (i = 0; i < hole_size; i++) { | ||
65 | set_radix_bit(extent_radix, | ||
66 | last + i); | ||
67 | } | ||
68 | break; | ||
69 | } | ||
70 | } | ||
71 | btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); | ||
72 | if (key.objectid >= block_group->key.objectid + | ||
73 | block_group->key.offset) { | ||
74 | if (found) { | ||
75 | hole_size = block_group->key.objectid + | ||
76 | block_group->key.offset - last; | ||
77 | } else { | ||
78 | last = block_group->key.objectid; | ||
79 | hole_size = block_group->key.offset; | ||
80 | } | ||
81 | for (i = 0; i < hole_size; i++) { | ||
82 | set_radix_bit(extent_radix, last + i); | ||
83 | } | ||
84 | break; | ||
85 | } | ||
86 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | ||
87 | if (!found) { | ||
88 | last = key.objectid + key.offset; | ||
89 | found = 1; | ||
90 | } else { | ||
91 | hole_size = key.objectid - last; | ||
92 | for (i = 0; i < hole_size; i++) { | ||
93 | set_radix_bit(extent_radix, last + i); | ||
94 | } | ||
95 | last = key.objectid + key.offset; | ||
96 | } | ||
97 | } | ||
98 | path->slots[0]++; | ||
99 | } | ||
100 | |||
101 | block_group->cached = 1; | ||
102 | btrfs_free_path(path); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
15 | static struct btrfs_block_group_cache *lookup_block_group(struct | 106 | static struct btrfs_block_group_cache *lookup_block_group(struct |
16 | btrfs_fs_info *info, | 107 | btrfs_fs_info *info, |
17 | u64 blocknr) | 108 | u64 blocknr) |
@@ -44,6 +135,63 @@ static struct btrfs_block_group_cache *lookup_block_group(struct | |||
44 | return NULL; | 135 | return NULL; |
45 | } | 136 | } |
46 | 137 | ||
138 | static u64 leaf_range(struct btrfs_root *root) | ||
139 | { | ||
140 | u64 size = BTRFS_LEAF_DATA_SIZE(root); | ||
141 | size = size / (sizeof(struct btrfs_extent_item) + | ||
142 | sizeof(struct btrfs_item)); | ||
143 | return size; | ||
144 | } | ||
145 | |||
146 | static u64 find_search_start(struct btrfs_root *root, | ||
147 | struct btrfs_block_group_cache **cache_ret, | ||
148 | u64 search_start, int num) | ||
149 | { | ||
150 | unsigned long gang[8]; | ||
151 | int ret; | ||
152 | struct btrfs_block_group_cache *cache = *cache_ret; | ||
153 | u64 last = max(search_start, cache->key.objectid); | ||
154 | |||
155 | if (cache->data) | ||
156 | goto out; | ||
157 | if (num > 1) { | ||
158 | last = max(last, cache->last_prealloc); | ||
159 | } | ||
160 | again: | ||
161 | cache_block_group(root, cache); | ||
162 | while(1) { | ||
163 | ret = find_first_radix_bit(&root->fs_info->extent_map_radix, | ||
164 | gang, last, ARRAY_SIZE(gang)); | ||
165 | if (!ret) | ||
166 | goto out; | ||
167 | last = gang[ret-1] + 1; | ||
168 | if (num > 1) { | ||
169 | if (ret != ARRAY_SIZE(gang)) { | ||
170 | goto new_group; | ||
171 | } | ||
172 | if (gang[ret-1] - gang[0] > leaf_range(root)) { | ||
173 | continue; | ||
174 | } | ||
175 | } | ||
176 | if (gang[0] >= cache->key.objectid + cache->key.offset) { | ||
177 | goto new_group; | ||
178 | } | ||
179 | return gang[0]; | ||
180 | } | ||
181 | out: | ||
182 | return max(cache->last_alloc, search_start); | ||
183 | |||
184 | new_group: | ||
185 | cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); | ||
186 | if (!cache) { | ||
187 | return max((*cache_ret)->last_alloc, search_start); | ||
188 | } | ||
189 | cache = btrfs_find_block_group(root, cache, | ||
190 | last + cache->key.offset - 1, 0); | ||
191 | *cache_ret = cache; | ||
192 | goto again; | ||
193 | } | ||
194 | |||
47 | struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, | 195 | struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, |
48 | struct btrfs_block_group_cache | 196 | struct btrfs_block_group_cache |
49 | *hint, u64 search_start, | 197 | *hint, u64 search_start, |
@@ -89,13 +237,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, | |||
89 | } | 237 | } |
90 | last = hint->key.offset * 2; | 238 | last = hint->key.offset * 2; |
91 | if (hint->key.objectid >= last) | 239 | if (hint->key.objectid >= last) |
92 | last = max(search_start, hint->key.objectid - last); | 240 | last = max(search_start + hint->key.offset - 1, |
241 | hint->key.objectid - last); | ||
93 | else | 242 | else |
94 | last = hint->key.objectid + hint->key.offset; | 243 | last = hint->key.objectid + hint->key.offset; |
95 | hint_last = last; | 244 | hint_last = last; |
96 | } else { | 245 | } else { |
97 | hint_last = search_start; | 246 | if (hint) |
98 | last = search_start; | 247 | hint_last = max(hint->key.objectid, search_start); |
248 | else | ||
249 | hint_last = search_start; | ||
250 | |||
251 | last = hint_last; | ||
99 | } | 252 | } |
100 | while(1) { | 253 | while(1) { |
101 | ret = radix_tree_gang_lookup_tag(radix, (void **)cache, | 254 | ret = radix_tree_gang_lookup_tag(radix, (void **)cache, |
@@ -357,13 +510,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
357 | 510 | ||
358 | static int update_block_group(struct btrfs_trans_handle *trans, | 511 | static int update_block_group(struct btrfs_trans_handle *trans, |
359 | struct btrfs_root *root, | 512 | struct btrfs_root *root, |
360 | u64 blocknr, u64 num, int alloc) | 513 | u64 blocknr, u64 num, int alloc, int mark_free) |
361 | { | 514 | { |
362 | struct btrfs_block_group_cache *cache; | 515 | struct btrfs_block_group_cache *cache; |
363 | struct btrfs_fs_info *info = root->fs_info; | 516 | struct btrfs_fs_info *info = root->fs_info; |
364 | u64 total = num; | 517 | u64 total = num; |
365 | u64 old_val; | 518 | u64 old_val; |
366 | u64 block_in_group; | 519 | u64 block_in_group; |
520 | u64 i; | ||
367 | 521 | ||
368 | while(total) { | 522 | while(total) { |
369 | cache = lookup_block_group(info, blocknr); | 523 | cache = lookup_block_group(info, blocknr); |
@@ -380,18 +534,38 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
380 | 534 | ||
381 | old_val = btrfs_block_group_used(&cache->item); | 535 | old_val = btrfs_block_group_used(&cache->item); |
382 | num = min(total, cache->key.offset - block_in_group); | 536 | num = min(total, cache->key.offset - block_in_group); |
383 | total -= num; | ||
384 | blocknr += num; | ||
385 | if (alloc) { | 537 | if (alloc) { |
386 | old_val += num; | 538 | old_val += num; |
387 | if (blocknr > cache->last_alloc) | 539 | if (blocknr > cache->last_alloc) |
388 | cache->last_alloc = blocknr; | 540 | cache->last_alloc = blocknr; |
541 | if (!cache->data) { | ||
542 | for (i = 0; i < num; i++) { | ||
543 | clear_radix_bit(&info->extent_map_radix, | ||
544 | blocknr + i); | ||
545 | } | ||
546 | } | ||
389 | } else { | 547 | } else { |
390 | old_val -= num; | 548 | old_val -= num; |
391 | if (blocknr < cache->first_free) | 549 | if (blocknr < cache->first_free) |
392 | cache->first_free = blocknr; | 550 | cache->first_free = blocknr; |
551 | if (!cache->data && mark_free) { | ||
552 | for (i = 0; i < num; i++) { | ||
553 | set_radix_bit(&info->extent_map_radix, | ||
554 | blocknr + i); | ||
555 | } | ||
556 | } | ||
557 | if (old_val < (cache->key.offset * 8) / 10 && | ||
558 | old_val + num >= (cache->key.offset * 8) / 10) { | ||
559 | printk("group %Lu now available\n", cache->key.objectid); | ||
560 | radix_tree_tag_set(cache->radix, | ||
561 | cache->key.objectid + | ||
562 | cache->key.offset - 1, | ||
563 | BTRFS_BLOCK_GROUP_AVAIL); | ||
564 | } | ||
393 | } | 565 | } |
394 | btrfs_set_block_group_used(&cache->item, old_val); | 566 | btrfs_set_block_group_used(&cache->item, old_val); |
567 | total -= num; | ||
568 | blocknr += num; | ||
395 | } | 569 | } |
396 | return 0; | 570 | return 0; |
397 | } | 571 | } |
@@ -413,9 +587,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct | |||
413 | int ret; | 587 | int ret; |
414 | int i; | 588 | int i; |
415 | struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; | 589 | struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; |
590 | struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; | ||
416 | 591 | ||
417 | while(1) { | 592 | while(1) { |
418 | ret = find_first_radix_bit(pinned_radix, gang, | 593 | ret = find_first_radix_bit(pinned_radix, gang, 0, |
419 | ARRAY_SIZE(gang)); | 594 | ARRAY_SIZE(gang)); |
420 | if (!ret) | 595 | if (!ret) |
421 | break; | 596 | break; |
@@ -430,6 +605,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct | |||
430 | block_group->pinned--; | 605 | block_group->pinned--; |
431 | if (gang[i] < block_group->last_alloc) | 606 | if (gang[i] < block_group->last_alloc) |
432 | block_group->last_alloc = gang[i]; | 607 | block_group->last_alloc = gang[i]; |
608 | if (gang[i] < block_group->last_prealloc) | ||
609 | block_group->last_prealloc = gang[i]; | ||
610 | if (!block_group->data) | ||
611 | set_radix_bit(extent_radix, gang[i]); | ||
433 | } | 612 | } |
434 | try_remove_page(btree_inode->i_mapping, | 613 | try_remove_page(btree_inode->i_mapping, |
435 | gang[i] << (PAGE_CACHE_SHIFT - | 614 | gang[i] << (PAGE_CACHE_SHIFT - |
@@ -508,7 +687,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) | |||
508 | * remove an extent from the root, returns 0 on success | 687 | * remove an extent from the root, returns 0 on success |
509 | */ | 688 | */ |
510 | static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root | 689 | static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root |
511 | *root, u64 blocknr, u64 num_blocks, int pin) | 690 | *root, u64 blocknr, u64 num_blocks, int pin, |
691 | int mark_free) | ||
512 | { | 692 | { |
513 | struct btrfs_path *path; | 693 | struct btrfs_path *path; |
514 | struct btrfs_key key; | 694 | struct btrfs_key key; |
@@ -556,10 +736,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root | |||
556 | ret = btrfs_del_item(trans, extent_root, path); | 736 | ret = btrfs_del_item(trans, extent_root, path); |
557 | if (ret) | 737 | if (ret) |
558 | BUG(); | 738 | BUG(); |
559 | ret = update_block_group(trans, root, blocknr, num_blocks, 0); | 739 | ret = update_block_group(trans, root, blocknr, num_blocks, 0, |
740 | mark_free); | ||
560 | BUG_ON(ret); | 741 | BUG_ON(ret); |
561 | } | 742 | } |
562 | btrfs_release_path(extent_root, path); | ||
563 | btrfs_free_path(path); | 743 | btrfs_free_path(path); |
564 | finish_current_insert(trans, extent_root); | 744 | finish_current_insert(trans, extent_root); |
565 | return ret; | 745 | return ret; |
@@ -585,7 +765,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct | |||
585 | pinned_radix = &extent_root->fs_info->pinned_radix; | 765 | pinned_radix = &extent_root->fs_info->pinned_radix; |
586 | 766 | ||
587 | while(1) { | 767 | while(1) { |
588 | ret = find_first_radix_bit(pending_radix, gang, | 768 | ret = find_first_radix_bit(pending_radix, gang, 0, |
589 | ARRAY_SIZE(gang)); | 769 | ARRAY_SIZE(gang)); |
590 | if (!ret) | 770 | if (!ret) |
591 | break; | 771 | break; |
@@ -605,7 +785,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct | |||
605 | wret = clear_radix_bit(pending_radix, gang[i]); | 785 | wret = clear_radix_bit(pending_radix, gang[i]); |
606 | BUG_ON(wret); | 786 | BUG_ON(wret); |
607 | wret = __free_extent(trans, extent_root, | 787 | wret = __free_extent(trans, extent_root, |
608 | gang[i], 1, 0); | 788 | gang[i], 1, 0, 0); |
609 | if (wret) | 789 | if (wret) |
610 | err = wret; | 790 | err = wret; |
611 | } | 791 | } |
@@ -627,7 +807,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root | |||
627 | pin_down_block(root, blocknr, 1); | 807 | pin_down_block(root, blocknr, 1); |
628 | return 0; | 808 | return 0; |
629 | } | 809 | } |
630 | ret = __free_extent(trans, root, blocknr, num_blocks, pin); | 810 | ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); |
631 | pending_ret = del_pending_extents(trans, root->fs_info->extent_root); | 811 | pending_ret = del_pending_extents(trans, root->fs_info->extent_root); |
632 | return ret ? ret : pending_ret; | 812 | return ret ? ret : pending_ret; |
633 | } | 813 | } |
@@ -688,18 +868,45 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root | |||
688 | check_failed: | 868 | check_failed: |
689 | if (!full_scan && block_group->data != data) | 869 | if (!full_scan && block_group->data != data) |
690 | WARN_ON(1); | 870 | WARN_ON(1); |
691 | if (block_group->last_alloc > search_start) | 871 | |
692 | search_start = block_group->last_alloc; | 872 | if (!data) |
873 | search_start = find_search_start(root, &block_group, | ||
874 | search_start, total_needed); | ||
875 | else | ||
876 | search_start = max(block_group->last_alloc, search_start); | ||
877 | |||
693 | btrfs_init_path(path); | 878 | btrfs_init_path(path); |
694 | ins->objectid = search_start; | 879 | ins->objectid = search_start; |
695 | ins->offset = 0; | 880 | ins->offset = 0; |
696 | start_found = 0; | 881 | start_found = 0; |
882 | |||
697 | ret = btrfs_search_slot(trans, root, ins, path, 0, 0); | 883 | ret = btrfs_search_slot(trans, root, ins, path, 0, 0); |
698 | if (ret < 0) | 884 | if (ret < 0) |
699 | goto error; | 885 | goto error; |
700 | 886 | ||
701 | if (path->slots[0] > 0) | 887 | if (path->slots[0] > 0) { |
702 | path->slots[0]--; | 888 | path->slots[0]--; |
889 | } | ||
890 | |||
891 | l = btrfs_buffer_leaf(path->nodes[0]); | ||
892 | btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); | ||
893 | /* | ||
894 | * a rare case, go back one key if we hit a block group item | ||
895 | * instead of an extent item | ||
896 | */ | ||
897 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY && | ||
898 | key.objectid + key.offset >= search_start) { | ||
899 | ins->objectid = key.objectid; | ||
900 | ins->offset = key.offset - 1; | ||
901 | btrfs_release_path(root, path); | ||
902 | ret = btrfs_search_slot(trans, root, ins, path, 0, 0); | ||
903 | if (ret < 0) | ||
904 | goto error; | ||
905 | |||
906 | if (path->slots[0] > 0) { | ||
907 | path->slots[0]--; | ||
908 | } | ||
909 | } | ||
703 | 910 | ||
704 | while (1) { | 911 | while (1) { |
705 | l = btrfs_buffer_leaf(path->nodes[0]); | 912 | l = btrfs_buffer_leaf(path->nodes[0]); |
@@ -725,21 +932,23 @@ check_failed: | |||
725 | ins->offset = search_end - ins->objectid; | 932 | ins->offset = search_end - ins->objectid; |
726 | goto check_pending; | 933 | goto check_pending; |
727 | } | 934 | } |
935 | |||
728 | btrfs_disk_key_to_cpu(&key, &l->items[slot].key); | 936 | btrfs_disk_key_to_cpu(&key, &l->items[slot].key); |
729 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) | 937 | if (key.objectid >= search_start && key.objectid > last_block && |
730 | goto next; | 938 | start_found) { |
731 | if (key.objectid >= search_start) { | 939 | if (last_block < search_start) |
732 | if (start_found) { | 940 | last_block = search_start; |
733 | if (last_block < search_start) | 941 | hole_size = key.objectid - last_block; |
734 | last_block = search_start; | 942 | if (hole_size >= num_blocks) { |
735 | hole_size = key.objectid - last_block; | 943 | ins->objectid = last_block; |
736 | if (hole_size >= num_blocks) { | 944 | ins->offset = hole_size; |
737 | ins->objectid = last_block; | 945 | goto check_pending; |
738 | ins->offset = hole_size; | ||
739 | goto check_pending; | ||
740 | } | ||
741 | } | 946 | } |
742 | } | 947 | } |
948 | |||
949 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) | ||
950 | goto next; | ||
951 | |||
743 | start_found = 1; | 952 | start_found = 1; |
744 | last_block = key.objectid + key.offset; | 953 | last_block = key.objectid + key.offset; |
745 | if (last_block >= block_group->key.objectid + | 954 | if (last_block >= block_group->key.objectid + |
@@ -759,6 +968,7 @@ check_pending: | |||
759 | */ | 968 | */ |
760 | btrfs_release_path(root, path); | 969 | btrfs_release_path(root, path); |
761 | BUG_ON(ins->objectid < search_start); | 970 | BUG_ON(ins->objectid < search_start); |
971 | |||
762 | if (ins->objectid + num_blocks >= search_end) { | 972 | if (ins->objectid + num_blocks >= search_end) { |
763 | if (full_scan) | 973 | if (full_scan) |
764 | return -ENOSPC; | 974 | return -ENOSPC; |
@@ -780,7 +990,7 @@ check_pending: | |||
780 | info->extent_tree_insert[0] && | 990 | info->extent_tree_insert[0] && |
781 | ins->objectid <= last) { | 991 | ins->objectid <= last) { |
782 | search_start = last + 1; | 992 | search_start = last + 1; |
783 | WARN_ON(1); | 993 | WARN_ON(!full_scan); |
784 | goto new_group; | 994 | goto new_group; |
785 | } | 995 | } |
786 | } | 996 | } |
@@ -790,13 +1000,18 @@ check_pending: | |||
790 | if (ins->objectid + num_blocks > first && | 1000 | if (ins->objectid + num_blocks > first && |
791 | ins->objectid <= info->extent_tree_prealloc[0]) { | 1001 | ins->objectid <= info->extent_tree_prealloc[0]) { |
792 | search_start = info->extent_tree_prealloc[0] + 1; | 1002 | search_start = info->extent_tree_prealloc[0] + 1; |
793 | WARN_ON(1); | 1003 | WARN_ON(!full_scan); |
794 | goto new_group; | 1004 | goto new_group; |
795 | } | 1005 | } |
796 | } | 1006 | } |
797 | if (fill_prealloc) { | 1007 | if (fill_prealloc) { |
798 | int nr; | 1008 | int nr; |
799 | test_block = ins->objectid; | 1009 | test_block = ins->objectid; |
1010 | if (test_block - info->extent_tree_prealloc[total_needed - 1] >= | ||
1011 | leaf_range(root)) { | ||
1012 | total_found = 0; | ||
1013 | info->extent_tree_prealloc_nr = total_found; | ||
1014 | } | ||
800 | while(test_block < ins->objectid + ins->offset && | 1015 | while(test_block < ins->objectid + ins->offset && |
801 | total_found < total_needed) { | 1016 | total_found < total_needed) { |
802 | nr = total_needed - total_found - 1; | 1017 | nr = total_needed - total_found - 1; |
@@ -811,11 +1026,15 @@ check_pending: | |||
811 | } | 1026 | } |
812 | info->extent_tree_prealloc_nr = total_found; | 1027 | info->extent_tree_prealloc_nr = total_found; |
813 | } | 1028 | } |
814 | block_group = lookup_block_group(info, ins->objectid); | 1029 | if (!data) { |
815 | if (block_group) { | 1030 | block_group = lookup_block_group(info, ins->objectid); |
816 | block_group->last_alloc = ins->objectid; | 1031 | if (block_group) { |
817 | if (!data) | 1032 | if (fill_prealloc) |
818 | trans->block_group = block_group; | 1033 | block_group->last_prealloc = |
1034 | info->extent_tree_prealloc[total_needed-1]; | ||
1035 | else | ||
1036 | trans->block_group = block_group; | ||
1037 | } | ||
819 | } | 1038 | } |
820 | ins->offset = num_blocks; | 1039 | ins->offset = num_blocks; |
821 | btrfs_free_path(path); | 1040 | btrfs_free_path(path); |
@@ -824,6 +1043,7 @@ check_pending: | |||
824 | new_group: | 1043 | new_group: |
825 | if (search_start + num_blocks >= search_end) { | 1044 | if (search_start + num_blocks >= search_end) { |
826 | search_start = orig_search_start; | 1045 | search_start = orig_search_start; |
1046 | printk("doing full scan!\n"); | ||
827 | full_scan = 1; | 1047 | full_scan = 1; |
828 | } | 1048 | } |
829 | block_group = lookup_block_group(info, search_start); | 1049 | block_group = lookup_block_group(info, search_start); |
@@ -871,26 +1091,57 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
871 | info->extent_tree_insert[info->extent_tree_insert_nr++] = | 1091 | info->extent_tree_insert[info->extent_tree_insert_nr++] = |
872 | ins->objectid; | 1092 | ins->objectid; |
873 | ret = update_block_group(trans, root, | 1093 | ret = update_block_group(trans, root, |
874 | ins->objectid, ins->offset, 1); | 1094 | ins->objectid, ins->offset, 1, 0); |
875 | BUG_ON(ret); | 1095 | BUG_ON(ret); |
876 | return 0; | 1096 | return 0; |
877 | } | 1097 | } |
1098 | |||
1099 | /* | ||
1100 | * if we're doing a data allocation, preallocate room in the | ||
1101 | * extent tree first. This way the extent tree blocks end up | ||
1102 | * in the correct block group. | ||
1103 | */ | ||
1104 | if (data) { | ||
1105 | ret = find_free_extent(trans, root, 0, search_start, | ||
1106 | search_end, &prealloc_key, 0); | ||
1107 | if (ret) { | ||
1108 | return ret; | ||
1109 | } | ||
1110 | if (prealloc_key.objectid + prealloc_key.offset >= search_end) { | ||
1111 | int nr = info->extent_tree_prealloc_nr; | ||
1112 | search_end = info->extent_tree_prealloc[nr - 1] - 1; | ||
1113 | } else { | ||
1114 | search_start = info->extent_tree_prealloc[0] + 1; | ||
1115 | } | ||
1116 | } | ||
878 | /* do the real allocation */ | 1117 | /* do the real allocation */ |
879 | ret = find_free_extent(trans, root, num_blocks, search_start, | 1118 | ret = find_free_extent(trans, root, num_blocks, search_start, |
880 | search_end, ins, data); | 1119 | search_end, ins, data); |
881 | if (ret) | 1120 | if (ret) { |
882 | return ret; | 1121 | return ret; |
1122 | } | ||
883 | 1123 | ||
884 | /* then do prealloc for the extent tree */ | 1124 | /* |
885 | if (ins->objectid + ins->offset >= search_end) | 1125 | * if we're doing a metadata allocation, preallocate space in the |
886 | search_end = ins->objectid - 1; | 1126 | * extent tree second. This way, we don't create a tiny hole |
887 | else | 1127 | * in the allocation map between any unused preallocation blocks |
888 | search_start = ins->objectid + ins->offset; | 1128 | * and the metadata block we're actually allocating. On disk, |
1129 | * it'll go: | ||
1130 | * [block we've allocated], [used prealloc 1], [ unused prealloc ] | ||
1131 | * The unused prealloc will get reused the next time around. | ||
1132 | */ | ||
1133 | if (!data) { | ||
1134 | if (ins->objectid + ins->offset >= search_end) | ||
1135 | search_end = ins->objectid - 1; | ||
1136 | else | ||
1137 | search_start = ins->objectid + ins->offset; | ||
889 | 1138 | ||
890 | ret = find_free_extent(trans, root, 0, search_start, | 1139 | ret = find_free_extent(trans, root, 0, search_start, |
891 | search_end, &prealloc_key, 0); | 1140 | search_end, &prealloc_key, 0); |
892 | if (ret) | 1141 | if (ret) { |
893 | return ret; | 1142 | return ret; |
1143 | } | ||
1144 | } | ||
894 | 1145 | ||
895 | super_blocks_used = btrfs_super_blocks_used(info->disk_super); | 1146 | super_blocks_used = btrfs_super_blocks_used(info->disk_super); |
896 | btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + | 1147 | btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + |
@@ -900,11 +1151,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
900 | 1151 | ||
901 | finish_current_insert(trans, extent_root); | 1152 | finish_current_insert(trans, extent_root); |
902 | pending_ret = del_pending_extents(trans, extent_root); | 1153 | pending_ret = del_pending_extents(trans, extent_root); |
903 | if (ret) | 1154 | if (ret) { |
904 | return ret; | 1155 | return ret; |
905 | if (pending_ret) | 1156 | } |
1157 | if (pending_ret) { | ||
906 | return pending_ret; | 1158 | return pending_ret; |
907 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); | 1159 | } |
1160 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); | ||
908 | return 0; | 1161 | return 0; |
909 | } | 1162 | } |
910 | 1163 | ||
@@ -920,7 +1173,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
920 | struct buffer_head *buf; | 1173 | struct buffer_head *buf; |
921 | 1174 | ||
922 | ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, | 1175 | ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, |
923 | 1, hint, (unsigned long)-1, &ins, 0); | 1176 | 1, 0, (unsigned long)-1, &ins, 0); |
924 | if (ret) { | 1177 | if (ret) { |
925 | BUG(); | 1178 | BUG(); |
926 | return NULL; | 1179 | return NULL; |
@@ -1134,6 +1387,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
1134 | { | 1387 | { |
1135 | int ret; | 1388 | int ret; |
1136 | int ret2; | 1389 | int ret2; |
1390 | unsigned long gang[16]; | ||
1391 | int i; | ||
1137 | 1392 | ||
1138 | ret = free_block_group_radix(&info->block_group_radix); | 1393 | ret = free_block_group_radix(&info->block_group_radix); |
1139 | ret2 = free_block_group_radix(&info->block_group_data_radix); | 1394 | ret2 = free_block_group_radix(&info->block_group_data_radix); |
@@ -1141,6 +1396,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
1141 | return ret; | 1396 | return ret; |
1142 | if (ret2) | 1397 | if (ret2) |
1143 | return ret2; | 1398 | return ret2; |
1399 | |||
1400 | while(1) { | ||
1401 | ret = find_first_radix_bit(&info->extent_map_radix, | ||
1402 | gang, 0, ARRAY_SIZE(gang)); | ||
1403 | if (!ret) | ||
1404 | break; | ||
1405 | for (i = 0; i < ret; i++) { | ||
1406 | clear_radix_bit(&info->extent_map_radix, gang[i]); | ||
1407 | } | ||
1408 | } | ||
1144 | return 0; | 1409 | return 0; |
1145 | } | 1410 | } |
1146 | 1411 | ||
@@ -1186,7 +1451,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
1186 | break; | 1451 | break; |
1187 | } | 1452 | } |
1188 | 1453 | ||
1189 | if (nr & 1) | 1454 | if (nr % 3) |
1190 | radix = &info->block_group_data_radix; | 1455 | radix = &info->block_group_data_radix; |
1191 | else | 1456 | else |
1192 | radix = &info->block_group_radix; | 1457 | radix = &info->block_group_radix; |
@@ -1197,8 +1462,14 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
1197 | memcpy(&cache->key, &found_key, sizeof(found_key)); | 1462 | memcpy(&cache->key, &found_key, sizeof(found_key)); |
1198 | cache->last_alloc = cache->key.objectid; | 1463 | cache->last_alloc = cache->key.objectid; |
1199 | cache->first_free = cache->key.objectid; | 1464 | cache->first_free = cache->key.objectid; |
1465 | cache->last_prealloc = cache->key.objectid; | ||
1200 | cache->pinned = 0; | 1466 | cache->pinned = 0; |
1201 | cache->data = (nr & 1); | 1467 | cache->cached = 0; |
1468 | |||
1469 | if (nr % 3) | ||
1470 | cache->data = 1; | ||
1471 | else | ||
1472 | cache->data = 0; | ||
1202 | cache->radix = radix; | 1473 | cache->radix = radix; |
1203 | 1474 | ||
1204 | key.objectid = found_key.objectid + found_key.offset; | 1475 | key.objectid = found_key.objectid + found_key.offset; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d5ac0d8ee3c9..4c2870e2d12f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -1539,7 +1539,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
1539 | kunmap(pages[i]); | 1539 | kunmap(pages[i]); |
1540 | } | 1540 | } |
1541 | SetPageChecked(pages[i]); | 1541 | SetPageChecked(pages[i]); |
1542 | btrfs_update_inode_block_group(trans, inode); | 1542 | // btrfs_update_inode_block_group(trans, inode); |
1543 | ret = btrfs_end_transaction(trans, root); | 1543 | ret = btrfs_end_transaction(trans, root); |
1544 | BUG_ON(ret); | 1544 | BUG_ON(ret); |
1545 | mutex_unlock(&root->fs_info->fs_mutex); | 1545 | mutex_unlock(&root->fs_info->fs_mutex); |
@@ -1914,7 +1914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1914 | } | 1914 | } |
1915 | BUG_ON(ret); | 1915 | BUG_ON(ret); |
1916 | alloc_extent_start = ins.objectid; | 1916 | alloc_extent_start = ins.objectid; |
1917 | btrfs_update_inode_block_group(trans, inode); | 1917 | // btrfs_update_inode_block_group(trans, inode); |
1918 | ret = btrfs_end_transaction(trans, root); | 1918 | ret = btrfs_end_transaction(trans, root); |
1919 | mutex_unlock(&root->fs_info->fs_mutex); | 1919 | mutex_unlock(&root->fs_info->fs_mutex); |
1920 | 1920 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8bbe9107ff7e..f0f03121b7b2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -122,7 +122,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | |||
122 | } | 122 | } |
123 | dirty_pages = &trans->transaction->dirty_pages; | 123 | dirty_pages = &trans->transaction->dirty_pages; |
124 | while(1) { | 124 | while(1) { |
125 | ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); | 125 | ret = find_first_radix_bit(dirty_pages, gang, |
126 | 0, ARRAY_SIZE(gang)); | ||
126 | if (!ret) | 127 | if (!ret) |
127 | break; | 128 | break; |
128 | for (i = 0; i < ret; i++) { | 129 | for (i = 0; i < ret; i++) { |