author		Josef Bacik <jbacik@redhat.com>		2008-09-23 13:14:11 -0400
committer	Chris Mason <chris.mason@oracle.com>	2008-09-25 11:04:07 -0400
commit		0f9dd46cda36b8de3b9f48bc42bd09d20b9c3b52 (patch)
tree		2dcba11fd2fb5a4227fd8f8d2d076641f115a7b4
parent		ef8bbdfe7e12dc9b4e80756f6d606c4639c65851 (diff)
Btrfs: free space accounting redo
1) Replace the per fs_info extent_io_tree that tracked free space with two
rb-trees per block group that track free space areas by offset and by size.
The reason to do this is that most allocations come with a hint byte for
where to start, so we can usually find a chunk of free space at that hint
byte to satisfy the allocation and get good space packing.  If we cannot
find free space at or after the given offset, we fall back on looking for a
chunk of the given size as close to that given offset as possible.  When we
fall back on the size search we also try to find a slot as close to the
size we want as possible, to avoid breaking small chunks off of huge areas
if possible.

2) Remove the extent_io_tree that tracked the block group cache from
fs_info and replace it with an rb-tree that tracks block group cache by
offset.  Also add a per space_info list that tracks the block group cache
for the particular space, so we can look up related block groups easily.

3) Clean up the allocation code to make it a little easier to read and a
little less complicated.  Basically there are three steps: first look from
our provided hint.  If we couldn't find from that given hint, start back at
our original search start and look for space from there.  If that fails,
try to allocate space if we can and start looking again.  If not, we're
screwed and need to start over again.

4) Small fixes.  There were some issues in volumes.c where we wouldn't
allocate the rest of the disk.  Fixed cow_file_range to actually pass the
alloc_hint, which has helped a good bit in making the fs_mark test I run
have semi-normal results as we run out of space.  Generally, with data
allocations we don't track where we last allocated from, so every time we
did a data allocation we'd search through every block group that we have
looking for free space.  Now searching a block group with no free space
isn't terribly time consuming, but it was causing a slight degradation as
we got more data block groups.  The alloc_hint has fixed this slight
degradation and made things semi-normal.

There is still one nagging problem I'm working on where we will get ENOSPC
when there is definitely plenty of space.  This only happens with metadata
allocations, and only when we are almost full.  So you generally hit the
85% mark first, but sometimes you'll hit the BUG before you hit the 85%
wall.  I'm still tracking it down, but until then this seems to be pretty
stable and makes a significant performance gain.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
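[Editor's note: the heart of change (1) is a single btrfs_free_space entry
linked into two per-block-group rb-trees, one keyed by offset and one by
size.  fs/btrfs/free-space-cache.c itself is not reproduced on this page,
so the following is only an illustrative sketch of that dual-index layout
against the structures this patch adds to ctree.h; tree_insert_offset() is
a hypothetical helper name, not code from the patch.]

/*
 * Illustrative sketch only (not patch code).  One free-space entry
 * carries two rb_nodes, so the same entry sits in both indexes at
 * once: an allocation probes free_space_offset first (good packing
 * near the hint byte) and can fall back to a best-fit probe of
 * free_space_bytes.
 */
#include <linux/rbtree.h>

static void tree_insert_offset(struct rb_root *root,
			       struct btrfs_free_space *info)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *cur;

	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct btrfs_free_space, offset_index);
		if (info->offset < cur->offset)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}
	rb_link_node(&info->offset_index, parent, p);
	rb_insert_color(&info->offset_index, root);
}

[An analogous insert keyed on info->bytes links the same entry into
free_space_bytes; removing an extent from the cache has to erase it from
both trees.]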
-rw-r--r--	fs/btrfs/Makefile		|   2
-rw-r--r--	fs/btrfs/ctree.c		|   3
-rw-r--r--	fs/btrfs/ctree.h		|  46
-rw-r--r--	fs/btrfs/disk-io.c		|   7
-rw-r--r--	fs/btrfs/extent-tree.c		| 869
-rw-r--r--	fs/btrfs/extent_io.c		|   4
-rw-r--r--	fs/btrfs/free-space-cache.c	| 415
-rw-r--r--	fs/btrfs/inode.c		|   3
-rw-r--r--	fs/btrfs/volumes.c		|  11
9 files changed, 925 insertions, 435 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index b7addbfd8c22..eb36ae981bdc 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o bit-radix.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   ref-cache.o export.o tree-log.o acl.o
+	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o
 else
 
 # Normal Makefile
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 18e84472abb5..6f467901246f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2725,9 +2725,8 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 
 	total_size = total_data + (nr * sizeof(struct btrfs_item));
 	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
-	if (ret == 0) {
+	if (ret == 0)
 		return -EEXIST;
-	}
 	if (ret < 0)
 		goto out;
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eb65fd808883..730aae3bc181 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -483,7 +483,6 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_DUP		(1 << 5)
 #define BTRFS_BLOCK_GROUP_RAID10	(1 << 6)
 
-
 struct btrfs_block_group_item {
 	__le64 used;
 	__le64 chunk_objectid;
@@ -498,17 +497,40 @@ struct btrfs_space_info {
 	int full;
 	int force_alloc;
 	struct list_head list;
+
+	/* for block groups in our same type */
+	struct list_head block_groups;
+	spinlock_t lock;
+};
+
+struct btrfs_free_space {
+	struct rb_node bytes_index;
+	struct rb_node offset_index;
+	u64 offset;
+	u64 bytes;
 };
 
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
-	struct btrfs_space_info *space_info;
 	spinlock_t lock;
 	u64 pinned;
 	u64 flags;
 	int cached;
 	int ro;
+	int dirty;
+
+	struct btrfs_space_info *space_info;
+
+	/* free space cache stuff */
+	struct rb_root free_space_bytes;
+	struct rb_root free_space_offset;
+
+	/* block group cache stuff */
+	struct rb_node cache_node;
+
+	/* for block groups in the same raid type */
+	struct list_head list;
 };
 
 struct btrfs_device;
@@ -525,8 +547,10 @@ struct btrfs_fs_info {
 	struct btrfs_root *log_root_tree;
 	struct radix_tree_root fs_roots_radix;
 
-	struct extent_io_tree free_space_cache;
-	struct extent_io_tree block_group_cache;
+	/* block group cache stuff */
+	spinlock_t block_group_cache_lock;
+	struct rb_root block_group_cache_tree;
+
 	struct extent_io_tree pinned_extents;
 	struct extent_io_tree pending_del;
 	struct extent_io_tree extent_ins;
@@ -1814,4 +1838,18 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
 int btrfs_check_acl(struct inode *inode, int mask);
 int btrfs_init_acl(struct inode *inode, struct inode *dir);
 int btrfs_acl_chmod(struct inode *inode);
+
+/* free-space-cache.c */
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+			 u64 bytenr, u64 size);
+int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+			    u64 bytenr, u64 size);
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+				   *block_group);
+struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
+					       *block_group, u64 offset,
+					       u64 bytes);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+			   u64 bytes);
+u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
 #endif
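[Editor's note: the declarations above are the whole free-space-cache API
this patch exposes.  The sketch below is a hedged illustration of the
intended call flow, not code from the patch; example_alloc() is a made-up
name, and error handling is elided.  Later hunks in extent-tree.c drive
exactly these entry points.]

/* Illustrative only: how an allocator is expected to use the API. */
static int example_alloc(struct btrfs_block_group_cache *bg,
			 u64 hint, u64 num_bytes, u64 *found)
{
	struct btrfs_free_space *info;

	/* offset search from the hint; size-based fallback happens inside */
	info = btrfs_find_free_space(bg, hint, num_bytes);
	if (!info)
		return -ENOSPC;

	*found = info->offset;
	/* claim the range: trims or drops the cached entry */
	return btrfs_remove_free_space(bg, *found, num_bytes);
}

[Freed or unpinned extents flow back in through btrfs_add_free_space(), and
btrfs_remove_free_space_cache() tears both trees down when the block group
goes away.]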
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f6f7821d43a5..535bd0fe1a71 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1410,10 +1410,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
 
-	extent_io_tree_init(&fs_info->free_space_cache,
-			     fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->block_group_cache,
-			     fs_info->btree_inode->i_mapping, GFP_NOFS);
+	spin_lock_init(&fs_info->block_group_cache_lock);
+	fs_info->block_group_cache_tree.rb_node = NULL;
+
 	extent_io_tree_init(&fs_info->pinned_extents,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 	extent_io_tree_init(&fs_info->pending_del,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1c10ffc837c8..813566acc5d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -29,12 +29,6 @@
 #include "locking.h"
 #include "ref-cache.h"
 
-#define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
-#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
-#define BLOCK_GROUP_SYSTEM   EXTENT_NEW
-
-#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
-
 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
 				 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
@@ -62,6 +56,127 @@ void maybe_unlock_mutex(struct btrfs_root *root)
 	}
 }
 
+static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+{
+	return (cache->flags & bits) == bits;
+}
+
+/*
+ * this adds the block group to the fs_info rb tree for the block group
+ * cache
+ */
+int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
+				struct btrfs_block_group_cache *block_group)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct btrfs_block_group_cache *cache;
+
+	spin_lock(&info->block_group_cache_lock);
+	p = &info->block_group_cache_tree.rb_node;
+
+	while (*p) {
+		parent = *p;
+		cache = rb_entry(parent, struct btrfs_block_group_cache,
+				 cache_node);
+		if (block_group->key.objectid < cache->key.objectid) {
+			p = &(*p)->rb_left;
+		} else if (block_group->key.objectid > cache->key.objectid) {
+			p = &(*p)->rb_right;
+		} else {
+			spin_unlock(&info->block_group_cache_lock);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&block_group->cache_node, parent, p);
+	rb_insert_color(&block_group->cache_node,
+			&info->block_group_cache_tree);
+	spin_unlock(&info->block_group_cache_lock);
+
+	return 0;
+}
+
+/*
+ * This will return the block group at or after bytenr if contains is 0, else
+ * it will return the block group that contains the bytenr
+ */
+static struct btrfs_block_group_cache *
+block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
+			      int contains)
+{
+	struct btrfs_block_group_cache *cache, *ret = NULL;
+	struct rb_node *n;
+	u64 end, start;
+
+	spin_lock(&info->block_group_cache_lock);
+	n = info->block_group_cache_tree.rb_node;
+
+	while (n) {
+		cache = rb_entry(n, struct btrfs_block_group_cache,
+				 cache_node);
+		end = cache->key.objectid + cache->key.offset - 1;
+		start = cache->key.objectid;
+
+		if (bytenr < start) {
+			if (!contains && (!ret || start < ret->key.objectid))
+				ret = cache;
+			n = n->rb_left;
+		} else if (bytenr > start) {
+			if (contains && bytenr <= end) {
+				ret = cache;
+				break;
+			}
+			n = n->rb_right;
+		} else {
+			ret = cache;
+			break;
+		}
+	}
+	spin_unlock(&info->block_group_cache_lock);
+
+	return ret;
+}
+
+/*
+ * this is only called by cache_block_group, since we could have freed extents
+ * we need to check the pinned_extents for any extents that can't be used yet
+ * since their free space will be released as soon as the transaction commits.
+ */
+static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+			      struct btrfs_fs_info *info, u64 start, u64 end)
+{
+	u64 extent_start, extent_end, size;
+	int ret;
+
+	while (start < end) {
+		ret = find_first_extent_bit(&info->pinned_extents, start,
+					    &extent_start, &extent_end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		if (extent_start == start) {
+			start = extent_end + 1;
+		} else if (extent_start > start && extent_start < end) {
+			size = extent_start - start;
+			ret = btrfs_add_free_space(block_group, start, size);
+			BUG_ON(ret);
+			start = extent_end + 1;
+		} else {
+			break;
+		}
+	}
+
+	if (start < end) {
+		size = end - start;
+		ret = btrfs_add_free_space(block_group, start, size);
+		BUG_ON(ret);
+	}
+
+	return 0;
+}
+
 static int cache_block_group(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *block_group)
 {
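[Editor's note: block_group_cache_tree_search() above is the workhorse
behind both lookup flavours used later in this file.  Below is a small,
self-contained model of its semantics in plain userspace C, with a sorted
array standing in for the rb-tree and made-up block-group sizes; it is
purely illustrative, not patch code.]

/* Toy model of the contains=0 vs contains=1 lookup semantics. */
#include <stdint.h>
#include <stdio.h>

struct bg { uint64_t start, len; };

/* contains=1: group covering bytenr; contains=0: first group at or after it */
static const struct bg *bg_search(const struct bg *v, int n,
				  uint64_t bytenr, int contains)
{
	const struct bg *ret = NULL;
	int i;

	for (i = 0; i < n; i++) {
		uint64_t end = v[i].start + v[i].len - 1;

		if (contains && bytenr >= v[i].start && bytenr <= end)
			return &v[i];
		if (!contains && v[i].start >= bytenr &&
		    (!ret || v[i].start < ret->start))
			ret = &v[i];
	}
	return ret;
}

int main(void)
{
	const struct bg groups[] = {
		{ 0, 1ULL << 30 },		/* [0, 1G)  */
		{ 1ULL << 30, 1ULL << 30 },	/* [1G, 2G) */
		{ 2ULL << 30, 1ULL << 30 },	/* [2G, 3G) */
	};
	uint64_t bytenr = 3ULL << 29;		/* 1.5G */

	/* contains=1 finds [1G, 2G); contains=0 skips ahead to [2G, 3G) */
	printf("contains=1 -> %llu\n",
	       (unsigned long long)bg_search(groups, 3, bytenr, 1)->start);
	printf("contains=0 -> %llu\n",
	       (unsigned long long)bg_search(groups, 3, bytenr, 0)->start);
	return 0;
}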
@@ -69,10 +184,8 @@ static int cache_block_group(struct btrfs_root *root,
 	int ret = 0;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
-	struct extent_io_tree *free_space_cache;
 	int slot;
 	u64 last = 0;
-	u64 hole_size;
 	u64 first_free;
 	int found = 0;
 
@@ -80,7 +193,6 @@ static int cache_block_group(struct btrfs_root *root,
 		return 0;
 
 	root = root->fs_info->extent_root;
-	free_space_cache = &root->fs_info->free_space_cache;
 
 	if (block_group->cached)
 		return 0;
@@ -96,7 +208,8 @@ static int cache_block_group(struct btrfs_root *root,
 	 * skip the locking here
 	 */
 	path->skip_locking = 1;
-	first_free = block_group->key.objectid;
+	first_free = max_t(u64, block_group->key.objectid,
+			   BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
 	key.objectid = block_group->key.objectid;
 	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -119,32 +232,28 @@ static int cache_block_group(struct btrfs_root *root,
 		ret = btrfs_next_leaf(root, path);
 		if (ret < 0)
 			goto err;
-		if (ret == 0) {
+		if (ret == 0)
 			continue;
-		} else {
+		else
 			break;
-		}
 	}
 	btrfs_item_key_to_cpu(leaf, &key, slot);
-	if (key.objectid < block_group->key.objectid) {
+	if (key.objectid < block_group->key.objectid)
 		goto next;
-	}
+
 	if (key.objectid >= block_group->key.objectid +
-	    block_group->key.offset) {
+	    block_group->key.offset)
 		break;
-	}
 
 	if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
 		if (!found) {
 			last = first_free;
 			found = 1;
 		}
-		if (key.objectid > last) {
-			hole_size = key.objectid - last;
-			set_extent_dirty(free_space_cache, last,
-					 last + hole_size - 1,
-					 GFP_NOFS);
-		}
+
+		add_new_free_space(block_group, root->fs_info, last,
+				   key.objectid);
+
 		last = key.objectid + key.offset;
 	}
 next:
@@ -153,13 +262,11 @@ next:
 
 	if (!found)
 		last = first_free;
-	if (block_group->key.objectid +
-	    block_group->key.offset > last) {
-		hole_size = block_group->key.objectid +
-			    block_group->key.offset - last;
-		set_extent_dirty(free_space_cache, last,
-				 last + hole_size - 1, GFP_NOFS);
-	}
+
+	add_new_free_space(block_group, root->fs_info, last,
+			   block_group->key.objectid +
+			   block_group->key.offset);
+
 	block_group->cached = 1;
 	ret = 0;
 err:
@@ -167,166 +274,79 @@ err:
 	return ret;
 }
 
+/*
+ * return the block group that starts at or after bytenr
+ */
 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
 						       btrfs_fs_info *info,
 						       u64 bytenr)
 {
-	struct extent_io_tree *block_group_cache;
-	struct btrfs_block_group_cache *block_group = NULL;
-	u64 ptr;
-	u64 start;
-	u64 end;
-	int ret;
+	struct btrfs_block_group_cache *cache;
 
-	bytenr = max_t(u64, bytenr,
-		       BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
-	block_group_cache = &info->block_group_cache;
-	ret = find_first_extent_bit(block_group_cache,
-				    bytenr, &start, &end,
-				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
-				    BLOCK_GROUP_SYSTEM);
-	if (ret) {
-		return NULL;
-	}
-	ret = get_state_private(block_group_cache, start, &ptr);
-	if (ret)
-		return NULL;
+	cache = block_group_cache_tree_search(info, bytenr, 0);
 
-	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
-	return block_group;
+	return cache;
 }
 
+/*
+ * return the block group that contains the given bytenr
+ */
 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
 							 btrfs_fs_info *info,
 							 u64 bytenr)
 {
-	struct extent_io_tree *block_group_cache;
-	struct btrfs_block_group_cache *block_group = NULL;
-	u64 ptr;
-	u64 start;
-	u64 end;
-	int ret;
+	struct btrfs_block_group_cache *cache;
 
-	bytenr = max_t(u64, bytenr,
-		       BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
-	block_group_cache = &info->block_group_cache;
-	ret = find_first_extent_bit(block_group_cache,
-				    bytenr, &start, &end,
-				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
-				    BLOCK_GROUP_SYSTEM);
-	if (ret) {
-		return NULL;
-	}
-	ret = get_state_private(block_group_cache, start, &ptr);
-	if (ret)
-		return NULL;
+	cache = block_group_cache_tree_search(info, bytenr, 1);
 
-	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
-	if (block_group->key.objectid <= bytenr && bytenr <
-	    block_group->key.objectid + block_group->key.offset)
-		return block_group;
-	return NULL;
+	return cache;
 }
 
-static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
-{
-	return (cache->flags & bits) == bits;
-}
-
-static int noinline find_search_start(struct btrfs_root *root,
-				      struct btrfs_block_group_cache **cache_ret,
-				      u64 *start_ret, u64 num, int data)
+static int noinline find_free_space(struct btrfs_root *root,
+				    struct btrfs_block_group_cache **cache_ret,
+				    u64 *start_ret, u64 num, int data)
 {
 	int ret;
 	struct btrfs_block_group_cache *cache = *cache_ret;
-	struct extent_io_tree *free_space_cache;
-	struct extent_state *state;
+	struct btrfs_free_space *info = NULL;
 	u64 last;
-	u64 start = 0;
-	u64 cache_miss = 0;
 	u64 total_fs_bytes;
 	u64 search_start = *start_ret;
-	int wrapped = 0;
 
 	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
-	free_space_cache = &root->fs_info->free_space_cache;
 
 	if (!cache)
 		goto out;
 
+	last = max(search_start, cache->key.objectid);
+
 again:
 	ret = cache_block_group(root, cache);
-	if (ret) {
+	if (ret)
 		goto out;
-	}
 
-	last = max(search_start, cache->key.objectid);
-	if (!block_group_bits(cache, data) || cache->ro)
+	if (cache->ro || !block_group_bits(cache, data))
 		goto new_group;
 
-	spin_lock_irq(&free_space_cache->lock);
-	state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
-	while(1) {
-		if (!state) {
-			if (!cache_miss)
-				cache_miss = last;
-			spin_unlock_irq(&free_space_cache->lock);
-			goto new_group;
-		}
-
-		start = max(last, state->start);
-		last = state->end + 1;
-		if (last - start < num) {
-			do {
-				state = extent_state_next(state);
-			} while(state && !(state->state & EXTENT_DIRTY));
-			continue;
-		}
-		spin_unlock_irq(&free_space_cache->lock);
-		if (cache->ro) {
-			goto new_group;
-		}
-		if (start + num > cache->key.objectid + cache->key.offset)
-			goto new_group;
-		if (!block_group_bits(cache, data)) {
-			printk("block group bits don't match %Lu %d\n", cache->flags, data);
-		}
-		*start_ret = start;
+	info = btrfs_find_free_space(cache, last, num);
+	if (info) {
+		*start_ret = info->offset;
 		return 0;
 	}
-out:
-	cache = btrfs_lookup_block_group(root->fs_info, search_start);
-	if (!cache) {
-		printk("Unable to find block group for %Lu\n", search_start);
-		WARN_ON(1);
-	}
-	return -ENOSPC;
-
+
 new_group:
 	last = cache->key.objectid + cache->key.offset;
-wrapped:
+
 	cache = btrfs_lookup_first_block_group(root->fs_info, last);
-	if (!cache || cache->key.objectid >= total_fs_bytes) {
-no_cache:
-		if (!wrapped) {
-			wrapped = 1;
-			last = search_start;
-			goto wrapped;
-		}
+	if (!cache || cache->key.objectid >= total_fs_bytes)
 		goto out;
-	}
-	if (cache_miss && !cache->cached) {
-		cache_block_group(root, cache);
-		last = cache_miss;
-		cache = btrfs_lookup_first_block_group(root->fs_info, last);
-	}
-	cache_miss = 0;
-	cache = btrfs_find_block_group(root, cache, last, data, 0);
-	if (!cache)
-		goto no_cache;
+
 	*cache_ret = cache;
 	goto again;
+
+out:
+	return -ENOSPC;
 }
 
 static u64 div_factor(u64 num, int factor)
@@ -338,16 +358,19 @@ static u64 div_factor(u64 num, int factor)
 	return num;
 }
 
-static int block_group_state_bits(u64 flags)
+static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+						  u64 flags)
 {
-	int bits = 0;
-	if (flags & BTRFS_BLOCK_GROUP_DATA)
-		bits |= BLOCK_GROUP_DATA;
-	if (flags & BTRFS_BLOCK_GROUP_METADATA)
-		bits |= BLOCK_GROUP_METADATA;
-	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-		bits |= BLOCK_GROUP_SYSTEM;
-	return bits;
+	struct list_head *head = &info->space_info;
+	struct list_head *cur;
+	struct btrfs_space_info *found;
+	list_for_each(cur, head) {
+		found = list_entry(cur, struct btrfs_space_info, list);
+		if (found->flags == flags)
+			return found;
+	}
+	return NULL;
+
 }
 
 static struct btrfs_block_group_cache *
@@ -356,28 +379,19 @@ __btrfs_find_block_group(struct btrfs_root *root,
 			 u64 search_start, int data, int owner)
 {
 	struct btrfs_block_group_cache *cache;
-	struct extent_io_tree *block_group_cache;
 	struct btrfs_block_group_cache *found_group = NULL;
 	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *sinfo;
 	u64 used;
 	u64 last = 0;
-	u64 start;
-	u64 end;
 	u64 free_check;
-	u64 ptr;
-	int bit;
-	int ret;
 	int full_search = 0;
 	int factor = 10;
 	int wrapped = 0;
 
-	block_group_cache = &info->block_group_cache;
-
 	if (data & BTRFS_BLOCK_GROUP_METADATA)
 		factor = 9;
 
-	bit = block_group_state_bits(data);
-
 	if (search_start) {
 		struct btrfs_block_group_cache *shint;
 		shint = btrfs_lookup_first_block_group(info, search_start);
@@ -408,20 +422,30 @@ __btrfs_find_block_group(struct btrfs_root *root,
 	else
 		last = search_start;
 	}
+	sinfo = __find_space_info(root->fs_info, data);
+	if (!sinfo)
+		goto found;
 again:
 	while(1) {
-		ret = find_first_extent_bit(block_group_cache, last,
-					    &start, &end, bit);
-		if (ret)
-			break;
+		struct list_head *l;
 
-		ret = get_state_private(block_group_cache, start, &ptr);
-		if (ret) {
-			last = end + 1;
-			continue;
+		cache = NULL;
+
+		spin_lock(&sinfo->lock);
+		list_for_each(l, &sinfo->block_groups) {
+			struct btrfs_block_group_cache *entry;
+			entry = list_entry(l, struct btrfs_block_group_cache,
+					   list);
+			if ((entry->key.objectid >= last) &&
+			    (!cache || (entry->key.objectid <
+					cache->key.objectid)))
+				cache = entry;
 		}
+		spin_unlock(&sinfo->lock);
+
+		if (!cache)
+			break;
 
-		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
 		spin_lock(&cache->lock);
 		last = cache->key.objectid + cache->key.offset;
 		used = btrfs_block_group_used(&cache->item);
@@ -462,6 +486,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
 	ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
 	return ret;
 }
+
 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
 			   u64 owner, u64 owner_offset)
 {
@@ -1175,34 +1200,37 @@ fail:
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
-	struct extent_io_tree *block_group_cache;
-	struct btrfs_block_group_cache *cache;
-	int ret;
+	struct btrfs_block_group_cache *cache, *entry;
+	struct rb_node *n;
 	int err = 0;
 	int werr = 0;
 	struct btrfs_path *path;
 	u64 last = 0;
-	u64 start;
-	u64 end;
-	u64 ptr;
 
-	block_group_cache = &root->fs_info->block_group_cache;
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
 	mutex_lock(&root->fs_info->alloc_mutex);
 	while(1) {
-		ret = find_first_extent_bit(block_group_cache, last,
-					    &start, &end, BLOCK_GROUP_DIRTY);
-		if (ret)
-			break;
+		cache = NULL;
+		spin_lock(&root->fs_info->block_group_cache_lock);
+		for (n = rb_first(&root->fs_info->block_group_cache_tree);
+		     n; n = rb_next(n)) {
+			entry = rb_entry(n, struct btrfs_block_group_cache,
+					 cache_node);
+			if (entry->dirty) {
+				cache = entry;
+				break;
+			}
+		}
+		spin_unlock(&root->fs_info->block_group_cache_lock);
 
-		last = end + 1;
-		ret = get_state_private(block_group_cache, start, &ptr);
-		if (ret)
+		if (!cache)
 			break;
-		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
+
+		last += cache->key.offset;
+
 		err = write_one_cache_group(trans, root,
 					    path, cache);
 		/*
@@ -1214,29 +1242,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 			werr = err;
 			continue;
 		}
-		clear_extent_bits(block_group_cache, start, end,
-				  BLOCK_GROUP_DIRTY, GFP_NOFS);
+
+		cache->dirty = 0;
 	}
 	btrfs_free_path(path);
 	mutex_unlock(&root->fs_info->alloc_mutex);
 	return werr;
 }
 
-static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
-						  u64 flags)
-{
-	struct list_head *head = &info->space_info;
-	struct list_head *cur;
-	struct btrfs_space_info *found;
-	list_for_each(cur, head) {
-		found = list_entry(cur, struct btrfs_space_info, list);
-		if (found->flags == flags)
-			return found;
-	}
-	return NULL;
-
-}
-
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 			     u64 total_bytes, u64 bytes_used,
 			     struct btrfs_space_info **space_info)
@@ -1256,6 +1269,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 		return -ENOMEM;
 
 	list_add(&found->list, &info->space_info);
+	INIT_LIST_HEAD(&found->block_groups);
+	spin_lock_init(&found->lock);
 	found->flags = flags;
 	found->total_bytes = total_bytes;
 	found->bytes_used = bytes_used;
@@ -1318,7 +1333,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	u64 thresh;
 	u64 start;
 	u64 num_bytes;
-	int ret;
+	int ret = 0;
 
 	flags = reduce_alloc_profile(extent_root, flags);
 
@@ -1355,10 +1370,11 @@ printk("space info full %Lu\n", flags);
 	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
 		     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
 	BUG_ON(ret);
+
 out_unlock:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 out:
-	return 0;
+	return ret;
 }
 
 static int update_block_group(struct btrfs_trans_handle *trans,
@@ -1371,8 +1387,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 	u64 total = num_bytes;
 	u64 old_val;
 	u64 byte_in_group;
-	u64 start;
-	u64 end;
 
 	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	while(total) {
@@ -1382,12 +1396,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		}
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
-		start = cache->key.objectid;
-		end = start + cache->key.offset - 1;
-		set_extent_bits(&info->block_group_cache, start, end,
-				BLOCK_GROUP_DIRTY, GFP_NOFS);
 
 		spin_lock(&cache->lock);
+		cache->dirty = 1;
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
 		if (alloc) {
@@ -1401,9 +1412,11 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			btrfs_set_block_group_used(&cache->item, old_val);
 			spin_unlock(&cache->lock);
 			if (mark_free) {
-				set_extent_dirty(&info->free_space_cache,
-						 bytenr, bytenr + num_bytes - 1,
-						 GFP_NOFS);
+				int ret;
+				ret = btrfs_add_free_space(cache, bytenr,
+							   num_bytes);
+				if (ret)
+					return -1;
 			}
 		}
 		total -= num_bytes;
@@ -1414,16 +1427,13 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 
 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 {
-	u64 start;
-	u64 end;
-	int ret;
-	ret = find_first_extent_bit(&root->fs_info->block_group_cache,
-				    search_start, &start, &end,
-				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
-				    BLOCK_GROUP_SYSTEM);
-	if (ret)
+	struct btrfs_block_group_cache *cache;
+
+	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
+	if (!cache)
 		return 0;
-	return start;
+
+	return cache->key.objectid;
 }
 
 
@@ -1501,8 +1511,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	u64 start;
 	u64 end;
 	int ret;
-	struct extent_io_tree *free_space_cache;
-	free_space_cache = &root->fs_info->free_space_cache;
+	struct btrfs_block_group_cache *cache;
 
 	mutex_lock(&root->fs_info->alloc_mutex);
 	while(1) {
@@ -1512,7 +1521,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 			break;
 		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
-		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
+		cache = btrfs_lookup_block_group(root->fs_info, start);
+		if (cache->cached)
+			btrfs_add_free_space(cache, start, end - start + 1);
 		if (need_resched()) {
 			mutex_unlock(&root->fs_info->alloc_mutex);
 			cond_resched();
@@ -1875,9 +1886,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	/* if metadata always pin */
 	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
 		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+			struct btrfs_block_group_cache *cache;
+
 			/* btrfs_free_reserved_extent */
-			set_extent_dirty(&root->fs_info->free_space_cache,
-				 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+			cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+			BUG_ON(!cache);
+			btrfs_add_free_space(cache, bytenr, num_bytes);
 			return 0;
 		}
 		pin = 1;
@@ -1942,8 +1956,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 	u64 total_needed = num_bytes;
 	u64 *last_ptr = NULL;
 	struct btrfs_block_group_cache *block_group;
-	int full_scan = 0;
-	int wrapped = 0;
 	int chunk_alloc_done = 0;
 	int empty_cluster = 2 * 1024 * 1024;
 	int allowed_chunk_alloc = 0;
@@ -1959,9 +1971,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 		empty_cluster = 256 * 1024;
 	}
 
-	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
 		last_ptr = &root->fs_info->last_data_alloc;
-	}
+
 	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
 		last_ptr = &root->fs_info->last_log_alloc;
 		if (!last_ptr == 0 && root->fs_info->last_alloc) {
@@ -1972,9 +1984,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 	if (last_ptr) {
 		if (*last_ptr)
 			hint_byte = *last_ptr;
-		else {
+		else
 			empty_size += empty_cluster;
-		}
 	}
 
 	search_start = max(search_start, first_logical_byte(root, 0));
@@ -1983,145 +1994,172 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 	if (search_end == (u64)-1)
 		search_end = btrfs_super_total_bytes(&info->super_copy);
 
-	if (hint_byte) {
-		block_group = btrfs_lookup_first_block_group(info, hint_byte);
-		if (!block_group)
-			hint_byte = search_start;
-		block_group = btrfs_find_block_group(root, block_group,
-						     hint_byte, data, 1);
-		if (last_ptr && *last_ptr == 0 && block_group)
-			hint_byte = block_group->key.objectid;
-	} else {
-		block_group = btrfs_find_block_group(root,
-						     trans->block_group,
-						     search_start, data, 1);
-	}
 	search_start = max(search_start, hint_byte);
-
 	total_needed += empty_size;
 
-check_failed:
-	if (!block_group) {
-		block_group = btrfs_lookup_first_block_group(info,
-							     search_start);
-		if (!block_group)
-			block_group = btrfs_lookup_first_block_group(info,
-						       orig_search_start);
-	}
-	if (full_scan && !chunk_alloc_done) {
-		if (allowed_chunk_alloc) {
-			do_chunk_alloc(trans, root,
-				       num_bytes + 2 * 1024 * 1024, data, 1);
-			allowed_chunk_alloc = 0;
-		} else if (block_group && block_group_bits(block_group, data)) {
-			block_group->space_info->force_alloc = 1;
+new_group:
+	block_group = btrfs_lookup_block_group(info, search_start);
+
+	/*
+	 * Ok this looks a little tricky, but it's really simple.  First if we
+	 * didn't find a block group obviously we want to start over.
+	 * Secondly, if the block group we found does not match the type we
+	 * need, and we have a last_ptr and it's not 0, chances are the last
+	 * allocation we made was at the end of the block group, so let's go
+	 * ahead and skip the looking through the rest of the block groups and
+	 * start at the beginning.  This helps with metadata allocations,
+	 * since you are likely to have a bunch of data block groups to search
+	 * through first before you realize that you need to start over, so go
+	 * ahead and start over and save the time.
+	 */
+	if (!block_group || (!block_group_bits(block_group, data) &&
+			     last_ptr && *last_ptr)) {
+		if (search_start != orig_search_start) {
+			if (last_ptr && *last_ptr)
+				*last_ptr = 0;
+			search_start = orig_search_start;
+			goto new_group;
+		} else if (!chunk_alloc_done && allowed_chunk_alloc) {
+			ret = do_chunk_alloc(trans, root,
+					     num_bytes + 2 * 1024 * 1024,
+					     data, 1);
+			if (ret < 0) {
+				struct btrfs_space_info *info;
+
+				info = __find_space_info(root->fs_info, data);
+				goto error;
+			}
+			BUG_ON(ret);
+			chunk_alloc_done = 1;
+			search_start = orig_search_start;
+			goto new_group;
+		} else {
+			ret = -ENOSPC;
+			goto error;
 		}
-		chunk_alloc_done = 1;
-	}
-	ret = find_search_start(root, &block_group, &search_start,
-				total_needed, data);
-	if (ret == -ENOSPC && last_ptr && *last_ptr) {
-		*last_ptr = 0;
-		block_group = btrfs_lookup_first_block_group(info,
-							     orig_search_start);
-		search_start = orig_search_start;
-		ret = find_search_start(root, &block_group, &search_start,
-					total_needed, data);
 	}
-	if (ret == -ENOSPC)
-		goto enospc;
-	if (ret)
-		goto error;
 
-	if (last_ptr && *last_ptr && search_start != *last_ptr) {
-		*last_ptr = 0;
-		if (!empty_size) {
-			empty_size += empty_cluster;
-			total_needed += empty_size;
+	/*
+	 * this is going to search through all of the existing block groups it
+	 * can find, so if we don't find something we need to see if we can
+	 * allocate what we need.
+	 */
+	ret = find_free_space(root, &block_group, &search_start,
+			      total_needed, data);
+	if (ret == -ENOSPC) {
+		/*
+		 * instead of allocating, start at the original search start
+		 * and see if there is something to be found, if not then we
+		 * allocate
+		 */
+		if (search_start != orig_search_start) {
+			if (last_ptr && *last_ptr) {
+				*last_ptr = 0;
+				total_needed += empty_cluster;
+			}
+			search_start = orig_search_start;
+			goto new_group;
 		}
-		block_group = btrfs_lookup_first_block_group(info,
-							     orig_search_start);
-		search_start = orig_search_start;
-		ret = find_search_start(root, &block_group,
-					&search_start, total_needed, data);
-		if (ret == -ENOSPC)
-			goto enospc;
-		if (ret)
+
+		/*
+		 * we've already allocated, we're pretty screwed
+		 */
+		if (chunk_alloc_done) {
 			goto error;
+		} else if (!allowed_chunk_alloc && block_group &&
+			   block_group_bits(block_group, data)) {
+			block_group->space_info->force_alloc = 1;
+			goto error;
+		} else if (!allowed_chunk_alloc) {
+			goto error;
+		}
+
+		ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
+				     data, 1);
+		if (ret < 0)
+			goto error;
+
+		BUG_ON(ret);
+		chunk_alloc_done = 1;
+		if (block_group)
+			search_start = block_group->key.objectid +
+				       block_group->key.offset;
+		else
+			search_start = orig_search_start;
+		goto new_group;
 	}
 
+	if (ret)
+		goto error;
+
 	search_start = stripe_align(root, search_start);
 	ins->objectid = search_start;
 	ins->offset = num_bytes;
 
-	if (ins->objectid + num_bytes >= search_end)
-		goto enospc;
+	if (ins->objectid + num_bytes >= search_end) {
+		search_start = orig_search_start;
+		if (chunk_alloc_done) {
+			ret = -ENOSPC;
+			goto error;
+		}
+		goto new_group;
+	}
 
 	if (ins->objectid + num_bytes >
 	    block_group->key.objectid + block_group->key.offset) {
+		if (search_start == orig_search_start && chunk_alloc_done) {
+			ret = -ENOSPC;
+			goto error;
+		}
 		search_start = block_group->key.objectid +
 				block_group->key.offset;
 		goto new_group;
 	}
 
-	if (test_range_bit(&info->extent_ins, ins->objectid,
-			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
-		search_start = ins->objectid + num_bytes;
-		goto new_group;
-	}
-
-	if (test_range_bit(&info->pinned_extents, ins->objectid,
-			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
-		search_start = ins->objectid + num_bytes;
-		goto new_group;
-	}
-
 	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
 	    ins->objectid < exclude_start + exclude_nr)) {
 		search_start = exclude_start + exclude_nr;
 		goto new_group;
 	}
 
-	if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
-		block_group = btrfs_lookup_block_group(info, ins->objectid);
-		if (block_group)
-			trans->block_group = block_group;
-	}
+	if (!(data & BTRFS_BLOCK_GROUP_DATA))
+		trans->block_group = block_group;
+
 	ins->offset = num_bytes;
 	if (last_ptr) {
 		*last_ptr = ins->objectid + ins->offset;
 		if (*last_ptr ==
-		    btrfs_super_total_bytes(&root->fs_info->super_copy)) {
+		    btrfs_super_total_bytes(&root->fs_info->super_copy))
 			*last_ptr = 0;
-		}
-	}
-	return 0;
-
-new_group:
-	if (search_start + num_bytes >= search_end) {
-enospc:
-		search_start = orig_search_start;
-		if (full_scan) {
-			ret = -ENOSPC;
-			goto error;
-		}
-		if (wrapped) {
-			if (!full_scan)
-				total_needed -= empty_size;
-			full_scan = 1;
-		} else
-			wrapped = 1;
 	}
-	block_group = btrfs_lookup_first_block_group(info, search_start);
-	cond_resched();
-	block_group = btrfs_find_block_group(root, block_group,
-					     search_start, data, 0);
-	goto check_failed;
 
+	ret = 0;
 error:
 	return ret;
 }
 
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
+{
+	struct btrfs_block_group_cache *cache;
+	struct list_head *l;
+
+	printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
+	       info->total_bytes - info->bytes_used - info->bytes_pinned,
+	       (info->full) ? "" : "not ");
+
+	spin_lock(&info->lock);
+	list_for_each(l, &info->block_groups) {
+		cache = list_entry(l, struct btrfs_block_group_cache, list);
+		spin_lock(&cache->lock);
+		printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
+		       "%Lu pinned\n",
+		       cache->key.objectid, cache->key.offset,
+		       btrfs_block_group_used(&cache->item), cache->pinned);
+		btrfs_dump_free_space(cache, bytes);
+		spin_unlock(&cache->lock);
+	}
+	spin_unlock(&info->lock);
+}
 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
 				  u64 num_bytes, u64 min_alloc_size,
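[Editor's note: the rewritten find_free_extent() above implements the
three-step strategy from the commit message: search from the hint, restart
from the original search start, then allocate a new chunk and try once
more.  Below is a toy, self-contained model of that retry ladder, not patch
code; block groups are reduced to a "has room" bitmap and all names are
made up.]

/* gcc -o retry retry.c — toy model of the allocator's retry ladder */
#include <stdio.h>

#define NR_GROUPS 8

static int has_space[NR_GROUPS];	/* 1 = group can satisfy us */
static int nr_groups = 4;		/* groups allocated so far */

static int search_from(int group)
{
	for (int i = group; i < nr_groups; i++)
		if (has_space[i])
			return i;
	return -1;
}

static int find_free_extent_model(int hint, int can_grow)
{
	int group = search_from(hint);		/* 1: from the hint */

	if (group < 0 && hint != 0)
		group = search_from(0);		/* 2: from the start */
	if (group < 0 && can_grow && nr_groups < NR_GROUPS) {
		has_space[nr_groups++] = 1;	/* 3: grow, then retry */
		group = search_from(0);
	}
	return group;				/* 4: -1 means ENOSPC */
}

int main(void)
{
	has_space[2] = 1;
	printf("hint 3 -> group %d\n", find_free_extent_model(3, 1));
	printf("hint 0 -> group %d\n", find_free_extent_model(0, 0));
	return 0;
}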
@@ -2133,6 +2171,7 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 	u64 search_start = 0;
 	u64 alloc_profile;
 	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_block_group_cache *cache;
 
 	if (data) {
 		alloc_profile = info->avail_data_alloc_bits &
@@ -2160,11 +2199,9 @@ again:
 					     BTRFS_BLOCK_GROUP_METADATA |
 					     (info->metadata_alloc_profile &
 					      info->avail_metadata_alloc_bits), 0);
-			BUG_ON(ret);
 		}
 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 				     num_bytes + 2 * 1024 * 1024, data, 0);
-		BUG_ON(ret);
 	}
 
 	WARN_ON(num_bytes < root->sectorsize);
@@ -2175,26 +2212,44 @@ again:
 
 	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
 		num_bytes = num_bytes >> 1;
+		num_bytes = num_bytes & ~(root->sectorsize - 1);
 		num_bytes = max(num_bytes, min_alloc_size);
 		do_chunk_alloc(trans, root->fs_info->extent_root,
 			       num_bytes, data, 1);
 		goto again;
 	}
 	if (ret) {
-		printk("allocation failed flags %Lu\n", data);
+		struct btrfs_space_info *sinfo;
+
+		sinfo = __find_space_info(root->fs_info, data);
+		printk("allocation failed flags %Lu, wanted %Lu\n",
+		       data, num_bytes);
+		dump_space_info(sinfo, num_bytes);
 		BUG();
 	}
-	clear_extent_dirty(&root->fs_info->free_space_cache,
-			   ins->objectid, ins->objectid + ins->offset - 1,
-			   GFP_NOFS);
-	return 0;
+	cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
+	if (!cache) {
+		printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid);
+		return -ENOSPC;
+	}
+
+	ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
+
+	return ret;
 }
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
 {
+	struct btrfs_block_group_cache *cache;
+
 	maybe_lock_mutex(root);
-	set_extent_dirty(&root->fs_info->free_space_cache,
-			 start, start + len - 1, GFP_NOFS);
+	cache = btrfs_lookup_block_group(root->fs_info, start);
+	if (!cache) {
+		printk(KERN_ERR "Unable to find block group for %Lu\n", start);
+		maybe_unlock_mutex(root);
+		return -ENOSPC;
+	}
+	btrfs_add_free_space(cache, start, len);
 	maybe_unlock_mutex(root);
 	return 0;
 }
@@ -2264,8 +2319,8 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
 				       sizes, 2);
-
 	BUG_ON(ret);
+
 	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				     struct btrfs_extent_item);
 	btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
@@ -2336,9 +2391,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
 	cache_block_group(root, block_group);
 
-	clear_extent_dirty(&root->fs_info->free_space_cache,
-			   ins->objectid, ins->objectid + ins->offset - 1,
-			   GFP_NOFS);
+	ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
+	BUG_ON(ret);
+
 	ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
 					    ref_generation, owner,
 					    owner_offset, ins);
@@ -2843,31 +2898,24 @@ out:
 
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
-	u64 start;
-	u64 end;
-	u64 ptr;
-	int ret;
+	struct btrfs_block_group_cache *block_group;
+	struct rb_node *n;
 
 	mutex_lock(&info->alloc_mutex);
-	while(1) {
-		ret = find_first_extent_bit(&info->block_group_cache, 0,
-					    &start, &end, (unsigned int)-1);
-		if (ret)
-			break;
-		ret = get_state_private(&info->block_group_cache, start, &ptr);
-		if (!ret)
-			kfree((void *)(unsigned long)ptr);
-		clear_extent_bits(&info->block_group_cache, start,
-				  end, (unsigned int)-1, GFP_NOFS);
-	}
-	while(1) {
-		ret = find_first_extent_bit(&info->free_space_cache, 0,
-					    &start, &end, EXTENT_DIRTY);
-		if (ret)
-			break;
-		clear_extent_dirty(&info->free_space_cache, start,
-				   end, GFP_NOFS);
-	}
+	spin_lock(&info->block_group_cache_lock);
+	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
+		block_group = rb_entry(n, struct btrfs_block_group_cache,
+				       cache_node);
+
+		btrfs_remove_free_space_cache(block_group);
+		rb_erase(&block_group->cache_node,
+			 &info->block_group_cache_tree);
+		spin_lock(&block_group->space_info->lock);
+		list_del(&block_group->list);
+		spin_unlock(&block_group->space_info->lock);
+		kfree(block_group);
+	}
+	spin_unlock(&info->block_group_cache_lock);
 	mutex_unlock(&info->alloc_mutex);
 	return 0;
 }
@@ -3386,7 +3434,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
 	u64 total_found;
 	u64 shrink_last_byte;
 	struct btrfs_block_group_cache *shrink_block_group;
-	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
@@ -3542,15 +3589,17 @@ next:
 		goto out;
 	}
 
-	clear_extent_bits(&info->block_group_cache, key.objectid,
-			  key.objectid + key.offset - 1,
-			  (unsigned int)-1, GFP_NOFS);
-
-	clear_extent_bits(&info->free_space_cache,
-			  key.objectid, key.objectid + key.offset - 1,
-			  (unsigned int)-1, GFP_NOFS);
+	spin_lock(&root->fs_info->block_group_cache_lock);
+	rb_erase(&shrink_block_group->cache_node,
+		 &root->fs_info->block_group_cache_tree);
+	spin_unlock(&root->fs_info->block_group_cache_lock);
 
+	ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
+				      key.offset);
+	if (ret) {
+		btrfs_end_transaction(trans, root);
+		goto out;
+	}
 	/*
 	memset(shrink_block_group, 0, sizeof(*shrink_block_group));
 	kfree(shrink_block_group);
@@ -3566,9 +3615,9 @@ next:
 	/* the code to unpin extents might set a few bits in the free
 	 * space cache for this range again
 	 */
-	clear_extent_bits(&info->free_space_cache,
-			  key.objectid, key.objectid + key.offset - 1,
-			  (unsigned int)-1, GFP_NOFS);
+	/* XXX? */
+	ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
+				      key.offset);
 out:
 	btrfs_free_path(path);
 	mutex_unlock(&root->fs_info->alloc_mutex);
@@ -3616,16 +3665,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 {
 	struct btrfs_path *path;
 	int ret;
-	int bit;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_space_info *space_info;
-	struct extent_io_tree *block_group_cache;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
 
-	block_group_cache = &info->block_group_cache;
 	root = info->extent_root;
 	key.objectid = 0;
 	key.offset = 0;
@@ -3653,6 +3699,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
3653 } 3699 }
3654 3700
3655 spin_lock_init(&cache->lock); 3701 spin_lock_init(&cache->lock);
3702 INIT_LIST_HEAD(&cache->list);
3656 read_extent_buffer(leaf, &cache->item, 3703 read_extent_buffer(leaf, &cache->item,
3657 btrfs_item_ptr_offset(leaf, path->slots[0]), 3704 btrfs_item_ptr_offset(leaf, path->slots[0]),
3658 sizeof(cache->item)); 3705 sizeof(cache->item));
@@ -3661,31 +3708,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
3661 key.objectid = found_key.objectid + found_key.offset; 3708 key.objectid = found_key.objectid + found_key.offset;
3662 btrfs_release_path(root, path); 3709 btrfs_release_path(root, path);
3663 cache->flags = btrfs_block_group_flags(&cache->item); 3710 cache->flags = btrfs_block_group_flags(&cache->item);
3664 bit = 0;
3665 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3666 bit = BLOCK_GROUP_DATA;
3667 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3668 bit = BLOCK_GROUP_SYSTEM;
3669 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3670 bit = BLOCK_GROUP_METADATA;
3671 }
3672 set_avail_alloc_bits(info, cache->flags);
3673 3711
3674 ret = update_space_info(info, cache->flags, found_key.offset, 3712 ret = update_space_info(info, cache->flags, found_key.offset,
3675 btrfs_block_group_used(&cache->item), 3713 btrfs_block_group_used(&cache->item),
3676 &space_info); 3714 &space_info);
3677 BUG_ON(ret); 3715 BUG_ON(ret);
3678 cache->space_info = space_info; 3716 cache->space_info = space_info;
3717 spin_lock(&space_info->lock);
3718 list_add(&cache->list, &space_info->block_groups);
3719 spin_unlock(&space_info->lock);
3720
3721 ret = btrfs_add_block_group_cache(root->fs_info, cache);
3722 BUG_ON(ret);
3679 3723
3680 /* use EXTENT_LOCKED to prevent merging */
3681 set_extent_bits(block_group_cache, found_key.objectid,
3682 found_key.objectid + found_key.offset - 1,
3683 EXTENT_LOCKED, GFP_NOFS);
3684 set_state_private(block_group_cache, found_key.objectid,
3685 (unsigned long)cache);
3686 set_extent_bits(block_group_cache, found_key.objectid,
3687 found_key.objectid + found_key.offset - 1,
3688 bit | EXTENT_LOCKED, GFP_NOFS);
3689 if (key.objectid >= 3724 if (key.objectid >=
3690 btrfs_super_total_bytes(&info->super_copy)) 3725 btrfs_super_total_bytes(&info->super_copy))
3691 break; 3726 break;
@@ -3703,22 +3738,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3703 u64 size) 3738 u64 size)
3704{ 3739{
3705 int ret; 3740 int ret;
3706 int bit = 0;
3707 struct btrfs_root *extent_root; 3741 struct btrfs_root *extent_root;
3708 struct btrfs_block_group_cache *cache; 3742 struct btrfs_block_group_cache *cache;
3709 struct extent_io_tree *block_group_cache;
3710 3743
3711 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); 3744 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3712 extent_root = root->fs_info->extent_root; 3745 extent_root = root->fs_info->extent_root;
3713 block_group_cache = &root->fs_info->block_group_cache;
3714 3746
3715 root->fs_info->last_trans_new_blockgroup = trans->transid; 3747 root->fs_info->last_trans_new_blockgroup = trans->transid;
3716 3748
3717 cache = kzalloc(sizeof(*cache), GFP_NOFS); 3749 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3718 BUG_ON(!cache); 3750 if (!cache)
3751 return -ENOMEM;
3752
3719 cache->key.objectid = chunk_offset; 3753 cache->key.objectid = chunk_offset;
3720 cache->key.offset = size; 3754 cache->key.offset = size;
3721 spin_lock_init(&cache->lock); 3755 spin_lock_init(&cache->lock);
3756 INIT_LIST_HEAD(&cache->list);
3722 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); 3757 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3723 3758
3724 btrfs_set_block_group_used(&cache->item, bytes_used); 3759 btrfs_set_block_group_used(&cache->item, bytes_used);
@@ -3729,16 +3764,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3729 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 3764 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3730 &cache->space_info); 3765 &cache->space_info);
3731 BUG_ON(ret); 3766 BUG_ON(ret);
3767 spin_lock(&cache->space_info->lock);
3768 list_add(&cache->list, &cache->space_info->block_groups);
3769 spin_unlock(&cache->space_info->lock);
3732 3770
3733 bit = block_group_state_bits(type); 3771 ret = btrfs_add_block_group_cache(root->fs_info, cache);
3734 set_extent_bits(block_group_cache, chunk_offset, 3772 BUG_ON(ret);
3735 chunk_offset + size - 1,
3736 EXTENT_LOCKED, GFP_NOFS);
3737 set_state_private(block_group_cache, chunk_offset,
3738 (unsigned long)cache);
3739 set_extent_bits(block_group_cache, chunk_offset,
3740 chunk_offset + size - 1,
3741 bit | EXTENT_LOCKED, GFP_NOFS);
3742 3773
3743 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 3774 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3744 sizeof(cache->item)); 3775 sizeof(cache->item));
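
The extent-tree.c hunks above swap the extent-bit block group cache for an rb-tree keyed by each group's starting offset, plus a per-space_info list so related groups can be found without scanning everything (btrfs_add_block_group_cache and the matching lookup live earlier in this patch). As a minimal standalone sketch of the offset-keyed lookup idea, with a sorted array standing in for the rb-tree and every name below purely illustrative:

#include <stdio.h>
#include <stdint.h>

struct bg { uint64_t start, size; };

/* Find the group whose [start, start + size) range covers 'offset':
 * search for the greatest start <= offset, then range-check, which is
 * exactly the walk an offset-keyed rb-tree performs. */
static const struct bg *bg_lookup(const struct bg *v, int n, uint64_t offset)
{
	const struct bg *best = NULL;
	int lo = 0, hi = n - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;
		if (v[mid].start <= offset) {
			best = &v[mid];
			lo = mid + 1;
		} else {
			hi = mid - 1;
		}
	}
	if (best && offset < best->start + best->size)
		return best;
	return NULL;
}

int main(void)
{
	/* three contiguous groups, sorted by starting offset */
	const struct bg groups[] = {
		{ 0,       1 << 20 },
		{ 1 << 20, 8 << 20 },
		{ 9 << 20, 8 << 20 },
	};
	uint64_t off = (2 << 20) + 123;
	const struct bg *g = bg_lookup(groups, 3, off);

	if (g)
		printf("offset %llu lives in the group starting at %llu\n",
		       (unsigned long long)off, (unsigned long long)g->start);
	return 0;
}

The invariant is the same one the rb-tree maintains: take the greatest start at or below the offset, then confirm the group's range actually covers it.
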
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 319a0c7a4a58..8624f3e88036 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2634,6 +2634,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2634 if (eb) { 2634 if (eb) {
2635 atomic_inc(&eb->refs); 2635 atomic_inc(&eb->refs);
2636 spin_unlock(&tree->buffer_lock); 2636 spin_unlock(&tree->buffer_lock);
2637 mark_page_accessed(eb->first_page);
2637 return eb; 2638 return eb;
2638 } 2639 }
2639 spin_unlock(&tree->buffer_lock); 2640 spin_unlock(&tree->buffer_lock);
@@ -2713,6 +2714,9 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
2713 atomic_inc(&eb->refs); 2714 atomic_inc(&eb->refs);
2714 spin_unlock(&tree->buffer_lock); 2715 spin_unlock(&tree->buffer_lock);
2715 2716
2717 if (eb)
2718 mark_page_accessed(eb->first_page);
2719
2716 return eb; 2720 return eb;
2717} 2721}
2718EXPORT_SYMBOL(find_extent_buffer); 2722EXPORT_SYMBOL(find_extent_buffer);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
new file mode 100644
index 000000000000..01c26e8ae555
--- /dev/null
+++ b/fs/btrfs/free-space-cache.c
@@ -0,0 +1,415 @@
1/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21
22static int tree_insert_offset(struct rb_root *root, u64 offset,
23 struct rb_node *node)
24{
25 struct rb_node **p = &root->rb_node;
26 struct rb_node *parent = NULL;
27 struct btrfs_free_space *info;
28
29 while (*p) {
30 parent = *p;
31 info = rb_entry(parent, struct btrfs_free_space, offset_index);
32
33 if (offset < info->offset)
34 p = &(*p)->rb_left;
35 else if (offset > info->offset)
36 p = &(*p)->rb_right;
37 else
38 return -EEXIST;
39 }
40
41 rb_link_node(node, parent, p);
42 rb_insert_color(node, root);
43
44 return 0;
45}
46
47static int tree_insert_bytes(struct rb_root *root, u64 bytes,
48 struct rb_node *node)
49{
50 struct rb_node **p = &root->rb_node;
51 struct rb_node *parent = NULL;
52 struct btrfs_free_space *info;
53
54 while (*p) {
55 parent = *p;
56 info = rb_entry(parent, struct btrfs_free_space, bytes_index);
57
58 if (bytes < info->bytes)
59 p = &(*p)->rb_left;
60 else
61 p = &(*p)->rb_right;
62 }
63
64 rb_link_node(node, parent, p);
65 rb_insert_color(node, root);
66
67 return 0;
68}
69
70/*
71 * searches the tree for the given offset. If 'contains' is set, we return
72 * the free space that contains the given offset. If 'contains' is not set,
73 * we return the free space that starts at or after the given offset and is
74 * at least 'bytes' long.
75 */
76static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
77 u64 offset, u64 bytes,
78 int contains)
79{
80 struct rb_node *n = root->rb_node;
81 struct btrfs_free_space *entry, *ret = NULL;
82
83 while (n) {
84 entry = rb_entry(n, struct btrfs_free_space, offset_index);
85
86 if (offset < entry->offset) {
87 if (!contains &&
88 (!ret || entry->offset < ret->offset) &&
89 (bytes <= entry->bytes))
90 ret = entry;
91 n = n->rb_left;
92 } else if (offset > entry->offset) {
93 if (contains &&
94 (entry->offset + entry->bytes - 1) >= offset) {
95 ret = entry;
96 break;
97 }
98 n = n->rb_right;
99 } else {
100 if (bytes > entry->bytes) {
101 n = n->rb_right;
102 continue;
103 }
104 ret = entry;
105 break;
106 }
107 }
108
109 return ret;
110}
111
112/*
113 * return a chunk of at least 'bytes' in size, as close to 'offset' as we can get.
114 */
115static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
116 u64 offset, u64 bytes)
117{
118 struct rb_node *n = root->rb_node;
119 struct btrfs_free_space *entry, *ret = NULL;
120
121 while (n) {
122 entry = rb_entry(n, struct btrfs_free_space, bytes_index);
123
124 if (bytes < entry->bytes) {
125 /*
126 * We prefer a hole as close to the requested size as
127 * possible, so we don't carve small slivers out of
128 * huge holes, but we also want to stay as close to
129 * the given offset as possible so we don't cause a
130 * lot of fragmentation.
131 */
132 if (offset <= entry->offset) {
133 if (!ret)
134 ret = entry;
135 else if (entry->bytes < ret->bytes)
136 ret = entry;
137 else if (entry->offset < ret->offset)
138 ret = entry;
139 }
140 n = n->rb_left;
141 } else if (bytes > entry->bytes) {
142 n = n->rb_right;
143 } else {
144 /*
145 * Ok, we may have multiple chunks of the wanted size,
146 * so we don't take the first one we find; we want
147 * the one closest to our given offset, so keep
148 * searching in case there's a better match.
149 */
150 n = n->rb_right;
151 if (offset > entry->offset)
152 continue;
153 else if (!ret || entry->offset < ret->offset)
154 ret = entry;
155 }
156 }
157
158 return ret;
159}
160
161static void unlink_free_space(struct btrfs_block_group_cache *block_group,
162 struct btrfs_free_space *info)
163{
164 rb_erase(&info->offset_index, &block_group->free_space_offset);
165 rb_erase(&info->bytes_index, &block_group->free_space_bytes);
166}
167
168static int link_free_space(struct btrfs_block_group_cache *block_group,
169 struct btrfs_free_space *info)
170{
171 int ret = 0;
172
173
174 ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
175 &info->offset_index);
176 if (ret)
177 return ret;
178
179 ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
180 &info->bytes_index);
181 if (ret)
182 return ret;
183
184 return ret;
185}
186
187int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
188 u64 offset, u64 bytes)
189{
190 struct btrfs_free_space *right_info;
191 struct btrfs_free_space *left_info;
192 struct btrfs_free_space *info = NULL;
193 struct btrfs_free_space *alloc_info;
194 int ret = 0;
195
196 alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
197 if (!alloc_info)
198 return -ENOMEM;
199
200 /*
201 * first we want to see if there is free space adjacent to the range we
202 * are adding; if there is, remove that struct and add a new one
203 * covering the entire range
204 */
205 spin_lock(&block_group->lock);
206
207 right_info = tree_search_offset(&block_group->free_space_offset,
208 offset+bytes, 0, 1);
209 left_info = tree_search_offset(&block_group->free_space_offset,
210 offset-1, 0, 1);
211
212 if (right_info && right_info->offset == offset+bytes) {
213 unlink_free_space(block_group, right_info);
214 info = right_info;
215 info->offset = offset;
216 info->bytes += bytes;
217 } else if (right_info && right_info->offset != offset+bytes) {
218 printk(KERN_ERR "adding space in the middle of an existing "
219 "free space area. existing: offset=%Lu, bytes=%Lu. "
220 "new: offset=%Lu, bytes=%Lu\n", right_info->offset,
221 right_info->bytes, offset, bytes);
222 BUG();
223 }
224
225 if (left_info) {
226 unlink_free_space(block_group, left_info);
227
228 if (unlikely((left_info->offset + left_info->bytes) !=
229 offset)) {
230 printk(KERN_ERR "free space to the left of new free "
231 "space isn't quite right. existing: offset=%Lu,"
232 " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n",
233 left_info->offset, left_info->bytes, offset,
234 bytes);
235 BUG();
236 }
237
238 if (info) {
239 info->offset = left_info->offset;
240 info->bytes += left_info->bytes;
241 kfree(left_info);
242 } else {
243 info = left_info;
244 info->bytes += bytes;
245 }
246 }
247
248 if (info) {
249 ret = link_free_space(block_group, info);
250 if (!ret)
251 info = NULL;
252 goto out;
253 }
254
255 info = alloc_info;
256 alloc_info = NULL;
257 info->offset = offset;
258 info->bytes = bytes;
259
260 ret = link_free_space(block_group, info);
261 if (ret)
262 kfree(info);
263out:
264 spin_unlock(&block_group->lock);
265 if (ret) {
266 printk(KERN_ERR "btrfs: unable to add free space: %d\n", ret);
267 if (ret == -EEXIST)
268 BUG();
269 }
270
271 if (alloc_info)
272 kfree(alloc_info);
273
274 return ret;
275}
276
277int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
278 u64 offset, u64 bytes)
279{
280 struct btrfs_free_space *info;
281 int ret = 0;
282
283 spin_lock(&block_group->lock);
284 info = tree_search_offset(&block_group->free_space_offset, offset, 0,
285 1);
286
287 if (info && info->offset == offset) {
288 if (info->bytes < bytes) {
289 printk(KERN_ERR "Found free space at %Lu, size %Lu, "
290 "trying to use %Lu\n",
291 info->offset, info->bytes, bytes);
292 WARN_ON(1);
293 ret = -EINVAL;
294 goto out;
295 }
296
297 unlink_free_space(block_group, info);
298
299 if (info->bytes == bytes) {
300 kfree(info);
301 goto out;
302 }
303
304 info->offset += bytes;
305 info->bytes -= bytes;
306
307 ret = link_free_space(block_group, info);
308 BUG_ON(ret);
309 } else {
310 WARN_ON(1);
311 }
312out:
313 spin_unlock(&block_group->lock);
314 return ret;
315}
316
317void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
318 u64 bytes)
319{
320 struct btrfs_free_space *info;
321 struct rb_node *n;
322 int count = 0;
323
324 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
325 info = rb_entry(n, struct btrfs_free_space, offset_index);
326 if (info->bytes >= bytes)
327 count++;
328 //printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset,
329 // info->bytes);
330 }
331 printk(KERN_INFO "%d blocks of free space at or bigger than %Lu bytes\n",
332 count, bytes);
333}
334
335u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
336{
337 struct btrfs_free_space *info;
338 struct rb_node *n;
339 u64 ret = 0;
340
341 for (n = rb_first(&block_group->free_space_offset); n;
342 n = rb_next(n)) {
343 info = rb_entry(n, struct btrfs_free_space, offset_index);
344 ret += info->bytes;
345 }
346
347 return ret;
348}
349
350void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
351{
352 struct btrfs_free_space *info;
353 struct rb_node *node;
354
355 spin_lock(&block_group->lock);
356 while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
357 info = rb_entry(node, struct btrfs_free_space, bytes_index);
358 unlink_free_space(block_group, info);
359 kfree(info);
360 if (need_resched()) {
361 spin_unlock(&block_group->lock);
362 cond_resched();
363 spin_lock(&block_group->lock);
364 }
365 }
366 spin_unlock(&block_group->lock);
367}
368
369struct btrfs_free_space *
370btrfs_find_free_space_offset(struct btrfs_block_group_cache *block_group,
371 u64 offset,
372 u64 bytes)
373{
374 struct btrfs_free_space *ret;
375
376 spin_lock(&block_group->lock);
377 ret = tree_search_offset(&block_group->free_space_offset, offset,
378 bytes, 0);
379 spin_unlock(&block_group->lock);
380
381 return ret;
382}
383
384struct btrfs_free_space *
385btrfs_find_free_space_bytes(struct btrfs_block_group_cache *block_group,
386 u64 offset,
387 u64 bytes)
388{
389 struct btrfs_free_space *ret;
390
391 spin_lock(&block_group->lock);
392
393 ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes);
394 spin_unlock(&block_group->lock);
395
396 return ret;
397}
398
399struct btrfs_free_space *
400btrfs_find_free_space(struct btrfs_block_group_cache *block_group,
401 u64 offset, u64 bytes)
402{
403 struct btrfs_free_space *ret;
404
405 spin_lock(&block_group->lock);
406 ret = tree_search_offset(&block_group->free_space_offset, offset,
407 bytes, 0);
408 if (!ret)
409 ret = tree_search_bytes(&block_group->free_space_bytes,
410 offset, bytes);
411
412 spin_unlock(&block_group->lock);
413
414 return ret;
415}
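
btrfs_find_free_space() above is where the two trees cooperate: search the offset tree first so the allocator honors its hint, and only fall back to the bytes tree for a best-size fit. A toy userspace model of that two-pass policy, with sorted arrays in place of the rb-trees and none of the kernel API (the real tree_search_bytes() additionally weighs closeness to the hint):

#include <stdio.h>
#include <stdint.h>

struct fspace { uint64_t offset, bytes; };

/* Offset pass: first hole starting at or after 'offset' that is big
 * enough (the role of tree_search_offset() with contains == 0). */
static struct fspace *by_offset(struct fspace *s, int n,
				uint64_t offset, uint64_t bytes)
{
	for (int i = 0; i < n; i++)
		if (s[i].offset >= offset && s[i].bytes >= bytes)
			return &s[i];
	return NULL;
}

/* Size pass: smallest hole that still fits, so huge holes are not
 * chipped into slivers (the role of tree_search_bytes()). */
static struct fspace *by_bytes(struct fspace *s, int n, uint64_t bytes)
{
	struct fspace *best = NULL;
	for (int i = 0; i < n; i++)
		if (s[i].bytes >= bytes &&
		    (!best || s[i].bytes < best->bytes))
			best = &s[i];
	return best;
}

static void find(struct fspace *s, int n, uint64_t hint, uint64_t want)
{
	struct fspace *hit = by_offset(s, n, hint, want);
	if (!hit)			/* mirror btrfs_find_free_space() */
		hit = by_bytes(s, n, want);
	if (hit)
		printf("hint=%llu want=%llu -> hole at %llu (%llu bytes)\n",
		       (unsigned long long)hint, (unsigned long long)want,
		       (unsigned long long)hit->offset,
		       (unsigned long long)hit->bytes);
}

int main(void)
{
	struct fspace map[] = {
		{ 0,       4096  },	/* sliver at the front */
		{ 1 << 20, 65536 },	/* big hole at 1MB     */
		{ 4 << 20, 8192  },	/* snug hole at 4MB    */
	};
	find(map, 3, 2 << 20, 8192);	/* offset pass: hole at 4MB      */
	find(map, 3, 8 << 20, 8192);	/* falls back: snug 8K, not the  */
	return 0;			/* 64K hole, avoiding a sliver   */
}
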
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 12c1c0530f3d..65b4f864b0df 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -141,7 +141,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
141 while(num_bytes > 0) { 141 while(num_bytes > 0) {
142 cur_alloc_size = min(num_bytes, root->fs_info->max_extent); 142 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
143 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 143 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
144 root->sectorsize, 0, 0, 144 root->sectorsize, 0, alloc_hint,
145 (u64)-1, &ins, 1); 145 (u64)-1, &ins, 1);
146 if (ret) { 146 if (ret) {
147 WARN_ON(1); 147 WARN_ON(1);
@@ -558,7 +558,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
558 trans->transid, inode->i_ino, 558 trans->transid, inode->i_ino,
559 ordered_extent->file_offset, &ins); 559 ordered_extent->file_offset, &ins);
560 BUG_ON(ret); 560 BUG_ON(ret);
561
562 mutex_lock(&BTRFS_I(inode)->extent_mutex); 561 mutex_lock(&BTRFS_I(inode)->extent_mutex);
563 562
564 ret = btrfs_drop_extents(trans, root, inode, 563 ret = btrfs_drop_extents(trans, root, inode,
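
The inode.c change is small but is the fix called out in the commit message: cow_file_range() now passes its alloc_hint instead of 0, so each data allocation resumes near the previous extent instead of walking every block group from offset zero. A rough standalone model of that loop, with a stub in place of btrfs_reserve_extent(); the hint update happens further down the real loop, and all names and the stub behavior here are illustrative:

#include <stdio.h>
#include <stdint.h>

struct key { uint64_t objectid, offset; };	/* start, length */

/* Stub allocator: pretend free space is found at the hint
 * (or at 4096 on the first call). */
static int reserve_extent(uint64_t hint, uint64_t want, struct key *ins)
{
	ins->objectid = hint ? hint : 4096;
	ins->offset = want;
	return 0;
}

int main(void)
{
	uint64_t max_extent = 64 * 1024;
	uint64_t num_bytes = 3 * max_extent;
	uint64_t alloc_hint = 0;
	struct key ins;

	while (num_bytes > 0) {
		uint64_t cur = num_bytes < max_extent ? num_bytes : max_extent;

		if (reserve_extent(alloc_hint, cur, &ins))
			break;
		printf("extent: start=%llu len=%llu\n",
		       (unsigned long long)ins.objectid,
		       (unsigned long long)ins.offset);
		/* resume the next search right after this extent */
		alloc_hint = ins.objectid + ins.offset;
		num_bytes -= ins.offset;
	}
	return 0;
}
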
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1546fa6f4f7d..b9e5c2d82dde 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -64,8 +64,8 @@ static void lock_chunks(struct btrfs_root *root)
64 64
65static void unlock_chunks(struct btrfs_root *root) 65static void unlock_chunks(struct btrfs_root *root)
66{ 66{
67 mutex_unlock(&root->fs_info->alloc_mutex);
68 mutex_unlock(&root->fs_info->chunk_mutex); 67 mutex_unlock(&root->fs_info->chunk_mutex);
68 mutex_unlock(&root->fs_info->alloc_mutex);
69} 69}
70 70
71int btrfs_cleanup_fs_uuids(void) 71int btrfs_cleanup_fs_uuids(void)
@@ -1668,8 +1668,13 @@ again:
1668 else 1668 else
1669 min_free = calc_size; 1669 min_free = calc_size;
1670 1670
1671 /* we add 1MB because we never use the first 1MB of the device */ 1671 /*
1672 min_free += 1024 * 1024; 1672 * we add 1MB because we never use the first 1MB of the device, unless
1673 * we've looped, then we are likely allocating the maximum amount of
1674 * space left already
1675 */
1676 if (!looped)
1677 min_free += 1024 * 1024;
1673 1678
1674 /* build a private list of devices we will allocate from */ 1679 /* build a private list of devices we will allocate from */
1675 while(index < num_stripes) { 1680 while(index < num_stripes) {
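
Finally, the volumes.c fix makes the 1MB device pad conditional: on the first pass we skip the reserved first 1MB of each device, but once the chunk allocator has looped it is likely claiming the last space a device has, and the pad would make every device look too small. A trivial illustration of the two cases (calc_size is just an example value):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t calc_size = 8ULL * 1024 * 1024;	/* example stripe size */

	for (int looped = 0; looped <= 1; looped++) {
		uint64_t min_free = calc_size;

		/* demand the extra 1MB only until we loop; after looping
		 * the tail of the device is all that is left to take */
		if (!looped)
			min_free += 1024 * 1024;
		printf("looped=%d min_free=%llu\n", looped,
		       (unsigned long long)min_free);
	}
	return 0;
}
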