Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c | 2038
1 files changed, 1018 insertions, 1020 deletions

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..359a754c782c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@ | |||
32 | #include "locking.h" | 32 | #include "locking.h" |
33 | #include "free-space-cache.h" | 33 | #include "free-space-cache.h" |
34 | 34 | ||
35 | static int update_reserved_extents(struct btrfs_root *root, | ||
36 | u64 bytenr, u64 num, int reserve); | ||
37 | static int update_block_group(struct btrfs_trans_handle *trans, | 35 | static int update_block_group(struct btrfs_trans_handle *trans, |
38 | struct btrfs_root *root, | 36 | struct btrfs_root *root, |
39 | u64 bytenr, u64 num_bytes, int alloc, | 37 | u64 bytenr, u64 num_bytes, int alloc, |
40 | int mark_free); | 38 | int mark_free); |
39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
57 | u64 parent, u64 root_objectid, | 57 | u64 parent, u64 root_objectid, |
58 | u64 flags, struct btrfs_disk_key *key, | 58 | u64 flags, struct btrfs_disk_key *key, |
59 | int level, struct btrfs_key *ins); | 59 | int level, struct btrfs_key *ins); |
60 | |||
61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
63 | u64 flags, int force); | 62 | u64 flags, int force); |
63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
64 | struct btrfs_root *root, | ||
65 | struct btrfs_path *path, | ||
66 | u64 bytenr, u64 num_bytes, | ||
67 | int is_data, int reserved, | ||
68 | struct extent_buffer **must_clean); | ||
69 | static int find_next_key(struct btrfs_path *path, int level, | ||
70 | struct btrfs_key *key); | ||
71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
72 | int dump_block_groups); | ||
64 | 73 | ||
65 | static noinline int | 74 | static noinline int |
66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
153 | return ret; | 162 | return ret; |
154 | } | 163 | } |
155 | 164 | ||
156 | /* | 165 | static int add_excluded_extent(struct btrfs_root *root, |
157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | 166 | u64 start, u64 num_bytes) |
158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
159 | * with pinned extents still sitting there because we had a block group caching, | ||
160 | * we need to clear those now, since we are done. | ||
161 | */ | ||
162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
163 | { | 167 | { |
164 | u64 start, end, last = 0; | 168 | u64 end = start + num_bytes - 1; |
165 | int ret; | 169 | set_extent_bits(&root->fs_info->freed_extents[0], |
170 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
171 | set_extent_bits(&root->fs_info->freed_extents[1], | ||
172 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
173 | return 0; | ||
174 | } | ||
166 | 175 | ||
167 | while (1) { | 176 | static void free_excluded_extents(struct btrfs_root *root, |
168 | ret = find_first_extent_bit(&info->pinned_extents, last, | 177 | struct btrfs_block_group_cache *cache) |
169 | &start, &end, | 178 | { |
170 | EXTENT_LOCKED|EXTENT_DIRTY); | 179 | u64 start, end; |
171 | if (ret) | ||
172 | break; | ||
173 | 180 | ||
174 | clear_extent_bits(&info->pinned_extents, start, end, | 181 | start = cache->key.objectid; |
175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | 182 | end = start + cache->key.offset - 1; |
176 | last = end+1; | 183 | |
177 | } | 184 | clear_extent_bits(&root->fs_info->freed_extents[0], |
185 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
186 | clear_extent_bits(&root->fs_info->freed_extents[1], | ||
187 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
178 | } | 188 | } |
179 | 189 | ||
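Note on the pair above: it replaces the old EXTENT_LOCKED trick for the super mirror extents. Ranges that must never be handed out are marked EXTENT_UPTODATE in *both* freed_extents trees, so the mark survives the commit-time swap of fs_info->pinned_extents between them (see btrfs_prepare_extent_commit further down). A minimal userspace model of that bookkeeping, with a plain byte array standing in for the kernel's extent_io_tree:

```c
#include <stdint.h>
#include <stdio.h>

#define UNIT  4096              /* model granularity: one 4 KiB page per slot */
#define SLOTS 1024              /* models the first 4 MiB of a block group    */

static unsigned char freed_extents[2][SLOTS];   /* the EXTENT_UPTODATE bits */

static void mark_excluded(uint64_t start, uint64_t num_bytes, int set)
{
    /* mirror the range into both trees so the mark survives the
     * commit-time swap of fs_info->pinned_extents between them */
    for (uint64_t off = start; off < start + num_bytes; off += UNIT) {
        freed_extents[0][off / UNIT] = (unsigned char)set;
        freed_extents[1][off / UNIT] = (unsigned char)set;
    }
}

int main(void)
{
    mark_excluded(64 * 1024, 64 * 1024, 1);   /* add_excluded_extent()   */
    printf("page 16 excluded: %d\n", freed_extents[0][16]);
    mark_excluded(64 * 1024, 64 * 1024, 0);   /* free_excluded_extents() */
    return 0;
}
```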
180 | static int remove_sb_from_cache(struct btrfs_root *root, | 190 | static int exclude_super_stripes(struct btrfs_root *root, |
181 | struct btrfs_block_group_cache *cache) | 191 | struct btrfs_block_group_cache *cache) |
182 | { | 192 | { |
183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
184 | u64 bytenr; | 193 | u64 bytenr; |
185 | u64 *logical; | 194 | u64 *logical; |
186 | int stripe_len; | 195 | int stripe_len; |
@@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, | |||
192 | cache->key.objectid, bytenr, | 201 | cache->key.objectid, bytenr, |
193 | 0, &logical, &nr, &stripe_len); | 202 | 0, &logical, &nr, &stripe_len); |
194 | BUG_ON(ret); | 203 | BUG_ON(ret); |
204 | |||
195 | while (nr--) { | 205 | while (nr--) { |
196 | try_lock_extent(&fs_info->pinned_extents, | 206 | cache->bytes_super += stripe_len; |
197 | logical[nr], | 207 | ret = add_excluded_extent(root, logical[nr], |
198 | logical[nr] + stripe_len - 1, GFP_NOFS); | 208 | stripe_len); |
209 | BUG_ON(ret); | ||
199 | } | 210 | } |
211 | |||
200 | kfree(logical); | 212 | kfree(logical); |
201 | } | 213 | } |
202 | |||
203 | return 0; | 214 | return 0; |
204 | } | 215 | } |
205 | 216 | ||
217 | static struct btrfs_caching_control * | ||
218 | get_caching_control(struct btrfs_block_group_cache *cache) | ||
219 | { | ||
220 | struct btrfs_caching_control *ctl; | ||
221 | |||
222 | spin_lock(&cache->lock); | ||
223 | if (cache->cached != BTRFS_CACHE_STARTED) { | ||
224 | spin_unlock(&cache->lock); | ||
225 | return NULL; | ||
226 | } | ||
227 | |||
228 | ctl = cache->caching_ctl; | ||
229 | atomic_inc(&ctl->count); | ||
230 | spin_unlock(&cache->lock); | ||
231 | return ctl; | ||
232 | } | ||
233 | |||
234 | static void put_caching_control(struct btrfs_caching_control *ctl) | ||
235 | { | ||
236 | if (atomic_dec_and_test(&ctl->count)) | ||
237 | kfree(ctl); | ||
238 | } | ||
239 | |||
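get_caching_control()/put_caching_control() above implement a small refcount protocol: the reference is taken under cache->lock, so a caller either sees BTRFS_CACHE_STARTED and gets a pinned ctl, or gets NULL; the last put frees the structure. A self-contained sketch of the same pattern, with a pthread mutex and C11 atomics standing in for the kernel's spinlock and atomic_t:

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct caching_control { atomic_int count; };

struct block_group {
    pthread_mutex_t lock;              /* stands in for cache->lock */
    int cached;                        /* 0 = NO, 1 = STARTED, 2 = FINISHED */
    struct caching_control *ctl;
};

static struct caching_control *get_caching_control(struct block_group *bg)
{
    struct caching_control *ctl = NULL;

    pthread_mutex_lock(&bg->lock);
    if (bg->cached == 1) {                /* only while caching is in flight */
        ctl = bg->ctl;
        atomic_fetch_add(&ctl->count, 1); /* reference taken under the lock */
    }
    pthread_mutex_unlock(&bg->lock);
    return ctl;
}

static void put_caching_control(struct caching_control *ctl)
{
    if (atomic_fetch_sub(&ctl->count, 1) == 1)   /* dropped the last ref */
        free(ctl);
}

int main(void)
{
    struct block_group bg = { PTHREAD_MUTEX_INITIALIZER, 1, NULL };

    bg.ctl = calloc(1, sizeof(*bg.ctl));
    atomic_init(&bg.ctl->count, 1);       /* the caching list's reference */

    struct caching_control *ctl = get_caching_control(&bg);
    if (ctl)
        put_caching_control(ctl);         /* drop ours */
    put_caching_control(bg.ctl);          /* drop the list's: frees it */
    return 0;
}
```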
206 | /* | 240 | /* |
207 | * this is only called by cache_block_group, since we could have freed extents | 241 | * this is only called by cache_block_group, since we could have freed extents |
208 | * we need to check the pinned_extents for any extents that can't be used yet | 242 | * we need to check the pinned_extents for any extents that can't be used yet |
@@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
215 | int ret; | 249 | int ret; |
216 | 250 | ||
217 | while (start < end) { | 251 | while (start < end) { |
218 | ret = find_first_extent_bit(&info->pinned_extents, start, | 252 | ret = find_first_extent_bit(info->pinned_extents, start, |
219 | &extent_start, &extent_end, | 253 | &extent_start, &extent_end, |
220 | EXTENT_DIRTY|EXTENT_LOCKED); | 254 | EXTENT_DIRTY | EXTENT_UPTODATE); |
221 | if (ret) | 255 | if (ret) |
222 | break; | 256 | break; |
223 | 257 | ||
@@ -249,22 +283,27 @@ static int caching_kthread(void *data) | |||
249 | { | 283 | { |
250 | struct btrfs_block_group_cache *block_group = data; | 284 | struct btrfs_block_group_cache *block_group = data; |
251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 285 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
252 | u64 last = 0; | 286 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; |
287 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
253 | struct btrfs_path *path; | 288 | struct btrfs_path *path; |
254 | int ret = 0; | ||
255 | struct btrfs_key key; | ||
256 | struct extent_buffer *leaf; | 289 | struct extent_buffer *leaf; |
257 | int slot; | 290 | struct btrfs_key key; |
258 | u64 total_found = 0; | 291 | u64 total_found = 0; |
259 | 292 | u64 last = 0; | |
260 | BUG_ON(!fs_info); | 293 | u32 nritems; |
294 | int ret = 0; | ||
261 | 295 | ||
262 | path = btrfs_alloc_path(); | 296 | path = btrfs_alloc_path(); |
263 | if (!path) | 297 | if (!path) |
264 | return -ENOMEM; | 298 | return -ENOMEM; |
265 | 299 | ||
266 | atomic_inc(&block_group->space_info->caching_threads); | 300 | exclude_super_stripes(extent_root, block_group); |
301 | spin_lock(&block_group->space_info->lock); | ||
302 | block_group->space_info->bytes_super += block_group->bytes_super; | ||
303 | spin_unlock(&block_group->space_info->lock); | ||
304 | |||
267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 305 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
306 | |||
268 | /* | 307 | /* |
269 | * We don't want to deadlock with somebody trying to allocate a new | 308 | * We don't want to deadlock with somebody trying to allocate a new |
270 | * extent for the extent root while also trying to search the extent | 309 | * extent for the extent root while also trying to search the extent |
@@ -277,74 +316,64 @@ static int caching_kthread(void *data) | |||
277 | 316 | ||
278 | key.objectid = last; | 317 | key.objectid = last; |
279 | key.offset = 0; | 318 | key.offset = 0; |
280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 319 | key.type = BTRFS_EXTENT_ITEM_KEY; |
281 | again: | 320 | again: |
321 | mutex_lock(&caching_ctl->mutex); | ||
282 | /* need to make sure the commit_root doesn't disappear */ | 322 | /* need to make sure the commit_root doesn't disappear */ |
283 | down_read(&fs_info->extent_commit_sem); | 323 | down_read(&fs_info->extent_commit_sem); |
284 | 324 | ||
285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | 325 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
286 | if (ret < 0) | 326 | if (ret < 0) |
287 | goto err; | 327 | goto err; |
288 | 328 | ||
329 | leaf = path->nodes[0]; | ||
330 | nritems = btrfs_header_nritems(leaf); | ||
331 | |||
289 | while (1) { | 332 | while (1) { |
290 | smp_mb(); | 333 | smp_mb(); |
291 | if (block_group->fs_info->closing > 1) { | 334 | if (fs_info->closing > 1) { |
292 | last = (u64)-1; | 335 | last = (u64)-1; |
293 | break; | 336 | break; |
294 | } | 337 | } |
295 | 338 | ||
296 | leaf = path->nodes[0]; | 339 | if (path->slots[0] < nritems) { |
297 | slot = path->slots[0]; | 340 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
298 | if (slot >= btrfs_header_nritems(leaf)) { | 341 | } else { |
299 | ret = btrfs_next_leaf(fs_info->extent_root, path); | 342 | ret = find_next_key(path, 0, &key); |
300 | if (ret < 0) | 343 | if (ret) |
301 | goto err; | ||
302 | else if (ret) | ||
303 | break; | 344 | break; |
304 | 345 | ||
305 | if (need_resched() || | 346 | caching_ctl->progress = last; |
306 | btrfs_transaction_in_commit(fs_info)) { | 347 | btrfs_release_path(extent_root, path); |
307 | leaf = path->nodes[0]; | 348 | up_read(&fs_info->extent_commit_sem); |
308 | 349 | mutex_unlock(&caching_ctl->mutex); | |
309 | /* this shouldn't happen, but if the | 350 | if (btrfs_transaction_in_commit(fs_info)) |
310 | * leaf is empty just move on. | ||
311 | */ | ||
312 | if (btrfs_header_nritems(leaf) == 0) | ||
313 | break; | ||
314 | /* | ||
315 | * we need to copy the key out so that | ||
316 | * we are sure the next search advances | ||
317 | * us forward in the btree. | ||
318 | */ | ||
319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
320 | btrfs_release_path(fs_info->extent_root, path); | ||
321 | up_read(&fs_info->extent_commit_sem); | ||
322 | schedule_timeout(1); | 351 | schedule_timeout(1); |
323 | goto again; | 352 | else |
324 | } | 353 | cond_resched(); |
354 | goto again; | ||
355 | } | ||
325 | 356 | ||
357 | if (key.objectid < block_group->key.objectid) { | ||
358 | path->slots[0]++; | ||
326 | continue; | 359 | continue; |
327 | } | 360 | } |
328 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
329 | if (key.objectid < block_group->key.objectid) | ||
330 | goto next; | ||
331 | 361 | ||
332 | if (key.objectid >= block_group->key.objectid + | 362 | if (key.objectid >= block_group->key.objectid + |
333 | block_group->key.offset) | 363 | block_group->key.offset) |
334 | break; | 364 | break; |
335 | 365 | ||
336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 366 | if (key.type == BTRFS_EXTENT_ITEM_KEY) { |
337 | total_found += add_new_free_space(block_group, | 367 | total_found += add_new_free_space(block_group, |
338 | fs_info, last, | 368 | fs_info, last, |
339 | key.objectid); | 369 | key.objectid); |
340 | last = key.objectid + key.offset; | 370 | last = key.objectid + key.offset; |
341 | } | ||
342 | 371 | ||
343 | if (total_found > (1024 * 1024 * 2)) { | 372 | if (total_found > (1024 * 1024 * 2)) { |
344 | total_found = 0; | 373 | total_found = 0; |
345 | wake_up(&block_group->caching_q); | 374 | wake_up(&caching_ctl->wait); |
375 | } | ||
346 | } | 376 | } |
347 | next: | ||
348 | path->slots[0]++; | 377 | path->slots[0]++; |
349 | } | 378 | } |
350 | ret = 0; | 379 | ret = 0; |
@@ -352,33 +381,65 @@ next: | |||
352 | total_found += add_new_free_space(block_group, fs_info, last, | 381 | total_found += add_new_free_space(block_group, fs_info, last, |
353 | block_group->key.objectid + | 382 | block_group->key.objectid + |
354 | block_group->key.offset); | 383 | block_group->key.offset); |
384 | caching_ctl->progress = (u64)-1; | ||
355 | 385 | ||
356 | spin_lock(&block_group->lock); | 386 | spin_lock(&block_group->lock); |
387 | block_group->caching_ctl = NULL; | ||
357 | block_group->cached = BTRFS_CACHE_FINISHED; | 388 | block_group->cached = BTRFS_CACHE_FINISHED; |
358 | spin_unlock(&block_group->lock); | 389 | spin_unlock(&block_group->lock); |
359 | 390 | ||
360 | err: | 391 | err: |
361 | btrfs_free_path(path); | 392 | btrfs_free_path(path); |
362 | up_read(&fs_info->extent_commit_sem); | 393 | up_read(&fs_info->extent_commit_sem); |
363 | atomic_dec(&block_group->space_info->caching_threads); | ||
364 | wake_up(&block_group->caching_q); | ||
365 | 394 | ||
395 | free_excluded_extents(extent_root, block_group); | ||
396 | |||
397 | mutex_unlock(&caching_ctl->mutex); | ||
398 | wake_up(&caching_ctl->wait); | ||
399 | |||
400 | put_caching_control(caching_ctl); | ||
401 | atomic_dec(&block_group->space_info->caching_threads); | ||
366 | return 0; | 402 | return 0; |
367 | } | 403 | } |
368 | 404 | ||
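The rewritten kthread publishes its position in caching_ctl->progress, drops every lock before rescheduling, and wakes waiters on caching_ctl->wait once roughly 2 MiB of free space has been found, so allocators can start using a block group long before caching completes. A self-contained pthread model of that progress-wakeup batching (the condvar stands in for the wait queue; the scan loop is fake):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wait_q = PTHREAD_COND_INITIALIZER; /* caching_ctl->wait */
static unsigned long long free_space;    /* bytes cached so far */
static int caching_done;

static void *caching_thread(void *arg)
{
    unsigned long long total_found = 0;

    for (int i = 0; i < 64; i++) {        /* stands in for the extent scan */
        pthread_mutex_lock(&lock);
        free_space  += 256 * 1024;
        total_found += 256 * 1024;
        if (total_found > 2 * 1024 * 1024) {   /* batch the wakeups */
            total_found = 0;
            pthread_cond_broadcast(&wait_q);
        }
        pthread_mutex_unlock(&lock);
    }
    pthread_mutex_lock(&lock);
    caching_done = 1;
    pthread_cond_broadcast(&wait_q);
    pthread_mutex_unlock(&lock);
    return arg;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, caching_thread, NULL);
    pthread_mutex_lock(&lock);
    /* like wait_block_group_cache_progress(): progress, not completion */
    while (!caching_done && free_space < 1024 * 1024)
        pthread_cond_wait(&wait_q, &lock);
    printf("allocator can go on with %llu bytes cached\n", free_space);
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);
    return 0;
}
```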
369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 405 | static int cache_block_group(struct btrfs_block_group_cache *cache) |
370 | { | 406 | { |
407 | struct btrfs_fs_info *fs_info = cache->fs_info; | ||
408 | struct btrfs_caching_control *caching_ctl; | ||
371 | struct task_struct *tsk; | 409 | struct task_struct *tsk; |
372 | int ret = 0; | 410 | int ret = 0; |
373 | 411 | ||
412 | smp_mb(); | ||
413 | if (cache->cached != BTRFS_CACHE_NO) | ||
414 | return 0; | ||
415 | |||
416 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | ||
417 | BUG_ON(!caching_ctl); | ||
418 | |||
419 | INIT_LIST_HEAD(&caching_ctl->list); | ||
420 | mutex_init(&caching_ctl->mutex); | ||
421 | init_waitqueue_head(&caching_ctl->wait); | ||
422 | caching_ctl->block_group = cache; | ||
423 | caching_ctl->progress = cache->key.objectid; | ||
424 | /* one for caching kthread, one for caching block group list */ | ||
425 | atomic_set(&caching_ctl->count, 2); | ||
426 | |||
374 | spin_lock(&cache->lock); | 427 | spin_lock(&cache->lock); |
375 | if (cache->cached != BTRFS_CACHE_NO) { | 428 | if (cache->cached != BTRFS_CACHE_NO) { |
376 | spin_unlock(&cache->lock); | 429 | spin_unlock(&cache->lock); |
377 | return ret; | 430 | kfree(caching_ctl); |
431 | return 0; | ||
378 | } | 432 | } |
433 | cache->caching_ctl = caching_ctl; | ||
379 | cache->cached = BTRFS_CACHE_STARTED; | 434 | cache->cached = BTRFS_CACHE_STARTED; |
380 | spin_unlock(&cache->lock); | 435 | spin_unlock(&cache->lock); |
381 | 436 | ||
437 | down_write(&fs_info->extent_commit_sem); | ||
438 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | ||
439 | up_write(&fs_info->extent_commit_sem); | ||
440 | |||
441 | atomic_inc(&cache->space_info->caching_threads); | ||
442 | |||
382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 443 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", |
383 | cache->key.objectid); | 444 | cache->key.objectid); |
384 | if (IS_ERR(tsk)) { | 445 | if (IS_ERR(tsk)) { |
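cache_block_group() now does a racy unlocked check, allocates the caching_control outside the lock, then rechecks cache->cached under cache->lock and throws the allocation away if it lost the race; the refcount starts at 2 (one for the kthread, one for the fs_info caching list). The allocate-then-recheck idiom, reduced to a standalone sketch:

```c
#include <pthread.h>
#include <stdlib.h>

struct bg {
    pthread_mutex_t lock;
    int cached;            /* 0 = BTRFS_CACHE_NO, 1 = BTRFS_CACHE_STARTED */
    void *ctl;
};

static int start_caching(struct bg *bg)
{
    void *ctl;

    if (bg->cached != 0)              /* unlocked fast path */
        return 0;

    ctl = calloc(1, 64);              /* may sleep, so do it unlocked */
    if (!ctl)
        return -1;

    pthread_mutex_lock(&bg->lock);
    if (bg->cached != 0) {            /* somebody beat us to it */
        pthread_mutex_unlock(&bg->lock);
        free(ctl);                    /* throw our copy away */
        return 0;
    }
    bg->ctl = ctl;
    bg->cached = 1;
    pthread_mutex_unlock(&bg->lock);

    /* ... the real code now queues the ctl and kthread_run()s ... */
    return 0;
}

int main(void)
{
    struct bg bg = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };
    int ret = start_caching(&bg);
    free(bg.ctl);
    return ret;
}
```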
@@ -1511,7 +1572,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1511 | static void btrfs_issue_discard(struct block_device *bdev, | 1572 | static void btrfs_issue_discard(struct block_device *bdev, |
1512 | u64 start, u64 len) | 1573 | u64 start, u64 len) |
1513 | { | 1574 | { |
1514 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); | 1575 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
1576 | DISCARD_FL_BARRIER); | ||
1515 | } | 1577 | } |
1516 | #endif | 1578 | #endif |
1517 | 1579 | ||
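blkdev_issue_discard() takes its range in 512-byte sectors, hence the `>> 9` shifts; the patch itself only adds the new DISCARD_FL_BARRIER flag argument. A trivial demonstration of the byte-to-sector conversion:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t start = 1048576, len = 65536;        /* byte range   */

    printf("sector %llu, count %llu\n",
           (unsigned long long)(start >> 9),      /* 2048         */
           (unsigned long long)(len >> 9));       /* 128 sectors  */
    return 0;
}
```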
@@ -1656,7 +1718,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
1656 | parent, ref_root, flags, | 1718 | parent, ref_root, flags, |
1657 | ref->objectid, ref->offset, | 1719 | ref->objectid, ref->offset, |
1658 | &ins, node->ref_mod); | 1720 | &ins, node->ref_mod); |
1659 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1660 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1721 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
1661 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1722 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
1662 | node->num_bytes, parent, | 1723 | node->num_bytes, parent, |
@@ -1782,7 +1843,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1782 | extent_op->flags_to_set, | 1843 | extent_op->flags_to_set, |
1783 | &extent_op->key, | 1844 | &extent_op->key, |
1784 | ref->level, &ins); | 1845 | ref->level, &ins); |
1785 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1786 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1846 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
1787 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1847 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
1788 | node->num_bytes, parent, ref_root, | 1848 | node->num_bytes, parent, ref_root, |
@@ -1817,16 +1877,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1817 | BUG_ON(extent_op); | 1877 | BUG_ON(extent_op); |
1818 | head = btrfs_delayed_node_to_head(node); | 1878 | head = btrfs_delayed_node_to_head(node); |
1819 | if (insert_reserved) { | 1879 | if (insert_reserved) { |
1880 | int mark_free = 0; | ||
1881 | struct extent_buffer *must_clean = NULL; | ||
1882 | |||
1883 | ret = pin_down_bytes(trans, root, NULL, | ||
1884 | node->bytenr, node->num_bytes, | ||
1885 | head->is_data, 1, &must_clean); | ||
1886 | if (ret > 0) | ||
1887 | mark_free = 1; | ||
1888 | |||
1889 | if (must_clean) { | ||
1890 | clean_tree_block(NULL, root, must_clean); | ||
1891 | btrfs_tree_unlock(must_clean); | ||
1892 | free_extent_buffer(must_clean); | ||
1893 | } | ||
1820 | if (head->is_data) { | 1894 | if (head->is_data) { |
1821 | ret = btrfs_del_csums(trans, root, | 1895 | ret = btrfs_del_csums(trans, root, |
1822 | node->bytenr, | 1896 | node->bytenr, |
1823 | node->num_bytes); | 1897 | node->num_bytes); |
1824 | BUG_ON(ret); | 1898 | BUG_ON(ret); |
1825 | } | 1899 | } |
1826 | btrfs_update_pinned_extents(root, node->bytenr, | 1900 | if (mark_free) { |
1827 | node->num_bytes, 1); | 1901 | ret = btrfs_free_reserved_extent(root, |
1828 | update_reserved_extents(root, node->bytenr, | 1902 | node->bytenr, |
1829 | node->num_bytes, 0); | 1903 | node->num_bytes); |
1904 | BUG_ON(ret); | ||
1905 | } | ||
1830 | } | 1906 | } |
1831 | mutex_unlock(&head->mutex); | 1907 | mutex_unlock(&head->mutex); |
1832 | return 0; | 1908 | return 0; |
@@ -2691,60 +2767,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
2691 | alloc_target); | 2767 | alloc_target); |
2692 | } | 2768 | } |
2693 | 2769 | ||
2770 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2771 | { | ||
2772 | u64 num_bytes; | ||
2773 | int level; | ||
2774 | |||
2775 | level = BTRFS_MAX_LEVEL - 2; | ||
2776 | /* | ||
2777 | * NOTE: these calculations are absolutely the worst possible case. | ||
2778 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2779 | * that the tree has grown to its maximum level size. | ||
2780 | */ | ||
2781 | |||
2782 | /* | ||
2783 | * for every item we insert we could insert both an extent item and a | ||
2784 | * extent ref item. Then for ever item we insert, we will need to cow | ||
2785 | * both the original leaf, plus the leaf to the left and right of it. | ||
2786 | * | ||
2787 | * Unless we are talking about the extent root, then we just want the | ||
2788 | * number of items * 2, since we just need the extent item plus its ref. | ||
2789 | */ | ||
2790 | if (root == root->fs_info->extent_root) | ||
2791 | num_bytes = num_items * 2; | ||
2792 | else | ||
2793 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2794 | |||
2795 | /* | ||
2796 | * num_bytes is total number of leaves we could need times the leaf | ||
2797 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2798 | * level, down to the leaf level. | ||
2799 | */ | ||
2800 | num_bytes = (num_bytes * root->leafsize) + | ||
2801 | (num_bytes * (level * 2)) * root->nodesize; | ||
2802 | |||
2803 | return num_bytes; | ||
2804 | } | ||
2805 | |||
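Plugging numbers into calculate_bytes_needed() shows how pessimistic it is. The sketch below assumes the common 4 KiB leafsize and nodesize and BTRFS_MAX_LEVEL == 8 (so level == 6), which are not spelled out in the hunk; with those values one fs-tree item reserves about 468 KiB and one extent-root item about 104 KiB:

```c
#include <stdint.h>
#include <stdio.h>

static uint64_t bytes_needed(int num_items, int is_extent_root)
{
    const uint64_t leafsize = 4096, nodesize = 4096;   /* assumed defaults */
    const int level = 8 - 2;                           /* BTRFS_MAX_LEVEL - 2 */
    uint64_t n;

    if (is_extent_root)
        n = (uint64_t)num_items * 2;
    else
        n = (uint64_t)(num_items + 2 * num_items) * 3;

    return n * leafsize + n * (level * 2) * nodesize;
}

int main(void)
{
    /* fs tree: 9 leaves * (1 + 12) * 4096 = 479232 bytes (~468 KiB) */
    printf("fs tree, 1 item:     %llu\n", (unsigned long long)bytes_needed(1, 0));
    /* extent root: 2 leaves * 13 * 4096 = 106496 bytes (~104 KiB)   */
    printf("extent root, 1 item: %llu\n", (unsigned long long)bytes_needed(1, 1));
    return 0;
}
```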
2694 | /* | 2806 | /* |
2695 | * for now this just makes sure we have at least 5% of our metadata space free | 2807 | * Unreserve metadata space for delalloc. If we have fewer reserved credits than |
2696 | * for use. | 2808 | * we have extents, this function does nothing. |
2697 | */ | 2809 | */ |
2698 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2810 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
2811 | struct inode *inode, int num_items) | ||
2699 | { | 2812 | { |
2700 | struct btrfs_fs_info *info = root->fs_info; | 2813 | struct btrfs_fs_info *info = root->fs_info; |
2701 | struct btrfs_space_info *meta_sinfo; | 2814 | struct btrfs_space_info *meta_sinfo; |
2702 | u64 alloc_target, thresh; | 2815 | u64 num_bytes; |
2703 | int committed = 0, ret; | 2816 | u64 alloc_target; |
2817 | bool bug = false; | ||
2704 | 2818 | ||
2705 | /* get the space info for where the metadata will live */ | 2819 | /* get the space info for where the metadata will live */ |
2706 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2820 | alloc_target = btrfs_get_alloc_profile(root, 0); |
2707 | meta_sinfo = __find_space_info(info, alloc_target); | 2821 | meta_sinfo = __find_space_info(info, alloc_target); |
2708 | 2822 | ||
2709 | again: | 2823 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
2824 | num_items); | ||
2825 | |||
2710 | spin_lock(&meta_sinfo->lock); | 2826 | spin_lock(&meta_sinfo->lock); |
2711 | if (!meta_sinfo->full) | 2827 | if (BTRFS_I(inode)->delalloc_reserved_extents <= |
2712 | thresh = meta_sinfo->total_bytes * 80; | 2828 | BTRFS_I(inode)->delalloc_extents) { |
2713 | else | 2829 | spin_unlock(&meta_sinfo->lock); |
2714 | thresh = meta_sinfo->total_bytes * 95; | 2830 | return 0; |
2831 | } | ||
2832 | |||
2833 | BTRFS_I(inode)->delalloc_reserved_extents--; | ||
2834 | BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); | ||
2835 | |||
2836 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2837 | bug = true; | ||
2838 | meta_sinfo->bytes_delalloc = 0; | ||
2839 | } else { | ||
2840 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2841 | } | ||
2842 | spin_unlock(&meta_sinfo->lock); | ||
2843 | |||
2844 | BUG_ON(bug); | ||
2845 | |||
2846 | return 0; | ||
2847 | } | ||
2715 | 2848 | ||
2849 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2850 | { | ||
2851 | u64 thresh; | ||
2852 | |||
2853 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
2854 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
2855 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
2856 | meta_sinfo->bytes_may_use; | ||
2857 | |||
2858 | thresh = meta_sinfo->total_bytes - thresh; | ||
2859 | thresh *= 80; | ||
2716 | do_div(thresh, 100); | 2860 | do_div(thresh, 100); |
2861 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2862 | meta_sinfo->force_delalloc = 1; | ||
2863 | else | ||
2864 | meta_sinfo->force_delalloc = 0; | ||
2865 | } | ||
2717 | 2866 | ||
2718 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2867 | static int maybe_allocate_chunk(struct btrfs_root *root, |
2719 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | 2868 | struct btrfs_space_info *info) |
2720 | struct btrfs_trans_handle *trans; | 2869 | { |
2721 | if (!meta_sinfo->full) { | 2870 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; |
2722 | meta_sinfo->force_alloc = 1; | 2871 | struct btrfs_trans_handle *trans; |
2723 | spin_unlock(&meta_sinfo->lock); | 2872 | bool wait = false; |
2873 | int ret = 0; | ||
2874 | u64 min_metadata; | ||
2875 | u64 free_space; | ||
2724 | 2876 | ||
2725 | trans = btrfs_start_transaction(root, 1); | 2877 | free_space = btrfs_super_total_bytes(disk_super); |
2726 | if (!trans) | 2878 | /* |
2727 | return -ENOMEM; | 2879 | * we allow the metadata to grow to a max of either 5gb or 5% of the |
2880 | * space in the volume. | ||
2881 | */ | ||
2882 | min_metadata = min((u64)5 * 1024 * 1024 * 1024, | ||
2883 | div64_u64(free_space * 5, 100)); | ||
2884 | if (info->total_bytes >= min_metadata) { | ||
2885 | spin_unlock(&info->lock); | ||
2886 | return 0; | ||
2887 | } | ||
2728 | 2888 | ||
2729 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2889 | if (info->full) { |
2730 | 2 * 1024 * 1024, alloc_target, 0); | 2890 | spin_unlock(&info->lock); |
2731 | btrfs_end_transaction(trans, root); | 2891 | return 0; |
2892 | } | ||
2893 | |||
2894 | if (!info->allocating_chunk) { | ||
2895 | info->force_alloc = 1; | ||
2896 | info->allocating_chunk = 1; | ||
2897 | init_waitqueue_head(&info->wait); | ||
2898 | } else { | ||
2899 | wait = true; | ||
2900 | } | ||
2901 | |||
2902 | spin_unlock(&info->lock); | ||
2903 | |||
2904 | if (wait) { | ||
2905 | wait_event(info->wait, | ||
2906 | !info->allocating_chunk); | ||
2907 | return 1; | ||
2908 | } | ||
2909 | |||
2910 | trans = btrfs_start_transaction(root, 1); | ||
2911 | if (!trans) { | ||
2912 | ret = -ENOMEM; | ||
2913 | goto out; | ||
2914 | } | ||
2915 | |||
2916 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2917 | 4096 + 2 * 1024 * 1024, | ||
2918 | info->flags, 0); | ||
2919 | btrfs_end_transaction(trans, root); | ||
2920 | if (ret) | ||
2921 | goto out; | ||
2922 | out: | ||
2923 | spin_lock(&info->lock); | ||
2924 | info->allocating_chunk = 0; | ||
2925 | spin_unlock(&info->lock); | ||
2926 | wake_up(&info->wait); | ||
2927 | |||
2928 | if (ret) | ||
2929 | return 0; | ||
2930 | return 1; | ||
2931 | } | ||
2932 | |||
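maybe_allocate_chunk() is a single-flight helper: the first caller claims allocating_chunk and performs the chunk allocation, while later callers sleep on info->wait and simply retry their reservation afterwards (the function is entered with the space-info lock held, drops it internally, and returns 1 to mean "try again"). A pthread model of that shape, with a stub allocator:

```c
#include <pthread.h>
#include <stdbool.h>

struct space_info {
    pthread_mutex_t lock;
    pthread_cond_t  wait;
    bool allocating_chunk;
};

/* returns 1 if the caller should retry its reservation, 0 on failure */
static int maybe_allocate(struct space_info *info, int (*alloc_chunk)(void))
{
    bool do_alloc = false;
    int ret;

    pthread_mutex_lock(&info->lock);
    if (!info->allocating_chunk) {
        info->allocating_chunk = true;    /* we own the allocation */
        do_alloc = true;
    }
    while (!do_alloc && info->allocating_chunk)
        pthread_cond_wait(&info->wait, &info->lock);   /* wait for owner */
    pthread_mutex_unlock(&info->lock);

    if (!do_alloc)
        return 1;                         /* someone else allocated: retry */

    ret = alloc_chunk();

    pthread_mutex_lock(&info->lock);
    info->allocating_chunk = false;
    pthread_cond_broadcast(&info->wait);  /* release the waiters */
    pthread_mutex_unlock(&info->lock);
    return ret ? 0 : 1;
}

static int fake_alloc_chunk(void) { return 0; }      /* 0 == success */

int main(void)
{
    struct space_info info = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
    };
    return maybe_allocate(&info, fake_alloc_chunk) == 1 ? 0 : 1;
}
```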
2933 | /* | ||
2934 | * Reserve metadata space for delalloc. | ||
2935 | */ | ||
2936 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2937 | struct inode *inode, int num_items) | ||
2938 | { | ||
2939 | struct btrfs_fs_info *info = root->fs_info; | ||
2940 | struct btrfs_space_info *meta_sinfo; | ||
2941 | u64 num_bytes; | ||
2942 | u64 used; | ||
2943 | u64 alloc_target; | ||
2944 | int flushed = 0; | ||
2945 | int force_delalloc; | ||
2946 | |||
2947 | /* get the space info for where the metadata will live */ | ||
2948 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
2949 | meta_sinfo = __find_space_info(info, alloc_target); | ||
2950 | |||
2951 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
2952 | num_items); | ||
2953 | again: | ||
2954 | spin_lock(&meta_sinfo->lock); | ||
2955 | |||
2956 | force_delalloc = meta_sinfo->force_delalloc; | ||
2957 | |||
2958 | if (unlikely(!meta_sinfo->bytes_root)) | ||
2959 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
2960 | |||
2961 | if (!flushed) | ||
2962 | meta_sinfo->bytes_delalloc += num_bytes; | ||
2963 | |||
2964 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
2965 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
2966 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
2967 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
2968 | |||
2969 | if (used > meta_sinfo->total_bytes) { | ||
2970 | flushed++; | ||
2971 | |||
2972 | if (flushed == 1) { | ||
2973 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
2974 | goto again; | ||
2975 | flushed++; | ||
2976 | } else { | ||
2977 | spin_unlock(&meta_sinfo->lock); | ||
2978 | } | ||
2979 | |||
2980 | if (flushed == 2) { | ||
2981 | filemap_flush(inode->i_mapping); | ||
2982 | goto again; | ||
2983 | } else if (flushed == 3) { | ||
2984 | btrfs_start_delalloc_inodes(root); | ||
2985 | btrfs_wait_ordered_extents(root, 0); | ||
2732 | goto again; | 2986 | goto again; |
2733 | } | 2987 | } |
2988 | spin_lock(&meta_sinfo->lock); | ||
2989 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2734 | spin_unlock(&meta_sinfo->lock); | 2990 | spin_unlock(&meta_sinfo->lock); |
2991 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
2992 | BTRFS_I(inode)->delalloc_extents, | ||
2993 | BTRFS_I(inode)->delalloc_reserved_extents); | ||
2994 | dump_space_info(meta_sinfo, 0, 0); | ||
2995 | return -ENOSPC; | ||
2996 | } | ||
2735 | 2997 | ||
2736 | if (!committed) { | 2998 | BTRFS_I(inode)->delalloc_reserved_extents++; |
2737 | committed = 1; | 2999 | check_force_delalloc(meta_sinfo); |
2738 | trans = btrfs_join_transaction(root, 1); | 3000 | spin_unlock(&meta_sinfo->lock); |
2739 | if (!trans) | 3001 | |
2740 | return -ENOMEM; | 3002 | if (!flushed && force_delalloc) |
2741 | ret = btrfs_commit_transaction(trans, root); | 3003 | filemap_flush(inode->i_mapping); |
2742 | if (ret) | 3004 | |
2743 | return ret; | 3005 | return 0; |
3006 | } | ||
3007 | |||
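When the optimistic reservation above does not fit, btrfs_reserve_metadata_for_delalloc() escalates through progressively heavier ways of making space: grow a metadata chunk, flush this inode's delalloc, then flush and wait on all delalloc, and only then return -ENOSPC. The ladder's control flow, reduced to a standalone sketch with hypothetical stub stages:

```c
#include <errno.h>

static int reserve_delalloc(int (*fits)(void),
                            int (*grow_chunk)(void),   /* maybe_allocate_chunk */
                            void (*flush_inode)(void), /* filemap_flush        */
                            void (*flush_all)(void))   /* delalloc + ordered   */
{
    int flushed = 0;
again:
    if (fits())
        return 0;
    flushed++;
    if (flushed == 1) {
        if (grow_chunk())
            goto again;     /* a new chunk may be enough    */
        flushed++;          /* chunk alloc failed: escalate */
    }
    if (flushed == 2) {
        flush_inode();
        goto again;
    } else if (flushed == 3) {
        flush_all();
        goto again;
    }
    return -ENOSPC;
}

static int never_fits(void) { return 0; }
static int no_chunk(void)   { return 0; }
static void noop(void)      { }

int main(void)
{
    return reserve_delalloc(never_fits, no_chunk, noop, noop) == -ENOSPC
           ? 0 : 1;
}
```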
3008 | /* | ||
3009 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3010 | * be paired with btrfs_reserve_metadata_space. | ||
3011 | * | ||
3012 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3013 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3014 | * operations which will result in more used metadata, so we want to make sure we |||
3015 | * can do that without issue. | ||
3016 | */ | ||
3017 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3018 | { | ||
3019 | struct btrfs_fs_info *info = root->fs_info; | ||
3020 | struct btrfs_space_info *meta_sinfo; | ||
3021 | u64 num_bytes; | ||
3022 | u64 alloc_target; | ||
3023 | bool bug = false; | ||
3024 | |||
3025 | /* get the space info for where the metadata will live */ | ||
3026 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3027 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3028 | |||
3029 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3030 | |||
3031 | spin_lock(&meta_sinfo->lock); | ||
3032 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3033 | bug = true; | ||
3034 | meta_sinfo->bytes_may_use = 0; | ||
3035 | } else { | ||
3036 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3037 | } | ||
3038 | spin_unlock(&meta_sinfo->lock); | ||
3039 | |||
3040 | BUG_ON(bug); | ||
3041 | |||
3042 | return 0; | ||
3043 | } | ||
3044 | |||
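The reserve/unreserve pair pivots on bytes_may_use: reservation adds the full worst case up front, and the paired unreserve (called with the same num_items, after the transaction ends) gives the same amount back. A userspace model of that accounting, reusing the ~468 KiB single-item worst case computed above:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static struct {
    uint64_t total, used, may_use;
} meta = { 1ULL << 30, 0, 0 };            /* a 1 GiB metadata space_info */

static int reserve(uint64_t worst_case)
{
    if (meta.used + meta.may_use + worst_case > meta.total)
        return -28;                       /* -ENOSPC */
    meta.may_use += worst_case;
    return 0;
}

static void unreserve(uint64_t worst_case)
{
    assert(meta.may_use >= worst_case);   /* mirrors the BUG_ON(bug) above */
    meta.may_use -= worst_case;
}

int main(void)
{
    if (reserve(479232) == 0) {           /* worst case for one item    */
        meta.used += 4096;                /* what actually got written  */
        unreserve(479232);                /* paired, same item count    */
    }
    printf("used %llu, may_use %llu\n",
           (unsigned long long)meta.used,
           (unsigned long long)meta.may_use);
    return 0;
}
```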
3045 | /* | ||
3046 | * Reserve some metadata space for use. We'll calculate the worst-case number |||
3047 | * of bytes that would be needed to modify num_items number of items. If we | ||
3048 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3049 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3050 | * items you reserved, since whatever metadata you needed should have already | ||
3051 | * been allocated. | ||
3052 | * | ||
3053 | * This will commit the transaction to make more space if we don't have enough | ||
3054 | * metadata space. The only time we don't do this is if we're reserving space |||
3055 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3056 | * caller's responsibility to handle it properly. |||
3057 | */ | ||
3058 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3059 | { | ||
3060 | struct btrfs_fs_info *info = root->fs_info; | ||
3061 | struct btrfs_space_info *meta_sinfo; | ||
3062 | u64 num_bytes; | ||
3063 | u64 used; | ||
3064 | u64 alloc_target; | ||
3065 | int retries = 0; | ||
3066 | |||
3067 | /* get the space info for where the metadata will live */ | ||
3068 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3069 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3070 | |||
3071 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3072 | again: | ||
3073 | spin_lock(&meta_sinfo->lock); | ||
3074 | |||
3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3077 | |||
3078 | if (!retries) | ||
3079 | meta_sinfo->bytes_may_use += num_bytes; | ||
3080 | |||
3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3085 | |||
3086 | if (used > meta_sinfo->total_bytes) { | ||
3087 | retries++; | ||
3088 | if (retries == 1) { | ||
3089 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3090 | goto again; | ||
3091 | retries++; | ||
3092 | } else { | ||
3093 | spin_unlock(&meta_sinfo->lock); | ||
3094 | } | ||
3095 | |||
3096 | if (retries == 2) { | ||
3097 | btrfs_start_delalloc_inodes(root); | ||
3098 | btrfs_wait_ordered_extents(root, 0); | ||
2744 | goto again; | 3099 | goto again; |
2745 | } | 3100 | } |
3101 | spin_lock(&meta_sinfo->lock); | ||
3102 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3103 | spin_unlock(&meta_sinfo->lock); | ||
3104 | |||
3105 | dump_space_info(meta_sinfo, 0, 0); | ||
2746 | return -ENOSPC; | 3106 | return -ENOSPC; |
2747 | } | 3107 | } |
3108 | |||
3109 | check_force_delalloc(meta_sinfo); | ||
2748 | spin_unlock(&meta_sinfo->lock); | 3110 | spin_unlock(&meta_sinfo->lock); |
2749 | 3111 | ||
2750 | return 0; | 3112 | return 0; |
@@ -2764,13 +3126,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
2764 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3126 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2765 | 3127 | ||
2766 | data_sinfo = BTRFS_I(inode)->space_info; | 3128 | data_sinfo = BTRFS_I(inode)->space_info; |
3129 | if (!data_sinfo) | ||
3130 | goto alloc; | ||
3131 | |||
2767 | again: | 3132 | again: |
2768 | /* make sure we have enough space to handle the data first */ | 3133 | /* make sure we have enough space to handle the data first */ |
2769 | spin_lock(&data_sinfo->lock); | 3134 | spin_lock(&data_sinfo->lock); |
2770 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 3135 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - |
2771 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 3136 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - |
2772 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 3137 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - |
2773 | data_sinfo->bytes_may_use < bytes) { | 3138 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { |
2774 | struct btrfs_trans_handle *trans; | 3139 | struct btrfs_trans_handle *trans; |
2775 | 3140 | ||
2776 | /* | 3141 | /* |
@@ -2782,7 +3147,7 @@ again: | |||
2782 | 3147 | ||
2783 | data_sinfo->force_alloc = 1; | 3148 | data_sinfo->force_alloc = 1; |
2784 | spin_unlock(&data_sinfo->lock); | 3149 | spin_unlock(&data_sinfo->lock); |
2785 | 3150 | alloc: | |
2786 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3151 | alloc_target = btrfs_get_alloc_profile(root, 1); |
2787 | trans = btrfs_start_transaction(root, 1); | 3152 | trans = btrfs_start_transaction(root, 1); |
2788 | if (!trans) | 3153 | if (!trans) |
@@ -2794,12 +3159,17 @@ again: | |||
2794 | btrfs_end_transaction(trans, root); | 3159 | btrfs_end_transaction(trans, root); |
2795 | if (ret) | 3160 | if (ret) |
2796 | return ret; | 3161 | return ret; |
3162 | |||
3163 | if (!data_sinfo) { | ||
3164 | btrfs_set_inode_space_info(root, inode); | ||
3165 | data_sinfo = BTRFS_I(inode)->space_info; | ||
3166 | } | ||
2797 | goto again; | 3167 | goto again; |
2798 | } | 3168 | } |
2799 | spin_unlock(&data_sinfo->lock); | 3169 | spin_unlock(&data_sinfo->lock); |
2800 | 3170 | ||
2801 | /* commit the current transaction and try again */ | 3171 | /* commit the current transaction and try again */ |
2802 | if (!committed) { | 3172 | if (!committed && !root->fs_info->open_ioctl_trans) { |
2803 | committed = 1; | 3173 | committed = 1; |
2804 | trans = btrfs_join_transaction(root, 1); | 3174 | trans = btrfs_join_transaction(root, 1); |
2805 | if (!trans) | 3175 | if (!trans) |
@@ -2827,7 +3197,7 @@ again: | |||
2827 | BTRFS_I(inode)->reserved_bytes += bytes; | 3197 | BTRFS_I(inode)->reserved_bytes += bytes; |
2828 | spin_unlock(&data_sinfo->lock); | 3198 | spin_unlock(&data_sinfo->lock); |
2829 | 3199 | ||
2830 | return btrfs_check_metadata_free_space(root); | 3200 | return 0; |
2831 | } | 3201 | } |
2832 | 3202 | ||
2833 | /* | 3203 | /* |
@@ -2926,17 +3296,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2926 | BUG_ON(!space_info); | 3296 | BUG_ON(!space_info); |
2927 | 3297 | ||
2928 | spin_lock(&space_info->lock); | 3298 | spin_lock(&space_info->lock); |
2929 | if (space_info->force_alloc) { | 3299 | if (space_info->force_alloc) |
2930 | force = 1; | 3300 | force = 1; |
2931 | space_info->force_alloc = 0; | ||
2932 | } | ||
2933 | if (space_info->full) { | 3301 | if (space_info->full) { |
2934 | spin_unlock(&space_info->lock); | 3302 | spin_unlock(&space_info->lock); |
2935 | goto out; | 3303 | goto out; |
2936 | } | 3304 | } |
2937 | 3305 | ||
2938 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3306 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
2939 | thresh = div_factor(thresh, 6); | 3307 | thresh = div_factor(thresh, 8); |
2940 | if (!force && | 3308 | if (!force && |
2941 | (space_info->bytes_used + space_info->bytes_pinned + | 3309 | (space_info->bytes_used + space_info->bytes_pinned + |
2942 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3310 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
@@ -2950,7 +3318,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2950 | * we keep a reasonable number of metadata chunks allocated in the | 3318 | * we keep a reasonable number of metadata chunks allocated in the |
2951 | * FS as well. | 3319 | * FS as well. |
2952 | */ | 3320 | */ |
2953 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3321 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
2954 | fs_info->data_chunk_allocations++; | 3322 | fs_info->data_chunk_allocations++; |
2955 | if (!(fs_info->data_chunk_allocations % | 3323 | if (!(fs_info->data_chunk_allocations % |
2956 | fs_info->metadata_ratio)) | 3324 | fs_info->metadata_ratio)) |
@@ -2958,8 +3326,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2958 | } | 3326 | } |
2959 | 3327 | ||
2960 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3328 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3329 | spin_lock(&space_info->lock); | ||
2961 | if (ret) | 3330 | if (ret) |
2962 | space_info->full = 1; | 3331 | space_info->full = 1; |
3332 | space_info->force_alloc = 0; | ||
3333 | spin_unlock(&space_info->lock); | ||
2963 | out: | 3334 | out: |
2964 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3335 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
2965 | return ret; | 3336 | return ret; |
@@ -3008,10 +3379,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3008 | num_bytes = min(total, cache->key.offset - byte_in_group); | 3379 | num_bytes = min(total, cache->key.offset - byte_in_group); |
3009 | if (alloc) { | 3380 | if (alloc) { |
3010 | old_val += num_bytes; | 3381 | old_val += num_bytes; |
3382 | btrfs_set_block_group_used(&cache->item, old_val); | ||
3383 | cache->reserved -= num_bytes; | ||
3011 | cache->space_info->bytes_used += num_bytes; | 3384 | cache->space_info->bytes_used += num_bytes; |
3385 | cache->space_info->bytes_reserved -= num_bytes; | ||
3012 | if (cache->ro) | 3386 | if (cache->ro) |
3013 | cache->space_info->bytes_readonly -= num_bytes; | 3387 | cache->space_info->bytes_readonly -= num_bytes; |
3014 | btrfs_set_block_group_used(&cache->item, old_val); | ||
3015 | spin_unlock(&cache->lock); | 3388 | spin_unlock(&cache->lock); |
3016 | spin_unlock(&cache->space_info->lock); | 3389 | spin_unlock(&cache->space_info->lock); |
3017 | } else { | 3390 | } else { |
@@ -3056,127 +3429,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3056 | return bytenr; | 3429 | return bytenr; |
3057 | } | 3430 | } |
3058 | 3431 | ||
3059 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 3432 | /* |
3060 | u64 bytenr, u64 num, int pin) | 3433 | * this function must be called within transaction |
3434 | */ | ||
3435 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3436 | u64 bytenr, u64 num_bytes, int reserved) | ||
3061 | { | 3437 | { |
3062 | u64 len; | ||
3063 | struct btrfs_block_group_cache *cache; | ||
3064 | struct btrfs_fs_info *fs_info = root->fs_info; | 3438 | struct btrfs_fs_info *fs_info = root->fs_info; |
3439 | struct btrfs_block_group_cache *cache; | ||
3065 | 3440 | ||
3066 | if (pin) | 3441 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
3067 | set_extent_dirty(&fs_info->pinned_extents, | 3442 | BUG_ON(!cache); |
3068 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
3069 | |||
3070 | while (num > 0) { | ||
3071 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3072 | BUG_ON(!cache); | ||
3073 | len = min(num, cache->key.offset - | ||
3074 | (bytenr - cache->key.objectid)); | ||
3075 | if (pin) { | ||
3076 | spin_lock(&cache->space_info->lock); | ||
3077 | spin_lock(&cache->lock); | ||
3078 | cache->pinned += len; | ||
3079 | cache->space_info->bytes_pinned += len; | ||
3080 | spin_unlock(&cache->lock); | ||
3081 | spin_unlock(&cache->space_info->lock); | ||
3082 | fs_info->total_pinned += len; | ||
3083 | } else { | ||
3084 | int unpin = 0; | ||
3085 | 3443 | ||
3086 | /* | 3444 | spin_lock(&cache->space_info->lock); |
3087 | * in order to not race with the block group caching, we | 3445 | spin_lock(&cache->lock); |
3088 | * only want to unpin the extent if we are cached. If | 3446 | cache->pinned += num_bytes; |
3089 | * we aren't cached, we want to start async caching this | 3447 | cache->space_info->bytes_pinned += num_bytes; |
3090 | * block group so we can free the extent the next time | 3448 | if (reserved) { |
3091 | * around. | 3449 | cache->reserved -= num_bytes; |
3092 | */ | 3450 | cache->space_info->bytes_reserved -= num_bytes; |
3093 | spin_lock(&cache->space_info->lock); | 3451 | } |
3094 | spin_lock(&cache->lock); | 3452 | spin_unlock(&cache->lock); |
3095 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); | 3453 | spin_unlock(&cache->space_info->lock); |
3096 | if (likely(unpin)) { | ||
3097 | cache->pinned -= len; | ||
3098 | cache->space_info->bytes_pinned -= len; | ||
3099 | fs_info->total_pinned -= len; | ||
3100 | } | ||
3101 | spin_unlock(&cache->lock); | ||
3102 | spin_unlock(&cache->space_info->lock); | ||
3103 | 3454 | ||
3104 | if (likely(unpin)) | 3455 | btrfs_put_block_group(cache); |
3105 | clear_extent_dirty(&fs_info->pinned_extents, | ||
3106 | bytenr, bytenr + len -1, | ||
3107 | GFP_NOFS); | ||
3108 | else | ||
3109 | cache_block_group(cache); | ||
3110 | 3456 | ||
3111 | if (unpin) | 3457 | set_extent_dirty(fs_info->pinned_extents, |
3112 | btrfs_add_free_space(cache, bytenr, len); | 3458 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); |
3113 | } | 3459 | return 0; |
3114 | btrfs_put_block_group(cache); | 3460 | } |
3115 | bytenr += len; | 3461 | |
3116 | num -= len; | 3462 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, |
3463 | u64 num_bytes, int reserve) | ||
3464 | { | ||
3465 | spin_lock(&cache->space_info->lock); | ||
3466 | spin_lock(&cache->lock); | ||
3467 | if (reserve) { | ||
3468 | cache->reserved += num_bytes; | ||
3469 | cache->space_info->bytes_reserved += num_bytes; | ||
3470 | } else { | ||
3471 | cache->reserved -= num_bytes; | ||
3472 | cache->space_info->bytes_reserved -= num_bytes; | ||
3117 | } | 3473 | } |
3474 | spin_unlock(&cache->lock); | ||
3475 | spin_unlock(&cache->space_info->lock); | ||
3118 | return 0; | 3476 | return 0; |
3119 | } | 3477 | } |
3120 | 3478 | ||
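btrfs_pin_extent() and the new update_reserved_extents() adjust the per-block-group and per-space-info counters in lockstep; pinning with reserved set moves the bytes from the reserved bucket to the pinned bucket in one step. (Note the new code also assumes the pinned range lies inside a single block group, where the old loop walked across several.) The counter move, in miniature:

```c
#include <stdint.h>
#include <stdio.h>

struct counters { uint64_t pinned, reserved; };

static void pin_extent(struct counters *c, uint64_t num_bytes, int reserved)
{
    c->pinned += num_bytes;
    if (reserved)              /* counted as reserved until this moment */
        c->reserved -= num_bytes;
}

int main(void)
{
    struct counters bg = { 0, 8192 };    /* 8 KiB sitting in "reserved" */

    pin_extent(&bg, 8192, 1);
    printf("pinned %llu, reserved %llu\n",
           (unsigned long long)bg.pinned,
           (unsigned long long)bg.reserved);
    return 0;
}
```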
3121 | static int update_reserved_extents(struct btrfs_root *root, | 3479 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
3122 | u64 bytenr, u64 num, int reserve) | 3480 | struct btrfs_root *root) |
3123 | { | 3481 | { |
3124 | u64 len; | ||
3125 | struct btrfs_block_group_cache *cache; | ||
3126 | struct btrfs_fs_info *fs_info = root->fs_info; | 3482 | struct btrfs_fs_info *fs_info = root->fs_info; |
3483 | struct btrfs_caching_control *next; | ||
3484 | struct btrfs_caching_control *caching_ctl; | ||
3485 | struct btrfs_block_group_cache *cache; | ||
3127 | 3486 | ||
3128 | while (num > 0) { | 3487 | down_write(&fs_info->extent_commit_sem); |
3129 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3130 | BUG_ON(!cache); | ||
3131 | len = min(num, cache->key.offset - | ||
3132 | (bytenr - cache->key.objectid)); | ||
3133 | 3488 | ||
3134 | spin_lock(&cache->space_info->lock); | 3489 | list_for_each_entry_safe(caching_ctl, next, |
3135 | spin_lock(&cache->lock); | 3490 | &fs_info->caching_block_groups, list) { |
3136 | if (reserve) { | 3491 | cache = caching_ctl->block_group; |
3137 | cache->reserved += len; | 3492 | if (block_group_cache_done(cache)) { |
3138 | cache->space_info->bytes_reserved += len; | 3493 | cache->last_byte_to_unpin = (u64)-1; |
3494 | list_del_init(&caching_ctl->list); | ||
3495 | put_caching_control(caching_ctl); | ||
3139 | } else { | 3496 | } else { |
3140 | cache->reserved -= len; | 3497 | cache->last_byte_to_unpin = caching_ctl->progress; |
3141 | cache->space_info->bytes_reserved -= len; | ||
3142 | } | 3498 | } |
3143 | spin_unlock(&cache->lock); | ||
3144 | spin_unlock(&cache->space_info->lock); | ||
3145 | btrfs_put_block_group(cache); | ||
3146 | bytenr += len; | ||
3147 | num -= len; | ||
3148 | } | 3499 | } |
3500 | |||
3501 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
3502 | fs_info->pinned_extents = &fs_info->freed_extents[1]; | ||
3503 | else | ||
3504 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
3505 | |||
3506 | up_write(&fs_info->extent_commit_sem); | ||
3149 | return 0; | 3507 | return 0; |
3150 | } | 3508 | } |
3151 | 3509 | ||
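btrfs_prepare_extent_commit() is where the two freed_extents trees earn their keep: at commit time the roles swap, so extents pinned while the commit runs land in the other tree and are not unpinned until the *next* commit. A toy model of the swap (two ints stand in for the extent trees):

```c
#include <stdio.h>

static int freed_extents[2];            /* stand-ins for the two trees  */
static int *pinned = &freed_extents[0]; /* fs_info->pinned_extents      */

static void prepare_extent_commit(void)
{
    /* swap which tree collects new pins from here on */
    pinned = (pinned == &freed_extents[0]) ? &freed_extents[1]
                                           : &freed_extents[0];
}

static void finish_extent_commit(void)
{
    /* unpin the tree that is *not* collecting new pins */
    int *unpin = (pinned == &freed_extents[0]) ? &freed_extents[1]
                                               : &freed_extents[0];
    printf("unpinning tree %d\n", (int)(unpin - freed_extents));
    *unpin = 0;
}

int main(void)
{
    freed_extents[0] = 1;      /* extents pinned during this transaction */
    prepare_extent_commit();   /* new pins now go to tree 1 */
    finish_extent_commit();    /* empties tree 0 only */
    return 0;
}
```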
3152 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | 3510 | static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
3153 | { | 3511 | { |
3154 | u64 last = 0; | 3512 | struct btrfs_fs_info *fs_info = root->fs_info; |
3155 | u64 start; | 3513 | struct btrfs_block_group_cache *cache = NULL; |
3156 | u64 end; | 3514 | u64 len; |
3157 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | ||
3158 | int ret; | ||
3159 | 3515 | ||
3160 | while (1) { | 3516 | while (start <= end) { |
3161 | ret = find_first_extent_bit(pinned_extents, last, | 3517 | if (!cache || |
3162 | &start, &end, EXTENT_DIRTY); | 3518 | start >= cache->key.objectid + cache->key.offset) { |
3163 | if (ret) | 3519 | if (cache) |
3164 | break; | 3520 | btrfs_put_block_group(cache); |
3521 | cache = btrfs_lookup_block_group(fs_info, start); | ||
3522 | BUG_ON(!cache); | ||
3523 | } | ||
3524 | |||
3525 | len = cache->key.objectid + cache->key.offset - start; | ||
3526 | len = min(len, end + 1 - start); | ||
3527 | |||
3528 | if (start < cache->last_byte_to_unpin) { | ||
3529 | len = min(len, cache->last_byte_to_unpin - start); | ||
3530 | btrfs_add_free_space(cache, start, len); | ||
3531 | } | ||
3165 | 3532 | ||
3166 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3533 | spin_lock(&cache->space_info->lock); |
3167 | last = end + 1; | 3534 | spin_lock(&cache->lock); |
3535 | cache->pinned -= len; | ||
3536 | cache->space_info->bytes_pinned -= len; | ||
3537 | spin_unlock(&cache->lock); | ||
3538 | spin_unlock(&cache->space_info->lock); | ||
3539 | |||
3540 | start += len; | ||
3168 | } | 3541 | } |
3542 | |||
3543 | if (cache) | ||
3544 | btrfs_put_block_group(cache); | ||
3169 | return 0; | 3545 | return 0; |
3170 | } | 3546 | } |
3171 | 3547 | ||
3172 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 3548 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
3173 | struct btrfs_root *root, | 3549 | struct btrfs_root *root) |
3174 | struct extent_io_tree *unpin) | ||
3175 | { | 3550 | { |
3551 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3552 | struct extent_io_tree *unpin; | ||
3176 | u64 start; | 3553 | u64 start; |
3177 | u64 end; | 3554 | u64 end; |
3178 | int ret; | 3555 | int ret; |
3179 | 3556 | ||
3557 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
3558 | unpin = &fs_info->freed_extents[1]; | ||
3559 | else | ||
3560 | unpin = &fs_info->freed_extents[0]; | ||
3561 | |||
3180 | while (1) { | 3562 | while (1) { |
3181 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3563 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
3182 | EXTENT_DIRTY); | 3564 | EXTENT_DIRTY); |
@@ -3185,10 +3567,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3185 | 3567 | ||
3186 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3568 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
3187 | 3569 | ||
3188 | /* unlocks the pinned mutex */ | ||
3189 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
3190 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3570 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
3191 | 3571 | unpin_extent_range(root, start, end); | |
3192 | cond_resched(); | 3572 | cond_resched(); |
3193 | } | 3573 | } |
3194 | 3574 | ||
@@ -3198,7 +3578,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3198 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 3578 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
3199 | struct btrfs_root *root, | 3579 | struct btrfs_root *root, |
3200 | struct btrfs_path *path, | 3580 | struct btrfs_path *path, |
3201 | u64 bytenr, u64 num_bytes, int is_data, | 3581 | u64 bytenr, u64 num_bytes, |
3582 | int is_data, int reserved, | ||
3202 | struct extent_buffer **must_clean) | 3583 | struct extent_buffer **must_clean) |
3203 | { | 3584 | { |
3204 | int err = 0; | 3585 | int err = 0; |
@@ -3230,15 +3611,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3230 | } | 3611 | } |
3231 | free_extent_buffer(buf); | 3612 | free_extent_buffer(buf); |
3232 | pinit: | 3613 | pinit: |
3233 | btrfs_set_path_blocking(path); | 3614 | if (path) |
3615 | btrfs_set_path_blocking(path); | ||
3234 | /* unlocks the pinned mutex */ | 3616 | /* unlocks the pinned mutex */ |
3235 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3617 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); |
3236 | 3618 | ||
3237 | BUG_ON(err < 0); | 3619 | BUG_ON(err < 0); |
3238 | return 0; | 3620 | return 0; |
3239 | } | 3621 | } |
3240 | 3622 | ||
3241 | |||
3242 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 3623 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
3243 | struct btrfs_root *root, | 3624 | struct btrfs_root *root, |
3244 | u64 bytenr, u64 num_bytes, u64 parent, | 3625 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -3412,7 +3793,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3412 | } | 3793 | } |
3413 | 3794 | ||
3414 | ret = pin_down_bytes(trans, root, path, bytenr, | 3795 | ret = pin_down_bytes(trans, root, path, bytenr, |
3415 | num_bytes, is_data, &must_clean); | 3796 | num_bytes, is_data, 0, &must_clean); |
3416 | if (ret > 0) | 3797 | if (ret > 0) |
3417 | mark_free = 1; | 3798 | mark_free = 1; |
3418 | BUG_ON(ret < 0); | 3799 | BUG_ON(ret < 0); |
@@ -3543,8 +3924,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3543 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 3924 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
3544 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 3925 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
3545 | /* unlocks the pinned mutex */ | 3926 | /* unlocks the pinned mutex */ |
3546 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3927 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
3547 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
3548 | ret = 0; | 3928 | ret = 0; |
3549 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 3929 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
3550 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 3930 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
@@ -3584,19 +3964,33 @@ static noinline int | |||
3584 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | 3964 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
3585 | u64 num_bytes) | 3965 | u64 num_bytes) |
3586 | { | 3966 | { |
3967 | struct btrfs_caching_control *caching_ctl; | ||
3587 | DEFINE_WAIT(wait); | 3968 | DEFINE_WAIT(wait); |
3588 | 3969 | ||
3589 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | 3970 | caching_ctl = get_caching_control(cache); |
3590 | 3971 | if (!caching_ctl) | |
3591 | if (block_group_cache_done(cache)) { | ||
3592 | finish_wait(&cache->caching_q, &wait); | ||
3593 | return 0; | 3972 | return 0; |
3594 | } | ||
3595 | schedule(); | ||
3596 | finish_wait(&cache->caching_q, &wait); | ||
3597 | 3973 | ||
3598 | wait_event(cache->caching_q, block_group_cache_done(cache) || | 3974 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
3599 | (cache->free_space >= num_bytes)); | 3975 | (cache->free_space >= num_bytes)); |
3976 | |||
3977 | put_caching_control(caching_ctl); | ||
3978 | return 0; | ||
3979 | } | ||
3980 | |||
3981 | static noinline int | ||
3982 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
3983 | { | ||
3984 | struct btrfs_caching_control *caching_ctl; | ||
3985 | DEFINE_WAIT(wait); | ||
3986 | |||
3987 | caching_ctl = get_caching_control(cache); | ||
3988 | if (!caching_ctl) | ||
3989 | return 0; | ||
3990 | |||
3991 | wait_event(caching_ctl->wait, block_group_cache_done(cache)); | ||
3992 | |||
3993 | put_caching_control(caching_ctl); | ||
3600 | return 0; | 3994 | return 0; |
3601 | } | 3995 | } |
3602 | 3996 | ||
@@ -3634,6 +4028,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3634 | int last_ptr_loop = 0; | 4028 | int last_ptr_loop = 0; |
3635 | int loop = 0; | 4029 | int loop = 0; |
3636 | bool found_uncached_bg = false; | 4030 | bool found_uncached_bg = false; |
4031 | bool failed_cluster_refill = false; | ||
3637 | 4032 | ||
3638 | WARN_ON(num_bytes < root->sectorsize); | 4033 | WARN_ON(num_bytes < root->sectorsize); |
3639 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4034 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3731,7 +4126,16 @@ have_block_group: | |||
3731 | if (unlikely(block_group->ro)) | 4126 | if (unlikely(block_group->ro)) |
3732 | goto loop; | 4127 | goto loop; |
3733 | 4128 | ||
3734 | if (last_ptr) { | 4129 | /* |
4130 | * Ok, we want to try to use the cluster allocator, so let's look |||
4131 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
4132 | * have tried the cluster allocator plenty of times at this | ||
4133 | * point and not have found anything, so we are likely way too | ||
4134 | * fragmented for the clustering stuff to find anything, so let's |||
4135 | * just skip it and let the allocator find whatever block it can | ||
4136 | * find | ||
4137 | */ | ||
4138 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
3735 | /* | 4139 | /* |
3736 | * the refill lock keeps out other | 4140 | * the refill lock keeps out other |
3737 | * people trying to start a new cluster | 4141 | * people trying to start a new cluster |
@@ -3806,9 +4210,11 @@ refill_cluster: | |||
3806 | spin_unlock(&last_ptr->refill_lock); | 4210 | spin_unlock(&last_ptr->refill_lock); |
3807 | goto checks; | 4211 | goto checks; |
3808 | } | 4212 | } |
3809 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 4213 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
4214 | && !failed_cluster_refill) { | ||
3810 | spin_unlock(&last_ptr->refill_lock); | 4215 | spin_unlock(&last_ptr->refill_lock); |
3811 | 4216 | ||
4217 | failed_cluster_refill = true; | ||
3812 | wait_block_group_cache_progress(block_group, | 4218 | wait_block_group_cache_progress(block_group, |
3813 | num_bytes + empty_cluster + empty_size); | 4219 | num_bytes + empty_cluster + empty_size); |
3814 | goto have_block_group; | 4220 | goto have_block_group; |
@@ -3820,13 +4226,9 @@ refill_cluster: | |||
3820 | * cluster. Free the cluster we've been trying | 4226 | * cluster. Free the cluster we've been trying |
3821 | * to use, and go to the next block group | 4227 | * to use, and go to the next block group |
3822 | */ | 4228 | */ |
3823 | if (loop < LOOP_NO_EMPTY_SIZE) { | 4229 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
3824 | btrfs_return_cluster_to_free_space(NULL, | ||
3825 | last_ptr); | ||
3826 | spin_unlock(&last_ptr->refill_lock); | ||
3827 | goto loop; | ||
3828 | } | ||
3829 | spin_unlock(&last_ptr->refill_lock); | 4230 | spin_unlock(&last_ptr->refill_lock); |
4231 | goto loop; | ||
3830 | } | 4232 | } |
3831 | 4233 | ||
3832 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4234 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
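The failed_cluster_refill flag added above bounds the retry loop: an uncached
block group whose cluster refill fails gets exactly one wait-for-caching-progress
retry before the cluster is handed back and the plain free-space search runs.
A small model of that policy; the names are illustrative, and allow_wait
stands in for the loop > LOOP_CACHING_NOWAIT test:

#include <stdbool.h>

enum cluster_step {
        CLUSTER_ALLOC,          /* refill worked, allocate from the cluster */
        CLUSTER_WAIT_RETRY,     /* wait for caching progress, refill again */
        CLUSTER_GIVE_UP,        /* return the cluster, search free space */
};

static enum cluster_step cluster_next(bool refill_ok, bool cached,
                                      bool allow_wait,
                                      bool *failed_cluster_refill)
{
        if (refill_ok)
                return CLUSTER_ALLOC;
        if (!cached && allow_wait && !*failed_cluster_refill) {
                *failed_cluster_refill = true;  /* at most one retry */
                return CLUSTER_WAIT_RETRY;
        }
        return CLUSTER_GIVE_UP;
}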
@@ -3880,9 +4282,12 @@ checks: | |||
3880 | search_start - offset); | 4282 | search_start - offset); |
3881 | BUG_ON(offset > search_start); | 4283 | BUG_ON(offset > search_start); |
3882 | 4284 | ||
4285 | update_reserved_extents(block_group, num_bytes, 1); | ||
4286 | |||
3883 | /* we are all good, let's return */ | 4287 | /* we are all good, let's return */ |
3884 | break; | 4288 | break; |
3885 | loop: | 4289 | loop: |
4290 | failed_cluster_refill = false; | ||
3886 | btrfs_put_block_group(block_group); | 4291 | btrfs_put_block_group(block_group); |
3887 | } | 4292 | } |
3888 | up_read(&space_info->groups_sem); | 4293 | up_read(&space_info->groups_sem); |
@@ -3940,21 +4345,32 @@ loop: | |||
3940 | return ret; | 4345 | return ret; |
3941 | } | 4346 | } |
3942 | 4347 | ||
3943 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4348 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
4349 | int dump_block_groups) | ||
3944 | { | 4350 | { |
3945 | struct btrfs_block_group_cache *cache; | 4351 | struct btrfs_block_group_cache *cache; |
3946 | 4352 | ||
4353 | spin_lock(&info->lock); | ||
3947 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4354 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
3948 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4355 | (unsigned long long)(info->total_bytes - info->bytes_used - |
3949 | info->bytes_pinned - info->bytes_reserved), | 4356 | info->bytes_pinned - info->bytes_reserved - |
4357 | info->bytes_super), | ||
3950 | (info->full) ? "" : "not "); | 4358 | (info->full) ? "" : "not "); |
3951 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4359 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
3952 | " may_use=%llu, used=%llu\n", | 4360 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
4361 | "\n", | ||
3953 | (unsigned long long)info->total_bytes, | 4362 | (unsigned long long)info->total_bytes, |
3954 | (unsigned long long)info->bytes_pinned, | 4363 | (unsigned long long)info->bytes_pinned, |
3955 | (unsigned long long)info->bytes_delalloc, | 4364 | (unsigned long long)info->bytes_delalloc, |
3956 | (unsigned long long)info->bytes_may_use, | 4365 | (unsigned long long)info->bytes_may_use, |
3957 | (unsigned long long)info->bytes_used); | 4366 | (unsigned long long)info->bytes_used, |
4367 | (unsigned long long)info->bytes_root, | ||
4368 | (unsigned long long)info->bytes_super, | ||
4369 | (unsigned long long)info->bytes_reserved); | ||
4370 | spin_unlock(&info->lock); | ||
4371 | |||
4372 | if (!dump_block_groups) | ||
4373 | return; | ||
3958 | 4374 | ||
3959 | down_read(&info->groups_sem); | 4375 | down_read(&info->groups_sem); |
3960 | list_for_each_entry(cache, &info->block_groups, list) { | 4376 | list_for_each_entry(cache, &info->block_groups, list) { |
@@ -3972,12 +4388,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
3972 | up_read(&info->groups_sem); | 4388 | up_read(&info->groups_sem); |
3973 | } | 4389 | } |
3974 | 4390 | ||
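With bytes_super now carried in the space_info, the "free" figure printed
above subtracts five quantities from the total. A toy computation with
made-up numbers, assuming the field meanings shown in the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t total    = 8ULL << 30;  /* 8 GiB space_info */
        uint64_t used     = 5ULL << 30;
        uint64_t pinned   = 1ULL << 28;
        uint64_t reserved = 1ULL << 27;
        uint64_t super    = 1ULL << 26;  /* newly accounted here */

        printf("space_info has %llu free\n",
               (unsigned long long)(total - used - pinned -
                                    reserved - super));
        return 0;
}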
3975 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 4391 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
3976 | struct btrfs_root *root, | 4392 | struct btrfs_root *root, |
3977 | u64 num_bytes, u64 min_alloc_size, | 4393 | u64 num_bytes, u64 min_alloc_size, |
3978 | u64 empty_size, u64 hint_byte, | 4394 | u64 empty_size, u64 hint_byte, |
3979 | u64 search_end, struct btrfs_key *ins, | 4395 | u64 search_end, struct btrfs_key *ins, |
3980 | u64 data) | 4396 | u64 data) |
3981 | { | 4397 | { |
3982 | int ret; | 4398 | int ret; |
3983 | u64 search_start = 0; | 4399 | u64 search_start = 0; |
@@ -4022,7 +4438,7 @@ again: | |||
4022 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4438 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
4023 | "wanted %llu\n", (unsigned long long)data, | 4439 | "wanted %llu\n", (unsigned long long)data, |
4024 | (unsigned long long)num_bytes); | 4440 | (unsigned long long)num_bytes); |
4025 | dump_space_info(sinfo, num_bytes); | 4441 | dump_space_info(sinfo, num_bytes, 1); |
4026 | } | 4442 | } |
4027 | 4443 | ||
4028 | return ret; | 4444 | return ret; |
@@ -4043,25 +4459,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4043 | ret = btrfs_discard_extent(root, start, len); | 4459 | ret = btrfs_discard_extent(root, start, len); |
4044 | 4460 | ||
4045 | btrfs_add_free_space(cache, start, len); | 4461 | btrfs_add_free_space(cache, start, len); |
4462 | update_reserved_extents(cache, len, 0); | ||
4046 | btrfs_put_block_group(cache); | 4463 | btrfs_put_block_group(cache); |
4047 | update_reserved_extents(root, start, len, 0); | ||
4048 | |||
4049 | return ret; | ||
4050 | } | ||
4051 | |||
4052 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
4053 | struct btrfs_root *root, | ||
4054 | u64 num_bytes, u64 min_alloc_size, | ||
4055 | u64 empty_size, u64 hint_byte, | ||
4056 | u64 search_end, struct btrfs_key *ins, | ||
4057 | u64 data) | ||
4058 | { | ||
4059 | int ret; | ||
4060 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
4061 | empty_size, hint_byte, search_end, ins, | ||
4062 | data); | ||
4063 | if (!ret) | ||
4064 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4065 | 4464 | ||
4066 | return ret; | 4465 | return ret; |
4067 | } | 4466 | } |
@@ -4222,15 +4621,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4222 | { | 4621 | { |
4223 | int ret; | 4622 | int ret; |
4224 | struct btrfs_block_group_cache *block_group; | 4623 | struct btrfs_block_group_cache *block_group; |
4624 | struct btrfs_caching_control *caching_ctl; | ||
4625 | u64 start = ins->objectid; | ||
4626 | u64 num_bytes = ins->offset; | ||
4225 | 4627 | ||
4226 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4628 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4227 | cache_block_group(block_group); | 4629 | cache_block_group(block_group); |
4228 | wait_event(block_group->caching_q, | 4630 | caching_ctl = get_caching_control(block_group); |
4229 | block_group_cache_done(block_group)); | ||
4230 | 4631 | ||
4231 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4632 | if (!caching_ctl) { |
4232 | ins->offset); | 4633 | BUG_ON(!block_group_cache_done(block_group)); |
4233 | BUG_ON(ret); | 4634 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
4635 | BUG_ON(ret); | ||
4636 | } else { | ||
4637 | mutex_lock(&caching_ctl->mutex); | ||
4638 | |||
4639 | if (start >= caching_ctl->progress) { | ||
4640 | ret = add_excluded_extent(root, start, num_bytes); | ||
4641 | BUG_ON(ret); | ||
4642 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
4643 | ret = btrfs_remove_free_space(block_group, | ||
4644 | start, num_bytes); | ||
4645 | BUG_ON(ret); | ||
4646 | } else { | ||
4647 | num_bytes = caching_ctl->progress - start; | ||
4648 | ret = btrfs_remove_free_space(block_group, | ||
4649 | start, num_bytes); | ||
4650 | BUG_ON(ret); | ||
4651 | |||
4652 | start = caching_ctl->progress; | ||
4653 | num_bytes = ins->objectid + ins->offset - | ||
4654 | caching_ctl->progress; | ||
4655 | ret = add_excluded_extent(root, start, num_bytes); | ||
4656 | BUG_ON(ret); | ||
4657 | } | ||
4658 | |||
4659 | mutex_unlock(&caching_ctl->mutex); | ||
4660 | put_caching_control(caching_ctl); | ||
4661 | } | ||
4662 | |||
4663 | update_reserved_extents(block_group, ins->offset, 1); | ||
4234 | btrfs_put_block_group(block_group); | 4664 | btrfs_put_block_group(block_group); |
4235 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 4665 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4236 | 0, owner, offset, ins, 1); | 4666 | 0, owner, offset, ins, 1); |
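The new branch above splits a logged extent around caching_ctl->progress: the
part the caching thread has already scanned must come back out of the
free-space cache, while the unscanned part goes into the exclusion tree so
the scanner skips it. A standalone sketch of the same three-way split, with
printing stubs standing in for the kernel calls:

#include <stdint.h>
#include <stdio.h>

static void remove_free_space(uint64_t start, uint64_t len)
{
        printf("remove from free-space cache: [%llu, +%llu)\n",
               (unsigned long long)start, (unsigned long long)len);
}

static void add_excluded_extent(uint64_t start, uint64_t len)
{
        printf("exclude from caching scan:    [%llu, +%llu)\n",
               (unsigned long long)start, (unsigned long long)len);
}

static void reserve_logged_extent(uint64_t start, uint64_t num_bytes,
                                  uint64_t progress)
{
        if (start >= progress) {
                /* not scanned yet: exclude the whole range */
                add_excluded_extent(start, num_bytes);
        } else if (start + num_bytes <= progress) {
                /* fully scanned: it is all in the free-space cache */
                remove_free_space(start, num_bytes);
        } else {
                /* straddles the scan position: split at progress */
                remove_free_space(start, progress - start);
                add_excluded_extent(progress,
                                    start + num_bytes - progress);
        }
}

int main(void)
{
        reserve_logged_extent(4096, 8192, 8192); /* straddling case */
        return 0;
}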
@@ -4254,9 +4684,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4254 | int ret; | 4684 | int ret; |
4255 | u64 flags = 0; | 4685 | u64 flags = 0; |
4256 | 4686 | ||
4257 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4687 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4258 | empty_size, hint_byte, search_end, | 4688 | empty_size, hint_byte, search_end, |
4259 | ins, 0); | 4689 | ins, 0); |
4260 | if (ret) | 4690 | if (ret) |
4261 | return ret; | 4691 | return ret; |
4262 | 4692 | ||
@@ -4267,7 +4697,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4267 | } else | 4697 | } else |
4268 | BUG_ON(parent > 0); | 4698 | BUG_ON(parent > 0); |
4269 | 4699 | ||
4270 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4271 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4700 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
4272 | struct btrfs_delayed_extent_op *extent_op; | 4701 | struct btrfs_delayed_extent_op *extent_op; |
4273 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 4702 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
@@ -4346,452 +4775,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4346 | return buf; | 4775 | return buf; |
4347 | } | 4776 | } |
4348 | 4777 | ||
4349 | #if 0 | 4778 | struct walk_control { |
4350 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4779 | u64 refs[BTRFS_MAX_LEVEL]; |
4351 | struct btrfs_root *root, struct extent_buffer *leaf) | 4780 | u64 flags[BTRFS_MAX_LEVEL]; |
4352 | { | 4781 | struct btrfs_key update_progress; |
4353 | u64 disk_bytenr; | 4782 | int stage; |
4354 | u64 num_bytes; | 4783 | int level; |
4355 | struct btrfs_key key; | 4784 | int shared_level; |
4356 | struct btrfs_file_extent_item *fi; | 4785 | int update_ref; |
4357 | u32 nritems; | 4786 | int keep_locks; |
4358 | int i; | 4787 | int reada_slot; |
4359 | int ret; | 4788 | int reada_count; |
4360 | 4789 | }; | |
4361 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
4362 | nritems = btrfs_header_nritems(leaf); | ||
4363 | |||
4364 | for (i = 0; i < nritems; i++) { | ||
4365 | cond_resched(); | ||
4366 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
4367 | |||
4368 | /* only extents have references, skip everything else */ | ||
4369 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
4370 | continue; | ||
4371 | |||
4372 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
4373 | |||
4374 | /* inline extents live in the btree, they don't have refs */ | ||
4375 | if (btrfs_file_extent_type(leaf, fi) == | ||
4376 | BTRFS_FILE_EXTENT_INLINE) | ||
4377 | continue; | ||
4378 | |||
4379 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
4380 | |||
4381 | /* holes don't have refs */ | ||
4382 | if (disk_bytenr == 0) | ||
4383 | continue; | ||
4384 | |||
4385 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
4386 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, | ||
4387 | leaf->start, 0, key.objectid, 0); | ||
4388 | BUG_ON(ret); | ||
4389 | } | ||
4390 | return 0; | ||
4391 | } | ||
4392 | |||
4393 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
4394 | struct btrfs_root *root, | ||
4395 | struct btrfs_leaf_ref *ref) | ||
4396 | { | ||
4397 | int i; | ||
4398 | int ret; | ||
4399 | struct btrfs_extent_info *info; | ||
4400 | struct refsort *sorted; | ||
4401 | |||
4402 | if (ref->nritems == 0) | ||
4403 | return 0; | ||
4404 | |||
4405 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
4406 | for (i = 0; i < ref->nritems; i++) { | ||
4407 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
4408 | sorted[i].slot = i; | ||
4409 | } | ||
4410 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
4411 | |||
4412 | /* | ||
4413 | * the items in the ref were sorted when the ref was inserted | ||
4414 | * into the ref cache, so this is already in order | ||
4415 | */ | ||
4416 | for (i = 0; i < ref->nritems; i++) { | ||
4417 | info = ref->extents + sorted[i].slot; | ||
4418 | ret = btrfs_free_extent(trans, root, info->bytenr, | ||
4419 | info->num_bytes, ref->bytenr, | ||
4420 | ref->owner, ref->generation, | ||
4421 | info->objectid, 0); | ||
4422 | |||
4423 | atomic_inc(&root->fs_info->throttle_gen); | ||
4424 | wake_up(&root->fs_info->transaction_throttle); | ||
4425 | cond_resched(); | ||
4426 | |||
4427 | BUG_ON(ret); | ||
4428 | info++; | ||
4429 | } | ||
4430 | |||
4431 | kfree(sorted); | ||
4432 | return 0; | ||
4433 | } | ||
4434 | |||
4435 | |||
4436 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | ||
4437 | struct btrfs_root *root, u64 start, | ||
4438 | u64 len, u32 *refs) | ||
4439 | { | ||
4440 | int ret; | ||
4441 | |||
4442 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); | ||
4443 | BUG_ON(ret); | ||
4444 | |||
4445 | #if 0 /* some debugging code in case we see problems here */ | ||
4446 | /* if the refs count is one, it won't get increased again. But | ||
4447 | * if the ref count is > 1, someone may be decreasing it at | ||
4448 | * the same time we are. | ||
4449 | */ | ||
4450 | if (*refs != 1) { | ||
4451 | struct extent_buffer *eb = NULL; | ||
4452 | eb = btrfs_find_create_tree_block(root, start, len); | ||
4453 | if (eb) | ||
4454 | btrfs_tree_lock(eb); | ||
4455 | |||
4456 | mutex_lock(&root->fs_info->alloc_mutex); | ||
4457 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
4458 | BUG_ON(ret); | ||
4459 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
4460 | |||
4461 | if (eb) { | ||
4462 | btrfs_tree_unlock(eb); | ||
4463 | free_extent_buffer(eb); | ||
4464 | } | ||
4465 | if (*refs == 1) { | ||
4466 | printk(KERN_ERR "btrfs block %llu went down to one " | ||
4467 | "during drop_snap\n", (unsigned long long)start); | ||
4468 | } | ||
4469 | |||
4470 | } | ||
4471 | #endif | ||
4472 | |||
4473 | cond_resched(); | ||
4474 | return ret; | ||
4475 | } | ||
4476 | 4790 | ||
4791 | #define DROP_REFERENCE 1 | ||
4792 | #define UPDATE_BACKREF 2 | ||
4477 | 4793 | ||
4478 | /* | 4794 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, |
4479 | * this is used while deleting old snapshots, and it drops the refs | 4795 | struct btrfs_root *root, |
4480 | * on a whole subtree starting from a level 1 node. | 4796 | struct walk_control *wc, |
4481 | * | 4797 | struct btrfs_path *path) |
4482 | * The idea is to sort all the leaf pointers, and then drop the | ||
4483 | * ref on all the leaves in order. Most of the time the leaves | ||
4484 | * will have ref cache entries, so no leaf IOs will be required to | ||
4485 | * find the extents they have references on. | ||
4486 | * | ||
4487 | * For each leaf, any references it has are also dropped in order | ||
4488 | * | ||
4489 | * This ends up dropping the references in something close to optimal | ||
4490 | * order for reading and modifying the extent allocation tree. | ||
4491 | */ | ||
4492 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
4493 | struct btrfs_root *root, | ||
4494 | struct btrfs_path *path) | ||
4495 | { | 4798 | { |
4496 | u64 bytenr; | 4799 | u64 bytenr; |
4497 | u64 root_owner; | 4800 | u64 generation; |
4498 | u64 root_gen; | 4801 | u64 refs; |
4499 | struct extent_buffer *eb = path->nodes[1]; | 4802 | u64 last = 0; |
4500 | struct extent_buffer *leaf; | 4803 | u32 nritems; |
4501 | struct btrfs_leaf_ref *ref; | 4804 | u32 blocksize; |
4502 | struct refsort *sorted = NULL; | 4805 | struct btrfs_key key; |
4503 | int nritems = btrfs_header_nritems(eb); | 4806 | struct extent_buffer *eb; |
4504 | int ret; | 4807 | int ret; |
4505 | int i; | 4808 | int slot; |
4506 | int refi = 0; | 4809 | int nread = 0; |
4507 | int slot = path->slots[1]; | ||
4508 | u32 blocksize = btrfs_level_size(root, 0); | ||
4509 | u32 refs; | ||
4510 | |||
4511 | if (nritems == 0) | ||
4512 | goto out; | ||
4513 | |||
4514 | root_owner = btrfs_header_owner(eb); | ||
4515 | root_gen = btrfs_header_generation(eb); | ||
4516 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
4517 | 4810 | ||
4518 | /* | 4811 | if (path->slots[wc->level] < wc->reada_slot) { |
4519 | * step one, sort all the leaf pointers so we don't scribble | 4812 | wc->reada_count = wc->reada_count * 2 / 3; |
4520 | * randomly into the extent allocation tree | 4813 | wc->reada_count = max(wc->reada_count, 2); |
4521 | */ | 4814 | } else { |
4522 | for (i = slot; i < nritems; i++) { | 4815 | wc->reada_count = wc->reada_count * 3 / 2; |
4523 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | 4816 | wc->reada_count = min_t(int, wc->reada_count, |
4524 | sorted[refi].slot = i; | 4817 | BTRFS_NODEPTRS_PER_BLOCK(root)); |
4525 | refi++; | ||
4526 | } | 4818 | } |
4527 | 4819 | ||
4528 | /* | 4820 | eb = path->nodes[wc->level]; |
4529 | * nritems won't be zero, but if we're picking up drop_snapshot | 4821 | nritems = btrfs_header_nritems(eb); |
4530 | * after a crash, slot might be > 0, so double check things | 4822 | blocksize = btrfs_level_size(root, wc->level - 1); |
4531 | * just in case. | ||
4532 | */ | ||
4533 | if (refi == 0) | ||
4534 | goto out; | ||
4535 | 4823 | ||
4536 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | 4824 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
4825 | if (nread >= wc->reada_count) | ||
4826 | break; | ||
4537 | 4827 | ||
4538 | /* | 4828 | cond_resched(); |
4539 | * the first loop frees everything the leaves point to | 4829 | bytenr = btrfs_node_blockptr(eb, slot); |
4540 | */ | 4830 | generation = btrfs_node_ptr_generation(eb, slot); |
4541 | for (i = 0; i < refi; i++) { | ||
4542 | u64 ptr_gen; | ||
4543 | 4831 | ||
4544 | bytenr = sorted[i].bytenr; | 4832 | if (slot == path->slots[wc->level]) |
4833 | goto reada; | ||
4545 | 4834 | ||
4546 | /* | 4835 | if (wc->stage == UPDATE_BACKREF && |
4547 | * check the reference count on this leaf. If it is > 1 | 4836 | generation <= root->root_key.offset) |
4548 | * we just decrement it below and don't update any | ||
4549 | * of the refs the leaf points to. | ||
4550 | */ | ||
4551 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4552 | blocksize, &refs); | ||
4553 | BUG_ON(ret); | ||
4554 | if (refs != 1) | ||
4555 | continue; | 4837 | continue; |
4556 | 4838 | ||
4557 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | 4839 | if (wc->stage == DROP_REFERENCE) { |
4558 | 4840 | ret = btrfs_lookup_extent_info(trans, root, | |
4559 | /* | 4841 | bytenr, blocksize, |
4560 | * the leaf only had one reference, which means the | 4842 | &refs, NULL); |
4561 | * only thing pointing to this leaf is the snapshot | ||
4562 | * we're deleting. It isn't possible for the reference | ||
4563 | * count to increase again later | ||
4564 | * | ||
4565 | * The reference cache is checked for the leaf, | ||
4566 | * and if found we'll be able to drop any refs held by | ||
4567 | * the leaf without needing to read it in. | ||
4568 | */ | ||
4569 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
4570 | if (ref && ref->generation != ptr_gen) { | ||
4571 | btrfs_free_leaf_ref(root, ref); | ||
4572 | ref = NULL; | ||
4573 | } | ||
4574 | if (ref) { | ||
4575 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
4576 | BUG_ON(ret); | ||
4577 | btrfs_remove_leaf_ref(root, ref); | ||
4578 | btrfs_free_leaf_ref(root, ref); | ||
4579 | } else { | ||
4580 | /* | ||
4581 | * the leaf wasn't in the reference cache, so | ||
4582 | * we have to read it. | ||
4583 | */ | ||
4584 | leaf = read_tree_block(root, bytenr, blocksize, | ||
4585 | ptr_gen); | ||
4586 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
4587 | BUG_ON(ret); | 4843 | BUG_ON(ret); |
4588 | free_extent_buffer(leaf); | 4844 | BUG_ON(refs == 0); |
4589 | } | 4845 | if (refs == 1) |
4590 | atomic_inc(&root->fs_info->throttle_gen); | 4846 | goto reada; |
4591 | wake_up(&root->fs_info->transaction_throttle); | ||
4592 | cond_resched(); | ||
4593 | } | ||
4594 | |||
4595 | /* | ||
4596 | * run through the loop again to free the refs on the leaves. | ||
4597 | * This is faster than doing it in the loop above because | ||
4598 | * the leaves are likely to be clustered together. We end up | ||
4599 | * working in nice chunks on the extent allocation tree. | ||
4600 | */ | ||
4601 | for (i = 0; i < refi; i++) { | ||
4602 | bytenr = sorted[i].bytenr; | ||
4603 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4604 | blocksize, eb->start, | ||
4605 | root_owner, root_gen, 0, 1); | ||
4606 | BUG_ON(ret); | ||
4607 | |||
4608 | atomic_inc(&root->fs_info->throttle_gen); | ||
4609 | wake_up(&root->fs_info->transaction_throttle); | ||
4610 | cond_resched(); | ||
4611 | } | ||
4612 | out: | ||
4613 | kfree(sorted); | ||
4614 | |||
4615 | /* | ||
4616 | * update the path to show we've processed the entire level 1 | ||
4617 | * node. This will get saved into the root's drop_snapshot_progress | ||
4618 | * field so these drops are not repeated again if this transaction | ||
4619 | * commits. | ||
4620 | */ | ||
4621 | path->slots[1] = nritems; | ||
4622 | return 0; | ||
4623 | } | ||
4624 | |||
4625 | /* | ||
4626 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
4627 | * counts as it goes. | ||
4628 | */ | ||
4629 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
4630 | struct btrfs_root *root, | ||
4631 | struct btrfs_path *path, int *level) | ||
4632 | { | ||
4633 | u64 root_owner; | ||
4634 | u64 root_gen; | ||
4635 | u64 bytenr; | ||
4636 | u64 ptr_gen; | ||
4637 | struct extent_buffer *next; | ||
4638 | struct extent_buffer *cur; | ||
4639 | struct extent_buffer *parent; | ||
4640 | u32 blocksize; | ||
4641 | int ret; | ||
4642 | u32 refs; | ||
4643 | |||
4644 | WARN_ON(*level < 0); | ||
4645 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4646 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, | ||
4647 | path->nodes[*level]->len, &refs); | ||
4648 | BUG_ON(ret); | ||
4649 | if (refs > 1) | ||
4650 | goto out; | ||
4651 | |||
4652 | /* | ||
4653 | * walk down to the last node level and free all the leaves | ||
4654 | */ | ||
4655 | while (*level >= 0) { | ||
4656 | WARN_ON(*level < 0); | ||
4657 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4658 | cur = path->nodes[*level]; | ||
4659 | 4847 | ||
4660 | if (btrfs_header_level(cur) != *level) | 4848 | if (!wc->update_ref || |
4661 | WARN_ON(1); | 4849 | generation <= root->root_key.offset) |
4662 | 4850 | continue; | |
4663 | if (path->slots[*level] >= | 4851 | btrfs_node_key_to_cpu(eb, &key, slot); |
4664 | btrfs_header_nritems(cur)) | 4852 | ret = btrfs_comp_cpu_keys(&key, |
4665 | break; | 4853 | &wc->update_progress); |
4666 | 4854 | if (ret < 0) | |
4667 | /* the new code goes down to level 1 and does all the | 4855 | continue; |
4668 | * leaves pointed to that node in bulk. So, this check | ||
4669 | * for level 0 will always be false. | ||
4670 | * | ||
4671 | * But, the disk format allows the drop_snapshot_progress | ||
4672 | * field in the root to leave things in a state where | ||
4673 | * a leaf will need cleaning up here. If someone crashes | ||
4674 | * with the old code and then boots with the new code, | ||
4675 | * we might find a leaf here. | ||
4676 | */ | ||
4677 | if (*level == 0) { | ||
4678 | ret = btrfs_drop_leaf_ref(trans, root, cur); | ||
4679 | BUG_ON(ret); | ||
4680 | break; | ||
4681 | } | 4856 | } |
4682 | 4857 | reada: | |
4683 | /* | 4858 | ret = readahead_tree_block(root, bytenr, blocksize, |
4684 | * once we get to level one, process the whole node | 4859 | generation); |
4685 | * at once, including everything below it. | 4860 | if (ret) |
4686 | */ | ||
4687 | if (*level == 1) { | ||
4688 | ret = drop_level_one_refs(trans, root, path); | ||
4689 | BUG_ON(ret); | ||
4690 | break; | 4861 | break; |
4691 | } | 4862 | last = bytenr + blocksize; |
4692 | 4863 | nread++; | |
4693 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
4694 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
4695 | blocksize = btrfs_level_size(root, *level - 1); | ||
4696 | |||
4697 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4698 | blocksize, &refs); | ||
4699 | BUG_ON(ret); | ||
4700 | |||
4701 | /* | ||
4702 | * if there is more than one reference, we don't need | ||
4703 | * to read that node to drop any references it has. We | ||
4704 | * just drop the ref we hold on that node and move on to the | ||
4705 | * next slot in this level. | ||
4706 | */ | ||
4707 | if (refs != 1) { | ||
4708 | parent = path->nodes[*level]; | ||
4709 | root_owner = btrfs_header_owner(parent); | ||
4710 | root_gen = btrfs_header_generation(parent); | ||
4711 | path->slots[*level]++; | ||
4712 | |||
4713 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4714 | blocksize, parent->start, | ||
4715 | root_owner, root_gen, | ||
4716 | *level - 1, 1); | ||
4717 | BUG_ON(ret); | ||
4718 | |||
4719 | atomic_inc(&root->fs_info->throttle_gen); | ||
4720 | wake_up(&root->fs_info->transaction_throttle); | ||
4721 | cond_resched(); | ||
4722 | |||
4723 | continue; | ||
4724 | } | ||
4725 | |||
4726 | /* | ||
4727 | * we need to keep freeing things in the next level down. | ||
4728 | * read the block and loop around to process it | ||
4729 | */ | ||
4730 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
4731 | WARN_ON(*level <= 0); | ||
4732 | if (path->nodes[*level-1]) | ||
4733 | free_extent_buffer(path->nodes[*level-1]); | ||
4734 | path->nodes[*level-1] = next; | ||
4735 | *level = btrfs_header_level(next); | ||
4736 | path->slots[*level] = 0; | ||
4737 | cond_resched(); | ||
4738 | } | 4864 | } |
4739 | out: | 4865 | wc->reada_slot = slot; |
4740 | WARN_ON(*level < 0); | ||
4741 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4742 | |||
4743 | if (path->nodes[*level] == root->node) { | ||
4744 | parent = path->nodes[*level]; | ||
4745 | bytenr = path->nodes[*level]->start; | ||
4746 | } else { | ||
4747 | parent = path->nodes[*level + 1]; | ||
4748 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
4749 | } | ||
4750 | |||
4751 | blocksize = btrfs_level_size(root, *level); | ||
4752 | root_owner = btrfs_header_owner(parent); | ||
4753 | root_gen = btrfs_header_generation(parent); | ||
4754 | |||
4755 | /* | ||
4756 | * cleanup and free the reference on the last node | ||
4757 | * we processed | ||
4758 | */ | ||
4759 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4760 | parent->start, root_owner, root_gen, | ||
4761 | *level, 1); | ||
4762 | free_extent_buffer(path->nodes[*level]); | ||
4763 | path->nodes[*level] = NULL; | ||
4764 | |||
4765 | *level += 1; | ||
4766 | BUG_ON(ret); | ||
4767 | |||
4768 | cond_resched(); | ||
4769 | return 0; | ||
4770 | } | 4866 | } |
4771 | #endif | ||
4772 | |||
4773 | struct walk_control { | ||
4774 | u64 refs[BTRFS_MAX_LEVEL]; | ||
4775 | u64 flags[BTRFS_MAX_LEVEL]; | ||
4776 | struct btrfs_key update_progress; | ||
4777 | int stage; | ||
4778 | int level; | ||
4779 | int shared_level; | ||
4780 | int update_ref; | ||
4781 | int keep_locks; | ||
4782 | }; | ||
4783 | |||
4784 | #define DROP_REFERENCE 1 | ||
4785 | #define UPDATE_BACKREF 2 | ||
4786 | 4867 | ||
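reada_walk_down() above sizes its readahead window adaptively: wc->reada_count
shrinks by a third (never below 2) while the walk is still inside the region
already read ahead, and grows by half otherwise, capped at the number of node
pointers per block. A standalone model of that feedback rule; the cap value
is an assumption, since the real one depends on the node size:

#include <stdio.h>

#define NODEPTRS_PER_BLOCK 121          /* assumed for 4K nodes */

static int next_reada_count(int count, int inside_window)
{
        if (inside_window) {            /* readahead is outpacing the walk */
                count = count * 2 / 3;
                return count > 2 ? count : 2;
        }
        count = count * 3 / 2;          /* walk caught up: widen the window */
        return count < NODEPTRS_PER_BLOCK ? count : NODEPTRS_PER_BLOCK;
}

int main(void)
{
        int count = NODEPTRS_PER_BLOCK;
        int inside[] = { 1, 1, 1, 0, 0 };       /* sample walk pattern */

        for (int i = 0; i < 5; i++) {
                count = next_reada_count(count, inside[i]);
                printf("reada_count = %d\n", count);
        }
        return 0;
}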
4787 | /* | 4868 | /* |
4788 | * helper to process a tree block while walking down the tree. | 4869 | * helper to process a tree block while walking down the tree. |
4789 | * | 4870 | * |
4790 | * when wc->stage == DROP_REFERENCE, this function checks | ||
4791 | * reference count of the block. if the block is shared and | ||
4792 | * we need update back refs for the subtree rooted at the | ||
4793 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
4794 | * | ||
4795 | * when wc->stage == UPDATE_BACKREF, this function updates | 4871 | * when wc->stage == UPDATE_BACKREF, this function updates |
4796 | * back refs for pointers in the block. | 4872 | * back refs for pointers in the block. |
4797 | * | 4873 | * |
@@ -4804,7 +4880,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4804 | { | 4880 | { |
4805 | int level = wc->level; | 4881 | int level = wc->level; |
4806 | struct extent_buffer *eb = path->nodes[level]; | 4882 | struct extent_buffer *eb = path->nodes[level]; |
4807 | struct btrfs_key key; | ||
4808 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 4883 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
4809 | int ret; | 4884 | int ret; |
4810 | 4885 | ||
@@ -4827,21 +4902,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4827 | BUG_ON(wc->refs[level] == 0); | 4902 | BUG_ON(wc->refs[level] == 0); |
4828 | } | 4903 | } |
4829 | 4904 | ||
4830 | if (wc->stage == DROP_REFERENCE && | ||
4831 | wc->update_ref && wc->refs[level] > 1) { | ||
4832 | BUG_ON(eb == root->node); | ||
4833 | BUG_ON(path->slots[level] > 0); | ||
4834 | if (level == 0) | ||
4835 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
4836 | else | ||
4837 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
4838 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
4839 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
4840 | wc->stage = UPDATE_BACKREF; | ||
4841 | wc->shared_level = level; | ||
4842 | } | ||
4843 | } | ||
4844 | |||
4845 | if (wc->stage == DROP_REFERENCE) { | 4905 | if (wc->stage == DROP_REFERENCE) { |
4846 | if (wc->refs[level] > 1) | 4906 | if (wc->refs[level] > 1) |
4847 | return 1; | 4907 | return 1; |
@@ -4878,6 +4938,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4878 | } | 4938 | } |
4879 | 4939 | ||
4880 | /* | 4940 | /* |
4941 | * helper to process a tree block pointer. | ||
4942 | * | ||
4943 | * when wc->stage == DROP_REFERENCE, this function checks the | ||
4944 | * reference count of the block pointed to. if the block | ||
4945 | * is shared and we need to update back refs for the subtree | ||
4946 | * rooted at the block, this function changes wc->stage to | ||
4947 | * UPDATE_BACKREF. if the block is shared and there is no | ||
4948 | * need to update back refs, this function drops the | ||
4949 | * reference to the block. | ||
4950 | * | ||
4951 | * NOTE: return value 1 means we should stop walking down. | ||
4952 | */ | ||
4953 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
4954 | struct btrfs_root *root, | ||
4955 | struct btrfs_path *path, | ||
4956 | struct walk_control *wc) | ||
4957 | { | ||
4958 | u64 bytenr; | ||
4959 | u64 generation; | ||
4960 | u64 parent; | ||
4961 | u32 blocksize; | ||
4962 | struct btrfs_key key; | ||
4963 | struct extent_buffer *next; | ||
4964 | int level = wc->level; | ||
4965 | int reada = 0; | ||
4966 | int ret = 0; | ||
4967 | |||
4968 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
4969 | path->slots[level]); | ||
4970 | /* | ||
4971 | * if the lower level block was created before the snapshot | ||
4972 | * was created, we know there is no need to update back refs | ||
4973 | * for the subtree | ||
4974 | */ | ||
4975 | if (wc->stage == UPDATE_BACKREF && | ||
4976 | generation <= root->root_key.offset) | ||
4977 | return 1; | ||
4978 | |||
4979 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
4980 | blocksize = btrfs_level_size(root, level - 1); | ||
4981 | |||
4982 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
4983 | if (!next) { | ||
4984 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
4985 | reada = 1; | ||
4986 | } | ||
4987 | btrfs_tree_lock(next); | ||
4988 | btrfs_set_lock_blocking(next); | ||
4989 | |||
4990 | if (wc->stage == DROP_REFERENCE) { | ||
4991 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
4992 | &wc->refs[level - 1], | ||
4993 | &wc->flags[level - 1]); | ||
4994 | BUG_ON(ret); | ||
4995 | BUG_ON(wc->refs[level - 1] == 0); | ||
4996 | |||
4997 | if (wc->refs[level - 1] > 1) { | ||
4998 | if (!wc->update_ref || | ||
4999 | generation <= root->root_key.offset) | ||
5000 | goto skip; | ||
5001 | |||
5002 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
5003 | path->slots[level]); | ||
5004 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
5005 | if (ret < 0) | ||
5006 | goto skip; | ||
5007 | |||
5008 | wc->stage = UPDATE_BACKREF; | ||
5009 | wc->shared_level = level - 1; | ||
5010 | } | ||
5011 | } | ||
5012 | |||
5013 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
5014 | btrfs_tree_unlock(next); | ||
5015 | free_extent_buffer(next); | ||
5016 | next = NULL; | ||
5017 | } | ||
5018 | |||
5019 | if (!next) { | ||
5020 | if (reada && level == 1) | ||
5021 | reada_walk_down(trans, root, wc, path); | ||
5022 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
5023 | btrfs_tree_lock(next); | ||
5024 | btrfs_set_lock_blocking(next); | ||
5025 | } | ||
5026 | |||
5027 | level--; | ||
5028 | BUG_ON(level != btrfs_header_level(next)); | ||
5029 | path->nodes[level] = next; | ||
5030 | path->slots[level] = 0; | ||
5031 | path->locks[level] = 1; | ||
5032 | wc->level = level; | ||
5033 | if (wc->level == 1) | ||
5034 | wc->reada_slot = 0; | ||
5035 | return 0; | ||
5036 | skip: | ||
5037 | wc->refs[level - 1] = 0; | ||
5038 | wc->flags[level - 1] = 0; | ||
5039 | |||
5040 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
5041 | parent = path->nodes[level]->start; | ||
5042 | } else { | ||
5043 | BUG_ON(root->root_key.objectid != | ||
5044 | btrfs_header_owner(path->nodes[level])); | ||
5045 | parent = 0; | ||
5046 | } | ||
5047 | |||
5048 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
5049 | root->root_key.objectid, level - 1, 0); | ||
5050 | BUG_ON(ret); | ||
5051 | |||
5052 | btrfs_tree_unlock(next); | ||
5053 | free_extent_buffer(next); | ||
5054 | return 1; | ||
5055 | } | ||
5056 | |||
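do_walk_down() condenses to a small decision table: prune at a child that
predates the snapshot during the backref pass, prune a shared child when no
backref update is needed or its key was already covered, switch to
UPDATE_BACKREF when a shared subtree still needs its backrefs rewritten, and
descend otherwise. A sketch of just that decision; the kernel also drops one
reference on most prune paths, and the inputs here are assumed precomputed:

#define DROP_REFERENCE 1
#define UPDATE_BACKREF 2

struct wc_model {
        int stage;
        int shared_level;
        int update_ref;
};

/* returns 1 to prune the walk at this child, 0 to descend into it */
static int child_action(struct wc_model *wc, int level,
                        unsigned long long refs,
                        int gen_predates_snapshot,
                        int key_before_progress)
{
        /* subtree untouched since the snapshot: no backrefs to fix */
        if (wc->stage == UPDATE_BACKREF && gen_predates_snapshot)
                return 1;

        if (wc->stage == DROP_REFERENCE && refs > 1) {
                /* shared block with nothing to update: drop our ref */
                if (!wc->update_ref || gen_predates_snapshot)
                        return 1;
                /* already handled by an earlier pass */
                if (key_before_progress)
                        return 1;
                /* shared subtree still needs its backrefs rewritten */
                wc->stage = UPDATE_BACKREF;
                wc->shared_level = level - 1;
        }
        return 0;       /* sole owner, or backref pass: walk down */
}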
5057 | /* | ||
4881 | * helper to process a tree block while walking up the tree. | 5058 | * helper to process a tree block while walking up the tree. |
4882 | * | 5059 | * |
4883 | * when wc->stage == DROP_REFERENCE, this function drops | 5060 | * when wc->stage == DROP_REFERENCE, this function drops |
@@ -4904,7 +5081,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4904 | if (level < wc->shared_level) | 5081 | if (level < wc->shared_level) |
4905 | goto out; | 5082 | goto out; |
4906 | 5083 | ||
4907 | BUG_ON(wc->refs[level] <= 1); | ||
4908 | ret = find_next_key(path, level + 1, &wc->update_progress); | 5084 | ret = find_next_key(path, level + 1, &wc->update_progress); |
4909 | if (ret > 0) | 5085 | if (ret > 0) |
4910 | wc->update_ref = 0; | 5086 | wc->update_ref = 0; |
@@ -4935,8 +5111,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4935 | path->locks[level] = 0; | 5111 | path->locks[level] = 0; |
4936 | return 1; | 5112 | return 1; |
4937 | } | 5113 | } |
4938 | } else { | ||
4939 | BUG_ON(level != 0); | ||
4940 | } | 5114 | } |
4941 | } | 5115 | } |
4942 | 5116 | ||
@@ -4989,17 +5163,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4989 | struct btrfs_path *path, | 5163 | struct btrfs_path *path, |
4990 | struct walk_control *wc) | 5164 | struct walk_control *wc) |
4991 | { | 5165 | { |
4992 | struct extent_buffer *next; | ||
4993 | struct extent_buffer *cur; | ||
4994 | u64 bytenr; | ||
4995 | u64 ptr_gen; | ||
4996 | u32 blocksize; | ||
4997 | int level = wc->level; | 5166 | int level = wc->level; |
4998 | int ret; | 5167 | int ret; |
4999 | 5168 | ||
5000 | while (level >= 0) { | 5169 | while (level >= 0) { |
5001 | cur = path->nodes[level]; | 5170 | if (path->slots[level] >= |
5002 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 5171 | btrfs_header_nritems(path->nodes[level])) |
5172 | break; | ||
5003 | 5173 | ||
5004 | ret = walk_down_proc(trans, root, path, wc); | 5174 | ret = walk_down_proc(trans, root, path, wc); |
5005 | if (ret > 0) | 5175 | if (ret > 0) |
@@ -5008,20 +5178,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
5008 | if (level == 0) | 5178 | if (level == 0) |
5009 | break; | 5179 | break; |
5010 | 5180 | ||
5011 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 5181 | ret = do_walk_down(trans, root, path, wc); |
5012 | blocksize = btrfs_level_size(root, level - 1); | 5182 | if (ret > 0) { |
5013 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 5183 | path->slots[level]++; |
5014 | 5184 | continue; | |
5015 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 5185 | } |
5016 | btrfs_tree_lock(next); | 5186 | level = wc->level; |
5017 | btrfs_set_lock_blocking(next); | ||
5018 | |||
5019 | level--; | ||
5020 | BUG_ON(level != btrfs_header_level(next)); | ||
5021 | path->nodes[level] = next; | ||
5022 | path->slots[level] = 0; | ||
5023 | path->locks[level] = 1; | ||
5024 | wc->level = level; | ||
5025 | } | 5187 | } |
5026 | return 0; | 5188 | return 0; |
5027 | } | 5189 | } |
@@ -5111,9 +5273,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5111 | err = ret; | 5273 | err = ret; |
5112 | goto out; | 5274 | goto out; |
5113 | } | 5275 | } |
5114 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 5276 | WARN_ON(ret > 0); |
5115 | path->slots[level]); | ||
5116 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
5117 | 5277 | ||
5118 | /* | 5278 | /* |
5119 | * unlock our path, this is safe because only this | 5279 | * unlock our path, this is safe because only this |
@@ -5148,6 +5308,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5148 | wc->stage = DROP_REFERENCE; | 5308 | wc->stage = DROP_REFERENCE; |
5149 | wc->update_ref = update_ref; | 5309 | wc->update_ref = update_ref; |
5150 | wc->keep_locks = 0; | 5310 | wc->keep_locks = 0; |
5311 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5151 | 5312 | ||
5152 | while (1) { | 5313 | while (1) { |
5153 | ret = walk_down_tree(trans, root, path, wc); | 5314 | ret = walk_down_tree(trans, root, path, wc); |
@@ -5200,9 +5361,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5200 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 5361 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
5201 | BUG_ON(ret); | 5362 | BUG_ON(ret); |
5202 | 5363 | ||
5203 | free_extent_buffer(root->node); | 5364 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
5204 | free_extent_buffer(root->commit_root); | 5365 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, |
5205 | kfree(root); | 5366 | NULL, NULL); |
5367 | BUG_ON(ret < 0); | ||
5368 | if (ret > 0) { | ||
5369 | ret = btrfs_del_orphan_item(trans, tree_root, | ||
5370 | root->root_key.objectid); | ||
5371 | BUG_ON(ret); | ||
5372 | } | ||
5373 | } | ||
5374 | |||
5375 | if (root->in_radix) { | ||
5376 | btrfs_free_fs_root(tree_root->fs_info, root); | ||
5377 | } else { | ||
5378 | free_extent_buffer(root->node); | ||
5379 | free_extent_buffer(root->commit_root); | ||
5380 | kfree(root); | ||
5381 | } | ||
5206 | out: | 5382 | out: |
5207 | btrfs_end_transaction(trans, tree_root); | 5383 | btrfs_end_transaction(trans, tree_root); |
5208 | kfree(wc); | 5384 | kfree(wc); |
@@ -5254,6 +5430,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
5254 | wc->stage = DROP_REFERENCE; | 5430 | wc->stage = DROP_REFERENCE; |
5255 | wc->update_ref = 0; | 5431 | wc->update_ref = 0; |
5256 | wc->keep_locks = 1; | 5432 | wc->keep_locks = 1; |
5433 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5257 | 5434 | ||
5258 | while (1) { | 5435 | while (1) { |
5259 | wret = walk_down_tree(trans, root, path, wc); | 5436 | wret = walk_down_tree(trans, root, path, wc); |
@@ -5396,9 +5573,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
5396 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5573 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
5397 | while (1) { | 5574 | while (1) { |
5398 | int ret; | 5575 | int ret; |
5399 | spin_lock(&em_tree->lock); | 5576 | write_lock(&em_tree->lock); |
5400 | ret = add_extent_mapping(em_tree, em); | 5577 | ret = add_extent_mapping(em_tree, em); |
5401 | spin_unlock(&em_tree->lock); | 5578 | write_unlock(&em_tree->lock); |
5402 | if (ret != -EEXIST) { | 5579 | if (ret != -EEXIST) { |
5403 | free_extent_map(em); | 5580 | free_extent_map(em); |
5404 | break; | 5581 | break; |
@@ -6841,287 +7018,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | |||
6841 | return 0; | 7018 | return 0; |
6842 | } | 7019 | } |
6843 | 7020 | ||
6844 | #if 0 | 7021 | /* |
6845 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 7022 | * checks to see if it's even possible to relocate this block group. |
6846 | struct btrfs_root *root, | 7023 | * |
6847 | u64 objectid, u64 size) | 7024 | * @return - -1 if it's not a good idea to relocate this block group, 0 if it's |
6848 | { | 7025 | * ok to go ahead and try. |
6849 | struct btrfs_path *path; | 7026 | */ |
6850 | struct btrfs_inode_item *item; | 7027 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) |
6851 | struct extent_buffer *leaf; | ||
6852 | int ret; | ||
6853 | |||
6854 | path = btrfs_alloc_path(); | ||
6855 | if (!path) | ||
6856 | return -ENOMEM; | ||
6857 | |||
6858 | path->leave_spinning = 1; | ||
6859 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
6860 | if (ret) | ||
6861 | goto out; | ||
6862 | |||
6863 | leaf = path->nodes[0]; | ||
6864 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
6865 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
6866 | btrfs_set_inode_generation(leaf, item, 1); | ||
6867 | btrfs_set_inode_size(leaf, item, size); | ||
6868 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
6869 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
6870 | btrfs_mark_buffer_dirty(leaf); | ||
6871 | btrfs_release_path(root, path); | ||
6872 | out: | ||
6873 | btrfs_free_path(path); | ||
6874 | return ret; | ||
6875 | } | ||
6876 | |||
6877 | static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
6878 | struct btrfs_block_group_cache *group) | ||
6879 | { | 7028 | { |
6880 | struct inode *inode = NULL; | 7029 | struct btrfs_block_group_cache *block_group; |
6881 | struct btrfs_trans_handle *trans; | 7030 | struct btrfs_space_info *space_info; |
6882 | struct btrfs_root *root; | 7031 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6883 | struct btrfs_key root_key; | 7032 | struct btrfs_device *device; |
6884 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 7033 | int full = 0; |
6885 | int err = 0; | 7034 | int ret = 0; |
6886 | 7035 | ||
6887 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | 7036 | block_group = btrfs_lookup_block_group(root->fs_info, bytenr); |
6888 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
6889 | root_key.offset = (u64)-1; | ||
6890 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
6891 | if (IS_ERR(root)) | ||
6892 | return ERR_CAST(root); | ||
6893 | 7037 | ||
6894 | trans = btrfs_start_transaction(root, 1); | 7038 | /* odd, couldn't find the block group, leave it alone */ |
6895 | BUG_ON(!trans); | 7039 | if (!block_group) |
7040 | return -1; | ||
6896 | 7041 | ||
6897 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 7042 | /* no bytes used, we're good */ |
6898 | if (err) | 7043 | if (!btrfs_block_group_used(&block_group->item)) |
6899 | goto out; | 7044 | goto out; |
6900 | 7045 | ||
6901 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 7046 | space_info = block_group->space_info; |
6902 | BUG_ON(err); | 7047 | spin_lock(&space_info->lock); |
6903 | |||
6904 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
6905 | group->key.offset, 0, group->key.offset, | ||
6906 | 0, 0, 0); | ||
6907 | BUG_ON(err); | ||
6908 | |||
6909 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
6910 | if (inode->i_state & I_NEW) { | ||
6911 | BTRFS_I(inode)->root = root; | ||
6912 | BTRFS_I(inode)->location.objectid = objectid; | ||
6913 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
6914 | BTRFS_I(inode)->location.offset = 0; | ||
6915 | btrfs_read_locked_inode(inode); | ||
6916 | unlock_new_inode(inode); | ||
6917 | BUG_ON(is_bad_inode(inode)); | ||
6918 | } else { | ||
6919 | BUG_ON(1); | ||
6920 | } | ||
6921 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
6922 | |||
6923 | err = btrfs_orphan_add(trans, inode); | ||
6924 | out: | ||
6925 | btrfs_end_transaction(trans, root); | ||
6926 | if (err) { | ||
6927 | if (inode) | ||
6928 | iput(inode); | ||
6929 | inode = ERR_PTR(err); | ||
6930 | } | ||
6931 | return inode; | ||
6932 | } | ||
6933 | |||
6934 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
6935 | { | ||
6936 | |||
6937 | struct btrfs_ordered_sum *sums; | ||
6938 | struct btrfs_sector_sum *sector_sum; | ||
6939 | struct btrfs_ordered_extent *ordered; | ||
6940 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
6941 | struct list_head list; | ||
6942 | size_t offset; | ||
6943 | int ret; | ||
6944 | u64 disk_bytenr; | ||
6945 | |||
6946 | INIT_LIST_HEAD(&list); | ||
6947 | |||
6948 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
6949 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
6950 | |||
6951 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
6952 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
6953 | disk_bytenr + len - 1, &list); | ||
6954 | |||
6955 | while (!list_empty(&list)) { | ||
6956 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
6957 | list_del_init(&sums->list); | ||
6958 | |||
6959 | sector_sum = sums->sums; | ||
6960 | sums->bytenr = ordered->start; | ||
6961 | 7048 | ||
6962 | offset = 0; | 7049 | full = space_info->full; |
6963 | while (offset < sums->len) { | ||
6964 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
6965 | sector_sum++; | ||
6966 | offset += root->sectorsize; | ||
6967 | } | ||
6968 | 7050 | ||
6969 | btrfs_add_ordered_sum(inode, ordered, sums); | 7051 | /* |
7052 | * if this is the last block group we have in this space, we can't | ||
7053 | * relocate it unless we're able to allocate a new chunk below. | ||
7054 | * | ||
7055 | * Otherwise, we need to make sure we have room in the space to handle | ||
7056 | * all of the extents from this block group. If we can, we're good. | ||
7057 | */ | ||
7058 | if ((space_info->total_bytes != block_group->key.offset) && | ||
7059 | (space_info->bytes_used + space_info->bytes_reserved + | ||
7060 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
7061 | btrfs_block_group_used(&block_group->item) < | ||
7062 | space_info->total_bytes)) { | ||
7063 | spin_unlock(&space_info->lock); | ||
7064 | goto out; | ||
6970 | } | 7065 | } |
6971 | btrfs_put_ordered_extent(ordered); | 7066 | spin_unlock(&space_info->lock); |
6972 | return 0; | ||
6973 | } | ||
6974 | |||
6975 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
6976 | { | ||
6977 | struct btrfs_trans_handle *trans; | ||
6978 | struct btrfs_path *path; | ||
6979 | struct btrfs_fs_info *info = root->fs_info; | ||
6980 | struct extent_buffer *leaf; | ||
6981 | struct inode *reloc_inode; | ||
6982 | struct btrfs_block_group_cache *block_group; | ||
6983 | struct btrfs_key key; | ||
6984 | u64 skipped; | ||
6985 | u64 cur_byte; | ||
6986 | u64 total_found; | ||
6987 | u32 nritems; | ||
6988 | int ret; | ||
6989 | int progress; | ||
6990 | int pass = 0; | ||
6991 | |||
6992 | root = root->fs_info->extent_root; | ||
6993 | |||
6994 | block_group = btrfs_lookup_block_group(info, group_start); | ||
6995 | BUG_ON(!block_group); | ||
6996 | |||
6997 | printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", | ||
6998 | (unsigned long long)block_group->key.objectid, | ||
6999 | (unsigned long long)block_group->flags); | ||
7000 | |||
7001 | path = btrfs_alloc_path(); | ||
7002 | BUG_ON(!path); | ||
7003 | |||
7004 | reloc_inode = create_reloc_inode(info, block_group); | ||
7005 | BUG_ON(IS_ERR(reloc_inode)); | ||
7006 | |||
7007 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
7008 | set_block_group_readonly(block_group); | ||
7009 | |||
7010 | btrfs_start_delalloc_inodes(info->tree_root); | ||
7011 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
7012 | again: | ||
7013 | skipped = 0; | ||
7014 | total_found = 0; | ||
7015 | progress = 0; | ||
7016 | key.objectid = block_group->key.objectid; | ||
7017 | key.offset = 0; | ||
7018 | key.type = 0; | ||
7019 | cur_byte = key.objectid; | ||
7020 | |||
7021 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7022 | btrfs_commit_transaction(trans, info->tree_root); | ||
7023 | 7067 | ||
7024 | mutex_lock(&root->fs_info->cleaner_mutex); | 7068 | /* |
7025 | btrfs_clean_old_snapshots(info->tree_root); | 7069 | * ok we don't have enough space, but maybe we have free space on our |
7026 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 7070 | * devices to allocate new chunks for relocation, so loop through our |
7027 | mutex_unlock(&root->fs_info->cleaner_mutex); | 7071 | * alloc devices and guess if we have enough space. However, if we |
7072 | * were marked as full, then we know there aren't enough chunks, and we | ||
7073 | * can just return. | ||
7074 | */ | ||
7075 | ret = -1; | ||
7076 | if (full) | ||
7077 | goto out; | ||
7028 | 7078 | ||
7029 | trans = btrfs_start_transaction(info->tree_root, 1); | 7079 | mutex_lock(&root->fs_info->chunk_mutex); |
7030 | btrfs_commit_transaction(trans, info->tree_root); | 7080 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7081 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
7082 | u64 dev_offset, max_avail; | ||
7031 | 7083 | ||
7032 | while (1) { | 7084 | /* |
7033 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 7085 | * check to make sure we can actually find a chunk with enough |
7034 | if (ret < 0) | 7086 | * space to fit our block group in. |
7035 | goto out; | 7087 | */ |
7036 | next: | 7088 | if (device->total_bytes > device->bytes_used + min_free) { |
7037 | leaf = path->nodes[0]; | 7089 | ret = find_free_dev_extent(NULL, device, min_free, |
7038 | nritems = btrfs_header_nritems(leaf); | 7090 | &dev_offset, &max_avail); |
7039 | if (path->slots[0] >= nritems) { | 7091 | if (!ret) |
7040 | ret = btrfs_next_leaf(root, path); | ||
7041 | if (ret < 0) | ||
7042 | goto out; | ||
7043 | if (ret == 1) { | ||
7044 | ret = 0; | ||
7045 | break; | 7092 | break; |
7046 | } | 7093 | ret = -1; |
7047 | leaf = path->nodes[0]; | ||
7048 | nritems = btrfs_header_nritems(leaf); | ||
7049 | } | ||
7050 | |||
7051 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
7052 | |||
7053 | if (key.objectid >= block_group->key.objectid + | ||
7054 | block_group->key.offset) | ||
7055 | break; | ||
7056 | |||
7057 | if (progress && need_resched()) { | ||
7058 | btrfs_release_path(root, path); | ||
7059 | cond_resched(); | ||
7060 | progress = 0; | ||
7061 | continue; | ||
7062 | } | ||
7063 | progress = 1; | ||
7064 | |||
7065 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
7066 | key.objectid + key.offset <= cur_byte) { | ||
7067 | path->slots[0]++; | ||
7068 | goto next; | ||
7069 | } | ||
7070 | |||
7071 | total_found++; | ||
7072 | cur_byte = key.objectid + key.offset; | ||
7073 | btrfs_release_path(root, path); | ||
7074 | |||
7075 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
7076 | ret = relocate_one_extent(root, path, &key, block_group, | ||
7077 | reloc_inode, pass); | ||
7078 | BUG_ON(ret < 0); | ||
7079 | if (ret > 0) | ||
7080 | skipped++; | ||
7081 | |||
7082 | key.objectid = cur_byte; | ||
7083 | key.type = 0; | ||
7084 | key.offset = 0; | ||
7085 | } | ||
7086 | |||
7087 | btrfs_release_path(root, path); | ||
7088 | |||
7089 | if (pass == 0) { | ||
7090 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
7091 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
7092 | } | ||
7093 | |||
7094 | if (total_found > 0) { | ||
7095 | printk(KERN_INFO "btrfs found %llu extents in pass %d\n", | ||
7096 | (unsigned long long)total_found, pass); | ||
7097 | pass++; | ||
7098 | if (total_found == skipped && pass > 2) { | ||
7099 | iput(reloc_inode); | ||
7100 | reloc_inode = create_reloc_inode(info, block_group); | ||
7101 | pass = 0; | ||
7102 | } | 7094 | } |
7103 | goto again; | ||
7104 | } | 7095 | } |
7105 | 7096 | mutex_unlock(&root->fs_info->chunk_mutex); | |
7106 | /* delete reloc_inode */ | ||
7107 | iput(reloc_inode); | ||
7108 | |||
7109 | /* unpin extents in this range */ | ||
7110 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7111 | btrfs_commit_transaction(trans, info->tree_root); | ||
7112 | |||
7113 | spin_lock(&block_group->lock); | ||
7114 | WARN_ON(block_group->pinned > 0); | ||
7115 | WARN_ON(block_group->reserved > 0); | ||
7116 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
7117 | spin_unlock(&block_group->lock); | ||
7118 | btrfs_put_block_group(block_group); | ||
7119 | ret = 0; | ||
7120 | out: | 7097 | out: |
7121 | btrfs_free_path(path); | 7098 | btrfs_put_block_group(block_group); |
7122 | return ret; | 7099 | return ret; |
7123 | } | 7100 | } |
7124 | #endif | ||
7125 | 7101 | ||
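The headroom test in btrfs_can_relocate() above reduces to simple arithmetic:
relocation is allowed when the rest of the space_info can absorb this block
group's used bytes; the only-group-left case instead requires that a fresh
chunk fit on some device. A sketch of the first check, with field names
following the patch and values supplied by the caller:

#include <stdbool.h>
#include <stdint.h>

struct space_info_model {
        uint64_t total_bytes;
        uint64_t bytes_used;
        uint64_t bytes_reserved;
        uint64_t bytes_pinned;
        uint64_t bytes_readonly;
};

/* true when the remaining groups in this space_info have room for
 * everything currently stored in the block group being relocated */
static bool fits_elsewhere(const struct space_info_model *s,
                           uint64_t bg_size, uint64_t bg_used)
{
        if (s->total_bytes == bg_size)
                return false;   /* last block group in this space_info */
        return s->bytes_used + s->bytes_reserved + s->bytes_pinned +
               s->bytes_readonly + bg_used < s->total_bytes;
}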
7126 | static int find_first_block_group(struct btrfs_root *root, | 7102 | static int find_first_block_group(struct btrfs_root *root, |
7127 | struct btrfs_path *path, struct btrfs_key *key) | 7103 | struct btrfs_path *path, struct btrfs_key *key) |
@@ -7164,8 +7140,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7164 | { | 7140 | { |
7165 | struct btrfs_block_group_cache *block_group; | 7141 | struct btrfs_block_group_cache *block_group; |
7166 | struct btrfs_space_info *space_info; | 7142 | struct btrfs_space_info *space_info; |
7143 | struct btrfs_caching_control *caching_ctl; | ||
7167 | struct rb_node *n; | 7144 | struct rb_node *n; |
7168 | 7145 | ||
7146 | down_write(&info->extent_commit_sem); | ||
7147 | while (!list_empty(&info->caching_block_groups)) { | ||
7148 | caching_ctl = list_entry(info->caching_block_groups.next, | ||
7149 | struct btrfs_caching_control, list); | ||
7150 | list_del(&caching_ctl->list); | ||
7151 | put_caching_control(caching_ctl); | ||
7152 | } | ||
7153 | up_write(&info->extent_commit_sem); | ||
7154 | |||
7169 | spin_lock(&info->block_group_cache_lock); | 7155 | spin_lock(&info->block_group_cache_lock); |
7170 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 7156 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
7171 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 7157 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
@@ -7179,8 +7165,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7179 | up_write(&block_group->space_info->groups_sem); | 7165 | up_write(&block_group->space_info->groups_sem); |
7180 | 7166 | ||
7181 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7167 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7182 | wait_event(block_group->caching_q, | 7168 | wait_block_group_cache_done(block_group); |
7183 | block_group_cache_done(block_group)); | ||
7184 | 7169 | ||
7185 | btrfs_remove_free_space_cache(block_group); | 7170 | btrfs_remove_free_space_cache(block_group); |
7186 | 7171 | ||
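wait_block_group_cache_done() replaces the open-coded wait_event() both here and in btrfs_remove_block_group() near the end of this diff, which is also why the per-group caching_q wait queue head disappears below. Its body is not part of this hunk; a plausible shape, assuming it pins the caching control so the wait queue cannot be freed mid-sleep:

static int wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *caching_ctl;

	caching_ctl = get_caching_control(cache); /* NULL once caching done */
	if (!caching_ctl)
		return 0;

	/* Sleep on the control's own queue instead of a per-group one. */
	wait_event(caching_ctl->wait, block_group_cache_done(cache));
	put_caching_control(caching_ctl);
	return 0;
}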
@@ -7250,7 +7235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7250 | spin_lock_init(&cache->lock); | 7235 | spin_lock_init(&cache->lock); |
7251 | spin_lock_init(&cache->tree_lock); | 7236 | spin_lock_init(&cache->tree_lock); |
7252 | cache->fs_info = info; | 7237 | cache->fs_info = info; |
7253 | init_waitqueue_head(&cache->caching_q); | ||
7254 | INIT_LIST_HEAD(&cache->list); | 7238 | INIT_LIST_HEAD(&cache->list); |
7255 | INIT_LIST_HEAD(&cache->cluster_list); | 7239 | INIT_LIST_HEAD(&cache->cluster_list); |
7256 | 7240 | ||
@@ -7272,8 +7256,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7272 | cache->flags = btrfs_block_group_flags(&cache->item); | 7256 | cache->flags = btrfs_block_group_flags(&cache->item); |
7273 | cache->sectorsize = root->sectorsize; | 7257 | cache->sectorsize = root->sectorsize; |
7274 | 7258 | ||
7275 | remove_sb_from_cache(root, cache); | ||
7276 | |||
7277 | /* | 7259 | /* |
7278 | * check for two cases, either we are full, and therefore | 7260 | * check for two cases, either we are full, and therefore |
7279 | * don't need to bother with the caching work since we won't | 7261 | * don't need to bother with the caching work since we won't |
@@ -7282,13 +7264,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7282 | * time, particularly in the full case. | 7264 | * time, particularly in the full case. |
7283 | */ | 7265 | */ |
7284 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 7266 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
7267 | exclude_super_stripes(root, cache); | ||
7268 | cache->last_byte_to_unpin = (u64)-1; | ||
7285 | cache->cached = BTRFS_CACHE_FINISHED; | 7269 | cache->cached = BTRFS_CACHE_FINISHED; |
7270 | free_excluded_extents(root, cache); | ||
7286 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 7271 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
7272 | exclude_super_stripes(root, cache); | ||
7273 | cache->last_byte_to_unpin = (u64)-1; | ||
7287 | cache->cached = BTRFS_CACHE_FINISHED; | 7274 | cache->cached = BTRFS_CACHE_FINISHED; |
7288 | add_new_free_space(cache, root->fs_info, | 7275 | add_new_free_space(cache, root->fs_info, |
7289 | found_key.objectid, | 7276 | found_key.objectid, |
7290 | found_key.objectid + | 7277 | found_key.objectid + |
7291 | found_key.offset); | 7278 | found_key.offset); |
7279 | free_excluded_extents(root, cache); | ||
7292 | } | 7280 | } |
7293 | 7281 | ||
7294 | ret = update_space_info(info, cache->flags, found_key.offset, | 7282 | ret = update_space_info(info, cache->flags, found_key.offset, |
@@ -7296,6 +7284,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7296 | &space_info); | 7284 | &space_info); |
7297 | BUG_ON(ret); | 7285 | BUG_ON(ret); |
7298 | cache->space_info = space_info; | 7286 | cache->space_info = space_info; |
7287 | spin_lock(&cache->space_info->lock); | ||
7288 | cache->space_info->bytes_super += cache->bytes_super; | ||
7289 | spin_unlock(&cache->space_info->lock); | ||
7290 | |||
7299 | down_write(&space_info->groups_sem); | 7291 | down_write(&space_info->groups_sem); |
7300 | list_add_tail(&cache->list, &space_info->block_groups); | 7292 | list_add_tail(&cache->list, &space_info->block_groups); |
7301 | up_write(&space_info->groups_sem); | 7293 | up_write(&space_info->groups_sem); |
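In the hunk above, exclude_super_stripes() marks the superblock mirror ranges before a group is declared cached, and free_excluded_extents() drops those marks once the group's free-space entries exist, so the mirrors are never handed out as allocatable space. A sketch of the release side, mirroring the set_extent_bits() call in add_excluded_extent(); the EXTENT_UPTODATE bit is an assumption:

static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start = cache->key.objectid;
	u64 end = start + cache->key.offset - 1;

	/* Drop the temporary "excluded" marks from both trees now that
	 * the caching and unpin paths no longer need them. */
	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
}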
@@ -7345,7 +7337,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7345 | atomic_set(&cache->count, 1); | 7337 | atomic_set(&cache->count, 1); |
7346 | spin_lock_init(&cache->lock); | 7338 | spin_lock_init(&cache->lock); |
7347 | spin_lock_init(&cache->tree_lock); | 7339 | spin_lock_init(&cache->tree_lock); |
7348 | init_waitqueue_head(&cache->caching_q); | ||
7349 | INIT_LIST_HEAD(&cache->list); | 7340 | INIT_LIST_HEAD(&cache->list); |
7350 | INIT_LIST_HEAD(&cache->cluster_list); | 7341 | INIT_LIST_HEAD(&cache->cluster_list); |
7351 | 7342 | ||
@@ -7354,15 +7345,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7354 | cache->flags = type; | 7345 | cache->flags = type; |
7355 | btrfs_set_block_group_flags(&cache->item, type); | 7346 | btrfs_set_block_group_flags(&cache->item, type); |
7356 | 7347 | ||
7348 | cache->last_byte_to_unpin = (u64)-1; | ||
7357 | cache->cached = BTRFS_CACHE_FINISHED; | 7349 | cache->cached = BTRFS_CACHE_FINISHED; |
7358 | remove_sb_from_cache(root, cache); | 7350 | exclude_super_stripes(root, cache); |
7359 | 7351 | ||
7360 | add_new_free_space(cache, root->fs_info, chunk_offset, | 7352 | add_new_free_space(cache, root->fs_info, chunk_offset, |
7361 | chunk_offset + size); | 7353 | chunk_offset + size); |
7362 | 7354 | ||
7355 | free_excluded_extents(root, cache); | ||
7356 | |||
7363 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7357 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7364 | &cache->space_info); | 7358 | &cache->space_info); |
7365 | BUG_ON(ret); | 7359 | BUG_ON(ret); |
7360 | |||
7361 | spin_lock(&cache->space_info->lock); | ||
7362 | cache->space_info->bytes_super += cache->bytes_super; | ||
7363 | spin_unlock(&cache->space_info->lock); | ||
7364 | |||
7366 | down_write(&cache->space_info->groups_sem); | 7365 | down_write(&cache->space_info->groups_sem); |
7367 | list_add_tail(&cache->list, &cache->space_info->block_groups); | 7366 | list_add_tail(&cache->list, &cache->space_info->block_groups); |
7368 | up_write(&cache->space_info->groups_sem); | 7367 | up_write(&cache->space_info->groups_sem); |
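Both btrfs_read_block_groups() and btrfs_make_block_group() now fold the per-group cache->bytes_super, accumulated by exclude_super_stripes(), into space_info->bytes_super under the space_info spinlock, since allocators read and adjust those counters concurrently. Purely illustrative, how a consumer might take the new counter into account (the helper is hypothetical; the field names follow this patch):

static u64 space_info_usable_bytes(struct btrfs_space_info *sinfo)
{
	u64 usable;

	/* The lock keeps the counters mutually consistent while we read. */
	spin_lock(&sinfo->lock);
	usable = sinfo->total_bytes - sinfo->bytes_used -
		 sinfo->bytes_pinned - sinfo->bytes_reserved -
		 sinfo->bytes_super;
	spin_unlock(&sinfo->lock);
	return usable;
}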
@@ -7428,8 +7427,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7428 | up_write(&block_group->space_info->groups_sem); | 7427 | up_write(&block_group->space_info->groups_sem); |
7429 | 7428 | ||
7430 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7429 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7431 | wait_event(block_group->caching_q, | 7430 | wait_block_group_cache_done(block_group); |
7432 | block_group_cache_done(block_group)); | ||
7433 | 7431 | ||
7434 | btrfs_remove_free_space_cache(block_group); | 7432 | btrfs_remove_free_space_cache(block_group); |
7435 | 7433 | ||