diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 358 | 
1 files changed, 240 insertions, 118 deletions
| diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1d36702ff7..4e1b153b7c47 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | #include "locking.h" | 34 | #include "locking.h" | 
| 35 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" | 
| 36 | 36 | ||
| 37 | #undef SCRAMBLE_DELAYED_REFS | ||
| 38 | |||
| 37 | /* | 39 | /* | 
| 38 | * control flags for do_chunk_alloc's force field | 40 | * control flags for do_chunk_alloc's force field | 
| 39 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | 41 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | 
| @@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2217 | struct btrfs_delayed_ref_node *ref; | 2219 | struct btrfs_delayed_ref_node *ref; | 
| 2218 | struct btrfs_delayed_ref_head *locked_ref = NULL; | 2220 | struct btrfs_delayed_ref_head *locked_ref = NULL; | 
| 2219 | struct btrfs_delayed_extent_op *extent_op; | 2221 | struct btrfs_delayed_extent_op *extent_op; | 
| 2222 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2220 | int ret; | 2223 | int ret; | 
| 2221 | int count = 0; | 2224 | int count = 0; | 
| 2222 | int must_insert_reserved = 0; | 2225 | int must_insert_reserved = 0; | 
| @@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2255 | ref = select_delayed_ref(locked_ref); | 2258 | ref = select_delayed_ref(locked_ref); | 
| 2256 | 2259 | ||
| 2257 | if (ref && ref->seq && | 2260 | if (ref && ref->seq && | 
| 2258 | btrfs_check_delayed_seq(delayed_refs, ref->seq)) { | 2261 | btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { | 
| 2259 | /* | 2262 | /* | 
| 2260 | * there are still refs with lower seq numbers in the | 2263 | * there are still refs with lower seq numbers in the | 
| 2261 | * process of being added. Don't run this ref yet. | 2264 | * process of being added. Don't run this ref yet. | 
| @@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2337 | } | 2340 | } | 
| 2338 | 2341 | ||
| 2339 | next: | 2342 | next: | 
| 2340 | do_chunk_alloc(trans, root->fs_info->extent_root, | 2343 | do_chunk_alloc(trans, fs_info->extent_root, | 
| 2341 | 2 * 1024 * 1024, | 2344 | 2 * 1024 * 1024, | 
| 2342 | btrfs_get_alloc_profile(root, 0), | 2345 | btrfs_get_alloc_profile(root, 0), | 
| 2343 | CHUNK_ALLOC_NO_FORCE); | 2346 | CHUNK_ALLOC_NO_FORCE); | 
| @@ -2347,21 +2350,99 @@ next: | |||
| 2347 | return count; | 2350 | return count; | 
| 2348 | } | 2351 | } | 
| 2349 | 2352 | ||
| 2350 | static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, | 2353 | static void wait_for_more_refs(struct btrfs_fs_info *fs_info, | 
| 2354 | struct btrfs_delayed_ref_root *delayed_refs, | ||
| 2351 | unsigned long num_refs, | 2355 | unsigned long num_refs, | 
| 2352 | struct list_head *first_seq) | 2356 | struct list_head *first_seq) | 
| 2353 | { | 2357 | { | 
| 2354 | spin_unlock(&delayed_refs->lock); | 2358 | spin_unlock(&delayed_refs->lock); | 
| 2355 | pr_debug("waiting for more refs (num %ld, first %p)\n", | 2359 | pr_debug("waiting for more refs (num %ld, first %p)\n", | 
| 2356 | num_refs, first_seq); | 2360 | num_refs, first_seq); | 
| 2357 | wait_event(delayed_refs->seq_wait, | 2361 | wait_event(fs_info->tree_mod_seq_wait, | 
| 2358 | num_refs != delayed_refs->num_entries || | 2362 | num_refs != delayed_refs->num_entries || | 
| 2359 | delayed_refs->seq_head.next != first_seq); | 2363 | fs_info->tree_mod_seq_list.next != first_seq); | 
| 2360 | pr_debug("done waiting for more refs (num %ld, first %p)\n", | 2364 | pr_debug("done waiting for more refs (num %ld, first %p)\n", | 
| 2361 | delayed_refs->num_entries, delayed_refs->seq_head.next); | 2365 | delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); | 
| 2362 | spin_lock(&delayed_refs->lock); | 2366 | spin_lock(&delayed_refs->lock); | 
| 2363 | } | 2367 | } | 
| 2364 | 2368 | ||
| 2369 | #ifdef SCRAMBLE_DELAYED_REFS | ||
| 2370 | /* | ||
| 2371 | * Normally delayed refs get processed in ascending bytenr order. This | ||
| 2372 | * correlates in most cases to the order added. To expose dependencies on this | ||
| 2373 | * order, we start to process the tree in the middle instead of the beginning | ||
| 2374 | */ | ||
| 2375 | static u64 find_middle(struct rb_root *root) | ||
| 2376 | { | ||
| 2377 | struct rb_node *n = root->rb_node; | ||
| 2378 | struct btrfs_delayed_ref_node *entry; | ||
| 2379 | int alt = 1; | ||
| 2380 | u64 middle; | ||
| 2381 | u64 first = 0, last = 0; | ||
| 2382 | |||
| 2383 | n = rb_first(root); | ||
| 2384 | if (n) { | ||
| 2385 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
| 2386 | first = entry->bytenr; | ||
| 2387 | } | ||
| 2388 | n = rb_last(root); | ||
| 2389 | if (n) { | ||
| 2390 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
| 2391 | last = entry->bytenr; | ||
| 2392 | } | ||
| 2393 | n = root->rb_node; | ||
| 2394 | |||
| 2395 | while (n) { | ||
| 2396 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
| 2397 | WARN_ON(!entry->in_tree); | ||
| 2398 | |||
| 2399 | middle = entry->bytenr; | ||
| 2400 | |||
| 2401 | if (alt) | ||
| 2402 | n = n->rb_left; | ||
| 2403 | else | ||
| 2404 | n = n->rb_right; | ||
| 2405 | |||
| 2406 | alt = 1 - alt; | ||
| 2407 | } | ||
| 2408 | return middle; | ||
| 2409 | } | ||
| 2410 | #endif | ||
| 2411 | |||
| 2412 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
| 2413 | struct btrfs_fs_info *fs_info) | ||
| 2414 | { | ||
| 2415 | struct qgroup_update *qgroup_update; | ||
| 2416 | int ret = 0; | ||
| 2417 | |||
| 2418 | if (list_empty(&trans->qgroup_ref_list) != | ||
| 2419 | !trans->delayed_ref_elem.seq) { | ||
| 2420 | /* list without seq or seq without list */ | ||
| 2421 | printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", | ||
| 2422 | list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
| 2423 | trans->delayed_ref_elem.seq); | ||
| 2424 | BUG(); | ||
| 2425 | } | ||
| 2426 | |||
| 2427 | if (!trans->delayed_ref_elem.seq) | ||
| 2428 | return 0; | ||
| 2429 | |||
| 2430 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
| 2431 | qgroup_update = list_first_entry(&trans->qgroup_ref_list, | ||
| 2432 | struct qgroup_update, list); | ||
| 2433 | list_del(&qgroup_update->list); | ||
| 2434 | if (!ret) | ||
| 2435 | ret = btrfs_qgroup_account_ref( | ||
| 2436 | trans, fs_info, qgroup_update->node, | ||
| 2437 | qgroup_update->extent_op); | ||
| 2438 | kfree(qgroup_update); | ||
| 2439 | } | ||
| 2440 | |||
| 2441 | btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
| 2442 | |||
| 2443 | return ret; | ||
| 2444 | } | ||
| 2445 | |||
| 2365 | /* | 2446 | /* | 
| 2366 | * this starts processing the delayed reference count updates and | 2447 | * this starts processing the delayed reference count updates and | 
| 2367 | * extent insertions we have queued up so far. count can be | 2448 | * extent insertions we have queued up so far. count can be | 
| @@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2398 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | 2479 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | 
| 2399 | CHUNK_ALLOC_NO_FORCE); | 2480 | CHUNK_ALLOC_NO_FORCE); | 
| 2400 | 2481 | ||
| 2482 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 2483 | |||
| 2401 | delayed_refs = &trans->transaction->delayed_refs; | 2484 | delayed_refs = &trans->transaction->delayed_refs; | 
| 2402 | INIT_LIST_HEAD(&cluster); | 2485 | INIT_LIST_HEAD(&cluster); | 
| 2403 | again: | 2486 | again: | 
| 2404 | consider_waiting = 0; | 2487 | consider_waiting = 0; | 
| 2405 | spin_lock(&delayed_refs->lock); | 2488 | spin_lock(&delayed_refs->lock); | 
| 2489 | |||
| 2490 | #ifdef SCRAMBLE_DELAYED_REFS | ||
| 2491 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); | ||
| 2492 | #endif | ||
| 2493 | |||
| 2406 | if (count == 0) { | 2494 | if (count == 0) { | 
| 2407 | count = delayed_refs->num_entries * 2; | 2495 | count = delayed_refs->num_entries * 2; | 
| 2408 | run_most = 1; | 2496 | run_most = 1; | 
| @@ -2437,7 +2525,7 @@ again: | |||
| 2437 | num_refs = delayed_refs->num_entries; | 2525 | num_refs = delayed_refs->num_entries; | 
| 2438 | first_seq = root->fs_info->tree_mod_seq_list.next; | 2526 | first_seq = root->fs_info->tree_mod_seq_list.next; | 
| 2439 | } else { | 2527 | } else { | 
| 2440 | wait_for_more_refs(delayed_refs, | 2528 | wait_for_more_refs(root->fs_info, delayed_refs, | 
| 2441 | num_refs, first_seq); | 2529 | num_refs, first_seq); | 
| 2442 | /* | 2530 | /* | 
| 2443 | * after waiting, things have changed. we | 2531 | * after waiting, things have changed. we | 
| @@ -2502,6 +2590,7 @@ again: | |||
| 2502 | } | 2590 | } | 
| 2503 | out: | 2591 | out: | 
| 2504 | spin_unlock(&delayed_refs->lock); | 2592 | spin_unlock(&delayed_refs->lock); | 
| 2593 | assert_qgroups_uptodate(trans); | ||
| 2505 | return 0; | 2594 | return 0; | 
| 2506 | } | 2595 | } | 
| 2507 | 2596 | ||
| @@ -2581,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, | |||
| 2581 | 2670 | ||
| 2582 | node = rb_prev(node); | 2671 | node = rb_prev(node); | 
| 2583 | if (node) { | 2672 | if (node) { | 
| 2673 | int seq = ref->seq; | ||
| 2674 | |||
| 2584 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 2675 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 
| 2585 | if (ref->bytenr == bytenr) | 2676 | if (ref->bytenr == bytenr && ref->seq == seq) | 
| 2586 | goto out_unlock; | 2677 | goto out_unlock; | 
| 2587 | } | 2678 | } | 
| 2588 | 2679 | ||
| @@ -2903,8 +2994,13 @@ again: | |||
| 2903 | } | 2994 | } | 
| 2904 | 2995 | ||
| 2905 | spin_lock(&block_group->lock); | 2996 | spin_lock(&block_group->lock); | 
| 2906 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | 2997 | if (block_group->cached != BTRFS_CACHE_FINISHED || | 
| 2907 | /* We're not cached, don't bother trying to write stuff out */ | 2998 | !btrfs_test_opt(root, SPACE_CACHE)) { | 
| 2999 | /* | ||
| 3000 | * don't bother trying to write stuff out _if_ | ||
| 3001 | * a) we're not cached, | ||
| 3002 | * b) we're with nospace_cache mount option. | ||
| 3003 | */ | ||
| 2908 | dcs = BTRFS_DC_WRITTEN; | 3004 | dcs = BTRFS_DC_WRITTEN; | 
| 2909 | spin_unlock(&block_group->lock); | 3005 | spin_unlock(&block_group->lock); | 
| 2910 | goto out_put; | 3006 | goto out_put; | 
| @@ -3134,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 3134 | init_waitqueue_head(&found->wait); | 3230 | init_waitqueue_head(&found->wait); | 
| 3135 | *space_info = found; | 3231 | *space_info = found; | 
| 3136 | list_add_rcu(&found->list, &info->space_info); | 3232 | list_add_rcu(&found->list, &info->space_info); | 
| 3233 | if (flags & BTRFS_BLOCK_GROUP_DATA) | ||
| 3234 | info->data_sinfo = found; | ||
| 3137 | return 0; | 3235 | return 0; | 
| 3138 | } | 3236 | } | 
| 3139 | 3237 | ||
| @@ -3263,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | |||
| 3263 | return get_alloc_profile(root, flags); | 3361 | return get_alloc_profile(root, flags); | 
| 3264 | } | 3362 | } | 
| 3265 | 3363 | ||
| 3266 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
| 3267 | { | ||
| 3268 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
| 3269 | BTRFS_BLOCK_GROUP_DATA); | ||
| 3270 | } | ||
| 3271 | |||
| 3272 | /* | 3364 | /* | 
| 3273 | * This will check the space that the inode allocates from to make sure we have | 3365 | * This will check the space that the inode allocates from to make sure we have | 
| 3274 | * enough space for bytes. | 3366 | * enough space for bytes. | 
| @@ -3277,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
| 3277 | { | 3369 | { | 
| 3278 | struct btrfs_space_info *data_sinfo; | 3370 | struct btrfs_space_info *data_sinfo; | 
| 3279 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3371 | struct btrfs_root *root = BTRFS_I(inode)->root; | 
| 3372 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3280 | u64 used; | 3373 | u64 used; | 
| 3281 | int ret = 0, committed = 0, alloc_chunk = 1; | 3374 | int ret = 0, committed = 0, alloc_chunk = 1; | 
| 3282 | 3375 | ||
| @@ -3289,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
| 3289 | committed = 1; | 3382 | committed = 1; | 
| 3290 | } | 3383 | } | 
| 3291 | 3384 | ||
| 3292 | data_sinfo = BTRFS_I(inode)->space_info; | 3385 | data_sinfo = fs_info->data_sinfo; | 
| 3293 | if (!data_sinfo) | 3386 | if (!data_sinfo) | 
| 3294 | goto alloc; | 3387 | goto alloc; | 
| 3295 | 3388 | ||
| @@ -3330,10 +3423,9 @@ alloc: | |||
| 3330 | goto commit_trans; | 3423 | goto commit_trans; | 
| 3331 | } | 3424 | } | 
| 3332 | 3425 | ||
| 3333 | if (!data_sinfo) { | 3426 | if (!data_sinfo) | 
| 3334 | btrfs_set_inode_space_info(root, inode); | 3427 | data_sinfo = fs_info->data_sinfo; | 
| 3335 | data_sinfo = BTRFS_I(inode)->space_info; | 3428 | |
| 3336 | } | ||
| 3337 | goto again; | 3429 | goto again; | 
| 3338 | } | 3430 | } | 
| 3339 | 3431 | ||
| @@ -3380,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
| 3380 | /* make sure bytes are sectorsize aligned */ | 3472 | /* make sure bytes are sectorsize aligned */ | 
| 3381 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3473 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 
| 3382 | 3474 | ||
| 3383 | data_sinfo = BTRFS_I(inode)->space_info; | 3475 | data_sinfo = root->fs_info->data_sinfo; | 
| 3384 | spin_lock(&data_sinfo->lock); | 3476 | spin_lock(&data_sinfo->lock); | 
| 3385 | data_sinfo->bytes_may_use -= bytes; | 3477 | data_sinfo->bytes_may_use -= bytes; | 
| 3386 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 3478 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 
| @@ -3586,89 +3678,58 @@ out: | |||
| 3586 | /* | 3678 | /* | 
| 3587 | * shrink metadata reservation for delalloc | 3679 | * shrink metadata reservation for delalloc | 
| 3588 | */ | 3680 | */ | 
| 3589 | static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, | 3681 | static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | 
| 3590 | bool wait_ordered) | 3682 | bool wait_ordered) | 
| 3591 | { | 3683 | { | 
| 3592 | struct btrfs_block_rsv *block_rsv; | 3684 | struct btrfs_block_rsv *block_rsv; | 
| 3593 | struct btrfs_space_info *space_info; | 3685 | struct btrfs_space_info *space_info; | 
| 3594 | struct btrfs_trans_handle *trans; | 3686 | struct btrfs_trans_handle *trans; | 
| 3595 | u64 reserved; | 3687 | u64 delalloc_bytes; | 
| 3596 | u64 max_reclaim; | 3688 | u64 max_reclaim; | 
| 3597 | u64 reclaimed = 0; | ||
| 3598 | long time_left; | 3689 | long time_left; | 
| 3599 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3690 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 
| 3600 | int loops = 0; | 3691 | int loops = 0; | 
| 3601 | unsigned long progress; | ||
| 3602 | 3692 | ||
| 3603 | trans = (struct btrfs_trans_handle *)current->journal_info; | 3693 | trans = (struct btrfs_trans_handle *)current->journal_info; | 
| 3604 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3694 | block_rsv = &root->fs_info->delalloc_block_rsv; | 
| 3605 | space_info = block_rsv->space_info; | 3695 | space_info = block_rsv->space_info; | 
| 3606 | 3696 | ||
| 3607 | smp_mb(); | 3697 | smp_mb(); | 
| 3608 | reserved = space_info->bytes_may_use; | 3698 | delalloc_bytes = root->fs_info->delalloc_bytes; | 
| 3609 | progress = space_info->reservation_progress; | 3699 | if (delalloc_bytes == 0) { | 
| 3610 | |||
| 3611 | if (reserved == 0) | ||
| 3612 | return 0; | ||
| 3613 | |||
| 3614 | smp_mb(); | ||
| 3615 | if (root->fs_info->delalloc_bytes == 0) { | ||
| 3616 | if (trans) | 3700 | if (trans) | 
| 3617 | return 0; | 3701 | return; | 
| 3618 | btrfs_wait_ordered_extents(root, 0, 0); | 3702 | btrfs_wait_ordered_extents(root, 0, 0); | 
| 3619 | return 0; | 3703 | return; | 
| 3620 | } | 3704 | } | 
| 3621 | 3705 | ||
| 3622 | max_reclaim = min(reserved, to_reclaim); | 3706 | while (delalloc_bytes && loops < 3) { | 
| 3623 | nr_pages = max_t(unsigned long, nr_pages, | 3707 | max_reclaim = min(delalloc_bytes, to_reclaim); | 
| 3624 | max_reclaim >> PAGE_CACHE_SHIFT); | 3708 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 
| 3625 | while (loops < 1024) { | ||
| 3626 | /* have the flusher threads jump in and do some IO */ | ||
| 3627 | smp_mb(); | ||
| 3628 | nr_pages = min_t(unsigned long, nr_pages, | ||
| 3629 | root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); | ||
| 3630 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 3709 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 
| 3631 | WB_REASON_FS_FREE_SPACE); | 3710 | WB_REASON_FS_FREE_SPACE); | 
| 3632 | 3711 | ||
| 3633 | spin_lock(&space_info->lock); | 3712 | spin_lock(&space_info->lock); | 
| 3634 | if (reserved > space_info->bytes_may_use) | 3713 | if (space_info->bytes_used + space_info->bytes_reserved + | 
| 3635 | reclaimed += reserved - space_info->bytes_may_use; | 3714 | space_info->bytes_pinned + space_info->bytes_readonly + | 
| 3636 | reserved = space_info->bytes_may_use; | 3715 | space_info->bytes_may_use + orig <= | 
| 3716 | space_info->total_bytes) { | ||
| 3717 | spin_unlock(&space_info->lock); | ||
| 3718 | break; | ||
| 3719 | } | ||
| 3637 | spin_unlock(&space_info->lock); | 3720 | spin_unlock(&space_info->lock); | 
| 3638 | 3721 | ||
| 3639 | loops++; | 3722 | loops++; | 
| 3640 | |||
| 3641 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
| 3642 | break; | ||
| 3643 | |||
| 3644 | if (trans && trans->transaction->blocked) | ||
| 3645 | return -EAGAIN; | ||
| 3646 | |||
| 3647 | if (wait_ordered && !trans) { | 3723 | if (wait_ordered && !trans) { | 
| 3648 | btrfs_wait_ordered_extents(root, 0, 0); | 3724 | btrfs_wait_ordered_extents(root, 0, 0); | 
| 3649 | } else { | 3725 | } else { | 
| 3650 | time_left = schedule_timeout_interruptible(1); | 3726 | time_left = schedule_timeout_killable(1); | 
| 3651 | |||
| 3652 | /* We were interrupted, exit */ | ||
| 3653 | if (time_left) | 3727 | if (time_left) | 
| 3654 | break; | 3728 | break; | 
| 3655 | } | 3729 | } | 
| 3656 | 3730 | smp_mb(); | |
| 3657 | /* we've kicked the IO a few times, if anything has been freed, | 3731 | delalloc_bytes = root->fs_info->delalloc_bytes; | 
| 3658 | * exit. There is no sense in looping here for a long time | ||
| 3659 | * when we really need to commit the transaction, or there are | ||
| 3660 | * just too many writers without enough free space | ||
| 3661 | */ | ||
| 3662 | |||
| 3663 | if (loops > 3) { | ||
| 3664 | smp_mb(); | ||
| 3665 | if (progress != space_info->reservation_progress) | ||
| 3666 | break; | ||
| 3667 | } | ||
| 3668 | |||
| 3669 | } | 3732 | } | 
| 3670 | |||
| 3671 | return reclaimed >= to_reclaim; | ||
| 3672 | } | 3733 | } | 
| 3673 | 3734 | ||
| 3674 | /** | 3735 | /** | 
| @@ -3728,6 +3789,58 @@ commit: | |||
| 3728 | return btrfs_commit_transaction(trans, root); | 3789 | return btrfs_commit_transaction(trans, root); | 
| 3729 | } | 3790 | } | 
| 3730 | 3791 | ||
/*
 * Steps that flush_space() can be asked to perform.  The values are
 * consecutive, starting at 1 and ending at COMMIT_TRANS, because
 * reserve_metadata_bytes() advances through them by incrementing its
 * flush_state counter.
 */
enum flush_state {
	/* shrink_delalloc() with wait_ordered == false */
	FLUSH_DELALLOC = 1,
	/* shrink_delalloc() with wait_ordered == true */
	FLUSH_DELALLOC_WAIT,
	/* btrfs_run_delayed_items_nr() with a count derived from num_bytes */
	FLUSH_DELAYED_ITEMS_NR,
	/* btrfs_run_delayed_items_nr() with nr == -1 */
	FLUSH_DELAYED_ITEMS,
	/* may_commit_transaction() */
	COMMIT_TRANS,
};
| 3799 | |||
| 3800 | static int flush_space(struct btrfs_root *root, | ||
| 3801 | struct btrfs_space_info *space_info, u64 num_bytes, | ||
| 3802 | u64 orig_bytes, int state) | ||
| 3803 | { | ||
| 3804 | struct btrfs_trans_handle *trans; | ||
| 3805 | int nr; | ||
| 3806 | int ret = 0; | ||
| 3807 | |||
| 3808 | switch (state) { | ||
| 3809 | case FLUSH_DELALLOC: | ||
| 3810 | case FLUSH_DELALLOC_WAIT: | ||
| 3811 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
| 3812 | state == FLUSH_DELALLOC_WAIT); | ||
| 3813 | break; | ||
| 3814 | case FLUSH_DELAYED_ITEMS_NR: | ||
| 3815 | case FLUSH_DELAYED_ITEMS: | ||
| 3816 | if (state == FLUSH_DELAYED_ITEMS_NR) { | ||
| 3817 | u64 bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
| 3818 | |||
| 3819 | nr = (int)div64_u64(num_bytes, bytes); | ||
| 3820 | if (!nr) | ||
| 3821 | nr = 1; | ||
| 3822 | nr *= 2; | ||
| 3823 | } else { | ||
| 3824 | nr = -1; | ||
| 3825 | } | ||
| 3826 | trans = btrfs_join_transaction(root); | ||
| 3827 | if (IS_ERR(trans)) { | ||
| 3828 | ret = PTR_ERR(trans); | ||
| 3829 | break; | ||
| 3830 | } | ||
| 3831 | ret = btrfs_run_delayed_items_nr(trans, root, nr); | ||
| 3832 | btrfs_end_transaction(trans, root); | ||
| 3833 | break; | ||
| 3834 | case COMMIT_TRANS: | ||
| 3835 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | ||
| 3836 | break; | ||
| 3837 | default: | ||
| 3838 | ret = -ENOSPC; | ||
| 3839 | break; | ||
| 3840 | } | ||
| 3841 | |||
| 3842 | return ret; | ||
| 3843 | } | ||
| 3731 | /** | 3844 | /** | 
| 3732 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 3845 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 
| 3733 | * @root - the root we're allocating for | 3846 | * @root - the root we're allocating for | 
| @@ -3749,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root, | |||
| 3749 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3862 | struct btrfs_space_info *space_info = block_rsv->space_info; | 
| 3750 | u64 used; | 3863 | u64 used; | 
| 3751 | u64 num_bytes = orig_bytes; | 3864 | u64 num_bytes = orig_bytes; | 
| 3752 | int retries = 0; | 3865 | int flush_state = FLUSH_DELALLOC; | 
| 3753 | int ret = 0; | 3866 | int ret = 0; | 
| 3754 | bool committed = false; | ||
| 3755 | bool flushing = false; | 3867 | bool flushing = false; | 
| 3756 | bool wait_ordered = false; | 3868 | bool committed = false; | 
| 3757 | 3869 | ||
| 3758 | again: | 3870 | again: | 
| 3759 | ret = 0; | 3871 | ret = 0; | 
| @@ -3812,9 +3924,8 @@ again: | |||
| 3812 | * amount plus the amount of bytes that we need for this | 3924 | * amount plus the amount of bytes that we need for this | 
| 3813 | * reservation. | 3925 | * reservation. | 
| 3814 | */ | 3926 | */ | 
| 3815 | wait_ordered = true; | ||
| 3816 | num_bytes = used - space_info->total_bytes + | 3927 | num_bytes = used - space_info->total_bytes + | 
| 3817 | (orig_bytes * (retries + 1)); | 3928 | (orig_bytes * 2); | 
| 3818 | } | 3929 | } | 
| 3819 | 3930 | ||
| 3820 | if (ret) { | 3931 | if (ret) { | 
| @@ -3867,8 +3978,6 @@ again: | |||
| 3867 | trace_btrfs_space_reservation(root->fs_info, | 3978 | trace_btrfs_space_reservation(root->fs_info, | 
| 3868 | "space_info", space_info->flags, orig_bytes, 1); | 3979 | "space_info", space_info->flags, orig_bytes, 1); | 
| 3869 | ret = 0; | 3980 | ret = 0; | 
| 3870 | } else { | ||
| 3871 | wait_ordered = true; | ||
| 3872 | } | 3981 | } | 
| 3873 | } | 3982 | } | 
| 3874 | 3983 | ||
| @@ -3887,36 +3996,13 @@ again: | |||
| 3887 | if (!ret || !flush) | 3996 | if (!ret || !flush) | 
| 3888 | goto out; | 3997 | goto out; | 
| 3889 | 3998 | ||
| 3890 | /* | 3999 | ret = flush_space(root, space_info, num_bytes, orig_bytes, | 
| 3891 | * We do synchronous shrinking since we don't actually unreserve | 4000 | flush_state); | 
| 3892 | * metadata until after the IO is completed. | 4001 | flush_state++; | 
| 3893 | */ | 4002 | if (!ret) | 
| 3894 | ret = shrink_delalloc(root, num_bytes, wait_ordered); | ||
| 3895 | if (ret < 0) | ||
| 3896 | goto out; | ||
| 3897 | |||
| 3898 | ret = 0; | ||
| 3899 | |||
| 3900 | /* | ||
| 3901 | * So if we were overcommitted it's possible that somebody else flushed | ||
| 3902 | * out enough space and we simply didn't have enough space to reclaim, | ||
| 3903 | * so go back around and try again. | ||
| 3904 | */ | ||
| 3905 | if (retries < 2) { | ||
| 3906 | wait_ordered = true; | ||
| 3907 | retries++; | ||
| 3908 | goto again; | 4003 | goto again; | 
| 3909 | } | 4004 | else if (flush_state <= COMMIT_TRANS) | 
| 3910 | |||
| 3911 | ret = -ENOSPC; | ||
| 3912 | if (committed) | ||
| 3913 | goto out; | ||
| 3914 | |||
| 3915 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | ||
| 3916 | if (!ret) { | ||
| 3917 | committed = true; | ||
| 3918 | goto again; | 4005 | goto again; | 
| 3919 | } | ||
| 3920 | 4006 | ||
| 3921 | out: | 4007 | out: | 
| 3922 | if (flushing) { | 4008 | if (flushing) { | 
| @@ -3934,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv( | |||
| 3934 | { | 4020 | { | 
| 3935 | struct btrfs_block_rsv *block_rsv = NULL; | 4021 | struct btrfs_block_rsv *block_rsv = NULL; | 
| 3936 | 4022 | ||
| 3937 | if (root->ref_cows || root == root->fs_info->csum_root) | 4023 | if (root->ref_cows) | 
| 4024 | block_rsv = trans->block_rsv; | ||
| 4025 | |||
| 4026 | if (root == root->fs_info->csum_root && trans->adding_csums) | ||
| 3938 | block_rsv = trans->block_rsv; | 4027 | block_rsv = trans->block_rsv; | 
| 3939 | 4028 | ||
| 3940 | if (!block_rsv) | 4029 | if (!block_rsv) | 
| @@ -4286,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 4286 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 4375 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 
| 4287 | struct btrfs_root *root) | 4376 | struct btrfs_root *root) | 
| 4288 | { | 4377 | { | 
| 4378 | if (!trans->block_rsv) | ||
| 4379 | return; | ||
| 4380 | |||
| 4289 | if (!trans->bytes_reserved) | 4381 | if (!trans->bytes_reserved) | 
| 4290 | return; | 4382 | return; | 
| 4291 | 4383 | ||
| @@ -4444,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4444 | int ret; | 4536 | int ret; | 
| 4445 | 4537 | ||
| 4446 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 4538 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 
| 4447 | if (btrfs_is_free_space_inode(root, inode)) | 4539 | if (btrfs_is_free_space_inode(inode)) | 
| 4448 | flush = 0; | 4540 | flush = 0; | 
| 4449 | 4541 | ||
| 4450 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4542 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 
| @@ -4476,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4476 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 4568 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 
| 4477 | spin_unlock(&BTRFS_I(inode)->lock); | 4569 | spin_unlock(&BTRFS_I(inode)->lock); | 
| 4478 | 4570 | ||
| 4571 | if (root->fs_info->quota_enabled) { | ||
| 4572 | ret = btrfs_qgroup_reserve(root, num_bytes + | ||
| 4573 | nr_extents * root->leafsize); | ||
| 4574 | if (ret) | ||
| 4575 | return ret; | ||
| 4576 | } | ||
| 4577 | |||
| 4479 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4578 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 
| 4480 | if (ret) { | 4579 | if (ret) { | 
| 4481 | u64 to_free = 0; | 4580 | u64 to_free = 0; | 
| @@ -4554,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
| 4554 | 4653 | ||
| 4555 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | 4654 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | 
| 4556 | btrfs_ino(inode), to_free, 0); | 4655 | btrfs_ino(inode), to_free, 0); | 
| 4656 | if (root->fs_info->quota_enabled) { | ||
| 4657 | btrfs_qgroup_free(root, num_bytes + | ||
| 4658 | dropped * root->leafsize); | ||
| 4659 | } | ||
| 4660 | |||
| 4557 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4661 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 
| 4558 | to_free); | 4662 | to_free); | 
| 4559 | } | 4663 | } | 
| @@ -5190,8 +5294,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
| 5190 | rb_erase(&head->node.rb_node, &delayed_refs->root); | 5294 | rb_erase(&head->node.rb_node, &delayed_refs->root); | 
| 5191 | 5295 | ||
| 5192 | delayed_refs->num_entries--; | 5296 | delayed_refs->num_entries--; | 
| 5193 | if (waitqueue_active(&delayed_refs->seq_wait)) | 5297 | smp_mb(); | 
| 5194 | wake_up(&delayed_refs->seq_wait); | 5298 | if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) | 
| 5299 | wake_up(&root->fs_info->tree_mod_seq_wait); | ||
| 5195 | 5300 | ||
| 5196 | /* | 5301 | /* | 
| 5197 | * we don't take a ref on the node because we're removing it from the | 5302 | * we don't take a ref on the node because we're removing it from the | 
| @@ -5748,7 +5853,11 @@ loop: | |||
| 5748 | ret = do_chunk_alloc(trans, root, num_bytes + | 5853 | ret = do_chunk_alloc(trans, root, num_bytes + | 
| 5749 | 2 * 1024 * 1024, data, | 5854 | 2 * 1024 * 1024, data, | 
| 5750 | CHUNK_ALLOC_LIMITED); | 5855 | CHUNK_ALLOC_LIMITED); | 
| 5751 | if (ret < 0) { | 5856 | /* | 
| 5857 | * Do not bail out on ENOSPC since we | ||
| 5858 | * can do more things. | ||
| 5859 | */ | ||
| 5860 | if (ret < 0 && ret != -ENOSPC) { | ||
| 5752 | btrfs_abort_transaction(trans, | 5861 | btrfs_abort_transaction(trans, | 
| 5753 | root, ret); | 5862 | root, ret); | 
| 5754 | goto out; | 5863 | goto out; | 
| @@ -5816,13 +5925,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 5816 | again: | 5925 | again: | 
| 5817 | list_for_each_entry(cache, &info->block_groups[index], list) { | 5926 | list_for_each_entry(cache, &info->block_groups[index], list) { | 
| 5818 | spin_lock(&cache->lock); | 5927 | spin_lock(&cache->lock); | 
| 5819 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 5928 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", | 
| 5820 | "%llu pinned %llu reserved\n", | ||
| 5821 | (unsigned long long)cache->key.objectid, | 5929 | (unsigned long long)cache->key.objectid, | 
| 5822 | (unsigned long long)cache->key.offset, | 5930 | (unsigned long long)cache->key.offset, | 
| 5823 | (unsigned long long)btrfs_block_group_used(&cache->item), | 5931 | (unsigned long long)btrfs_block_group_used(&cache->item), | 
| 5824 | (unsigned long long)cache->pinned, | 5932 | (unsigned long long)cache->pinned, | 
| 5825 | (unsigned long long)cache->reserved); | 5933 | (unsigned long long)cache->reserved, | 
| 5934 | cache->ro ? "[readonly]" : ""); | ||
| 5826 | btrfs_dump_free_space(cache, bytes); | 5935 | btrfs_dump_free_space(cache, bytes); | 
| 5827 | spin_unlock(&cache->lock); | 5936 | spin_unlock(&cache->lock); | 
| 5828 | } | 5937 | } | 
| @@ -7610,8 +7719,21 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7610 | INIT_LIST_HEAD(&cache->list); | 7719 | INIT_LIST_HEAD(&cache->list); | 
| 7611 | INIT_LIST_HEAD(&cache->cluster_list); | 7720 | INIT_LIST_HEAD(&cache->cluster_list); | 
| 7612 | 7721 | ||
| 7613 | if (need_clear) | 7722 | if (need_clear) { | 
| 7723 | /* | ||
| 7724 | * When we mount with old space cache, we need to | ||
| 7725 | * set BTRFS_DC_CLEAR and set dirty flag. | ||
| 7726 | * | ||
| 7727 | * a) Setting 'BTRFS_DC_CLEAR' makes sure that we | ||
| 7728 | * truncate the old free space cache inode and | ||
| 7729 | * setup a new one. | ||
| 7730 | * b) Setting 'dirty flag' makes sure that we flush | ||
| 7731 | * the new space cache info onto disk. | ||
| 7732 | */ | ||
| 7614 | cache->disk_cache_state = BTRFS_DC_CLEAR; | 7733 | cache->disk_cache_state = BTRFS_DC_CLEAR; | 
| 7734 | if (btrfs_test_opt(root, SPACE_CACHE)) | ||
| 7735 | cache->dirty = 1; | ||
| 7736 | } | ||
| 7615 | 7737 | ||
| 7616 | read_extent_buffer(leaf, &cache->item, | 7738 | read_extent_buffer(leaf, &cache->item, | 
| 7617 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 7739 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 
