Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 123
1 file changed, 58 insertions(+), 65 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c47..ba58024d40d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		}
 
 		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish. If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 */
+		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+					 locked_ref);
+
+		/*
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
 		 */
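
The hunk above queues a call to btrfs_merge_delayed_refs() so that a pending drop and a later re-add of the same reference (the relocate case described in the new comment) cancel out before anything is run. As a rough illustration of that cancellation idea, here is a minimal standalone C sketch; the struct and function names are invented for the example and are not the btrfs API.

/*
 * Standalone sketch (not kernel code): opposing pending modifications to
 * the same reference are netted out before anything is run, so a drop
 * queued by relocation cannot be processed ahead of the re-add that
 * follows it.
 */
#include <stdio.h>

struct pending_ref {
	int action;	/* +1 for an add, -1 for a drop */
	int live;	/* still queued? */
};

/* Cancel matching add/drop pairs; return how many entries remain live. */
static int merge_pending(struct pending_ref *refs, int n)
{
	int remaining = n;

	for (int i = 0; i < n; i++) {
		if (!refs[i].live)
			continue;
		for (int j = i + 1; j < n; j++) {
			if (refs[j].live &&
			    refs[j].action == -refs[i].action) {
				refs[i].live = refs[j].live = 0;
				remaining -= 2;
				break;
			}
		}
	}
	return remaining;
}

int main(void)
{
	/* a drop from relocation followed by the re-add of the same ref */
	struct pending_ref refs[] = { { -1, 1 }, { +1, 1 } };

	printf("refs left to run: %d\n", merge_pending(refs, 2)); /* 0 */
	return 0;
}
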
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		/*
-		 * we modified num_entries, but as we're currently running
-		 * delayed refs, skip
-		 * wake_up(&delayed_refs->seq_wait);
-		 * here.
-		 */
+		if (locked_ref) {
+			/*
+			 * when we play the delayed ref, also correct the
+			 * ref_mod on head
+			 */
+			switch (ref->action) {
+			case BTRFS_ADD_DELAYED_REF:
+			case BTRFS_ADD_DELAYED_EXTENT:
+				locked_ref->node.ref_mod -= ref->ref_mod;
+				break;
+			case BTRFS_DROP_DELAYED_REF:
+				locked_ref->node.ref_mod += ref->ref_mod;
+				break;
+			default:
+				WARN_ON(1);
+			}
+		}
 		spin_unlock(&delayed_refs->lock);
 
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
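
The new switch above keeps the head node's ref_mod, which tracks the net of everything still queued under that head, consistent as individual add/drop entries are played. A minimal standalone sketch of that bookkeeping, with invented names and constants (not the kernel's), might look like this:

/* Sketch only: playing one queued entry backs its contribution out of the head. */
#include <assert.h>

enum { SKETCH_ADD_REF, SKETCH_DROP_REF };

struct sketch_ref {
	int action;
	int ref_mod;	/* how many references this entry adds or drops */
};

struct sketch_head {
	int ref_mod;	/* net of everything still queued under this head */
};

static void play_one_ref(struct sketch_head *head, const struct sketch_ref *ref)
{
	/* ... the reference count update itself would happen here ... */

	/* keep the head's net consistent with what is still queued */
	if (ref->action == SKETCH_ADD_REF)
		head->ref_mod -= ref->ref_mod;
	else
		head->ref_mod += ref->ref_mod;
}

int main(void)
{
	struct sketch_head head = { .ref_mod = 2 };	/* two adds queued */
	struct sketch_ref add = { SKETCH_ADD_REF, 2 };

	play_one_ref(&head, &add);
	assert(head.ref_mod == 0);	/* nothing left outstanding */
	return 0;
}
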
@@ -2350,22 +2371,6 @@ next:
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-			       struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs,
-			       struct list_head *first_seq)
-{
-	spin_unlock(&delayed_refs->lock);
-	pr_debug("waiting for more refs (num %ld, first %p)\n",
-		 num_refs, first_seq);
-	wait_event(fs_info->tree_mod_seq_wait,
-		   num_refs != delayed_refs->num_entries ||
-		   fs_info->tree_mod_seq_list.next != first_seq);
-	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-	spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
-	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
 	int run_most = 0;
-	unsigned long num_refs = 0;
-	int consider_waiting;
+	int loops;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
 again:
-	consider_waiting = 0;
+	loops = 0;
 	spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
 		if (ret)
 			break;
 
-		if (delayed_start >= delayed_refs->run_delayed_start) {
-			if (consider_waiting == 0) {
-				/*
-				 * btrfs_find_ref_cluster looped. let's do one
-				 * more cycle. if we don't run any delayed ref
-				 * during that cycle (because we can't because
-				 * all of them are blocked) and if the number of
-				 * refs doesn't change, we avoid busy waiting.
-				 */
-				consider_waiting = 1;
-				num_refs = delayed_refs->num_entries;
-				first_seq = root->fs_info->tree_mod_seq_list.next;
-			} else {
-				wait_for_more_refs(root->fs_info, delayed_refs,
-						   num_refs, first_seq);
-				/*
-				 * after waiting, things have changed. we
-				 * dropped the lock and someone else might have
-				 * run some refs, built new clusters and so on.
-				 * therefore, we restart staleness detection.
-				 */
-				consider_waiting = 0;
-			}
-		}
-
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
 		if (count == 0)
 			break;
 
-		if (ret || delayed_refs->run_delayed_start == 0) {
+		if (delayed_start >= delayed_refs->run_delayed_start) {
+			if (loops == 0) {
+				/*
+				 * btrfs_find_ref_cluster looped. let's do one
+				 * more cycle. if we don't run any delayed ref
+				 * during that cycle (because we can't because
+				 * all of them are blocked), bail out.
+				 */
+				loops = 1;
+			} else {
+				/*
+				 * no runnable refs left, stop trying
+				 */
+				BUG_ON(run_all);
+				break;
+			}
+		}
+		if (ret) {
 			/* refs were run, let's reset staleness detection */
-			consider_waiting = 0;
+			loops = 0;
 		}
 	}
 
@@ -3007,17 +3002,16 @@ again:
 	}
 	spin_unlock(&block_group->lock);
 
-	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	/*
+	 * Try to preallocate enough space based on how big the block group is.
+	 * Keep in mind this has to include any pinned space which could end up
+	 * taking up quite a bit since it's not folded into the other space
+	 * cache.
+	 */
+	num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
 	if (!num_pages)
 		num_pages = 1;
 
-	/*
-	 * Just to make absolutely sure we have enough space, we're going to
-	 * preallocate 12 pages worth of space for each block group. In
-	 * practice we ought to use at most 8, but we need extra space so we can
-	 * add our header and have a terminator between the extents and the
-	 * bitmaps.
-	 */
 	num_pages *= 16;
 	num_pages *= PAGE_CACHE_SIZE;
 
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	if (root->fs_info->quota_enabled) {
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 			return ret;
+		}
 	}
 
 	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
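
The error-path fix above appears to address a return taken while BTRFS_I(inode)->delalloc_mutex is still held: the unlock is added just before the early return on qgroup reservation failure. As a generic illustration of why every early return after taking a lock needs a matching unlock, here is a small pthread-based sketch; it is a pattern demo only, not btrfs code:

/* Sketch only: returning with the lock held would deadlock the next caller. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reserve_lock = PTHREAD_MUTEX_INITIALIZER;

static int quota_reserve_ok;	/* toggle to exercise the failure path */

static int reserve_metadata(void)
{
	int ret = 0;

	pthread_mutex_lock(&reserve_lock);

	if (!quota_reserve_ok) {
		/* the bug was returning here with the lock still held */
		ret = -EDQUOT;
		goto out;
	}

	/* ... the actual reservation work would go here ... */

out:
	pthread_mutex_unlock(&reserve_lock);
	return ret;
}

int main(void)
{
	printf("first try:  %d\n", reserve_metadata());
	quota_reserve_ok = 1;
	printf("second try: %d\n", reserve_metadata());	/* no deadlock */
	return 0;
}
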
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	smp_mb();
-	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the