diff options
| author | Josef Bacik <josef@toxicpanda.com> | 2019-06-18 16:09:25 -0400 |
|---|---|---|
| committer | David Sterba <dsterba@suse.com> | 2019-07-02 06:30:53 -0400 |
| commit | 0d9764f6d0fb9dd4d4b773b481f259c0567870c2 (patch) | |
| tree | 0961e6ea42d1c7b3eb462739f643ea440df09fec /fs/btrfs/extent-tree.c | |
| parent | 5da6afeb32e97f956aa3d599b7f94ceb36fcf854 (diff) | |
btrfs: move reserve_metadata_bytes and supporting code to space-info.c
This moves all of the metadata reservation code into space-info.c.
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 709 |
1 files changed, 7 insertions, 702 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e0f1ec0ca4a4..c887f3352341 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -4346,701 +4346,6 @@ out: | |||
| 4346 | return ret; | 4346 | return ret; |
| 4347 | } | 4347 | } |
| 4348 | 4348 | ||
| 4349 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info, | ||
| 4350 | unsigned long nr_pages, int nr_items) | ||
| 4351 | { | ||
| 4352 | struct super_block *sb = fs_info->sb; | ||
| 4353 | |||
| 4354 | if (down_read_trylock(&sb->s_umount)) { | ||
| 4355 | writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); | ||
| 4356 | up_read(&sb->s_umount); | ||
| 4357 | } else { | ||
| 4358 | /* | ||
| 4359 | * We needn't worry about the filesystem going from r/w to r/o even | ||
| 4360 | * though we don't acquire ->s_umount here, because the filesystem | ||
| 4361 | * should guarantee that the delalloc inodes list is empty once | ||
| 4362 | * the filesystem is read-only (all dirty pages are written to | ||
| 4363 | * the disk). | ||
| 4364 | */ | ||
| 4365 | btrfs_start_delalloc_roots(fs_info, nr_items); | ||
| 4366 | if (!current->journal_info) | ||
| 4367 | btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1); | ||
| 4368 | } | ||
| 4369 | } | ||
| 4370 | |||
| 4371 | static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, | ||
| 4372 | u64 to_reclaim) | ||
| 4373 | { | ||
| 4374 | u64 bytes; | ||
| 4375 | u64 nr; | ||
| 4376 | |||
| 4377 | bytes = btrfs_calc_trans_metadata_size(fs_info, 1); | ||
| 4378 | nr = div64_u64(to_reclaim, bytes); | ||
| 4379 | if (!nr) | ||
| 4380 | nr = 1; | ||
| 4381 | return nr; | ||
| 4382 | } | ||
| 4383 | |||
| 4384 | #define EXTENT_SIZE_PER_ITEM SZ_256K | ||
| 4385 | |||
| 4386 | /* | ||
| 4387 | * shrink metadata reservation for delalloc | ||
| 4388 | */ | ||
| 4389 | static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, | ||
| 4390 | u64 orig, bool wait_ordered) | ||
| 4391 | { | ||
| 4392 | struct btrfs_space_info *space_info; | ||
| 4393 | struct btrfs_trans_handle *trans; | ||
| 4394 | u64 delalloc_bytes; | ||
| 4395 | u64 dio_bytes; | ||
| 4396 | u64 async_pages; | ||
| 4397 | u64 items; | ||
| 4398 | long time_left; | ||
| 4399 | unsigned long nr_pages; | ||
| 4400 | int loops; | ||
| 4401 | |||
| 4402 | /* Calculate the number of pages we need to flush for space reservation */ | ||
| 4403 | items = calc_reclaim_items_nr(fs_info, to_reclaim); | ||
| 4404 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; | ||
| 4405 | |||
| 4406 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
| 4407 | space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
| 4408 | |||
| 4409 | delalloc_bytes = percpu_counter_sum_positive( | ||
| 4410 | &fs_info->delalloc_bytes); | ||
| 4411 | dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); | ||
| 4412 | if (delalloc_bytes == 0 && dio_bytes == 0) { | ||
| 4413 | if (trans) | ||
| 4414 | return; | ||
| 4415 | if (wait_ordered) | ||
| 4416 | btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); | ||
| 4417 | return; | ||
| 4418 | } | ||
| 4419 | |||
| 4420 | /* | ||
| 4421 | * If we are doing more ordered than delalloc we need to just wait on | ||
| 4422 | * ordered extents, otherwise we'll waste time trying to flush delalloc | ||
| 4423 | * that likely won't give us the space back we need. | ||
| 4424 | */ | ||
| 4425 | if (dio_bytes > delalloc_bytes) | ||
| 4426 | wait_ordered = true; | ||
| 4427 | |||
| 4428 | loops = 0; | ||
| 4429 | while ((delalloc_bytes || dio_bytes) && loops < 3) { | ||
| 4430 | nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; | ||
| 4431 | |||
| 4432 | /* | ||
| 4433 | * Triggers inode writeback for up to nr_pages. This will invoke | ||
| 4434 | * ->writepages callback and trigger delalloc filling | ||
| 4435 | * (btrfs_run_delalloc_range()). | ||
| 4436 | */ | ||
| 4437 | btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items); | ||
| 4438 | |||
| 4439 | /* | ||
| 4440 | * We need to wait for the compressed pages to start before | ||
| 4441 | * we continue. | ||
| 4442 | */ | ||
| 4443 | async_pages = atomic_read(&fs_info->async_delalloc_pages); | ||
| 4444 | if (!async_pages) | ||
| 4445 | goto skip_async; | ||
| 4446 | |||
| 4447 | /* | ||
| 4448 | * Calculate how many compressed pages we want to be written | ||
| 4449 | * before we continue. I.e if there are more async pages than we | ||
| 4450 | * require wait_event will wait until nr_pages are written. | ||
| 4451 | */ | ||
| 4452 | if (async_pages <= nr_pages) | ||
| 4453 | async_pages = 0; | ||
| 4454 | else | ||
| 4455 | async_pages -= nr_pages; | ||
| 4456 | |||
| 4457 | wait_event(fs_info->async_submit_wait, | ||
| 4458 | atomic_read(&fs_info->async_delalloc_pages) <= | ||
| 4459 | (int)async_pages); | ||
| 4460 | skip_async: | ||
| 4461 | spin_lock(&space_info->lock); | ||
| 4462 | if (list_empty(&space_info->tickets) && | ||
| 4463 | list_empty(&space_info->priority_tickets)) { | ||
| 4464 | spin_unlock(&space_info->lock); | ||
| 4465 | break; | ||
| 4466 | } | ||
| 4467 | spin_unlock(&space_info->lock); | ||
| 4468 | |||
| 4469 | loops++; | ||
| 4470 | if (wait_ordered && !trans) { | ||
| 4471 | btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); | ||
| 4472 | } else { | ||
| 4473 | time_left = schedule_timeout_killable(1); | ||
| 4474 | if (time_left) | ||
| 4475 | break; | ||
| 4476 | } | ||
| 4477 | delalloc_bytes = percpu_counter_sum_positive( | ||
| 4478 | &fs_info->delalloc_bytes); | ||
| 4479 | dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); | ||
| 4480 | } | ||
| 4481 | } | ||
| 4482 | |||
| 4483 | /** | ||
| 4484 | * may_commit_transaction - possibly commit the transaction if it's ok to | ||
| 4485 | * @fs_info - the fs_info whose transaction we may commit | ||
| 4486 | * @space_info - the space info whose first pending ticket we want to satisfy | ||
| 4487 | * | ||
| 4488 | * This will check to make sure that committing the transaction will actually | ||
| 4489 | * get us somewhere and then commit the transaction if it does. Otherwise it | ||
| 4490 | * will return -ENOSPC. Returns -EAGAIN if the caller already holds a | ||
| 4491 | * transaction handle and 0 if there is nothing left to reserve. | ||
| 4492 | */ | ||
| 4493 | static int may_commit_transaction(struct btrfs_fs_info *fs_info, | ||
| 4494 | struct btrfs_space_info *space_info) | ||
| 4495 | { | ||
| 4496 | struct reserve_ticket *ticket = NULL; | ||
| 4497 | struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; | ||
| 4498 | struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; | ||
| 4499 | struct btrfs_trans_handle *trans; | ||
| 4500 | u64 bytes_needed; | ||
| 4501 | u64 reclaim_bytes = 0; | ||
| 4502 | |||
| 4503 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
| 4504 | if (trans) | ||
| 4505 | return -EAGAIN; | ||
| 4506 | |||
| 4507 | spin_lock(&space_info->lock); | ||
| 4508 | if (!list_empty(&space_info->priority_tickets)) | ||
| 4509 | ticket = list_first_entry(&space_info->priority_tickets, | ||
| 4510 | struct reserve_ticket, list); | ||
| 4511 | else if (!list_empty(&space_info->tickets)) | ||
| 4512 | ticket = list_first_entry(&space_info->tickets, | ||
| 4513 | struct reserve_ticket, list); | ||
| 4514 | bytes_needed = (ticket) ? ticket->bytes : 0; | ||
| 4515 | spin_unlock(&space_info->lock); | ||
| 4516 | |||
| 4517 | if (!bytes_needed) | ||
| 4518 | return 0; | ||
| 4519 | |||
| 4520 | trans = btrfs_join_transaction(fs_info->extent_root); | ||
| 4521 | if (IS_ERR(trans)) | ||
| 4522 | return PTR_ERR(trans); | ||
| 4523 | |||
| 4524 | /* | ||
| 4525 | * See if there is enough pinned space to make this reservation, or if | ||
| 4526 | * we have block groups that are going to be freed, allowing us to | ||
| 4527 | * possibly do a chunk allocation the next loop through. | ||
| 4528 | */ | ||
| 4529 | if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || | ||
| 4530 | __percpu_counter_compare(&space_info->total_bytes_pinned, | ||
| 4531 | bytes_needed, | ||
| 4532 | BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) | ||
| 4533 | goto commit; | ||
| 4534 | |||
| 4535 | /* | ||
| 4536 | * See if there is some space in the delayed insertion reservation for | ||
| 4537 | * this reservation. | ||
| 4538 | */ | ||
| 4539 | if (space_info != delayed_rsv->space_info) | ||
| 4540 | goto enospc; | ||
| 4541 | |||
| 4542 | spin_lock(&delayed_rsv->lock); | ||
| 4543 | reclaim_bytes += delayed_rsv->reserved; | ||
| 4544 | spin_unlock(&delayed_rsv->lock); | ||
| 4545 | |||
| 4546 | spin_lock(&delayed_refs_rsv->lock); | ||
| 4547 | reclaim_bytes += delayed_refs_rsv->reserved; | ||
| 4548 | spin_unlock(&delayed_refs_rsv->lock); | ||
| 4549 | if (reclaim_bytes >= bytes_needed) | ||
| 4550 | goto commit; | ||
| 4551 | bytes_needed -= reclaim_bytes; | ||
| 4552 | |||
| 4553 | if (__percpu_counter_compare(&space_info->total_bytes_pinned, | ||
| 4554 | bytes_needed, | ||
| 4555 | BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) | ||
| 4556 | goto enospc; | ||
| 4557 | |||
| 4558 | commit: | ||
| 4559 | return btrfs_commit_transaction(trans); | ||
| 4560 | enospc: | ||
| 4561 | btrfs_end_transaction(trans); | ||
| 4562 | return -ENOSPC; | ||
| 4563 | } | ||
| 4564 | |||
| 4565 | /* | ||
| 4566 | * Try to flush some data based on policy set by @state. This is only advisory | ||
| 4567 | * and may fail for various reasons. The caller is supposed to examine the | ||
| 4568 | * state of @space_info to detect the outcome. | ||
| 4569 | */ | ||
| 4570 | static void flush_space(struct btrfs_fs_info *fs_info, | ||
| 4571 | struct btrfs_space_info *space_info, u64 num_bytes, | ||
| 4572 | int state) | ||
| 4573 | { | ||
| 4574 | struct btrfs_root *root = fs_info->extent_root; | ||
| 4575 | struct btrfs_trans_handle *trans; | ||
| 4576 | int nr; | ||
| 4577 | int ret = 0; | ||
| 4578 | |||
| 4579 | switch (state) { | ||
| 4580 | case FLUSH_DELAYED_ITEMS_NR: | ||
| 4581 | case FLUSH_DELAYED_ITEMS: | ||
| 4582 | if (state == FLUSH_DELAYED_ITEMS_NR) | ||
| 4583 | nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2; | ||
| 4584 | else | ||
| 4585 | nr = -1; | ||
| 4586 | |||
| 4587 | trans = btrfs_join_transaction(root); | ||
| 4588 | if (IS_ERR(trans)) { | ||
| 4589 | ret = PTR_ERR(trans); | ||
| 4590 | break; | ||
| 4591 | } | ||
| 4592 | ret = btrfs_run_delayed_items_nr(trans, nr); | ||
| 4593 | btrfs_end_transaction(trans); | ||
| 4594 | break; | ||
| 4595 | case FLUSH_DELALLOC: | ||
| 4596 | case FLUSH_DELALLOC_WAIT: | ||
| 4597 | shrink_delalloc(fs_info, num_bytes * 2, num_bytes, | ||
| 4598 | state == FLUSH_DELALLOC_WAIT); | ||
| 4599 | break; | ||
| 4600 | case FLUSH_DELAYED_REFS_NR: | ||
| 4601 | case FLUSH_DELAYED_REFS: | ||
| 4602 | trans = btrfs_join_transaction(root); | ||
| 4603 | if (IS_ERR(trans)) { | ||
| 4604 | ret = PTR_ERR(trans); | ||
| 4605 | break; | ||
| 4606 | } | ||
| 4607 | if (state == FLUSH_DELAYED_REFS_NR) | ||
| 4608 | nr = calc_reclaim_items_nr(fs_info, num_bytes); | ||
| 4609 | else | ||
| 4610 | nr = 0; | ||
| 4611 | btrfs_run_delayed_refs(trans, nr); | ||
| 4612 | btrfs_end_transaction(trans); | ||
| 4613 | break; | ||
| 4614 | case ALLOC_CHUNK: | ||
| 4615 | case ALLOC_CHUNK_FORCE: | ||
| 4616 | trans = btrfs_join_transaction(root); | ||
| 4617 | if (IS_ERR(trans)) { | ||
| 4618 | ret = PTR_ERR(trans); | ||
| 4619 | break; | ||
| 4620 | } | ||
| 4621 | ret = btrfs_chunk_alloc(trans, | ||
| 4622 | btrfs_metadata_alloc_profile(fs_info), | ||
| 4623 | (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE : | ||
| 4624 | CHUNK_ALLOC_FORCE); | ||
| 4625 | btrfs_end_transaction(trans); | ||
| 4626 | if (ret > 0 || ret == -ENOSPC) | ||
| 4627 | ret = 0; | ||
| 4628 | break; | ||
| 4629 | case COMMIT_TRANS: | ||
| 4630 | /* | ||
| 4631 | * If we have pending delayed iputs then we could free up a | ||
| 4632 | * bunch of pinned space, so make sure we run the iputs before | ||
| 4633 | * we do our pinned bytes check below. | ||
| 4634 | */ | ||
| 4635 | btrfs_run_delayed_iputs(fs_info); | ||
| 4636 | btrfs_wait_on_delayed_iputs(fs_info); | ||
| 4637 | |||
| 4638 | ret = may_commit_transaction(fs_info, space_info); | ||
| 4639 | break; | ||
| 4640 | default: | ||
| 4641 | ret = -ENOSPC; | ||
| 4642 | break; | ||
| 4643 | } | ||
| 4644 | |||
| 4645 | trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, | ||
| 4646 | ret); | ||
| 4647 | return; | ||
| 4648 | } | ||
| 4649 | |||
| 4650 | static inline u64 | ||
| 4651 | btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, | ||
| 4652 | struct btrfs_space_info *space_info, | ||
| 4653 | bool system_chunk) | ||
| 4654 | { | ||
| 4655 | struct reserve_ticket *ticket; | ||
| 4656 | u64 used; | ||
| 4657 | u64 expected; | ||
| 4658 | u64 to_reclaim = 0; | ||
| 4659 | |||
| 4660 | list_for_each_entry(ticket, &space_info->tickets, list) | ||
| 4661 | to_reclaim += ticket->bytes; | ||
| 4662 | list_for_each_entry(ticket, &space_info->priority_tickets, list) | ||
| 4663 | to_reclaim += ticket->bytes; | ||
| 4664 | if (to_reclaim) | ||
| 4665 | return to_reclaim; | ||
| 4666 | |||
| 4667 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | ||
| 4668 | if (btrfs_can_overcommit(fs_info, space_info, to_reclaim, | ||
| 4669 | BTRFS_RESERVE_FLUSH_ALL, system_chunk)) | ||
| 4670 | return 0; | ||
| 4671 | |||
| 4672 | used = btrfs_space_info_used(space_info, true); | ||
| 4673 | |||
| 4674 | if (btrfs_can_overcommit(fs_info, space_info, SZ_1M, | ||
| 4675 | BTRFS_RESERVE_FLUSH_ALL, system_chunk)) | ||
| 4676 | expected = div_factor_fine(space_info->total_bytes, 95); | ||
| 4677 | else | ||
| 4678 | expected = div_factor_fine(space_info->total_bytes, 90); | ||
| 4679 | |||
| 4680 | if (used > expected) | ||
| 4681 | to_reclaim = used - expected; | ||
| 4682 | else | ||
| 4683 | to_reclaim = 0; | ||
| 4684 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + | ||
| 4685 | space_info->bytes_reserved); | ||
| 4686 | return to_reclaim; | ||
| 4687 | } | ||
| 4688 | |||
| 4689 | static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, | ||
| 4690 | struct btrfs_space_info *space_info, | ||
| 4691 | u64 used, bool system_chunk) | ||
| 4692 | { | ||
| 4693 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); | ||
| 4694 | |||
| 4695 | /* If we're just plain full then async reclaim just slows us down. */ | ||
| 4696 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) | ||
| 4697 | return 0; | ||
| 4698 | |||
| 4699 | if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, | ||
| 4700 | system_chunk)) | ||
| 4701 | return 0; | ||
| 4702 | |||
| 4703 | return (used >= thresh && !btrfs_fs_closing(fs_info) && | ||
| 4704 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | ||
| 4705 | } | ||
| 4706 | |||
| 4707 | static bool wake_all_tickets(struct list_head *head) | ||
| 4708 | { | ||
| 4709 | struct reserve_ticket *ticket; | ||
| 4710 | |||
| 4711 | while (!list_empty(head)) { | ||
| 4712 | ticket = list_first_entry(head, struct reserve_ticket, list); | ||
| 4713 | list_del_init(&ticket->list); | ||
| 4714 | ticket->error = -ENOSPC; | ||
| 4715 | wake_up(&ticket->wait); | ||
| 4716 | if (ticket->bytes != ticket->orig_bytes) | ||
| 4717 | return true; | ||
| 4718 | } | ||
| 4719 | return false; | ||
| 4720 | } | ||
| 4721 | |||
| 4722 | /* | ||
| 4723 | * This is for normal flushers, we can wait all goddamned day if we want to. We | ||
| 4724 | * will loop and continuously try to flush as long as we are making progress. | ||
| 4725 | * We count progress as clearing off tickets each time we have to loop. | ||
| 4726 | */ | ||
| 4727 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | ||
| 4728 | { | ||
| 4729 | struct btrfs_fs_info *fs_info; | ||
| 4730 | struct btrfs_space_info *space_info; | ||
| 4731 | u64 to_reclaim; | ||
| 4732 | int flush_state; | ||
| 4733 | int commit_cycles = 0; | ||
| 4734 | u64 last_tickets_id; | ||
| 4735 | |||
| 4736 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | ||
| 4737 | space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
| 4738 | |||
| 4739 | spin_lock(&space_info->lock); | ||
| 4740 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, | ||
| 4741 | false); | ||
| 4742 | if (!to_reclaim) { | ||
| 4743 | space_info->flush = 0; | ||
| 4744 | spin_unlock(&space_info->lock); | ||
| 4745 | return; | ||
| 4746 | } | ||
| 4747 | last_tickets_id = space_info->tickets_id; | ||
| 4748 | spin_unlock(&space_info->lock); | ||
| 4749 | |||
| 4750 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
| 4751 | do { | ||
| 4752 | flush_space(fs_info, space_info, to_reclaim, flush_state); | ||
| 4753 | spin_lock(&space_info->lock); | ||
| 4754 | if (list_empty(&space_info->tickets)) { | ||
| 4755 | space_info->flush = 0; | ||
| 4756 | spin_unlock(&space_info->lock); | ||
| 4757 | return; | ||
| 4758 | } | ||
| 4759 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, | ||
| 4760 | space_info, | ||
| 4761 | false); | ||
| 4762 | if (last_tickets_id == space_info->tickets_id) { | ||
| 4763 | flush_state++; | ||
| 4764 | } else { | ||
| 4765 | last_tickets_id = space_info->tickets_id; | ||
| 4766 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
| 4767 | if (commit_cycles) | ||
| 4768 | commit_cycles--; | ||
| 4769 | } | ||
| 4770 | |||
| 4771 | /* | ||
| 4772 | * We don't want to force a chunk allocation until we've tried | ||
| 4773 | * pretty hard to reclaim space. Think of the case where we | ||
| 4774 | * freed up a bunch of space and so have a lot of pinned space | ||
| 4775 | * to reclaim. We would rather use that than possibly create an | ||
| 4776 | * underutilized metadata chunk. So if this is our first run | ||
| 4777 | * through the flushing state machine skip ALLOC_CHUNK_FORCE and | ||
| 4778 | * commit the transaction. If nothing has changed the next go | ||
| 4779 | * around then we can force a chunk allocation. | ||
| 4780 | */ | ||
| 4781 | if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles) | ||
| 4782 | flush_state++; | ||
| 4783 | |||
| 4784 | if (flush_state > COMMIT_TRANS) { | ||
| 4785 | commit_cycles++; | ||
| 4786 | if (commit_cycles > 2) { | ||
| 4787 | if (wake_all_tickets(&space_info->tickets)) { | ||
| 4788 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
| 4789 | commit_cycles--; | ||
| 4790 | } else { | ||
| 4791 | space_info->flush = 0; | ||
| 4792 | } | ||
| 4793 | } else { | ||
| 4794 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
| 4795 | } | ||
| 4796 | } | ||
| 4797 | spin_unlock(&space_info->lock); | ||
| 4798 | } while (flush_state <= COMMIT_TRANS); | ||
| 4799 | } | ||
| 4800 | |||
| 4801 | void btrfs_init_async_reclaim_work(struct work_struct *work) | ||
| 4802 | { | ||
| 4803 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | ||
| 4804 | } | ||
| 4805 | |||
| 4806 | static const enum btrfs_flush_state priority_flush_states[] = { | ||
| 4807 | FLUSH_DELAYED_ITEMS_NR, | ||
| 4808 | FLUSH_DELAYED_ITEMS, | ||
| 4809 | ALLOC_CHUNK, | ||
| 4810 | }; | ||
| 4811 | |||
| 4812 | static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, | ||
| 4813 | struct btrfs_space_info *space_info, | ||
| 4814 | struct reserve_ticket *ticket) | ||
| 4815 | { | ||
| 4816 | u64 to_reclaim; | ||
| 4817 | int flush_state; | ||
| 4818 | |||
| 4819 | spin_lock(&space_info->lock); | ||
| 4820 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, | ||
| 4821 | false); | ||
| 4822 | if (!to_reclaim) { | ||
| 4823 | spin_unlock(&space_info->lock); | ||
| 4824 | return; | ||
| 4825 | } | ||
| 4826 | spin_unlock(&space_info->lock); | ||
| 4827 | |||
| 4828 | flush_state = 0; | ||
| 4829 | do { | ||
| 4830 | flush_space(fs_info, space_info, to_reclaim, | ||
| 4831 | priority_flush_states[flush_state]); | ||
| 4832 | flush_state++; | ||
| 4833 | spin_lock(&space_info->lock); | ||
| 4834 | if (ticket->bytes == 0) { | ||
| 4835 | spin_unlock(&space_info->lock); | ||
| 4836 | return; | ||
| 4837 | } | ||
| 4838 | spin_unlock(&space_info->lock); | ||
| 4839 | } while (flush_state < ARRAY_SIZE(priority_flush_states)); | ||
| 4840 | } | ||
| 4841 | |||
| 4842 | static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, | ||
| 4843 | struct btrfs_space_info *space_info, | ||
| 4844 | struct reserve_ticket *ticket) | ||
| 4845 | |||
| 4846 | { | ||
| 4847 | DEFINE_WAIT(wait); | ||
| 4848 | u64 reclaim_bytes = 0; | ||
| 4849 | int ret = 0; | ||
| 4850 | |||
| 4851 | spin_lock(&space_info->lock); | ||
| 4852 | while (ticket->bytes > 0 && ticket->error == 0) { | ||
| 4853 | ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); | ||
| 4854 | if (ret) { | ||
| 4855 | ret = -EINTR; | ||
| 4856 | break; | ||
| 4857 | } | ||
| 4858 | spin_unlock(&space_info->lock); | ||
| 4859 | |||
| 4860 | schedule(); | ||
| 4861 | |||
| 4862 | finish_wait(&ticket->wait, &wait); | ||
| 4863 | spin_lock(&space_info->lock); | ||
| 4864 | } | ||
| 4865 | if (!ret) | ||
| 4866 | ret = ticket->error; | ||
| 4867 | if (!list_empty(&ticket->list)) | ||
| 4868 | list_del_init(&ticket->list); | ||
| 4869 | if (ticket->bytes && ticket->bytes < ticket->orig_bytes) | ||
| 4870 | reclaim_bytes = ticket->orig_bytes - ticket->bytes; | ||
| 4871 | spin_unlock(&space_info->lock); | ||
| 4872 | |||
| 4873 | if (reclaim_bytes) | ||
| 4874 | btrfs_space_info_add_old_bytes(fs_info, space_info, | ||
| 4875 | reclaim_bytes); | ||
| 4876 | return ret; | ||
| 4877 | } | ||
| 4878 | |||
| 4879 | /** | ||
| 4880 | * __reserve_metadata_bytes - try to reserve bytes from a space info | ||
| 4881 | * @fs_info - the fs_info we're allocating for | ||
| 4882 | * @space_info - the space info we want to allocate from | ||
| 4883 | * @orig_bytes - the number of bytes we want | ||
| 4884 | * @flush - whether or not we can flush to make our reservation | ||
| 4885 | * @system_chunk - whether the reservation is for the chunk root | ||
| 4886 | * | ||
| 4887 | * This will reserve orig_bytes number of bytes from @space_info. If there is | ||
| 4888 | * not enough space it will make an attempt to flush out space to make room. | ||
| 4889 | * It will do this by flushing delalloc if possible or committing the | ||
| 4890 | * transaction. If flush is BTRFS_RESERVE_NO_FLUSH then no attempts to regain | ||
| 4891 | * reservations will be made and this fails if there is not enough space already. | ||
| 4892 | */ | ||
| 4893 | static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, | ||
| 4894 | struct btrfs_space_info *space_info, | ||
| 4895 | u64 orig_bytes, | ||
| 4896 | enum btrfs_reserve_flush_enum flush, | ||
| 4897 | bool system_chunk) | ||
| 4898 | { | ||
| 4899 | struct reserve_ticket ticket; | ||
| 4900 | u64 used; | ||
| 4901 | u64 reclaim_bytes = 0; | ||
| 4902 | int ret = 0; | ||
| 4903 | |||
| 4904 | ASSERT(orig_bytes); | ||
| 4905 | ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); | ||
| 4906 | |||
| 4907 | spin_lock(&space_info->lock); | ||
| 4908 | ret = -ENOSPC; | ||
| 4909 | used = btrfs_space_info_used(space_info, true); | ||
| 4910 | |||
| 4911 | /* | ||
| 4912 | * If we have enough space then hooray, make our reservation and carry | ||
| 4913 | * on. If not see if we can overcommit, and if we can, hooray carry on. | ||
| 4914 | * If not things get more complicated. | ||
| 4915 | */ | ||
| 4916 | if (used + orig_bytes <= space_info->total_bytes) { | ||
| 4917 | btrfs_space_info_update_bytes_may_use(fs_info, space_info, | ||
| 4918 | orig_bytes); | ||
| 4919 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
| 4920 | space_info->flags, orig_bytes, 1); | ||
| 4921 | ret = 0; | ||
| 4922 | } else if (btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush, | ||
| 4923 | system_chunk)) { | ||
| 4924 | btrfs_space_info_update_bytes_may_use(fs_info, space_info, | ||
| 4925 | orig_bytes); | ||
| 4926 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
| 4927 | space_info->flags, orig_bytes, 1); | ||
| 4928 | ret = 0; | ||
| 4929 | } | ||
| 4930 | |||
| 4931 | /* | ||
| 4932 | * If we couldn't make a reservation then setup our reservation ticket | ||
| 4933 | * and kick the async worker if it's not already running. | ||
| 4934 | * | ||
| 4935 | * If we are a priority flusher then we just need to add our ticket to | ||
| 4936 | * the list and we will do our own flushing further down. | ||
| 4937 | */ | ||
| 4938 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { | ||
| 4939 | ticket.orig_bytes = orig_bytes; | ||
| 4940 | ticket.bytes = orig_bytes; | ||
| 4941 | ticket.error = 0; | ||
| 4942 | init_waitqueue_head(&ticket.wait); | ||
| 4943 | if (flush == BTRFS_RESERVE_FLUSH_ALL) { | ||
| 4944 | list_add_tail(&ticket.list, &space_info->tickets); | ||
| 4945 | if (!space_info->flush) { | ||
| 4946 | space_info->flush = 1; | ||
| 4947 | trace_btrfs_trigger_flush(fs_info, | ||
| 4948 | space_info->flags, | ||
| 4949 | orig_bytes, flush, | ||
| 4950 | "enospc"); | ||
| 4951 | queue_work(system_unbound_wq, | ||
| 4952 | &fs_info->async_reclaim_work); | ||
| 4953 | } | ||
| 4954 | } else { | ||
| 4955 | list_add_tail(&ticket.list, | ||
| 4956 | &space_info->priority_tickets); | ||
| 4957 | } | ||
| 4958 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
| 4959 | used += orig_bytes; | ||
| 4960 | /* | ||
| 4961 | * We will do the space reservation dance during log replay, | ||
| 4962 | * which means we won't have fs_info->fs_root set, so don't do | ||
| 4963 | * the async reclaim as we will panic. | ||
| 4964 | */ | ||
| 4965 | if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && | ||
| 4966 | need_do_async_reclaim(fs_info, space_info, | ||
| 4967 | used, system_chunk) && | ||
| 4968 | !work_busy(&fs_info->async_reclaim_work)) { | ||
| 4969 | trace_btrfs_trigger_flush(fs_info, space_info->flags, | ||
| 4970 | orig_bytes, flush, "preempt"); | ||
| 4971 | queue_work(system_unbound_wq, | ||
| 4972 | &fs_info->async_reclaim_work); | ||
| 4973 | } | ||
| 4974 | } | ||
| 4975 | spin_unlock(&space_info->lock); | ||
| 4976 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) | ||
| 4977 | return ret; | ||
| 4978 | |||
| 4979 | if (flush == BTRFS_RESERVE_FLUSH_ALL) | ||
| 4980 | return wait_reserve_ticket(fs_info, space_info, &ticket); | ||
| 4981 | |||
| 4982 | ret = 0; | ||
| 4983 | priority_reclaim_metadata_space(fs_info, space_info, &ticket); | ||
| 4984 | spin_lock(&space_info->lock); | ||
| 4985 | if (ticket.bytes) { | ||
| 4986 | if (ticket.bytes < orig_bytes) | ||
| 4987 | reclaim_bytes = orig_bytes - ticket.bytes; | ||
| 4988 | list_del_init(&ticket.list); | ||
| 4989 | ret = -ENOSPC; | ||
| 4990 | } | ||
| 4991 | spin_unlock(&space_info->lock); | ||
| 4992 | |||
| 4993 | if (reclaim_bytes) | ||
| 4994 | btrfs_space_info_add_old_bytes(fs_info, space_info, | ||
| 4995 | reclaim_bytes); | ||
| 4996 | ASSERT(list_empty(&ticket.list)); | ||
| 4997 | return ret; | ||
| 4998 | } | ||
| 4999 | |||
| 5000 | /** | ||
| 5001 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
| 5002 | * @root - the root we're allocating for | ||
| 5003 | * @block_rsv - the block_rsv we're allocating for | ||
| 5004 | * @orig_bytes - the number of bytes we want | ||
| 5005 | * @flush - whether or not we can flush to make our reservation | ||
| 5006 | * | ||
| 5007 | * This will reserve orig_bytes number of bytes from the space info associated | ||
| 5008 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
| 5009 | * flush out space to make room. It will do this by flushing delalloc if | ||
| 5010 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
| 5011 | * regain reservations will be made and this will fail if there is not enough | ||
| 5012 | * space already. | ||
| 5013 | */ | ||
| 5014 | static int reserve_metadata_bytes(struct btrfs_root *root, | ||
| 5015 | struct btrfs_block_rsv *block_rsv, | ||
| 5016 | u64 orig_bytes, | ||
| 5017 | enum btrfs_reserve_flush_enum flush) | ||
| 5018 | { | ||
| 5019 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 5020 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
| 5021 | int ret; | ||
| 5022 | bool system_chunk = (root == fs_info->chunk_root); | ||
| 5023 | |||
| 5024 | ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, | ||
| 5025 | orig_bytes, flush, system_chunk); | ||
| 5026 | if (ret == -ENOSPC && | ||
| 5027 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | ||
| 5028 | if (block_rsv != global_rsv && | ||
| 5029 | !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes)) | ||
| 5030 | ret = 0; | ||
| 5031 | } | ||
| 5032 | if (ret == -ENOSPC) { | ||
| 5033 | trace_btrfs_space_reservation(fs_info, "space_info:enospc", | ||
| 5034 | block_rsv->space_info->flags, | ||
| 5035 | orig_bytes, 1); | ||
| 5036 | |||
| 5037 | if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) | ||
| 5038 | btrfs_dump_space_info(fs_info, block_rsv->space_info, | ||
| 5039 | orig_bytes, 0); | ||
| 5040 | } | ||
| 5041 | return ret; | ||
| 5042 | } | ||
| 5043 | |||
| 5044 | static struct btrfs_block_rsv *get_block_rsv( | 4349 | static struct btrfs_block_rsv *get_block_rsv( |
| 5045 | const struct btrfs_trans_handle *trans, | 4350 | const struct btrfs_trans_handle *trans, |
| 5046 | const struct btrfs_root *root) | 4351 | const struct btrfs_root *root) |
| @@ -5187,8 +4492,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, | |||
| 5187 | if (!num_bytes) | 4492 | if (!num_bytes) |
| 5188 | return 0; | 4493 | return 0; |
| 5189 | 4494 | ||
| 5190 | ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv, | 4495 | ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv, |
| 5191 | num_bytes, flush); | 4496 | num_bytes, flush); |
| 5192 | if (ret) | 4497 | if (ret) |
| 5193 | return ret; | 4498 | return ret; |
| 5194 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | 4499 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
| @@ -5314,7 +4619,7 @@ int btrfs_block_rsv_add(struct btrfs_root *root, | |||
| 5314 | if (num_bytes == 0) | 4619 | if (num_bytes == 0) |
| 5315 | return 0; | 4620 | return 0; |
| 5316 | 4621 | ||
| 5317 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); | 4622 | ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
| 5318 | if (!ret) | 4623 | if (!ret) |
| 5319 | block_rsv_add_bytes(block_rsv, num_bytes, true); | 4624 | block_rsv_add_bytes(block_rsv, num_bytes, true); |
| 5320 | 4625 | ||
| @@ -5359,7 +4664,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
| 5359 | if (!ret) | 4664 | if (!ret) |
| 5360 | return 0; | 4665 | return 0; |
| 5361 | 4666 | ||
| 5362 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); | 4667 | ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
| 5363 | if (!ret) { | 4668 | if (!ret) { |
| 5364 | block_rsv_add_bytes(block_rsv, num_bytes, false); | 4669 | block_rsv_add_bytes(block_rsv, num_bytes, false); |
| 5365 | return 0; | 4670 | return 0; |
| @@ -5733,7 +5038,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) | |||
| 5733 | ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); | 5038 | ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); |
| 5734 | if (ret) | 5039 | if (ret) |
| 5735 | goto out_fail; | 5040 | goto out_fail; |
| 5736 | ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); | 5041 | ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); |
| 5737 | if (ret) | 5042 | if (ret) |
| 5738 | goto out_qgroup; | 5043 | goto out_qgroup; |
| 5739 | 5044 | ||
| @@ -8102,8 +7407,8 @@ again: | |||
| 8102 | "BTRFS: block rsv returned %d\n", ret); | 7407 | "BTRFS: block rsv returned %d\n", ret); |
| 8103 | } | 7408 | } |
| 8104 | try_reserve: | 7409 | try_reserve: |
| 8105 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, | 7410 | ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize, |
| 8106 | BTRFS_RESERVE_NO_FLUSH); | 7411 | BTRFS_RESERVE_NO_FLUSH); |
| 8107 | if (!ret) | 7412 | if (!ret) |
| 8108 | return block_rsv; | 7413 | return block_rsv; |
| 8109 | /* | 7414 | /* |
