diff options
author | Li Zefan <lizf@cn.fujitsu.com> | 2011-12-29 01:47:27 -0500 |
---|---|---|
committer | Li Zefan <lizf@cn.fujitsu.com> | 2012-01-10 21:26:48 -0500 |
commit | 7fe1e641502616220437079258506196bc4d8cbf (patch) | |
tree | da48e34d3e826f1bfe87bf7f7743bbb0e47ab2c3 /fs | |
parent | ec9ef7a13be4dcce964c8503e8999087945e5b9e (diff) |
Btrfs: rewrite btrfs_trim_block_group()
There are various bugs in block group trimming:
- It may trim from offset smaller than user-specified offset.
- It may trim beyond user-specified range.
- It may leak free space for extents smaller than specified minlen.
- It may truncate the last trimmed extent thus leak free space.
- With mixed extents+bitmaps, some extents may not be trimmed.
- With mixed extents+bitmaps, some bitmaps may not be trimmed (even
none will be trimmed). Even for those trimmed, not all the free space
in the bitmaps will be trimmed.
I rewrite btrfs_trim_block_group() and break it into two functions.
One is to trim extents only, and the other is to trim bitmaps only.
Before patching:
# fstrim -v /mnt/
/mnt/: 1496465408 bytes were trimmed
After patching:
# fstrim -v /mnt/
/mnt/: 2193768448 bytes were trimmed
And this matches the total free space:
# btrfs fi df /mnt
Data: total=3.58GB, used=1.79GB
System, DUP: total=8.00MB, used=4.00KB
System: total=4.00MB, used=0.00
Metadata, DUP: total=205.12MB, used=97.14MB
Metadata: total=8.00MB, used=0.00
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 235 |
1 files changed, 164 insertions, 71 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e4eb222147cc..b3cbb8939fa3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -2594,17 +2594,57 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2594 | cluster->block_group = NULL; | 2594 | cluster->block_group = NULL; |
2595 | } | 2595 | } |
2596 | 2596 | ||
2597 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | 2597 | static int do_trimming(struct btrfs_block_group_cache *block_group, |
2598 | u64 *trimmed, u64 start, u64 end, u64 minlen) | 2598 | u64 *total_trimmed, u64 start, u64 bytes, |
2599 | u64 reserved_start, u64 reserved_bytes) | ||
2599 | { | 2600 | { |
2600 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2601 | struct btrfs_space_info *space_info = block_group->space_info; |
2601 | struct btrfs_free_space *entry = NULL; | ||
2602 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 2602 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
2603 | u64 bytes = 0; | 2603 | int ret; |
2604 | u64 actually_trimmed; | 2604 | int update = 0; |
2605 | int ret = 0; | 2605 | u64 trimmed = 0; |
2606 | 2606 | ||
2607 | *trimmed = 0; | 2607 | spin_lock(&space_info->lock); |
2608 | spin_lock(&block_group->lock); | ||
2609 | if (!block_group->ro) { | ||
2610 | block_group->reserved += reserved_bytes; | ||
2611 | space_info->bytes_reserved += reserved_bytes; | ||
2612 | update = 1; | ||
2613 | } | ||
2614 | spin_unlock(&block_group->lock); | ||
2615 | spin_unlock(&space_info->lock); | ||
2616 | |||
2617 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2618 | start, bytes, &trimmed); | ||
2619 | if (!ret) | ||
2620 | *total_trimmed += trimmed; | ||
2621 | |||
2622 | btrfs_add_free_space(block_group, reserved_start, reserved_bytes); | ||
2623 | |||
2624 | if (update) { | ||
2625 | spin_lock(&space_info->lock); | ||
2626 | spin_lock(&block_group->lock); | ||
2627 | if (block_group->ro) | ||
2628 | space_info->bytes_readonly += reserved_bytes; | ||
2629 | block_group->reserved -= reserved_bytes; | ||
2630 | space_info->bytes_reserved -= reserved_bytes; | ||
2631 | spin_unlock(&space_info->lock); | ||
2632 | spin_unlock(&block_group->lock); | ||
2633 | } | ||
2634 | |||
2635 | return ret; | ||
2636 | } | ||
2637 | |||
2638 | static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | ||
2639 | u64 *total_trimmed, u64 start, u64 end, u64 minlen) | ||
2640 | { | ||
2641 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2642 | struct btrfs_free_space *entry; | ||
2643 | struct rb_node *node; | ||
2644 | int ret = 0; | ||
2645 | u64 extent_start; | ||
2646 | u64 extent_bytes; | ||
2647 | u64 bytes; | ||
2608 | 2648 | ||
2609 | while (start < end) { | 2649 | while (start < end) { |
2610 | spin_lock(&ctl->tree_lock); | 2650 | spin_lock(&ctl->tree_lock); |
@@ -2615,81 +2655,118 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2615 | } | 2655 | } |
2616 | 2656 | ||
2617 | entry = tree_search_offset(ctl, start, 0, 1); | 2657 | entry = tree_search_offset(ctl, start, 0, 1); |
2618 | if (!entry) | 2658 | if (!entry) { |
2619 | entry = tree_search_offset(ctl, | ||
2620 | offset_to_bitmap(ctl, start), | ||
2621 | 1, 1); | ||
2622 | |||
2623 | if (!entry || entry->offset >= end) { | ||
2624 | spin_unlock(&ctl->tree_lock); | 2659 | spin_unlock(&ctl->tree_lock); |
2625 | break; | 2660 | break; |
2626 | } | 2661 | } |
2627 | 2662 | ||
2628 | if (entry->bitmap) { | 2663 | /* skip bitmaps */ |
2629 | ret = search_bitmap(ctl, entry, &start, &bytes); | 2664 | while (entry->bitmap) { |
2630 | if (!ret) { | 2665 | node = rb_next(&entry->offset_index); |
2631 | if (start >= end) { | 2666 | if (!node) { |
2632 | spin_unlock(&ctl->tree_lock); | ||
2633 | break; | ||
2634 | } | ||
2635 | bytes = min(bytes, end - start); | ||
2636 | bitmap_clear_bits(ctl, entry, start, bytes); | ||
2637 | if (entry->bytes == 0) | ||
2638 | free_bitmap(ctl, entry); | ||
2639 | } else { | ||
2640 | start = entry->offset + BITS_PER_BITMAP * | ||
2641 | block_group->sectorsize; | ||
2642 | spin_unlock(&ctl->tree_lock); | 2667 | spin_unlock(&ctl->tree_lock); |
2643 | ret = 0; | 2668 | goto out; |
2644 | continue; | ||
2645 | } | 2669 | } |
2646 | } else { | 2670 | entry = rb_entry(node, struct btrfs_free_space, |
2647 | start = entry->offset; | 2671 | offset_index); |
2648 | bytes = min(entry->bytes, end - start); | 2672 | } |
2649 | unlink_free_space(ctl, entry); | 2673 | |
2650 | kmem_cache_free(btrfs_free_space_cachep, entry); | 2674 | if (entry->offset >= end) { |
2675 | spin_unlock(&ctl->tree_lock); | ||
2676 | break; | ||
2677 | } | ||
2678 | |||
2679 | extent_start = entry->offset; | ||
2680 | extent_bytes = entry->bytes; | ||
2681 | start = max(start, extent_start); | ||
2682 | bytes = min(extent_start + extent_bytes, end) - start; | ||
2683 | if (bytes < minlen) { | ||
2684 | spin_unlock(&ctl->tree_lock); | ||
2685 | goto next; | ||
2651 | } | 2686 | } |
2652 | 2687 | ||
2688 | unlink_free_space(ctl, entry); | ||
2689 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2690 | |||
2653 | spin_unlock(&ctl->tree_lock); | 2691 | spin_unlock(&ctl->tree_lock); |
2654 | 2692 | ||
2655 | if (bytes >= minlen) { | 2693 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
2656 | struct btrfs_space_info *space_info; | 2694 | extent_start, extent_bytes); |
2657 | int update = 0; | 2695 | if (ret) |
2658 | 2696 | break; | |
2659 | space_info = block_group->space_info; | 2697 | next: |
2660 | spin_lock(&space_info->lock); | 2698 | start += bytes; |
2661 | spin_lock(&block_group->lock); | ||
2662 | if (!block_group->ro) { | ||
2663 | block_group->reserved += bytes; | ||
2664 | space_info->bytes_reserved += bytes; | ||
2665 | update = 1; | ||
2666 | } | ||
2667 | spin_unlock(&block_group->lock); | ||
2668 | spin_unlock(&space_info->lock); | ||
2669 | |||
2670 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2671 | start, | ||
2672 | bytes, | ||
2673 | &actually_trimmed); | ||
2674 | |||
2675 | btrfs_add_free_space(block_group, start, bytes); | ||
2676 | if (update) { | ||
2677 | spin_lock(&space_info->lock); | ||
2678 | spin_lock(&block_group->lock); | ||
2679 | if (block_group->ro) | ||
2680 | space_info->bytes_readonly += bytes; | ||
2681 | block_group->reserved -= bytes; | ||
2682 | space_info->bytes_reserved -= bytes; | ||
2683 | spin_unlock(&space_info->lock); | ||
2684 | spin_unlock(&block_group->lock); | ||
2685 | } | ||
2686 | 2699 | ||
2687 | if (ret) | 2700 | if (fatal_signal_pending(current)) { |
2688 | break; | 2701 | ret = -ERESTARTSYS; |
2689 | *trimmed += actually_trimmed; | 2702 | break; |
2703 | } | ||
2704 | |||
2705 | cond_resched(); | ||
2706 | } | ||
2707 | out: | ||
2708 | return ret; | ||
2709 | } | ||
2710 | |||
2711 | static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | ||
2712 | u64 *total_trimmed, u64 start, u64 end, u64 minlen) | ||
2713 | { | ||
2714 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2715 | struct btrfs_free_space *entry; | ||
2716 | int ret = 0; | ||
2717 | int ret2; | ||
2718 | u64 bytes; | ||
2719 | u64 offset = offset_to_bitmap(ctl, start); | ||
2720 | |||
2721 | while (offset < end) { | ||
2722 | bool next_bitmap = false; | ||
2723 | |||
2724 | spin_lock(&ctl->tree_lock); | ||
2725 | |||
2726 | if (ctl->free_space < minlen) { | ||
2727 | spin_unlock(&ctl->tree_lock); | ||
2728 | break; | ||
2729 | } | ||
2730 | |||
2731 | entry = tree_search_offset(ctl, offset, 1, 0); | ||
2732 | if (!entry) { | ||
2733 | spin_unlock(&ctl->tree_lock); | ||
2734 | next_bitmap = true; | ||
2735 | goto next; | ||
2736 | } | ||
2737 | |||
2738 | bytes = minlen; | ||
2739 | ret2 = search_bitmap(ctl, entry, &start, &bytes); | ||
2740 | if (ret2 || start >= end) { | ||
2741 | spin_unlock(&ctl->tree_lock); | ||
2742 | next_bitmap = true; | ||
2743 | goto next; | ||
2744 | } | ||
2745 | |||
2746 | bytes = min(bytes, end - start); | ||
2747 | if (bytes < minlen) { | ||
2748 | spin_unlock(&ctl->tree_lock); | ||
2749 | goto next; | ||
2750 | } | ||
2751 | |||
2752 | bitmap_clear_bits(ctl, entry, start, bytes); | ||
2753 | if (entry->bytes == 0) | ||
2754 | free_bitmap(ctl, entry); | ||
2755 | |||
2756 | spin_unlock(&ctl->tree_lock); | ||
2757 | |||
2758 | ret = do_trimming(block_group, total_trimmed, start, bytes, | ||
2759 | start, bytes); | ||
2760 | if (ret) | ||
2761 | break; | ||
2762 | next: | ||
2763 | if (next_bitmap) { | ||
2764 | offset += BITS_PER_BITMAP * ctl->unit; | ||
2765 | } else { | ||
2766 | start += bytes; | ||
2767 | if (start >= offset + BITS_PER_BITMAP * ctl->unit) | ||
2768 | offset += BITS_PER_BITMAP * ctl->unit; | ||
2690 | } | 2769 | } |
2691 | start += bytes; | ||
2692 | bytes = 0; | ||
2693 | 2770 | ||
2694 | if (fatal_signal_pending(current)) { | 2771 | if (fatal_signal_pending(current)) { |
2695 | ret = -ERESTARTSYS; | 2772 | ret = -ERESTARTSYS; |
@@ -2702,6 +2779,22 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2702 | return ret; | 2779 | return ret; |
2703 | } | 2780 | } |
2704 | 2781 | ||
2782 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
2783 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
2784 | { | ||
2785 | int ret; | ||
2786 | |||
2787 | *trimmed = 0; | ||
2788 | |||
2789 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); | ||
2790 | if (ret) | ||
2791 | return ret; | ||
2792 | |||
2793 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); | ||
2794 | |||
2795 | return ret; | ||
2796 | } | ||
2797 | |||
2705 | /* | 2798 | /* |
2706 | * Find the left-most item in the cache tree, and then return the | 2799 | * Find the left-most item in the cache tree, and then return the |
2707 | * smallest inode number in the item. | 2800 | * smallest inode number in the item. |