aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2011-01-05 05:07:31 -0500
committerChris Mason <chris.mason@oracle.com>2011-01-16 11:30:19 -0500
commit6d07bcec969af335d4e35b3921131b7929bd634e (patch)
tree2d1e0bb5f69bdf9dafa2862b6cad965184d67c84 /fs/btrfs
parentb2117a39fa96cf4814e7cab8c11494149ba6f29d (diff)
btrfs: fix wrong free space information of btrfs
When we store data with a RAID profile in btrfs on two or more disks of different sizes, the df command shows that there is some free space in the filesystem, but in fact the user cannot write any data; that is, df shows the wrong free space information for btrfs. # mkfs.btrfs -d raid1 /dev/sda9 /dev/sda10 # btrfs-show Label: none uuid: a95cd49e-6e33-45b8-8741-a36153ce4b64 Total devices 2 FS bytes used 28.00KB devid 1 size 5.01GB used 2.03GB path /dev/sda9 devid 2 size 10.00GB used 2.01GB path /dev/sda10 # btrfs device scan /dev/sda9 /dev/sda10 # mount /dev/sda9 /mnt # dd if=/dev/zero of=tmpfile0 bs=4K count=9999999999 (fill the filesystem) # sync # df -TH Filesystem Type Size Used Avail Use% Mounted on /dev/sda9 btrfs 17G 8.6G 5.4G 62% /mnt # btrfs-show Label: none uuid: a95cd49e-6e33-45b8-8741-a36153ce4b64 Total devices 2 FS bytes used 3.99GB devid 1 size 5.01GB used 5.01GB path /dev/sda9 devid 2 size 10.00GB used 4.99GB path /dev/sda10 This is because btrfs cannot allocate chunks when one of the paired disks has no space left: the free space on the other disks can never be used and should be subtracted from the total space, but btrfs doesn't subtract this space from the total. This is confusing to the user. This patch fixes it by calculating the free space that can actually be used to allocate chunks. Implementation: 1. get the free space of all the devices, and align it to the stripe length. 2. sort the devices by free space. 3. check the free space of each device, 3.1. if it is not zero, check the number of devices that have more free space than this device; if that number reaches the min stripe number, the free space can be used, and it is added to the total free space. If that number is below the min stripe number, we can not use the free space, and the check ends. 3.2. if the free space is zero, check the next device, goto 3.1 This implementation is just like a fake chunk allocation. 
After applying this patch, df shows the correct space information: # df -TH Filesystem Type Size Used Avail Use% Mounted on /dev/sda9 btrfs 17G 8.6G 0 100% /mnt Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent-tree.c58
-rw-r--r--fs/btrfs/super.c146
-rw-r--r--fs/btrfs/volumes.c84
-rw-r--r--fs/btrfs/volumes.h3
5 files changed, 286 insertions, 7 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0cb322cc4fc0..0995f4f68d7a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2158,6 +2158,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2158int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 2158int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
2159 struct btrfs_root *root, u64 group_start); 2159 struct btrfs_root *root, u64 group_start);
2160u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 2160u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
2161u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
2161void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 2162void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
2162void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 2163void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
2163int btrfs_check_data_free_space(struct inode *inode, u64 bytes); 2164int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
@@ -2201,6 +2202,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
2201int btrfs_set_block_group_rw(struct btrfs_root *root, 2202int btrfs_set_block_group_rw(struct btrfs_root *root,
2202 struct btrfs_block_group_cache *cache); 2203 struct btrfs_block_group_cache *cache);
2203void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2204void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2205u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
2204/* ctree.c */ 2206/* ctree.c */
2205int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2207int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2206 int level, int *slot); 2208 int level, int *slot);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1e1c9a177626..04bfc3a2bd9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3090,7 +3090,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
3090 return btrfs_reduce_alloc_profile(root, flags); 3090 return btrfs_reduce_alloc_profile(root, flags);
3091} 3091}
3092 3092
3093static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3093u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3094{ 3094{
3095 u64 flags; 3095 u64 flags;
3096 3096
@@ -8019,6 +8019,62 @@ out:
8019 return ret; 8019 return ret;
8020} 8020}
8021 8021
8022/*
8023 * helper to account the unused space of all the readonly block group in the
8024 * list. takes mirrors into account.
8025 */
8026static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
8027{
8028 struct btrfs_block_group_cache *block_group;
8029 u64 free_bytes = 0;
8030 int factor;
8031
8032 list_for_each_entry(block_group, groups_list, list) {
8033 spin_lock(&block_group->lock);
8034
8035 if (!block_group->ro) {
8036 spin_unlock(&block_group->lock);
8037 continue;
8038 }
8039
8040 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
8041 BTRFS_BLOCK_GROUP_RAID10 |
8042 BTRFS_BLOCK_GROUP_DUP))
8043 factor = 2;
8044 else
8045 factor = 1;
8046
8047 free_bytes += (block_group->key.offset -
8048 btrfs_block_group_used(&block_group->item)) *
8049 factor;
8050
8051 spin_unlock(&block_group->lock);
8052 }
8053
8054 return free_bytes;
8055}
8056
8057/*
8058 * helper to account the unused space of all the readonly block group in the
8059 * space_info. takes mirrors into account.
8060 */
8061u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
8062{
8063 int i;
8064 u64 free_bytes = 0;
8065
8066 spin_lock(&sinfo->lock);
8067
8068 for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
8069 if (!list_empty(&sinfo->block_groups[i]))
8070 free_bytes += __btrfs_get_ro_block_group_free_space(
8071 &sinfo->block_groups[i]);
8072
8073 spin_unlock(&sinfo->lock);
8074
8075 return free_bytes;
8076}
8077
8022int btrfs_set_block_group_rw(struct btrfs_root *root, 8078int btrfs_set_block_group_rw(struct btrfs_root *root,
8023 struct btrfs_block_group_cache *cache) 8079 struct btrfs_block_group_cache *cache)
8024{ 8080{
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index caa5bcc62f16..2963376e77f4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -777,6 +777,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
777 return 0; 777 return 0;
778} 778}
779 779
780/*
781 * The helper to calc the free space on the devices that can be used to store
782 * file data.
783 */
784static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
785{
786 struct btrfs_fs_info *fs_info = root->fs_info;
787 struct btrfs_device_info *devices_info;
788 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
789 struct btrfs_device *device;
790 u64 skip_space;
791 u64 type;
792 u64 avail_space;
793 u64 used_space;
794 u64 min_stripe_size;
795 int min_stripes = 1;
796 int i = 0, nr_devices;
797 int ret;
798
799 nr_devices = fs_info->fs_devices->rw_devices;
800 BUG_ON(!nr_devices);
801
802 devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
803 GFP_NOFS);
804 if (!devices_info)
805 return -ENOMEM;
806
807 /* calc min stripe number for data space alloction */
808 type = btrfs_get_alloc_profile(root, 1);
809 if (type & BTRFS_BLOCK_GROUP_RAID0)
810 min_stripes = 2;
811 else if (type & BTRFS_BLOCK_GROUP_RAID1)
812 min_stripes = 2;
813 else if (type & BTRFS_BLOCK_GROUP_RAID10)
814 min_stripes = 4;
815
816 if (type & BTRFS_BLOCK_GROUP_DUP)
817 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
818 else
819 min_stripe_size = BTRFS_STRIPE_LEN;
820
821 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
822 if (!device->in_fs_metadata)
823 continue;
824
825 avail_space = device->total_bytes - device->bytes_used;
826
827 /* align with stripe_len */
828 do_div(avail_space, BTRFS_STRIPE_LEN);
829 avail_space *= BTRFS_STRIPE_LEN;
830
831 /*
832 * In order to avoid overwritting the superblock on the drive,
833 * btrfs starts at an offset of at least 1MB when doing chunk
834 * allocation.
835 */
836 skip_space = 1024 * 1024;
837
838 /* user can set the offset in fs_info->alloc_start. */
839 if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
840 device->total_bytes)
841 skip_space = max(fs_info->alloc_start, skip_space);
842
843 /*
844 * btrfs can not use the free space in [0, skip_space - 1],
845 * we must subtract it from the total. In order to implement
846 * it, we account the used space in this range first.
847 */
848 ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
849 &used_space);
850 if (ret) {
851 kfree(devices_info);
852 return ret;
853 }
854
855 /* calc the free space in [0, skip_space - 1] */
856 skip_space -= used_space;
857
858 /*
859 * we can use the free space in [0, skip_space - 1], subtract
860 * it from the total.
861 */
862 if (avail_space && avail_space >= skip_space)
863 avail_space -= skip_space;
864 else
865 avail_space = 0;
866
867 if (avail_space < min_stripe_size)
868 continue;
869
870 devices_info[i].dev = device;
871 devices_info[i].max_avail = avail_space;
872
873 i++;
874 }
875
876 nr_devices = i;
877
878 btrfs_descending_sort_devices(devices_info, nr_devices);
879
880 i = nr_devices - 1;
881 avail_space = 0;
882 while (nr_devices >= min_stripes) {
883 if (devices_info[i].max_avail >= min_stripe_size) {
884 int j;
885 u64 alloc_size;
886
887 avail_space += devices_info[i].max_avail * min_stripes;
888 alloc_size = devices_info[i].max_avail;
889 for (j = i + 1 - min_stripes; j <= i; j++)
890 devices_info[j].max_avail -= alloc_size;
891 }
892 i--;
893 nr_devices--;
894 }
895
896 kfree(devices_info);
897 *free_bytes = avail_space;
898 return 0;
899}
900
780static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 901static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
781{ 902{
782 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 903 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -784,16 +905,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
784 struct list_head *head = &root->fs_info->space_info; 905 struct list_head *head = &root->fs_info->space_info;
785 struct btrfs_space_info *found; 906 struct btrfs_space_info *found;
786 u64 total_used = 0; 907 u64 total_used = 0;
787 u64 total_used_data = 0; 908 u64 total_free_data = 0;
788 int bits = dentry->d_sb->s_blocksize_bits; 909 int bits = dentry->d_sb->s_blocksize_bits;
789 __be32 *fsid = (__be32 *)root->fs_info->fsid; 910 __be32 *fsid = (__be32 *)root->fs_info->fsid;
911 int ret;
790 912
913 /* holding chunk_muext to avoid allocating new chunks */
914 mutex_lock(&root->fs_info->chunk_mutex);
791 rcu_read_lock(); 915 rcu_read_lock();
792 list_for_each_entry_rcu(found, head, list) { 916 list_for_each_entry_rcu(found, head, list) {
793 if (found->flags & BTRFS_BLOCK_GROUP_DATA) 917 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
794 total_used_data += found->disk_used; 918 total_free_data += found->disk_total - found->disk_used;
795 else 919 total_free_data -=
796 total_used_data += found->disk_total; 920 btrfs_account_ro_block_groups_free_space(found);
921 }
922
797 total_used += found->disk_used; 923 total_used += found->disk_used;
798 } 924 }
799 rcu_read_unlock(); 925 rcu_read_unlock();
@@ -801,9 +927,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
801 buf->f_namelen = BTRFS_NAME_LEN; 927 buf->f_namelen = BTRFS_NAME_LEN;
802 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 928 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
803 buf->f_bfree = buf->f_blocks - (total_used >> bits); 929 buf->f_bfree = buf->f_blocks - (total_used >> bits);
804 buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
805 buf->f_bsize = dentry->d_sb->s_blocksize; 930 buf->f_bsize = dentry->d_sb->s_blocksize;
806 buf->f_type = BTRFS_SUPER_MAGIC; 931 buf->f_type = BTRFS_SUPER_MAGIC;
932 buf->f_bavail = total_free_data;
933 ret = btrfs_calc_avail_data_space(root, &total_free_data);
934 if (ret) {
935 mutex_unlock(&root->fs_info->chunk_mutex);
936 return ret;
937 }
938 buf->f_bavail += total_free_data;
939 buf->f_bavail = buf->f_bavail >> bits;
940 mutex_unlock(&root->fs_info->chunk_mutex);
807 941
808 /* We treat it as constant endianness (it doesn't matter _which_) 942 /* We treat it as constant endianness (it doesn't matter _which_)
809 because we want the fsid to come out the same whether mounted 943 because we want the fsid to come out the same whether mounted
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c22784b989b7..0c7f478cf645 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -728,6 +728,90 @@ error:
728 return ret; 728 return ret;
729} 729}
730 730
731/* helper to account the used device space in the range */
732int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
733 u64 end, u64 *length)
734{
735 struct btrfs_key key;
736 struct btrfs_root *root = device->dev_root;
737 struct btrfs_dev_extent *dev_extent;
738 struct btrfs_path *path;
739 u64 extent_end;
740 int ret;
741 int slot;
742 struct extent_buffer *l;
743
744 *length = 0;
745
746 if (start >= device->total_bytes)
747 return 0;
748
749 path = btrfs_alloc_path();
750 if (!path)
751 return -ENOMEM;
752 path->reada = 2;
753
754 key.objectid = device->devid;
755 key.offset = start;
756 key.type = BTRFS_DEV_EXTENT_KEY;
757
758 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
759 if (ret < 0)
760 goto out;
761 if (ret > 0) {
762 ret = btrfs_previous_item(root, path, key.objectid, key.type);
763 if (ret < 0)
764 goto out;
765 }
766
767 while (1) {
768 l = path->nodes[0];
769 slot = path->slots[0];
770 if (slot >= btrfs_header_nritems(l)) {
771 ret = btrfs_next_leaf(root, path);
772 if (ret == 0)
773 continue;
774 if (ret < 0)
775 goto out;
776
777 break;
778 }
779 btrfs_item_key_to_cpu(l, &key, slot);
780
781 if (key.objectid < device->devid)
782 goto next;
783
784 if (key.objectid > device->devid)
785 break;
786
787 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
788 goto next;
789
790 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
791 extent_end = key.offset + btrfs_dev_extent_length(l,
792 dev_extent);
793 if (key.offset <= start && extent_end > end) {
794 *length = end - start + 1;
795 break;
796 } else if (key.offset <= start && extent_end > start)
797 *length += extent_end - start;
798 else if (key.offset > start && extent_end <= end)
799 *length += extent_end - key.offset;
800 else if (key.offset > start && key.offset <= end) {
801 *length += end - key.offset + 1;
802 break;
803 } else if (key.offset > end)
804 break;
805
806next:
807 path->slots[0]++;
808 }
809 ret = 0;
810out:
811 btrfs_free_path(path);
812 return ret;
813}
814
731/* 815/*
732 * find_free_dev_extent - find free space in the specified device 816 * find_free_dev_extent - find free space in the specified device
733 * @trans: transaction handler 817 * @trans: transaction handler
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a5cfedf393f9..7af6144a7954 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -161,6 +161,9 @@ static inline void btrfs_descending_sort_devices(
161 btrfs_cmp_device_free_bytes, NULL); 161 btrfs_cmp_device_free_bytes, NULL);
162} 162}
163 163
164int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
165 u64 end, u64 *length);
166
164#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ 167#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
165 (sizeof(struct btrfs_bio_stripe) * (n))) 168 (sizeof(struct btrfs_bio_stripe) * (n)))
166 169