aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/super.c
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2011-01-05 05:07:31 -0500
committerChris Mason <chris.mason@oracle.com>2011-01-16 11:30:19 -0500
commit6d07bcec969af335d4e35b3921131b7929bd634e (patch)
tree2d1e0bb5f69bdf9dafa2862b6cad965184d67c84 /fs/btrfs/super.c
parentb2117a39fa96cf4814e7cab8c11494149ba6f29d (diff)
btrfs: fix wrong free space information of btrfs
When we store data with a raid profile in btrfs on two or more disks of different sizes, the df command shows that there is some free space in the filesystem, but in fact the user can not write any data: df shows the wrong free space information for btrfs. # mkfs.btrfs -d raid1 /dev/sda9 /dev/sda10 # btrfs-show Label: none uuid: a95cd49e-6e33-45b8-8741-a36153ce4b64 Total devices 2 FS bytes used 28.00KB devid 1 size 5.01GB used 2.03GB path /dev/sda9 devid 2 size 10.00GB used 2.01GB path /dev/sda10 # btrfs device scan /dev/sda9 /dev/sda10 # mount /dev/sda9 /mnt # dd if=/dev/zero of=tmpfile0 bs=4K count=9999999999 (fill the filesystem) # sync # df -TH Filesystem Type Size Used Avail Use% Mounted on /dev/sda9 btrfs 17G 8.6G 5.4G 62% /mnt # btrfs-show Label: none uuid: a95cd49e-6e33-45b8-8741-a36153ce4b64 Total devices 2 FS bytes used 3.99GB devid 1 size 5.01GB used 5.01GB path /dev/sda9 devid 2 size 10.00GB used 4.99GB path /dev/sda10 This is because btrfs cannot allocate chunks when one of the paired disks has no space left; the free space on the other disks can never be used and should be subtracted from the total space, but btrfs doesn't subtract this space from the total. This is confusing to the user. This patch fixes it by calculating the free space that can actually be used to allocate chunks. Implementation: 1. get the free space of all the devices, and align it to the stripe length. 2. sort the devices by their free space. 3. check the free space of each device: 3.1. if it is not zero, check the number of devices that have more free space than this device; if that number is beyond the min stripe number, the free space can be used and is added to the total free space; if that number is below the min stripe number, we can not use the free space and the check ends. 3.2. if the free space is zero, check the next device, goto 3.1. This implementation is just like a fake chunk allocation. 
After applying this patch, df shows the correct space information: # df -TH Filesystem Type Size Used Avail Use% Mounted on /dev/sda9 btrfs 17G 8.6G 0 100% /mnt Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r--fs/btrfs/super.c146
1 files changed, 140 insertions, 6 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index caa5bcc62f16..2963376e77f4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -777,6 +777,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
777 return 0; 777 return 0;
778} 778}
779 779
780/*
781 * The helper to calc the free space on the devices that can be used to store
782 * file data.
783 */
784static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
785{
786 struct btrfs_fs_info *fs_info = root->fs_info;
787 struct btrfs_device_info *devices_info;
788 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
789 struct btrfs_device *device;
790 u64 skip_space;
791 u64 type;
792 u64 avail_space;
793 u64 used_space;
794 u64 min_stripe_size;
795 int min_stripes = 1;
796 int i = 0, nr_devices;
797 int ret;
798
799 nr_devices = fs_info->fs_devices->rw_devices;
800 BUG_ON(!nr_devices);
801
802 devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
803 GFP_NOFS);
804 if (!devices_info)
805 return -ENOMEM;
806
807 /* calc min stripe number for data space alloction */
808 type = btrfs_get_alloc_profile(root, 1);
809 if (type & BTRFS_BLOCK_GROUP_RAID0)
810 min_stripes = 2;
811 else if (type & BTRFS_BLOCK_GROUP_RAID1)
812 min_stripes = 2;
813 else if (type & BTRFS_BLOCK_GROUP_RAID10)
814 min_stripes = 4;
815
816 if (type & BTRFS_BLOCK_GROUP_DUP)
817 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
818 else
819 min_stripe_size = BTRFS_STRIPE_LEN;
820
821 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
822 if (!device->in_fs_metadata)
823 continue;
824
825 avail_space = device->total_bytes - device->bytes_used;
826
827 /* align with stripe_len */
828 do_div(avail_space, BTRFS_STRIPE_LEN);
829 avail_space *= BTRFS_STRIPE_LEN;
830
831 /*
832 * In order to avoid overwritting the superblock on the drive,
833 * btrfs starts at an offset of at least 1MB when doing chunk
834 * allocation.
835 */
836 skip_space = 1024 * 1024;
837
838 /* user can set the offset in fs_info->alloc_start. */
839 if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
840 device->total_bytes)
841 skip_space = max(fs_info->alloc_start, skip_space);
842
843 /*
844 * btrfs can not use the free space in [0, skip_space - 1],
845 * we must subtract it from the total. In order to implement
846 * it, we account the used space in this range first.
847 */
848 ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
849 &used_space);
850 if (ret) {
851 kfree(devices_info);
852 return ret;
853 }
854
855 /* calc the free space in [0, skip_space - 1] */
856 skip_space -= used_space;
857
858 /*
859 * we can use the free space in [0, skip_space - 1], subtract
860 * it from the total.
861 */
862 if (avail_space && avail_space >= skip_space)
863 avail_space -= skip_space;
864 else
865 avail_space = 0;
866
867 if (avail_space < min_stripe_size)
868 continue;
869
870 devices_info[i].dev = device;
871 devices_info[i].max_avail = avail_space;
872
873 i++;
874 }
875
876 nr_devices = i;
877
878 btrfs_descending_sort_devices(devices_info, nr_devices);
879
880 i = nr_devices - 1;
881 avail_space = 0;
882 while (nr_devices >= min_stripes) {
883 if (devices_info[i].max_avail >= min_stripe_size) {
884 int j;
885 u64 alloc_size;
886
887 avail_space += devices_info[i].max_avail * min_stripes;
888 alloc_size = devices_info[i].max_avail;
889 for (j = i + 1 - min_stripes; j <= i; j++)
890 devices_info[j].max_avail -= alloc_size;
891 }
892 i--;
893 nr_devices--;
894 }
895
896 kfree(devices_info);
897 *free_bytes = avail_space;
898 return 0;
899}
900
780static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 901static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
781{ 902{
782 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 903 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -784,16 +905,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
784 struct list_head *head = &root->fs_info->space_info; 905 struct list_head *head = &root->fs_info->space_info;
785 struct btrfs_space_info *found; 906 struct btrfs_space_info *found;
786 u64 total_used = 0; 907 u64 total_used = 0;
787 u64 total_used_data = 0; 908 u64 total_free_data = 0;
788 int bits = dentry->d_sb->s_blocksize_bits; 909 int bits = dentry->d_sb->s_blocksize_bits;
789 __be32 *fsid = (__be32 *)root->fs_info->fsid; 910 __be32 *fsid = (__be32 *)root->fs_info->fsid;
911 int ret;
790 912
913 /* holding chunk_muext to avoid allocating new chunks */
914 mutex_lock(&root->fs_info->chunk_mutex);
791 rcu_read_lock(); 915 rcu_read_lock();
792 list_for_each_entry_rcu(found, head, list) { 916 list_for_each_entry_rcu(found, head, list) {
793 if (found->flags & BTRFS_BLOCK_GROUP_DATA) 917 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
794 total_used_data += found->disk_used; 918 total_free_data += found->disk_total - found->disk_used;
795 else 919 total_free_data -=
796 total_used_data += found->disk_total; 920 btrfs_account_ro_block_groups_free_space(found);
921 }
922
797 total_used += found->disk_used; 923 total_used += found->disk_used;
798 } 924 }
799 rcu_read_unlock(); 925 rcu_read_unlock();
@@ -801,9 +927,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
801 buf->f_namelen = BTRFS_NAME_LEN; 927 buf->f_namelen = BTRFS_NAME_LEN;
802 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 928 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
803 buf->f_bfree = buf->f_blocks - (total_used >> bits); 929 buf->f_bfree = buf->f_blocks - (total_used >> bits);
804 buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
805 buf->f_bsize = dentry->d_sb->s_blocksize; 930 buf->f_bsize = dentry->d_sb->s_blocksize;
806 buf->f_type = BTRFS_SUPER_MAGIC; 931 buf->f_type = BTRFS_SUPER_MAGIC;
932 buf->f_bavail = total_free_data;
933 ret = btrfs_calc_avail_data_space(root, &total_free_data);
934 if (ret) {
935 mutex_unlock(&root->fs_info->chunk_mutex);
936 return ret;
937 }
938 buf->f_bavail += total_free_data;
939 buf->f_bavail = buf->f_bavail >> bits;
940 mutex_unlock(&root->fs_info->chunk_mutex);
807 941
808 /* We treat it as constant endianness (it doesn't matter _which_) 942 /* We treat it as constant endianness (it doesn't matter _which_)
809 because we want the fsid to come out the same whether mounted 943 because we want the fsid to come out the same whether mounted