diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-17 17:43:43 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-17 17:43:43 -0500 |
commit | eee2a817df7c5a6e569f353f8be78cc1b3604bb6 (patch) | |
tree | f721beb4712c732702d3383d3c6a52da8b5bbb20 /fs/btrfs/super.c | |
parent | 83896fb5e51594281720d145164f866ba769abd5 (diff) | |
parent | acce952b0263825da32cf10489413dec78053347 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (25 commits)
Btrfs: forced readonly mounts on errors
btrfs: Require CAP_SYS_ADMIN for filesystem rebalance
Btrfs: don't warn if we get ENOSPC in btrfs_block_rsv_check
btrfs: Fix memory leak in btrfs_read_fs_root_no_radix()
btrfs: check NULL or not
btrfs: Don't pass NULL ptr to func that may deref it.
btrfs: mount failure return value fix
btrfs: Mem leak in btrfs_get_acl()
btrfs: fix wrong free space information of btrfs
btrfs: make the chunk allocator utilize the devices better
btrfs: restructure find_free_dev_extent()
btrfs: fix wrong calculation of stripe size
btrfs: try to reclaim some space when chunk allocation fails
btrfs: fix wrong data space statistics
fs/btrfs: Fix build of ctree
Btrfs: fix off by one while setting block groups readonly
Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls
Btrfs: Add readonly snapshots support
Btrfs: Refactor btrfs_ioctl_snap_create()
btrfs: Extract duplicate decompress code
...
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r-- | fs/btrfs/super.c | 281 |
1 files changed, 263 insertions, 18 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 22acdaa78ce1..b2130c46fdb5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -54,6 +54,90 @@ | |||
54 | 54 | ||
55 | static const struct super_operations btrfs_super_ops; | 55 | static const struct super_operations btrfs_super_ops; |
56 | 56 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
58 | char nbuf[16]) | ||
59 | { | ||
60 | char *errstr = NULL; | ||
61 | |||
62 | switch (errno) { | ||
63 | case -EIO: | ||
64 | errstr = "IO failure"; | ||
65 | break; | ||
66 | case -ENOMEM: | ||
67 | errstr = "Out of memory"; | ||
68 | break; | ||
69 | case -EROFS: | ||
70 | errstr = "Readonly filesystem"; | ||
71 | break; | ||
72 | default: | ||
73 | if (nbuf) { | ||
74 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
75 | errstr = nbuf; | ||
76 | } | ||
77 | break; | ||
78 | } | ||
79 | |||
80 | return errstr; | ||
81 | } | ||
82 | |||
83 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
84 | { | ||
85 | /* | ||
86 | * today we only save the error info into ram. Long term we'll | ||
87 | * also send it down to the disk | ||
88 | */ | ||
89 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
90 | } | ||
91 | |||
92 | /* NOTE: | ||
93 | * We moved the write_super work to umount in order to avoid a deadlock, | ||
94 | * because umount holds all the locks. | ||
95 | */ | ||
96 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
97 | { | ||
98 | __save_error_info(fs_info); | ||
99 | } | ||
100 | |||
101 | /* btrfs handles errors by forcing the filesystem readonly */ | ||
102 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
103 | { | ||
104 | struct super_block *sb = fs_info->sb; | ||
105 | |||
106 | if (sb->s_flags & MS_RDONLY) | ||
107 | return; | ||
108 | |||
109 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
110 | sb->s_flags |= MS_RDONLY; | ||
111 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * __btrfs_std_error decodes expected errors from the caller and | ||
117 | * invokes the appropriate error response. | ||
118 | */ | ||
119 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
120 | unsigned int line, int errno) | ||
121 | { | ||
122 | struct super_block *sb = fs_info->sb; | ||
123 | char nbuf[16]; | ||
124 | const char *errstr; | ||
125 | |||
126 | /* | ||
127 | * Special case: if the error is EROFS, and we're already | ||
128 | * under MS_RDONLY, then it is safe here. | ||
129 | */ | ||
130 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
131 | return; | ||
132 | |||
133 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
134 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
135 | sb->s_id, function, line, errstr); | ||
136 | save_error_info(fs_info); | ||
137 | |||
138 | btrfs_handle_error(fs_info); | ||
139 | } | ||
140 | |||
57 | static void btrfs_put_super(struct super_block *sb) | 141 | static void btrfs_put_super(struct super_block *sb) |
58 | { | 142 | { |
59 | struct btrfs_root *root = btrfs_sb(sb); | 143 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -69,9 +153,9 @@ enum { | |||
69 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 153 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
70 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 154 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
71 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 155 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
72 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
73 | Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, | 157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
74 | Opt_user_subvol_rm_allowed, | 158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, |
75 | }; | 159 | }; |
76 | 160 | ||
77 | static match_table_t tokens = { | 161 | static match_table_t tokens = { |
@@ -86,7 +170,9 @@ static match_table_t tokens = { | |||
86 | {Opt_alloc_start, "alloc_start=%s"}, | 170 | {Opt_alloc_start, "alloc_start=%s"}, |
87 | {Opt_thread_pool, "thread_pool=%d"}, | 171 | {Opt_thread_pool, "thread_pool=%d"}, |
88 | {Opt_compress, "compress"}, | 172 | {Opt_compress, "compress"}, |
173 | {Opt_compress_type, "compress=%s"}, | ||
89 | {Opt_compress_force, "compress-force"}, | 174 | {Opt_compress_force, "compress-force"}, |
175 | {Opt_compress_force_type, "compress-force=%s"}, | ||
90 | {Opt_ssd, "ssd"}, | 176 | {Opt_ssd, "ssd"}, |
91 | {Opt_ssd_spread, "ssd_spread"}, | 177 | {Opt_ssd_spread, "ssd_spread"}, |
92 | {Opt_nossd, "nossd"}, | 178 | {Opt_nossd, "nossd"}, |
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
112 | char *p, *num, *orig; | 198 | char *p, *num, *orig; |
113 | int intarg; | 199 | int intarg; |
114 | int ret = 0; | 200 | int ret = 0; |
201 | char *compress_type; | ||
202 | bool compress_force = false; | ||
115 | 203 | ||
116 | if (!options) | 204 | if (!options) |
117 | return 0; | 205 | return 0; |
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
154 | btrfs_set_opt(info->mount_opt, NODATACOW); | 242 | btrfs_set_opt(info->mount_opt, NODATACOW); |
155 | btrfs_set_opt(info->mount_opt, NODATASUM); | 243 | btrfs_set_opt(info->mount_opt, NODATASUM); |
156 | break; | 244 | break; |
157 | case Opt_compress: | ||
158 | printk(KERN_INFO "btrfs: use compression\n"); | ||
159 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
160 | break; | ||
161 | case Opt_compress_force: | 245 | case Opt_compress_force: |
162 | printk(KERN_INFO "btrfs: forcing compression\n"); | 246 | case Opt_compress_force_type: |
163 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 247 | compress_force = true; |
248 | case Opt_compress: | ||
249 | case Opt_compress_type: | ||
250 | if (token == Opt_compress || | ||
251 | token == Opt_compress_force || | ||
252 | strcmp(args[0].from, "zlib") == 0) { | ||
253 | compress_type = "zlib"; | ||
254 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
255 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
256 | compress_type = "lzo"; | ||
257 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
258 | } else { | ||
259 | ret = -EINVAL; | ||
260 | goto out; | ||
261 | } | ||
262 | |||
164 | btrfs_set_opt(info->mount_opt, COMPRESS); | 263 | btrfs_set_opt(info->mount_opt, COMPRESS); |
264 | if (compress_force) { | ||
265 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
266 | pr_info("btrfs: force %s compression\n", | ||
267 | compress_type); | ||
268 | } else | ||
269 | pr_info("btrfs: use %s compression\n", | ||
270 | compress_type); | ||
165 | break; | 271 | break; |
166 | case Opt_ssd: | 272 | case Opt_ssd: |
167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 273 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
@@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
753 | return 0; | 859 | return 0; |
754 | } | 860 | } |
755 | 861 | ||
862 | /* | ||
863 | * The helper to calc the free space on the devices that can be used to store | ||
864 | * file data. | ||
865 | */ | ||
866 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
867 | { | ||
868 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
869 | struct btrfs_device_info *devices_info; | ||
870 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
871 | struct btrfs_device *device; | ||
872 | u64 skip_space; | ||
873 | u64 type; | ||
874 | u64 avail_space; | ||
875 | u64 used_space; | ||
876 | u64 min_stripe_size; | ||
877 | int min_stripes = 1; | ||
878 | int i = 0, nr_devices; | ||
879 | int ret; | ||
880 | |||
881 | nr_devices = fs_info->fs_devices->rw_devices; | ||
882 | BUG_ON(!nr_devices); | ||
883 | |||
884 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
885 | GFP_NOFS); | ||
886 | if (!devices_info) | ||
887 | return -ENOMEM; | ||
888 | |||
889 | /* calc min stripe number for data space allocation */ | ||
890 | type = btrfs_get_alloc_profile(root, 1); | ||
891 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
892 | min_stripes = 2; | ||
893 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
894 | min_stripes = 2; | ||
895 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
896 | min_stripes = 4; | ||
897 | |||
898 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
899 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
900 | else | ||
901 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
902 | |||
903 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
904 | if (!device->in_fs_metadata) | ||
905 | continue; | ||
906 | |||
907 | avail_space = device->total_bytes - device->bytes_used; | ||
908 | |||
909 | /* align with stripe_len */ | ||
910 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
911 | avail_space *= BTRFS_STRIPE_LEN; | ||
912 | |||
913 | /* | ||
914 | * In order to avoid overwriting the superblock on the drive, | ||
915 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
916 | * allocation. | ||
917 | */ | ||
918 | skip_space = 1024 * 1024; | ||
919 | |||
920 | /* user can set the offset in fs_info->alloc_start. */ | ||
921 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
922 | device->total_bytes) | ||
923 | skip_space = max(fs_info->alloc_start, skip_space); | ||
924 | |||
925 | /* | ||
926 | * btrfs can not use the free space in [0, skip_space - 1], | ||
927 | * we must subtract it from the total. In order to implement | ||
928 | * it, we account the used space in this range first. | ||
929 | */ | ||
930 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
931 | &used_space); | ||
932 | if (ret) { | ||
933 | kfree(devices_info); | ||
934 | return ret; | ||
935 | } | ||
936 | |||
937 | /* calc the free space in [0, skip_space - 1] */ | ||
938 | skip_space -= used_space; | ||
939 | |||
940 | /* | ||
941 | * we can not use the free space in [0, skip_space - 1], subtract | ||
942 | * it from the total. | ||
943 | */ | ||
944 | if (avail_space && avail_space >= skip_space) | ||
945 | avail_space -= skip_space; | ||
946 | else | ||
947 | avail_space = 0; | ||
948 | |||
949 | if (avail_space < min_stripe_size) | ||
950 | continue; | ||
951 | |||
952 | devices_info[i].dev = device; | ||
953 | devices_info[i].max_avail = avail_space; | ||
954 | |||
955 | i++; | ||
956 | } | ||
957 | |||
958 | nr_devices = i; | ||
959 | |||
960 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
961 | |||
962 | i = nr_devices - 1; | ||
963 | avail_space = 0; | ||
964 | while (nr_devices >= min_stripes) { | ||
965 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
966 | int j; | ||
967 | u64 alloc_size; | ||
968 | |||
969 | avail_space += devices_info[i].max_avail * min_stripes; | ||
970 | alloc_size = devices_info[i].max_avail; | ||
971 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
972 | devices_info[j].max_avail -= alloc_size; | ||
973 | } | ||
974 | i--; | ||
975 | nr_devices--; | ||
976 | } | ||
977 | |||
978 | kfree(devices_info); | ||
979 | *free_bytes = avail_space; | ||
980 | return 0; | ||
981 | } | ||
982 | |||
756 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 983 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
757 | { | 984 | { |
758 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 985 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
@@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
760 | struct list_head *head = &root->fs_info->space_info; | 987 | struct list_head *head = &root->fs_info->space_info; |
761 | struct btrfs_space_info *found; | 988 | struct btrfs_space_info *found; |
762 | u64 total_used = 0; | 989 | u64 total_used = 0; |
763 | u64 total_used_data = 0; | 990 | u64 total_free_data = 0; |
764 | int bits = dentry->d_sb->s_blocksize_bits; | 991 | int bits = dentry->d_sb->s_blocksize_bits; |
765 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 992 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
993 | int ret; | ||
766 | 994 | ||
995 | /* holding chunk_mutex to avoid allocating new chunks */ | ||
996 | mutex_lock(&root->fs_info->chunk_mutex); | ||
767 | rcu_read_lock(); | 997 | rcu_read_lock(); |
768 | list_for_each_entry_rcu(found, head, list) { | 998 | list_for_each_entry_rcu(found, head, list) { |
769 | if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | | 999 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
770 | BTRFS_BLOCK_GROUP_SYSTEM)) | 1000 | total_free_data += found->disk_total - found->disk_used; |
771 | total_used_data += found->disk_total; | 1001 | total_free_data -= |
772 | else | 1002 | btrfs_account_ro_block_groups_free_space(found); |
773 | total_used_data += found->disk_used; | 1003 | } |
1004 | |||
774 | total_used += found->disk_used; | 1005 | total_used += found->disk_used; |
775 | } | 1006 | } |
776 | rcu_read_unlock(); | 1007 | rcu_read_unlock(); |
@@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
778 | buf->f_namelen = BTRFS_NAME_LEN; | 1009 | buf->f_namelen = BTRFS_NAME_LEN; |
779 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1010 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
780 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1011 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
781 | buf->f_bavail = buf->f_blocks - (total_used_data >> bits); | ||
782 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1012 | buf->f_bsize = dentry->d_sb->s_blocksize; |
783 | buf->f_type = BTRFS_SUPER_MAGIC; | 1013 | buf->f_type = BTRFS_SUPER_MAGIC; |
1014 | buf->f_bavail = total_free_data; | ||
1015 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1016 | if (ret) { | ||
1017 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1018 | return ret; | ||
1019 | } | ||
1020 | buf->f_bavail += total_free_data; | ||
1021 | buf->f_bavail = buf->f_bavail >> bits; | ||
1022 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
784 | 1023 | ||
785 | /* We treat it as constant endianness (it doesn't matter _which_) | 1024 | /* We treat it as constant endianness (it doesn't matter _which_) |
786 | because we want the fsid to come out the same whether mounted | 1025 | because we want the fsid to come out the same whether mounted |
@@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void) | |||
897 | if (err) | 1136 | if (err) |
898 | return err; | 1137 | return err; |
899 | 1138 | ||
900 | err = btrfs_init_cachep(); | 1139 | err = btrfs_init_compress(); |
901 | if (err) | 1140 | if (err) |
902 | goto free_sysfs; | 1141 | goto free_sysfs; |
903 | 1142 | ||
1143 | err = btrfs_init_cachep(); | ||
1144 | if (err) | ||
1145 | goto free_compress; | ||
1146 | |||
904 | err = extent_io_init(); | 1147 | err = extent_io_init(); |
905 | if (err) | 1148 | if (err) |
906 | goto free_cachep; | 1149 | goto free_cachep; |
@@ -928,6 +1171,8 @@ free_extent_io: | |||
928 | extent_io_exit(); | 1171 | extent_io_exit(); |
929 | free_cachep: | 1172 | free_cachep: |
930 | btrfs_destroy_cachep(); | 1173 | btrfs_destroy_cachep(); |
1174 | free_compress: | ||
1175 | btrfs_exit_compress(); | ||
931 | free_sysfs: | 1176 | free_sysfs: |
932 | btrfs_exit_sysfs(); | 1177 | btrfs_exit_sysfs(); |
933 | return err; | 1178 | return err; |
@@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void) | |||
942 | unregister_filesystem(&btrfs_fs_type); | 1187 | unregister_filesystem(&btrfs_fs_type); |
943 | btrfs_exit_sysfs(); | 1188 | btrfs_exit_sysfs(); |
944 | btrfs_cleanup_fs_uuids(); | 1189 | btrfs_cleanup_fs_uuids(); |
945 | btrfs_zlib_exit(); | 1190 | btrfs_exit_compress(); |
946 | } | 1191 | } |
947 | 1192 | ||
948 | module_init(init_btrfs_fs) | 1193 | module_init(init_btrfs_fs) |