aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r--fs/btrfs/super.c599
1 files changed, 498 insertions, 101 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8a1ea6e64575..b2130c46fdb5 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -38,6 +38,7 @@
38#include <linux/namei.h> 38#include <linux/namei.h>
39#include <linux/miscdevice.h> 39#include <linux/miscdevice.h>
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -53,6 +54,90 @@
53 54
54static const struct super_operations btrfs_super_ops; 55static const struct super_operations btrfs_super_ops;
55 56
57static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
58 char nbuf[16])
59{
60 char *errstr = NULL;
61
62 switch (errno) {
63 case -EIO:
64 errstr = "IO failure";
65 break;
66 case -ENOMEM:
67 errstr = "Out of memory";
68 break;
69 case -EROFS:
70 errstr = "Readonly filesystem";
71 break;
72 default:
73 if (nbuf) {
74 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
75 errstr = nbuf;
76 }
77 break;
78 }
79
80 return errstr;
81}
82
83static void __save_error_info(struct btrfs_fs_info *fs_info)
84{
85 /*
86 * today we only save the error info into ram. Long term we'll
87 * also send it down to the disk
88 */
89 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
90}
91
/*
 * Record that the filesystem has hit an error.
 *
 * NOTE: the write_super work was moved to umount time in order to avoid a
 * deadlock, because umount holds all the locks.  This function therefore
 * only delegates to __save_error_info().
 */
static void save_error_info(struct btrfs_fs_info *fs_info)
{
	__save_error_info(fs_info);
}
100
101/* btrfs handle error by forcing the filesystem readonly */
102static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
103{
104 struct super_block *sb = fs_info->sb;
105
106 if (sb->s_flags & MS_RDONLY)
107 return;
108
109 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
110 sb->s_flags |= MS_RDONLY;
111 printk(KERN_INFO "btrfs is forced readonly\n");
112 }
113}
114
115/*
116 * __btrfs_std_error decodes expected errors from the caller and
117 * invokes the approciate error response.
118 */
119void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
120 unsigned int line, int errno)
121{
122 struct super_block *sb = fs_info->sb;
123 char nbuf[16];
124 const char *errstr;
125
126 /*
127 * Special case: if the error is EROFS, and we're already
128 * under MS_RDONLY, then it is safe here.
129 */
130 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
131 return;
132
133 errstr = btrfs_decode_error(fs_info, errno, nbuf);
134 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
135 sb->s_id, function, line, errstr);
136 save_error_info(fs_info);
137
138 btrfs_handle_error(fs_info);
139}
140
56static void btrfs_put_super(struct super_block *sb) 141static void btrfs_put_super(struct super_block *sb)
57{ 142{
58 struct btrfs_root *root = btrfs_sb(sb); 143 struct btrfs_root *root = btrfs_sb(sb);
@@ -60,30 +145,34 @@ static void btrfs_put_super(struct super_block *sb)
60 145
61 ret = close_ctree(root); 146 ret = close_ctree(root);
62 sb->s_fs_info = NULL; 147 sb->s_fs_info = NULL;
148
149 (void)ret; /* FIXME: need to fix VFS to return error? */
63} 150}
64 151
65enum { 152enum {
66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 153 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 154 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
69 Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
70 Opt_flushoncommit, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
71 Opt_discard, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
72}; 159};
73 160
74static match_table_t tokens = { 161static match_table_t tokens = {
75 {Opt_degraded, "degraded"}, 162 {Opt_degraded, "degraded"},
76 {Opt_subvol, "subvol=%s"}, 163 {Opt_subvol, "subvol=%s"},
164 {Opt_subvolid, "subvolid=%d"},
77 {Opt_device, "device=%s"}, 165 {Opt_device, "device=%s"},
78 {Opt_nodatasum, "nodatasum"}, 166 {Opt_nodatasum, "nodatasum"},
79 {Opt_nodatacow, "nodatacow"}, 167 {Opt_nodatacow, "nodatacow"},
80 {Opt_nobarrier, "nobarrier"}, 168 {Opt_nobarrier, "nobarrier"},
81 {Opt_max_extent, "max_extent=%s"},
82 {Opt_max_inline, "max_inline=%s"}, 169 {Opt_max_inline, "max_inline=%s"},
83 {Opt_alloc_start, "alloc_start=%s"}, 170 {Opt_alloc_start, "alloc_start=%s"},
84 {Opt_thread_pool, "thread_pool=%d"}, 171 {Opt_thread_pool, "thread_pool=%d"},
85 {Opt_compress, "compress"}, 172 {Opt_compress, "compress"},
173 {Opt_compress_type, "compress=%s"},
86 {Opt_compress_force, "compress-force"}, 174 {Opt_compress_force, "compress-force"},
175 {Opt_compress_force_type, "compress-force=%s"},
87 {Opt_ssd, "ssd"}, 176 {Opt_ssd, "ssd"},
88 {Opt_ssd_spread, "ssd_spread"}, 177 {Opt_ssd_spread, "ssd_spread"},
89 {Opt_nossd, "nossd"}, 178 {Opt_nossd, "nossd"},
@@ -92,34 +181,12 @@ static match_table_t tokens = {
92 {Opt_flushoncommit, "flushoncommit"}, 181 {Opt_flushoncommit, "flushoncommit"},
93 {Opt_ratio, "metadata_ratio=%d"}, 182 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"}, 183 {Opt_discard, "discard"},
184 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
95 {Opt_err, NULL}, 187 {Opt_err, NULL},
96}; 188};
97 189
98u64 btrfs_parse_size(char *str)
99{
100 u64 res;
101 int mult = 1;
102 char *end;
103 char last;
104
105 res = simple_strtoul(str, &end, 10);
106
107 last = end[0];
108 if (isalpha(last)) {
109 last = tolower(last);
110 switch (last) {
111 case 'g':
112 mult *= 1024;
113 case 'm':
114 mult *= 1024;
115 case 'k':
116 mult *= 1024;
117 }
118 res = res * mult;
119 }
120 return res;
121}
122
123/* 190/*
124 * Regular mount options parser. Everything that is needed only when 191 * Regular mount options parser. Everything that is needed only when
125 * reading in a new superblock is parsed here. 192 * reading in a new superblock is parsed here.
@@ -128,9 +195,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
128{ 195{
129 struct btrfs_fs_info *info = root->fs_info; 196 struct btrfs_fs_info *info = root->fs_info;
130 substring_t args[MAX_OPT_ARGS]; 197 substring_t args[MAX_OPT_ARGS];
131 char *p, *num; 198 char *p, *num, *orig;
132 int intarg; 199 int intarg;
133 int ret = 0; 200 int ret = 0;
201 char *compress_type;
202 bool compress_force = false;
134 203
135 if (!options) 204 if (!options)
136 return 0; 205 return 0;
@@ -143,6 +212,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
143 if (!options) 212 if (!options)
144 return -ENOMEM; 213 return -ENOMEM;
145 214
215 orig = options;
146 216
147 while ((p = strsep(&options, ",")) != NULL) { 217 while ((p = strsep(&options, ",")) != NULL) {
148 int token; 218 int token;
@@ -156,6 +226,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
156 btrfs_set_opt(info->mount_opt, DEGRADED); 226 btrfs_set_opt(info->mount_opt, DEGRADED);
157 break; 227 break;
158 case Opt_subvol: 228 case Opt_subvol:
229 case Opt_subvolid:
159 case Opt_device: 230 case Opt_device:
160 /* 231 /*
161 * These are parsed by btrfs_parse_early_options 232 * These are parsed by btrfs_parse_early_options
@@ -171,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
171 btrfs_set_opt(info->mount_opt, NODATACOW); 242 btrfs_set_opt(info->mount_opt, NODATACOW);
172 btrfs_set_opt(info->mount_opt, NODATASUM); 243 btrfs_set_opt(info->mount_opt, NODATASUM);
173 break; 244 break;
174 case Opt_compress:
175 printk(KERN_INFO "btrfs: use compression\n");
176 btrfs_set_opt(info->mount_opt, COMPRESS);
177 break;
178 case Opt_compress_force: 245 case Opt_compress_force:
179 printk(KERN_INFO "btrfs: forcing compression\n"); 246 case Opt_compress_force_type:
180 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 247 compress_force = true;
248 case Opt_compress:
249 case Opt_compress_type:
250 if (token == Opt_compress ||
251 token == Opt_compress_force ||
252 strcmp(args[0].from, "zlib") == 0) {
253 compress_type = "zlib";
254 info->compress_type = BTRFS_COMPRESS_ZLIB;
255 } else if (strcmp(args[0].from, "lzo") == 0) {
256 compress_type = "lzo";
257 info->compress_type = BTRFS_COMPRESS_LZO;
258 } else {
259 ret = -EINVAL;
260 goto out;
261 }
262
181 btrfs_set_opt(info->mount_opt, COMPRESS); 263 btrfs_set_opt(info->mount_opt, COMPRESS);
264 if (compress_force) {
265 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
266 pr_info("btrfs: force %s compression\n",
267 compress_type);
268 } else
269 pr_info("btrfs: use %s compression\n",
270 compress_type);
182 break; 271 break;
183 case Opt_ssd: 272 case Opt_ssd:
184 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 273 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -210,22 +299,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
210 info->thread_pool_size); 299 info->thread_pool_size);
211 } 300 }
212 break; 301 break;
213 case Opt_max_extent:
214 num = match_strdup(&args[0]);
215 if (num) {
216 info->max_extent = btrfs_parse_size(num);
217 kfree(num);
218
219 info->max_extent = max_t(u64,
220 info->max_extent, root->sectorsize);
221 printk(KERN_INFO "btrfs: max_extent at %llu\n",
222 (unsigned long long)info->max_extent);
223 }
224 break;
225 case Opt_max_inline: 302 case Opt_max_inline:
226 num = match_strdup(&args[0]); 303 num = match_strdup(&args[0]);
227 if (num) { 304 if (num) {
228 info->max_inline = btrfs_parse_size(num); 305 info->max_inline = memparse(num, NULL);
229 kfree(num); 306 kfree(num);
230 307
231 if (info->max_inline) { 308 if (info->max_inline) {
@@ -240,7 +317,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
240 case Opt_alloc_start: 317 case Opt_alloc_start:
241 num = match_strdup(&args[0]); 318 num = match_strdup(&args[0]);
242 if (num) { 319 if (num) {
243 info->alloc_start = btrfs_parse_size(num); 320 info->alloc_start = memparse(num, NULL);
244 kfree(num); 321 kfree(num);
245 printk(KERN_INFO 322 printk(KERN_INFO
246 "btrfs: allocations start at %llu\n", 323 "btrfs: allocations start at %llu\n",
@@ -270,6 +347,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
270 case Opt_discard: 347 case Opt_discard:
271 btrfs_set_opt(info->mount_opt, DISCARD); 348 btrfs_set_opt(info->mount_opt, DISCARD);
272 break; 349 break;
350 case Opt_space_cache:
351 printk(KERN_INFO "btrfs: enabling disk space caching\n");
352 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
353 break;
354 case Opt_clear_cache:
355 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
356 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
357 break;
358 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break;
273 case Opt_err: 361 case Opt_err:
274 printk(KERN_INFO "btrfs: unrecognized mount option " 362 printk(KERN_INFO "btrfs: unrecognized mount option "
275 "'%s'\n", p); 363 "'%s'\n", p);
@@ -280,7 +368,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
280 } 368 }
281 } 369 }
282out: 370out:
283 kfree(options); 371 kfree(orig);
284 return ret; 372 return ret;
285} 373}
286 374
@@ -291,12 +379,13 @@ out:
291 * only when we need to allocate a new super block. 379 * only when we need to allocate a new super block.
292 */ 380 */
293static int btrfs_parse_early_options(const char *options, fmode_t flags, 381static int btrfs_parse_early_options(const char *options, fmode_t flags,
294 void *holder, char **subvol_name, 382 void *holder, char **subvol_name, u64 *subvol_objectid,
295 struct btrfs_fs_devices **fs_devices) 383 struct btrfs_fs_devices **fs_devices)
296{ 384{
297 substring_t args[MAX_OPT_ARGS]; 385 substring_t args[MAX_OPT_ARGS];
298 char *opts, *p; 386 char *opts, *p;
299 int error = 0; 387 int error = 0;
388 int intarg;
300 389
301 if (!options) 390 if (!options)
302 goto out; 391 goto out;
@@ -319,6 +408,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
319 case Opt_subvol: 408 case Opt_subvol:
320 *subvol_name = match_strdup(&args[0]); 409 *subvol_name = match_strdup(&args[0]);
321 break; 410 break;
411 case Opt_subvolid:
412 intarg = 0;
413 error = match_int(&args[0], &intarg);
414 if (!error) {
415 /* we want the original fs_tree */
416 if (!intarg)
417 *subvol_objectid =
418 BTRFS_FS_TREE_OBJECTID;
419 else
420 *subvol_objectid = intarg;
421 }
422 break;
322 case Opt_device: 423 case Opt_device:
323 error = btrfs_scan_one_device(match_strdup(&args[0]), 424 error = btrfs_scan_one_device(match_strdup(&args[0]),
324 flags, holder, fs_devices); 425 flags, holder, fs_devices);
@@ -346,13 +447,118 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
346 return error; 447 return error;
347} 448}
348 449
450static struct dentry *get_default_root(struct super_block *sb,
451 u64 subvol_objectid)
452{
453 struct btrfs_root *root = sb->s_fs_info;
454 struct btrfs_root *new_root;
455 struct btrfs_dir_item *di;
456 struct btrfs_path *path;
457 struct btrfs_key location;
458 struct inode *inode;
459 struct dentry *dentry;
460 u64 dir_id;
461 int new = 0;
462
463 /*
464 * We have a specific subvol we want to mount, just setup location and
465 * go look up the root.
466 */
467 if (subvol_objectid) {
468 location.objectid = subvol_objectid;
469 location.type = BTRFS_ROOT_ITEM_KEY;
470 location.offset = (u64)-1;
471 goto find_root;
472 }
473
474 path = btrfs_alloc_path();
475 if (!path)
476 return ERR_PTR(-ENOMEM);
477 path->leave_spinning = 1;
478
479 /*
480 * Find the "default" dir item which points to the root item that we
481 * will mount by default if we haven't been given a specific subvolume
482 * to mount.
483 */
484 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
485 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
486 if (IS_ERR(di))
487 return ERR_CAST(di);
488 if (!di) {
489 /*
490 * Ok the default dir item isn't there. This is weird since
491 * it's always been there, but don't freak out, just try and
492 * mount to root most subvolume.
493 */
494 btrfs_free_path(path);
495 dir_id = BTRFS_FIRST_FREE_OBJECTID;
496 new_root = root->fs_info->fs_root;
497 goto setup_root;
498 }
499
500 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
501 btrfs_free_path(path);
502
503find_root:
504 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
505 if (IS_ERR(new_root))
506 return ERR_CAST(new_root);
507
508 if (btrfs_root_refs(&new_root->root_item) == 0)
509 return ERR_PTR(-ENOENT);
510
511 dir_id = btrfs_root_dirid(&new_root->root_item);
512setup_root:
513 location.objectid = dir_id;
514 location.type = BTRFS_INODE_ITEM_KEY;
515 location.offset = 0;
516
517 inode = btrfs_iget(sb, &location, new_root, &new);
518 if (IS_ERR(inode))
519 return ERR_CAST(inode);
520
521 /*
522 * If we're just mounting the root most subvol put the inode and return
523 * a reference to the dentry. We will have already gotten a reference
524 * to the inode in btrfs_fill_super so we're good to go.
525 */
526 if (!new && sb->s_root->d_inode == inode) {
527 iput(inode);
528 return dget(sb->s_root);
529 }
530
531 if (new) {
532 const struct qstr name = { .name = "/", .len = 1 };
533
534 /*
535 * New inode, we need to make the dentry a sibling of s_root so
536 * everything gets cleaned up properly on unmount.
537 */
538 dentry = d_alloc(sb->s_root, &name);
539 if (!dentry) {
540 iput(inode);
541 return ERR_PTR(-ENOMEM);
542 }
543 d_splice_alias(inode, dentry);
544 } else {
545 /*
546 * We found the inode in cache, just find a dentry for it and
547 * put the reference to the inode we just got.
548 */
549 dentry = d_find_alias(inode);
550 iput(inode);
551 }
552
553 return dentry;
554}
555
349static int btrfs_fill_super(struct super_block *sb, 556static int btrfs_fill_super(struct super_block *sb,
350 struct btrfs_fs_devices *fs_devices, 557 struct btrfs_fs_devices *fs_devices,
351 void *data, int silent) 558 void *data, int silent)
352{ 559{
353 struct inode *inode; 560 struct inode *inode;
354 struct dentry *root_dentry; 561 struct dentry *root_dentry;
355 struct btrfs_super_block *disk_super;
356 struct btrfs_root *tree_root; 562 struct btrfs_root *tree_root;
357 struct btrfs_key key; 563 struct btrfs_key key;
358 int err; 564 int err;
@@ -360,6 +566,7 @@ static int btrfs_fill_super(struct super_block *sb,
360 sb->s_maxbytes = MAX_LFS_FILESIZE; 566 sb->s_maxbytes = MAX_LFS_FILESIZE;
361 sb->s_magic = BTRFS_SUPER_MAGIC; 567 sb->s_magic = BTRFS_SUPER_MAGIC;
362 sb->s_op = &btrfs_super_ops; 568 sb->s_op = &btrfs_super_ops;
569 sb->s_d_op = &btrfs_dentry_operations;
363 sb->s_export_op = &btrfs_export_ops; 570 sb->s_export_op = &btrfs_export_ops;
364 sb->s_xattr = btrfs_xattr_handlers; 571 sb->s_xattr = btrfs_xattr_handlers;
365 sb->s_time_gran = 1; 572 sb->s_time_gran = 1;
@@ -374,12 +581,11 @@ static int btrfs_fill_super(struct super_block *sb,
374 return PTR_ERR(tree_root); 581 return PTR_ERR(tree_root);
375 } 582 }
376 sb->s_fs_info = tree_root; 583 sb->s_fs_info = tree_root;
377 disk_super = &tree_root->fs_info->super_copy;
378 584
379 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 585 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
380 key.type = BTRFS_INODE_ITEM_KEY; 586 key.type = BTRFS_INODE_ITEM_KEY;
381 key.offset = 0; 587 key.offset = 0;
382 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root); 588 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL);
383 if (IS_ERR(inode)) { 589 if (IS_ERR(inode)) {
384 err = PTR_ERR(inode); 590 err = PTR_ERR(inode);
385 goto fail_close; 591 goto fail_close;
@@ -391,12 +597,6 @@ static int btrfs_fill_super(struct super_block *sb,
391 err = -ENOMEM; 597 err = -ENOMEM;
392 goto fail_close; 598 goto fail_close;
393 } 599 }
394#if 0
395 /* this does the super kobj at the same time */
396 err = btrfs_sysfs_add_super(tree_root->fs_info);
397 if (err)
398 goto fail_close;
399#endif
400 600
401 sb->s_root = root_dentry; 601 sb->s_root = root_dentry;
402 602
@@ -422,7 +622,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
422 btrfs_start_delalloc_inodes(root, 0); 622 btrfs_start_delalloc_inodes(root, 0);
423 btrfs_wait_ordered_extents(root, 0, 0); 623 btrfs_wait_ordered_extents(root, 0, 0);
424 624
425 trans = btrfs_start_transaction(root, 1); 625 trans = btrfs_start_transaction(root, 0);
426 ret = btrfs_commit_transaction(trans, root); 626 ret = btrfs_commit_transaction(trans, root);
427 return ret; 627 return ret;
428} 628}
@@ -440,9 +640,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
440 seq_puts(seq, ",nodatacow"); 640 seq_puts(seq, ",nodatacow");
441 if (btrfs_test_opt(root, NOBARRIER)) 641 if (btrfs_test_opt(root, NOBARRIER))
442 seq_puts(seq, ",nobarrier"); 642 seq_puts(seq, ",nobarrier");
443 if (info->max_extent != (u64)-1)
444 seq_printf(seq, ",max_extent=%llu",
445 (unsigned long long)info->max_extent);
446 if (info->max_inline != 8192 * 1024) 643 if (info->max_inline != 8192 * 1024)
447 seq_printf(seq, ",max_inline=%llu", 644 seq_printf(seq, ",max_inline=%llu",
448 (unsigned long long)info->max_inline); 645 (unsigned long long)info->max_inline);
@@ -473,36 +670,54 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
473 670
474static int btrfs_test_super(struct super_block *s, void *data) 671static int btrfs_test_super(struct super_block *s, void *data)
475{ 672{
476 struct btrfs_fs_devices *test_fs_devices = data; 673 struct btrfs_root *test_root = data;
477 struct btrfs_root *root = btrfs_sb(s); 674 struct btrfs_root *root = btrfs_sb(s);
478 675
479 return root->fs_info->fs_devices == test_fs_devices; 676 /*
677 * If this super block is going away, return false as it
678 * can't match as an existing super block.
679 */
680 if (!atomic_read(&s->s_active))
681 return 0;
682 return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
683}
684
685static int btrfs_set_super(struct super_block *s, void *data)
686{
687 s->s_fs_info = data;
688
689 return set_anon_super(s, data);
480} 690}
481 691
692
482/* 693/*
483 * Find a superblock for the given device / mount point. 694 * Find a superblock for the given device / mount point.
484 * 695 *
485 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 696 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
486 * for multiple device setup. Make sure to keep it in sync. 697 * for multiple device setup. Make sure to keep it in sync.
487 */ 698 */
488static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 699static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
489 const char *dev_name, void *data, struct vfsmount *mnt) 700 const char *dev_name, void *data)
490{ 701{
491 char *subvol_name = NULL;
492 struct block_device *bdev = NULL; 702 struct block_device *bdev = NULL;
493 struct super_block *s; 703 struct super_block *s;
494 struct dentry *root; 704 struct dentry *root;
495 struct btrfs_fs_devices *fs_devices = NULL; 705 struct btrfs_fs_devices *fs_devices = NULL;
706 struct btrfs_root *tree_root = NULL;
707 struct btrfs_fs_info *fs_info = NULL;
496 fmode_t mode = FMODE_READ; 708 fmode_t mode = FMODE_READ;
709 char *subvol_name = NULL;
710 u64 subvol_objectid = 0;
497 int error = 0; 711 int error = 0;
498 712
499 if (!(flags & MS_RDONLY)) 713 if (!(flags & MS_RDONLY))
500 mode |= FMODE_WRITE; 714 mode |= FMODE_WRITE;
501 715
502 error = btrfs_parse_early_options(data, mode, fs_type, 716 error = btrfs_parse_early_options(data, mode, fs_type,
503 &subvol_name, &fs_devices); 717 &subvol_name, &subvol_objectid,
718 &fs_devices);
504 if (error) 719 if (error)
505 return error; 720 return ERR_PTR(error);
506 721
507 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 722 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
508 if (error) 723 if (error)
@@ -517,8 +732,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
517 goto error_close_devices; 732 goto error_close_devices;
518 } 733 }
519 734
735 /*
736 * Setup a dummy root and fs_info for test/set super. This is because
737 * we don't actually fill this stuff out until open_ctree, but we need
738 * it for searching for existing supers, so this lets us do that and
739 * then open_ctree will properly initialize everything later.
740 */
741 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
742 tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
743 if (!fs_info || !tree_root) {
744 error = -ENOMEM;
745 goto error_close_devices;
746 }
747 fs_info->tree_root = tree_root;
748 fs_info->fs_devices = fs_devices;
749 tree_root->fs_info = fs_info;
750
520 bdev = fs_devices->latest_bdev; 751 bdev = fs_devices->latest_bdev;
521 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); 752 s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
522 if (IS_ERR(s)) 753 if (IS_ERR(s))
523 goto error_s; 754 goto error_s;
524 755
@@ -546,40 +777,49 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
546 s->s_flags |= MS_ACTIVE; 777 s->s_flags |= MS_ACTIVE;
547 } 778 }
548 779
549 if (!strcmp(subvol_name, ".")) 780 root = get_default_root(s, subvol_objectid);
550 root = dget(s->s_root); 781 if (IS_ERR(root)) {
551 else { 782 error = PTR_ERR(root);
552 mutex_lock(&s->s_root->d_inode->i_mutex); 783 deactivate_locked_super(s);
553 root = lookup_one_len(subvol_name, s->s_root, 784 goto error_free_subvol_name;
785 }
786 /* if they gave us a subvolume name bind mount into that */
787 if (strcmp(subvol_name, ".")) {
788 struct dentry *new_root;
789 mutex_lock(&root->d_inode->i_mutex);
790 new_root = lookup_one_len(subvol_name, root,
554 strlen(subvol_name)); 791 strlen(subvol_name));
555 mutex_unlock(&s->s_root->d_inode->i_mutex); 792 mutex_unlock(&root->d_inode->i_mutex);
556 793
557 if (IS_ERR(root)) { 794 if (IS_ERR(new_root)) {
795 dput(root);
558 deactivate_locked_super(s); 796 deactivate_locked_super(s);
559 error = PTR_ERR(root); 797 error = PTR_ERR(new_root);
560 goto error_free_subvol_name; 798 goto error_free_subvol_name;
561 } 799 }
562 if (!root->d_inode) { 800 if (!new_root->d_inode) {
563 dput(root); 801 dput(root);
802 dput(new_root);
564 deactivate_locked_super(s); 803 deactivate_locked_super(s);
565 error = -ENXIO; 804 error = -ENXIO;
566 goto error_free_subvol_name; 805 goto error_free_subvol_name;
567 } 806 }
807 dput(root);
808 root = new_root;
568 } 809 }
569 810
570 mnt->mnt_sb = s;
571 mnt->mnt_root = root;
572
573 kfree(subvol_name); 811 kfree(subvol_name);
574 return 0; 812 return root;
575 813
576error_s: 814error_s:
577 error = PTR_ERR(s); 815 error = PTR_ERR(s);
578error_close_devices: 816error_close_devices:
579 btrfs_close_devices(fs_devices); 817 btrfs_close_devices(fs_devices);
818 kfree(fs_info);
819 kfree(tree_root);
580error_free_subvol_name: 820error_free_subvol_name:
581 kfree(subvol_name); 821 kfree(subvol_name);
582 return error; 822 return ERR_PTR(error);
583} 823}
584 824
585static int btrfs_remount(struct super_block *sb, int *flags, char *data) 825static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -606,11 +846,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
606 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) 846 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
607 return -EINVAL; 847 return -EINVAL;
608 848
609 /* recover relocation */ 849 ret = btrfs_cleanup_fs_roots(root->fs_info);
610 ret = btrfs_recover_relocation(root);
611 WARN_ON(ret); 850 WARN_ON(ret);
612 851
613 ret = btrfs_cleanup_fs_roots(root->fs_info); 852 /* recover relocation */
853 ret = btrfs_recover_relocation(root);
614 WARN_ON(ret); 854 WARN_ON(ret);
615 855
616 sb->s_flags &= ~MS_RDONLY; 856 sb->s_flags &= ~MS_RDONLY;
@@ -619,20 +859,167 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
619 return 0; 859 return 0;
620} 860}
621 861
862/*
863 * The helper to calc the free space on the devices that can be used to store
864 * file data.
865 */
866static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
867{
868 struct btrfs_fs_info *fs_info = root->fs_info;
869 struct btrfs_device_info *devices_info;
870 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
871 struct btrfs_device *device;
872 u64 skip_space;
873 u64 type;
874 u64 avail_space;
875 u64 used_space;
876 u64 min_stripe_size;
877 int min_stripes = 1;
878 int i = 0, nr_devices;
879 int ret;
880
881 nr_devices = fs_info->fs_devices->rw_devices;
882 BUG_ON(!nr_devices);
883
884 devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
885 GFP_NOFS);
886 if (!devices_info)
887 return -ENOMEM;
888
889 /* calc min stripe number for data space alloction */
890 type = btrfs_get_alloc_profile(root, 1);
891 if (type & BTRFS_BLOCK_GROUP_RAID0)
892 min_stripes = 2;
893 else if (type & BTRFS_BLOCK_GROUP_RAID1)
894 min_stripes = 2;
895 else if (type & BTRFS_BLOCK_GROUP_RAID10)
896 min_stripes = 4;
897
898 if (type & BTRFS_BLOCK_GROUP_DUP)
899 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
900 else
901 min_stripe_size = BTRFS_STRIPE_LEN;
902
903 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
904 if (!device->in_fs_metadata)
905 continue;
906
907 avail_space = device->total_bytes - device->bytes_used;
908
909 /* align with stripe_len */
910 do_div(avail_space, BTRFS_STRIPE_LEN);
911 avail_space *= BTRFS_STRIPE_LEN;
912
913 /*
914 * In order to avoid overwritting the superblock on the drive,
915 * btrfs starts at an offset of at least 1MB when doing chunk
916 * allocation.
917 */
918 skip_space = 1024 * 1024;
919
920 /* user can set the offset in fs_info->alloc_start. */
921 if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
922 device->total_bytes)
923 skip_space = max(fs_info->alloc_start, skip_space);
924
925 /*
926 * btrfs can not use the free space in [0, skip_space - 1],
927 * we must subtract it from the total. In order to implement
928 * it, we account the used space in this range first.
929 */
930 ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
931 &used_space);
932 if (ret) {
933 kfree(devices_info);
934 return ret;
935 }
936
937 /* calc the free space in [0, skip_space - 1] */
938 skip_space -= used_space;
939
940 /*
941 * we can use the free space in [0, skip_space - 1], subtract
942 * it from the total.
943 */
944 if (avail_space && avail_space >= skip_space)
945 avail_space -= skip_space;
946 else
947 avail_space = 0;
948
949 if (avail_space < min_stripe_size)
950 continue;
951
952 devices_info[i].dev = device;
953 devices_info[i].max_avail = avail_space;
954
955 i++;
956 }
957
958 nr_devices = i;
959
960 btrfs_descending_sort_devices(devices_info, nr_devices);
961
962 i = nr_devices - 1;
963 avail_space = 0;
964 while (nr_devices >= min_stripes) {
965 if (devices_info[i].max_avail >= min_stripe_size) {
966 int j;
967 u64 alloc_size;
968
969 avail_space += devices_info[i].max_avail * min_stripes;
970 alloc_size = devices_info[i].max_avail;
971 for (j = i + 1 - min_stripes; j <= i; j++)
972 devices_info[j].max_avail -= alloc_size;
973 }
974 i--;
975 nr_devices--;
976 }
977
978 kfree(devices_info);
979 *free_bytes = avail_space;
980 return 0;
981}
982
622static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 983static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
623{ 984{
624 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 985 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
625 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 986 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
987 struct list_head *head = &root->fs_info->space_info;
988 struct btrfs_space_info *found;
989 u64 total_used = 0;
990 u64 total_free_data = 0;
626 int bits = dentry->d_sb->s_blocksize_bits; 991 int bits = dentry->d_sb->s_blocksize_bits;
627 __be32 *fsid = (__be32 *)root->fs_info->fsid; 992 __be32 *fsid = (__be32 *)root->fs_info->fsid;
993 int ret;
994
995 /* holding chunk_mutex to avoid allocating new chunks */
996 mutex_lock(&root->fs_info->chunk_mutex);
997 rcu_read_lock();
998 list_for_each_entry_rcu(found, head, list) {
999 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1000 total_free_data += found->disk_total - found->disk_used;
1001 total_free_data -=
1002 btrfs_account_ro_block_groups_free_space(found);
1003 }
1004
1005 total_used += found->disk_used;
1006 }
1007 rcu_read_unlock();
628 1008
629 buf->f_namelen = BTRFS_NAME_LEN; 1009 buf->f_namelen = BTRFS_NAME_LEN;
630 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1010 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
631 buf->f_bfree = buf->f_blocks - 1011 buf->f_bfree = buf->f_blocks - (total_used >> bits);
632 (btrfs_super_bytes_used(disk_super) >> bits);
633 buf->f_bavail = buf->f_bfree;
634 buf->f_bsize = dentry->d_sb->s_blocksize; 1012 buf->f_bsize = dentry->d_sb->s_blocksize;
635 buf->f_type = BTRFS_SUPER_MAGIC; 1013 buf->f_type = BTRFS_SUPER_MAGIC;
1014 buf->f_bavail = total_free_data;
1015 ret = btrfs_calc_avail_data_space(root, &total_free_data);
1016 if (ret) {
1017 mutex_unlock(&root->fs_info->chunk_mutex);
1018 return ret;
1019 }
1020 buf->f_bavail += total_free_data;
1021 buf->f_bavail = buf->f_bavail >> bits;
1022 mutex_unlock(&root->fs_info->chunk_mutex);
636 1023
637 /* We treat it as constant endianness (it doesn't matter _which_) 1024 /* We treat it as constant endianness (it doesn't matter _which_)
638 because we want the fsid to come out the same whether mounted 1025 because we want the fsid to come out the same whether mounted
@@ -649,7 +1036,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
649static struct file_system_type btrfs_fs_type = { 1036static struct file_system_type btrfs_fs_type = {
650 .owner = THIS_MODULE, 1037 .owner = THIS_MODULE,
651 .name = "btrfs", 1038 .name = "btrfs",
652 .get_sb = btrfs_get_sb, 1039 .mount = btrfs_mount,
653 .kill_sb = kill_anon_super, 1040 .kill_sb = kill_anon_super,
654 .fs_flags = FS_REQUIRES_DEV, 1041 .fs_flags = FS_REQUIRES_DEV,
655}; 1042};
@@ -700,7 +1087,7 @@ static int btrfs_unfreeze(struct super_block *sb)
700 1087
701static const struct super_operations btrfs_super_ops = { 1088static const struct super_operations btrfs_super_ops = {
702 .drop_inode = btrfs_drop_inode, 1089 .drop_inode = btrfs_drop_inode,
703 .delete_inode = btrfs_delete_inode, 1090 .evict_inode = btrfs_evict_inode,
704 .put_super = btrfs_put_super, 1091 .put_super = btrfs_put_super,
705 .sync_fs = btrfs_sync_fs, 1092 .sync_fs = btrfs_sync_fs,
706 .show_options = btrfs_show_options, 1093 .show_options = btrfs_show_options,
@@ -718,14 +1105,18 @@ static const struct file_operations btrfs_ctl_fops = {
718 .unlocked_ioctl = btrfs_control_ioctl, 1105 .unlocked_ioctl = btrfs_control_ioctl,
719 .compat_ioctl = btrfs_control_ioctl, 1106 .compat_ioctl = btrfs_control_ioctl,
720 .owner = THIS_MODULE, 1107 .owner = THIS_MODULE,
1108 .llseek = noop_llseek,
721}; 1109};
722 1110
723static struct miscdevice btrfs_misc = { 1111static struct miscdevice btrfs_misc = {
724 .minor = MISC_DYNAMIC_MINOR, 1112 .minor = BTRFS_MINOR,
725 .name = "btrfs-control", 1113 .name = "btrfs-control",
726 .fops = &btrfs_ctl_fops 1114 .fops = &btrfs_ctl_fops
727}; 1115};
728 1116
1117MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
1118MODULE_ALIAS("devname:btrfs-control");
1119
729static int btrfs_interface_init(void) 1120static int btrfs_interface_init(void)
730{ 1121{
731 return misc_register(&btrfs_misc); 1122 return misc_register(&btrfs_misc);
@@ -745,10 +1136,14 @@ static int __init init_btrfs_fs(void)
745 if (err) 1136 if (err)
746 return err; 1137 return err;
747 1138
748 err = btrfs_init_cachep(); 1139 err = btrfs_init_compress();
749 if (err) 1140 if (err)
750 goto free_sysfs; 1141 goto free_sysfs;
751 1142
1143 err = btrfs_init_cachep();
1144 if (err)
1145 goto free_compress;
1146
752 err = extent_io_init(); 1147 err = extent_io_init();
753 if (err) 1148 if (err)
754 goto free_cachep; 1149 goto free_cachep;
@@ -776,6 +1171,8 @@ free_extent_io:
776 extent_io_exit(); 1171 extent_io_exit();
777free_cachep: 1172free_cachep:
778 btrfs_destroy_cachep(); 1173 btrfs_destroy_cachep();
1174free_compress:
1175 btrfs_exit_compress();
779free_sysfs: 1176free_sysfs:
780 btrfs_exit_sysfs(); 1177 btrfs_exit_sysfs();
781 return err; 1178 return err;
@@ -790,7 +1187,7 @@ static void __exit exit_btrfs_fs(void)
790 unregister_filesystem(&btrfs_fs_type); 1187 unregister_filesystem(&btrfs_fs_type);
791 btrfs_exit_sysfs(); 1188 btrfs_exit_sysfs();
792 btrfs_cleanup_fs_uuids(); 1189 btrfs_cleanup_fs_uuids();
793 btrfs_zlib_exit(); 1190 btrfs_exit_compress();
794} 1191}
795 1192
796module_init(init_btrfs_fs) 1193module_init(init_btrfs_fs)