aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/super.c
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
commitc71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
treeecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/btrfs/super.c
parentea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r--fs/btrfs/super.c504
1 files changed, 455 insertions, 49 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1776dbd8dc98..15634d4648d7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -39,7 +39,9 @@
39#include <linux/miscdevice.h> 39#include <linux/miscdevice.h>
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h>
42#include "compat.h" 43#include "compat.h"
44#include "delayed-inode.h"
43#include "ctree.h" 45#include "ctree.h"
44#include "disk-io.h" 46#include "disk-io.h"
45#include "transaction.h" 47#include "transaction.h"
@@ -52,8 +54,95 @@
52#include "export.h" 54#include "export.h"
53#include "compression.h" 55#include "compression.h"
54 56
57#define CREATE_TRACE_POINTS
58#include <trace/events/btrfs.h>
59
55static const struct super_operations btrfs_super_ops; 60static const struct super_operations btrfs_super_ops;
56 61
62static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
63 char nbuf[16])
64{
65 char *errstr = NULL;
66
67 switch (errno) {
68 case -EIO:
69 errstr = "IO failure";
70 break;
71 case -ENOMEM:
72 errstr = "Out of memory";
73 break;
74 case -EROFS:
75 errstr = "Readonly filesystem";
76 break;
77 default:
78 if (nbuf) {
79 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
80 errstr = nbuf;
81 }
82 break;
83 }
84
85 return errstr;
86}
87
88static void __save_error_info(struct btrfs_fs_info *fs_info)
89{
90 /*
91 * today we only save the error info into ram. Long term we'll
92 * also send it down to the disk
93 */
94 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
95}
96
/*
 * Record that the filesystem has hit an error.
 *
 * NOTE: the write_super work was moved to umount time in order to avoid
 * a deadlock, because umount holds all the locks.  Only the error state
 * is saved here.
 */
static void save_error_info(struct btrfs_fs_info *fs_info)
{
	__save_error_info(fs_info);
}
105
106/* btrfs handle error by forcing the filesystem readonly */
107static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
108{
109 struct super_block *sb = fs_info->sb;
110
111 if (sb->s_flags & MS_RDONLY)
112 return;
113
114 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
115 sb->s_flags |= MS_RDONLY;
116 printk(KERN_INFO "btrfs is forced readonly\n");
117 }
118}
119
120/*
121 * __btrfs_std_error decodes expected errors from the caller and
122 * invokes the approciate error response.
123 */
124void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
125 unsigned int line, int errno)
126{
127 struct super_block *sb = fs_info->sb;
128 char nbuf[16];
129 const char *errstr;
130
131 /*
132 * Special case: if the error is EROFS, and we're already
133 * under MS_RDONLY, then it is safe here.
134 */
135 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
136 return;
137
138 errstr = btrfs_decode_error(fs_info, errno, nbuf);
139 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
140 sb->s_id, function, line, errstr);
141 save_error_info(fs_info);
142
143 btrfs_handle_error(fs_info);
144}
145
57static void btrfs_put_super(struct super_block *sb) 146static void btrfs_put_super(struct super_block *sb)
58{ 147{
59 struct btrfs_root *root = btrfs_sb(sb); 148 struct btrfs_root *root = btrfs_sb(sb);
@@ -61,14 +150,19 @@ static void btrfs_put_super(struct super_block *sb)
61 150
62 ret = close_ctree(root); 151 ret = close_ctree(root);
63 sb->s_fs_info = NULL; 152 sb->s_fs_info = NULL;
153
154 (void)ret; /* FIXME: need to fix VFS to return error? */
64} 155}
65 156
66enum { 157enum {
67 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, 158 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 159 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 160 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
71 Opt_discard, Opt_err, 162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
165 Opt_inode_cache, Opt_err,
72}; 166};
73 167
74static match_table_t tokens = { 168static match_table_t tokens = {
@@ -83,7 +177,9 @@ static match_table_t tokens = {
83 {Opt_alloc_start, "alloc_start=%s"}, 177 {Opt_alloc_start, "alloc_start=%s"},
84 {Opt_thread_pool, "thread_pool=%d"}, 178 {Opt_thread_pool, "thread_pool=%d"},
85 {Opt_compress, "compress"}, 179 {Opt_compress, "compress"},
180 {Opt_compress_type, "compress=%s"},
86 {Opt_compress_force, "compress-force"}, 181 {Opt_compress_force, "compress-force"},
182 {Opt_compress_force_type, "compress-force=%s"},
87 {Opt_ssd, "ssd"}, 183 {Opt_ssd, "ssd"},
88 {Opt_ssd_spread, "ssd_spread"}, 184 {Opt_ssd_spread, "ssd_spread"},
89 {Opt_nossd, "nossd"}, 185 {Opt_nossd, "nossd"},
@@ -92,6 +188,13 @@ static match_table_t tokens = {
92 {Opt_flushoncommit, "flushoncommit"}, 188 {Opt_flushoncommit, "flushoncommit"},
93 {Opt_ratio, "metadata_ratio=%d"}, 189 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"}, 190 {Opt_discard, "discard"},
191 {Opt_space_cache, "space_cache"},
192 {Opt_clear_cache, "clear_cache"},
193 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
194 {Opt_enospc_debug, "enospc_debug"},
195 {Opt_subvolrootid, "subvolrootid=%d"},
196 {Opt_defrag, "autodefrag"},
197 {Opt_inode_cache, "inode_cache"},
95 {Opt_err, NULL}, 198 {Opt_err, NULL},
96}; 199};
97 200
@@ -106,6 +209,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
106 char *p, *num, *orig; 209 char *p, *num, *orig;
107 int intarg; 210 int intarg;
108 int ret = 0; 211 int ret = 0;
212 char *compress_type;
213 bool compress_force = false;
109 214
110 if (!options) 215 if (!options)
111 return 0; 216 return 0;
@@ -133,6 +238,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
133 break; 238 break;
134 case Opt_subvol: 239 case Opt_subvol:
135 case Opt_subvolid: 240 case Opt_subvolid:
241 case Opt_subvolrootid:
136 case Opt_device: 242 case Opt_device:
137 /* 243 /*
138 * These are parsed by btrfs_parse_early_options 244 * These are parsed by btrfs_parse_early_options
@@ -148,14 +254,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
148 btrfs_set_opt(info->mount_opt, NODATACOW); 254 btrfs_set_opt(info->mount_opt, NODATACOW);
149 btrfs_set_opt(info->mount_opt, NODATASUM); 255 btrfs_set_opt(info->mount_opt, NODATASUM);
150 break; 256 break;
151 case Opt_compress:
152 printk(KERN_INFO "btrfs: use compression\n");
153 btrfs_set_opt(info->mount_opt, COMPRESS);
154 break;
155 case Opt_compress_force: 257 case Opt_compress_force:
156 printk(KERN_INFO "btrfs: forcing compression\n"); 258 case Opt_compress_force_type:
157 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 259 compress_force = true;
260 case Opt_compress:
261 case Opt_compress_type:
262 if (token == Opt_compress ||
263 token == Opt_compress_force ||
264 strcmp(args[0].from, "zlib") == 0) {
265 compress_type = "zlib";
266 info->compress_type = BTRFS_COMPRESS_ZLIB;
267 } else if (strcmp(args[0].from, "lzo") == 0) {
268 compress_type = "lzo";
269 info->compress_type = BTRFS_COMPRESS_LZO;
270 } else {
271 ret = -EINVAL;
272 goto out;
273 }
274
158 btrfs_set_opt(info->mount_opt, COMPRESS); 275 btrfs_set_opt(info->mount_opt, COMPRESS);
276 if (compress_force) {
277 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
278 pr_info("btrfs: force %s compression\n",
279 compress_type);
280 } else
281 pr_info("btrfs: use %s compression\n",
282 compress_type);
159 break; 283 break;
160 case Opt_ssd: 284 case Opt_ssd:
161 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 285 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -235,6 +359,28 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
235 case Opt_discard: 359 case Opt_discard:
236 btrfs_set_opt(info->mount_opt, DISCARD); 360 btrfs_set_opt(info->mount_opt, DISCARD);
237 break; 361 break;
362 case Opt_space_cache:
363 printk(KERN_INFO "btrfs: enabling disk space caching\n");
364 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
365 break;
366 case Opt_inode_cache:
367 printk(KERN_INFO "btrfs: enabling inode map caching\n");
368 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
369 break;
370 case Opt_clear_cache:
371 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
372 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
373 break;
374 case Opt_user_subvol_rm_allowed:
375 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
376 break;
377 case Opt_enospc_debug:
378 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
379 break;
380 case Opt_defrag:
381 printk(KERN_INFO "btrfs: enabling auto defrag");
382 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
383 break;
238 case Opt_err: 384 case Opt_err:
239 printk(KERN_INFO "btrfs: unrecognized mount option " 385 printk(KERN_INFO "btrfs: unrecognized mount option "
240 "'%s'\n", p); 386 "'%s'\n", p);
@@ -257,10 +403,10 @@ out:
257 */ 403 */
258static int btrfs_parse_early_options(const char *options, fmode_t flags, 404static int btrfs_parse_early_options(const char *options, fmode_t flags,
259 void *holder, char **subvol_name, u64 *subvol_objectid, 405 void *holder, char **subvol_name, u64 *subvol_objectid,
260 struct btrfs_fs_devices **fs_devices) 406 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
261{ 407{
262 substring_t args[MAX_OPT_ARGS]; 408 substring_t args[MAX_OPT_ARGS];
263 char *opts, *p; 409 char *opts, *orig, *p;
264 int error = 0; 410 int error = 0;
265 int intarg; 411 int intarg;
266 412
@@ -274,6 +420,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
274 opts = kstrdup(options, GFP_KERNEL); 420 opts = kstrdup(options, GFP_KERNEL);
275 if (!opts) 421 if (!opts)
276 return -ENOMEM; 422 return -ENOMEM;
423 orig = opts;
277 424
278 while ((p = strsep(&opts, ",")) != NULL) { 425 while ((p = strsep(&opts, ",")) != NULL) {
279 int token; 426 int token;
@@ -297,6 +444,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
297 *subvol_objectid = intarg; 444 *subvol_objectid = intarg;
298 } 445 }
299 break; 446 break;
447 case Opt_subvolrootid:
448 intarg = 0;
449 error = match_int(&args[0], &intarg);
450 if (!error) {
451 /* we want the original fs_tree */
452 if (!intarg)
453 *subvol_rootid =
454 BTRFS_FS_TREE_OBJECTID;
455 else
456 *subvol_rootid = intarg;
457 }
458 break;
300 case Opt_device: 459 case Opt_device:
301 error = btrfs_scan_one_device(match_strdup(&args[0]), 460 error = btrfs_scan_one_device(match_strdup(&args[0]),
302 flags, holder, fs_devices); 461 flags, holder, fs_devices);
@@ -309,7 +468,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
309 } 468 }
310 469
311 out_free_opts: 470 out_free_opts:
312 kfree(opts); 471 kfree(orig);
313 out: 472 out:
314 /* 473 /*
315 * If no subvolume name is specified we use the default one. Allocate 474 * If no subvolume name is specified we use the default one. Allocate
@@ -360,8 +519,10 @@ static struct dentry *get_default_root(struct super_block *sb,
360 */ 519 */
361 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 520 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
362 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); 521 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
363 if (IS_ERR(di)) 522 if (IS_ERR(di)) {
523 btrfs_free_path(path);
364 return ERR_CAST(di); 524 return ERR_CAST(di);
525 }
365 if (!di) { 526 if (!di) {
366 /* 527 /*
367 * Ok the default dir item isn't there. This is weird since 528 * Ok the default dir item isn't there. This is weird since
@@ -380,7 +541,7 @@ static struct dentry *get_default_root(struct super_block *sb,
380find_root: 541find_root:
381 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 542 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
382 if (IS_ERR(new_root)) 543 if (IS_ERR(new_root))
383 return ERR_PTR(PTR_ERR(new_root)); 544 return ERR_CAST(new_root);
384 545
385 if (btrfs_root_refs(&new_root->root_item) == 0) 546 if (btrfs_root_refs(&new_root->root_item) == 0)
386 return ERR_PTR(-ENOENT); 547 return ERR_PTR(-ENOENT);
@@ -436,7 +597,6 @@ static int btrfs_fill_super(struct super_block *sb,
436{ 597{
437 struct inode *inode; 598 struct inode *inode;
438 struct dentry *root_dentry; 599 struct dentry *root_dentry;
439 struct btrfs_super_block *disk_super;
440 struct btrfs_root *tree_root; 600 struct btrfs_root *tree_root;
441 struct btrfs_key key; 601 struct btrfs_key key;
442 int err; 602 int err;
@@ -444,6 +604,7 @@ static int btrfs_fill_super(struct super_block *sb,
444 sb->s_maxbytes = MAX_LFS_FILESIZE; 604 sb->s_maxbytes = MAX_LFS_FILESIZE;
445 sb->s_magic = BTRFS_SUPER_MAGIC; 605 sb->s_magic = BTRFS_SUPER_MAGIC;
446 sb->s_op = &btrfs_super_ops; 606 sb->s_op = &btrfs_super_ops;
607 sb->s_d_op = &btrfs_dentry_operations;
447 sb->s_export_op = &btrfs_export_ops; 608 sb->s_export_op = &btrfs_export_ops;
448 sb->s_xattr = btrfs_xattr_handlers; 609 sb->s_xattr = btrfs_xattr_handlers;
449 sb->s_time_gran = 1; 610 sb->s_time_gran = 1;
@@ -458,7 +619,6 @@ static int btrfs_fill_super(struct super_block *sb,
458 return PTR_ERR(tree_root); 619 return PTR_ERR(tree_root);
459 } 620 }
460 sb->s_fs_info = tree_root; 621 sb->s_fs_info = tree_root;
461 disk_super = &tree_root->fs_info->super_copy;
462 622
463 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 623 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
464 key.type = BTRFS_INODE_ITEM_KEY; 624 key.type = BTRFS_INODE_ITEM_KEY;
@@ -479,6 +639,7 @@ static int btrfs_fill_super(struct super_block *sb,
479 sb->s_root = root_dentry; 639 sb->s_root = root_dentry;
480 640
481 save_mount_options(sb, data); 641 save_mount_options(sb, data);
642 cleancache_init_fs(sb);
482 return 0; 643 return 0;
483 644
484fail_close: 645fail_close:
@@ -492,6 +653,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
492 struct btrfs_root *root = btrfs_sb(sb); 653 struct btrfs_root *root = btrfs_sb(sb);
493 int ret; 654 int ret;
494 655
656 trace_btrfs_sync_fs(wait);
657
495 if (!wait) { 658 if (!wait) {
496 filemap_flush(root->fs_info->btree_inode->i_mapping); 659 filemap_flush(root->fs_info->btree_inode->i_mapping);
497 return 0; 660 return 0;
@@ -501,6 +664,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
501 btrfs_wait_ordered_extents(root, 0, 0); 664 btrfs_wait_ordered_extents(root, 0, 0);
502 665
503 trans = btrfs_start_transaction(root, 0); 666 trans = btrfs_start_transaction(root, 0);
667 if (IS_ERR(trans))
668 return PTR_ERR(trans);
504 ret = btrfs_commit_transaction(trans, root); 669 ret = btrfs_commit_transaction(trans, root);
505 return ret; 670 return ret;
506} 671}
@@ -509,6 +674,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
509{ 674{
510 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 675 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
511 struct btrfs_fs_info *info = root->fs_info; 676 struct btrfs_fs_info *info = root->fs_info;
677 char *compress_type;
512 678
513 if (btrfs_test_opt(root, DEGRADED)) 679 if (btrfs_test_opt(root, DEGRADED))
514 seq_puts(seq, ",degraded"); 680 seq_puts(seq, ",degraded");
@@ -527,8 +693,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
527 if (info->thread_pool_size != min_t(unsigned long, 693 if (info->thread_pool_size != min_t(unsigned long,
528 num_online_cpus() + 2, 8)) 694 num_online_cpus() + 2, 8))
529 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 695 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
530 if (btrfs_test_opt(root, COMPRESS)) 696 if (btrfs_test_opt(root, COMPRESS)) {
531 seq_puts(seq, ",compress"); 697 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
698 compress_type = "zlib";
699 else
700 compress_type = "lzo";
701 if (btrfs_test_opt(root, FORCE_COMPRESS))
702 seq_printf(seq, ",compress-force=%s", compress_type);
703 else
704 seq_printf(seq, ",compress=%s", compress_type);
705 }
532 if (btrfs_test_opt(root, NOSSD)) 706 if (btrfs_test_opt(root, NOSSD))
533 seq_puts(seq, ",nossd"); 707 seq_puts(seq, ",nossd");
534 if (btrfs_test_opt(root, SSD_SPREAD)) 708 if (btrfs_test_opt(root, SSD_SPREAD))
@@ -543,46 +717,74 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
543 seq_puts(seq, ",discard"); 717 seq_puts(seq, ",discard");
544 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 718 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
545 seq_puts(seq, ",noacl"); 719 seq_puts(seq, ",noacl");
720 if (btrfs_test_opt(root, SPACE_CACHE))
721 seq_puts(seq, ",space_cache");
722 if (btrfs_test_opt(root, CLEAR_CACHE))
723 seq_puts(seq, ",clear_cache");
724 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
725 seq_puts(seq, ",user_subvol_rm_allowed");
726 if (btrfs_test_opt(root, ENOSPC_DEBUG))
727 seq_puts(seq, ",enospc_debug");
728 if (btrfs_test_opt(root, AUTO_DEFRAG))
729 seq_puts(seq, ",autodefrag");
730 if (btrfs_test_opt(root, INODE_MAP_CACHE))
731 seq_puts(seq, ",inode_cache");
546 return 0; 732 return 0;
547} 733}
548 734
549static int btrfs_test_super(struct super_block *s, void *data) 735static int btrfs_test_super(struct super_block *s, void *data)
550{ 736{
551 struct btrfs_fs_devices *test_fs_devices = data; 737 struct btrfs_root *test_root = data;
552 struct btrfs_root *root = btrfs_sb(s); 738 struct btrfs_root *root = btrfs_sb(s);
553 739
554 return root->fs_info->fs_devices == test_fs_devices; 740 /*
741 * If this super block is going away, return false as it
742 * can't match as an existing super block.
743 */
744 if (!atomic_read(&s->s_active))
745 return 0;
746 return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
555} 747}
556 748
749static int btrfs_set_super(struct super_block *s, void *data)
750{
751 s->s_fs_info = data;
752
753 return set_anon_super(s, data);
754}
755
756
557/* 757/*
558 * Find a superblock for the given device / mount point. 758 * Find a superblock for the given device / mount point.
559 * 759 *
560 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 760 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
561 * for multiple device setup. Make sure to keep it in sync. 761 * for multiple device setup. Make sure to keep it in sync.
562 */ 762 */
563static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 763static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
564 const char *dev_name, void *data, struct vfsmount *mnt) 764 const char *device_name, void *data)
565{ 765{
566 struct block_device *bdev = NULL; 766 struct block_device *bdev = NULL;
567 struct super_block *s; 767 struct super_block *s;
568 struct dentry *root; 768 struct dentry *root;
569 struct btrfs_fs_devices *fs_devices = NULL; 769 struct btrfs_fs_devices *fs_devices = NULL;
770 struct btrfs_root *tree_root = NULL;
771 struct btrfs_fs_info *fs_info = NULL;
570 fmode_t mode = FMODE_READ; 772 fmode_t mode = FMODE_READ;
571 char *subvol_name = NULL; 773 char *subvol_name = NULL;
572 u64 subvol_objectid = 0; 774 u64 subvol_objectid = 0;
775 u64 subvol_rootid = 0;
573 int error = 0; 776 int error = 0;
574 int found = 0;
575 777
576 if (!(flags & MS_RDONLY)) 778 if (!(flags & MS_RDONLY))
577 mode |= FMODE_WRITE; 779 mode |= FMODE_WRITE;
578 780
579 error = btrfs_parse_early_options(data, mode, fs_type, 781 error = btrfs_parse_early_options(data, mode, fs_type,
580 &subvol_name, &subvol_objectid, 782 &subvol_name, &subvol_objectid,
581 &fs_devices); 783 &subvol_rootid, &fs_devices);
582 if (error) 784 if (error)
583 return error; 785 return ERR_PTR(error);
584 786
585 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 787 error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
586 if (error) 788 if (error)
587 goto error_free_subvol_name; 789 goto error_free_subvol_name;
588 790
@@ -595,8 +797,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
595 goto error_close_devices; 797 goto error_close_devices;
596 } 798 }
597 799
800 /*
801 * Setup a dummy root and fs_info for test/set super. This is because
802 * we don't actually fill this stuff out until open_ctree, but we need
803 * it for searching for existing supers, so this lets us do that and
804 * then open_ctree will properly initialize everything later.
805 */
806 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
807 tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
808 if (!fs_info || !tree_root) {
809 error = -ENOMEM;
810 goto error_close_devices;
811 }
812 fs_info->tree_root = tree_root;
813 fs_info->fs_devices = fs_devices;
814 tree_root->fs_info = fs_info;
815
598 bdev = fs_devices->latest_bdev; 816 bdev = fs_devices->latest_bdev;
599 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); 817 s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
600 if (IS_ERR(s)) 818 if (IS_ERR(s))
601 goto error_s; 819 goto error_s;
602 820
@@ -607,12 +825,13 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
607 goto error_close_devices; 825 goto error_close_devices;
608 } 826 }
609 827
610 found = 1;
611 btrfs_close_devices(fs_devices); 828 btrfs_close_devices(fs_devices);
829 kfree(fs_info);
830 kfree(tree_root);
612 } else { 831 } else {
613 char b[BDEVNAME_SIZE]; 832 char b[BDEVNAME_SIZE];
614 833
615 s->s_flags = flags; 834 s->s_flags = flags | MS_NOSEC;
616 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 835 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
617 error = btrfs_fill_super(s, fs_devices, data, 836 error = btrfs_fill_super(s, fs_devices, data,
618 flags & MS_SILENT ? 1 : 0); 837 flags & MS_SILENT ? 1 : 0);
@@ -625,51 +844,58 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
625 s->s_flags |= MS_ACTIVE; 844 s->s_flags |= MS_ACTIVE;
626 } 845 }
627 846
628 root = get_default_root(s, subvol_objectid);
629 if (IS_ERR(root)) {
630 error = PTR_ERR(root);
631 deactivate_locked_super(s);
632 goto error;
633 }
634 /* if they gave us a subvolume name bind mount into that */ 847 /* if they gave us a subvolume name bind mount into that */
635 if (strcmp(subvol_name, ".")) { 848 if (strcmp(subvol_name, ".")) {
636 struct dentry *new_root; 849 struct dentry *new_root;
850
851 root = get_default_root(s, subvol_rootid);
852 if (IS_ERR(root)) {
853 error = PTR_ERR(root);
854 deactivate_locked_super(s);
855 goto error_free_subvol_name;
856 }
857
637 mutex_lock(&root->d_inode->i_mutex); 858 mutex_lock(&root->d_inode->i_mutex);
638 new_root = lookup_one_len(subvol_name, root, 859 new_root = lookup_one_len(subvol_name, root,
639 strlen(subvol_name)); 860 strlen(subvol_name));
640 mutex_unlock(&root->d_inode->i_mutex); 861 mutex_unlock(&root->d_inode->i_mutex);
641 862
642 if (IS_ERR(new_root)) { 863 if (IS_ERR(new_root)) {
864 dput(root);
643 deactivate_locked_super(s); 865 deactivate_locked_super(s);
644 error = PTR_ERR(new_root); 866 error = PTR_ERR(new_root);
645 dput(root); 867 goto error_free_subvol_name;
646 goto error_close_devices;
647 } 868 }
648 if (!new_root->d_inode) { 869 if (!new_root->d_inode) {
649 dput(root); 870 dput(root);
650 dput(new_root); 871 dput(new_root);
651 deactivate_locked_super(s); 872 deactivate_locked_super(s);
652 error = -ENXIO; 873 error = -ENXIO;
653 goto error_close_devices; 874 goto error_free_subvol_name;
654 } 875 }
655 dput(root); 876 dput(root);
656 root = new_root; 877 root = new_root;
878 } else {
879 root = get_default_root(s, subvol_objectid);
880 if (IS_ERR(root)) {
881 error = PTR_ERR(root);
882 deactivate_locked_super(s);
883 goto error_free_subvol_name;
884 }
657 } 885 }
658 886
659 mnt->mnt_sb = s;
660 mnt->mnt_root = root;
661
662 kfree(subvol_name); 887 kfree(subvol_name);
663 return 0; 888 return root;
664 889
665error_s: 890error_s:
666 error = PTR_ERR(s); 891 error = PTR_ERR(s);
667error_close_devices: 892error_close_devices:
668 btrfs_close_devices(fs_devices); 893 btrfs_close_devices(fs_devices);
894 kfree(fs_info);
895 kfree(tree_root);
669error_free_subvol_name: 896error_free_subvol_name:
670 kfree(subvol_name); 897 kfree(subvol_name);
671error: 898 return ERR_PTR(error);
672 return error;
673} 899}
674 900
675static int btrfs_remount(struct super_block *sb, int *flags, char *data) 901static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -709,6 +935,153 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
709 return 0; 935 return 0;
710} 936}
711 937
938/* Used to sort the devices by max_avail(descending sort) */
939static int btrfs_cmp_device_free_bytes(const void *dev_info1,
940 const void *dev_info2)
941{
942 if (((struct btrfs_device_info *)dev_info1)->max_avail >
943 ((struct btrfs_device_info *)dev_info2)->max_avail)
944 return -1;
945 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
946 ((struct btrfs_device_info *)dev_info2)->max_avail)
947 return 1;
948 else
949 return 0;
950}
951
952/*
953 * sort the devices by max_avail, in which max free extent size of each device
954 * is stored.(Descending Sort)
955 */
956static inline void btrfs_descending_sort_devices(
957 struct btrfs_device_info *devices,
958 size_t nr_devices)
959{
960 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
961 btrfs_cmp_device_free_bytes, NULL);
962}
963
964/*
965 * The helper to calc the free space on the devices that can be used to store
966 * file data.
967 */
968static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
969{
970 struct btrfs_fs_info *fs_info = root->fs_info;
971 struct btrfs_device_info *devices_info;
972 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
973 struct btrfs_device *device;
974 u64 skip_space;
975 u64 type;
976 u64 avail_space;
977 u64 used_space;
978 u64 min_stripe_size;
979 int min_stripes = 1;
980 int i = 0, nr_devices;
981 int ret;
982
983 nr_devices = fs_info->fs_devices->rw_devices;
984 BUG_ON(!nr_devices);
985
986 devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
987 GFP_NOFS);
988 if (!devices_info)
989 return -ENOMEM;
990
991 /* calc min stripe number for data space alloction */
992 type = btrfs_get_alloc_profile(root, 1);
993 if (type & BTRFS_BLOCK_GROUP_RAID0)
994 min_stripes = 2;
995 else if (type & BTRFS_BLOCK_GROUP_RAID1)
996 min_stripes = 2;
997 else if (type & BTRFS_BLOCK_GROUP_RAID10)
998 min_stripes = 4;
999
1000 if (type & BTRFS_BLOCK_GROUP_DUP)
1001 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
1002 else
1003 min_stripe_size = BTRFS_STRIPE_LEN;
1004
1005 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
1006 if (!device->in_fs_metadata)
1007 continue;
1008
1009 avail_space = device->total_bytes - device->bytes_used;
1010
1011 /* align with stripe_len */
1012 do_div(avail_space, BTRFS_STRIPE_LEN);
1013 avail_space *= BTRFS_STRIPE_LEN;
1014
1015 /*
1016 * In order to avoid overwritting the superblock on the drive,
1017 * btrfs starts at an offset of at least 1MB when doing chunk
1018 * allocation.
1019 */
1020 skip_space = 1024 * 1024;
1021
1022 /* user can set the offset in fs_info->alloc_start. */
1023 if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
1024 device->total_bytes)
1025 skip_space = max(fs_info->alloc_start, skip_space);
1026
1027 /*
1028 * btrfs can not use the free space in [0, skip_space - 1],
1029 * we must subtract it from the total. In order to implement
1030 * it, we account the used space in this range first.
1031 */
1032 ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
1033 &used_space);
1034 if (ret) {
1035 kfree(devices_info);
1036 return ret;
1037 }
1038
1039 /* calc the free space in [0, skip_space - 1] */
1040 skip_space -= used_space;
1041
1042 /*
1043 * we can use the free space in [0, skip_space - 1], subtract
1044 * it from the total.
1045 */
1046 if (avail_space && avail_space >= skip_space)
1047 avail_space -= skip_space;
1048 else
1049 avail_space = 0;
1050
1051 if (avail_space < min_stripe_size)
1052 continue;
1053
1054 devices_info[i].dev = device;
1055 devices_info[i].max_avail = avail_space;
1056
1057 i++;
1058 }
1059
1060 nr_devices = i;
1061
1062 btrfs_descending_sort_devices(devices_info, nr_devices);
1063
1064 i = nr_devices - 1;
1065 avail_space = 0;
1066 while (nr_devices >= min_stripes) {
1067 if (devices_info[i].max_avail >= min_stripe_size) {
1068 int j;
1069 u64 alloc_size;
1070
1071 avail_space += devices_info[i].max_avail * min_stripes;
1072 alloc_size = devices_info[i].max_avail;
1073 for (j = i + 1 - min_stripes; j <= i; j++)
1074 devices_info[j].max_avail -= alloc_size;
1075 }
1076 i--;
1077 nr_devices--;
1078 }
1079
1080 kfree(devices_info);
1081 *free_bytes = avail_space;
1082 return 0;
1083}
1084
712static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 1085static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
713{ 1086{
714 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 1087 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -716,20 +1089,39 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
716 struct list_head *head = &root->fs_info->space_info; 1089 struct list_head *head = &root->fs_info->space_info;
717 struct btrfs_space_info *found; 1090 struct btrfs_space_info *found;
718 u64 total_used = 0; 1091 u64 total_used = 0;
1092 u64 total_free_data = 0;
719 int bits = dentry->d_sb->s_blocksize_bits; 1093 int bits = dentry->d_sb->s_blocksize_bits;
720 __be32 *fsid = (__be32 *)root->fs_info->fsid; 1094 __be32 *fsid = (__be32 *)root->fs_info->fsid;
1095 int ret;
721 1096
1097 /* holding chunk_mutex to avoid allocating new chunks */
1098 mutex_lock(&root->fs_info->chunk_mutex);
722 rcu_read_lock(); 1099 rcu_read_lock();
723 list_for_each_entry_rcu(found, head, list) 1100 list_for_each_entry_rcu(found, head, list) {
1101 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1102 total_free_data += found->disk_total - found->disk_used;
1103 total_free_data -=
1104 btrfs_account_ro_block_groups_free_space(found);
1105 }
1106
724 total_used += found->disk_used; 1107 total_used += found->disk_used;
1108 }
725 rcu_read_unlock(); 1109 rcu_read_unlock();
726 1110
727 buf->f_namelen = BTRFS_NAME_LEN; 1111 buf->f_namelen = BTRFS_NAME_LEN;
728 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1112 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
729 buf->f_bfree = buf->f_blocks - (total_used >> bits); 1113 buf->f_bfree = buf->f_blocks - (total_used >> bits);
730 buf->f_bavail = buf->f_bfree;
731 buf->f_bsize = dentry->d_sb->s_blocksize; 1114 buf->f_bsize = dentry->d_sb->s_blocksize;
732 buf->f_type = BTRFS_SUPER_MAGIC; 1115 buf->f_type = BTRFS_SUPER_MAGIC;
1116 buf->f_bavail = total_free_data;
1117 ret = btrfs_calc_avail_data_space(root, &total_free_data);
1118 if (ret) {
1119 mutex_unlock(&root->fs_info->chunk_mutex);
1120 return ret;
1121 }
1122 buf->f_bavail += total_free_data;
1123 buf->f_bavail = buf->f_bavail >> bits;
1124 mutex_unlock(&root->fs_info->chunk_mutex);
733 1125
734 /* We treat it as constant endianness (it doesn't matter _which_) 1126 /* We treat it as constant endianness (it doesn't matter _which_)
735 because we want the fsid to come out the same whether mounted 1127 because we want the fsid to come out the same whether mounted
@@ -746,7 +1138,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
746static struct file_system_type btrfs_fs_type = { 1138static struct file_system_type btrfs_fs_type = {
747 .owner = THIS_MODULE, 1139 .owner = THIS_MODULE,
748 .name = "btrfs", 1140 .name = "btrfs",
749 .get_sb = btrfs_get_sb, 1141 .mount = btrfs_mount,
750 .kill_sb = kill_anon_super, 1142 .kill_sb = kill_anon_super,
751 .fs_flags = FS_REQUIRES_DEV, 1143 .fs_flags = FS_REQUIRES_DEV,
752}; 1144};
@@ -815,6 +1207,7 @@ static const struct file_operations btrfs_ctl_fops = {
815 .unlocked_ioctl = btrfs_control_ioctl, 1207 .unlocked_ioctl = btrfs_control_ioctl,
816 .compat_ioctl = btrfs_control_ioctl, 1208 .compat_ioctl = btrfs_control_ioctl,
817 .owner = THIS_MODULE, 1209 .owner = THIS_MODULE,
1210 .llseek = noop_llseek,
818}; 1211};
819 1212
820static struct miscdevice btrfs_misc = { 1213static struct miscdevice btrfs_misc = {
@@ -845,10 +1238,14 @@ static int __init init_btrfs_fs(void)
845 if (err) 1238 if (err)
846 return err; 1239 return err;
847 1240
848 err = btrfs_init_cachep(); 1241 err = btrfs_init_compress();
849 if (err) 1242 if (err)
850 goto free_sysfs; 1243 goto free_sysfs;
851 1244
1245 err = btrfs_init_cachep();
1246 if (err)
1247 goto free_compress;
1248
852 err = extent_io_init(); 1249 err = extent_io_init();
853 if (err) 1250 if (err)
854 goto free_cachep; 1251 goto free_cachep;
@@ -857,10 +1254,14 @@ static int __init init_btrfs_fs(void)
857 if (err) 1254 if (err)
858 goto free_extent_io; 1255 goto free_extent_io;
859 1256
860 err = btrfs_interface_init(); 1257 err = btrfs_delayed_inode_init();
861 if (err) 1258 if (err)
862 goto free_extent_map; 1259 goto free_extent_map;
863 1260
1261 err = btrfs_interface_init();
1262 if (err)
1263 goto free_delayed_inode;
1264
864 err = register_filesystem(&btrfs_fs_type); 1265 err = register_filesystem(&btrfs_fs_type);
865 if (err) 1266 if (err)
866 goto unregister_ioctl; 1267 goto unregister_ioctl;
@@ -870,12 +1271,16 @@ static int __init init_btrfs_fs(void)
870 1271
871unregister_ioctl: 1272unregister_ioctl:
872 btrfs_interface_exit(); 1273 btrfs_interface_exit();
1274free_delayed_inode:
1275 btrfs_delayed_inode_exit();
873free_extent_map: 1276free_extent_map:
874 extent_map_exit(); 1277 extent_map_exit();
875free_extent_io: 1278free_extent_io:
876 extent_io_exit(); 1279 extent_io_exit();
877free_cachep: 1280free_cachep:
878 btrfs_destroy_cachep(); 1281 btrfs_destroy_cachep();
1282free_compress:
1283 btrfs_exit_compress();
879free_sysfs: 1284free_sysfs:
880 btrfs_exit_sysfs(); 1285 btrfs_exit_sysfs();
881 return err; 1286 return err;
@@ -884,13 +1289,14 @@ free_sysfs:
884static void __exit exit_btrfs_fs(void) 1289static void __exit exit_btrfs_fs(void)
885{ 1290{
886 btrfs_destroy_cachep(); 1291 btrfs_destroy_cachep();
1292 btrfs_delayed_inode_exit();
887 extent_map_exit(); 1293 extent_map_exit();
888 extent_io_exit(); 1294 extent_io_exit();
889 btrfs_interface_exit(); 1295 btrfs_interface_exit();
890 unregister_filesystem(&btrfs_fs_type); 1296 unregister_filesystem(&btrfs_fs_type);
891 btrfs_exit_sysfs(); 1297 btrfs_exit_sysfs();
892 btrfs_cleanup_fs_uuids(); 1298 btrfs_cleanup_fs_uuids();
893 btrfs_zlib_exit(); 1299 btrfs_exit_compress();
894} 1300}
895 1301
896module_init(init_btrfs_fs) 1302module_init(init_btrfs_fs)