aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-06-20 11:58:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-06-20 11:58:53 -0400
commit90a800de0a29426ea900ecd53f2929d5f4bc4578 (patch)
tree22b4ec6119e438c73d695522042669b601672579
parent10e18e62309a882c513b4f516527055b6a60d668 (diff)
parente999376f094162aa425ae749aa1df95ab928d010 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: Btrfs: avoid delayed metadata items during commits btrfs: fix uninitialized return value btrfs: fix wrong reservation when doing delayed inode operations btrfs: Remove unused sysfs code btrfs: fix dereference of ERR_PTR value Btrfs: fix relocation races Btrfs: set no_trans_join after trying to expand the transaction Btrfs: protect the pending_snapshots list with trans_lock Btrfs: fix path leakage on subvol deletion Btrfs: drop the delalloc_bytes check in shrink_delalloc Btrfs: check the return value from set_anon_super
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/delayed-inode.c32
-rw-r--r--fs/btrfs/delayed-inode.h5
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c4
-rw-r--r--fs/btrfs/inode.c1
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c114
-rw-r--r--fs/btrfs/tree-log.c2
11 files changed, 174 insertions, 189 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..300628795fdb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -967,6 +967,12 @@ struct btrfs_fs_info {
967 struct srcu_struct subvol_srcu; 967 struct srcu_struct subvol_srcu;
968 968
969 spinlock_t trans_lock; 969 spinlock_t trans_lock;
970 /*
971 * the reloc mutex goes with the trans lock, it is taken
972 * during commit to protect us from the relocation code
973 */
974 struct mutex reloc_mutex;
975
970 struct list_head trans_list; 976 struct list_head trans_list;
971 struct list_head hashers; 977 struct list_head hashers;
972 struct list_head dead_roots; 978 struct list_head dead_roots;
@@ -1172,6 +1178,14 @@ struct btrfs_root {
1172 u32 type; 1178 u32 type;
1173 1179
1174 u64 highest_objectid; 1180 u64 highest_objectid;
1181
1182 /* btrfs_record_root_in_trans is a multi-step process,
1183 * and it can race with the balancing code. But the
1184 * race is very small, and only the first time the root
1185 * is added to each transaction. So in_trans_setup
1186 * is used to tell us when more checks are required
1187 */
1188 unsigned long in_trans_setup;
1175 int ref_cows; 1189 int ref_cows;
1176 int track_dirty; 1190 int track_dirty;
1177 int in_radix; 1191 int in_radix;
@@ -1181,7 +1195,6 @@ struct btrfs_root {
1181 struct btrfs_key defrag_max; 1195 struct btrfs_key defrag_max;
1182 int defrag_running; 1196 int defrag_running;
1183 char *name; 1197 char *name;
1184 int in_sysfs;
1185 1198
1186 /* the dirty list is only used by non-reference counted roots */ 1199 /* the dirty list is only used by non-reference counted roots */
1187 struct list_head dirty_list; 1200 struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..f1cbd028f7b3 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
297 item->data_len = data_len; 297 item->data_len = data_len;
298 item->ins_or_del = 0; 298 item->ins_or_del = 0;
299 item->bytes_reserved = 0; 299 item->bytes_reserved = 0;
300 item->block_rsv = NULL;
301 item->delayed_node = NULL; 300 item->delayed_node = NULL;
302 atomic_set(&item->refs, 1); 301 atomic_set(&item->refs, 1);
303 } 302 }
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
593 592
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 593 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); 594 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) { 595 if (!ret)
597 item->bytes_reserved = num_bytes; 596 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600 597
601 return ret; 598 return ret;
602} 599}
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, 601static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item) 602 struct btrfs_delayed_item *item)
606{ 603{
604 struct btrfs_block_rsv *rsv;
605
607 if (!item->bytes_reserved) 606 if (!item->bytes_reserved)
608 return; 607 return;
609 608
610 btrfs_block_rsv_release(root, item->block_rsv, 609 rsv = &root->fs_info->global_block_rsv;
610 btrfs_block_rsv_release(root, rsv,
611 item->bytes_reserved); 611 item->bytes_reserved);
612} 612}
613 613
@@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1014 struct btrfs_delayed_root *delayed_root; 1014 struct btrfs_delayed_root *delayed_root;
1015 struct btrfs_delayed_node *curr_node, *prev_node; 1015 struct btrfs_delayed_node *curr_node, *prev_node;
1016 struct btrfs_path *path; 1016 struct btrfs_path *path;
1017 struct btrfs_block_rsv *block_rsv;
1017 int ret = 0; 1018 int ret = 0;
1018 1019
1019 path = btrfs_alloc_path(); 1020 path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1021 return -ENOMEM; 1022 return -ENOMEM;
1022 path->leave_spinning = 1; 1023 path->leave_spinning = 1;
1023 1024
1025 block_rsv = trans->block_rsv;
1026 trans->block_rsv = &root->fs_info->global_block_rsv;
1027
1024 delayed_root = btrfs_get_delayed_root(root); 1028 delayed_root = btrfs_get_delayed_root(root);
1025 1029
1026 curr_node = btrfs_first_delayed_node(delayed_root); 1030 curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1045 } 1049 }
1046 1050
1047 btrfs_free_path(path); 1051 btrfs_free_path(path);
1052 trans->block_rsv = block_rsv;
1048 return ret; 1053 return ret;
1049} 1054}
1050 1055
@@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1052 struct btrfs_delayed_node *node) 1057 struct btrfs_delayed_node *node)
1053{ 1058{
1054 struct btrfs_path *path; 1059 struct btrfs_path *path;
1060 struct btrfs_block_rsv *block_rsv;
1055 int ret; 1061 int ret;
1056 1062
1057 path = btrfs_alloc_path(); 1063 path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1059 return -ENOMEM; 1065 return -ENOMEM;
1060 path->leave_spinning = 1; 1066 path->leave_spinning = 1;
1061 1067
1068 block_rsv = trans->block_rsv;
1069 trans->block_rsv = &node->root->fs_info->global_block_rsv;
1070
1062 ret = btrfs_insert_delayed_items(trans, path, node->root, node); 1071 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1063 if (!ret) 1072 if (!ret)
1064 ret = btrfs_delete_delayed_items(trans, path, node->root, node); 1073 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1066 ret = btrfs_update_delayed_inode(trans, node->root, path, node); 1075 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1067 btrfs_free_path(path); 1076 btrfs_free_path(path);
1068 1077
1078 trans->block_rsv = block_rsv;
1069 return ret; 1079 return ret;
1070} 1080}
1071 1081
@@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1116 struct btrfs_path *path; 1126 struct btrfs_path *path;
1117 struct btrfs_delayed_node *delayed_node = NULL; 1127 struct btrfs_delayed_node *delayed_node = NULL;
1118 struct btrfs_root *root; 1128 struct btrfs_root *root;
1129 struct btrfs_block_rsv *block_rsv;
1119 unsigned long nr = 0; 1130 unsigned long nr = 0;
1120 int need_requeue = 0; 1131 int need_requeue = 0;
1121 int ret; 1132 int ret;
@@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1134 if (IS_ERR(trans)) 1145 if (IS_ERR(trans))
1135 goto free_path; 1146 goto free_path;
1136 1147
1148 block_rsv = trans->block_rsv;
1149 trans->block_rsv = &root->fs_info->global_block_rsv;
1150
1137 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); 1151 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1138 if (!ret) 1152 if (!ret)
1139 ret = btrfs_delete_delayed_items(trans, path, root, 1153 ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1176 1190
1177 nr = trans->blocks_used; 1191 nr = trans->blocks_used;
1178 1192
1193 trans->block_rsv = block_rsv;
1179 btrfs_end_transaction_dmeta(trans, root); 1194 btrfs_end_transaction_dmeta(trans, root);
1180 __btrfs_btree_balance_dirty(root, nr); 1195 __btrfs_btree_balance_dirty(root, nr);
1181free_path: 1196free_path:
@@ -1222,6 +1237,13 @@ again:
1222 return 0; 1237 return 0;
1223} 1238}
1224 1239
1240void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1241{
1242 struct btrfs_delayed_root *delayed_root;
1243 delayed_root = btrfs_get_delayed_root(root);
1244 WARN_ON(btrfs_first_delayed_node(delayed_root));
1245}
1246
1225void btrfs_balance_delayed_items(struct btrfs_root *root) 1247void btrfs_balance_delayed_items(struct btrfs_root *root)
1226{ 1248{
1227 struct btrfs_delayed_root *delayed_root; 1249 struct btrfs_delayed_root *delayed_root;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..d1a6a2915c66 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
75 struct list_head tree_list; /* used for batch insert/delete items */ 75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */ 76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved; 77 u64 bytes_reserved;
78 struct btrfs_block_rsv *block_rsv;
79 struct btrfs_delayed_node *delayed_node; 78 struct btrfs_delayed_node *delayed_node;
80 atomic_t refs; 79 atomic_t refs;
81 int ins_or_del; 80 int ins_or_del;
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
138/* for init */ 137/* for init */
139int __init btrfs_delayed_inode_init(void); 138int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void); 139void btrfs_delayed_inode_exit(void);
140
141/* for debugging */
142void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
143
141#endif 144#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9f68c6898653..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1044 root->last_trans = 0; 1044 root->last_trans = 0;
1045 root->highest_objectid = 0; 1045 root->highest_objectid = 0;
1046 root->name = NULL; 1046 root->name = NULL;
1047 root->in_sysfs = 0;
1048 root->inode_tree = RB_ROOT; 1047 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1048 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1050 root->block_rsv = NULL; 1049 root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
1300 return root; 1299 return root;
1301 1300
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); 1301 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), 1302 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS); 1303 GFP_NOFS);
1307 if (!root->free_ino_pinned) 1304 if (!root->free_ino_pinned || !root->free_ino_ctl) {
1305 ret = -ENOMEM;
1308 goto fail; 1306 goto fail;
1307 }
1309 1308
1310 btrfs_init_free_ino_ctl(root); 1309 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex); 1310 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock); 1311 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait); 1312 init_waitqueue_head(&root->cache_wait);
1314 1313
1315 set_anon_super(&root->anon_super, NULL); 1314 ret = set_anon_super(&root->anon_super, NULL);
1315 if (ret)
1316 goto fail;
1316 1317
1317 if (btrfs_root_refs(&root->root_item) == 0) { 1318 if (btrfs_root_refs(&root->root_item) == 0) {
1318 ret = -ENOENT; 1319 ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1619 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1620 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock); 1621 spin_lock_init(&fs_info->defrag_inodes_lock);
1622 mutex_init(&fs_info->reloc_mutex);
1621 1623
1622 init_completion(&fs_info->kobj_unregister); 1624 init_completion(&fs_info->kobj_unregister);
1623 fs_info->tree_root = tree_root; 1625 fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2ded51..1f61bf5b4960 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3314 if (reserved == 0) 3314 if (reserved == 0)
3315 return 0; 3315 return 0;
3316 3316
3317 /* nothing to shrink - nothing to reclaim */
3318 if (root->fs_info->delalloc_bytes == 0)
3319 return 0;
3320
3321 max_reclaim = min(reserved, to_reclaim); 3317 max_reclaim = min(reserved, to_reclaim);
3322 3318
3323 while (loops < 1024) { 3319 while (loops < 1024) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 751ddf8fc58a..0a9b10c5b0a7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3076 ret = btrfs_update_inode(trans, root, dir); 3076 ret = btrfs_update_inode(trans, root, dir);
3077 BUG_ON(ret); 3077 BUG_ON(ret);
3078 3078
3079 btrfs_free_path(path);
3079 return 0; 3080 return 0;
3080} 3081}
3081 3082
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b793d112d1f6..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); 482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
483 BUG_ON(ret); 483 BUG_ON(ret);
484 484
485 spin_lock(&root->fs_info->trans_lock);
485 list_add(&pending_snapshot->list, 486 list_add(&pending_snapshot->list,
486 &trans->transaction->pending_snapshots); 487 &trans->transaction->pending_snapshots);
488 spin_unlock(&root->fs_info->trans_lock);
487 if (async_transid) { 489 if (async_transid) {
488 *async_transid = trans->transid; 490 *async_transid = trans->transid;
489 ret = btrfs_commit_transaction_async(trans, 491 ret = btrfs_commit_transaction_async(trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1368 int ret; 1368 int ret;
1369 1369
1370 if (!root->reloc_root) 1370 if (!root->reloc_root)
1371 return 0; 1371 goto out;
1372 1372
1373 reloc_root = root->reloc_root; 1373 reloc_root = root->reloc_root;
1374 root_item = &reloc_root->root_item; 1374 root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1390 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1390 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1391 &reloc_root->root_key, root_item); 1391 &reloc_root->root_key, root_item);
1392 BUG_ON(ret); 1392 BUG_ON(ret);
1393
1394out:
1393 return 0; 1395 return 0;
1394} 1396}
1395 1397
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2142 u64 num_bytes = 0; 2144 u64 num_bytes = 0;
2143 int ret; 2145 int ret;
2144 2146
2145 spin_lock(&root->fs_info->trans_lock); 2147 mutex_lock(&root->fs_info->reloc_mutex);
2146 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2148 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2147 rc->merging_rsv_size += rc->nodes_relocated * 2; 2149 rc->merging_rsv_size += rc->nodes_relocated * 2;
2148 spin_unlock(&root->fs_info->trans_lock); 2150 mutex_unlock(&root->fs_info->reloc_mutex);
2151
2149again: 2152again:
2150 if (!err) { 2153 if (!err) {
2151 num_bytes = rc->merging_rsv_size; 2154 num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
2214 int ret; 2217 int ret;
2215again: 2218again:
2216 root = rc->extent_root; 2219 root = rc->extent_root;
2217 spin_lock(&root->fs_info->trans_lock); 2220
2221 /*
2222 * this serializes us with btrfs_record_root_in_transaction,
2223 * we have to make sure nobody is in the middle of
2224 * adding their roots to the list while we are
2225 * doing this splice
2226 */
2227 mutex_lock(&root->fs_info->reloc_mutex);
2218 list_splice_init(&rc->reloc_roots, &reloc_roots); 2228 list_splice_init(&rc->reloc_roots, &reloc_roots);
2219 spin_unlock(&root->fs_info->trans_lock); 2229 mutex_unlock(&root->fs_info->reloc_mutex);
2220 2230
2221 while (!list_empty(&reloc_roots)) { 2231 while (!list_empty(&reloc_roots)) {
2222 found = 1; 2232 found = 1;
@@ -3590,17 +3600,19 @@ next:
3590static void set_reloc_control(struct reloc_control *rc) 3600static void set_reloc_control(struct reloc_control *rc)
3591{ 3601{
3592 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3602 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3593 spin_lock(&fs_info->trans_lock); 3603
3604 mutex_lock(&fs_info->reloc_mutex);
3594 fs_info->reloc_ctl = rc; 3605 fs_info->reloc_ctl = rc;
3595 spin_unlock(&fs_info->trans_lock); 3606 mutex_unlock(&fs_info->reloc_mutex);
3596} 3607}
3597 3608
3598static void unset_reloc_control(struct reloc_control *rc) 3609static void unset_reloc_control(struct reloc_control *rc)
3599{ 3610{
3600 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3611 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3601 spin_lock(&fs_info->trans_lock); 3612
3613 mutex_lock(&fs_info->reloc_mutex);
3602 fs_info->reloc_ctl = NULL; 3614 fs_info->reloc_ctl = NULL;
3603 spin_unlock(&fs_info->trans_lock); 3615 mutex_unlock(&fs_info->reloc_mutex);
3604} 3616}
3605 3617
3606static int check_extent_flags(u64 flags) 3618static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
28#include "disk-io.h" 28#include "disk-io.h"
29#include "transaction.h" 29#include "transaction.h"
30 30
31static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
32{
33 return snprintf(buf, PAGE_SIZE, "%llu\n",
34 (unsigned long long)btrfs_root_used(&root->root_item));
35}
36
37static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
38{
39 return snprintf(buf, PAGE_SIZE, "%llu\n",
40 (unsigned long long)btrfs_root_limit(&root->root_item));
41}
42
43static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
44{
45
46 return snprintf(buf, PAGE_SIZE, "%llu\n",
47 (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
48}
49
50static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
51{
52 return snprintf(buf, PAGE_SIZE, "%llu\n",
53 (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
54}
55
56static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
57{
58 return snprintf(buf, PAGE_SIZE, "%llu\n",
59 (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
60}
61
62/* this is for root attrs (subvols/snapshots) */
63struct btrfs_root_attr {
64 struct attribute attr;
65 ssize_t (*show)(struct btrfs_root *, char *);
66 ssize_t (*store)(struct btrfs_root *, const char *, size_t);
67};
68
69#define ROOT_ATTR(name, mode, show, store) \
70static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
71 show, store)
72
73ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
74ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
75
76static struct attribute *btrfs_root_attrs[] = {
77 &btrfs_root_attr_blocks_used.attr,
78 &btrfs_root_attr_block_limit.attr,
79 NULL,
80};
81
82/* this is for super attrs (actual full fs) */
83struct btrfs_super_attr {
84 struct attribute attr;
85 ssize_t (*show)(struct btrfs_fs_info *, char *);
86 ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
87};
88
89#define SUPER_ATTR(name, mode, show, store) \
90static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
91 show, store)
92
93SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
94SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
95SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
96
97static struct attribute *btrfs_super_attrs[] = {
98 &btrfs_super_attr_blocks_used.attr,
99 &btrfs_super_attr_total_blocks.attr,
100 &btrfs_super_attr_blocksize.attr,
101 NULL,
102};
103
104static ssize_t btrfs_super_attr_show(struct kobject *kobj,
105 struct attribute *attr, char *buf)
106{
107 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
108 super_kobj);
109 struct btrfs_super_attr *a = container_of(attr,
110 struct btrfs_super_attr,
111 attr);
112
113 return a->show ? a->show(fs, buf) : 0;
114}
115
116static ssize_t btrfs_super_attr_store(struct kobject *kobj,
117 struct attribute *attr,
118 const char *buf, size_t len)
119{
120 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
121 super_kobj);
122 struct btrfs_super_attr *a = container_of(attr,
123 struct btrfs_super_attr,
124 attr);
125
126 return a->store ? a->store(fs, buf, len) : 0;
127}
128
129static ssize_t btrfs_root_attr_show(struct kobject *kobj,
130 struct attribute *attr, char *buf)
131{
132 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
133 root_kobj);
134 struct btrfs_root_attr *a = container_of(attr,
135 struct btrfs_root_attr,
136 attr);
137
138 return a->show ? a->show(root, buf) : 0;
139}
140
141static ssize_t btrfs_root_attr_store(struct kobject *kobj,
142 struct attribute *attr,
143 const char *buf, size_t len)
144{
145 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
146 root_kobj);
147 struct btrfs_root_attr *a = container_of(attr,
148 struct btrfs_root_attr,
149 attr);
150 return a->store ? a->store(root, buf, len) : 0;
151}
152
153static void btrfs_super_release(struct kobject *kobj)
154{
155 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
156 super_kobj);
157 complete(&fs->kobj_unregister);
158}
159
160static void btrfs_root_release(struct kobject *kobj)
161{
162 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
163 root_kobj);
164 complete(&root->kobj_unregister);
165}
166
167static const struct sysfs_ops btrfs_super_attr_ops = {
168 .show = btrfs_super_attr_show,
169 .store = btrfs_super_attr_store,
170};
171
172static const struct sysfs_ops btrfs_root_attr_ops = {
173 .show = btrfs_root_attr_show,
174 .store = btrfs_root_attr_store,
175};
176
177/* /sys/fs/btrfs/ entry */ 31/* /sys/fs/btrfs/ entry */
178static struct kset *btrfs_kset; 32static struct kset *btrfs_kset;
179 33
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b9fe98..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
126 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
127 * when the transaction commits 127 * when the transaction commits
128 */ 128 */
129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 129static int record_root_in_trans(struct btrfs_trans_handle *trans,
130 struct btrfs_root *root) 130 struct btrfs_root *root)
131{ 131{
132 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
133 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
134 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
135 135
136 /*
137 * see below for in_trans_setup usage rules
138 * we have the reloc mutex held now, so there
139 * is only one writer in this function
140 */
141 root->in_trans_setup = 1;
142
143 /* make sure readers find in_trans_setup before
144 * they find our root->last_trans update
145 */
146 smp_wmb();
147
136 spin_lock(&root->fs_info->fs_roots_radix_lock); 148 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) { 149 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock); 150 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0; 151 return 0;
140 } 152 }
141 root->last_trans = trans->transid;
142 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 153 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
143 (unsigned long)root->root_key.objectid, 154 (unsigned long)root->root_key.objectid,
144 BTRFS_ROOT_TRANS_TAG); 155 BTRFS_ROOT_TRANS_TAG);
145 spin_unlock(&root->fs_info->fs_roots_radix_lock); 156 spin_unlock(&root->fs_info->fs_roots_radix_lock);
157 root->last_trans = trans->transid;
158
159 /* this is pretty tricky. We don't want to
160 * take the relocation lock in btrfs_record_root_in_trans
161 * unless we're really doing the first setup for this root in
162 * this transaction.
163 *
164 * Normally we'd use root->last_trans as a flag to decide
165 * if we want to take the expensive mutex.
166 *
167 * But, we have to set root->last_trans before we
168 * init the relocation root, otherwise, we trip over warnings
169 * in ctree.c. The solution used here is to flag ourselves
170 * with root->in_trans_setup. When this is 1, we're still
171 * fixing up the reloc trees and everyone must wait.
172 *
173 * When this is zero, they can trust root->last_trans and fly
174 * through btrfs_record_root_in_trans without having to take the
175 * lock. smp_wmb() makes sure that all the writes above are
176 * done before we pop in the zero below
177 */
146 btrfs_init_reloc_root(trans, root); 178 btrfs_init_reloc_root(trans, root);
179 smp_wmb();
180 root->in_trans_setup = 0;
147 } 181 }
148 return 0; 182 return 0;
149} 183}
150 184
185
186int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root)
188{
189 if (!root->ref_cows)
190 return 0;
191
192 /*
193 * see record_root_in_trans for comments about in_trans_setup usage
194 * and barriers
195 */
196 smp_rmb();
197 if (root->last_trans == trans->transid &&
198 !root->in_trans_setup)
199 return 0;
200
201 mutex_lock(&root->fs_info->reloc_mutex);
202 record_root_in_trans(trans, root);
203 mutex_unlock(&root->fs_info->reloc_mutex);
204
205 return 0;
206}
207
151/* wait for commit against the current transaction to become unblocked 208/* wait for commit against the current transaction to become unblocked
152 * when this is done, it is safe to start a new transaction, but the current 209 * when this is done, it is safe to start a new transaction, but the current
153 * transaction might not be fully on disk. 210 * transaction might not be fully on disk.
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
882 parent = dget_parent(dentry); 939 parent = dget_parent(dentry);
883 parent_inode = parent->d_inode; 940 parent_inode = parent->d_inode;
884 parent_root = BTRFS_I(parent_inode)->root; 941 parent_root = BTRFS_I(parent_inode)->root;
885 btrfs_record_root_in_trans(trans, parent_root); 942 record_root_in_trans(trans, parent_root);
886 943
887 /* 944 /*
888 * insert the directory item 945 * insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
900 ret = btrfs_update_inode(trans, parent_root, parent_inode); 957 ret = btrfs_update_inode(trans, parent_root, parent_inode);
901 BUG_ON(ret); 958 BUG_ON(ret);
902 959
903 btrfs_record_root_in_trans(trans, root); 960 /*
961 * pull in the delayed directory update
962 * and the delayed inode item
963 * otherwise we corrupt the FS during
964 * snapshot
965 */
966 ret = btrfs_run_delayed_items(trans, root);
967 BUG_ON(ret);
968
969 record_root_in_trans(trans, root);
904 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 970 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
905 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 971 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
906 btrfs_check_and_init_root_item(new_root_item); 972 btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
961 int ret; 1027 int ret;
962 1028
963 list_for_each_entry(pending, head, list) { 1029 list_for_each_entry(pending, head, list) {
964 /*
965 * We must deal with the delayed items before creating
966 * snapshots, or we will create a snapthot with inconsistent
967 * information.
968 */
969 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
970 BUG_ON(ret);
971
972 ret = create_pending_snapshot(trans, fs_info, pending); 1030 ret = create_pending_snapshot(trans, fs_info, pending);
973 BUG_ON(ret); 1031 BUG_ON(ret);
974 } 1032 }
@@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1241 schedule_timeout(1); 1299 schedule_timeout(1);
1242 1300
1243 finish_wait(&cur_trans->writer_wait, &wait); 1301 finish_wait(&cur_trans->writer_wait, &wait);
1244 spin_lock(&root->fs_info->trans_lock);
1245 root->fs_info->trans_no_join = 1;
1246 spin_unlock(&root->fs_info->trans_lock);
1247 } while (atomic_read(&cur_trans->num_writers) > 1 || 1302 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1248 (should_grow && cur_trans->num_joined != joined)); 1303 (should_grow && cur_trans->num_joined != joined));
1249 1304
1250 ret = create_pending_snapshots(trans, root->fs_info); 1305 /*
1251 BUG_ON(ret); 1306 * Ok now we need to make sure to block out any other joins while we
1307 * commit the transaction. We could have started a join before setting
1308 * no_join so make sure to wait for num_writers to == 1 again.
1309 */
1310 spin_lock(&root->fs_info->trans_lock);
1311 root->fs_info->trans_no_join = 1;
1312 spin_unlock(&root->fs_info->trans_lock);
1313 wait_event(cur_trans->writer_wait,
1314 atomic_read(&cur_trans->num_writers) == 1);
1315
1316 /*
1317 * the reloc mutex makes sure that we stop
1318 * the balancing code from coming in and moving
1319 * extents around in the middle of the commit
1320 */
1321 mutex_lock(&root->fs_info->reloc_mutex);
1252 1322
1253 ret = btrfs_run_delayed_items(trans, root); 1323 ret = btrfs_run_delayed_items(trans, root);
1254 BUG_ON(ret); 1324 BUG_ON(ret);
1255 1325
1326 ret = create_pending_snapshots(trans, root->fs_info);
1327 BUG_ON(ret);
1328
1256 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1329 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1257 BUG_ON(ret); 1330 BUG_ON(ret);
1258 1331
1332 /*
1333 * make sure none of the code above managed to slip in a
1334 * delayed item
1335 */
1336 btrfs_assert_delayed_root_empty(root);
1337
1259 WARN_ON(cur_trans != trans->transaction); 1338 WARN_ON(cur_trans != trans->transaction);
1260 1339
1261 btrfs_scrub_pause(root); 1340 btrfs_scrub_pause(root);
@@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1312 root->fs_info->running_transaction = NULL; 1391 root->fs_info->running_transaction = NULL;
1313 root->fs_info->trans_no_join = 0; 1392 root->fs_info->trans_no_join = 0;
1314 spin_unlock(&root->fs_info->trans_lock); 1393 spin_unlock(&root->fs_info->trans_lock);
1394 mutex_unlock(&root->fs_info->reloc_mutex);
1315 1395
1316 wake_up(&root->fs_info->transaction_wait); 1396 wake_up(&root->fs_info->transaction_wait);
1317 1397
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
3177 tmp_key.offset = (u64)-1; 3177 tmp_key.offset = (u64)-1;
3178 3178
3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
3180 BUG_ON(!wc.replay_dest); 3180 BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
3181 3181
3182 wc.replay_dest->log_root = log; 3182 wc.replay_dest->log_root = log;
3183 btrfs_record_root_in_trans(trans, wc.replay_dest); 3183 btrfs_record_root_in_trans(trans, wc.replay_dest);