aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-06-30 01:23:59 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-06-30 01:23:59 -0400
commit6da49a292534d31ca391928ea97f8225774dc1e0 (patch)
tree060c5f5f40dceb10d4528763bc7fc1ccc88210d9 /fs/btrfs
parent17bdc6c0e979ae61879806e4dd93ec3b169d0931 (diff)
parentca56a95eedcc95f8fea7b49c87565cd961d74fe2 (diff)
Merge remote branch 'origin/master' into next
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h16
-rw-r--r--fs/btrfs/delayed-inode.c136
-rw-r--r--fs/btrfs/delayed-inode.h6
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent-tree.c63
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-cache.c172
-rw-r--r--fs/btrfs/inode.c18
-rw-r--r--fs/btrfs/ioctl.c25
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/scrub.c69
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c121
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c8
16 files changed, 507 insertions, 334 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d84089349c82..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root,
1228 u32 nr; 1228 u32 nr;
1229 u32 blocksize; 1229 u32 blocksize;
1230 u32 nscan = 0; 1230 u32 nscan = 0;
1231 bool map = true;
1231 1232
1232 if (level != 1) 1233 if (level != 1)
1233 return; 1234 return;
@@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root,
1249 1250
1250 nritems = btrfs_header_nritems(node); 1251 nritems = btrfs_header_nritems(node);
1251 nr = slot; 1252 nr = slot;
1253 if (node->map_token || path->skip_locking)
1254 map = false;
1255
1252 while (1) { 1256 while (1) {
1253 if (!node->map_token) { 1257 if (map && !node->map_token) {
1254 unsigned long offset = btrfs_node_key_ptr_offset(nr); 1258 unsigned long offset = btrfs_node_key_ptr_offset(nr);
1255 map_private_extent_buffer(node, offset, 1259 map_private_extent_buffer(node, offset,
1256 sizeof(struct btrfs_key_ptr), 1260 sizeof(struct btrfs_key_ptr),
@@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root,
1277 if ((search <= target && target - search <= 65536) || 1281 if ((search <= target && target - search <= 65536) ||
1278 (search > target && search - target <= 65536)) { 1282 (search > target && search - target <= 65536)) {
1279 gen = btrfs_node_ptr_generation(node, nr); 1283 gen = btrfs_node_ptr_generation(node, nr);
1280 if (node->map_token) { 1284 if (map && node->map_token) {
1281 unmap_extent_buffer(node, node->map_token, 1285 unmap_extent_buffer(node, node->map_token,
1282 KM_USER1); 1286 KM_USER1);
1283 node->map_token = NULL; 1287 node->map_token = NULL;
@@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root,
1289 if ((nread > 65536 || nscan > 32)) 1293 if ((nread > 65536 || nscan > 32))
1290 break; 1294 break;
1291 } 1295 }
1292 if (node->map_token) { 1296 if (map && node->map_token) {
1293 unmap_extent_buffer(node, node->map_token, KM_USER1); 1297 unmap_extent_buffer(node, node->map_token, KM_USER1);
1294 node->map_token = NULL; 1298 node->map_token = NULL;
1295 } 1299 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..f30ac05dbda7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,7 +19,6 @@
19#ifndef __BTRFS_CTREE__ 19#ifndef __BTRFS_CTREE__
20#define __BTRFS_CTREE__ 20#define __BTRFS_CTREE__
21 21
22#include <linux/version.h>
23#include <linux/mm.h> 22#include <linux/mm.h>
24#include <linux/highmem.h> 23#include <linux/highmem.h>
25#include <linux/fs.h> 24#include <linux/fs.h>
@@ -967,6 +966,12 @@ struct btrfs_fs_info {
967 struct srcu_struct subvol_srcu; 966 struct srcu_struct subvol_srcu;
968 967
969 spinlock_t trans_lock; 968 spinlock_t trans_lock;
969 /*
970 * the reloc mutex goes with the trans lock, it is taken
971 * during commit to protect us from the relocation code
972 */
973 struct mutex reloc_mutex;
974
970 struct list_head trans_list; 975 struct list_head trans_list;
971 struct list_head hashers; 976 struct list_head hashers;
972 struct list_head dead_roots; 977 struct list_head dead_roots;
@@ -1172,6 +1177,14 @@ struct btrfs_root {
1172 u32 type; 1177 u32 type;
1173 1178
1174 u64 highest_objectid; 1179 u64 highest_objectid;
1180
1181 /* btrfs_record_root_in_trans is a multi-step process,
1182 * and it can race with the balancing code. But the
1183 * race is very small, and only the first time the root
1184 * is added to each transaction. So in_trans_setup
1185 * is used to tell us when more checks are required
1186 */
1187 unsigned long in_trans_setup;
1175 int ref_cows; 1188 int ref_cows;
1176 int track_dirty; 1189 int track_dirty;
1177 int in_radix; 1190 int in_radix;
@@ -1181,7 +1194,6 @@ struct btrfs_root {
1181 struct btrfs_key defrag_max; 1194 struct btrfs_key defrag_max;
1182 int defrag_running; 1195 int defrag_running;
1183 char *name; 1196 char *name;
1184 int in_sysfs;
1185 1197
1186 /* the dirty list is only used by non-reference counted roots */ 1198 /* the dirty list is only used by non-reference counted roots */
1187 struct list_head dirty_list; 1199 struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..98c68e658a9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
82 return root->fs_info->delayed_root; 82 return root->fs_info->delayed_root;
83} 83}
84 84
85static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( 85static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
86 struct inode *inode)
87{ 86{
88 struct btrfs_delayed_node *node;
89 struct btrfs_inode *btrfs_inode = BTRFS_I(inode); 87 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
90 struct btrfs_root *root = btrfs_inode->root; 88 struct btrfs_root *root = btrfs_inode->root;
91 u64 ino = btrfs_ino(inode); 89 u64 ino = btrfs_ino(inode);
92 int ret; 90 struct btrfs_delayed_node *node;
93 91
94again:
95 node = ACCESS_ONCE(btrfs_inode->delayed_node); 92 node = ACCESS_ONCE(btrfs_inode->delayed_node);
96 if (node) { 93 if (node) {
97 atomic_inc(&node->refs); /* can be accessed */ 94 atomic_inc(&node->refs);
98 return node; 95 return node;
99 } 96 }
100 97
@@ -102,8 +99,10 @@ again:
102 node = radix_tree_lookup(&root->delayed_nodes_tree, ino); 99 node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
103 if (node) { 100 if (node) {
104 if (btrfs_inode->delayed_node) { 101 if (btrfs_inode->delayed_node) {
102 atomic_inc(&node->refs); /* can be accessed */
103 BUG_ON(btrfs_inode->delayed_node != node);
105 spin_unlock(&root->inode_lock); 104 spin_unlock(&root->inode_lock);
106 goto again; 105 return node;
107 } 106 }
108 btrfs_inode->delayed_node = node; 107 btrfs_inode->delayed_node = node;
109 atomic_inc(&node->refs); /* can be accessed */ 108 atomic_inc(&node->refs); /* can be accessed */
@@ -113,6 +112,23 @@ again:
113 } 112 }
114 spin_unlock(&root->inode_lock); 113 spin_unlock(&root->inode_lock);
115 114
115 return NULL;
116}
117
118static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
119 struct inode *inode)
120{
121 struct btrfs_delayed_node *node;
122 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
123 struct btrfs_root *root = btrfs_inode->root;
124 u64 ino = btrfs_ino(inode);
125 int ret;
126
127again:
128 node = btrfs_get_delayed_node(inode);
129 if (node)
130 return node;
131
116 node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); 132 node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
117 if (!node) 133 if (!node)
118 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
@@ -297,7 +313,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
297 item->data_len = data_len; 313 item->data_len = data_len;
298 item->ins_or_del = 0; 314 item->ins_or_del = 0;
299 item->bytes_reserved = 0; 315 item->bytes_reserved = 0;
300 item->block_rsv = NULL;
301 item->delayed_node = NULL; 316 item->delayed_node = NULL;
302 atomic_set(&item->refs, 1); 317 atomic_set(&item->refs, 1);
303 } 318 }
@@ -549,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item(
549 return next; 564 return next;
550} 565}
551 566
552static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
553 struct inode *inode)
554{
555 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
556 struct btrfs_delayed_node *delayed_node;
557
558 delayed_node = btrfs_inode->delayed_node;
559 if (delayed_node)
560 atomic_inc(&delayed_node->refs);
561
562 return delayed_node;
563}
564
565static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, 567static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
566 u64 root_id) 568 u64 root_id)
567{ 569{
@@ -593,10 +595,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
593 595
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 596 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); 597 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) { 598 if (!ret)
597 item->bytes_reserved = num_bytes; 599 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600 600
601 return ret; 601 return ret;
602} 602}
@@ -604,10 +604,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, 604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item) 605 struct btrfs_delayed_item *item)
606{ 606{
607 struct btrfs_block_rsv *rsv;
608
607 if (!item->bytes_reserved) 609 if (!item->bytes_reserved)
608 return; 610 return;
609 611
610 btrfs_block_rsv_release(root, item->block_rsv, 612 rsv = &root->fs_info->global_block_rsv;
613 btrfs_block_rsv_release(root, rsv,
611 item->bytes_reserved); 614 item->bytes_reserved);
612} 615}
613 616
@@ -1014,6 +1017,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1014 struct btrfs_delayed_root *delayed_root; 1017 struct btrfs_delayed_root *delayed_root;
1015 struct btrfs_delayed_node *curr_node, *prev_node; 1018 struct btrfs_delayed_node *curr_node, *prev_node;
1016 struct btrfs_path *path; 1019 struct btrfs_path *path;
1020 struct btrfs_block_rsv *block_rsv;
1017 int ret = 0; 1021 int ret = 0;
1018 1022
1019 path = btrfs_alloc_path(); 1023 path = btrfs_alloc_path();
@@ -1021,6 +1025,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1021 return -ENOMEM; 1025 return -ENOMEM;
1022 path->leave_spinning = 1; 1026 path->leave_spinning = 1;
1023 1027
1028 block_rsv = trans->block_rsv;
1029 trans->block_rsv = &root->fs_info->global_block_rsv;
1030
1024 delayed_root = btrfs_get_delayed_root(root); 1031 delayed_root = btrfs_get_delayed_root(root);
1025 1032
1026 curr_node = btrfs_first_delayed_node(delayed_root); 1033 curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1052,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1045 } 1052 }
1046 1053
1047 btrfs_free_path(path); 1054 btrfs_free_path(path);
1055 trans->block_rsv = block_rsv;
1048 return ret; 1056 return ret;
1049} 1057}
1050 1058
@@ -1052,6 +1060,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1052 struct btrfs_delayed_node *node) 1060 struct btrfs_delayed_node *node)
1053{ 1061{
1054 struct btrfs_path *path; 1062 struct btrfs_path *path;
1063 struct btrfs_block_rsv *block_rsv;
1055 int ret; 1064 int ret;
1056 1065
1057 path = btrfs_alloc_path(); 1066 path = btrfs_alloc_path();
@@ -1059,6 +1068,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1059 return -ENOMEM; 1068 return -ENOMEM;
1060 path->leave_spinning = 1; 1069 path->leave_spinning = 1;
1061 1070
1071 block_rsv = trans->block_rsv;
1072 trans->block_rsv = &node->root->fs_info->global_block_rsv;
1073
1062 ret = btrfs_insert_delayed_items(trans, path, node->root, node); 1074 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1063 if (!ret) 1075 if (!ret)
1064 ret = btrfs_delete_delayed_items(trans, path, node->root, node); 1076 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1078,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1066 ret = btrfs_update_delayed_inode(trans, node->root, path, node); 1078 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1067 btrfs_free_path(path); 1079 btrfs_free_path(path);
1068 1080
1081 trans->block_rsv = block_rsv;
1069 return ret; 1082 return ret;
1070} 1083}
1071 1084
@@ -1116,6 +1129,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1116 struct btrfs_path *path; 1129 struct btrfs_path *path;
1117 struct btrfs_delayed_node *delayed_node = NULL; 1130 struct btrfs_delayed_node *delayed_node = NULL;
1118 struct btrfs_root *root; 1131 struct btrfs_root *root;
1132 struct btrfs_block_rsv *block_rsv;
1119 unsigned long nr = 0; 1133 unsigned long nr = 0;
1120 int need_requeue = 0; 1134 int need_requeue = 0;
1121 int ret; 1135 int ret;
@@ -1134,6 +1148,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1134 if (IS_ERR(trans)) 1148 if (IS_ERR(trans))
1135 goto free_path; 1149 goto free_path;
1136 1150
1151 block_rsv = trans->block_rsv;
1152 trans->block_rsv = &root->fs_info->global_block_rsv;
1153
1137 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); 1154 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1138 if (!ret) 1155 if (!ret)
1139 ret = btrfs_delete_delayed_items(trans, path, root, 1156 ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1193,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1176 1193
1177 nr = trans->blocks_used; 1194 nr = trans->blocks_used;
1178 1195
1196 trans->block_rsv = block_rsv;
1179 btrfs_end_transaction_dmeta(trans, root); 1197 btrfs_end_transaction_dmeta(trans, root);
1180 __btrfs_btree_balance_dirty(root, nr); 1198 __btrfs_btree_balance_dirty(root, nr);
1181free_path: 1199free_path:
@@ -1222,6 +1240,13 @@ again:
1222 return 0; 1240 return 0;
1223} 1241}
1224 1242
1243void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1244{
1245 struct btrfs_delayed_root *delayed_root;
1246 delayed_root = btrfs_get_delayed_root(root);
1247 WARN_ON(btrfs_first_delayed_node(delayed_root));
1248}
1249
1225void btrfs_balance_delayed_items(struct btrfs_root *root) 1250void btrfs_balance_delayed_items(struct btrfs_root *root)
1226{ 1251{
1227 struct btrfs_delayed_root *delayed_root; 1252 struct btrfs_delayed_root *delayed_root;
@@ -1382,8 +1407,7 @@ end:
1382 1407
1383int btrfs_inode_delayed_dir_index_count(struct inode *inode) 1408int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1384{ 1409{
1385 struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node; 1410 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1386 int ret = 0;
1387 1411
1388 if (!delayed_node) 1412 if (!delayed_node)
1389 return -ENOENT; 1413 return -ENOENT;
@@ -1393,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1393 * a new directory index is added into the delayed node and index_cnt 1417 * a new directory index is added into the delayed node and index_cnt
1394 * is updated now. So we needn't lock the delayed node. 1418 * is updated now. So we needn't lock the delayed node.
1395 */ 1419 */
1396 if (!delayed_node->index_cnt) 1420 if (!delayed_node->index_cnt) {
1421 btrfs_release_delayed_node(delayed_node);
1397 return -EINVAL; 1422 return -EINVAL;
1423 }
1398 1424
1399 BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; 1425 BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1400 return ret; 1426 btrfs_release_delayed_node(delayed_node);
1427 return 0;
1401} 1428}
1402 1429
1403void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, 1430void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
@@ -1591,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1591 inode->i_ctime.tv_nsec); 1618 inode->i_ctime.tv_nsec);
1592} 1619}
1593 1620
1621int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1622{
1623 struct btrfs_delayed_node *delayed_node;
1624 struct btrfs_inode_item *inode_item;
1625 struct btrfs_timespec *tspec;
1626
1627 delayed_node = btrfs_get_delayed_node(inode);
1628 if (!delayed_node)
1629 return -ENOENT;
1630
1631 mutex_lock(&delayed_node->mutex);
1632 if (!delayed_node->inode_dirty) {
1633 mutex_unlock(&delayed_node->mutex);
1634 btrfs_release_delayed_node(delayed_node);
1635 return -ENOENT;
1636 }
1637
1638 inode_item = &delayed_node->inode_item;
1639
1640 inode->i_uid = btrfs_stack_inode_uid(inode_item);
1641 inode->i_gid = btrfs_stack_inode_gid(inode_item);
1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1643 inode->i_mode = btrfs_stack_inode_mode(inode_item);
1644 inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
1648 inode->i_rdev = 0;
1649 *rdev = btrfs_stack_inode_rdev(inode_item);
1650 BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1651
1652 tspec = btrfs_inode_atime(inode_item);
1653 inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1654 inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1655
1656 tspec = btrfs_inode_mtime(inode_item);
1657 inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1658 inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1659
1660 tspec = btrfs_inode_ctime(inode_item);
1661 inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1662 inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1663
1664 inode->i_generation = BTRFS_I(inode)->generation;
1665 BTRFS_I(inode)->index_cnt = (u64)-1;
1666
1667 mutex_unlock(&delayed_node->mutex);
1668 btrfs_release_delayed_node(delayed_node);
1669 return 0;
1670}
1671
1594int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, 1672int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode) 1673 struct btrfs_root *root, struct inode *inode)
1596{ 1674{
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..8d27af4bd8b9 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
75 struct list_head tree_list; /* used for batch insert/delete items */ 75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */ 76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved; 77 u64 bytes_reserved;
78 struct btrfs_block_rsv *block_rsv;
79 struct btrfs_delayed_node *delayed_node; 78 struct btrfs_delayed_node *delayed_node;
80 atomic_t refs; 79 atomic_t refs;
81 int ins_or_del; 80 int ins_or_del;
@@ -120,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode);
120 119
121int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, 120int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
122 struct btrfs_root *root, struct inode *inode); 121 struct btrfs_root *root, struct inode *inode);
122int btrfs_fill_inode(struct inode *inode, u32 *rdev);
123 123
124/* Used for drop dead root */ 124/* Used for drop dead root */
125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); 125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
@@ -138,4 +138,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
138/* for init */ 138/* for init */
139int __init btrfs_delayed_inode_init(void); 139int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void); 140void btrfs_delayed_inode_exit(void);
141
142/* for debugging */
143void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
144
141#endif 145#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d363184d..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1044 root->last_trans = 0; 1044 root->last_trans = 0;
1045 root->highest_objectid = 0; 1045 root->highest_objectid = 0;
1046 root->name = NULL; 1046 root->name = NULL;
1047 root->in_sysfs = 0;
1048 root->inode_tree = RB_ROOT; 1047 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1048 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1050 root->block_rsv = NULL; 1049 root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
1300 return root; 1299 return root;
1301 1300
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); 1301 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), 1302 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS); 1303 GFP_NOFS);
1307 if (!root->free_ino_pinned) 1304 if (!root->free_ino_pinned || !root->free_ino_ctl) {
1305 ret = -ENOMEM;
1308 goto fail; 1306 goto fail;
1307 }
1309 1308
1310 btrfs_init_free_ino_ctl(root); 1309 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex); 1310 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock); 1311 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait); 1312 init_waitqueue_head(&root->cache_wait);
1314 1313
1315 set_anon_super(&root->anon_super, NULL); 1314 ret = set_anon_super(&root->anon_super, NULL);
1315 if (ret)
1316 goto fail;
1316 1317
1317 if (btrfs_root_refs(&root->root_item) == 0) { 1318 if (btrfs_root_refs(&root->root_item) == 0) {
1318 ret = -ENOENT; 1319 ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1619 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1620 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock); 1621 spin_lock_init(&fs_info->defrag_inodes_lock);
1622 mutex_init(&fs_info->reloc_mutex);
1621 1623
1622 init_completion(&fs_info->kobj_unregister); 1624 init_completion(&fs_info->kobj_unregister);
1623 fs_info->tree_root = tree_root; 1625 fs_info->tree_root = tree_root;
@@ -1668,8 +1670,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1668 init_waitqueue_head(&fs_info->scrub_pause_wait); 1670 init_waitqueue_head(&fs_info->scrub_pause_wait);
1669 init_rwsem(&fs_info->scrub_super_lock); 1671 init_rwsem(&fs_info->scrub_super_lock);
1670 fs_info->scrub_workers_refcnt = 0; 1672 fs_info->scrub_workers_refcnt = 0;
1671 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1672 fs_info->thread_pool_size, &fs_info->generic_worker);
1673 1673
1674 sb->s_blocksize = 4096; 1674 sb->s_blocksize = 4096;
1675 sb->s_blocksize_bits = blksize_bits(4096); 1675 sb->s_blocksize_bits = blksize_bits(4096);
@@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2911 2911
2912 INIT_LIST_HEAD(&splice); 2912 INIT_LIST_HEAD(&splice);
2913 2913
2914 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2915
2916 spin_lock(&root->fs_info->delalloc_lock); 2914 spin_lock(&root->fs_info->delalloc_lock);
2915 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2917 2916
2918 while (!list_empty(&splice)) { 2917 while (!list_empty(&splice)) {
2919 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2918 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5b9b6b6df242..71cd456fdb60 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,6 +3089,13 @@ alloc:
3089 } 3089 }
3090 goto again; 3090 goto again;
3091 } 3091 }
3092
3093 /*
3094 * If we have less pinned bytes than we want to allocate then
3095 * don't bother committing the transaction, it won't help us.
3096 */
3097 if (data_sinfo->bytes_pinned < bytes)
3098 committed = 1;
3092 spin_unlock(&data_sinfo->lock); 3099 spin_unlock(&data_sinfo->lock);
3093 3100
3094 /* commit the current transaction and try again */ 3101 /* commit the current transaction and try again */
@@ -3307,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3307 if (reserved == 0) 3314 if (reserved == 0)
3308 return 0; 3315 return 0;
3309 3316
3310 /* nothing to shrink - nothing to reclaim */
3311 if (root->fs_info->delalloc_bytes == 0)
3312 return 0;
3313
3314 max_reclaim = min(reserved, to_reclaim); 3317 max_reclaim = min(reserved, to_reclaim);
3315 3318
3316 while (loops < 1024) { 3319 while (loops < 1024) {
@@ -4839,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4839 u64 num_bytes, u64 empty_size, 4842 u64 num_bytes, u64 empty_size,
4840 u64 search_start, u64 search_end, 4843 u64 search_start, u64 search_end,
4841 u64 hint_byte, struct btrfs_key *ins, 4844 u64 hint_byte, struct btrfs_key *ins,
4842 int data) 4845 u64 data)
4843{ 4846{
4844 int ret = 0; 4847 int ret = 0;
4845 struct btrfs_root *root = orig_root->fs_info->extent_root; 4848 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4866,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4866 4869
4867 space_info = __find_space_info(root->fs_info, data); 4870 space_info = __find_space_info(root->fs_info, data);
4868 if (!space_info) { 4871 if (!space_info) {
4869 printk(KERN_ERR "No space info for %d\n", data); 4872 printk(KERN_ERR "No space info for %llu\n", data);
4870 return -ENOSPC; 4873 return -ENOSPC;
4871 } 4874 }
4872 4875
@@ -5211,9 +5214,7 @@ loop:
5211 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5214 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5212 * again 5215 * again
5213 */ 5216 */
5214 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5217 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5215 (found_uncached_bg || empty_size || empty_cluster ||
5216 allowed_chunk_alloc)) {
5217 index = 0; 5218 index = 0;
5218 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5219 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5219 found_uncached_bg = false; 5220 found_uncached_bg = false;
@@ -5253,32 +5254,36 @@ loop:
5253 goto search; 5254 goto search;
5254 } 5255 }
5255 5256
5256 if (loop < LOOP_CACHING_WAIT) { 5257 loop++;
5257 loop++;
5258 goto search;
5259 }
5260 5258
5261 if (loop == LOOP_ALLOC_CHUNK) { 5259 if (loop == LOOP_ALLOC_CHUNK) {
5262 empty_size = 0; 5260 if (allowed_chunk_alloc) {
5263 empty_cluster = 0; 5261 ret = do_chunk_alloc(trans, root, num_bytes +
5264 } 5262 2 * 1024 * 1024, data,
5263 CHUNK_ALLOC_LIMITED);
5264 allowed_chunk_alloc = 0;
5265 if (ret == 1)
5266 done_chunk_alloc = 1;
5267 } else if (!done_chunk_alloc &&
5268 space_info->force_alloc ==
5269 CHUNK_ALLOC_NO_FORCE) {
5270 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5271 }
5265 5272
5266 if (allowed_chunk_alloc) { 5273 /*
5267 ret = do_chunk_alloc(trans, root, num_bytes + 5274 * We didn't allocate a chunk, go ahead and drop the
5268 2 * 1024 * 1024, data, 5275 * empty size and loop again.
5269 CHUNK_ALLOC_LIMITED); 5276 */
5270 allowed_chunk_alloc = 0; 5277 if (!done_chunk_alloc)
5271 done_chunk_alloc = 1; 5278 loop = LOOP_NO_EMPTY_SIZE;
5272 } else if (!done_chunk_alloc &&
5273 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5274 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5275 } 5279 }
5276 5280
5277 if (loop < LOOP_NO_EMPTY_SIZE) { 5281 if (loop == LOOP_NO_EMPTY_SIZE) {
5278 loop++; 5282 empty_size = 0;
5279 goto search; 5283 empty_cluster = 0;
5280 } 5284 }
5281 ret = -ENOSPC; 5285
5286 goto search;
5282 } else if (!ins->objectid) { 5287 } else if (!ins->objectid) {
5283 ret = -ENOSPC; 5288 ret = -ENOSPC;
5284 } else if (ins->objectid) { 5289 } else if (ins->objectid) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..a11a92ee2d30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
126 unsigned long map_len; 126 unsigned long map_len;
127 struct page *first_page; 127 struct page *first_page;
128 unsigned long bflags; 128 unsigned long bflags;
129 atomic_t refs;
130 struct list_head leak_list; 129 struct list_head leak_list;
131 struct rcu_head rcu_head; 130 struct rcu_head rcu_head;
131 atomic_t refs;
132 132
133 /* the spinlock is used to protect most operations */ 133 /* the spinlock is used to protect most operations */
134 spinlock_t lock; 134 spinlock_t lock;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ad144736a5fd..bf0d61567f3d 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
250 pgoff_t index = 0; 250 pgoff_t index = 0;
251 unsigned long first_page_offset; 251 unsigned long first_page_offset;
252 int num_checksums; 252 int num_checksums;
253 int ret = 0, ret2; 253 int ret = 0;
254 254
255 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
256 256
@@ -421,11 +421,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
421 goto free_cache; 421 goto free_cache;
422 } 422 }
423 spin_lock(&ctl->tree_lock); 423 spin_lock(&ctl->tree_lock);
424 ret2 = link_free_space(ctl, e); 424 ret = link_free_space(ctl, e);
425 ctl->total_bitmaps++; 425 ctl->total_bitmaps++;
426 ctl->op->recalc_thresholds(ctl); 426 ctl->op->recalc_thresholds(ctl);
427 spin_unlock(&ctl->tree_lock); 427 spin_unlock(&ctl->tree_lock);
428 list_add_tail(&e->list, &bitmaps);
429 if (ret) { 428 if (ret) {
430 printk(KERN_ERR "Duplicate entries in " 429 printk(KERN_ERR "Duplicate entries in "
431 "free space cache, dumping\n"); 430 "free space cache, dumping\n");
@@ -434,6 +433,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
434 page_cache_release(page); 433 page_cache_release(page);
435 goto free_cache; 434 goto free_cache;
436 } 435 }
436 list_add_tail(&e->list, &bitmaps);
437 } 437 }
438 438
439 num_entries--; 439 num_entries--;
@@ -1417,6 +1417,23 @@ again:
1417 return 0; 1417 return 0;
1418} 1418}
1419 1419
1420static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info, u64 offset,
1422 u64 bytes)
1423{
1424 u64 bytes_to_set = 0;
1425 u64 end;
1426
1427 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1428
1429 bytes_to_set = min(end - offset, bytes);
1430
1431 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1432
1433 return bytes_to_set;
1434
1435}
1436
1420static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1437static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info) 1438 struct btrfs_free_space *info)
1422{ 1439{
@@ -1453,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1453 return true; 1470 return true;
1454} 1471}
1455 1472
1473static struct btrfs_free_space_op free_space_op = {
1474 .recalc_thresholds = recalculate_thresholds,
1475 .use_bitmap = use_bitmap,
1476};
1477
1456static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1478static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1457 struct btrfs_free_space *info) 1479 struct btrfs_free_space *info)
1458{ 1480{
1459 struct btrfs_free_space *bitmap_info; 1481 struct btrfs_free_space *bitmap_info;
1482 struct btrfs_block_group_cache *block_group = NULL;
1460 int added = 0; 1483 int added = 0;
1461 u64 bytes, offset, end; 1484 u64 bytes, offset, bytes_added;
1462 int ret; 1485 int ret;
1463 1486
1464 bytes = info->bytes; 1487 bytes = info->bytes;
@@ -1467,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1467 if (!ctl->op->use_bitmap(ctl, info)) 1490 if (!ctl->op->use_bitmap(ctl, info))
1468 return 0; 1491 return 0;
1469 1492
1493 if (ctl->op == &free_space_op)
1494 block_group = ctl->private;
1470again: 1495again:
1496 /*
1497 * Since we link bitmaps right into the cluster we need to see if we
1498 * have a cluster here, and if so and it has our bitmap we need to add
1499 * the free space to that bitmap.
1500 */
1501 if (block_group && !list_empty(&block_group->cluster_list)) {
1502 struct btrfs_free_cluster *cluster;
1503 struct rb_node *node;
1504 struct btrfs_free_space *entry;
1505
1506 cluster = list_entry(block_group->cluster_list.next,
1507 struct btrfs_free_cluster,
1508 block_group_list);
1509 spin_lock(&cluster->lock);
1510 node = rb_first(&cluster->root);
1511 if (!node) {
1512 spin_unlock(&cluster->lock);
1513 goto no_cluster_bitmap;
1514 }
1515
1516 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1517 if (!entry->bitmap) {
1518 spin_unlock(&cluster->lock);
1519 goto no_cluster_bitmap;
1520 }
1521
1522 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1523 bytes_added = add_bytes_to_bitmap(ctl, entry,
1524 offset, bytes);
1525 bytes -= bytes_added;
1526 offset += bytes_added;
1527 }
1528 spin_unlock(&cluster->lock);
1529 if (!bytes) {
1530 ret = 1;
1531 goto out;
1532 }
1533 }
1534
1535no_cluster_bitmap:
1471 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1536 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1472 1, 0); 1537 1, 0);
1473 if (!bitmap_info) { 1538 if (!bitmap_info) {
@@ -1475,19 +1540,10 @@ again:
1475 goto new_bitmap; 1540 goto new_bitmap;
1476 } 1541 }
1477 1542
1478 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1543 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1479 1544 bytes -= bytes_added;
1480 if (offset >= bitmap_info->offset && offset + bytes > end) { 1545 offset += bytes_added;
1481 bitmap_set_bits(ctl, bitmap_info, offset, end - offset); 1546 added = 0;
1482 bytes -= end - offset;
1483 offset = end;
1484 added = 0;
1485 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1486 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1487 bytes = 0;
1488 } else {
1489 BUG();
1490 }
1491 1547
1492 if (!bytes) { 1548 if (!bytes) {
1493 ret = 1; 1549 ret = 1;
@@ -1766,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1766 "\n", count); 1822 "\n", count);
1767} 1823}
1768 1824
1769static struct btrfs_free_space_op free_space_op = {
1770 .recalc_thresholds = recalculate_thresholds,
1771 .use_bitmap = use_bitmap,
1772};
1773
1774void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1825void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1775{ 1826{
1776 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1827 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -1842,9 +1893,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1842 1893
1843 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 1894 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1844 info = rb_entry(node, struct btrfs_free_space, offset_index); 1895 info = rb_entry(node, struct btrfs_free_space, offset_index);
1845 unlink_free_space(ctl, info); 1896 if (!info->bitmap) {
1846 kfree(info->bitmap); 1897 unlink_free_space(ctl, info);
1847 kmem_cache_free(btrfs_free_space_cachep, info); 1898 kmem_cache_free(btrfs_free_space_cachep, info);
1899 } else {
1900 free_bitmap(ctl, info);
1901 }
1848 if (need_resched()) { 1902 if (need_resched()) {
1849 spin_unlock(&ctl->tree_lock); 1903 spin_unlock(&ctl->tree_lock);
1850 cond_resched(); 1904 cond_resched();
@@ -2142,9 +2196,11 @@ again:
2142/* 2196/*
2143 * This searches the block group for just extents to fill the cluster with. 2197 * This searches the block group for just extents to fill the cluster with.
2144 */ 2198 */
2145static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2199static noinline int
2146 struct btrfs_free_cluster *cluster, 2200setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2147 u64 offset, u64 bytes, u64 min_bytes) 2201 struct btrfs_free_cluster *cluster,
2202 struct list_head *bitmaps, u64 offset, u64 bytes,
2203 u64 min_bytes)
2148{ 2204{
2149 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2205 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2150 struct btrfs_free_space *first = NULL; 2206 struct btrfs_free_space *first = NULL;
@@ -2166,6 +2222,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2166 * extent entry. 2222 * extent entry.
2167 */ 2223 */
2168 while (entry->bitmap) { 2224 while (entry->bitmap) {
2225 if (list_empty(&entry->list))
2226 list_add_tail(&entry->list, bitmaps);
2169 node = rb_next(&entry->offset_index); 2227 node = rb_next(&entry->offset_index);
2170 if (!node) 2228 if (!node)
2171 return -ENOSPC; 2229 return -ENOSPC;
@@ -2185,8 +2243,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2185 return -ENOSPC; 2243 return -ENOSPC;
2186 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2244 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2187 2245
2188 if (entry->bitmap) 2246 if (entry->bitmap) {
2247 if (list_empty(&entry->list))
2248 list_add_tail(&entry->list, bitmaps);
2189 continue; 2249 continue;
2250 }
2251
2190 /* 2252 /*
2191 * we haven't filled the empty size and the window is 2253 * we haven't filled the empty size and the window is
2192 * very large. reset and try again 2254 * very large. reset and try again
@@ -2238,9 +2300,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2238 * This specifically looks for bitmaps that may work in the cluster, we assume 2300 * This specifically looks for bitmaps that may work in the cluster, we assume
2239 * that we have already failed to find extents that will work. 2301 * that we have already failed to find extents that will work.
2240 */ 2302 */
2241static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2303static noinline int
2242 struct btrfs_free_cluster *cluster, 2304setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2243 u64 offset, u64 bytes, u64 min_bytes) 2305 struct btrfs_free_cluster *cluster,
2306 struct list_head *bitmaps, u64 offset, u64 bytes,
2307 u64 min_bytes)
2244{ 2308{
2245 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2309 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2246 struct btrfs_free_space *entry; 2310 struct btrfs_free_space *entry;
@@ -2250,10 +2314,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2250 if (ctl->total_bitmaps == 0) 2314 if (ctl->total_bitmaps == 0)
2251 return -ENOSPC; 2315 return -ENOSPC;
2252 2316
2317 /*
2318 * First check our cached list of bitmaps and see if there is an entry
2319 * here that will work.
2320 */
2321 list_for_each_entry(entry, bitmaps, list) {
2322 if (entry->bytes < min_bytes)
2323 continue;
2324 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2325 bytes, min_bytes);
2326 if (!ret)
2327 return 0;
2328 }
2329
2330 /*
2331 * If we do have entries on our list and we are here then we didn't find
2332 * anything, so go ahead and get the next entry after the last entry in
2333 * this list and start the search from there.
2334 */
2335 if (!list_empty(bitmaps)) {
2336 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2337 list);
2338 node = rb_next(&entry->offset_index);
2339 if (!node)
2340 return -ENOSPC;
2341 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2342 goto search;
2343 }
2344
2253 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); 2345 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2254 if (!entry) 2346 if (!entry)
2255 return -ENOSPC; 2347 return -ENOSPC;
2256 2348
2349search:
2257 node = &entry->offset_index; 2350 node = &entry->offset_index;
2258 do { 2351 do {
2259 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2352 entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2284,6 +2377,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2284 u64 offset, u64 bytes, u64 empty_size) 2377 u64 offset, u64 bytes, u64 empty_size)
2285{ 2378{
2286 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2379 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2380 struct list_head bitmaps;
2381 struct btrfs_free_space *entry, *tmp;
2287 u64 min_bytes; 2382 u64 min_bytes;
2288 int ret; 2383 int ret;
2289 2384
@@ -2322,11 +2417,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2322 goto out; 2417 goto out;
2323 } 2418 }
2324 2419
2325 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, 2420 INIT_LIST_HEAD(&bitmaps);
2326 min_bytes); 2421 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2422 bytes, min_bytes);
2327 if (ret) 2423 if (ret)
2328 ret = setup_cluster_bitmap(block_group, cluster, offset, 2424 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2329 bytes, min_bytes); 2425 offset, bytes, min_bytes);
2426
2427 /* Clear our temporary list */
2428 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2429 list_del_init(&entry->list);
2330 2430
2331 if (!ret) { 2431 if (!ret) {
2332 atomic_inc(&block_group->count); 2432 atomic_inc(&block_group->count);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ebf95f7a44d6..d340f63d8f07 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1986,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1986 } 1986 }
1987 1987
1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
1989 return 0; 1989 goto good;
1990 1990
1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { 1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
2509 int maybe_acls; 2509 int maybe_acls;
2510 u32 rdev; 2510 u32 rdev;
2511 int ret; 2511 int ret;
2512 bool filled = false;
2513
2514 ret = btrfs_fill_inode(inode, &rdev);
2515 if (!ret)
2516 filled = true;
2512 2517
2513 path = btrfs_alloc_path(); 2518 path = btrfs_alloc_path();
2514 BUG_ON(!path); 2519 BUG_ON(!path);
@@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode)
2520 goto make_bad; 2525 goto make_bad;
2521 2526
2522 leaf = path->nodes[0]; 2527 leaf = path->nodes[0];
2528
2529 if (filled)
2530 goto cache_acl;
2531
2523 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2532 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2524 struct btrfs_inode_item); 2533 struct btrfs_inode_item);
2525 if (!leaf->map_token) 2534 if (!leaf->map_token)
@@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
2556 2565
2557 BTRFS_I(inode)->index_cnt = (u64)-1; 2566 BTRFS_I(inode)->index_cnt = (u64)-1;
2558 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2567 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2559 2568cache_acl:
2560 /* 2569 /*
2561 * try to precache a NULL acl entry for files that don't have 2570 * try to precache a NULL acl entry for files that don't have
2562 * any xattrs or acls 2571 * any xattrs or acls
@@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2572 } 2581 }
2573 2582
2574 btrfs_free_path(path); 2583 btrfs_free_path(path);
2575 inode_item = NULL;
2576 2584
2577 switch (inode->i_mode & S_IFMT) { 2585 switch (inode->i_mode & S_IFMT) {
2578 case S_IFREG: 2586 case S_IFREG:
@@ -3076,6 +3084,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3076 ret = btrfs_update_inode(trans, root, dir); 3084 ret = btrfs_update_inode(trans, root, dir);
3077 BUG_ON(ret); 3085 BUG_ON(ret);
3078 3086
3087 btrfs_free_path(path);
3079 return 0; 3088 return 0;
3080} 3089}
3081 3090
@@ -3646,7 +3655,7 @@ void btrfs_evict_inode(struct inode *inode)
3646 btrfs_i_size_write(inode, 0); 3655 btrfs_i_size_write(inode, 0);
3647 3656
3648 while (1) { 3657 while (1) {
3649 trans = btrfs_start_transaction(root, 0); 3658 trans = btrfs_join_transaction(root);
3650 BUG_ON(IS_ERR(trans)); 3659 BUG_ON(IS_ERR(trans));
3651 trans->block_rsv = root->orphan_block_rsv; 3660 trans->block_rsv = root->orphan_block_rsv;
3652 3661
@@ -4519,6 +4528,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4519 inode_tree_add(inode); 4528 inode_tree_add(inode);
4520 4529
4521 trace_btrfs_inode_new(inode); 4530 trace_btrfs_inode_new(inode);
4531 btrfs_set_inode_last_trans(trans, inode);
4522 4532
4523 return inode; 4533 return inode;
4524fail: 4534fail:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ac37040e426a..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); 482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
483 BUG_ON(ret); 483 BUG_ON(ret);
484 484
485 spin_lock(&root->fs_info->trans_lock);
485 list_add(&pending_snapshot->list, 486 list_add(&pending_snapshot->list,
486 &trans->transaction->pending_snapshots); 487 &trans->transaction->pending_snapshots);
488 spin_unlock(&root->fs_info->trans_lock);
487 if (async_transid) { 489 if (async_transid) {
488 *async_transid = trans->transid; 490 *async_transid = trans->transid;
489 ret = btrfs_commit_transaction_async(trans, 491 ret = btrfs_commit_transaction_async(trans,
@@ -2054,29 +2056,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2054 2056
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2057static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{ 2058{
2057 struct btrfs_ioctl_fs_info_args fi_args; 2059 struct btrfs_ioctl_fs_info_args *fi_args;
2058 struct btrfs_device *device; 2060 struct btrfs_device *device;
2059 struct btrfs_device *next; 2061 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2062 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2063 int ret = 0;
2061 2064
2062 if (!capable(CAP_SYS_ADMIN)) 2065 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM; 2066 return -EPERM;
2064 2067
2065 fi_args.num_devices = fs_devices->num_devices; 2068 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2066 fi_args.max_id = 0; 2069 if (!fi_args)
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); 2070 return -ENOMEM;
2071
2072 fi_args->num_devices = fs_devices->num_devices;
2073 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2068 2074
2069 mutex_lock(&fs_devices->device_list_mutex); 2075 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2076 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id) 2077 if (device->devid > fi_args->max_id)
2072 fi_args.max_id = device->devid; 2078 fi_args->max_id = device->devid;
2073 } 2079 }
2074 mutex_unlock(&fs_devices->device_list_mutex); 2080 mutex_unlock(&fs_devices->device_list_mutex);
2075 2081
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args))) 2082 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2077 return -EFAULT; 2083 ret = -EFAULT;
2078 2084
2079 return 0; 2085 kfree(fi_args);
2086 return ret;
2080} 2087}
2081 2088
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2089static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1368 int ret; 1368 int ret;
1369 1369
1370 if (!root->reloc_root) 1370 if (!root->reloc_root)
1371 return 0; 1371 goto out;
1372 1372
1373 reloc_root = root->reloc_root; 1373 reloc_root = root->reloc_root;
1374 root_item = &reloc_root->root_item; 1374 root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1390 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1390 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1391 &reloc_root->root_key, root_item); 1391 &reloc_root->root_key, root_item);
1392 BUG_ON(ret); 1392 BUG_ON(ret);
1393
1394out:
1393 return 0; 1395 return 0;
1394} 1396}
1395 1397
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2142 u64 num_bytes = 0; 2144 u64 num_bytes = 0;
2143 int ret; 2145 int ret;
2144 2146
2145 spin_lock(&root->fs_info->trans_lock); 2147 mutex_lock(&root->fs_info->reloc_mutex);
2146 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2148 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2147 rc->merging_rsv_size += rc->nodes_relocated * 2; 2149 rc->merging_rsv_size += rc->nodes_relocated * 2;
2148 spin_unlock(&root->fs_info->trans_lock); 2150 mutex_unlock(&root->fs_info->reloc_mutex);
2151
2149again: 2152again:
2150 if (!err) { 2153 if (!err) {
2151 num_bytes = rc->merging_rsv_size; 2154 num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
2214 int ret; 2217 int ret;
2215again: 2218again:
2216 root = rc->extent_root; 2219 root = rc->extent_root;
2217 spin_lock(&root->fs_info->trans_lock); 2220
2221 /*
2222 * this serializes us with btrfs_record_root_in_transaction,
2223 * we have to make sure nobody is in the middle of
2224 * adding their roots to the list while we are
2225 * doing this splice
2226 */
2227 mutex_lock(&root->fs_info->reloc_mutex);
2218 list_splice_init(&rc->reloc_roots, &reloc_roots); 2228 list_splice_init(&rc->reloc_roots, &reloc_roots);
2219 spin_unlock(&root->fs_info->trans_lock); 2229 mutex_unlock(&root->fs_info->reloc_mutex);
2220 2230
2221 while (!list_empty(&reloc_roots)) { 2231 while (!list_empty(&reloc_roots)) {
2222 found = 1; 2232 found = 1;
@@ -3590,17 +3600,19 @@ next:
3590static void set_reloc_control(struct reloc_control *rc) 3600static void set_reloc_control(struct reloc_control *rc)
3591{ 3601{
3592 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3602 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3593 spin_lock(&fs_info->trans_lock); 3603
3604 mutex_lock(&fs_info->reloc_mutex);
3594 fs_info->reloc_ctl = rc; 3605 fs_info->reloc_ctl = rc;
3595 spin_unlock(&fs_info->trans_lock); 3606 mutex_unlock(&fs_info->reloc_mutex);
3596} 3607}
3597 3608
3598static void unset_reloc_control(struct reloc_control *rc) 3609static void unset_reloc_control(struct reloc_control *rc)
3599{ 3610{
3600 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3611 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3601 spin_lock(&fs_info->trans_lock); 3612
3613 mutex_lock(&fs_info->reloc_mutex);
3602 fs_info->reloc_ctl = NULL; 3614 fs_info->reloc_ctl = NULL;
3603 spin_unlock(&fs_info->trans_lock); 3615 mutex_unlock(&fs_info->reloc_mutex);
3604} 3616}
3605 3617
3606static int check_extent_flags(u64 flags) 3618static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index df50fd1eca8f..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h> 19#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h" 20#include "ctree.h"
27#include "volumes.h" 21#include "volumes.h"
28#include "disk-io.h" 22#include "disk-io.h"
@@ -804,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
804 798
805 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 799 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
806 if (ret < 0) 800 if (ret < 0)
807 goto out; 801 goto out_noplug;
808
809 l = path->nodes[0];
810 slot = path->slots[0];
811 btrfs_item_key_to_cpu(l, &key, slot);
812 if (key.objectid != logical) {
813 ret = btrfs_previous_item(root, path, 0,
814 BTRFS_EXTENT_ITEM_KEY);
815 if (ret < 0)
816 goto out;
817 }
818 802
803 /*
804 * we might miss half an extent here, but that doesn't matter,
805 * as it's only the prefetch
806 */
819 while (1) { 807 while (1) {
820 l = path->nodes[0]; 808 l = path->nodes[0];
821 slot = path->slots[0]; 809 slot = path->slots[0];
@@ -824,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
824 if (ret == 0) 812 if (ret == 0)
825 continue; 813 continue;
826 if (ret < 0) 814 if (ret < 0)
827 goto out; 815 goto out_noplug;
828 816
829 break; 817 break;
830 } 818 }
@@ -906,15 +894,20 @@ again:
906 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
907 if (ret < 0) 895 if (ret < 0)
908 goto out; 896 goto out;
909 897 if (ret > 0) {
910 l = path->nodes[0];
911 slot = path->slots[0];
912 btrfs_item_key_to_cpu(l, &key, slot);
913 if (key.objectid != logical) {
914 ret = btrfs_previous_item(root, path, 0, 898 ret = btrfs_previous_item(root, path, 0,
915 BTRFS_EXTENT_ITEM_KEY); 899 BTRFS_EXTENT_ITEM_KEY);
916 if (ret < 0) 900 if (ret < 0)
917 goto out; 901 goto out;
902 if (ret > 0) {
903 /* there's no smaller item, so stick with the
904 * larger one */
905 btrfs_release_path(path);
906 ret = btrfs_search_slot(NULL, root, &key,
907 path, 0, 0);
908 if (ret < 0)
909 goto out;
910 }
918 } 911 }
919 912
920 while (1) { 913 while (1) {
@@ -989,6 +982,7 @@ next:
989 982
990out: 983out:
991 blk_finish_plug(&plug); 984 blk_finish_plug(&plug);
985out_noplug:
992 btrfs_free_path(path); 986 btrfs_free_path(path);
993 return ret < 0 ? ret : 0; 987 return ret < 0 ? ret : 0;
994} 988}
@@ -1064,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1064 while (1) { 1058 while (1) {
1065 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1066 if (ret < 0) 1060 if (ret < 0)
1067 goto out; 1061 break;
1068 ret = 0; 1062 if (ret > 0) {
1063 if (path->slots[0] >=
1064 btrfs_header_nritems(path->nodes[0])) {
1065 ret = btrfs_next_leaf(root, path);
1066 if (ret)
1067 break;
1068 }
1069 }
1069 1070
1070 l = path->nodes[0]; 1071 l = path->nodes[0];
1071 slot = path->slots[0]; 1072 slot = path->slots[0];
@@ -1075,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1075 if (found_key.objectid != sdev->dev->devid) 1076 if (found_key.objectid != sdev->dev->devid)
1076 break; 1077 break;
1077 1078
1078 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 1079 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1079 break; 1080 break;
1080 1081
1081 if (found_key.offset >= end) 1082 if (found_key.offset >= end)
@@ -1104,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1104 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 1105 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1105 if (!cache) { 1106 if (!cache) {
1106 ret = -ENOENT; 1107 ret = -ENOENT;
1107 goto out; 1108 break;
1108 } 1109 }
1109 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1110 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1110 chunk_offset, length); 1111 chunk_offset, length);
@@ -1116,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1116 btrfs_release_path(path); 1117 btrfs_release_path(path);
1117 } 1118 }
1118 1119
1119out:
1120 btrfs_free_path(path); 1120 btrfs_free_path(path);
1121 return ret; 1121
1122 /*
1123 * ret can still be 1 from search_slot or next_leaf,
1124 * that's not an error
1125 */
1126 return ret < 0 ? ret : 0;
1122} 1127}
1123 1128
1124static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) 1129static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1155,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1155 struct btrfs_fs_info *fs_info = root->fs_info; 1160 struct btrfs_fs_info *fs_info = root->fs_info;
1156 1161
1157 mutex_lock(&fs_info->scrub_lock); 1162 mutex_lock(&fs_info->scrub_lock);
1158 if (fs_info->scrub_workers_refcnt == 0) 1163 if (fs_info->scrub_workers_refcnt == 0) {
1164 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165 fs_info->thread_pool_size, &fs_info->generic_worker);
1166 fs_info->scrub_workers.idle_thresh = 4;
1159 btrfs_start_workers(&fs_info->scrub_workers, 1); 1167 btrfs_start_workers(&fs_info->scrub_workers, 1);
1168 }
1160 ++fs_info->scrub_workers_refcnt; 1169 ++fs_info->scrub_workers_refcnt;
1161 mutex_unlock(&fs_info->scrub_lock); 1170 mutex_unlock(&fs_info->scrub_lock);
1162 1171
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
28#include "disk-io.h" 28#include "disk-io.h"
29#include "transaction.h" 29#include "transaction.h"
30 30
31static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
32{
33 return snprintf(buf, PAGE_SIZE, "%llu\n",
34 (unsigned long long)btrfs_root_used(&root->root_item));
35}
36
37static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
38{
39 return snprintf(buf, PAGE_SIZE, "%llu\n",
40 (unsigned long long)btrfs_root_limit(&root->root_item));
41}
42
43static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
44{
45
46 return snprintf(buf, PAGE_SIZE, "%llu\n",
47 (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
48}
49
50static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
51{
52 return snprintf(buf, PAGE_SIZE, "%llu\n",
53 (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
54}
55
56static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
57{
58 return snprintf(buf, PAGE_SIZE, "%llu\n",
59 (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
60}
61
62/* this is for root attrs (subvols/snapshots) */
63struct btrfs_root_attr {
64 struct attribute attr;
65 ssize_t (*show)(struct btrfs_root *, char *);
66 ssize_t (*store)(struct btrfs_root *, const char *, size_t);
67};
68
69#define ROOT_ATTR(name, mode, show, store) \
70static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
71 show, store)
72
73ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
74ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
75
76static struct attribute *btrfs_root_attrs[] = {
77 &btrfs_root_attr_blocks_used.attr,
78 &btrfs_root_attr_block_limit.attr,
79 NULL,
80};
81
82/* this is for super attrs (actual full fs) */
83struct btrfs_super_attr {
84 struct attribute attr;
85 ssize_t (*show)(struct btrfs_fs_info *, char *);
86 ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
87};
88
89#define SUPER_ATTR(name, mode, show, store) \
90static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
91 show, store)
92
93SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
94SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
95SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
96
97static struct attribute *btrfs_super_attrs[] = {
98 &btrfs_super_attr_blocks_used.attr,
99 &btrfs_super_attr_total_blocks.attr,
100 &btrfs_super_attr_blocksize.attr,
101 NULL,
102};
103
104static ssize_t btrfs_super_attr_show(struct kobject *kobj,
105 struct attribute *attr, char *buf)
106{
107 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
108 super_kobj);
109 struct btrfs_super_attr *a = container_of(attr,
110 struct btrfs_super_attr,
111 attr);
112
113 return a->show ? a->show(fs, buf) : 0;
114}
115
116static ssize_t btrfs_super_attr_store(struct kobject *kobj,
117 struct attribute *attr,
118 const char *buf, size_t len)
119{
120 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
121 super_kobj);
122 struct btrfs_super_attr *a = container_of(attr,
123 struct btrfs_super_attr,
124 attr);
125
126 return a->store ? a->store(fs, buf, len) : 0;
127}
128
129static ssize_t btrfs_root_attr_show(struct kobject *kobj,
130 struct attribute *attr, char *buf)
131{
132 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
133 root_kobj);
134 struct btrfs_root_attr *a = container_of(attr,
135 struct btrfs_root_attr,
136 attr);
137
138 return a->show ? a->show(root, buf) : 0;
139}
140
141static ssize_t btrfs_root_attr_store(struct kobject *kobj,
142 struct attribute *attr,
143 const char *buf, size_t len)
144{
145 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
146 root_kobj);
147 struct btrfs_root_attr *a = container_of(attr,
148 struct btrfs_root_attr,
149 attr);
150 return a->store ? a->store(root, buf, len) : 0;
151}
152
153static void btrfs_super_release(struct kobject *kobj)
154{
155 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
156 super_kobj);
157 complete(&fs->kobj_unregister);
158}
159
160static void btrfs_root_release(struct kobject *kobj)
161{
162 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
163 root_kobj);
164 complete(&root->kobj_unregister);
165}
166
167static const struct sysfs_ops btrfs_super_attr_ops = {
168 .show = btrfs_super_attr_show,
169 .store = btrfs_super_attr_store,
170};
171
172static const struct sysfs_ops btrfs_root_attr_ops = {
173 .show = btrfs_root_attr_show,
174 .store = btrfs_root_attr_store,
175};
176
177/* /sys/fs/btrfs/ entry */ 31/* /sys/fs/btrfs/ entry */
178static struct kset *btrfs_kset; 32static struct kset *btrfs_kset;
179 33
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dd719662340e..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
126 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
127 * when the transaction commits 127 * when the transaction commits
128 */ 128 */
129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 129static int record_root_in_trans(struct btrfs_trans_handle *trans,
130 struct btrfs_root *root) 130 struct btrfs_root *root)
131{ 131{
132 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
133 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
134 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
135 135
136 /*
137 * see below for in_trans_setup usage rules
138 * we have the reloc mutex held now, so there
139 * is only one writer in this function
140 */
141 root->in_trans_setup = 1;
142
143 /* make sure readers find in_trans_setup before
144 * they find our root->last_trans update
145 */
146 smp_wmb();
147
136 spin_lock(&root->fs_info->fs_roots_radix_lock); 148 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) { 149 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock); 150 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0; 151 return 0;
140 } 152 }
141 root->last_trans = trans->transid;
142 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 153 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
143 (unsigned long)root->root_key.objectid, 154 (unsigned long)root->root_key.objectid,
144 BTRFS_ROOT_TRANS_TAG); 155 BTRFS_ROOT_TRANS_TAG);
145 spin_unlock(&root->fs_info->fs_roots_radix_lock); 156 spin_unlock(&root->fs_info->fs_roots_radix_lock);
157 root->last_trans = trans->transid;
158
159 /* this is pretty tricky. We don't want to
160 * take the relocation lock in btrfs_record_root_in_trans
161 * unless we're really doing the first setup for this root in
162 * this transaction.
163 *
164 * Normally we'd use root->last_trans as a flag to decide
165 * if we want to take the expensive mutex.
166 *
167 * But, we have to set root->last_trans before we
168 * init the relocation root, otherwise, we trip over warnings
169 * in ctree.c. The solution used here is to flag ourselves
170 * with root->in_trans_setup. When this is 1, we're still
171 * fixing up the reloc trees and everyone must wait.
172 *
173 * When this is zero, they can trust root->last_trans and fly
174 * through btrfs_record_root_in_trans without having to take the
175 * lock. smp_wmb() makes sure that all the writes above are
176 * done before we pop in the zero below
177 */
146 btrfs_init_reloc_root(trans, root); 178 btrfs_init_reloc_root(trans, root);
179 smp_wmb();
180 root->in_trans_setup = 0;
147 } 181 }
148 return 0; 182 return 0;
149} 183}
150 184
185
186int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root)
188{
189 if (!root->ref_cows)
190 return 0;
191
192 /*
193 * see record_root_in_trans for comments about in_trans_setup usage
194 * and barriers
195 */
196 smp_rmb();
197 if (root->last_trans == trans->transid &&
198 !root->in_trans_setup)
199 return 0;
200
201 mutex_lock(&root->fs_info->reloc_mutex);
202 record_root_in_trans(trans, root);
203 mutex_unlock(&root->fs_info->reloc_mutex);
204
205 return 0;
206}
207
151/* wait for commit against the current transaction to become unblocked 208/* wait for commit against the current transaction to become unblocked
152 * when this is done, it is safe to start a new transaction, but the current 209 * when this is done, it is safe to start a new transaction, but the current
153 * transaction might not be fully on disk. 210 * transaction might not be fully on disk.
@@ -349,7 +406,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
349 list) { 406 list) {
350 if (t->in_commit) { 407 if (t->in_commit) {
351 if (t->commit_done) 408 if (t->commit_done)
352 goto out; 409 break;
353 cur_trans = t; 410 cur_trans = t;
354 atomic_inc(&cur_trans->use_count); 411 atomic_inc(&cur_trans->use_count);
355 break; 412 break;
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
882 parent = dget_parent(dentry); 939 parent = dget_parent(dentry);
883 parent_inode = parent->d_inode; 940 parent_inode = parent->d_inode;
884 parent_root = BTRFS_I(parent_inode)->root; 941 parent_root = BTRFS_I(parent_inode)->root;
885 btrfs_record_root_in_trans(trans, parent_root); 942 record_root_in_trans(trans, parent_root);
886 943
887 /* 944 /*
888 * insert the directory item 945 * insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
900 ret = btrfs_update_inode(trans, parent_root, parent_inode); 957 ret = btrfs_update_inode(trans, parent_root, parent_inode);
901 BUG_ON(ret); 958 BUG_ON(ret);
902 959
903 btrfs_record_root_in_trans(trans, root); 960 /*
961 * pull in the delayed directory update
962 * and the delayed inode item
963 * otherwise we corrupt the FS during
964 * snapshot
965 */
966 ret = btrfs_run_delayed_items(trans, root);
967 BUG_ON(ret);
968
969 record_root_in_trans(trans, root);
904 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 970 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
905 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 971 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
906 btrfs_check_and_init_root_item(new_root_item); 972 btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
961 int ret; 1027 int ret;
962 1028
963 list_for_each_entry(pending, head, list) { 1029 list_for_each_entry(pending, head, list) {
964 /*
965 * We must deal with the delayed items before creating
966 * snapshots, or we will create a snapthot with inconsistent
967 * information.
968 */
969 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
970 BUG_ON(ret);
971
972 ret = create_pending_snapshot(trans, fs_info, pending); 1030 ret = create_pending_snapshot(trans, fs_info, pending);
973 BUG_ON(ret); 1031 BUG_ON(ret);
974 } 1032 }
@@ -1118,8 +1176,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1118 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1176 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1119 else 1177 else
1120 wait_current_trans_commit_start(root, cur_trans); 1178 wait_current_trans_commit_start(root, cur_trans);
1121 put_transaction(cur_trans);
1122 1179
1180 if (current->journal_info == trans)
1181 current->journal_info = NULL;
1182
1183 put_transaction(cur_trans);
1123 return 0; 1184 return 0;
1124} 1185}
1125 1186
@@ -1238,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1238 schedule_timeout(1); 1299 schedule_timeout(1);
1239 1300
1240 finish_wait(&cur_trans->writer_wait, &wait); 1301 finish_wait(&cur_trans->writer_wait, &wait);
1241 spin_lock(&root->fs_info->trans_lock);
1242 root->fs_info->trans_no_join = 1;
1243 spin_unlock(&root->fs_info->trans_lock);
1244 } while (atomic_read(&cur_trans->num_writers) > 1 || 1302 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1245 (should_grow && cur_trans->num_joined != joined)); 1303 (should_grow && cur_trans->num_joined != joined));
1246 1304
1247 ret = create_pending_snapshots(trans, root->fs_info); 1305 /*
1248 BUG_ON(ret); 1306 * Ok now we need to make sure to block out any other joins while we
1307 * commit the transaction. We could have started a join before setting
1308 * no_join so make sure to wait for num_writers to == 1 again.
1309 */
1310 spin_lock(&root->fs_info->trans_lock);
1311 root->fs_info->trans_no_join = 1;
1312 spin_unlock(&root->fs_info->trans_lock);
1313 wait_event(cur_trans->writer_wait,
1314 atomic_read(&cur_trans->num_writers) == 1);
1315
1316 /*
1317 * the reloc mutex makes sure that we stop
1318 * the balancing code from coming in and moving
1319 * extents around in the middle of the commit
1320 */
1321 mutex_lock(&root->fs_info->reloc_mutex);
1249 1322
1250 ret = btrfs_run_delayed_items(trans, root); 1323 ret = btrfs_run_delayed_items(trans, root);
1251 BUG_ON(ret); 1324 BUG_ON(ret);
1252 1325
1326 ret = create_pending_snapshots(trans, root->fs_info);
1327 BUG_ON(ret);
1328
1253 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1329 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1254 BUG_ON(ret); 1330 BUG_ON(ret);
1255 1331
1332 /*
1333 * make sure none of the code above managed to slip in a
1334 * delayed item
1335 */
1336 btrfs_assert_delayed_root_empty(root);
1337
1256 WARN_ON(cur_trans != trans->transaction); 1338 WARN_ON(cur_trans != trans->transaction);
1257 1339
1258 btrfs_scrub_pause(root); 1340 btrfs_scrub_pause(root);
@@ -1309,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1309 root->fs_info->running_transaction = NULL; 1391 root->fs_info->running_transaction = NULL;
1310 root->fs_info->trans_no_join = 0; 1392 root->fs_info->trans_no_join = 0;
1311 spin_unlock(&root->fs_info->trans_lock); 1393 spin_unlock(&root->fs_info->trans_lock);
1394 mutex_unlock(&root->fs_info->reloc_mutex);
1312 1395
1313 wake_up(&root->fs_info->transaction_wait); 1396 wake_up(&root->fs_info->transaction_wait);
1314 1397
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
3177 tmp_key.offset = (u64)-1; 3177 tmp_key.offset = (u64)-1;
3178 3178
3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
3180 BUG_ON(!wc.replay_dest); 3180 BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
3181 3181
3182 wc.replay_dest->log_root = log; 3182 wc.replay_dest->log_root = log;
3183 btrfs_record_root_in_trans(trans, wc.replay_dest); 3183 btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da541dfca2e3..1efa56e18f9b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
689 transid = btrfs_super_generation(disk_super); 689 transid = btrfs_super_generation(disk_super);
690 if (disk_super->label[0]) 690 if (disk_super->label[0])
691 printk(KERN_INFO "device label %s ", disk_super->label); 691 printk(KERN_INFO "device label %s ", disk_super->label);
692 else { 692 else
693 /* FIXME, make a readl uuid parser */ 693 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
694 printk(KERN_INFO "device fsid %llx-%llx ",
695 *(unsigned long long *)disk_super->fsid,
696 *(unsigned long long *)(disk_super->fsid + 8));
697 }
698 printk(KERN_CONT "devid %llu transid %llu %s\n", 694 printk(KERN_CONT "devid %llu transid %llu %s\n",
699 (unsigned long long)devid, (unsigned long long)transid, path); 695 (unsigned long long)devid, (unsigned long long)transid, path);
700 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 696 ret = device_list_add(path, disk_super, devid, fs_devices_ret);