Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Kconfig                         1
-rw-r--r--  fs/btrfs/backref.c                      41
-rw-r--r--  fs/btrfs/backref.h                       3
-rw-r--r--  fs/btrfs/btrfs_inode.h                   3
-rw-r--r--  fs/btrfs/ctree.c                        63
-rw-r--r--  fs/btrfs/ctree.h                        45
-rw-r--r--  fs/btrfs/delayed-inode.c                46
-rw-r--r--  fs/btrfs/dev-replace.c                  25
-rw-r--r--  fs/btrfs/disk-io.c                     108
-rw-r--r--  fs/btrfs/disk-io.h                       6
-rw-r--r--  fs/btrfs/extent-tree.c                 309
-rw-r--r--  fs/btrfs/extent_io.c                    97
-rw-r--r--  fs/btrfs/extent_io.h                    65
-rw-r--r--  fs/btrfs/file.c                         90
-rw-r--r--  fs/btrfs/free-space-cache.c             13
-rw-r--r--  fs/btrfs/inode-item.c                    9
-rw-r--r--  fs/btrfs/inode.c                       221
-rw-r--r--  fs/btrfs/ioctl.c                         4
-rw-r--r--  fs/btrfs/ordered-data.c                  7
-rw-r--r--  fs/btrfs/qgroup.c                        5
-rw-r--r--  fs/btrfs/raid56.c                      103
-rw-r--r--  fs/btrfs/raid56.h                       11
-rw-r--r--  fs/btrfs/reada.c                        19
-rw-r--r--  fs/btrfs/relocation.c                   12
-rw-r--r--  fs/btrfs/scrub.c                       315
-rw-r--r--  fs/btrfs/send.c                        180
-rw-r--r--  fs/btrfs/super.c                        20
-rw-r--r--  fs/btrfs/sysfs.c                        10
-rw-r--r--  fs/btrfs/tests/extent-buffer-tests.c     2
-rw-r--r--  fs/btrfs/tests/extent-io-tests.c         3
-rw-r--r--  fs/btrfs/tests/inode-tests.c           201
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c           23
-rw-r--r--  fs/btrfs/transaction.c                  43
-rw-r--r--  fs/btrfs/transaction.h                   7
-rw-r--r--  fs/btrfs/tree-log.c                    237
-rw-r--r--  fs/btrfs/volumes.c                     249
-rw-r--r--  fs/btrfs/volumes.h                      18
-rw-r--r--  fs/btrfs/xattr.c                         8
38 files changed, 1670 insertions, 952 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index a66768ebc8d1..80e9c18ea64f 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -8,6 +8,7 @@ config BTRFS_FS
 	select LZO_DECOMPRESS
 	select RAID6_PQ
 	select XOR_BLOCKS
+	select SRCU
 
 	help
 	  Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2d3e32ebfd15..f55721ff9385 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1246,25 +1246,6 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-/*
- * this makes the path point to (inum INODE_ITEM ioff)
- */
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-		    struct btrfs_path *path)
-{
-	struct btrfs_key key;
-	return btrfs_find_item(fs_root, path, inum, ioff,
-			BTRFS_INODE_ITEM_KEY, &key);
-}
-
-static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-			  struct btrfs_path *path,
-			  struct btrfs_key *found_key)
-{
-	return btrfs_find_item(fs_root, path, inum, ioff,
-			BTRFS_INODE_REF_KEY, found_key);
-}
-
 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			  u64 start_off, struct btrfs_path *path,
 			  struct btrfs_inode_extref **ret_extref,
@@ -1374,7 +1355,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
 		}
-		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+		ret = btrfs_find_item(fs_root, path, parent, 0,
+				BTRFS_INODE_REF_KEY, &found_key);
 		if (ret > 0)
 			ret = -ENOENT;
 		if (ret)
@@ -1552,7 +1534,6 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 {
 	int ret;
 	int type;
-	struct btrfs_tree_block_info *info;
 	struct btrfs_extent_inline_ref *eiref;
 
 	if (*ptr == (unsigned long)-1)
@@ -1573,9 +1554,17 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 	}
 
 	/* we can treat both ref types equally here */
-	info = (struct btrfs_tree_block_info *)(ei + 1);
 	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
-	*out_level = btrfs_tree_block_level(eb, info);
+
+	if (key->type == BTRFS_EXTENT_ITEM_KEY) {
+		struct btrfs_tree_block_info *info;
+
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		*out_level = btrfs_tree_block_level(eb, info);
+	} else {
+		ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
+		*out_level = (u8)key->offset;
+	}
 
 	if (ret == 1)
 		*ptr = (unsigned long)-1;
@@ -1720,8 +1709,10 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 	struct btrfs_key found_key;
 
 	while (!ret) {
-		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
-				     &found_key);
+		ret = btrfs_find_item(fs_root, path, inum,
+				parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
+				&found_key);
+
 		if (ret < 0)
 			break;
 		if (ret) {
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 2a1ac6bfc724..9c41fbac3009 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -32,9 +32,6 @@ struct inode_fs_paths {
 typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
 		void *ctx);
 
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-		    struct btrfs_path *path);
-
 int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 			struct btrfs_path *path, struct btrfs_key *found_key,
 			u64 *flags);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4aadadcfab20..de5e4f2adfea 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -185,6 +185,9 @@ struct btrfs_inode {
 
 	struct btrfs_delayed_node *delayed_node;
 
+	/* File creation time. */
+	struct timespec i_otime;
+
 	struct inode vfs_inode;
 };
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 14a72ed14ef7..6d67f32e648d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -213,11 +213,19 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
  */
 static void add_root_to_dirty_list(struct btrfs_root *root)
 {
+	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
+	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
+		return;
+
 	spin_lock(&root->fs_info->trans_lock);
-	if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
-	    list_empty(&root->dirty_list)) {
-		list_add(&root->dirty_list,
-			 &root->fs_info->dirty_cowonly_roots);
+	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
+		/* Want the extent tree to be the last on the list */
+		if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
+			list_move_tail(&root->dirty_list,
+				       &root->fs_info->dirty_cowonly_roots);
+		else
+			list_move(&root->dirty_list,
+				  &root->fs_info->dirty_cowonly_roots);
 	}
 	spin_unlock(&root->fs_info->trans_lock);
 }
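
The rewritten add_root_to_dirty_list() above leans on test_and_set_bit(): only the caller that actually flips BTRFS_ROOT_DIRTY repositions the root, so repeat calls become cheap early returns. A minimal userspace sketch of that claim idiom, with C11 atomics standing in for the kernel bit ops (all names below are illustrative, not kernel API):

    #include <stdatomic.h>
    #include <stdio.h>

    #define ROOT_DIRTY (1u << 9)   /* mirrors the new BTRFS_ROOT_DIRTY bit */

    static atomic_uint root_state;

    /* Returns 1 only for the caller that actually set the bit. */
    static int claim_dirty(void)
    {
        unsigned int old = atomic_fetch_or(&root_state, ROOT_DIRTY);
        return !(old & ROOT_DIRTY);
    }

    int main(void)
    {
        printf("%d\n", claim_dirty()); /* 1: first caller does the list work */
        printf("%d\n", claim_dirty()); /* 0: already dirty, nothing to do */
        return 0;
    }
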
@@ -1363,8 +1371,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 
 	if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
 		BUG_ON(tm->slot != 0);
-		eb_rewin = alloc_dummy_extent_buffer(eb->start,
-						fs_info->tree_root->nodesize);
+		eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
 		if (!eb_rewin) {
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
@@ -1444,7 +1451,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	} else if (old_root) {
 		btrfs_tree_read_unlock(eb_root);
 		free_extent_buffer(eb_root);
-		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+		eb = alloc_dummy_extent_buffer(root->fs_info, logical);
 	} else {
 		btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
 		eb = btrfs_clone_extent_buffer(eb_root);
@@ -1638,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 
 	parent_nritems = btrfs_header_nritems(parent);
 	blocksize = root->nodesize;
-	end_slot = parent_nritems;
+	end_slot = parent_nritems - 1;
 
-	if (parent_nritems == 1)
+	if (parent_nritems <= 1)
 		return 0;
 
 	btrfs_set_lock_blocking(parent);
 
-	for (i = start_slot; i < end_slot; i++) {
+	for (i = start_slot; i <= end_slot; i++) {
 		int close = 1;
 
 		btrfs_node_key(parent, &disk_key, i);
@@ -1662,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			other = btrfs_node_blockptr(parent, i - 1);
 			close = close_blocks(blocknr, other, blocksize);
 		}
-		if (!close && i < end_slot - 2) {
+		if (!close && i < end_slot) {
 			other = btrfs_node_blockptr(parent, i + 1);
 			close = close_blocks(blocknr, other, blocksize);
 		}
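
Taken together, the two hunks above fix an off-by-one: with the old end_slot == parent_nritems, the guard i < end_slot - 2 skipped the forward-neighbour comparison for the second-to-last slot even though slot i + 1 existed. A standalone sketch (hypothetical nritems value) printing which slots get the next-block check under each version:

    #include <stdio.h>

    int main(void)
    {
        int nritems = 5; /* hypothetical node item count */

        /* Old bounds: end_slot = nritems, guard is i < end_slot - 2. */
        printf("old next-block checks:");
        for (int i = 0; i < nritems; i++)
            if (i < nritems - 2)
                printf(" %d", i);
        printf("\n"); /* slots 0 1 2: slot 3 is wrongly skipped */

        /* New bounds: end_slot = nritems - 1, guard is i < end_slot. */
        printf("new next-block checks:");
        for (int i = 0; i <= nritems - 1; i++)
            if (i < nritems - 1)
                printf(" %d", i);
        printf("\n"); /* slots 0 1 2 3: every slot with a valid i + 1 */

        return 0;
    }
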
@@ -2282,7 +2289,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			readahead_tree_block(root, search, blocksize);
+			readahead_tree_block(root, search);
 			nread += blocksize;
 		}
 		nscan++;
@@ -2301,7 +2308,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	u64 gen;
 	u64 block1 = 0;
 	u64 block2 = 0;
-	int blocksize;
 
 	parent = path->nodes[level + 1];
 	if (!parent)
@@ -2309,7 +2315,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(parent);
 	slot = path->slots[level + 1];
-	blocksize = root->nodesize;
 
 	if (slot > 0) {
 		block1 = btrfs_node_blockptr(parent, slot - 1);
@@ -2334,9 +2339,9 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	}
 
 	if (block1)
-		readahead_tree_block(root, block1, blocksize);
+		readahead_tree_block(root, block1);
 	if (block2)
-		readahead_tree_block(root, block2, blocksize);
+		readahead_tree_block(root, block2);
 }
 
 
@@ -2609,32 +2614,24 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
 	return 0;
 }
 
-int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
 		u64 iobjectid, u64 ioff, u8 key_type,
 		struct btrfs_key *found_key)
 {
 	int ret;
 	struct btrfs_key key;
 	struct extent_buffer *eb;
-	struct btrfs_path *path;
+
+	ASSERT(path);
+	ASSERT(found_key);
 
 	key.type = key_type;
 	key.objectid = iobjectid;
 	key.offset = ioff;
 
-	if (found_path == NULL) {
-		path = btrfs_alloc_path();
-		if (!path)
-			return -ENOMEM;
-	} else
-		path = found_path;
-
 	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
-	if ((ret < 0) || (found_key == NULL)) {
-		if (path != found_path)
-			btrfs_free_path(path);
+	if (ret < 0)
 		return ret;
-	}
 
 	eb = path->nodes[0];
 	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
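
With the allocation fallback gone, btrfs_find_item() now requires the caller to supply both the path and found_key (enforced by the new ASSERTs). A sketch of the resulting calling convention, using only functions visible in this diff; lookup_first_inode_ref() itself is a hypothetical helper, not kernel code:

    /* Hypothetical wrapper showing the post-change contract. */
    static int lookup_first_inode_ref(struct btrfs_root *fs_root, u64 inum,
                                      struct btrfs_key *found_key)
    {
        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();   /* the caller owns the path now */
        if (!path)
            return -ENOMEM;

        ret = btrfs_find_item(fs_root, path, inum, 0,
                              BTRFS_INODE_REF_KEY, found_key);
        btrfs_free_path(path);
        return ret;
    }
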
@@ -3383,7 +3380,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = BTRFS_WRITE_LOCK;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -4356,13 +4353,15 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 	path->search_for_split = 1;
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 	path->search_for_split = 0;
+	if (ret > 0)
+		ret = -EAGAIN;
 	if (ret < 0)
 		goto err;
 
 	ret = -EAGAIN;
 	leaf = path->nodes[0];
-	/* if our item isn't there or got smaller, return now */
-	if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+	/* if our item isn't there, return now */
+	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
 		goto err;
 
 	/* the leaf has changed, it now has room.  return now */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7e607416755a..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 
 #define BTRFS_DIRTY_METADATA_THRESH	(32 * 1024 * 1024)
 
+#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
@@ -1020,6 +1022,9 @@ enum btrfs_raid_types {
 				 BTRFS_BLOCK_GROUP_RAID6 |   \
 				 BTRFS_BLOCK_GROUP_DUP |     \
 				 BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
+					 BTRFS_BLOCK_GROUP_RAID6)
+
 /*
  * We need a bit for restriper to be able to tell when chunks of type
  * SINGLE are available.  This "extended" profile format is used in
@@ -1171,6 +1176,7 @@ struct btrfs_space_info {
 	struct percpu_counter total_bytes_pinned;
 
 	struct list_head list;
+	/* Protected by the spinlock 'lock'. */
 	struct list_head ro_bgs;
 
 	struct rw_semaphore groups_sem;
@@ -1238,7 +1244,6 @@ enum btrfs_disk_cache_state {
 	BTRFS_DC_ERROR		= 1,
 	BTRFS_DC_CLEAR		= 2,
 	BTRFS_DC_SETUP		= 3,
-	BTRFS_DC_NEED_WRITE	= 4,
 };
 
 struct btrfs_caching_control {
@@ -1276,7 +1281,6 @@ struct btrfs_block_group_cache {
 	unsigned long full_stripe_len;
 
 	unsigned int ro:1;
-	unsigned int dirty:1;
 	unsigned int iref:1;
 	unsigned int has_caching_ctl:1;
 	unsigned int removed:1;
@@ -1314,6 +1318,9 @@ struct btrfs_block_group_cache {
 	struct list_head ro_list;
 
 	atomic_t trimming;
+
+	/* For dirty block groups */
+	struct list_head dirty_list;
 };
 
 /* delayed seq elem */
@@ -1740,6 +1747,7 @@ struct btrfs_fs_info {
 
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
+	struct mutex unused_bg_unpin_mutex;
 
 	/* For btrfs to record security options */
 	struct security_mnt_opts security_opts;
@@ -1775,6 +1783,7 @@ struct btrfs_subvolume_writers {
 #define BTRFS_ROOT_DEFRAG_RUNNING	6
 #define BTRFS_ROOT_FORCE_COW		7
 #define BTRFS_ROOT_MULTI_LOG_TASKS	8
+#define BTRFS_ROOT_DIRTY		9
 
 /*
  * in ram representation of the tree.  extent_root is used for all allocations
@@ -1793,8 +1802,6 @@ struct btrfs_root {
 	struct btrfs_fs_info *fs_info;
 	struct extent_io_tree dirty_log_pages;
 
-	struct kobject root_kobj;
-	struct completion kobj_unregister;
 	struct mutex objectid_mutex;
 
 	spinlock_t accounting_lock;
@@ -2464,31 +2471,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-static inline struct btrfs_timespec *
-btrfs_inode_atime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, atime);
-	return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, mtime);
-	return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, ctime);
-	return (struct btrfs_timespec *)ptr;
-}
-
 BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
 BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
@@ -3405,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root);
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3927,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
 				      loff_t actual_len, u64 *alloc_hint);
 int btrfs_inode_check_errors(struct inode *inode);
 extern const struct dentry_operations btrfs_dentry_operations;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode);
+#endif
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 054577bddaf2..82f0c7c95474 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1755,27 +1755,31 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
 	btrfs_set_stack_inode_block_group(inode_item, 0);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->atime,
 				     inode->i_atime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->atime,
 				      inode->i_atime.tv_nsec);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->mtime,
 				     inode->i_mtime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->mtime,
 				      inode->i_mtime.tv_nsec);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->ctime,
 				     inode->i_ctime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
 				      inode->i_ctime.tv_nsec);
+
+	btrfs_set_stack_timespec_sec(&inode_item->otime,
+				     BTRFS_I(inode)->i_otime.tv_sec);
+	btrfs_set_stack_timespec_nsec(&inode_item->otime,
+				      BTRFS_I(inode)->i_otime.tv_nsec);
 }
 
 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 {
 	struct btrfs_delayed_node *delayed_node;
 	struct btrfs_inode_item *inode_item;
-	struct btrfs_timespec *tspec;
 
 	delayed_node = btrfs_get_delayed_node(inode);
 	if (!delayed_node)
@@ -1802,17 +1806,19 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	*rdev = btrfs_stack_inode_rdev(inode_item);
 	BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
 
-	tspec = btrfs_inode_atime(inode_item);
-	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
+	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
+
+	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
+	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
 
-	tspec = btrfs_inode_mtime(inode_item);
-	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
+	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
 
-	tspec = btrfs_inode_ctime(inode_item);
-	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	BTRFS_I(inode)->i_otime.tv_sec =
+		btrfs_stack_timespec_sec(&inode_item->otime);
+	BTRFS_I(inode)->i_otime.tv_nsec =
+		btrfs_stack_timespec_nsec(&inode_item->otime);
 
 	inode->i_generation = BTRFS_I(inode)->generation;
 	BTRFS_I(inode)->index_cnt = (u64)-1;
@@ -1857,6 +1863,14 @@ int btrfs_delayed_delete_inode_ref(struct inode *inode)
 {
 	struct btrfs_delayed_node *delayed_node;
 
+	/*
+	 * we don't do delayed inode updates during log recovery because it
+	 * leads to enospc problems.  This means we also can't do
+	 * delayed inode refs
+	 */
+	if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
+		return -EAGAIN;
+
 	delayed_node = btrfs_get_or_create_delayed_node(inode);
 	if (IS_ERR(delayed_node))
 		return PTR_ERR(delayed_node);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ca6a3a3b6b6c..5ec03d999c37 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -440,18 +440,9 @@ leave:
  */
 static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
 {
-	s64 writers;
-	DEFINE_WAIT(wait);
-
 	set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
-	do {
-		prepare_to_wait(&fs_info->replace_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		writers = percpu_counter_sum(&fs_info->bio_counter);
-		if (writers)
-			schedule();
-		finish_wait(&fs_info->replace_wait, &wait);
-	} while (writers);
+	wait_event(fs_info->replace_wait, !percpu_counter_sum(
+			&fs_info->bio_counter));
 }
 
 /*
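
wait_event() bundles exactly the prepare_to_wait()/schedule()/finish_wait() dance deleted above. Roughly, and only as a simplified sketch of the macro's behaviour rather than its literal kernel expansion:

    #define wait_event_sketch(wq, cond)                                \
    do {                                                               \
        DEFINE_WAIT(__wait);                                           \
        while (!(cond)) {                                              \
            prepare_to_wait(&(wq), &__wait, TASK_UNINTERRUPTIBLE);     \
            if (!(cond)) /* re-test after queueing: no lost wake-ups */\
                schedule();                                            \
        }                                                              \
        finish_wait(&(wq), &__wait);                                   \
    } while (0)

The re-test after prepare_to_wait() is what the open-coded loop was reimplementing by hand; using the macro removes the chance of getting that ordering wrong.
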
@@ -932,15 +923,15 @@ void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
 {
-	DEFINE_WAIT(wait);
-again:
-	percpu_counter_inc(&fs_info->bio_counter);
-	if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
+	while (1) {
+		percpu_counter_inc(&fs_info->bio_counter);
+		if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
+				     &fs_info->fs_state)))
+			break;
+
 		btrfs_bio_counter_dec(fs_info);
 		wait_event(fs_info->replace_wait,
 			   !test_bit(BTRFS_FS_STATE_DEV_REPLACING,
 				     &fs_info->fs_state));
-		goto again;
 	}
-
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8c63419a7f70..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 		memcpy(&found, result, csum_size);
 
 		read_extent_buffer(buf, &val, 0, csum_size);
-		printk_ratelimited(KERN_INFO
+		printk_ratelimited(KERN_WARNING
 			"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
 			"level %d\n",
 			root->fs_info->sb->s_id, buf->start,
@@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 		ret = 0;
 		goto out;
 	}
-	printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
+	printk_ratelimited(KERN_ERR
+	    "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
 			eb->fs_info->sb->s_id, eb->start,
 			parent_transid, btrfs_header_generation(eb));
 	ret = 1;
@@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != eb->start) {
-		printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
+		printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
 			       "%llu %llu\n",
 			       eb->fs_info->sb->s_id, found_start, eb->start);
 		ret = -EIO;
 		goto err;
 	}
 	if (check_tree_block_fsid(root, eb)) {
-		printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
+		printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
 			       eb->fs_info->sb->s_id, eb->start);
 		ret = -EIO;
 		goto err;
 	}
 	found_level = btrfs_header_level(eb);
 	if (found_level >= BTRFS_MAX_LEVEL) {
-		btrfs_info(root->fs_info, "bad tree block level %d",
+		btrfs_err(root->fs_info, "bad tree block level %d",
 			   (int)btrfs_header_level(eb));
 		ret = -EIO;
 		goto err;
@@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
 	.set_page_dirty = btree_set_page_dirty,
 };
 
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
 {
 	struct extent_buffer *buf = NULL;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
 	free_extent_buffer(buf);
 }
 
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb)
 {
 	struct extent_buffer *buf = NULL;
@@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 	struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
 	int ret;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return 0;
 
@@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 }
 
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
-						   u64 bytenr, u32 blocksize)
+						   u64 bytenr)
 {
 	if (btrfs_test_is_dummy_root(root))
-		return alloc_test_extent_buffer(root->fs_info, bytenr,
-						blocksize);
-	return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
+		return alloc_test_extent_buffer(root->fs_info, bytenr);
+	return alloc_extent_buffer(root->fs_info, bytenr);
 }
 
 
@@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 	struct extent_buffer *buf = NULL;
 	int ret;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return NULL;
 
@@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
 	memset(&root->root_key, 0, sizeof(root->root_key));
 	memset(&root->root_item, 0, sizeof(root->root_item));
 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
-	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
 	if (fs_info)
 		root->defrag_trans_start = fs_info->generation;
 	else
 		root->defrag_trans_start = 0;
-	init_completion(&root->kobj_unregister);
 	root->root_key.objectid = objectid;
 	root->anon_dev = 0;
 
@@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 			      bool check_ref)
 {
 	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
 	int ret;
 
 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@@ -1669,8 +1669,17 @@ again:
 	if (ret)
 		goto fail;
 
-	ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
-			location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	key.type = BTRFS_ORPHAN_ITEM_KEY;
+	key.offset = location->objectid;
+
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+	btrfs_free_path(path);
 	if (ret < 0)
 		goto fail;
 	if (ret == 0)
@@ -1715,12 +1724,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
 	int err;
 
-	bdi->capabilities = BDI_CAP_MAP_COPY;
-	err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
+	err = bdi_setup_and_register(bdi, "btrfs");
 	if (err)
 		return err;
 
-	bdi->ra_pages = default_backing_dev_info.ra_pages;
+	bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
 	bdi->congested_fn = btrfs_congested_fn;
 	bdi->congested_data = info;
 	return 0;
@@ -2233,6 +2241,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->qgroup_op_lock);
 	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
+	mutex_init(&fs_info->unused_bg_unpin_mutex);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
@@ -2319,7 +2328,6 @@ int open_ctree(struct super_block *sb,
 	 */
 	fs_info->btree_inode->i_size = OFFSET_MAX;
 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
-	fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
 
 	RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
 	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
@@ -2498,7 +2506,7 @@ int open_ctree(struct super_block *sb,
 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
 	if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
-		printk(KERN_ERR "BTRFS: has skinny extents\n");
+		printk(KERN_INFO "BTRFS: has skinny extents\n");
 
 	/*
 	 * flag our filesystem as having big metadata blocks if
@@ -2522,7 +2530,7 @@ int open_ctree(struct super_block *sb,
 	 */
 	if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
 	    (sectorsize != nodesize)) {
-		printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
+		printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
 				"are not allowed for mixed block groups on %s\n",
 				sb->s_id);
 		goto fail_alloc;
@@ -2630,12 +2638,12 @@ int open_ctree(struct super_block *sb,
 	sb->s_blocksize_bits = blksize_bits(sectorsize);
 
 	if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-		printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
+		printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
 
 	if (sectorsize != PAGE_SIZE) {
-		printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
+		printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
 		       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
 		goto fail_sb_buffer;
 	}
@@ -2644,7 +2652,7 @@ int open_ctree(struct super_block *sb,
 	ret = btrfs_read_sys_array(tree_root);
 	mutex_unlock(&fs_info->chunk_mutex);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to read the system "
+		printk(KERN_ERR "BTRFS: failed to read the system "
 		       "array on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
@@ -2659,7 +2667,7 @@ int open_ctree(struct super_block *sb,
 					   generation);
 	if (!chunk_root->node ||
 	    !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
-		printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2671,7 +2679,7 @@ int open_ctree(struct super_block *sb,
 
 	ret = btrfs_read_chunk_tree(chunk_root);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2683,7 +2691,7 @@ int open_ctree(struct super_block *sb,
 	btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
 	if (!fs_devices->latest_bdev) {
-		printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2767,7 +2775,7 @@ retry_root_backup:
 
 	ret = btrfs_recover_balance(fs_info);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to recover balance\n");
+		printk(KERN_ERR "BTRFS: failed to recover balance\n");
 		goto fail_block_groups;
 	}
 
@@ -3862,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
 				btrfs_super_log_root(sb));
 
+	/*
+	 * Check the lower bound, the alignment and other constraints are
+	 * checked later.
+	 */
+	if (btrfs_super_nodesize(sb) < 4096) {
+		printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
+				btrfs_super_nodesize(sb));
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sectorsize(sb) < 4096) {
+		printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
+				btrfs_super_sectorsize(sb));
+		ret = -EINVAL;
+	}
+
 	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
 		printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
 				fs_info->fsid, sb->dev_item.fsid);
@@ -3875,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	if (btrfs_super_num_devices(sb) > (1UL << 31))
 		printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
 				btrfs_super_num_devices(sb));
+	if (btrfs_super_num_devices(sb) == 0) {
+		printk(KERN_ERR "BTRFS: number of devices is 0\n");
+		ret = -EINVAL;
+	}
 
 	if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
 		printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@@ -3883,6 +3910,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	}
 
 	/*
+	 * Obvious sys_chunk_array corruptions, it must hold at least one key
+	 * and one chunk
+	 */
+	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
+				btrfs_super_sys_array_size(sb),
+				BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+			+ sizeof(struct btrfs_chunk)) {
+		printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
+				btrfs_super_sys_array_size(sb),
+				sizeof(struct btrfs_disk_key)
+				+ sizeof(struct btrfs_chunk));
+		ret = -EINVAL;
+	}
+
+	/*
 	 * The generation is a global counter, we'll trust it more than the others
 	 * but it's still possible that it's the one that's wrong.
 	 */
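
All of the new checks follow one shape: log the specific problem, record -EINVAL, and keep going, so a single failed mount reports every violated constraint at once instead of only the first. A compilable userspace sketch of that accumulate-errors pattern (struct sb_sketch and check_super_sketch() are stand-ins, not btrfs code):

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the superblock fields checked above. */
    struct sb_sketch {
        unsigned int nodesize;
        unsigned int sectorsize;
        unsigned long long num_devices;
    };

    static int check_super_sketch(const struct sb_sketch *s)
    {
        int ret = 0;

        /* Each failed check logs and records -EINVAL, then continues. */
        if (s->nodesize < 4096) {
            fprintf(stderr, "nodesize too small: %u < 4096\n", s->nodesize);
            ret = -EINVAL;
        }
        if (s->sectorsize < 4096) {
            fprintf(stderr, "sectorsize too small: %u < 4096\n",
                    s->sectorsize);
            ret = -EINVAL;
        }
        if (s->num_devices == 0) {
            fprintf(stderr, "number of devices is 0\n");
            ret = -EINVAL;
        }
        return ret;
    }

    int main(void)
    {
        struct sb_sketch bad = { 2048, 512, 0 }; /* trips all three checks */
        return check_super_sketch(&bad) ? 1 : 0;
    }
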
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 414651821fb3..27d44c0fd236 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -46,11 +46,11 @@ struct btrfs_fs_devices;
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 				      u64 parent_transid);
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize);
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr);
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb);
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
-						   u64 bytenr, u32 blocksize);
+						   u64 bytenr);
 void clean_tree_block(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, struct extent_buffer *buf);
 int open_ctree(struct super_block *sb,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a80b97100d90..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -74,8 +74,9 @@ enum {
 	RESERVE_ALLOC_NO_ACCOUNT = 2,
 };
 
-static int update_block_group(struct btrfs_root *root,
-			      u64 bytenr, u64 num_bytes, int alloc);
+static int update_block_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 bytenr,
+			      u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -1925,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 			 */
 			ret = 0;
 		}
-		kfree(bbio);
+		btrfs_put_bbio(bbio);
 	}
 
 	if (actual_bytes)
@@ -2768,7 +2769,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_head *head;
 	int ret;
 	int run_all = count == (unsigned long)-1;
-	int run_most = 0;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2778,10 +2778,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		root = root->fs_info->tree_root;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	if (count == 0) {
+	if (count == 0)
 		count = atomic_read(&delayed_refs->num_entries) * 2;
-		run_most = 1;
-	}
 
 again:
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -3139,9 +3137,11 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 
 	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
-	if (ret < 0)
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
 		goto fail;
-	BUG_ON(ret); /* Corruption */
+	}
 
 	leaf = path->nodes[0];
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -3149,11 +3149,9 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 fail:
-	if (ret) {
+	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
-	}
-	return 0;
 
 }
 
@@ -3210,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 		return 0;
 	}
 
+	if (trans->aborted)
+		return 0;
 again:
 	inode = lookup_free_space_inode(root, block_group, path);
 	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3245,6 +3245,20 @@ again:
 	 */
 	BTRFS_I(inode)->generation = 0;
 	ret = btrfs_update_inode(trans, root, inode);
+	if (ret) {
+		/*
+		 * So theoretically we could recover from this, simply set the
+		 * super cache generation to 0 so we know to invalidate the
+		 * cache, but then we'd have to keep track of the block groups
+		 * that fail this way so we know we _have_ to reset this cache
+		 * before the next commit or risk reading stale cache.  So to
+		 * limit our exposure to horrible edge cases lets just abort
+		 * the transaction, this only happens in really bad situations
+		 * anyway.
+		 */
+		btrfs_abort_transaction(trans, root, ret);
+		goto out_put;
+	}
 	WARN_ON(ret);
 
 	if (i_size_read(inode) > 0) {
@@ -3311,124 +3325,72 @@ out:
3311 return ret; 3325 return ret;
3312} 3326}
3313 3327
3314int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3328int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3315 struct btrfs_root *root) 3329 struct btrfs_root *root)
3316{ 3330{
3317 struct btrfs_block_group_cache *cache; 3331 struct btrfs_block_group_cache *cache, *tmp;
3318 int err = 0; 3332 struct btrfs_transaction *cur_trans = trans->transaction;
3319 struct btrfs_path *path; 3333 struct btrfs_path *path;
3320 u64 last = 0; 3334
3335 if (list_empty(&cur_trans->dirty_bgs) ||
3336 !btrfs_test_opt(root, SPACE_CACHE))
3337 return 0;
3321 3338
3322 path = btrfs_alloc_path(); 3339 path = btrfs_alloc_path();
3323 if (!path) 3340 if (!path)
3324 return -ENOMEM; 3341 return -ENOMEM;
3325 3342
3326again: 3343 /* Could add new block groups, use _safe just in case */
3327 while (1) { 3344 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3328 cache = btrfs_lookup_first_block_group(root->fs_info, last); 3345 dirty_list) {
3329 while (cache) { 3346 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3330 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 3347 cache_save_setup(cache, trans, path);
3331 break;
3332 cache = next_block_group(root, cache);
3333 }
3334 if (!cache) {
3335 if (last == 0)
3336 break;
3337 last = 0;
3338 continue;
3339 }
3340 err = cache_save_setup(cache, trans, path);
3341 last = cache->key.objectid + cache->key.offset;
3342 btrfs_put_block_group(cache);
3343 } 3348 }
3344 3349
3345 while (1) { 3350 btrfs_free_path(path);
3346 if (last == 0) { 3351 return 0;
3347 err = btrfs_run_delayed_refs(trans, root, 3352}
3348 (unsigned long)-1);
3349 if (err) /* File system offline */
3350 goto out;
3351 }
3352
3353 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3354 while (cache) {
3355 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
3356 btrfs_put_block_group(cache);
3357 goto again;
3358 }
3359
3360 if (cache->dirty)
3361 break;
3362 cache = next_block_group(root, cache);
3363 }
3364 if (!cache) {
3365 if (last == 0)
3366 break;
3367 last = 0;
3368 continue;
3369 }
3370
3371 if (cache->disk_cache_state == BTRFS_DC_SETUP)
3372 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
3373 cache->dirty = 0;
3374 last = cache->key.objectid + cache->key.offset;
3375
3376 err = write_one_cache_group(trans, root, path, cache);
3377 btrfs_put_block_group(cache);
3378 if (err) /* File system offline */
3379 goto out;
3380 }
3381 3353
3382 while (1) { 3354int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3383 /* 3355 struct btrfs_root *root)
3384 * I don't think this is needed since we're just marking our 3356{
3385 * preallocated extent as written, but just in case it can't 3357 struct btrfs_block_group_cache *cache;
3386 * hurt. 3358 struct btrfs_transaction *cur_trans = trans->transaction;
3387 */ 3359 int ret = 0;
3388 if (last == 0) { 3360 struct btrfs_path *path;
3389 err = btrfs_run_delayed_refs(trans, root,
3390 (unsigned long)-1);
3391 if (err) /* File system offline */
3392 goto out;
3393 }
3394 3361
3395 cache = btrfs_lookup_first_block_group(root->fs_info, last); 3362 if (list_empty(&cur_trans->dirty_bgs))
3396 while (cache) { 3363 return 0;
3397 /*
3398 * Really this shouldn't happen, but it could if we
3399 * couldn't write the entire preallocated extent and
3400 * splitting the extent resulted in a new block.
3401 */
3402 if (cache->dirty) {
3403 btrfs_put_block_group(cache);
3404 goto again;
3405 }
3406 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3407 break;
3408 cache = next_block_group(root, cache);
3409 }
3410 if (!cache) {
3411 if (last == 0)
3412 break;
3413 last = 0;
3414 continue;
3415 }
3416 3364
3417 err = btrfs_write_out_cache(root, trans, cache, path); 3365 path = btrfs_alloc_path();
3366 if (!path)
3367 return -ENOMEM;
3418 3368
3419 /* 3369 /*
3420 * If we didn't have an error then the cache state is still 3370 * We don't need the lock here since we are protected by the transaction
3421 * NEED_WRITE, so we can set it to WRITTEN. 3371 * commit. We want to do the cache_save_setup first and then run the
3422 */ 3372 * delayed refs to make sure we have the best chance at doing this all
3423 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 3373 * in one shot.
3424 cache->disk_cache_state = BTRFS_DC_WRITTEN; 3374 */
3425 last = cache->key.objectid + cache->key.offset; 3375 while (!list_empty(&cur_trans->dirty_bgs)) {
3376 cache = list_first_entry(&cur_trans->dirty_bgs,
3377 struct btrfs_block_group_cache,
3378 dirty_list);
3379 list_del_init(&cache->dirty_list);
3380 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3381 cache_save_setup(cache, trans, path);
3382 if (!ret)
3383 ret = btrfs_run_delayed_refs(trans, root,
3384 (unsigned long) -1);
3385 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
3386 btrfs_write_out_cache(root, trans, cache, path);
3387 if (!ret)
3388 ret = write_one_cache_group(trans, root, path, cache);
3426 btrfs_put_block_group(cache); 3389 btrfs_put_block_group(cache);
3427 } 3390 }
3428out:
3429 3391
3430 btrfs_free_path(path); 3392 btrfs_free_path(path);
3431 return err; 3393 return ret;
3432} 3394}
3433 3395
3434int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 3396int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
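
The rewritten btrfs_write_dirty_block_groups() above no longer rescans every block group looking for cache->dirty: update_block_group() queues a group on the per-transaction dirty_bgs list (at most once, under dirty_bgs_lock) and commit time simply drains that list. A minimal userspace sketch of the queue-once/drain pattern, with a flagged singly linked list and a pthread mutex standing in for the kernel's list_head and spinlock (illustrative names only):

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for btrfs_block_group_cache and dirty_bgs. */
struct block_group {
    int id;
    int on_dirty_list;
    struct block_group *next;
};

static struct block_group *dirty_bgs;
static pthread_mutex_t dirty_bgs_lock = PTHREAD_MUTEX_INITIALIZER;

/* update_block_group() analogue: queue a dirty group at most once. */
static void mark_dirty(struct block_group *bg)
{
    pthread_mutex_lock(&dirty_bgs_lock);
    if (!bg->on_dirty_list) {
        bg->on_dirty_list = 1;
        bg->next = dirty_bgs;
        dirty_bgs = bg;
    }
    pthread_mutex_unlock(&dirty_bgs_lock);
}

/* Commit-time analogue: visit only the groups that were queued. */
static void write_dirty_block_groups(void)
{
    while (dirty_bgs) {
        struct block_group *bg = dirty_bgs;

        dirty_bgs = bg->next;
        bg->on_dirty_list = 0;
        printf("writing block group %d\n", bg->id);
    }
}

int main(void)
{
    struct block_group a = { .id = 1 }, b = { .id = 2 };

    mark_dirty(&a);
    mark_dirty(&b);
    mark_dirty(&a);    /* duplicate add is a no-op */
    write_dirty_block_groups();
    return 0;
}

The kernel version drains without holding the lock because, per the comment in the new function, the transaction commit already excludes concurrent writers.
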
@@ -5043,19 +5005,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
5043/** 5005/**
5044 * drop_outstanding_extent - drop an outstanding extent 5006 * drop_outstanding_extent - drop an outstanding extent
5045 * @inode: the inode we're dropping the extent for 5007 * @inode: the inode we're dropping the extent for
 5008 * @num_bytes: the number of bytes we're releasing.
5046 * 5009 *
5047 * This is called when we are freeing up an outstanding extent, either called 5010 * This is called when we are freeing up an outstanding extent, either called
5048 * after an error or after an extent is written. This will return the number of 5011 * after an error or after an extent is written. This will return the number of
5049 * reserved extents that need to be freed. This must be called with 5012 * reserved extents that need to be freed. This must be called with
5050 * BTRFS_I(inode)->lock held. 5013 * BTRFS_I(inode)->lock held.
5051 */ 5014 */
5052static unsigned drop_outstanding_extent(struct inode *inode) 5015static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
5053{ 5016{
5054 unsigned drop_inode_space = 0; 5017 unsigned drop_inode_space = 0;
5055 unsigned dropped_extents = 0; 5018 unsigned dropped_extents = 0;
5019 unsigned num_extents = 0;
5056 5020
5057 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 5021 num_extents = (unsigned)div64_u64(num_bytes +
5058 BTRFS_I(inode)->outstanding_extents--; 5022 BTRFS_MAX_EXTENT_SIZE - 1,
5023 BTRFS_MAX_EXTENT_SIZE);
5024 ASSERT(num_extents);
5025 ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
5026 BTRFS_I(inode)->outstanding_extents -= num_extents;
5059 5027
5060 if (BTRFS_I(inode)->outstanding_extents == 0 && 5028 if (BTRFS_I(inode)->outstanding_extents == 0 &&
5061 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, 5029 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5168,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5168 num_bytes = ALIGN(num_bytes, root->sectorsize); 5136 num_bytes = ALIGN(num_bytes, root->sectorsize);
5169 5137
5170 spin_lock(&BTRFS_I(inode)->lock); 5138 spin_lock(&BTRFS_I(inode)->lock);
5171 BTRFS_I(inode)->outstanding_extents++; 5139 nr_extents = (unsigned)div64_u64(num_bytes +
5140 BTRFS_MAX_EXTENT_SIZE - 1,
5141 BTRFS_MAX_EXTENT_SIZE);
5142 BTRFS_I(inode)->outstanding_extents += nr_extents;
5143 nr_extents = 0;
5172 5144
5173 if (BTRFS_I(inode)->outstanding_extents > 5145 if (BTRFS_I(inode)->outstanding_extents >
5174 BTRFS_I(inode)->reserved_extents) 5146 BTRFS_I(inode)->reserved_extents)
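
Both drop_outstanding_extent() and btrfs_delalloc_reserve_metadata() now convert the byte range into an extent count with a round-up division by BTRFS_MAX_EXTENT_SIZE instead of bumping the counter once per operation, which keeps the accounting correct when a single reservation spans several maximum-sized extents. A standalone sketch of the arithmetic, assuming the 128MiB value implied by the literal this series replaces in writepage_delalloc() further down:

#include <stdio.h>
#include <stdint.h>

/* Assumed value, per the 128 * 1024 * 1024 literal swapped for this name. */
#define BTRFS_MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

/* Round-up division: how many maximum-sized extents cover num_bytes. */
static unsigned count_max_extents(uint64_t num_bytes)
{
    return (unsigned)((num_bytes + BTRFS_MAX_EXTENT_SIZE - 1) /
                      BTRFS_MAX_EXTENT_SIZE);
}

int main(void)
{
    /* 1 byte -> 1 extent, 128MiB -> 1 extent, 128MiB + 1 -> 2 extents */
    printf("%u %u %u\n", count_max_extents(1),
           count_max_extents(BTRFS_MAX_EXTENT_SIZE),
           count_max_extents(BTRFS_MAX_EXTENT_SIZE + 1));
    return 0;
}
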
@@ -5226,7 +5198,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5226 5198
5227out_fail: 5199out_fail:
5228 spin_lock(&BTRFS_I(inode)->lock); 5200 spin_lock(&BTRFS_I(inode)->lock);
5229 dropped = drop_outstanding_extent(inode); 5201 dropped = drop_outstanding_extent(inode, num_bytes);
5230 /* 5202 /*
5231 * If the inodes csum_bytes is the same as the original 5203 * If the inodes csum_bytes is the same as the original
5232 * csum_bytes then we know we haven't raced with any free()ers 5204 * csum_bytes then we know we haven't raced with any free()ers
@@ -5305,7 +5277,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5305 5277
5306 num_bytes = ALIGN(num_bytes, root->sectorsize); 5278 num_bytes = ALIGN(num_bytes, root->sectorsize);
5307 spin_lock(&BTRFS_I(inode)->lock); 5279 spin_lock(&BTRFS_I(inode)->lock);
5308 dropped = drop_outstanding_extent(inode); 5280 dropped = drop_outstanding_extent(inode, num_bytes);
5309 5281
5310 if (num_bytes) 5282 if (num_bytes)
5311 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 5283 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
@@ -5313,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5313 if (dropped > 0) 5285 if (dropped > 0)
5314 to_free += btrfs_calc_trans_metadata_size(root, dropped); 5286 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5315 5287
5288 if (btrfs_test_is_dummy_root(root))
5289 return;
5290
5316 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5291 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5317 btrfs_ino(inode), to_free, 0); 5292 btrfs_ino(inode), to_free, 0);
5318 if (root->fs_info->quota_enabled) { 5293 if (root->fs_info->quota_enabled) {
@@ -5375,8 +5350,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
5375 btrfs_free_reserved_data_space(inode, num_bytes); 5350 btrfs_free_reserved_data_space(inode, num_bytes);
5376} 5351}
5377 5352
5378static int update_block_group(struct btrfs_root *root, 5353static int update_block_group(struct btrfs_trans_handle *trans,
5379 u64 bytenr, u64 num_bytes, int alloc) 5354 struct btrfs_root *root, u64 bytenr,
5355 u64 num_bytes, int alloc)
5380{ 5356{
5381 struct btrfs_block_group_cache *cache = NULL; 5357 struct btrfs_block_group_cache *cache = NULL;
5382 struct btrfs_fs_info *info = root->fs_info; 5358 struct btrfs_fs_info *info = root->fs_info;
@@ -5414,6 +5390,14 @@ static int update_block_group(struct btrfs_root *root,
5414 if (!alloc && cache->cached == BTRFS_CACHE_NO) 5390 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5415 cache_block_group(cache, 1); 5391 cache_block_group(cache, 1);
5416 5392
5393 spin_lock(&trans->transaction->dirty_bgs_lock);
5394 if (list_empty(&cache->dirty_list)) {
5395 list_add_tail(&cache->dirty_list,
5396 &trans->transaction->dirty_bgs);
5397 btrfs_get_block_group(cache);
5398 }
5399 spin_unlock(&trans->transaction->dirty_bgs_lock);
5400
5417 byte_in_group = bytenr - cache->key.objectid; 5401 byte_in_group = bytenr - cache->key.objectid;
5418 WARN_ON(byte_in_group > cache->key.offset); 5402 WARN_ON(byte_in_group > cache->key.offset);
5419 5403
@@ -5424,7 +5408,6 @@ static int update_block_group(struct btrfs_root *root,
5424 cache->disk_cache_state < BTRFS_DC_CLEAR) 5408 cache->disk_cache_state < BTRFS_DC_CLEAR)
5425 cache->disk_cache_state = BTRFS_DC_CLEAR; 5409 cache->disk_cache_state = BTRFS_DC_CLEAR;
5426 5410
5427 cache->dirty = 1;
5428 old_val = btrfs_block_group_used(&cache->item); 5411 old_val = btrfs_block_group_used(&cache->item);
5429 num_bytes = min(total, cache->key.offset - byte_in_group); 5412 num_bytes = min(total, cache->key.offset - byte_in_group);
5430 if (alloc) { 5413 if (alloc) {
@@ -5807,10 +5790,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5807 unpin = &fs_info->freed_extents[0]; 5790 unpin = &fs_info->freed_extents[0];
5808 5791
5809 while (1) { 5792 while (1) {
5793 mutex_lock(&fs_info->unused_bg_unpin_mutex);
5810 ret = find_first_extent_bit(unpin, 0, &start, &end, 5794 ret = find_first_extent_bit(unpin, 0, &start, &end,
5811 EXTENT_DIRTY, NULL); 5795 EXTENT_DIRTY, NULL);
5812 if (ret) 5796 if (ret) {
5797 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
5813 break; 5798 break;
5799 }
5814 5800
5815 if (btrfs_test_opt(root, DISCARD)) 5801 if (btrfs_test_opt(root, DISCARD))
5816 ret = btrfs_discard_extent(root, start, 5802 ret = btrfs_discard_extent(root, start,
@@ -5818,6 +5804,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5818 5804
5819 clear_extent_dirty(unpin, start, end, GFP_NOFS); 5805 clear_extent_dirty(unpin, start, end, GFP_NOFS);
5820 unpin_extent_range(root, start, end, true); 5806 unpin_extent_range(root, start, end, true);
5807 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
5821 cond_resched(); 5808 cond_resched();
5822 } 5809 }
5823 5810
@@ -6103,7 +6090,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6103 } 6090 }
6104 } 6091 }
6105 6092
6106 ret = update_block_group(root, bytenr, num_bytes, 0); 6093 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
6107 if (ret) { 6094 if (ret) {
6108 btrfs_abort_transaction(trans, extent_root, ret); 6095 btrfs_abort_transaction(trans, extent_root, ret);
6109 goto out; 6096 goto out;
@@ -6205,7 +6192,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6205 struct extent_buffer *buf, 6192 struct extent_buffer *buf,
6206 u64 parent, int last_ref) 6193 u64 parent, int last_ref)
6207{ 6194{
6208 struct btrfs_block_group_cache *cache = NULL;
6209 int pin = 1; 6195 int pin = 1;
6210 int ret; 6196 int ret;
6211 6197
@@ -6221,17 +6207,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6221 if (!last_ref) 6207 if (!last_ref)
6222 return; 6208 return;
6223 6209
6224 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
6225
6226 if (btrfs_header_generation(buf) == trans->transid) { 6210 if (btrfs_header_generation(buf) == trans->transid) {
6211 struct btrfs_block_group_cache *cache;
6212
6227 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 6213 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
6228 ret = check_ref_cleanup(trans, root, buf->start); 6214 ret = check_ref_cleanup(trans, root, buf->start);
6229 if (!ret) 6215 if (!ret)
6230 goto out; 6216 goto out;
6231 } 6217 }
6232 6218
6219 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
6220
6233 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 6221 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
6234 pin_down_extent(root, cache, buf->start, buf->len, 1); 6222 pin_down_extent(root, cache, buf->start, buf->len, 1);
6223 btrfs_put_block_group(cache);
6235 goto out; 6224 goto out;
6236 } 6225 }
6237 6226
@@ -6239,6 +6228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6239 6228
6240 btrfs_add_free_space(cache, buf->start, buf->len); 6229 btrfs_add_free_space(cache, buf->start, buf->len);
6241 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); 6230 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
6231 btrfs_put_block_group(cache);
6242 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 6232 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
6243 pin = 0; 6233 pin = 0;
6244 } 6234 }
@@ -6253,7 +6243,6 @@ out:
6253 * anymore. 6243 * anymore.
6254 */ 6244 */
6255 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); 6245 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
6256 btrfs_put_block_group(cache);
6257} 6246}
6258 6247
6259/* Can return -ENOMEM */ 6248/* Can return -ENOMEM */
@@ -7063,7 +7052,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7063 if (ret) 7052 if (ret)
7064 return ret; 7053 return ret;
7065 7054
7066 ret = update_block_group(root, ins->objectid, ins->offset, 1); 7055 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
7067 if (ret) { /* -ENOENT, logic error */ 7056 if (ret) { /* -ENOENT, logic error */
7068 btrfs_err(fs_info, "update block group failed for %llu %llu", 7057 btrfs_err(fs_info, "update block group failed for %llu %llu",
7069 ins->objectid, ins->offset); 7058 ins->objectid, ins->offset);
@@ -7152,7 +7141,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7152 return ret; 7141 return ret;
7153 } 7142 }
7154 7143
7155 ret = update_block_group(root, ins->objectid, root->nodesize, 1); 7144 ret = update_block_group(trans, root, ins->objectid, root->nodesize,
7145 1);
7156 if (ret) { /* -ENOENT, logic error */ 7146 if (ret) { /* -ENOENT, logic error */
7157 btrfs_err(fs_info, "update block group failed for %llu %llu", 7147 btrfs_err(fs_info, "update block group failed for %llu %llu",
7158 ins->objectid, ins->offset); 7148 ins->objectid, ins->offset);
@@ -7217,11 +7207,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7217 7207
7218static struct extent_buffer * 7208static struct extent_buffer *
7219btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, 7209btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7220 u64 bytenr, u32 blocksize, int level) 7210 u64 bytenr, int level)
7221{ 7211{
7222 struct extent_buffer *buf; 7212 struct extent_buffer *buf;
7223 7213
7224 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 7214 buf = btrfs_find_create_tree_block(root, bytenr);
7225 if (!buf) 7215 if (!buf)
7226 return ERR_PTR(-ENOMEM); 7216 return ERR_PTR(-ENOMEM);
7227 btrfs_set_header_generation(buf, trans->transid); 7217 btrfs_set_header_generation(buf, trans->transid);
@@ -7340,7 +7330,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7340 7330
7341 if (btrfs_test_is_dummy_root(root)) { 7331 if (btrfs_test_is_dummy_root(root)) {
7342 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, 7332 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
7343 blocksize, level); 7333 level);
7344 if (!IS_ERR(buf)) 7334 if (!IS_ERR(buf))
7345 root->alloc_bytenr += blocksize; 7335 root->alloc_bytenr += blocksize;
7346 return buf; 7336 return buf;
@@ -7357,8 +7347,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7357 return ERR_PTR(ret); 7347 return ERR_PTR(ret);
7358 } 7348 }
7359 7349
7360 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 7350 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
7361 blocksize, level);
7362 BUG_ON(IS_ERR(buf)); /* -ENOMEM */ 7351 BUG_ON(IS_ERR(buf)); /* -ENOMEM */
7363 7352
7364 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 7353 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
@@ -7487,7 +7476,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
7487 continue; 7476 continue;
7488 } 7477 }
7489reada: 7478reada:
7490 readahead_tree_block(root, bytenr, blocksize); 7479 readahead_tree_block(root, bytenr);
7491 nread++; 7480 nread++;
7492 } 7481 }
7493 wc->reada_slot = slot; 7482 wc->reada_slot = slot;
@@ -7828,7 +7817,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7828 7817
7829 next = btrfs_find_tree_block(root, bytenr); 7818 next = btrfs_find_tree_block(root, bytenr);
7830 if (!next) { 7819 if (!next) {
7831 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 7820 next = btrfs_find_create_tree_block(root, bytenr);
7832 if (!next) 7821 if (!next)
7833 return -ENOMEM; 7822 return -ENOMEM;
7834 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, 7823 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
@@ -8548,14 +8537,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8548 if (IS_ERR(trans)) 8537 if (IS_ERR(trans))
8549 return PTR_ERR(trans); 8538 return PTR_ERR(trans);
8550 8539
8551 alloc_flags = update_block_group_flags(root, cache->flags);
8552 if (alloc_flags != cache->flags) {
8553 ret = do_chunk_alloc(trans, root, alloc_flags,
8554 CHUNK_ALLOC_FORCE);
8555 if (ret < 0)
8556 goto out;
8557 }
8558
8559 ret = set_block_group_ro(cache, 0); 8540 ret = set_block_group_ro(cache, 0);
8560 if (!ret) 8541 if (!ret)
8561 goto out; 8542 goto out;
@@ -8566,6 +8547,11 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8566 goto out; 8547 goto out;
8567 ret = set_block_group_ro(cache, 0); 8548 ret = set_block_group_ro(cache, 0);
8568out: 8549out:
8550 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
8551 alloc_flags = update_block_group_flags(root, cache->flags);
8552 check_system_chunk(trans, root, alloc_flags);
8553 }
8554
8569 btrfs_end_transaction(trans, root); 8555 btrfs_end_transaction(trans, root);
8570 return ret; 8556 return ret;
8571} 8557}
@@ -9005,6 +8991,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9005 INIT_LIST_HEAD(&cache->cluster_list); 8991 INIT_LIST_HEAD(&cache->cluster_list);
9006 INIT_LIST_HEAD(&cache->bg_list); 8992 INIT_LIST_HEAD(&cache->bg_list);
9007 INIT_LIST_HEAD(&cache->ro_list); 8993 INIT_LIST_HEAD(&cache->ro_list);
8994 INIT_LIST_HEAD(&cache->dirty_list);
9008 btrfs_init_free_space_ctl(cache); 8995 btrfs_init_free_space_ctl(cache);
9009 atomic_set(&cache->trimming, 0); 8996 atomic_set(&cache->trimming, 0);
9010 8997
@@ -9068,9 +9055,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
9068 * b) Setting 'dirty flag' makes sure that we flush 9055 * b) Setting 'dirty flag' makes sure that we flush
9069 * the new space cache info onto disk. 9056 * the new space cache info onto disk.
9070 */ 9057 */
9071 cache->disk_cache_state = BTRFS_DC_CLEAR;
9072 if (btrfs_test_opt(root, SPACE_CACHE)) 9058 if (btrfs_test_opt(root, SPACE_CACHE))
9073 cache->dirty = 1; 9059 cache->disk_cache_state = BTRFS_DC_CLEAR;
9074 } 9060 }
9075 9061
9076 read_extent_buffer(leaf, &cache->item, 9062 read_extent_buffer(leaf, &cache->item,
@@ -9422,7 +9408,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9422 * are still on the list after taking the semaphore 9408 * are still on the list after taking the semaphore
9423 */ 9409 */
9424 list_del_init(&block_group->list); 9410 list_del_init(&block_group->list);
9425 list_del_init(&block_group->ro_list);
9426 if (list_empty(&block_group->space_info->block_groups[index])) { 9411 if (list_empty(&block_group->space_info->block_groups[index])) {
9427 kobj = block_group->space_info->block_group_kobjs[index]; 9412 kobj = block_group->space_info->block_group_kobjs[index];
9428 block_group->space_info->block_group_kobjs[index] = NULL; 9413 block_group->space_info->block_group_kobjs[index] = NULL;
@@ -9461,9 +9446,17 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9461 } 9446 }
9462 } 9447 }
9463 9448
9449 spin_lock(&trans->transaction->dirty_bgs_lock);
9450 if (!list_empty(&block_group->dirty_list)) {
9451 list_del_init(&block_group->dirty_list);
9452 btrfs_put_block_group(block_group);
9453 }
9454 spin_unlock(&trans->transaction->dirty_bgs_lock);
9455
9464 btrfs_remove_free_space_cache(block_group); 9456 btrfs_remove_free_space_cache(block_group);
9465 9457
9466 spin_lock(&block_group->space_info->lock); 9458 spin_lock(&block_group->space_info->lock);
9459 list_del_init(&block_group->ro_list);
9467 block_group->space_info->total_bytes -= block_group->key.offset; 9460 block_group->space_info->total_bytes -= block_group->key.offset;
9468 block_group->space_info->bytes_readonly -= block_group->key.offset; 9461 block_group->space_info->bytes_readonly -= block_group->key.offset;
9469 block_group->space_info->disk_total -= block_group->key.offset * factor; 9462 block_group->space_info->disk_total -= block_group->key.offset * factor;
@@ -9611,7 +9604,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9611 * Want to do this before we do anything else so we can recover 9604 * Want to do this before we do anything else so we can recover
9612 * properly if we fail to join the transaction. 9605 * properly if we fail to join the transaction.
9613 */ 9606 */
9614 trans = btrfs_join_transaction(root); 9607 /* 1 for btrfs_orphan_reserve_metadata() */
9608 trans = btrfs_start_transaction(root, 1);
9615 if (IS_ERR(trans)) { 9609 if (IS_ERR(trans)) {
9616 btrfs_set_block_group_rw(root, block_group); 9610 btrfs_set_block_group_rw(root, block_group);
9617 ret = PTR_ERR(trans); 9611 ret = PTR_ERR(trans);
@@ -9624,18 +9618,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9624 */ 9618 */
9625 start = block_group->key.objectid; 9619 start = block_group->key.objectid;
9626 end = start + block_group->key.offset - 1; 9620 end = start + block_group->key.offset - 1;
9621 /*
9622 * Hold the unused_bg_unpin_mutex lock to avoid racing with
9623 * btrfs_finish_extent_commit(). If we are at transaction N,
9624 * another task might be running finish_extent_commit() for the
9625 * previous transaction N - 1, and have seen a range belonging
9626 * to the block group in freed_extents[] before we were able to
9627 * clear the whole block group range from freed_extents[]. This
 9628 * means that task can look up the block group after we
9629 * unpinned it from freed_extents[] and removed it, leading to
9630 * a BUG_ON() at btrfs_unpin_extent_range().
9631 */
9632 mutex_lock(&fs_info->unused_bg_unpin_mutex);
9627 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, 9633 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
9628 EXTENT_DIRTY, GFP_NOFS); 9634 EXTENT_DIRTY, GFP_NOFS);
9629 if (ret) { 9635 if (ret) {
9636 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9630 btrfs_set_block_group_rw(root, block_group); 9637 btrfs_set_block_group_rw(root, block_group);
9631 goto end_trans; 9638 goto end_trans;
9632 } 9639 }
9633 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, 9640 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
9634 EXTENT_DIRTY, GFP_NOFS); 9641 EXTENT_DIRTY, GFP_NOFS);
9635 if (ret) { 9642 if (ret) {
9643 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9636 btrfs_set_block_group_rw(root, block_group); 9644 btrfs_set_block_group_rw(root, block_group);
9637 goto end_trans; 9645 goto end_trans;
9638 } 9646 }
9647 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9639 9648
9640 /* Reset pinned so btrfs_put_block_group doesn't complain */ 9649 /* Reset pinned so btrfs_put_block_group doesn't complain */
9641 block_group->pinned = 0; 9650 block_group->pinned = 0;
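
The unused_bg_unpin_mutex added above makes "find one pinned range and unpin it" in btrfs_finish_extent_commit() atomic with respect to btrfs_delete_unused_bgs() clearing a whole block group from freed_extents[], closing the race described in the new comment. A hedged, single-threaded userspace sketch of the shape of that exclusion, with a counter standing in for the pinned ranges and a pthread mutex standing in for the kernel mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t unused_bg_unpin_mutex = PTHREAD_MUTEX_INITIALIZER;
static int pinned_ranges = 2;    /* toy stand-in for freed_extents[] */

/* btrfs_finish_extent_commit() analogue: find + unpin one range,
 * atomically with respect to block group deletion. */
static int unpin_next_range(void)
{
    pthread_mutex_lock(&unused_bg_unpin_mutex);
    if (!pinned_ranges) {
        pthread_mutex_unlock(&unused_bg_unpin_mutex);
        return 0;    /* nothing left to unpin */
    }
    pinned_ranges--;
    pthread_mutex_unlock(&unused_bg_unpin_mutex);
    return 1;
}

/* btrfs_delete_unused_bgs() analogue: clear the whole range at once. */
static void delete_unused_bg(void)
{
    pthread_mutex_lock(&unused_bg_unpin_mutex);
    pinned_ranges = 0;    /* clear_extent_bits() on both trees */
    pthread_mutex_unlock(&unused_bg_unpin_mutex);
}

int main(void)
{
    while (unpin_next_range())
        ;
    delete_unused_bg();
    puts("a range is seen by the unpin loop or the delete, never both");
    return 0;
}
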
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ebabd237153..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -64,7 +64,7 @@ void btrfs_leak_debug_check(void)
64 64
65 while (!list_empty(&states)) { 65 while (!list_empty(&states)) {
66 state = list_entry(states.next, struct extent_state, leak_list); 66 state = list_entry(states.next, struct extent_state, leak_list);
67 pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n", 67 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
68 state->start, state->end, state->state, 68 state->start, state->end, state->state,
69 extent_state_in_tree(state), 69 extent_state_in_tree(state),
70 atomic_read(&state->refs)); 70 atomic_read(&state->refs));
@@ -396,21 +396,21 @@ static void merge_state(struct extent_io_tree *tree,
396} 396}
397 397
398static void set_state_cb(struct extent_io_tree *tree, 398static void set_state_cb(struct extent_io_tree *tree,
399 struct extent_state *state, unsigned long *bits) 399 struct extent_state *state, unsigned *bits)
400{ 400{
401 if (tree->ops && tree->ops->set_bit_hook) 401 if (tree->ops && tree->ops->set_bit_hook)
402 tree->ops->set_bit_hook(tree->mapping->host, state, bits); 402 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
403} 403}
404 404
405static void clear_state_cb(struct extent_io_tree *tree, 405static void clear_state_cb(struct extent_io_tree *tree,
406 struct extent_state *state, unsigned long *bits) 406 struct extent_state *state, unsigned *bits)
407{ 407{
408 if (tree->ops && tree->ops->clear_bit_hook) 408 if (tree->ops && tree->ops->clear_bit_hook)
409 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 409 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
410} 410}
411 411
412static void set_state_bits(struct extent_io_tree *tree, 412static void set_state_bits(struct extent_io_tree *tree,
413 struct extent_state *state, unsigned long *bits); 413 struct extent_state *state, unsigned *bits);
414 414
415/* 415/*
416 * insert an extent_state struct into the tree. 'bits' are set on the 416 * insert an extent_state struct into the tree. 'bits' are set on the
@@ -426,7 +426,7 @@ static int insert_state(struct extent_io_tree *tree,
426 struct extent_state *state, u64 start, u64 end, 426 struct extent_state *state, u64 start, u64 end,
427 struct rb_node ***p, 427 struct rb_node ***p,
428 struct rb_node **parent, 428 struct rb_node **parent,
429 unsigned long *bits) 429 unsigned *bits)
430{ 430{
431 struct rb_node *node; 431 struct rb_node *node;
432 432
@@ -511,10 +511,10 @@ static struct extent_state *next_state(struct extent_state *state)
511 */ 511 */
512static struct extent_state *clear_state_bit(struct extent_io_tree *tree, 512static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
513 struct extent_state *state, 513 struct extent_state *state,
514 unsigned long *bits, int wake) 514 unsigned *bits, int wake)
515{ 515{
516 struct extent_state *next; 516 struct extent_state *next;
517 unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS; 517 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
518 518
519 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 519 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
520 u64 range = state->end - state->start + 1; 520 u64 range = state->end - state->start + 1;
@@ -570,7 +570,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
570 * This takes the tree lock, and returns 0 on success and < 0 on error. 570 * This takes the tree lock, and returns 0 on success and < 0 on error.
571 */ 571 */
572int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 572int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
573 unsigned long bits, int wake, int delete, 573 unsigned bits, int wake, int delete,
574 struct extent_state **cached_state, 574 struct extent_state **cached_state,
575 gfp_t mask) 575 gfp_t mask)
576{ 576{
@@ -789,9 +789,9 @@ out:
789 789
790static void set_state_bits(struct extent_io_tree *tree, 790static void set_state_bits(struct extent_io_tree *tree,
791 struct extent_state *state, 791 struct extent_state *state,
792 unsigned long *bits) 792 unsigned *bits)
793{ 793{
794 unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS; 794 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
795 795
796 set_state_cb(tree, state, bits); 796 set_state_cb(tree, state, bits);
797 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 797 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -803,7 +803,7 @@ static void set_state_bits(struct extent_io_tree *tree,
803 803
804static void cache_state_if_flags(struct extent_state *state, 804static void cache_state_if_flags(struct extent_state *state,
805 struct extent_state **cached_ptr, 805 struct extent_state **cached_ptr,
806 const u64 flags) 806 unsigned flags)
807{ 807{
808 if (cached_ptr && !(*cached_ptr)) { 808 if (cached_ptr && !(*cached_ptr)) {
809 if (!flags || (state->state & flags)) { 809 if (!flags || (state->state & flags)) {
@@ -833,7 +833,7 @@ static void cache_state(struct extent_state *state,
833 833
834static int __must_check 834static int __must_check
835__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 835__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
836 unsigned long bits, unsigned long exclusive_bits, 836 unsigned bits, unsigned exclusive_bits,
837 u64 *failed_start, struct extent_state **cached_state, 837 u64 *failed_start, struct extent_state **cached_state,
838 gfp_t mask) 838 gfp_t mask)
839{ 839{
@@ -1034,7 +1034,7 @@ search_again:
1034} 1034}
1035 1035
1036int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1036int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1037 unsigned long bits, u64 * failed_start, 1037 unsigned bits, u64 * failed_start,
1038 struct extent_state **cached_state, gfp_t mask) 1038 struct extent_state **cached_state, gfp_t mask)
1039{ 1039{
1040 return __set_extent_bit(tree, start, end, bits, 0, failed_start, 1040 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
@@ -1060,7 +1060,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1060 * boundary bits like LOCK. 1060 * boundary bits like LOCK.
1061 */ 1061 */
1062int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1062int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1063 unsigned long bits, unsigned long clear_bits, 1063 unsigned bits, unsigned clear_bits,
1064 struct extent_state **cached_state, gfp_t mask) 1064 struct extent_state **cached_state, gfp_t mask)
1065{ 1065{
1066 struct extent_state *state; 1066 struct extent_state *state;
@@ -1268,14 +1268,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1268} 1268}
1269 1269
1270int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1270int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1271 unsigned long bits, gfp_t mask) 1271 unsigned bits, gfp_t mask)
1272{ 1272{
1273 return set_extent_bit(tree, start, end, bits, NULL, 1273 return set_extent_bit(tree, start, end, bits, NULL,
1274 NULL, mask); 1274 NULL, mask);
1275} 1275}
1276 1276
1277int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1277int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1278 unsigned long bits, gfp_t mask) 1278 unsigned bits, gfp_t mask)
1279{ 1279{
1280 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); 1280 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
1281} 1281}
@@ -1330,10 +1330,11 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1330 * us if waiting is desired. 1330 * us if waiting is desired.
1331 */ 1331 */
1332int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1332int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1333 unsigned long bits, struct extent_state **cached_state) 1333 unsigned bits, struct extent_state **cached_state)
1334{ 1334{
1335 int err; 1335 int err;
1336 u64 failed_start; 1336 u64 failed_start;
1337
1337 while (1) { 1338 while (1) {
1338 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, 1339 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
1339 EXTENT_LOCKED, &failed_start, 1340 EXTENT_LOCKED, &failed_start,
@@ -1407,8 +1408,8 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1407 while (index <= end_index) { 1408 while (index <= end_index) {
1408 page = find_get_page(inode->i_mapping, index); 1409 page = find_get_page(inode->i_mapping, index);
1409 BUG_ON(!page); /* Pages should be in the extent_io_tree */ 1410 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1410 account_page_redirty(page);
1411 __set_page_dirty_nobuffers(page); 1411 __set_page_dirty_nobuffers(page);
1412 account_page_redirty(page);
1412 page_cache_release(page); 1413 page_cache_release(page);
1413 index++; 1414 index++;
1414 } 1415 }
@@ -1440,7 +1441,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1440 */ 1441 */
1441static struct extent_state * 1442static struct extent_state *
1442find_first_extent_bit_state(struct extent_io_tree *tree, 1443find_first_extent_bit_state(struct extent_io_tree *tree,
1443 u64 start, unsigned long bits) 1444 u64 start, unsigned bits)
1444{ 1445{
1445 struct rb_node *node; 1446 struct rb_node *node;
1446 struct extent_state *state; 1447 struct extent_state *state;
@@ -1474,7 +1475,7 @@ out:
1474 * If nothing was found, 1 is returned. If found something, return 0. 1475 * If nothing was found, 1 is returned. If found something, return 0.
1475 */ 1476 */
1476int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 1477int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1477 u64 *start_ret, u64 *end_ret, unsigned long bits, 1478 u64 *start_ret, u64 *end_ret, unsigned bits,
1478 struct extent_state **cached_state) 1479 struct extent_state **cached_state)
1479{ 1480{
1480 struct extent_state *state; 1481 struct extent_state *state;
@@ -1753,7 +1754,7 @@ out_failed:
1753 1754
1754int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, 1755int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1755 struct page *locked_page, 1756 struct page *locked_page,
1756 unsigned long clear_bits, 1757 unsigned clear_bits,
1757 unsigned long page_ops) 1758 unsigned long page_ops)
1758{ 1759{
1759 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 1760 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@@ -1810,7 +1811,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1810 */ 1811 */
1811u64 count_range_bits(struct extent_io_tree *tree, 1812u64 count_range_bits(struct extent_io_tree *tree,
1812 u64 *start, u64 search_end, u64 max_bytes, 1813 u64 *start, u64 search_end, u64 max_bytes,
1813 unsigned long bits, int contig) 1814 unsigned bits, int contig)
1814{ 1815{
1815 struct rb_node *node; 1816 struct rb_node *node;
1816 struct extent_state *state; 1817 struct extent_state *state;
@@ -1928,7 +1929,7 @@ out:
1928 * range is found set. 1929 * range is found set.
1929 */ 1930 */
1930int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1931int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1931 unsigned long bits, int filled, struct extent_state *cached) 1932 unsigned bits, int filled, struct extent_state *cached)
1932{ 1933{
1933 struct extent_state *state = NULL; 1934 struct extent_state *state = NULL;
1934 struct rb_node *node; 1935 struct rb_node *node;
@@ -2057,7 +2058,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
2057 sector = bbio->stripes[mirror_num-1].physical >> 9; 2058 sector = bbio->stripes[mirror_num-1].physical >> 9;
2058 bio->bi_iter.bi_sector = sector; 2059 bio->bi_iter.bi_sector = sector;
2059 dev = bbio->stripes[mirror_num-1].dev; 2060 dev = bbio->stripes[mirror_num-1].dev;
2060 kfree(bbio); 2061 btrfs_put_bbio(bbio);
2061 if (!dev || !dev->bdev || !dev->writeable) { 2062 if (!dev || !dev->bdev || !dev->writeable) {
2062 bio_put(bio); 2063 bio_put(bio);
2063 return -EIO; 2064 return -EIO;
@@ -2190,7 +2191,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2190 2191
2191 next = next_state(state); 2192 next = next_state(state);
2192 2193
2193 failrec = (struct io_failure_record *)state->private; 2194 failrec = (struct io_failure_record *)(unsigned long)state->private;
2194 free_extent_state(state); 2195 free_extent_state(state);
2195 kfree(failrec); 2196 kfree(failrec);
2196 2197
@@ -2816,8 +2817,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2816 bio_add_page(bio, page, page_size, offset) < page_size) { 2817 bio_add_page(bio, page, page_size, offset) < page_size) {
2817 ret = submit_one_bio(rw, bio, mirror_num, 2818 ret = submit_one_bio(rw, bio, mirror_num,
2818 prev_bio_flags); 2819 prev_bio_flags);
2819 if (ret < 0) 2820 if (ret < 0) {
2821 *bio_ret = NULL;
2820 return ret; 2822 return ret;
2823 }
2821 bio = NULL; 2824 bio = NULL;
2822 } else { 2825 } else {
2823 return 0; 2826 return 0;
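
The submit_extent_page() hunk above is an out-parameter hygiene fix: when submitting the cached bio fails, *bio_ret must be cleared so callers do not retry with a bio the failed submission already consumed. A generic sketch of the pattern, with a heap buffer standing in for the bio (illustrative names only):

#include <stdio.h>
#include <stdlib.h>

/* On failure, clear the caller's cached pointer so the error path
 * cannot touch an object the failed submission already consumed. */
static int flush_cached(char **buf_ret)
{
    int ret = -5;        /* pretend the submission failed (-EIO) */

    free(*buf_ret);      /* the submit path owns the buffer now */
    if (ret < 0) {
        *buf_ret = NULL; /* the fix from the hunk above */
        return ret;
    }
    return 0;
}

int main(void)
{
    char *cached = malloc(16);

    if (flush_cached(&cached) < 0 && !cached)
        puts("cached pointer safely cleared on error");
    return 0;
}
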
@@ -3239,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
3239 page, 3242 page,
3240 &delalloc_start, 3243 &delalloc_start,
3241 &delalloc_end, 3244 &delalloc_end,
3242 128 * 1024 * 1024); 3245 BTRFS_MAX_EXTENT_SIZE);
3243 if (nr_delalloc == 0) { 3246 if (nr_delalloc == 0) {
3244 delalloc_start = delalloc_end + 1; 3247 delalloc_start = delalloc_end + 1;
3245 continue; 3248 continue;
@@ -4598,11 +4601,11 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4598 4601
4599static struct extent_buffer * 4602static struct extent_buffer *
4600__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, 4603__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4601 unsigned long len, gfp_t mask) 4604 unsigned long len)
4602{ 4605{
4603 struct extent_buffer *eb = NULL; 4606 struct extent_buffer *eb = NULL;
4604 4607
4605 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 4608 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS);
4606 if (eb == NULL) 4609 if (eb == NULL)
4607 return NULL; 4610 return NULL;
4608 eb->start = start; 4611 eb->start = start;
@@ -4643,7 +4646,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4643 struct extent_buffer *new; 4646 struct extent_buffer *new;
4644 unsigned long num_pages = num_extent_pages(src->start, src->len); 4647 unsigned long num_pages = num_extent_pages(src->start, src->len);
4645 4648
4646 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); 4649 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4647 if (new == NULL) 4650 if (new == NULL)
4648 return NULL; 4651 return NULL;
4649 4652
@@ -4666,13 +4669,26 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4666 return new; 4669 return new;
4667} 4670}
4668 4671
4669struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) 4672struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4673 u64 start)
4670{ 4674{
4671 struct extent_buffer *eb; 4675 struct extent_buffer *eb;
4672 unsigned long num_pages = num_extent_pages(0, len); 4676 unsigned long len;
4677 unsigned long num_pages;
4673 unsigned long i; 4678 unsigned long i;
4674 4679
4675 eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); 4680 if (!fs_info) {
4681 /*
4682 * Called only from tests that don't always have a fs_info
4683 * available, but we know that nodesize is 4096
4684 */
4685 len = 4096;
4686 } else {
4687 len = fs_info->tree_root->nodesize;
4688 }
4689 num_pages = num_extent_pages(0, len);
4690
4691 eb = __alloc_extent_buffer(fs_info, start, len);
4676 if (!eb) 4692 if (!eb)
4677 return NULL; 4693 return NULL;
4678 4694
@@ -4762,7 +4778,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4762 4778
4763#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 4779#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4764struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, 4780struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4765 u64 start, unsigned long len) 4781 u64 start)
4766{ 4782{
4767 struct extent_buffer *eb, *exists = NULL; 4783 struct extent_buffer *eb, *exists = NULL;
4768 int ret; 4784 int ret;
@@ -4770,7 +4786,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4770 eb = find_extent_buffer(fs_info, start); 4786 eb = find_extent_buffer(fs_info, start);
4771 if (eb) 4787 if (eb)
4772 return eb; 4788 return eb;
4773 eb = alloc_dummy_extent_buffer(start, len); 4789 eb = alloc_dummy_extent_buffer(fs_info, start);
4774 if (!eb) 4790 if (!eb)
4775 return NULL; 4791 return NULL;
4776 eb->fs_info = fs_info; 4792 eb->fs_info = fs_info;
@@ -4808,8 +4824,9 @@ free_eb:
4808#endif 4824#endif
4809 4825
4810struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, 4826struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4811 u64 start, unsigned long len) 4827 u64 start)
4812{ 4828{
4829 unsigned long len = fs_info->tree_root->nodesize;
4813 unsigned long num_pages = num_extent_pages(start, len); 4830 unsigned long num_pages = num_extent_pages(start, len);
4814 unsigned long i; 4831 unsigned long i;
4815 unsigned long index = start >> PAGE_CACHE_SHIFT; 4832 unsigned long index = start >> PAGE_CACHE_SHIFT;
@@ -4824,7 +4841,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4824 if (eb) 4841 if (eb)
4825 return eb; 4842 return eb;
4826 4843
4827 eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS); 4844 eb = __alloc_extent_buffer(fs_info, start, len);
4828 if (!eb) 4845 if (!eb)
4829 return NULL; 4846 return NULL;
4830 4847
@@ -4951,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
4951 4968
4952 /* Should be safe to release our pages at this point */ 4969 /* Should be safe to release our pages at this point */
4953 btrfs_release_extent_buffer_page(eb); 4970 btrfs_release_extent_buffer_page(eb);
4971#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4972 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4973 __free_extent_buffer(eb);
4974 return 1;
4975 }
4976#endif
4954 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4977 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4955 return 1; 4978 return 1;
4956 } 4979 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index ece9ce87edff..695b0ccfb755 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -4,22 +4,22 @@
4#include <linux/rbtree.h> 4#include <linux/rbtree.h>
5 5
6/* bits for the extent state */ 6/* bits for the extent state */
7#define EXTENT_DIRTY 1 7#define EXTENT_DIRTY (1U << 0)
8#define EXTENT_WRITEBACK (1 << 1) 8#define EXTENT_WRITEBACK (1U << 1)
9#define EXTENT_UPTODATE (1 << 2) 9#define EXTENT_UPTODATE (1U << 2)
10#define EXTENT_LOCKED (1 << 3) 10#define EXTENT_LOCKED (1U << 3)
11#define EXTENT_NEW (1 << 4) 11#define EXTENT_NEW (1U << 4)
12#define EXTENT_DELALLOC (1 << 5) 12#define EXTENT_DELALLOC (1U << 5)
13#define EXTENT_DEFRAG (1 << 6) 13#define EXTENT_DEFRAG (1U << 6)
14#define EXTENT_BOUNDARY (1 << 9) 14#define EXTENT_BOUNDARY (1U << 9)
15#define EXTENT_NODATASUM (1 << 10) 15#define EXTENT_NODATASUM (1U << 10)
16#define EXTENT_DO_ACCOUNTING (1 << 11) 16#define EXTENT_DO_ACCOUNTING (1U << 11)
17#define EXTENT_FIRST_DELALLOC (1 << 12) 17#define EXTENT_FIRST_DELALLOC (1U << 12)
18#define EXTENT_NEED_WAIT (1 << 13) 18#define EXTENT_NEED_WAIT (1U << 13)
19#define EXTENT_DAMAGED (1 << 14) 19#define EXTENT_DAMAGED (1U << 14)
20#define EXTENT_NORESERVE (1 << 15) 20#define EXTENT_NORESERVE (1U << 15)
21#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 21#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
22#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 22#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
23 23
24/* 24/*
25 * flags for bio submission. The high bits indicate the compression 25 * flags for bio submission. The high bits indicate the compression
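
Writing the flags as (1U << n) makes every mask an unsigned 32-bit constant, which is what lets the state word and every 'bits' parameter below shrink from unsigned long to unsigned: the highest flag is bit 15 (EXTENT_NORESERVE), so 32 bits suffice on every architecture. A quick sketch of the resulting set/clear/test idiom:

#include <stdio.h>

/* Same style as the new header: explicit unsigned (1U << n) flags. */
#define EXTENT_DIRTY     (1U << 0)
#define EXTENT_UPTODATE  (1U << 2)
#define EXTENT_LOCKED    (1U << 3)

int main(void)
{
    unsigned state = 0;    /* 32 bits everywhere, unlike unsigned long */

    state |= EXTENT_DIRTY | EXTENT_LOCKED;    /* set */
    state &= ~EXTENT_LOCKED;                  /* clear */
    printf("dirty=%d locked=%d uptodate=%d\n",
           !!(state & EXTENT_DIRTY),
           !!(state & EXTENT_LOCKED),
           !!(state & EXTENT_UPTODATE));
    return 0;
}
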
@@ -81,9 +81,9 @@ struct extent_io_ops {
81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
82 struct extent_state *state, int uptodate); 82 struct extent_state *state, int uptodate);
83 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 83 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
84 unsigned long *bits); 84 unsigned *bits);
85 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 85 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
86 unsigned long *bits); 86 unsigned *bits);
87 void (*merge_extent_hook)(struct inode *inode, 87 void (*merge_extent_hook)(struct inode *inode,
88 struct extent_state *new, 88 struct extent_state *new,
89 struct extent_state *other); 89 struct extent_state *other);
@@ -108,7 +108,7 @@ struct extent_state {
108 /* ADD NEW ELEMENTS AFTER THIS */ 108 /* ADD NEW ELEMENTS AFTER THIS */
109 wait_queue_head_t wq; 109 wait_queue_head_t wq;
110 atomic_t refs; 110 atomic_t refs;
111 unsigned long state; 111 unsigned state;
112 112
113 /* for use by the FS */ 113 /* for use by the FS */
114 u64 private; 114 u64 private;
@@ -188,7 +188,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
188int try_release_extent_buffer(struct page *page); 188int try_release_extent_buffer(struct page *page);
189int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 189int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
190int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 190int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
191 unsigned long bits, struct extent_state **cached); 191 unsigned bits, struct extent_state **cached);
192int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); 192int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
193int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, 193int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
194 struct extent_state **cached, gfp_t mask); 194 struct extent_state **cached, gfp_t mask);
@@ -202,21 +202,21 @@ void extent_io_exit(void);
202 202
203u64 count_range_bits(struct extent_io_tree *tree, 203u64 count_range_bits(struct extent_io_tree *tree,
204 u64 *start, u64 search_end, 204 u64 *start, u64 search_end,
205 u64 max_bytes, unsigned long bits, int contig); 205 u64 max_bytes, unsigned bits, int contig);
206 206
207void free_extent_state(struct extent_state *state); 207void free_extent_state(struct extent_state *state);
208int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 208int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
209 unsigned long bits, int filled, 209 unsigned bits, int filled,
210 struct extent_state *cached_state); 210 struct extent_state *cached_state);
211int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 211int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
212 unsigned long bits, gfp_t mask); 212 unsigned bits, gfp_t mask);
213int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 213int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
214 unsigned long bits, int wake, int delete, 214 unsigned bits, int wake, int delete,
215 struct extent_state **cached, gfp_t mask); 215 struct extent_state **cached, gfp_t mask);
216int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 216int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
217 unsigned long bits, gfp_t mask); 217 unsigned bits, gfp_t mask);
218int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 218int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
219 unsigned long bits, u64 *failed_start, 219 unsigned bits, u64 *failed_start,
220 struct extent_state **cached_state, gfp_t mask); 220 struct extent_state **cached_state, gfp_t mask);
221int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 221int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
222 struct extent_state **cached_state, gfp_t mask); 222 struct extent_state **cached_state, gfp_t mask);
@@ -229,14 +229,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
229int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 229int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
230 gfp_t mask); 230 gfp_t mask);
231int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 231int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
232 unsigned long bits, unsigned long clear_bits, 232 unsigned bits, unsigned clear_bits,
233 struct extent_state **cached_state, gfp_t mask); 233 struct extent_state **cached_state, gfp_t mask);
234int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 234int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
235 struct extent_state **cached_state, gfp_t mask); 235 struct extent_state **cached_state, gfp_t mask);
236int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, 236int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
237 struct extent_state **cached_state, gfp_t mask); 237 struct extent_state **cached_state, gfp_t mask);
238int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 238int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
239 u64 *start_ret, u64 *end_ret, unsigned long bits, 239 u64 *start_ret, u64 *end_ret, unsigned bits,
240 struct extent_state **cached_state); 240 struct extent_state **cached_state);
241int extent_invalidatepage(struct extent_io_tree *tree, 241int extent_invalidatepage(struct extent_io_tree *tree,
242 struct page *page, unsigned long offset); 242 struct page *page, unsigned long offset);
@@ -262,8 +262,9 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
262void set_page_extent_mapped(struct page *page); 262void set_page_extent_mapped(struct page *page);
263 263
264struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, 264struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
265 u64 start, unsigned long len); 265 u64 start);
266struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); 266struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
267 u64 start);
267struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); 268struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
268struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, 269struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
269 u64 start); 270 u64 start);
@@ -322,7 +323,7 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
322int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 323int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
323int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, 324int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
324 struct page *locked_page, 325 struct page *locked_page,
325 unsigned long bits_to_clear, 326 unsigned bits_to_clear,
326 unsigned long page_ops); 327 unsigned long page_ops);
327struct bio * 328struct bio *
328btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 329btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
@@ -377,5 +378,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
377 u64 *end, u64 max_bytes); 378 u64 *end, u64 max_bytes);
378#endif 379#endif
379struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, 380struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
380 u64 start, unsigned long len); 381 u64 start);
381#endif 382#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e4090259569b..30982bbd31c3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1746,7 +1746,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1746 1746
1747 mutex_lock(&inode->i_mutex); 1747 mutex_lock(&inode->i_mutex);
1748 1748
1749 current->backing_dev_info = inode->i_mapping->backing_dev_info; 1749 current->backing_dev_info = inode_to_bdi(inode);
1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1751 if (err) { 1751 if (err) {
1752 mutex_unlock(&inode->i_mutex); 1752 mutex_unlock(&inode->i_mutex);
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1811 mutex_unlock(&inode->i_mutex); 1811 mutex_unlock(&inode->i_mutex);
1812 1812
1813 /* 1813 /*
1814 * we want to make sure fsync finds this change
1815 * but we haven't joined a transaction running right now.
1816 *
1817 * Later on, someone is sure to update the inode and get the
1818 * real transid recorded.
1819 *
1820 * We set last_trans now to the fs_info generation + 1,
1821 * this will either be one more than the running transaction
1822 * or the generation used for the next transaction if there isn't
1823 * one running right now.
1824 *
1825 * We also have to set last_sub_trans to the current log transid, 1814 * We also have to set last_sub_trans to the current log transid,
1826 * otherwise subsequent syncs to a file that's been synced in this 1815 * otherwise subsequent syncs to a file that's been synced in this
1827 * transaction will appear to have already occurred. 1816 * transaction will appear to have already occurred.
1828 */ 1817 */
1829 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1830 BTRFS_I(inode)->last_sub_trans = root->log_transid; 1818 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1831 if (num_written > 0) { 1819 if (num_written > 0) {
1832 err = generic_write_sync(file, pos, num_written); 1820 err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1959 atomic_inc(&root->log_batch); 1947 atomic_inc(&root->log_batch);
1960 1948
1961 /* 1949 /*
1962 * check the transaction that last modified this inode 1950 * If the last transaction that changed this file was before the current
1963 * and see if its already been committed 1951 * transaction and we have the full sync flag set in our inode, we can
1964 */ 1952 * bail out now without any syncing.
1965 if (!BTRFS_I(inode)->last_trans) { 1953 *
1966 mutex_unlock(&inode->i_mutex); 1954 * Note that we can't bail out if the full sync flag isn't set. This is
1967 goto out; 1955 * because when the full sync flag is set we start all ordered extents
1968 } 1956 * and wait for them to fully complete - when they complete they update
1969 1957 * the inode's last_trans field through:
1970 /* 1958 *
1971 * if the last transaction that changed this file was before 1959 * btrfs_finish_ordered_io() ->
1972 * the current transaction, we can bail out now without any 1960 * btrfs_update_inode_fallback() ->
1973 * syncing 1961 * btrfs_update_inode() ->
1962 * btrfs_set_inode_last_trans()
1963 *
1964 * So we are sure that last_trans is up to date and can do this check to
1965 * bail out safely. For the fast path, when the full sync flag is not
 1966 * set in our inode, we cannot do it because we start only our ordered
1967 * extents and don't wait for them to complete (that is when
1968 * btrfs_finish_ordered_io runs), so here at this point their last_trans
 1969 * value might be less than or equal to fs_info->last_trans_committed,
1970 * and setting a speculative last_trans for an inode when a buffered
 1971 * write is made (such as fs_info->generation + 1) would not
1972 * be reliable since after setting the value and before fsync is called
1973 * any number of transactions can start and commit (transaction kthread
1974 * commits the current transaction periodically), and a transaction
 1975 * commit neither starts nor waits for ordered extents to complete.
1974 */ 1976 */
1975 smp_mb(); 1977 smp_mb();
1976 if (btrfs_inode_in_log(inode, root->fs_info->generation) || 1978 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1977 BTRFS_I(inode)->last_trans <= 1979 (full_sync && BTRFS_I(inode)->last_trans <=
1978 root->fs_info->last_trans_committed) { 1980 root->fs_info->last_trans_committed)) {
1979 BTRFS_I(inode)->last_trans = 0;
1980
1981 /* 1981 /*
1982 * We've had everything committed since the last time we were 1982 * We've had everything committed since the last time we were
1983 * modified so clear this flag in case it was set for whatever 1983 * modified so clear this flag in case it was set for whatever
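
The reworked condition above only takes the early-out when the inode is already in the log, or when a full sync has run all ordered extents to completion and thereby made last_trans trustworthy. A compact sketch of that predicate, with a hypothetical helper and plain integers standing in for the inode and fs_info fields:

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: mirrors the new fsync fast-path check. */
static int fsync_can_bail(int inode_in_log, int full_sync,
                          uint64_t last_trans, uint64_t last_committed)
{
    return inode_in_log ||
           (full_sync && last_trans <= last_committed);
}

int main(void)
{
    /* last_trans 5 vs. committed 7: only safe to skip on a full sync */
    printf("%d %d\n", fsync_can_bail(0, 1, 5, 7),
           fsync_can_bail(0, 0, 5, 7));
    return 0;
}
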
@@ -2081,7 +2081,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
2081 .fault = filemap_fault, 2081 .fault = filemap_fault,
2082 .map_pages = filemap_map_pages, 2082 .map_pages = filemap_map_pages,
2083 .page_mkwrite = btrfs_page_mkwrite, 2083 .page_mkwrite = btrfs_page_mkwrite,
2084 .remap_pages = generic_file_remap_pages,
2085}; 2084};
2086 2085
2087static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 2086static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -2276,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2276 bool same_page; 2275 bool same_page;
2277 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2276 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2278 u64 ino_size; 2277 u64 ino_size;
2278 bool truncated_page = false;
2279 bool updated_inode = false;
2279 2280
2280 ret = btrfs_wait_ordered_range(inode, offset, len); 2281 ret = btrfs_wait_ordered_range(inode, offset, len);
2281 if (ret) 2282 if (ret)
@@ -2307,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2307 * entire page. 2308 * entire page.
2308 */ 2309 */
2309 if (same_page && len < PAGE_CACHE_SIZE) { 2310 if (same_page && len < PAGE_CACHE_SIZE) {
2310 if (offset < ino_size) 2311 if (offset < ino_size) {
2312 truncated_page = true;
2311 ret = btrfs_truncate_page(inode, offset, len, 0); 2313 ret = btrfs_truncate_page(inode, offset, len, 0);
2314 } else {
2315 ret = 0;
2316 }
2312 goto out_only_mutex; 2317 goto out_only_mutex;
2313 } 2318 }
2314 2319
2315 /* zero back part of the first page */ 2320 /* zero back part of the first page */
2316 if (offset < ino_size) { 2321 if (offset < ino_size) {
2322 truncated_page = true;
2317 ret = btrfs_truncate_page(inode, offset, 0, 0); 2323 ret = btrfs_truncate_page(inode, offset, 0, 0);
2318 if (ret) { 2324 if (ret) {
2319 mutex_unlock(&inode->i_mutex); 2325 mutex_unlock(&inode->i_mutex);
@@ -2349,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2349 if (!ret) { 2355 if (!ret) {
2350 /* zero the front end of the last page */ 2356 /* zero the front end of the last page */
2351 if (tail_start + tail_len < ino_size) { 2357 if (tail_start + tail_len < ino_size) {
2358 truncated_page = true;
2352 ret = btrfs_truncate_page(inode, 2359 ret = btrfs_truncate_page(inode,
2353 tail_start + tail_len, 0, 1); 2360 tail_start + tail_len, 0, 1);
2354 if (ret) 2361 if (ret)
@@ -2358,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2358 } 2365 }
2359 2366
2360 if (lockend < lockstart) { 2367 if (lockend < lockstart) {
2361 mutex_unlock(&inode->i_mutex); 2368 ret = 0;
2362 return 0; 2369 goto out_only_mutex;
2363 } 2370 }
2364 2371
2365 while (1) { 2372 while (1) {
@@ -2507,6 +2514,7 @@ out_trans:
2507 2514
2508 trans->block_rsv = &root->fs_info->trans_block_rsv; 2515 trans->block_rsv = &root->fs_info->trans_block_rsv;
2509 ret = btrfs_update_inode(trans, root, inode); 2516 ret = btrfs_update_inode(trans, root, inode);
2517 updated_inode = true;
2510 btrfs_end_transaction(trans, root); 2518 btrfs_end_transaction(trans, root);
2511 btrfs_btree_balance_dirty(root); 2519 btrfs_btree_balance_dirty(root);
2512out_free: 2520out_free:
@@ -2516,6 +2524,22 @@ out:
2516 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2524 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2517 &cached_state, GFP_NOFS); 2525 &cached_state, GFP_NOFS);
2518out_only_mutex: 2526out_only_mutex:
2527 if (!updated_inode && truncated_page && !ret && !err) {
2528 /*
2529 * If we only end up zeroing part of a page, we still need to
2530 * update the inode item, so that all the time fields are
2531 * updated as well as the necessary in-memory btrfs inode fields
2532 * used to detect, at fsync time, whether the inode isn't yet in
2533 * the log tree or is there but not up to date.
2534 */
2535 trans = btrfs_start_transaction(root, 1);
2536 if (IS_ERR(trans)) {
2537 err = PTR_ERR(trans);
2538 } else {
2539 err = btrfs_update_inode(trans, root, inode);
2540 ret = btrfs_end_transaction(trans, root);
2541 }
2542 }
2519 mutex_unlock(&inode->i_mutex); 2543 mutex_unlock(&inode->i_mutex);
2520 if (ret && !err) 2544 if (ret && !err)
2521 err = ret; 2545 err = ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d6c03f7f136b..a71978578fa7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -651,15 +651,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
651 struct io_ctl io_ctl; 651 struct io_ctl io_ctl;
652 struct btrfs_key key; 652 struct btrfs_key key;
653 struct btrfs_free_space *e, *n; 653 struct btrfs_free_space *e, *n;
654 struct list_head bitmaps; 654 LIST_HEAD(bitmaps);
655 u64 num_entries; 655 u64 num_entries;
656 u64 num_bitmaps; 656 u64 num_bitmaps;
657 u64 generation; 657 u64 generation;
658 u8 type; 658 u8 type;
659 int ret = 0; 659 int ret = 0;
660 660
661 INIT_LIST_HEAD(&bitmaps);
662
663 /* Nothing in the space cache, goodbye */ 661 /* Nothing in the space cache, goodbye */
664 if (!i_size_read(inode)) 662 if (!i_size_read(inode))
665 return 0; 663 return 0;
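For reference, the LIST_HEAD() macro used in the hunk above declares and initializes the list head in one step, which is what lets the separate INIT_LIST_HEAD() call go away. A self-contained sketch modeled on the kernel's list.h (simplified, not the real header):

#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

/* Declare and initialize in one step, like the kernel's LIST_HEAD(). */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)

static void INIT_LIST_HEAD(struct list_head *list)
{
        list->next = list;
        list->prev = list;
}

int main(void)
{
        LIST_HEAD(bitmaps);            /* one line: declare + initialize */

        struct list_head bitmaps2;     /* two steps: declare ... */
        INIT_LIST_HEAD(&bitmaps2);     /* ... then initialize */

        printf("both empty: %d %d\n",
               bitmaps.next == &bitmaps, bitmaps2.next == &bitmaps2);
        return 0;
}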
@@ -1243,6 +1241,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1243 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1241 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1244 struct inode *inode; 1242 struct inode *inode;
1245 int ret = 0; 1243 int ret = 0;
1244 enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
1246 1245
1247 root = root->fs_info->tree_root; 1246 root = root->fs_info->tree_root;
1248 1247
@@ -1266,9 +1265,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1266 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1265 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1267 path, block_group->key.objectid); 1266 path, block_group->key.objectid);
1268 if (ret) { 1267 if (ret) {
1269 spin_lock(&block_group->lock); 1268 dcs = BTRFS_DC_ERROR;
1270 block_group->disk_cache_state = BTRFS_DC_ERROR;
1271 spin_unlock(&block_group->lock);
1272 ret = 0; 1269 ret = 0;
1273#ifdef DEBUG 1270#ifdef DEBUG
1274 btrfs_err(root->fs_info, 1271 btrfs_err(root->fs_info,
@@ -1277,6 +1274,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1277#endif 1274#endif
1278 } 1275 }
1279 1276
1277 spin_lock(&block_group->lock);
1278 block_group->disk_cache_state = dcs;
1279 spin_unlock(&block_group->lock);
1280 iput(inode); 1280 iput(inode);
1281 return ret; 1281 return ret;
1282} 1282}
@@ -2903,7 +2903,6 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
2903 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 2903 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2904 min_bytes); 2904 min_bytes);
2905 2905
2906 INIT_LIST_HEAD(&bitmaps);
2907 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 2906 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2908 bytes + empty_size, 2907 bytes + empty_size,
2909 cont1_bytes, min_bytes); 2908 cont1_bytes, min_bytes);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 8ffa4783cbf4..265e03c73f4d 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -344,6 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
344 return -ENOMEM; 344 return -ENOMEM;
345 345
346 path->leave_spinning = 1; 346 path->leave_spinning = 1;
347 path->skip_release_on_error = 1;
347 ret = btrfs_insert_empty_item(trans, root, path, &key, 348 ret = btrfs_insert_empty_item(trans, root, path, &key,
348 ins_len); 349 ins_len);
349 if (ret == -EEXIST) { 350 if (ret == -EEXIST) {
@@ -362,8 +363,12 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
362 ptr = (unsigned long)(ref + 1); 363 ptr = (unsigned long)(ref + 1);
363 ret = 0; 364 ret = 0;
364 } else if (ret < 0) { 365 } else if (ret < 0) {
365 if (ret == -EOVERFLOW) 366 if (ret == -EOVERFLOW) {
366 ret = -EMLINK; 367 if (find_name_in_backref(path, name, name_len, &ref))
368 ret = -EEXIST;
369 else
370 ret = -EMLINK;
371 }
367 goto out; 372 goto out;
368 } else { 373 } else {
369 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 374 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
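The hunk above, together with the new skip_release_on_error flag, lets btrfs_insert_inode_ref() distinguish a genuine link-count overflow from a name that is already present in the existing ref item. A toy model of that error reinterpretation (hypothetical ref_item structure; only the decision logic mirrors the kernel code):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Toy model: an inode ref item holding several names. */
struct ref_item {
        const char *names[4];
        int full;       /* item can't grow any further */
};

static int find_name_in_backref(struct ref_item *it, const char *name)
{
        for (int i = 0; i < 4 && it->names[i]; i++)
                if (!strcmp(it->names[i], name))
                        return 1;
        return 0;
}

static int insert_inode_ref(struct ref_item *it, const char *name)
{
        if (it->full) {
                /*
                 * Insertion failed with EOVERFLOW: decide whether the
                 * caller raced on an existing link (EEXIST) or really
                 * hit the link limit (EMLINK), as in the hunk above.
                 */
                if (find_name_in_backref(it, name))
                        return -EEXIST;
                return -EMLINK;
        }
        return 0;
}

int main(void)
{
        struct ref_item it = { .names = { "a", "b" }, .full = 1 };
        printf("existing name: %d (-EEXIST=%d)\n",
               insert_inode_ref(&it, "a"), -EEXIST);
        printf("new name:      %d (-EMLINK=%d)\n",
               insert_inode_ref(&it, "c"), -EMLINK);
        return 0;
}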
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e687bb0dc73a..d2e732d7af52 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
108 108
109static int btrfs_dirty_inode(struct inode *inode); 109static int btrfs_dirty_inode(struct inode *inode);
110 110
111#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
112void btrfs_test_inode_set_ops(struct inode *inode)
113{
114 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
115}
116#endif
117
111static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 118static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 struct inode *inode, struct inode *dir, 119 struct inode *inode, struct inode *dir,
113 const struct qstr *qstr) 120 const struct qstr *qstr)
@@ -1530,10 +1537,32 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1530static void btrfs_split_extent_hook(struct inode *inode, 1537static void btrfs_split_extent_hook(struct inode *inode,
1531 struct extent_state *orig, u64 split) 1538 struct extent_state *orig, u64 split)
1532{ 1539{
1540 u64 size;
1541
1533 /* not delalloc, ignore it */ 1542 /* not delalloc, ignore it */
1534 if (!(orig->state & EXTENT_DELALLOC)) 1543 if (!(orig->state & EXTENT_DELALLOC))
1535 return; 1544 return;
1536 1545
1546 size = orig->end - orig->start + 1;
1547 if (size > BTRFS_MAX_EXTENT_SIZE) {
1548 u64 num_extents;
1549 u64 new_size;
1550
1551 /*
1552 * See the explanation in btrfs_merge_extent_hook; the same
1553 * applies here, just in reverse.
1554 */
1555 new_size = orig->end - split + 1;
1556 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1557 BTRFS_MAX_EXTENT_SIZE);
1558 new_size = split - orig->start;
1559 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1560 BTRFS_MAX_EXTENT_SIZE);
1561 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1562 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1563 return;
1564 }
1565
1537 spin_lock(&BTRFS_I(inode)->lock); 1566 spin_lock(&BTRFS_I(inode)->lock);
1538 BTRFS_I(inode)->outstanding_extents++; 1567 BTRFS_I(inode)->outstanding_extents++;
1539 spin_unlock(&BTRFS_I(inode)->lock); 1568 spin_unlock(&BTRFS_I(inode)->lock);
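The split hook now charges an extra outstanding extent only when the split actually increases the number of BTRFS_MAX_EXTENT_SIZE-sized units needed. A small userspace reduction of that check (assuming a 128M BTRFS_MAX_EXTENT_SIZE; see the companion merge sketch further below):

#include <stdio.h>

#define MAX_EXTENT (128ULL * 1024 * 1024)       /* BTRFS_MAX_EXTENT_SIZE */

/* How many outstanding extents a delalloc range of 'len' bytes needs. */
static unsigned long long nr_extents(unsigned long long len)
{
        return (len + MAX_EXTENT - 1) / MAX_EXTENT;     /* round-up division */
}

/* How many extra outstanding extents a split at 'pos' costs (0 or 1). */
static int split_cost(unsigned long long size, unsigned long long pos)
{
        unsigned long long after = nr_extents(pos) + nr_extents(size - pos);
        return after > nr_extents(size) ? 1 : 0;        /* hook adds at most 1 */
}

int main(void)
{
        /* Splitting 256M at the 128M boundary: 2 before, 1 + 1 after. */
        printf("aligned split:    +%d\n",
               split_cost(2 * MAX_EXTENT, MAX_EXTENT));
        /* Splitting a single 128M extent at 4k: 1 before, 1 + 1 after. */
        printf("mid-extent split: +%d\n",
               split_cost(MAX_EXTENT, 4096));
        return 0;
}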
@@ -1549,10 +1578,55 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1549 struct extent_state *new, 1578 struct extent_state *new,
1550 struct extent_state *other) 1579 struct extent_state *other)
1551{ 1580{
1581 u64 new_size, old_size;
1582 u64 num_extents;
1583
1552 /* not delalloc, ignore it */ 1584 /* not delalloc, ignore it */
1553 if (!(other->state & EXTENT_DELALLOC)) 1585 if (!(other->state & EXTENT_DELALLOC))
1554 return; 1586 return;
1555 1587
1588 if (new->start > other->start)
1589 new_size = new->end - other->start + 1;
1590 else
1591 new_size = other->end - new->start + 1;
1592
1593 /* we're not bigger than the max, unreserve the space and go */
1594 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1595 spin_lock(&BTRFS_I(inode)->lock);
1596 BTRFS_I(inode)->outstanding_extents--;
1597 spin_unlock(&BTRFS_I(inode)->lock);
1598 return;
1599 }
1600
1601 /*
1602 * We have to add up either side to figure out how many extents were
1603 * accounted for before we merged into one big extent. If the number of
1604 * extents we accounted for is <= the number we need for the new range
1605 * then we can return; otherwise we drop one. Think of it like this:
1606 *
1607 * [ 4k][MAX_SIZE]
1608 *
1609 * So we've grown the extent by a MAX_SIZE extent; this means we need
1610 * 2 outstanding extents. One side accounts for 1 and the other side
1611 * accounts for 1, so they are equal and we can return. But in this case:
1612 *
1613 * [MAX_SIZE+4k][MAX_SIZE+4k]
1614 *
1615 * Each range on its own accounts for 2 extents, but merged together
1616 * they are only 3 extents' worth of accounting, so we need to drop in
1617 * this case.
1618 */
1619 old_size = other->end - other->start + 1;
1620 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1621 BTRFS_MAX_EXTENT_SIZE);
1622 old_size = new->end - new->start + 1;
1623 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1624 BTRFS_MAX_EXTENT_SIZE);
1625
1626 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1627 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1628 return;
1629
1556 spin_lock(&BTRFS_I(inode)->lock); 1630 spin_lock(&BTRFS_I(inode)->lock);
1557 BTRFS_I(inode)->outstanding_extents--; 1631 BTRFS_I(inode)->outstanding_extents--;
1558 spin_unlock(&BTRFS_I(inode)->lock); 1632 spin_unlock(&BTRFS_I(inode)->lock);
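The accounting described in the comment above is ceiling division of each range's length by BTRFS_MAX_EXTENT_SIZE. A standalone sketch that reproduces the two cases from the comment (again assuming a 128M maximum extent size):

#include <stdio.h>

#define MAX_EXTENT (128ULL * 1024 * 1024)       /* BTRFS_MAX_EXTENT_SIZE */

/* How many outstanding extents a delalloc range of 'len' bytes needs. */
static unsigned long long nr_extents(unsigned long long len)
{
        return (len + MAX_EXTENT - 1) / MAX_EXTENT;     /* div64_u64 round-up */
}

/*
 * How many outstanding extents become redundant when two adjacent
 * ranges merge. The kernel hook drops at most one per merge event,
 * since merges happen pairwise.
 */
static int merge_drop(unsigned long long a, unsigned long long b)
{
        unsigned long long before = nr_extents(a) + nr_extents(b);
        unsigned long long after = nr_extents(a + b);
        return before > after ? (int)(before - after) : 0;
}

int main(void)
{
        /* [ 4k][MAX_SIZE]: 1 + 1 before, 2 after -> drop nothing */
        printf("4k + MAX:        drop %d\n", merge_drop(4096, MAX_EXTENT));
        /* [MAX+4k][MAX+4k]: 2 + 2 before, 3 after -> drop one */
        printf("MAX+4k + MAX+4k: drop %d\n",
               merge_drop(MAX_EXTENT + 4096, MAX_EXTENT + 4096));
        return 0;
}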
@@ -1604,7 +1678,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1604 * have pending delalloc work to be done. 1678 * have pending delalloc work to be done.
1605 */ 1679 */
1606static void btrfs_set_bit_hook(struct inode *inode, 1680static void btrfs_set_bit_hook(struct inode *inode,
1607 struct extent_state *state, unsigned long *bits) 1681 struct extent_state *state, unsigned *bits)
1608{ 1682{
1609 1683
1610 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) 1684 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@@ -1627,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
1627 spin_unlock(&BTRFS_I(inode)->lock); 1701 spin_unlock(&BTRFS_I(inode)->lock);
1628 } 1702 }
1629 1703
1704 /* For sanity tests */
1705 if (btrfs_test_is_dummy_root(root))
1706 return;
1707
1630 __percpu_counter_add(&root->fs_info->delalloc_bytes, len, 1708 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1631 root->fs_info->delalloc_batch); 1709 root->fs_info->delalloc_batch);
1632 spin_lock(&BTRFS_I(inode)->lock); 1710 spin_lock(&BTRFS_I(inode)->lock);
@@ -1645,9 +1723,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
1645 */ 1723 */
1646static void btrfs_clear_bit_hook(struct inode *inode, 1724static void btrfs_clear_bit_hook(struct inode *inode,
1647 struct extent_state *state, 1725 struct extent_state *state,
1648 unsigned long *bits) 1726 unsigned *bits)
1649{ 1727{
1650 u64 len = state->end + 1 - state->start; 1728 u64 len = state->end + 1 - state->start;
1729 u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
1730 BTRFS_MAX_EXTENT_SIZE);
1651 1731
1652 spin_lock(&BTRFS_I(inode)->lock); 1732 spin_lock(&BTRFS_I(inode)->lock);
1653 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) 1733 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1667,7 +1747,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1667 *bits &= ~EXTENT_FIRST_DELALLOC; 1747 *bits &= ~EXTENT_FIRST_DELALLOC;
1668 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { 1748 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1669 spin_lock(&BTRFS_I(inode)->lock); 1749 spin_lock(&BTRFS_I(inode)->lock);
1670 BTRFS_I(inode)->outstanding_extents--; 1750 BTRFS_I(inode)->outstanding_extents -= num_extents;
1671 spin_unlock(&BTRFS_I(inode)->lock); 1751 spin_unlock(&BTRFS_I(inode)->lock);
1672 } 1752 }
1673 1753
@@ -1680,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1680 root != root->fs_info->tree_root) 1760 root != root->fs_info->tree_root)
1681 btrfs_delalloc_release_metadata(inode, len); 1761 btrfs_delalloc_release_metadata(inode, len);
1682 1762
1763 /* For sanity tests. */
1764 if (btrfs_test_is_dummy_root(root))
1765 return;
1766
1683 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1767 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1684 && do_list && !(state->state & EXTENT_NORESERVE)) 1768 && do_list && !(state->state & EXTENT_NORESERVE))
1685 btrfs_free_reserved_data_space(inode, len); 1769 btrfs_free_reserved_data_space(inode, len);
@@ -2945,7 +3029,7 @@ static int __readpage_endio_check(struct inode *inode,
2945 return 0; 3029 return 0;
2946zeroit: 3030zeroit:
2947 if (__ratelimit(&_rs)) 3031 if (__ratelimit(&_rs))
2948 btrfs_info(BTRFS_I(inode)->root->fs_info, 3032 btrfs_warn(BTRFS_I(inode)->root->fs_info,
2949 "csum failed ino %llu off %llu csum %u expected csum %u", 3033 "csum failed ino %llu off %llu csum %u expected csum %u",
2950 btrfs_ino(inode), start, csum, csum_expected); 3034 btrfs_ino(inode), start, csum, csum_expected);
2951 memset(kaddr + pgoff, 1, len); 3035 memset(kaddr + pgoff, 1, len);
@@ -3407,7 +3491,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3407 3491
3408out: 3492out:
3409 if (ret) 3493 if (ret)
3410 btrfs_crit(root->fs_info, 3494 btrfs_err(root->fs_info,
3411 "could not do orphan cleanup %d", ret); 3495 "could not do orphan cleanup %d", ret);
3412 btrfs_free_path(path); 3496 btrfs_free_path(path);
3413 return ret; 3497 return ret;
@@ -3490,7 +3574,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
3490 struct btrfs_path *path; 3574 struct btrfs_path *path;
3491 struct extent_buffer *leaf; 3575 struct extent_buffer *leaf;
3492 struct btrfs_inode_item *inode_item; 3576 struct btrfs_inode_item *inode_item;
3493 struct btrfs_timespec *tspec;
3494 struct btrfs_root *root = BTRFS_I(inode)->root; 3577 struct btrfs_root *root = BTRFS_I(inode)->root;
3495 struct btrfs_key location; 3578 struct btrfs_key location;
3496 unsigned long ptr; 3579 unsigned long ptr;
@@ -3527,17 +3610,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
3527 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); 3610 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
3528 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); 3611 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
3529 3612
3530 tspec = btrfs_inode_atime(inode_item); 3613 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
3531 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); 3614 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
3532 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); 3615
3616 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
3617 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
3533 3618
3534 tspec = btrfs_inode_mtime(inode_item); 3619 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
3535 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); 3620 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
3536 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
3537 3621
3538 tspec = btrfs_inode_ctime(inode_item); 3622 BTRFS_I(inode)->i_otime.tv_sec =
3539 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); 3623 btrfs_timespec_sec(leaf, &inode_item->otime);
3540 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); 3624 BTRFS_I(inode)->i_otime.tv_nsec =
3625 btrfs_timespec_nsec(leaf, &inode_item->otime);
3541 3626
3542 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); 3627 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
3543 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 3628 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
@@ -3608,7 +3693,6 @@ cache_acl:
3608 switch (inode->i_mode & S_IFMT) { 3693 switch (inode->i_mode & S_IFMT) {
3609 case S_IFREG: 3694 case S_IFREG:
3610 inode->i_mapping->a_ops = &btrfs_aops; 3695 inode->i_mapping->a_ops = &btrfs_aops;
3611 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3612 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 3696 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3613 inode->i_fop = &btrfs_file_operations; 3697 inode->i_fop = &btrfs_file_operations;
3614 inode->i_op = &btrfs_file_inode_operations; 3698 inode->i_op = &btrfs_file_inode_operations;
@@ -3623,7 +3707,6 @@ cache_acl:
3623 case S_IFLNK: 3707 case S_IFLNK:
3624 inode->i_op = &btrfs_symlink_inode_operations; 3708 inode->i_op = &btrfs_symlink_inode_operations;
3625 inode->i_mapping->a_ops = &btrfs_symlink_aops; 3709 inode->i_mapping->a_ops = &btrfs_symlink_aops;
3626 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3627 break; 3710 break;
3628 default: 3711 default:
3629 inode->i_op = &btrfs_special_inode_operations; 3712 inode->i_op = &btrfs_special_inode_operations;
@@ -3658,21 +3741,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3658 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 3741 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3659 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 3742 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3660 3743
3661 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), 3744 btrfs_set_token_timespec_sec(leaf, &item->atime,
3662 inode->i_atime.tv_sec, &token); 3745 inode->i_atime.tv_sec, &token);
3663 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), 3746 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3664 inode->i_atime.tv_nsec, &token); 3747 inode->i_atime.tv_nsec, &token);
3665 3748
3666 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), 3749 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3667 inode->i_mtime.tv_sec, &token); 3750 inode->i_mtime.tv_sec, &token);
3668 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), 3751 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3669 inode->i_mtime.tv_nsec, &token); 3752 inode->i_mtime.tv_nsec, &token);
3670 3753
3671 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), 3754 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3672 inode->i_ctime.tv_sec, &token); 3755 inode->i_ctime.tv_sec, &token);
3673 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), 3756 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3674 inode->i_ctime.tv_nsec, &token); 3757 inode->i_ctime.tv_nsec, &token);
3675 3758
3759 btrfs_set_token_timespec_sec(leaf, &item->otime,
3760 BTRFS_I(inode)->i_otime.tv_sec, &token);
3761 btrfs_set_token_timespec_nsec(leaf, &item->otime,
3762 BTRFS_I(inode)->i_otime.tv_nsec, &token);
3763
3676 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 3764 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3677 &token); 3765 &token);
3678 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, 3766 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
@@ -5009,6 +5097,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
5009 struct btrfs_root *new_root; 5097 struct btrfs_root *new_root;
5010 struct btrfs_root_ref *ref; 5098 struct btrfs_root_ref *ref;
5011 struct extent_buffer *leaf; 5099 struct extent_buffer *leaf;
5100 struct btrfs_key key;
5012 int ret; 5101 int ret;
5013 int err = 0; 5102 int err = 0;
5014 5103
@@ -5019,9 +5108,12 @@ static int fixup_tree_root_location(struct btrfs_root *root,
5019 } 5108 }
5020 5109
5021 err = -ENOENT; 5110 err = -ENOENT;
5022 ret = btrfs_find_item(root->fs_info->tree_root, path, 5111 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5023 BTRFS_I(dir)->root->root_key.objectid, 5112 key.type = BTRFS_ROOT_REF_KEY;
5024 location->objectid, BTRFS_ROOT_REF_KEY, NULL); 5113 key.offset = location->objectid;
5114
5115 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
5116 0, 0);
5025 if (ret) { 5117 if (ret) {
5026 if (ret < 0) 5118 if (ret < 0)
5027 err = ret; 5119 err = ret;
@@ -5260,7 +5352,10 @@ static struct inode *new_simple_dir(struct super_block *s,
5260 inode->i_op = &btrfs_dir_ro_inode_operations; 5352 inode->i_op = &btrfs_dir_ro_inode_operations;
5261 inode->i_fop = &simple_dir_operations; 5353 inode->i_fop = &simple_dir_operations;
5262 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; 5354 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5263 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 5355 inode->i_mtime = CURRENT_TIME;
5356 inode->i_atime = inode->i_mtime;
5357 inode->i_ctime = inode->i_mtime;
5358 BTRFS_I(inode)->i_otime = inode->i_mtime;
5264 5359
5265 return inode; 5360 return inode;
5266} 5361}
@@ -5828,7 +5923,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5828 5923
5829 inode_init_owner(inode, dir, mode); 5924 inode_init_owner(inode, dir, mode);
5830 inode_set_bytes(inode, 0); 5925 inode_set_bytes(inode, 0);
5831 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 5926
5927 inode->i_mtime = CURRENT_TIME;
5928 inode->i_atime = inode->i_mtime;
5929 inode->i_ctime = inode->i_mtime;
5930 BTRFS_I(inode)->i_otime = inode->i_mtime;
5931
5832 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 5932 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
5833 struct btrfs_inode_item); 5933 struct btrfs_inode_item);
5834 memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, 5934 memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
@@ -6088,7 +6188,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
6088 inode->i_fop = &btrfs_file_operations; 6188 inode->i_fop = &btrfs_file_operations;
6089 inode->i_op = &btrfs_file_inode_operations; 6189 inode->i_op = &btrfs_file_inode_operations;
6090 inode->i_mapping->a_ops = &btrfs_aops; 6190 inode->i_mapping->a_ops = &btrfs_aops;
6091 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
6092 6191
6093 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 6192 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6094 if (err) 6193 if (err)
@@ -6255,8 +6354,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6255 6354
6256out_fail: 6355out_fail:
6257 btrfs_end_transaction(trans, root); 6356 btrfs_end_transaction(trans, root);
6258 if (drop_on_err) 6357 if (drop_on_err) {
6358 inode_dec_link_count(inode);
6259 iput(inode); 6359 iput(inode);
6360 }
6260 btrfs_balance_delayed_items(root); 6361 btrfs_balance_delayed_items(root);
6261 btrfs_btree_balance_dirty(root); 6362 btrfs_btree_balance_dirty(root);
6262 return err; 6363 return err;
@@ -7135,17 +7236,28 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7135 u64 start = iblock << inode->i_blkbits; 7236 u64 start = iblock << inode->i_blkbits;
7136 u64 lockstart, lockend; 7237 u64 lockstart, lockend;
7137 u64 len = bh_result->b_size; 7238 u64 len = bh_result->b_size;
7239 u64 *outstanding_extents = NULL;
7138 int unlock_bits = EXTENT_LOCKED; 7240 int unlock_bits = EXTENT_LOCKED;
7139 int ret = 0; 7241 int ret = 0;
7140 7242
7141 if (create) 7243 if (create)
7142 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; 7244 unlock_bits |= EXTENT_DIRTY;
7143 else 7245 else
7144 len = min_t(u64, len, root->sectorsize); 7246 len = min_t(u64, len, root->sectorsize);
7145 7247
7146 lockstart = start; 7248 lockstart = start;
7147 lockend = start + len - 1; 7249 lockend = start + len - 1;
7148 7250
7251 if (current->journal_info) {
7252 /*
7253 * We need to pull our outstanding extents and set journal_info to NULL so
7254 * that anything that needs to check if there's a transaction doesn't get
7255 * confused.
7256 */
7257 outstanding_extents = current->journal_info;
7258 current->journal_info = NULL;
7259 }
7260
7149 /* 7261 /*
7150 * If this errors out it's because we couldn't invalidate pagecache for 7262 * If this errors out it's because we couldn't invalidate pagecache for
7151 * this range and we need to fallback to buffered. 7263 * this range and we need to fallback to buffered.
@@ -7206,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7206 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && 7318 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7207 em->block_start != EXTENT_MAP_HOLE)) { 7319 em->block_start != EXTENT_MAP_HOLE)) {
7208 int type; 7320 int type;
7209 int ret;
7210 u64 block_start, orig_start, orig_block_len, ram_bytes; 7321 u64 block_start, orig_start, orig_block_len, ram_bytes;
7211 7322
7212 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 7323 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7270,14 +7381,21 @@ unlock:
7270 if (start + len > i_size_read(inode)) 7381 if (start + len > i_size_read(inode))
7271 i_size_write(inode, start + len); 7382 i_size_write(inode, start + len);
7272 7383
7273 spin_lock(&BTRFS_I(inode)->lock); 7384 /*
7274 BTRFS_I(inode)->outstanding_extents++; 7385 * If we have an outstanding_extents count still set then we're
7275 spin_unlock(&BTRFS_I(inode)->lock); 7386 * within our reservation, otherwise we need to adjust our inode
7387 * counter appropriately.
7388 */
7389 if (*outstanding_extents) {
7390 (*outstanding_extents)--;
7391 } else {
7392 spin_lock(&BTRFS_I(inode)->lock);
7393 BTRFS_I(inode)->outstanding_extents++;
7394 spin_unlock(&BTRFS_I(inode)->lock);
7395 }
7276 7396
7277 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 7397 current->journal_info = outstanding_extents;
7278 lockstart + len - 1, EXTENT_DELALLOC, NULL, 7398 btrfs_free_reserved_data_space(inode, len);
7279 &cached_state, GFP_NOFS);
7280 BUG_ON(ret);
7281 } 7399 }
7282 7400
7283 /* 7401 /*
@@ -7300,6 +7418,8 @@ unlock:
7300unlock_err: 7418unlock_err:
7301 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7419 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7302 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7420 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7421 if (outstanding_extents)
7422 current->journal_info = outstanding_extents;
7303 return ret; 7423 return ret;
7304} 7424}
7305 7425
@@ -7806,8 +7926,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7806 } 7926 }
7807 7927
7808 /* async crcs make it difficult to collect full stripe writes. */ 7928 /* async crcs make it difficult to collect full stripe writes. */
7809 if (btrfs_get_alloc_profile(root, 1) & 7929 if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
7810 (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
7811 async_submit = 0; 7930 async_submit = 0;
7812 else 7931 else
7813 async_submit = 1; 7932 async_submit = 1;
@@ -8000,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8000{ 8119{
8001 struct file *file = iocb->ki_filp; 8120 struct file *file = iocb->ki_filp;
8002 struct inode *inode = file->f_mapping->host; 8121 struct inode *inode = file->f_mapping->host;
8122 u64 outstanding_extents = 0;
8003 size_t count = 0; 8123 size_t count = 0;
8004 int flags = 0; 8124 int flags = 0;
8005 bool wakeup = true; 8125 bool wakeup = true;
@@ -8037,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8037 ret = btrfs_delalloc_reserve_space(inode, count); 8157 ret = btrfs_delalloc_reserve_space(inode, count);
8038 if (ret) 8158 if (ret)
8039 goto out; 8159 goto out;
8160 outstanding_extents = div64_u64(count +
8161 BTRFS_MAX_EXTENT_SIZE - 1,
8162 BTRFS_MAX_EXTENT_SIZE);
8163
8164 /*
8165 * We need to know how many extents we reserved so that we can
8166 * do the accounting properly if we go over the number we
8167 * originally calculated. Abuse current->journal_info for this.
8168 */
8169 current->journal_info = &outstanding_extents;
8040 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8170 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8041 &BTRFS_I(inode)->runtime_flags)) { 8171 &BTRFS_I(inode)->runtime_flags)) {
8042 inode_dio_done(inode); 8172 inode_dio_done(inode);
@@ -8049,13 +8179,12 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8049 iter, offset, btrfs_get_blocks_direct, NULL, 8179 iter, offset, btrfs_get_blocks_direct, NULL,
8050 btrfs_submit_direct, flags); 8180 btrfs_submit_direct, flags);
8051 if (rw & WRITE) { 8181 if (rw & WRITE) {
8182 current->journal_info = NULL;
8052 if (ret < 0 && ret != -EIOCBQUEUED) 8183 if (ret < 0 && ret != -EIOCBQUEUED)
8053 btrfs_delalloc_release_space(inode, count); 8184 btrfs_delalloc_release_space(inode, count);
8054 else if (ret >= 0 && (size_t)ret < count) 8185 else if (ret >= 0 && (size_t)ret < count)
8055 btrfs_delalloc_release_space(inode, 8186 btrfs_delalloc_release_space(inode,
8056 count - (size_t)ret); 8187 count - (size_t)ret);
8057 else
8058 btrfs_delalloc_release_metadata(inode, 0);
8059 } 8188 }
8060out: 8189out:
8061 if (wakeup) 8190 if (wakeup)
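These direct IO hunks are the producer side of the pattern consumed in btrfs_get_blocks_direct() earlier: with no transaction running, current->journal_info is borrowed to carry a pointer to the reserved-extent counter. A standalone sketch of the handoff, with a plain global standing in for the per-task pointer (illustrative names only):

#include <stdio.h>

/* Stand-in for current->journal_info (a per-task pointer in the kernel). */
static void *journal_info;

#define MAX_EXTENT (128ULL * 1024 * 1024)

/* Callback for each mapped block range; consumes one reserved extent
 * while any remain, and only then charges a new outstanding extent. */
static void get_blocks(unsigned long long *inode_outstanding)
{
        unsigned long long *reserved = journal_info;

        journal_info = NULL;            /* don't confuse transaction checks */
        if (reserved && *reserved)
                (*reserved)--;          /* still within our reservation */
        else
                (*inode_outstanding)++; /* went past it: account extra */
        journal_info = reserved;        /* restore for the next callback */
}

int main(void)
{
        unsigned long long count = 300ULL * 1024 * 1024;        /* 300M write */
        unsigned long long reserved = (count + MAX_EXTENT - 1) / MAX_EXTENT;
        unsigned long long inode_outstanding = 0;

        journal_info = &reserved;       /* direct IO setup */
        for (int i = 0; i < 4; i++)     /* four mapping callbacks */
                get_blocks(&inode_outstanding);
        journal_info = NULL;            /* direct IO teardown */

        printf("reserved left %llu, extra outstanding %llu\n",
               reserved, inode_outstanding);
        return 0;
}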
@@ -8576,6 +8705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
8576 8705
8577 ei->delayed_node = NULL; 8706 ei->delayed_node = NULL;
8578 8707
8708 ei->i_otime.tv_sec = 0;
8709 ei->i_otime.tv_nsec = 0;
8710
8579 inode = &ei->vfs_inode; 8711 inode = &ei->vfs_inode;
8580 extent_map_tree_init(&ei->extent_tree); 8712 extent_map_tree_init(&ei->extent_tree);
8581 extent_io_tree_init(&ei->io_tree, &inode->i_data); 8713 extent_io_tree_init(&ei->io_tree, &inode->i_data);
@@ -9201,7 +9333,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
9201 inode->i_fop = &btrfs_file_operations; 9333 inode->i_fop = &btrfs_file_operations;
9202 inode->i_op = &btrfs_file_inode_operations; 9334 inode->i_op = &btrfs_file_inode_operations;
9203 inode->i_mapping->a_ops = &btrfs_aops; 9335 inode->i_mapping->a_ops = &btrfs_aops;
9204 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9205 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 9336 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9206 9337
9207 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 9338 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
@@ -9245,7 +9376,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
9245 9376
9246 inode->i_op = &btrfs_symlink_inode_operations; 9377 inode->i_op = &btrfs_symlink_inode_operations;
9247 inode->i_mapping->a_ops = &btrfs_symlink_aops; 9378 inode->i_mapping->a_ops = &btrfs_symlink_aops;
9248 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9249 inode_set_bytes(inode, name_len); 9379 inode_set_bytes(inode, name_len);
9250 btrfs_i_size_write(inode, name_len); 9380 btrfs_i_size_write(inode, name_len);
9251 err = btrfs_update_inode(trans, root, inode); 9381 err = btrfs_update_inode(trans, root, inode);
@@ -9457,7 +9587,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
9457 inode->i_op = &btrfs_file_inode_operations; 9587 inode->i_op = &btrfs_file_inode_operations;
9458 9588
9459 inode->i_mapping->a_ops = &btrfs_aops; 9589 inode->i_mapping->a_ops = &btrfs_aops;
9460 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9461 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 9590 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9462 9591
9463 ret = btrfs_init_inode_security(trans, inode, dir, NULL); 9592 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d49fe8a0f6b5..74609b931ba5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -776,11 +776,11 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
776 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 776 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
777 return -EPERM; 777 return -EPERM;
778 if (isdir) { 778 if (isdir) {
779 if (!S_ISDIR(victim->d_inode->i_mode)) 779 if (!d_is_dir(victim))
780 return -ENOTDIR; 780 return -ENOTDIR;
781 if (IS_ROOT(victim)) 781 if (IS_ROOT(victim))
782 return -EBUSY; 782 return -EBUSY;
783 } else if (S_ISDIR(victim->d_inode->i_mode)) 783 } else if (d_is_dir(victim))
784 return -EISDIR; 784 return -EISDIR;
785 if (IS_DEADDIR(dir)) 785 if (IS_DEADDIR(dir))
786 return -ENOENT; 786 return -ENOENT;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
452 continue; 452 continue;
453 if (entry_end(ordered) <= start) 453 if (entry_end(ordered) <= start)
454 break; 454 break;
455 if (!list_empty(&ordered->log_list)) 455 if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
456 continue;
457 if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
458 continue; 456 continue;
459 list_add(&ordered->log_list, logged_list); 457 list_add(&ordered->log_list, logged_list);
460 atomic_inc(&ordered->refs); 458 atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
511 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 509 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
512 &ordered->flags)); 510 &ordered->flags));
513 511
514 if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 512 list_add_tail(&ordered->trans_list, &trans->ordered);
515 list_add_tail(&ordered->trans_list, &trans->ordered);
516 spin_lock_irq(&log->log_extents_lock[index]); 513 spin_lock_irq(&log->log_extents_lock[index]);
517 } 514 }
518 spin_unlock_irq(&log->log_extents_lock[index]); 515 spin_unlock_irq(&log->log_extents_lock[index]);
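The btrfs_get_logged_extents() change collapses a racy list_empty()-then-test_bit() sequence into one atomic test_and_set_bit(), so exactly one logging task claims each ordered extent. A userspace sketch of that semantics using C11 atomics in place of the kernel bitops:

#include <stdatomic.h>
#include <stdio.h>

#define ORDERED_LOGGED 0

static atomic_ulong flags;

/* Atomically set a bit and report whether it was already set;
 * mirrors the semantics of the kernel's test_and_set_bit(). */
static int test_and_set_bit(int nr, atomic_ulong *addr)
{
        unsigned long mask = 1UL << nr;
        return (atomic_fetch_or(addr, mask) & mask) != 0;
}

int main(void)
{
        /* First caller claims the extent, every later caller skips it. */
        for (int i = 0; i < 3; i++) {
                if (test_and_set_bit(ORDERED_LOGGED, &flags))
                        printf("caller %d: already logged, skip\n", i);
                else
                        printf("caller %d: wins, adds to log list\n", i);
        }
        return 0;
}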
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 48b60dbf807f..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1259 if (oper1->seq < oper2->seq) 1259 if (oper1->seq < oper2->seq)
1260 return -1; 1260 return -1;
1261 if (oper1->seq > oper2->seq) 1261 if (oper1->seq > oper2->seq)
1262 return -1; 1262 return 1;
1263 if (oper1->ref_root < oper2->ref_root) 1263 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1264 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1265 if (oper1->ref_root > oper2->ref_root)
@@ -1431,9 +1431,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1431 qgroup = u64_to_ptr(unode->aux); 1431 qgroup = u64_to_ptr(unode->aux);
1432 qgroup->rfer += sign * oper->num_bytes; 1432 qgroup->rfer += sign * oper->num_bytes;
1433 qgroup->rfer_cmpr += sign * oper->num_bytes; 1433 qgroup->rfer_cmpr += sign * oper->num_bytes;
1434 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1434 qgroup->excl += sign * oper->num_bytes; 1435 qgroup->excl += sign * oper->num_bytes;
1435 if (sign < 0)
1436 WARN_ON(qgroup->excl < oper->num_bytes);
1437 qgroup->excl_cmpr += sign * oper->num_bytes; 1436 qgroup->excl_cmpr += sign * oper->num_bytes;
1438 qgroup_dirty(fs_info, qgroup); 1437 qgroup_dirty(fs_info, qgroup);
1439 1438
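The one-line comp_oper() fix repairs a comparator that returned -1 for both orderings of seq, breaking antisymmetry and with it any sorted structure built on the comparison. A minimal sketch of the corrected shape (hypothetical struct with the same two keys):

#include <stdint.h>
#include <stdio.h>

struct oper {
        uint64_t seq;
        uint64_t ref_root;
};

/* A valid three-way comparator: antisymmetric, keys tried in order. */
static int comp_oper(const struct oper *a, const struct oper *b)
{
        if (a->seq < b->seq)
                return -1;
        if (a->seq > b->seq)
                return 1;       /* the fix: was -1, breaking antisymmetry */
        if (a->ref_root < b->ref_root)
                return -1;
        if (a->ref_root > b->ref_root)
                return 1;
        return 0;
}

int main(void)
{
        struct oper x = { .seq = 2, .ref_root = 0 };
        struct oper y = { .seq = 1, .ref_root = 9 };

        /* With the bug, both of these returned -1. */
        printf("cmp(x,y)=%d cmp(y,x)=%d\n", comp_oper(&x, &y),
               comp_oper(&y, &x));
        return 0;
}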
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 8ab2a17bbba8..5264858ed768 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -58,15 +58,6 @@
58 */ 58 */
59#define RBIO_CACHE_READY_BIT 3 59#define RBIO_CACHE_READY_BIT 3
60 60
61/*
62 * bbio and raid_map are managed by the caller, so we shouldn't free
63 * them here. Besides that, all rbios with this flag should not
64 * be cached, because we need raid_map to check whether two rbios'
65 * stripes are the same, but it is very likely that the caller has
66 * freed raid_map, so don't cache those rbios.
67 */
68#define RBIO_HOLD_BBIO_MAP_BIT 4
69
70#define RBIO_CACHE_SIZE 1024 61#define RBIO_CACHE_SIZE 1024
71 62
72enum btrfs_rbio_ops { 63enum btrfs_rbio_ops {
@@ -79,13 +70,6 @@ struct btrfs_raid_bio {
79 struct btrfs_fs_info *fs_info; 70 struct btrfs_fs_info *fs_info;
80 struct btrfs_bio *bbio; 71 struct btrfs_bio *bbio;
81 72
82 /*
83 * logical block numbers for the start of each stripe
84 * The last one or two are p/q. These are sorted,
85 * so raid_map[0] is the start of our full stripe
86 */
87 u64 *raid_map;
88
89 /* while we're doing rmw on a stripe 73 /* while we're doing rmw on a stripe
90 * we put it into a hash table so we can 74 * we put it into a hash table so we can
91 * lock the stripe and merge more rbios 75 * lock the stripe and merge more rbios
@@ -303,7 +287,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
303 */ 287 */
304static int rbio_bucket(struct btrfs_raid_bio *rbio) 288static int rbio_bucket(struct btrfs_raid_bio *rbio)
305{ 289{
306 u64 num = rbio->raid_map[0]; 290 u64 num = rbio->bbio->raid_map[0];
307 291
308 /* 292 /*
309 * we shift down quite a bit. We're using byte 293 * we shift down quite a bit. We're using byte
@@ -606,8 +590,8 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
606 test_bit(RBIO_CACHE_BIT, &cur->flags)) 590 test_bit(RBIO_CACHE_BIT, &cur->flags))
607 return 0; 591 return 0;
608 592
609 if (last->raid_map[0] != 593 if (last->bbio->raid_map[0] !=
610 cur->raid_map[0]) 594 cur->bbio->raid_map[0])
611 return 0; 595 return 0;
612 596
613 /* we can't merge with different operations */ 597 /* we can't merge with different operations */
@@ -689,7 +673,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
689 spin_lock_irqsave(&h->lock, flags); 673 spin_lock_irqsave(&h->lock, flags);
690 list_for_each_entry(cur, &h->hash_list, hash_list) { 674 list_for_each_entry(cur, &h->hash_list, hash_list) {
691 walk++; 675 walk++;
692 if (cur->raid_map[0] == rbio->raid_map[0]) { 676 if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
693 spin_lock(&cur->bio_list_lock); 677 spin_lock(&cur->bio_list_lock);
694 678
695 /* can we steal this cached rbio's pages? */ 679 /* can we steal this cached rbio's pages? */
@@ -841,21 +825,6 @@ done_nolock:
841 remove_rbio_from_cache(rbio); 825 remove_rbio_from_cache(rbio);
842} 826}
843 827
844static inline void
845__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
846{
847 if (need) {
848 kfree(raid_map);
849 kfree(bbio);
850 }
851}
852
853static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
854{
855 __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
856 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
857}
858
859static void __free_raid_bio(struct btrfs_raid_bio *rbio) 828static void __free_raid_bio(struct btrfs_raid_bio *rbio)
860{ 829{
861 int i; 830 int i;
@@ -875,8 +844,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
875 } 844 }
876 } 845 }
877 846
878 free_bbio_and_raid_map(rbio); 847 btrfs_put_bbio(rbio->bbio);
879
880 kfree(rbio); 848 kfree(rbio);
881} 849}
882 850
@@ -985,8 +953,7 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
985 * this does not allocate any pages for rbio->pages. 953 * this does not allocate any pages for rbio->pages.
986 */ 954 */
987static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, 955static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
988 struct btrfs_bio *bbio, u64 *raid_map, 956 struct btrfs_bio *bbio, u64 stripe_len)
989 u64 stripe_len)
990{ 957{
991 struct btrfs_raid_bio *rbio; 958 struct btrfs_raid_bio *rbio;
992 int nr_data = 0; 959 int nr_data = 0;
@@ -1007,7 +974,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
1007 INIT_LIST_HEAD(&rbio->stripe_cache); 974 INIT_LIST_HEAD(&rbio->stripe_cache);
1008 INIT_LIST_HEAD(&rbio->hash_list); 975 INIT_LIST_HEAD(&rbio->hash_list);
1009 rbio->bbio = bbio; 976 rbio->bbio = bbio;
1010 rbio->raid_map = raid_map;
1011 rbio->fs_info = root->fs_info; 977 rbio->fs_info = root->fs_info;
1012 rbio->stripe_len = stripe_len; 978 rbio->stripe_len = stripe_len;
1013 rbio->nr_pages = num_pages; 979 rbio->nr_pages = num_pages;
@@ -1028,10 +994,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
1028 rbio->bio_pages = p + sizeof(struct page *) * num_pages; 994 rbio->bio_pages = p + sizeof(struct page *) * num_pages;
1029 rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; 995 rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
1030 996
1031 if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) 997 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
998 nr_data = real_stripes - 1;
999 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1032 nr_data = real_stripes - 2; 1000 nr_data = real_stripes - 2;
1033 else 1001 else
1034 nr_data = real_stripes - 1; 1002 BUG();
1035 1003
1036 rbio->nr_data = nr_data; 1004 rbio->nr_data = nr_data;
1037 return rbio; 1005 return rbio;
@@ -1182,7 +1150,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1182 spin_lock_irq(&rbio->bio_list_lock); 1150 spin_lock_irq(&rbio->bio_list_lock);
1183 bio_list_for_each(bio, &rbio->bio_list) { 1151 bio_list_for_each(bio, &rbio->bio_list) {
1184 start = (u64)bio->bi_iter.bi_sector << 9; 1152 start = (u64)bio->bi_iter.bi_sector << 9;
1185 stripe_offset = start - rbio->raid_map[0]; 1153 stripe_offset = start - rbio->bbio->raid_map[0];
1186 page_index = stripe_offset >> PAGE_CACHE_SHIFT; 1154 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1187 1155
1188 for (i = 0; i < bio->bi_vcnt; i++) { 1156 for (i = 0; i < bio->bi_vcnt; i++) {
@@ -1402,7 +1370,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1402 logical <<= 9; 1370 logical <<= 9;
1403 1371
1404 for (i = 0; i < rbio->nr_data; i++) { 1372 for (i = 0; i < rbio->nr_data; i++) {
1405 stripe_start = rbio->raid_map[i]; 1373 stripe_start = rbio->bbio->raid_map[i];
1406 if (logical >= stripe_start && 1374 if (logical >= stripe_start &&
1407 logical < stripe_start + rbio->stripe_len) { 1375 logical < stripe_start + rbio->stripe_len) {
1408 return i; 1376 return i;
@@ -1776,17 +1744,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1776 * our main entry point for writes from the rest of the FS. 1744 * our main entry point for writes from the rest of the FS.
1777 */ 1745 */
1778int raid56_parity_write(struct btrfs_root *root, struct bio *bio, 1746int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1779 struct btrfs_bio *bbio, u64 *raid_map, 1747 struct btrfs_bio *bbio, u64 stripe_len)
1780 u64 stripe_len)
1781{ 1748{
1782 struct btrfs_raid_bio *rbio; 1749 struct btrfs_raid_bio *rbio;
1783 struct btrfs_plug_cb *plug = NULL; 1750 struct btrfs_plug_cb *plug = NULL;
1784 struct blk_plug_cb *cb; 1751 struct blk_plug_cb *cb;
1785 int ret; 1752 int ret;
1786 1753
1787 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1754 rbio = alloc_rbio(root, bbio, stripe_len);
1788 if (IS_ERR(rbio)) { 1755 if (IS_ERR(rbio)) {
1789 __free_bbio_and_raid_map(bbio, raid_map, 1); 1756 btrfs_put_bbio(bbio);
1790 return PTR_ERR(rbio); 1757 return PTR_ERR(rbio);
1791 } 1758 }
1792 bio_list_add(&rbio->bio_list, bio); 1759 bio_list_add(&rbio->bio_list, bio);
@@ -1885,9 +1852,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1885 } 1852 }
1886 1853
1887 /* all raid6 handling here */ 1854 /* all raid6 handling here */
1888 if (rbio->raid_map[rbio->real_stripes - 1] == 1855 if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
1889 RAID6_Q_STRIPE) {
1890
1891 /* 1856 /*
1892 * single failure, rebuild from parity raid5 1857 * single failure, rebuild from parity raid5
1893 * style 1858 * style
@@ -1922,8 +1887,9 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1922 * here due to a crc mismatch and we can't give them the 1887 * here due to a crc mismatch and we can't give them the
1923 * data they want 1888 * data they want
1924 */ 1889 */
1925 if (rbio->raid_map[failb] == RAID6_Q_STRIPE) { 1890 if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
1926 if (rbio->raid_map[faila] == RAID5_P_STRIPE) { 1891 if (rbio->bbio->raid_map[faila] ==
1892 RAID5_P_STRIPE) {
1927 err = -EIO; 1893 err = -EIO;
1928 goto cleanup; 1894 goto cleanup;
1929 } 1895 }
@@ -1934,7 +1900,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1934 goto pstripe; 1900 goto pstripe;
1935 } 1901 }
1936 1902
1937 if (rbio->raid_map[failb] == RAID5_P_STRIPE) { 1903 if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
1938 raid6_datap_recov(rbio->real_stripes, 1904 raid6_datap_recov(rbio->real_stripes,
1939 PAGE_SIZE, faila, pointers); 1905 PAGE_SIZE, faila, pointers);
1940 } else { 1906 } else {
@@ -2001,8 +1967,7 @@ cleanup:
2001 1967
2002cleanup_io: 1968cleanup_io:
2003 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 1969 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
2004 if (err == 0 && 1970 if (err == 0)
2005 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
2006 cache_rbio_pages(rbio); 1971 cache_rbio_pages(rbio);
2007 else 1972 else
2008 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 1973 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -2156,15 +2121,16 @@ cleanup:
2156 * of the drive. 2121 * of the drive.
2157 */ 2122 */
2158int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 2123int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2159 struct btrfs_bio *bbio, u64 *raid_map, 2124 struct btrfs_bio *bbio, u64 stripe_len,
2160 u64 stripe_len, int mirror_num, int generic_io) 2125 int mirror_num, int generic_io)
2161{ 2126{
2162 struct btrfs_raid_bio *rbio; 2127 struct btrfs_raid_bio *rbio;
2163 int ret; 2128 int ret;
2164 2129
2165 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2130 rbio = alloc_rbio(root, bbio, stripe_len);
2166 if (IS_ERR(rbio)) { 2131 if (IS_ERR(rbio)) {
2167 __free_bbio_and_raid_map(bbio, raid_map, generic_io); 2132 if (generic_io)
2133 btrfs_put_bbio(bbio);
2168 return PTR_ERR(rbio); 2134 return PTR_ERR(rbio);
2169 } 2135 }
2170 2136
@@ -2175,7 +2141,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2175 rbio->faila = find_logical_bio_stripe(rbio, bio); 2141 rbio->faila = find_logical_bio_stripe(rbio, bio);
2176 if (rbio->faila == -1) { 2142 if (rbio->faila == -1) {
2177 BUG(); 2143 BUG();
2178 __free_bbio_and_raid_map(bbio, raid_map, generic_io); 2144 if (generic_io)
2145 btrfs_put_bbio(bbio);
2179 kfree(rbio); 2146 kfree(rbio);
2180 return -EIO; 2147 return -EIO;
2181 } 2148 }
@@ -2184,7 +2151,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2184 btrfs_bio_counter_inc_noblocked(root->fs_info); 2151 btrfs_bio_counter_inc_noblocked(root->fs_info);
2185 rbio->generic_bio_cnt = 1; 2152 rbio->generic_bio_cnt = 1;
2186 } else { 2153 } else {
2187 set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); 2154 btrfs_get_bbio(bbio);
2188 } 2155 }
2189 2156
2190 /* 2157 /*
@@ -2240,14 +2207,14 @@ static void read_rebuild_work(struct btrfs_work *work)
2240 2207
2241struct btrfs_raid_bio * 2208struct btrfs_raid_bio *
2242raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, 2209raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
2243 struct btrfs_bio *bbio, u64 *raid_map, 2210 struct btrfs_bio *bbio, u64 stripe_len,
2244 u64 stripe_len, struct btrfs_device *scrub_dev, 2211 struct btrfs_device *scrub_dev,
2245 unsigned long *dbitmap, int stripe_nsectors) 2212 unsigned long *dbitmap, int stripe_nsectors)
2246{ 2213{
2247 struct btrfs_raid_bio *rbio; 2214 struct btrfs_raid_bio *rbio;
2248 int i; 2215 int i;
2249 2216
2250 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2217 rbio = alloc_rbio(root, bbio, stripe_len);
2251 if (IS_ERR(rbio)) 2218 if (IS_ERR(rbio))
2252 return NULL; 2219 return NULL;
2253 bio_list_add(&rbio->bio_list, bio); 2220 bio_list_add(&rbio->bio_list, bio);
@@ -2279,10 +2246,10 @@ void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
2279 int stripe_offset; 2246 int stripe_offset;
2280 int index; 2247 int index;
2281 2248
2282 ASSERT(logical >= rbio->raid_map[0]); 2249 ASSERT(logical >= rbio->bbio->raid_map[0]);
2283 ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + 2250 ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
2284 rbio->stripe_len * rbio->nr_data); 2251 rbio->stripe_len * rbio->nr_data);
2285 stripe_offset = (int)(logical - rbio->raid_map[0]); 2252 stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
2286 index = stripe_offset >> PAGE_CACHE_SHIFT; 2253 index = stripe_offset >> PAGE_CACHE_SHIFT;
2287 rbio->bio_pages[index] = page; 2254 rbio->bio_pages[index] = page;
2288} 2255}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 31d4a157b5e3..2b5d7977d83b 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -43,16 +43,15 @@ struct btrfs_raid_bio;
43struct btrfs_device; 43struct btrfs_device;
44 44
45int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 45int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
46 struct btrfs_bio *bbio, u64 *raid_map, 46 struct btrfs_bio *bbio, u64 stripe_len,
47 u64 stripe_len, int mirror_num, int generic_io); 47 int mirror_num, int generic_io);
48int raid56_parity_write(struct btrfs_root *root, struct bio *bio, 48int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
49 struct btrfs_bio *bbio, u64 *raid_map, 49 struct btrfs_bio *bbio, u64 stripe_len);
50 u64 stripe_len);
51 50
52struct btrfs_raid_bio * 51struct btrfs_raid_bio *
53raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, 52raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
54 struct btrfs_bio *bbio, u64 *raid_map, 53 struct btrfs_bio *bbio, u64 stripe_len,
55 u64 stripe_len, struct btrfs_device *scrub_dev, 54 struct btrfs_device *scrub_dev,
56 unsigned long *dbitmap, int stripe_nsectors); 55 unsigned long *dbitmap, int stripe_nsectors);
57void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, 56void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
58 struct page *page, u64 logical); 57 struct page *page, u64 logical);
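Across the raid56 hunks, the separately allocated raid_map and the RBIO_HOLD_BBIO_MAP_BIT ownership flag are replaced by embedding raid_map in struct btrfs_bio and reference-counting the whole object with btrfs_get_bbio()/btrfs_put_bbio(). A toy version of that get/put lifetime (simplified, non-atomic refcount; the real helpers are introduced elsewhere in this series):

#include <stdio.h>
#include <stdlib.h>

/* Toy version of the refcounted bbio: raid_map lives inside it now,
 * so one put frees everything and no ownership flag is needed. */
struct bbio {
        int refs;
        unsigned long long raid_map[3];
};

static struct bbio *alloc_bbio(void)
{
        struct bbio *b = calloc(1, sizeof(*b));
        if (b)
                b->refs = 1;    /* caller holds the first reference */
        return b;
}

static void get_bbio(struct bbio *b)
{
        b->refs++;              /* kernel version uses atomics */
}

static void put_bbio(struct bbio *b)
{
        if (b && --b->refs == 0)
                free(b);
}

int main(void)
{
        struct bbio *b = alloc_bbio();

        get_bbio(b);    /* rbio takes its own reference */
        put_bbio(b);    /* caller drops its reference */
        put_bbio(b);    /* rbio teardown frees it here */
        printf("freed after last put\n");
        return 0;
}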
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index b63ae20618fb..0e7beea92b4c 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -66,7 +66,6 @@ struct reada_extctl {
66struct reada_extent { 66struct reada_extent {
67 u64 logical; 67 u64 logical;
68 struct btrfs_key top; 68 struct btrfs_key top;
69 u32 blocksize;
70 int err; 69 int err;
71 struct list_head extctl; 70 struct list_head extctl;
72 int refcnt; 71 int refcnt;
@@ -349,7 +348,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
349 348
350 blocksize = root->nodesize; 349 blocksize = root->nodesize;
351 re->logical = logical; 350 re->logical = logical;
352 re->blocksize = blocksize;
353 re->top = *top; 351 re->top = *top;
354 INIT_LIST_HEAD(&re->extctl); 352 INIT_LIST_HEAD(&re->extctl);
355 spin_lock_init(&re->lock); 353 spin_lock_init(&re->lock);
@@ -463,7 +461,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
463 spin_unlock(&fs_info->reada_lock); 461 spin_unlock(&fs_info->reada_lock);
464 btrfs_dev_replace_unlock(&fs_info->dev_replace); 462 btrfs_dev_replace_unlock(&fs_info->dev_replace);
465 463
466 kfree(bbio); 464 btrfs_put_bbio(bbio);
467 return re; 465 return re;
468 466
469error: 467error:
@@ -488,7 +486,7 @@ error:
488 kref_put(&zone->refcnt, reada_zone_release); 486 kref_put(&zone->refcnt, reada_zone_release);
489 spin_unlock(&fs_info->reada_lock); 487 spin_unlock(&fs_info->reada_lock);
490 } 488 }
491 kfree(bbio); 489 btrfs_put_bbio(bbio);
492 kfree(re); 490 kfree(re);
493 return re_exist; 491 return re_exist;
494} 492}
@@ -660,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
660 int mirror_num = 0; 658 int mirror_num = 0;
661 struct extent_buffer *eb = NULL; 659 struct extent_buffer *eb = NULL;
662 u64 logical; 660 u64 logical;
663 u32 blocksize;
664 int ret; 661 int ret;
665 int i; 662 int i;
666 int need_kick = 0; 663 int need_kick = 0;
@@ -694,7 +691,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
694 spin_unlock(&fs_info->reada_lock); 691 spin_unlock(&fs_info->reada_lock);
695 return 0; 692 return 0;
696 } 693 }
697 dev->reada_next = re->logical + re->blocksize; 694 dev->reada_next = re->logical + fs_info->tree_root->nodesize;
698 re->refcnt++; 695 re->refcnt++;
699 696
700 spin_unlock(&fs_info->reada_lock); 697 spin_unlock(&fs_info->reada_lock);
@@ -709,7 +706,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
709 } 706 }
710 } 707 }
711 logical = re->logical; 708 logical = re->logical;
712 blocksize = re->blocksize;
713 709
714 spin_lock(&re->lock); 710 spin_lock(&re->lock);
715 if (re->scheduled_for == NULL) { 711 if (re->scheduled_for == NULL) {
@@ -724,8 +720,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
724 return 0; 720 return 0;
725 721
726 atomic_inc(&dev->reada_in_flight); 722 atomic_inc(&dev->reada_in_flight);
727 ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize, 723 ret = reada_tree_block_flagged(fs_info->extent_root, logical,
728 mirror_num, &eb); 724 mirror_num, &eb);
729 if (ret) 725 if (ret)
730 __readahead_hook(fs_info->extent_root, NULL, logical, ret); 726 __readahead_hook(fs_info->extent_root, NULL, logical, ret);
731 else if (eb) 727 else if (eb)
@@ -851,7 +847,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
851 break; 847 break;
852 printk(KERN_DEBUG 848 printk(KERN_DEBUG
853 " re: logical %llu size %u empty %d for %lld", 849 " re: logical %llu size %u empty %d for %lld",
854 re->logical, re->blocksize, 850 re->logical, fs_info->tree_root->nodesize,
855 list_empty(&re->extctl), re->scheduled_for ? 851 list_empty(&re->extctl), re->scheduled_for ?
856 re->scheduled_for->devid : -1); 852 re->scheduled_for->devid : -1);
857 853
@@ -886,7 +882,8 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
886 } 882 }
887 printk(KERN_DEBUG 883 printk(KERN_DEBUG
888 "re: logical %llu size %u list empty %d for %lld", 884 "re: logical %llu size %u list empty %d for %lld",
889 re->logical, re->blocksize, list_empty(&re->extctl), 885 re->logical, fs_info->tree_root->nodesize,
886 list_empty(&re->extctl),
890 re->scheduled_for ? re->scheduled_for->devid : -1); 887 re->scheduled_for ? re->scheduled_for->devid : -1);
891 for (i = 0; i < re->nzones; ++i) { 888 for (i = 0; i < re->nzones; ++i) {
892 printk(KERN_CONT " zone %llu-%llu devs", 889 printk(KERN_CONT " zone %llu-%llu devs",
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 74257d6436ad..d83085381bcc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2855,9 +2855,10 @@ static void update_processed_blocks(struct reloc_control *rc,
2855 } 2855 }
2856} 2856}
2857 2857
2858static int tree_block_processed(u64 bytenr, u32 blocksize, 2858static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2859 struct reloc_control *rc)
2860{ 2859{
2860 u32 blocksize = rc->extent_root->nodesize;
2861
2861 if (test_range_bit(&rc->processed_blocks, bytenr, 2862 if (test_range_bit(&rc->processed_blocks, bytenr,
2862 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2863 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2863 return 1; 2864 return 1;
@@ -2965,8 +2966,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2965 while (rb_node) { 2966 while (rb_node) {
2966 block = rb_entry(rb_node, struct tree_block, rb_node); 2967 block = rb_entry(rb_node, struct tree_block, rb_node);
2967 if (!block->key_ready) 2968 if (!block->key_ready)
2968 readahead_tree_block(rc->extent_root, block->bytenr, 2969 readahead_tree_block(rc->extent_root, block->bytenr);
2969 block->key.objectid);
2970 rb_node = rb_next(rb_node); 2970 rb_node = rb_next(rb_node);
2971 } 2971 }
2972 2972
@@ -3353,7 +3353,7 @@ static int __add_tree_block(struct reloc_control *rc,
3353 bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, 3353 bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info,
3354 SKINNY_METADATA); 3354 SKINNY_METADATA);
3355 3355
3356 if (tree_block_processed(bytenr, blocksize, rc)) 3356 if (tree_block_processed(bytenr, rc))
3357 return 0; 3357 return 0;
3358 3358
3359 if (tree_search(blocks, bytenr)) 3359 if (tree_search(blocks, bytenr))
@@ -3611,7 +3611,7 @@ static int find_data_references(struct reloc_control *rc,
3611 if (added) 3611 if (added)
3612 goto next; 3612 goto next;
3613 3613
3614 if (!tree_block_processed(leaf->start, leaf->len, rc)) { 3614 if (!tree_block_processed(leaf->start, rc)) {
3615 block = kmalloc(sizeof(*block), GFP_NOFS); 3615 block = kmalloc(sizeof(*block), GFP_NOFS);
3616 if (!block) { 3616 if (!block) {
3617 err = -ENOMEM; 3617 err = -ENOMEM;
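
The reada.c and relocation.c hunks above share one refactor: the metadata block size is a per-filesystem constant (root->nodesize), so callers stop threading a per-extent blocksize through every call and the callee derives it itself. A minimal userspace sketch of the pattern, with made-up struct names standing in for the btrfs types:

	#include <stdint.h>
	#include <stdio.h>

	struct fs_root {
		uint32_t nodesize;	/* metadata block size, fixed per fs */
	};

	struct reloc_control {
		struct fs_root *extent_root;
	};

	/* before: tree_block_processed(bytenr, blocksize, rc) */
	static int tree_block_processed(uint64_t bytenr, struct reloc_control *rc)
	{
		uint32_t blocksize = rc->extent_root->nodesize;

		/* the real code tests an EXTENT_DIRTY bit over
		 * [bytenr, bytenr + blocksize - 1]; dummy predicate here */
		return bytenr % blocksize == 0;
	}

	int main(void)
	{
		struct fs_root root = { .nodesize = 16384 };
		struct reloc_control rc = { .extent_root = &root };

		printf("processed: %d\n", tree_block_processed(32768, &rc));
		return 0;
	}
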
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f2bb13a23f86..ec57687c9a4d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -66,7 +66,6 @@ struct scrub_ctx;
66struct scrub_recover { 66struct scrub_recover {
67 atomic_t refs; 67 atomic_t refs;
68 struct btrfs_bio *bbio; 68 struct btrfs_bio *bbio;
69 u64 *raid_map;
70 u64 map_length; 69 u64 map_length;
71}; 70};
72 71
@@ -80,7 +79,7 @@ struct scrub_page {
80 u64 logical; 79 u64 logical;
81 u64 physical; 80 u64 physical;
82 u64 physical_for_dev_replace; 81 u64 physical_for_dev_replace;
83 atomic_t ref_count; 82 atomic_t refs;
84 struct { 83 struct {
85 unsigned int mirror_num:8; 84 unsigned int mirror_num:8;
86 unsigned int have_csum:1; 85 unsigned int have_csum:1;
@@ -113,7 +112,7 @@ struct scrub_block {
113 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; 112 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
114 int page_count; 113 int page_count;
115 atomic_t outstanding_pages; 114 atomic_t outstanding_pages;
116 atomic_t ref_count; /* free mem on transition to zero */ 115 atomic_t refs; /* free mem on transition to zero */
117 struct scrub_ctx *sctx; 116 struct scrub_ctx *sctx;
118 struct scrub_parity *sparity; 117 struct scrub_parity *sparity;
119 struct { 118 struct {
@@ -142,7 +141,7 @@ struct scrub_parity {
142 141
143 int stripe_len; 142 int stripe_len;
144 143
145 atomic_t ref_count; 144 atomic_t refs;
146 145
147 struct list_head spages; 146 struct list_head spages;
148 147
@@ -194,6 +193,15 @@ struct scrub_ctx {
194 */ 193 */
195 struct btrfs_scrub_progress stat; 194 struct btrfs_scrub_progress stat;
196 spinlock_t stat_lock; 195 spinlock_t stat_lock;
196
197 /*
198 * Use a ref counter to avoid use-after-free issues. Scrub workers
199 * decrement bios_in_flight and workers_pending and then do a wakeup
200 * on the list_wait wait queue. We must ensure the main scrub task
201 * doesn't free the scrub context before or while the workers are
202 * doing the wakeup() call.
203 */
204 atomic_t refs;
197}; 205};
198 206
199struct scrub_fixup_nodatasum { 207struct scrub_fixup_nodatasum {
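
The refs counter added to struct scrub_ctx is the usual get/put lifetime pattern: the setup path takes the initial reference, every in-flight bio and pending worker takes another, and the context is freed only when the last reference drops, so a late wake_up() can never touch freed memory. A standalone sketch of that pattern using C11 atomics (the kernel uses atomic_t and atomic_dec_and_test; the names below are illustrative):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct ctx {
		atomic_int refs;
	};

	static struct ctx *ctx_alloc(void)
	{
		struct ctx *c = malloc(sizeof(*c));

		if (c)
			atomic_store(&c->refs, 1);	/* setup path's reference */
		return c;
	}

	static void ctx_get(struct ctx *c)
	{
		atomic_fetch_add(&c->refs, 1);
	}

	static void ctx_put(struct ctx *c)
	{
		/* atomic_fetch_sub returns the old value: 1 means we were last */
		if (atomic_fetch_sub(&c->refs, 1) == 1)
			free(c);
	}

	int main(void)
	{
		struct ctx *c = ctx_alloc();

		if (!c)
			return 1;
		ctx_get(c);	/* a bio goes in flight */
		ctx_put(c);	/* ...and completes: the wakeup sees a live ctx */
		ctx_put(c);	/* owner drops the last reference: freed here */
		return 0;
	}
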
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
236static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); 244static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
237static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); 245static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
238static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); 246static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
239static int scrub_setup_recheck_block(struct scrub_ctx *sctx, 247static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
240 struct btrfs_fs_info *fs_info,
241 struct scrub_block *original_sblock,
242 u64 length, u64 logical,
243 struct scrub_block *sblocks_for_recheck); 248 struct scrub_block *sblocks_for_recheck);
244static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 249static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
245 struct scrub_block *sblock, int is_metadata, 250 struct scrub_block *sblock, int is_metadata,
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
251 const u8 *csum, u64 generation, 256 const u8 *csum, u64 generation,
252 u16 csum_size); 257 u16 csum_size);
253static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, 258static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
254 struct scrub_block *sblock_good, 259 struct scrub_block *sblock_good);
255 int force_write);
256static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, 260static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
257 struct scrub_block *sblock_good, 261 struct scrub_block *sblock_good,
258 int page_num, int force_write); 262 int page_num, int force_write);
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
302static void copy_nocow_pages_worker(struct btrfs_work *work); 306static void copy_nocow_pages_worker(struct btrfs_work *work);
303static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 307static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
304static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 308static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
309static void scrub_put_ctx(struct scrub_ctx *sctx);
305 310
306 311
307static void scrub_pending_bio_inc(struct scrub_ctx *sctx) 312static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
308{ 313{
314 atomic_inc(&sctx->refs);
309 atomic_inc(&sctx->bios_in_flight); 315 atomic_inc(&sctx->bios_in_flight);
310} 316}
311 317
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
313{ 319{
314 atomic_dec(&sctx->bios_in_flight); 320 atomic_dec(&sctx->bios_in_flight);
315 wake_up(&sctx->list_wait); 321 wake_up(&sctx->list_wait);
322 scrub_put_ctx(sctx);
316} 323}
317 324
318static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) 325static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
346{ 353{
347 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; 354 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
348 355
356 atomic_inc(&sctx->refs);
349 /* 357 /*
350 * increment scrubs_running to prevent cancel requests from 358 * increment scrubs_running to prevent cancel requests from
351 * completing as long as a worker is running. we must also 359 * completing as long as a worker is running. we must also
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
388 atomic_dec(&sctx->workers_pending); 396 atomic_dec(&sctx->workers_pending);
389 wake_up(&fs_info->scrub_pause_wait); 397 wake_up(&fs_info->scrub_pause_wait);
390 wake_up(&sctx->list_wait); 398 wake_up(&sctx->list_wait);
399 scrub_put_ctx(sctx);
391} 400}
392 401
393static void scrub_free_csums(struct scrub_ctx *sctx) 402static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
433 kfree(sctx); 442 kfree(sctx);
434} 443}
435 444
445static void scrub_put_ctx(struct scrub_ctx *sctx)
446{
447 if (atomic_dec_and_test(&sctx->refs))
448 scrub_free_ctx(sctx);
449}
450
436static noinline_for_stack 451static noinline_for_stack
437struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) 452struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
438{ 453{
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
457 sctx = kzalloc(sizeof(*sctx), GFP_NOFS); 472 sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
458 if (!sctx) 473 if (!sctx)
459 goto nomem; 474 goto nomem;
475 atomic_set(&sctx->refs, 1);
460 sctx->is_dev_replace = is_dev_replace; 476 sctx->is_dev_replace = is_dev_replace;
461 sctx->pages_per_rd_bio = pages_per_rd_bio; 477 sctx->pages_per_rd_bio = pages_per_rd_bio;
462 sctx->curr = -1; 478 sctx->curr = -1;
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
520 struct inode_fs_paths *ipath = NULL; 536 struct inode_fs_paths *ipath = NULL;
521 struct btrfs_root *local_root; 537 struct btrfs_root *local_root;
522 struct btrfs_key root_key; 538 struct btrfs_key root_key;
539 struct btrfs_key key;
523 540
524 root_key.objectid = root; 541 root_key.objectid = root;
525 root_key.type = BTRFS_ROOT_ITEM_KEY; 542 root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
530 goto err; 547 goto err;
531 } 548 }
532 549
533 ret = inode_item_info(inum, 0, local_root, swarn->path); 550 /*
 551 * this makes the path point to (inum INODE_ITEM 0)
552 */
553 key.objectid = inum;
554 key.type = BTRFS_INODE_ITEM_KEY;
555 key.offset = 0;
556
557 ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
534 if (ret) { 558 if (ret) {
535 btrfs_release_path(swarn->path); 559 btrfs_release_path(swarn->path);
536 goto err; 560 goto err;
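
With the inode_item_info() helper removed elsewhere in this patch, scrub open-codes the lookup: an inode item is addressed by the key triple (objectid = inode number, type = BTRFS_INODE_ITEM_KEY, offset = 0). A hedged sketch of just the key construction, with simplified types:

	#include <stdint.h>
	#include <stdio.h>

	/* shape of a btrfs key; types simplified for the sketch */
	struct key {
		uint64_t objectid;	/* the inode number */
		uint8_t type;		/* item type */
		uint64_t offset;	/* 0 for an inode item */
	};

	#define INODE_ITEM_KEY 1	/* stands in for BTRFS_INODE_ITEM_KEY */

	static struct key inode_item_key(uint64_t inum)
	{
		struct key k = {
			.objectid = inum,
			.type = INODE_ITEM_KEY,
			.offset = 0,
		};

		return k;
	}

	int main(void)
	{
		struct key k = inode_item_key(257);

		printf("(%llu %u %llu)\n", (unsigned long long)k.objectid,
		       (unsigned)k.type, (unsigned long long)k.offset);
		return 0;
	}
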
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
848static inline void scrub_put_recover(struct scrub_recover *recover) 872static inline void scrub_put_recover(struct scrub_recover *recover)
849{ 873{
850 if (atomic_dec_and_test(&recover->refs)) { 874 if (atomic_dec_and_test(&recover->refs)) {
851 kfree(recover->bbio); 875 btrfs_put_bbio(recover->bbio);
852 kfree(recover->raid_map);
853 kfree(recover); 876 kfree(recover);
854 } 877 }
855} 878}
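
This hunk reflects a volumes.c change in the same series: raid_map now lives inside struct btrfs_bio and the bbio is reference-counted, so the paired kfree() calls collapse into one btrfs_put_bbio(). A simplified sketch of that ownership model (made-up userspace types, C11 atomics standing in for atomic_t):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdlib.h>

	struct bbio {
		atomic_int refs;
		uint64_t *raid_map;	/* owned by the bbio, freed with it */
	};

	static void bbio_put(struct bbio *b)
	{
		if (b && atomic_fetch_sub(&b->refs, 1) == 1) {
			free(b->raid_map);	/* one release frees both */
			free(b);
		}
	}

	struct recover {
		atomic_int refs;
		struct bbio *bbio;	/* no separate raid_map pointer anymore */
	};

	static void recover_put(struct recover *r)
	{
		if (atomic_fetch_sub(&r->refs, 1) == 1) {
			bbio_put(r->bbio);	/* drops the map too */
			free(r);
		}
	}

	int main(void)
	{
		struct bbio *b = calloc(1, sizeof(*b));
		struct recover *r = calloc(1, sizeof(*r));

		if (!b || !r)
			return 1;
		atomic_store(&b->refs, 1);
		atomic_store(&r->refs, 1);
		r->bbio = b;
		recover_put(r);		/* frees recover, bbio and raid_map */
		return 0;
	}
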
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
955 } 978 }
956 979
957 /* setup the context, map the logical blocks and alloc the pages */ 980 /* setup the context, map the logical blocks and alloc the pages */
958 ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length, 981 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
959 logical, sblocks_for_recheck);
960 if (ret) { 982 if (ret) {
961 spin_lock(&sctx->stat_lock); 983 spin_lock(&sctx->stat_lock);
962 sctx->stat.read_errors++; 984 sctx->stat.read_errors++;
@@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
1030 if (!is_metadata && !have_csum) { 1052 if (!is_metadata && !have_csum) {
1031 struct scrub_fixup_nodatasum *fixup_nodatasum; 1053 struct scrub_fixup_nodatasum *fixup_nodatasum;
1032 1054
1033nodatasum_case:
1034 WARN_ON(sctx->is_dev_replace); 1055 WARN_ON(sctx->is_dev_replace);
1035 1056
1057nodatasum_case:
1058
1036 /* 1059 /*
1037 * !is_metadata and !have_csum, this means that the data 1060 * !is_metadata and !have_csum, this means that the data
1038 * might not be COW'ed, that it might be modified 1061 * might not be COW'ed, that it might be modified
@@ -1091,76 +1114,20 @@ nodatasum_case:
1091 sblock_other->no_io_error_seen) { 1114 sblock_other->no_io_error_seen) {
1092 if (sctx->is_dev_replace) { 1115 if (sctx->is_dev_replace) {
1093 scrub_write_block_to_dev_replace(sblock_other); 1116 scrub_write_block_to_dev_replace(sblock_other);
1117 goto corrected_error;
1094 } else { 1118 } else {
1095 int force_write = is_metadata || have_csum;
1096
1097 ret = scrub_repair_block_from_good_copy( 1119 ret = scrub_repair_block_from_good_copy(
1098 sblock_bad, sblock_other, 1120 sblock_bad, sblock_other);
1099 force_write); 1121 if (!ret)
1122 goto corrected_error;
1100 } 1123 }
1101 if (0 == ret)
1102 goto corrected_error;
1103 } 1124 }
1104 } 1125 }
1105 1126
1106 /* 1127 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1107 * for dev_replace, pick good pages and write to the target device. 1128 goto did_not_correct_error;
1108 */
1109 if (sctx->is_dev_replace) {
1110 success = 1;
1111 for (page_num = 0; page_num < sblock_bad->page_count;
1112 page_num++) {
1113 int sub_success;
1114
1115 sub_success = 0;
1116 for (mirror_index = 0;
1117 mirror_index < BTRFS_MAX_MIRRORS &&
1118 sblocks_for_recheck[mirror_index].page_count > 0;
1119 mirror_index++) {
1120 struct scrub_block *sblock_other =
1121 sblocks_for_recheck + mirror_index;
1122 struct scrub_page *page_other =
1123 sblock_other->pagev[page_num];
1124
1125 if (!page_other->io_error) {
1126 ret = scrub_write_page_to_dev_replace(
1127 sblock_other, page_num);
1128 if (ret == 0) {
1129 /* succeeded for this page */
1130 sub_success = 1;
1131 break;
1132 } else {
1133 btrfs_dev_replace_stats_inc(
1134 &sctx->dev_root->
1135 fs_info->dev_replace.
1136 num_write_errors);
1137 }
1138 }
1139 }
1140
1141 if (!sub_success) {
1142 /*
1143 * did not find a mirror to fetch the page
1144 * from. scrub_write_page_to_dev_replace()
1145 * handles this case (page->io_error), by
1146 * filling the block with zeros before
1147 * submitting the write request
1148 */
1149 success = 0;
1150 ret = scrub_write_page_to_dev_replace(
1151 sblock_bad, page_num);
1152 if (ret)
1153 btrfs_dev_replace_stats_inc(
1154 &sctx->dev_root->fs_info->
1155 dev_replace.num_write_errors);
1156 }
1157 }
1158
1159 goto out;
1160 }
1161 1129
1162 /* 1130 /*
1163 * for regular scrub, repair those pages that are errored.
1164 * In case of I/O errors in the area that is supposed to be 1131 * In case of I/O errors in the area that is supposed to be
1165 * repaired, continue by picking good copies of those pages. 1132 * repaired, continue by picking good copies of those pages.
1166 * Select the good pages from mirrors to rewrite bad pages from 1133 * Select the good pages from mirrors to rewrite bad pages from
@@ -1184,44 +1151,64 @@ nodatasum_case:
1184 * mirror, even if other 512 byte sectors in the same PAGE_SIZE 1151 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
1185 * area are unreadable. 1152 * area are unreadable.
1186 */ 1153 */
1187
1188 /* can only fix I/O errors from here on */
1189 if (sblock_bad->no_io_error_seen)
1190 goto did_not_correct_error;
1191
1192 success = 1; 1154 success = 1;
1193 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { 1155 for (page_num = 0; page_num < sblock_bad->page_count;
1156 page_num++) {
1194 struct scrub_page *page_bad = sblock_bad->pagev[page_num]; 1157 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1158 struct scrub_block *sblock_other = NULL;
1195 1159
1196 if (!page_bad->io_error) 1160 /* skip no-io-error page in scrub */
1161 if (!page_bad->io_error && !sctx->is_dev_replace)
1197 continue; 1162 continue;
1198 1163
1199 for (mirror_index = 0; 1164 /* try to find no-io-error page in mirrors */
1200 mirror_index < BTRFS_MAX_MIRRORS && 1165 if (page_bad->io_error) {
1201 sblocks_for_recheck[mirror_index].page_count > 0; 1166 for (mirror_index = 0;
1202 mirror_index++) { 1167 mirror_index < BTRFS_MAX_MIRRORS &&
1203 struct scrub_block *sblock_other = sblocks_for_recheck + 1168 sblocks_for_recheck[mirror_index].page_count > 0;
1204 mirror_index; 1169 mirror_index++) {
1205 struct scrub_page *page_other = sblock_other->pagev[ 1170 if (!sblocks_for_recheck[mirror_index].
1206 page_num]; 1171 pagev[page_num]->io_error) {
1207 1172 sblock_other = sblocks_for_recheck +
1208 if (!page_other->io_error) { 1173 mirror_index;
1209 ret = scrub_repair_page_from_good_copy( 1174 break;
1210 sblock_bad, sblock_other, page_num, 0);
1211 if (0 == ret) {
1212 page_bad->io_error = 0;
1213 break; /* succeeded for this page */
1214 } 1175 }
1215 } 1176 }
1177 if (!sblock_other)
1178 success = 0;
1216 } 1179 }
1217 1180
1218 if (page_bad->io_error) { 1181 if (sctx->is_dev_replace) {
1219 /* did not find a mirror to copy the page from */ 1182 /*
1220 success = 0; 1183 * did not find a mirror to fetch the page
1184 * from. scrub_write_page_to_dev_replace()
1185 * handles this case (page->io_error), by
1186 * filling the block with zeros before
1187 * submitting the write request
1188 */
1189 if (!sblock_other)
1190 sblock_other = sblock_bad;
1191
1192 if (scrub_write_page_to_dev_replace(sblock_other,
1193 page_num) != 0) {
1194 btrfs_dev_replace_stats_inc(
1195 &sctx->dev_root->
1196 fs_info->dev_replace.
1197 num_write_errors);
1198 success = 0;
1199 }
1200 } else if (sblock_other) {
1201 ret = scrub_repair_page_from_good_copy(sblock_bad,
1202 sblock_other,
1203 page_num, 0);
1204 if (0 == ret)
1205 page_bad->io_error = 0;
1206 else
1207 success = 0;
1221 } 1208 }
1222 } 1209 }
1223 1210
1224 if (success) { 1211 if (success && !sctx->is_dev_replace) {
1225 if (is_metadata || have_csum) { 1212 if (is_metadata || have_csum) {
1226 /* 1213 /*
1227 * need to verify the checksum now that all 1214 * need to verify the checksum now that all
@@ -1288,19 +1275,18 @@ out:
1288 return 0; 1275 return 0;
1289} 1276}
1290 1277
1291static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map) 1278static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1292{ 1279{
1293 if (raid_map) { 1280 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1294 if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) 1281 return 2;
1295 return 3; 1282 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1296 else 1283 return 3;
1297 return 2; 1284 else
1298 } else {
1299 return (int)bbio->num_stripes; 1285 return (int)bbio->num_stripes;
1300 }
1301} 1286}
1302 1287
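
With the map type available on the bbio, the number of recheck mirrors follows from the profile alone: a RAID5 stripe can be read two ways (directly, or rebuilt from parity), RAID6 three ways (directly, via P, or via Q), and everything else has one copy per stripe. A sketch with illustrative flag values (not the real BTRFS_BLOCK_GROUP_* bits):

	#include <stdint.h>
	#include <stdio.h>

	/* illustrative profile bits, not the real on-disk values */
	#define BG_RAID5 (1ULL << 0)
	#define BG_RAID6 (1ULL << 1)

	struct bbio {
		uint64_t map_type;
		int num_stripes;
	};

	static int nr_mirrors(const struct bbio *b)
	{
		if (b->map_type & BG_RAID5)
			return 2;	/* the data itself, or a P rebuild */
		if (b->map_type & BG_RAID6)
			return 3;	/* data, P rebuild, or Q rebuild */
		return b->num_stripes;	/* RAID1/DUP/...: one copy per stripe */
	}

	int main(void)
	{
		struct bbio r6 = { .map_type = BG_RAID6, .num_stripes = 5 };

		printf("raid6 recheck mirrors: %d\n", nr_mirrors(&r6));
		return 0;
	}
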
1303static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, 1288static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1289 u64 *raid_map,
1304 u64 mapped_length, 1290 u64 mapped_length,
1305 int nstripes, int mirror, 1291 int nstripes, int mirror,
1306 int *stripe_index, 1292 int *stripe_index,
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
1308{ 1294{
1309 int i; 1295 int i;
1310 1296
1311 if (raid_map) { 1297 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
1312 /* RAID5/6 */ 1298 /* RAID5/6 */
1313 for (i = 0; i < nstripes; i++) { 1299 for (i = 0; i < nstripes; i++) {
1314 if (raid_map[i] == RAID6_Q_STRIPE || 1300 if (raid_map[i] == RAID6_Q_STRIPE ||
@@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
1329 } 1315 }
1330} 1316}
1331 1317
1332static int scrub_setup_recheck_block(struct scrub_ctx *sctx, 1318static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1333 struct btrfs_fs_info *fs_info,
1334 struct scrub_block *original_sblock,
1335 u64 length, u64 logical,
1336 struct scrub_block *sblocks_for_recheck) 1319 struct scrub_block *sblocks_for_recheck)
1337{ 1320{
1321 struct scrub_ctx *sctx = original_sblock->sctx;
1322 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
1323 u64 length = original_sblock->page_count * PAGE_SIZE;
1324 u64 logical = original_sblock->pagev[0]->logical;
1338 struct scrub_recover *recover; 1325 struct scrub_recover *recover;
1339 struct btrfs_bio *bbio; 1326 struct btrfs_bio *bbio;
1340 u64 *raid_map;
1341 u64 sublen; 1327 u64 sublen;
1342 u64 mapped_length; 1328 u64 mapped_length;
1343 u64 stripe_offset; 1329 u64 stripe_offset;
1344 int stripe_index; 1330 int stripe_index;
1345 int page_index; 1331 int page_index = 0;
1346 int mirror_index; 1332 int mirror_index;
1347 int nmirrors; 1333 int nmirrors;
1348 int ret; 1334 int ret;
1349 1335
1350 /* 1336 /*
1351 * note: the two members ref_count and outstanding_pages 1337 * note: the two members refs and outstanding_pages
1352 * are not used (and not set) in the blocks that are used for 1338 * are not used (and not set) in the blocks that are used for
1353 * the recheck procedure 1339 * the recheck procedure
1354 */ 1340 */
1355 1341
1356 page_index = 0;
1357 while (length > 0) { 1342 while (length > 0) {
1358 sublen = min_t(u64, length, PAGE_SIZE); 1343 sublen = min_t(u64, length, PAGE_SIZE);
1359 mapped_length = sublen; 1344 mapped_length = sublen;
1360 bbio = NULL; 1345 bbio = NULL;
1361 raid_map = NULL;
1362 1346
1363 /* 1347 /*
1364 * with a length of PAGE_SIZE, each returned stripe 1348 * with a length of PAGE_SIZE, each returned stripe
1365 * represents one mirror 1349 * represents one mirror
1366 */ 1350 */
1367 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, 1351 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
1368 &mapped_length, &bbio, 0, &raid_map); 1352 &mapped_length, &bbio, 0, 1);
1369 if (ret || !bbio || mapped_length < sublen) { 1353 if (ret || !bbio || mapped_length < sublen) {
1370 kfree(bbio); 1354 btrfs_put_bbio(bbio);
1371 kfree(raid_map);
1372 return -EIO; 1355 return -EIO;
1373 } 1356 }
1374 1357
1375 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); 1358 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1376 if (!recover) { 1359 if (!recover) {
1377 kfree(bbio); 1360 btrfs_put_bbio(bbio);
1378 kfree(raid_map);
1379 return -ENOMEM; 1361 return -ENOMEM;
1380 } 1362 }
1381 1363
1382 atomic_set(&recover->refs, 1); 1364 atomic_set(&recover->refs, 1);
1383 recover->bbio = bbio; 1365 recover->bbio = bbio;
1384 recover->raid_map = raid_map;
1385 recover->map_length = mapped_length; 1366 recover->map_length = mapped_length;
1386 1367
1387 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); 1368 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1388 1369
1389 nmirrors = scrub_nr_raid_mirrors(bbio, raid_map); 1370 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1371
1390 for (mirror_index = 0; mirror_index < nmirrors; 1372 for (mirror_index = 0; mirror_index < nmirrors;
1391 mirror_index++) { 1373 mirror_index++) {
1392 struct scrub_block *sblock; 1374 struct scrub_block *sblock;
1393 struct scrub_page *page; 1375 struct scrub_page *page;
1394 1376
1395 if (mirror_index >= BTRFS_MAX_MIRRORS)
1396 continue;
1397
1398 sblock = sblocks_for_recheck + mirror_index; 1377 sblock = sblocks_for_recheck + mirror_index;
1399 sblock->sctx = sctx; 1378 sblock->sctx = sctx;
1400 page = kzalloc(sizeof(*page), GFP_NOFS); 1379 page = kzalloc(sizeof(*page), GFP_NOFS);
@@ -1410,9 +1389,12 @@ leave_nomem:
1410 sblock->pagev[page_index] = page; 1389 sblock->pagev[page_index] = page;
1411 page->logical = logical; 1390 page->logical = logical;
1412 1391
1413 scrub_stripe_index_and_offset(logical, raid_map, 1392 scrub_stripe_index_and_offset(logical,
1393 bbio->map_type,
1394 bbio->raid_map,
1414 mapped_length, 1395 mapped_length,
1415 bbio->num_stripes, 1396 bbio->num_stripes -
1397 bbio->num_tgtdevs,
1416 mirror_index, 1398 mirror_index,
1417 &stripe_index, 1399 &stripe_index,
1418 &stripe_offset); 1400 &stripe_offset);
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)
1458 1440
1459static inline int scrub_is_page_on_raid56(struct scrub_page *page) 1441static inline int scrub_is_page_on_raid56(struct scrub_page *page)
1460{ 1442{
1461 return page->recover && page->recover->raid_map; 1443 return page->recover &&
1444 (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
1462} 1445}
1463 1446
1464static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, 1447static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1475 bio->bi_end_io = scrub_bio_wait_endio; 1458 bio->bi_end_io = scrub_bio_wait_endio;
1476 1459
1477 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, 1460 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
1478 page->recover->raid_map,
1479 page->recover->map_length, 1461 page->recover->map_length,
1480 page->mirror_num, 0); 1462 page->mirror_num, 0);
1481 if (ret) 1463 if (ret)
@@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1615} 1597}
1616 1598
1617static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, 1599static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1618 struct scrub_block *sblock_good, 1600 struct scrub_block *sblock_good)
1619 int force_write)
1620{ 1601{
1621 int page_num; 1602 int page_num;
1622 int ret = 0; 1603 int ret = 0;
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1626 1607
1627 ret_sub = scrub_repair_page_from_good_copy(sblock_bad, 1608 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1628 sblock_good, 1609 sblock_good,
1629 page_num, 1610 page_num, 1);
1630 force_write);
1631 if (ret_sub) 1611 if (ret_sub)
1632 ret = ret_sub; 1612 ret = ret_sub;
1633 } 1613 }
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
2067 2047
2068static void scrub_block_get(struct scrub_block *sblock) 2048static void scrub_block_get(struct scrub_block *sblock)
2069{ 2049{
2070 atomic_inc(&sblock->ref_count); 2050 atomic_inc(&sblock->refs);
2071} 2051}
2072 2052
2073static void scrub_block_put(struct scrub_block *sblock) 2053static void scrub_block_put(struct scrub_block *sblock)
2074{ 2054{
2075 if (atomic_dec_and_test(&sblock->ref_count)) { 2055 if (atomic_dec_and_test(&sblock->refs)) {
2076 int i; 2056 int i;
2077 2057
2078 if (sblock->sparity) 2058 if (sblock->sparity)
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)
2086 2066
2087static void scrub_page_get(struct scrub_page *spage) 2067static void scrub_page_get(struct scrub_page *spage)
2088{ 2068{
2089 atomic_inc(&spage->ref_count); 2069 atomic_inc(&spage->refs);
2090} 2070}
2091 2071
2092static void scrub_page_put(struct scrub_page *spage) 2072static void scrub_page_put(struct scrub_page *spage)
2093{ 2073{
2094 if (atomic_dec_and_test(&spage->ref_count)) { 2074 if (atomic_dec_and_test(&spage->refs)) {
2095 if (spage->page) 2075 if (spage->page)
2096 __free_page(spage->page); 2076 __free_page(spage->page);
2097 kfree(spage); 2077 kfree(spage);
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2217 2197
2218 /* one ref inside this function, plus one for each page added to 2198 /* one ref inside this function, plus one for each page added to
2219 * a bio later on */ 2199 * a bio later on */
2220 atomic_set(&sblock->ref_count, 1); 2200 atomic_set(&sblock->refs, 1);
2221 sblock->sctx = sctx; 2201 sblock->sctx = sctx;
2222 sblock->no_io_error_seen = 1; 2202 sblock->no_io_error_seen = 1;
2223 2203
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
2510 2490
2511 /* one ref inside this function, plus one for each page added to 2491 /* one ref inside this function, plus one for each page added to
2512 * a bio later on */ 2492 * a bio later on */
2513 atomic_set(&sblock->ref_count, 1); 2493 atomic_set(&sblock->refs, 1);
2514 sblock->sctx = sctx; 2494 sblock->sctx = sctx;
2515 sblock->no_io_error_seen = 1; 2495 sblock->no_io_error_seen = 1;
2516 sblock->sparity = sparity; 2496 sblock->sparity = sparity;
@@ -2607,9 +2587,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
2607 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev, 2587 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2608 flags, gen, mirror_num, 2588 flags, gen, mirror_num,
2609 have_csum ? csum : NULL); 2589 have_csum ? csum : NULL);
2610skip:
2611 if (ret) 2590 if (ret)
2612 return ret; 2591 return ret;
2592skip:
2613 len -= l; 2593 len -= l;
2614 logical += l; 2594 logical += l;
2615 physical += l; 2595 physical += l;
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2705 struct btrfs_raid_bio *rbio; 2685 struct btrfs_raid_bio *rbio;
2706 struct scrub_page *spage; 2686 struct scrub_page *spage;
2707 struct btrfs_bio *bbio = NULL; 2687 struct btrfs_bio *bbio = NULL;
2708 u64 *raid_map = NULL;
2709 u64 length; 2688 u64 length;
2710 int ret; 2689 int ret;
2711 2690
@@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2716 length = sparity->logic_end - sparity->logic_start + 1; 2695 length = sparity->logic_end - sparity->logic_start + 1;
2717 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, 2696 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2718 sparity->logic_start, 2697 sparity->logic_start,
2719 &length, &bbio, 0, &raid_map); 2698 &length, &bbio, 0, 1);
2720 if (ret || !bbio || !raid_map) 2699 if (ret || !bbio || !bbio->raid_map)
2721 goto bbio_out; 2700 goto bbio_out;
2722 2701
2723 bio = btrfs_io_bio_alloc(GFP_NOFS, 0); 2702 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2729 bio->bi_end_io = scrub_parity_bio_endio; 2708 bio->bi_end_io = scrub_parity_bio_endio;
2730 2709
2731 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, 2710 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2732 raid_map, length, 2711 length, sparity->scrub_dev,
2733 sparity->scrub_dev,
2734 sparity->dbitmap, 2712 sparity->dbitmap,
2735 sparity->nsectors); 2713 sparity->nsectors);
2736 if (!rbio) 2714 if (!rbio)
@@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2747rbio_out: 2725rbio_out:
2748 bio_put(bio); 2726 bio_put(bio);
2749bbio_out: 2727bbio_out:
2750 kfree(bbio); 2728 btrfs_put_bbio(bbio);
2751 kfree(raid_map);
2752 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, 2729 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2753 sparity->nsectors); 2730 sparity->nsectors);
2754 spin_lock(&sctx->stat_lock); 2731 spin_lock(&sctx->stat_lock);
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
2765 2742
2766static void scrub_parity_get(struct scrub_parity *sparity) 2743static void scrub_parity_get(struct scrub_parity *sparity)
2767{ 2744{
2768 atomic_inc(&sparity->ref_count); 2745 atomic_inc(&sparity->refs);
2769} 2746}
2770 2747
2771static void scrub_parity_put(struct scrub_parity *sparity) 2748static void scrub_parity_put(struct scrub_parity *sparity)
2772{ 2749{
2773 if (!atomic_dec_and_test(&sparity->ref_count)) 2750 if (!atomic_dec_and_test(&sparity->refs))
2774 return; 2751 return;
2775 2752
2776 scrub_parity_check_and_repair(sparity); 2753 scrub_parity_check_and_repair(sparity);
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2820 sparity->scrub_dev = sdev; 2797 sparity->scrub_dev = sdev;
2821 sparity->logic_start = logic_start; 2798 sparity->logic_start = logic_start;
2822 sparity->logic_end = logic_end; 2799 sparity->logic_end = logic_end;
2823 atomic_set(&sparity->ref_count, 1); 2800 atomic_set(&sparity->refs, 1);
2824 INIT_LIST_HEAD(&sparity->spages); 2801 INIT_LIST_HEAD(&sparity->spages);
2825 sparity->dbitmap = sparity->bitmap; 2802 sparity->dbitmap = sparity->bitmap;
2826 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; 2803 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3037 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 3014 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3038 increment = map->stripe_len; 3015 increment = map->stripe_len;
3039 mirror_num = num % map->num_stripes + 1; 3016 mirror_num = num % map->num_stripes + 1;
3040 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3017 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3041 BTRFS_BLOCK_GROUP_RAID6)) {
3042 get_raid56_logic_offset(physical, num, map, &offset, NULL); 3018 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3043 increment = map->stripe_len * nr_data_stripes(map); 3019 increment = map->stripe_len * nr_data_stripes(map);
3044 mirror_num = 1; 3020 mirror_num = 1;
@@ -3053,7 +3029,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3053 3029
3054 ppath = btrfs_alloc_path(); 3030 ppath = btrfs_alloc_path();
3055 if (!ppath) { 3031 if (!ppath) {
3056 btrfs_free_path(ppath); 3032 btrfs_free_path(path);
3057 return -ENOMEM; 3033 return -ENOMEM;
3058 } 3034 }
3059 3035
@@ -3065,6 +3041,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3065 path->search_commit_root = 1; 3041 path->search_commit_root = 1;
3066 path->skip_locking = 1; 3042 path->skip_locking = 1;
3067 3043
3044 ppath->search_commit_root = 1;
3045 ppath->skip_locking = 1;
3068 /* 3046 /*
 3069 * trigger the readahead for extent tree and csum tree and wait for 3047 * trigger the readahead for extent tree and csum tree and wait for
3070 * completion. During readahead, the scrub is officially paused 3048 * completion. During readahead, the scrub is officially paused
@@ -3072,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3072 */ 3050 */
3073 logical = base + offset; 3051 logical = base + offset;
3074 physical_end = physical + nstripes * map->stripe_len; 3052 physical_end = physical + nstripes * map->stripe_len;
3075 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3053 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3076 BTRFS_BLOCK_GROUP_RAID6)) {
3077 get_raid56_logic_offset(physical_end, num, 3054 get_raid56_logic_offset(physical_end, num,
3078 map, &logic_end, NULL); 3055 map, &logic_end, NULL);
3079 logic_end += base; 3056 logic_end += base;
@@ -3119,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3119 ret = 0; 3096 ret = 0;
3120 while (physical < physical_end) { 3097 while (physical < physical_end) {
3121 /* for raid56, we skip parity stripe */ 3098 /* for raid56, we skip parity stripe */
3122 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3099 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3123 BTRFS_BLOCK_GROUP_RAID6)) {
3124 ret = get_raid56_logic_offset(physical, num, 3100 ret = get_raid56_logic_offset(physical, num,
3125 map, &logical, &stripe_logical); 3101 map, &logical, &stripe_logical);
3126 logical += base; 3102 logical += base;
@@ -3278,8 +3254,7 @@ again:
3278 scrub_free_csums(sctx); 3254 scrub_free_csums(sctx);
3279 if (extent_logical + extent_len < 3255 if (extent_logical + extent_len <
3280 key.objectid + bytes) { 3256 key.objectid + bytes) {
3281 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3257 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3282 BTRFS_BLOCK_GROUP_RAID6)) {
3283 /* 3258 /*
3284 * loop until we find next data stripe 3259 * loop until we find next data stripe
3285 * or we have finished all stripes. 3260 * or we have finished all stripes.
@@ -3773,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3773 scrub_workers_put(fs_info); 3748 scrub_workers_put(fs_info);
3774 mutex_unlock(&fs_info->scrub_lock); 3749 mutex_unlock(&fs_info->scrub_lock);
3775 3750
3776 scrub_free_ctx(sctx); 3751 scrub_put_ctx(sctx);
3777 3752
3778 return ret; 3753 return ret;
3779} 3754}
@@ -3879,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3879 &mapped_length, &bbio, 0); 3854 &mapped_length, &bbio, 0);
3880 if (ret || !bbio || mapped_length < extent_len || 3855 if (ret || !bbio || mapped_length < extent_len ||
3881 !bbio->stripes[0].dev->bdev) { 3856 !bbio->stripes[0].dev->bdev) {
3882 kfree(bbio); 3857 btrfs_put_bbio(bbio);
3883 return; 3858 return;
3884 } 3859 }
3885 3860
3886 *extent_physical = bbio->stripes[0].physical; 3861 *extent_physical = bbio->stripes[0].physical;
3887 *extent_mirror_num = bbio->mirror_num; 3862 *extent_mirror_num = bbio->mirror_num;
3888 *extent_dev = bbio->stripes[0].dev; 3863 *extent_dev = bbio->stripes[0].dev;
3889 kfree(bbio); 3864 btrfs_put_bbio(bbio);
3890} 3865}
3891 3866
3892static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, 3867static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 804432dbc351..d6033f540cc7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
230 u64 parent_ino; 230 u64 parent_ino;
231 u64 ino; 231 u64 ino;
232 u64 gen; 232 u64 gen;
233 bool is_orphan;
233 struct list_head update_refs; 234 struct list_head update_refs;
234}; 235};
235 236
@@ -2471,12 +2472,9 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
2471 if (ret < 0) 2472 if (ret < 0)
2472 goto out; 2473 goto out;
2473 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2474 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2474 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, 2475 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
2475 btrfs_inode_atime(ii)); 2476 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
2476 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, 2477 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
2477 btrfs_inode_mtime(ii));
2478 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2479 btrfs_inode_ctime(ii));
2480 /* TODO Add otime support when the otime patches get into upstream */ 2478 /* TODO Add otime support when the otime patches get into upstream */
2481 2479
2482 ret = send_cmd(sctx); 2480 ret = send_cmd(sctx);
@@ -2987,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2987 u64 ino_gen, 2985 u64 ino_gen,
2988 u64 parent_ino, 2986 u64 parent_ino,
2989 struct list_head *new_refs, 2987 struct list_head *new_refs,
2990 struct list_head *deleted_refs) 2988 struct list_head *deleted_refs,
2989 const bool is_orphan)
2991{ 2990{
2992 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2991 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2993 struct rb_node *parent = NULL; 2992 struct rb_node *parent = NULL;
@@ -3002,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
3002 pm->parent_ino = parent_ino; 3001 pm->parent_ino = parent_ino;
3003 pm->ino = ino; 3002 pm->ino = ino;
3004 pm->gen = ino_gen; 3003 pm->gen = ino_gen;
3004 pm->is_orphan = is_orphan;
3005 INIT_LIST_HEAD(&pm->list); 3005 INIT_LIST_HEAD(&pm->list);
3006 INIT_LIST_HEAD(&pm->update_refs); 3006 INIT_LIST_HEAD(&pm->update_refs);
3007 RB_CLEAR_NODE(&pm->node); 3007 RB_CLEAR_NODE(&pm->node);
@@ -3134,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3134 rmdir_ino = dm->rmdir_ino; 3134 rmdir_ino = dm->rmdir_ino;
3135 free_waiting_dir_move(sctx, dm); 3135 free_waiting_dir_move(sctx, dm);
3136 3136
3137 ret = get_first_ref(sctx->parent_root, pm->ino, 3137 if (pm->is_orphan) {
3138 &parent_ino, &parent_gen, name); 3138 ret = gen_unique_name(sctx, pm->ino,
3139 if (ret < 0) 3139 pm->gen, from_path);
3140 goto out; 3140 } else {
3141 3141 ret = get_first_ref(sctx->parent_root, pm->ino,
3142 ret = get_cur_path(sctx, parent_ino, parent_gen, 3142 &parent_ino, &parent_gen, name);
3143 from_path); 3143 if (ret < 0)
3144 if (ret < 0) 3144 goto out;
3145 goto out; 3145 ret = get_cur_path(sctx, parent_ino, parent_gen,
3146 ret = fs_path_add_path(from_path, name); 3146 from_path);
3147 if (ret < 0)
3148 goto out;
3149 ret = fs_path_add_path(from_path, name);
3150 }
3147 if (ret < 0) 3151 if (ret < 0)
3148 goto out; 3152 goto out;
3149 3153
@@ -3153,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3153 LIST_HEAD(deleted_refs); 3157 LIST_HEAD(deleted_refs);
3154 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); 3158 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3155 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, 3159 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3156 &pm->update_refs, &deleted_refs); 3160 &pm->update_refs, &deleted_refs,
3161 pm->is_orphan);
3157 if (ret < 0) 3162 if (ret < 0)
3158 goto out; 3163 goto out;
3159 if (rmdir_ino) { 3164 if (rmdir_ino) {
@@ -3286,6 +3291,127 @@ out:
3286 return ret; 3291 return ret;
3287} 3292}
3288 3293
3294/*
3295 * We might need to delay a directory rename even when no ancestor directory
3296 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3297 * renamed. This happens when we rename a directory to the old name (the name
3298 * in the parent root) of some other unrelated directory that got its rename
3299 * delayed due to some ancestor with higher number that got renamed.
3300 *
3301 * Example:
3302 *
3303 * Parent snapshot:
3304 * . (ino 256)
3305 * |---- a/ (ino 257)
3306 * | |---- file (ino 260)
3307 * |
3308 * |---- b/ (ino 258)
3309 * |---- c/ (ino 259)
3310 *
3311 * Send snapshot:
3312 * . (ino 256)
3313 * |---- a/ (ino 258)
3314 * |---- x/ (ino 259)
3315 * |---- y/ (ino 257)
3316 * |----- file (ino 260)
3317 *
3318 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
3319 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3320 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3321 * must issue is:
3322 *
3323 * 1 - rename 259 from 'c' to 'x'
3324 * 2 - rename 257 from 'a' to 'x/y'
3325 * 3 - rename 258 from 'b' to 'a'
3326 *
3327 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3328 * be done right away and < 0 on error.
3329 */
3330static int wait_for_dest_dir_move(struct send_ctx *sctx,
3331 struct recorded_ref *parent_ref,
3332 const bool is_orphan)
3333{
3334 struct btrfs_path *path;
3335 struct btrfs_key key;
3336 struct btrfs_key di_key;
3337 struct btrfs_dir_item *di;
3338 u64 left_gen;
3339 u64 right_gen;
3340 int ret = 0;
3341
3342 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3343 return 0;
3344
3345 path = alloc_path_for_send();
3346 if (!path)
3347 return -ENOMEM;
3348
3349 key.objectid = parent_ref->dir;
3350 key.type = BTRFS_DIR_ITEM_KEY;
3351 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
3352
3353 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
3354 if (ret < 0) {
3355 goto out;
3356 } else if (ret > 0) {
3357 ret = 0;
3358 goto out;
3359 }
3360
3361 di = btrfs_match_dir_item_name(sctx->parent_root, path,
3362 parent_ref->name, parent_ref->name_len);
3363 if (!di) {
3364 ret = 0;
3365 goto out;
3366 }
3367 /*
3368 * di_key.objectid has the number of the inode that has a dentry in the
3369 * parent directory with the same name that sctx->cur_ino is being
3370 * renamed to. We need to check if that inode is in the send root as
 3371 * well and if it is currently marked as an inode with a pending rename;
3372 * if it is, we need to delay the rename of sctx->cur_ino as well, so
3373 * that it happens after that other inode is renamed.
3374 */
3375 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
3376 if (di_key.type != BTRFS_INODE_ITEM_KEY) {
3377 ret = 0;
3378 goto out;
3379 }
3380
3381 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
3382 &left_gen, NULL, NULL, NULL, NULL);
3383 if (ret < 0)
3384 goto out;
3385 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
3386 &right_gen, NULL, NULL, NULL, NULL);
3387 if (ret < 0) {
3388 if (ret == -ENOENT)
3389 ret = 0;
3390 goto out;
3391 }
3392
3393 /* Different inode, no need to delay the rename of sctx->cur_ino */
3394 if (right_gen != left_gen) {
3395 ret = 0;
3396 goto out;
3397 }
3398
3399 if (is_waiting_for_move(sctx, di_key.objectid)) {
3400 ret = add_pending_dir_move(sctx,
3401 sctx->cur_ino,
3402 sctx->cur_inode_gen,
3403 di_key.objectid,
3404 &sctx->new_refs,
3405 &sctx->deleted_refs,
3406 is_orphan);
3407 if (!ret)
3408 ret = 1;
3409 }
3410out:
3411 btrfs_free_path(path);
3412 return ret;
3413}
3414
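
The constraint described in the comment above is observable from userspace: with a non-empty a/, the rename of 'b' to 'a' can only come last. A throwaway demo (paths are made up; run it in an empty scratch directory):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		mkdir("snap", 0755);
		mkdir("snap/a", 0755);			/* ino 257 */
		mkdir("snap/b", 0755);			/* ino 258 */
		mkdir("snap/c", 0755);			/* ino 259 */
		close(creat("snap/a/file", 0644));	/* ino 260 keeps a/ non-empty */

		/* out of order, 'b' -> 'a' fails with ENOTEMPTY */
		if (rename("snap/b", "snap/a"))
			perror("early b->a");

		if (rename("snap/c", "snap/x"))		/* 1 - rename 259 'c' -> 'x' */
			perror("c->x");
		if (rename("snap/a", "snap/x/y"))	/* 2 - rename 257 'a' -> 'x/y' */
			perror("a->x/y");
		if (rename("snap/b", "snap/a"))		/* 3 - 'a' is free only now */
			perror("b->a");
		return 0;
	}
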
3289static int wait_for_parent_move(struct send_ctx *sctx, 3415static int wait_for_parent_move(struct send_ctx *sctx,
3290 struct recorded_ref *parent_ref) 3416 struct recorded_ref *parent_ref)
3291{ 3417{
@@ -3352,7 +3478,8 @@ out:
3352 sctx->cur_inode_gen, 3478 sctx->cur_inode_gen,
3353 ino, 3479 ino,
3354 &sctx->new_refs, 3480 &sctx->new_refs,
3355 &sctx->deleted_refs); 3481 &sctx->deleted_refs,
3482 false);
3356 if (!ret) 3483 if (!ret)
3357 ret = 1; 3484 ret = 1;
3358 } 3485 }
@@ -3375,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3375 int did_overwrite = 0; 3502 int did_overwrite = 0;
3376 int is_orphan = 0; 3503 int is_orphan = 0;
3377 u64 last_dir_ino_rm = 0; 3504 u64 last_dir_ino_rm = 0;
3505 bool can_rename = true;
3378 3506
3379verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3507verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3380 3508
@@ -3493,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3493 } 3621 }
3494 } 3622 }
3495 3623
3624 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3625 ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3626 if (ret < 0)
3627 goto out;
3628 if (ret == 1) {
3629 can_rename = false;
3630 *pending_move = 1;
3631 }
3632 }
3633
3496 /* 3634 /*
3497 * link/move the ref to the new place. If we have an orphan 3635 * link/move the ref to the new place. If we have an orphan
3498 * inode, move it and update valid_path. If not, link or move 3636 * inode, move it and update valid_path. If not, link or move
3499 * it depending on the inode mode. 3637 * it depending on the inode mode.
3500 */ 3638 */
3501 if (is_orphan) { 3639 if (is_orphan && can_rename) {
3502 ret = send_rename(sctx, valid_path, cur->full_path); 3640 ret = send_rename(sctx, valid_path, cur->full_path);
3503 if (ret < 0) 3641 if (ret < 0)
3504 goto out; 3642 goto out;
@@ -3506,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3506 ret = fs_path_copy(valid_path, cur->full_path); 3644 ret = fs_path_copy(valid_path, cur->full_path);
3507 if (ret < 0) 3645 if (ret < 0)
3508 goto out; 3646 goto out;
3509 } else { 3647 } else if (can_rename) {
3510 if (S_ISDIR(sctx->cur_inode_mode)) { 3648 if (S_ISDIR(sctx->cur_inode_mode)) {
3511 /* 3649 /*
3512 * Dirs can't be linked, so move it. For moved 3650 * Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 60f7cbe815e9..05fef198ff94 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1000,10 +1000,20 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
1000 */ 1000 */
1001 if (fs_info->pending_changes == 0) 1001 if (fs_info->pending_changes == 0)
1002 return 0; 1002 return 0;
1003 /*
1004 * A non-blocking test if the fs is frozen. We must not
1005 * start a new transaction here otherwise a deadlock
1006 * happens. The pending operations are delayed to the
1007 * next commit after thawing.
1008 */
1009 if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
1010 __sb_end_write(sb, SB_FREEZE_WRITE);
1011 else
1012 return 0;
1003 trans = btrfs_start_transaction(root, 0); 1013 trans = btrfs_start_transaction(root, 0);
1004 } else {
1005 return PTR_ERR(trans);
1006 } 1014 }
1015 if (IS_ERR(trans))
1016 return PTR_ERR(trans);
1007 } 1017 }
1008 return btrfs_commit_transaction(trans, root); 1018 return btrfs_commit_transaction(trans, root);
1009} 1019}
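
The sync_fs() fix above probes the freeze state without blocking: take the SB_FREEZE_WRITE level if it is available, drop it immediately, and only then start a transaction; if the fs is frozen, the pending changes simply wait for the post-thaw commit. The same shape in a userspace sketch, with a rwlock standing in for the superblock freeze counters (an analogy, not the kernel mechanism):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_rwlock_t freeze_lock = PTHREAD_RWLOCK_INITIALIZER;

	/* the freezer holds the write side; writers briefly take the read side */
	static int fs_is_frozen(void)
	{
		if (pthread_rwlock_tryrdlock(&freeze_lock) == 0) {
			pthread_rwlock_unlock(&freeze_lock);	/* only probing */
			return 0;	/* not frozen: safe to start a transaction */
		}
		return 1;		/* frozen: defer to the post-thaw commit */
	}

	int main(void)
	{
		printf("frozen: %d\n", fs_is_frozen());
		pthread_rwlock_wrlock(&freeze_lock);	/* simulate a freeze */
		printf("frozen: %d\n", fs_is_frozen());
		return 0;
	}
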
@@ -1948,11 +1958,6 @@ static int btrfs_freeze(struct super_block *sb)
1948 return btrfs_commit_transaction(trans, root); 1958 return btrfs_commit_transaction(trans, root);
1949} 1959}
1950 1960
1951static int btrfs_unfreeze(struct super_block *sb)
1952{
1953 return 0;
1954}
1955
1956static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 1961static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1957{ 1962{
1958 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 1963 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2001,7 +2006,6 @@ static const struct super_operations btrfs_super_ops = {
2001 .statfs = btrfs_statfs, 2006 .statfs = btrfs_statfs,
2002 .remount_fs = btrfs_remount, 2007 .remount_fs = btrfs_remount,
2003 .freeze_fs = btrfs_freeze, 2008 .freeze_fs = btrfs_freeze,
2004 .unfreeze_fs = btrfs_unfreeze,
2005}; 2009};
2006 2010
2007static const struct file_operations btrfs_ctl_fops = { 2011static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 92db3f648df4..94edb0a2a026 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -733,10 +733,18 @@ int btrfs_init_sysfs(void)
733 733
734 ret = btrfs_init_debugfs(); 734 ret = btrfs_init_debugfs();
735 if (ret) 735 if (ret)
736 return ret; 736 goto out1;
737 737
738 init_feature_attrs(); 738 init_feature_attrs();
739 ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); 739 ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
740 if (ret)
741 goto out2;
742
743 return 0;
744out2:
745 debugfs_remove_recursive(btrfs_debugfs_root_dentry);
746out1:
747 kset_unregister(btrfs_kset);
740 748
741 return ret; 749 return ret;
742} 750}
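
btrfs_init_sysfs() previously leaked the kset (and the debugfs root) when a later step failed; the fix is the idiomatic reverse-order goto unwind, where each label tears down exactly what was set up before the failing step. The generic shape, with illustrative init/exit pairs:

	#include <stdio.h>

	static int init_a(void) { return 0; }
	static int init_b(void) { return 0; }
	static int init_c(void) { return -1; }	/* pretend the last step fails */
	static void exit_b(void) { puts("undo b"); }
	static void exit_a(void) { puts("undo a"); }

	/* reverse-order unwinding: each label undoes the steps before the failure */
	static int init_all(void)
	{
		int ret;

		ret = init_a();
		if (ret)
			return ret;
		ret = init_b();
		if (ret)
			goto out1;
		ret = init_c();
		if (ret)
			goto out2;
		return 0;
	out2:
		exit_b();
	out1:
		exit_a();
		return ret;
	}

	int main(void)
	{
		printf("init_all: %d\n", init_all());
		return 0;
	}
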
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index cc286ce97d1e..f51963a8f929 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -53,7 +53,7 @@ static int test_btrfs_split_item(void)
53 return -ENOMEM; 53 return -ENOMEM;
54 } 54 }
55 55
56 path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096); 56 path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
57 if (!eb) { 57 if (!eb) {
58 test_msg("Could not allocate dummy buffer\n"); 58 test_msg("Could not allocate dummy buffer\n");
59 ret = -ENOMEM; 59 ret = -ENOMEM;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 7e99c2f98dd0..9e9f2368177d 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -258,8 +258,7 @@ static int test_find_delalloc(void)
258 } 258 }
259 ret = 0; 259 ret = 0;
260out_bits: 260out_bits:
261 clear_extent_bits(&tmp, 0, total_dirty - 1, 261 clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
262 (unsigned long)-1, GFP_NOFS);
263out: 262out:
264 if (locked_page) 263 if (locked_page)
265 page_cache_release(locked_page); 264 page_cache_release(locked_page);
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 3ae0f5b8bb80..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -255,7 +255,7 @@ static noinline int test_btrfs_get_extent(void)
255 goto out; 255 goto out;
256 } 256 }
257 257
258 root->node = alloc_dummy_extent_buffer(0, 4096); 258 root->node = alloc_dummy_extent_buffer(NULL, 4096);
259 if (!root->node) { 259 if (!root->node) {
260 test_msg("Couldn't allocate dummy buffer\n"); 260 test_msg("Couldn't allocate dummy buffer\n");
261 goto out; 261 goto out;
@@ -843,7 +843,7 @@ static int test_hole_first(void)
843 goto out; 843 goto out;
844 } 844 }
845 845
846 root->node = alloc_dummy_extent_buffer(0, 4096); 846 root->node = alloc_dummy_extent_buffer(NULL, 4096);
847 if (!root->node) { 847 if (!root->node) {
848 test_msg("Couldn't allocate dummy buffer\n"); 848 test_msg("Couldn't allocate dummy buffer\n");
849 goto out; 849 goto out;
@@ -911,6 +911,197 @@ out:
911 return ret; 911 return ret;
912} 912}
913 913
914static int test_extent_accounting(void)
915{
916 struct inode *inode = NULL;
917 struct btrfs_root *root = NULL;
918 int ret = -ENOMEM;
919
920 inode = btrfs_new_test_inode();
921 if (!inode) {
922 test_msg("Couldn't allocate inode\n");
923 return ret;
924 }
925
926 root = btrfs_alloc_dummy_root();
927 if (IS_ERR(root)) {
928 test_msg("Couldn't allocate root\n");
929 goto out;
930 }
931
932 root->fs_info = btrfs_alloc_dummy_fs_info();
933 if (!root->fs_info) {
934 test_msg("Couldn't allocate dummy fs info\n");
935 goto out;
936 }
937
938 BTRFS_I(inode)->root = root;
939 btrfs_test_inode_set_ops(inode);
940
941 /* [BTRFS_MAX_EXTENT_SIZE] */
942 BTRFS_I(inode)->outstanding_extents++;
943 ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
944 NULL);
945 if (ret) {
946 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
947 goto out;
948 }
949 if (BTRFS_I(inode)->outstanding_extents != 1) {
950 ret = -EINVAL;
951 test_msg("Miscount, wanted 1, got %u\n",
952 BTRFS_I(inode)->outstanding_extents);
953 goto out;
954 }
955
956 /* [BTRFS_MAX_EXTENT_SIZE][4k] */
957 BTRFS_I(inode)->outstanding_extents++;
958 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
959 BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
960 if (ret) {
961 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
962 goto out;
963 }
964 if (BTRFS_I(inode)->outstanding_extents != 2) {
965 ret = -EINVAL;
966 test_msg("Miscount, wanted 2, got %u\n",
967 BTRFS_I(inode)->outstanding_extents);
968 goto out;
969 }
970
971 /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
972 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
973 BTRFS_MAX_EXTENT_SIZE >> 1,
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS);
978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out;
981 }
982 if (BTRFS_I(inode)->outstanding_extents != 2) {
983 ret = -EINVAL;
984 test_msg("Miscount, wanted 2, got %u\n",
985 BTRFS_I(inode)->outstanding_extents);
986 goto out;
987 }
988
989 /* [BTRFS_MAX_EXTENT_SIZE][4K] */
990 BTRFS_I(inode)->outstanding_extents++;
991 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
992 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
993 NULL);
994 if (ret) {
995 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
996 goto out;
997 }
998 if (BTRFS_I(inode)->outstanding_extents != 2) {
999 ret = -EINVAL;
1000 test_msg("Miscount, wanted 2, got %u\n",
1001 BTRFS_I(inode)->outstanding_extents);
1002 goto out;
1003 }
1004
1005 /*
1006 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
1007 *
1008 * I'm artificially adding 2 to outstanding_extents because in the
1009 * buffered IO case we'd add things up as we go, but I don't feel like
1010 * doing that here, this isn't the interesting case we want to test.
1011 */
1012 BTRFS_I(inode)->outstanding_extents += 2;
1013 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
1014 (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
1015 NULL);
1016 if (ret) {
1017 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1018 goto out;
1019 }
1020 if (BTRFS_I(inode)->outstanding_extents != 4) {
1021 ret = -EINVAL;
1022 test_msg("Miscount, wanted 4, got %u\n",
1023 BTRFS_I(inode)->outstanding_extents);
1024 goto out;
1025 }
1026
1027 /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
1028 BTRFS_I(inode)->outstanding_extents++;
1029 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1030 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1031 if (ret) {
1032 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1033 goto out;
1034 }
1035 if (BTRFS_I(inode)->outstanding_extents != 3) {
1036 ret = -EINVAL;
1037 test_msg("Miscount, wanted 3, got %u\n",
1038 BTRFS_I(inode)->outstanding_extents);
1039 goto out;
1040 }
1041
1042 /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
1043 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
1044 BTRFS_MAX_EXTENT_SIZE+4096,
1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS);
1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out;
1052 }
1053 if (BTRFS_I(inode)->outstanding_extents != 4) {
1054 ret = -EINVAL;
1055 test_msg("Miscount, wanted 4, got %u\n",
1056 BTRFS_I(inode)->outstanding_extents);
1057 goto out;
1058 }
1059
1060 /*
1061 * Refill the hole again just for good measure, because I thought it
1062 * might fail and I'd rather satisfy my paranoia at this point.
1063 */
1064 BTRFS_I(inode)->outstanding_extents++;
1065 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1066 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1067 if (ret) {
1068 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1069 goto out;
1070 }
1071 if (BTRFS_I(inode)->outstanding_extents != 3) {
1072 ret = -EINVAL;
1073 test_msg("Miscount, wanted 3, got %u\n",
1074 BTRFS_I(inode)->outstanding_extents);
1075 goto out;
1076 }
1077
1078 /* Empty */
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS);
1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out;
1086 }
1087 if (BTRFS_I(inode)->outstanding_extents) {
1088 ret = -EINVAL;
1089 test_msg("Miscount, wanted 0, got %u\n",
1090 BTRFS_I(inode)->outstanding_extents);
1091 goto out;
1092 }
1093 ret = 0;
1094out:
1095 if (ret)
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS);
1100 iput(inode);
1101 btrfs_free_dummy_root(root);
1102 return ret;
1103}
1104
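
The accounting the new test pins down is simple arithmetic: a contiguous delalloc range costs one outstanding extent per BTRFS_MAX_EXTENT_SIZE chunk, rounded up, so punching a 4K hole into a max-sized extent adds one extent and refilling the hole merges it back. A sketch of the expected count (the 128MiB value is an assumption about this kernel's BTRFS_MAX_EXTENT_SIZE):

	#include <stdint.h>
	#include <stdio.h>

	/* assumed value of BTRFS_MAX_EXTENT_SIZE in this kernel: 128MiB */
	#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

	/* one outstanding extent per MAX_EXTENT_SIZE chunk, rounded up */
	static uint64_t outstanding_extents(uint64_t len)
	{
		return (len + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
	}

	int main(void)
	{
		/* the test's [BTRFS_MAX_EXTENT_SIZE][4k] layout: 1 + 1 = 2 */
		printf("%llu\n",
		       (unsigned long long)(outstanding_extents(MAX_EXTENT_SIZE) +
					    outstanding_extents(4096)));
		return 0;
	}
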
914int btrfs_test_inodes(void) 1105int btrfs_test_inodes(void)
915{ 1106{
916 int ret; 1107 int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
924 if (ret) 1115 if (ret)
925 return ret; 1116 return ret;
926 test_msg("Running hole first btrfs_get_extent test\n"); 1117 test_msg("Running hole first btrfs_get_extent test\n");
927 return test_hole_first(); 1118 ret = test_hole_first();
1119 if (ret)
1120 return ret;
1121 test_msg("Running outstanding_extents tests\n");
1122 return test_extent_accounting();
928} 1123}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ec3dcb202357..73f299ebdabb 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -404,12 +404,22 @@ int btrfs_test_qgroups(void)
404 ret = -ENOMEM; 404 ret = -ENOMEM;
405 goto out; 405 goto out;
406 } 406 }
407 /* We are using this root as our extent root */
408 root->fs_info->extent_root = root;
409
410 /*
411 * Some of the paths we test assume we have a filled out fs_info, so we
412 * just need to add the root in there so we don't panic.
413 */
414 root->fs_info->tree_root = root;
415 root->fs_info->quota_root = root;
416 root->fs_info->quota_enabled = 1;
407 417
408 /* 418 /*
409 * Can't use bytenr 0, some things freak out 419 * Can't use bytenr 0, some things freak out
410 * *cough*backref walking code*cough* 420 * *cough*backref walking code*cough*
411 */ 421 */
412 root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096); 422 root->node = alloc_test_extent_buffer(root->fs_info, 4096);
413 if (!root->node) { 423 if (!root->node) {
414 test_msg("Couldn't allocate dummy buffer\n"); 424 test_msg("Couldn't allocate dummy buffer\n");
415 ret = -ENOMEM; 425 ret = -ENOMEM;
@@ -448,17 +458,6 @@ int btrfs_test_qgroups(void)
448 goto out; 458 goto out;
449 } 459 }
450 460
451 /* We are using this root as our extent root */
452 root->fs_info->extent_root = root;
453
454 /*
455 * Some of the paths we test assume we have a filled out fs_info, so we
456 * just need to addt he root in there so we don't panic.
457 */
458 root->fs_info->tree_root = root;
459 root->fs_info->quota_root = root;
460 root->fs_info->quota_enabled = 1;
461
462 test_msg("Running qgroup tests\n"); 461 test_msg("Running qgroup tests\n");
463 ret = test_no_shared_qgroup(root); 462 ret = test_no_shared_qgroup(root);
464 if (ret) 463 if (ret)
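
The hunk above only moves the fs_info wiring ahead of the dummy buffer allocation; condensed, the order the test now follows is (restated from the patch, error paths elided):

/*
 * Wire the dummy root into fs_info before anything allocates
 * buffers or walks roots, since those paths dereference the
 * fields below.
 */
root->fs_info->extent_root = root;
root->fs_info->tree_root = root;
root->fs_info->quota_root = root;
root->fs_info->quota_enabled = 1;

/* Only then allocate the dummy root node (bytenr 4096, not 0). */
root->node = alloc_test_extent_buffer(root->fs_info, 4096);
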
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a605d4e2f2bc..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -220,6 +220,7 @@ loop:
220 * commit the transaction. 220 * commit the transaction.
221 */ 221 */
222 atomic_set(&cur_trans->use_count, 2); 222 atomic_set(&cur_trans->use_count, 2);
223 cur_trans->have_free_bgs = 0;
223 cur_trans->start_time = get_seconds(); 224 cur_trans->start_time = get_seconds();
224 225
225 cur_trans->delayed_refs.href_root = RB_ROOT; 226 cur_trans->delayed_refs.href_root = RB_ROOT;
@@ -248,6 +249,8 @@ loop:
248 INIT_LIST_HEAD(&cur_trans->pending_chunks); 249 INIT_LIST_HEAD(&cur_trans->pending_chunks);
249 INIT_LIST_HEAD(&cur_trans->switch_commits); 250 INIT_LIST_HEAD(&cur_trans->switch_commits);
250 INIT_LIST_HEAD(&cur_trans->pending_ordered); 251 INIT_LIST_HEAD(&cur_trans->pending_ordered);
252 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
253 spin_lock_init(&cur_trans->dirty_bgs_lock);
251 list_add_tail(&cur_trans->list, &fs_info->trans_list); 254 list_add_tail(&cur_trans->list, &fs_info->trans_list);
252 extent_io_tree_init(&cur_trans->dirty_pages, 255 extent_io_tree_init(&cur_trans->dirty_pages,
253 fs_info->btree_inode->i_mapping); 256 fs_info->btree_inode->i_mapping);
@@ -1022,7 +1025,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1022 struct btrfs_root *tree_root = root->fs_info->tree_root; 1025 struct btrfs_root *tree_root = root->fs_info->tree_root;
1023 1026
1024 old_root_used = btrfs_root_used(&root->root_item); 1027 old_root_used = btrfs_root_used(&root->root_item);
1025 btrfs_write_dirty_block_groups(trans, root);
1026 1028
1027 while (1) { 1029 while (1) {
1028 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1030 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
@@ -1038,9 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1038 return ret; 1040 return ret;
1039 1041
1040 old_root_used = btrfs_root_used(&root->root_item); 1042 old_root_used = btrfs_root_used(&root->root_item);
1041 ret = btrfs_write_dirty_block_groups(trans, root);
1042 if (ret)
1043 return ret;
1044 } 1043 }
1045 1044
1046 return 0; 1045 return 0;
@@ -1057,14 +1056,11 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1057 struct btrfs_root *root) 1056 struct btrfs_root *root)
1058{ 1057{
1059 struct btrfs_fs_info *fs_info = root->fs_info; 1058 struct btrfs_fs_info *fs_info = root->fs_info;
1059 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1060 struct list_head *next; 1060 struct list_head *next;
1061 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1062 int ret; 1062 int ret;
1063 1063
1064 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1065 if (ret)
1066 return ret;
1067
1068 eb = btrfs_lock_root_node(fs_info->tree_root); 1064 eb = btrfs_lock_root_node(fs_info->tree_root);
1069 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 1065 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
1070 0, &eb); 1066 0, &eb);
@@ -1088,15 +1084,20 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1088 if (ret) 1084 if (ret)
1089 return ret; 1085 return ret;
1090 1086
1087 ret = btrfs_setup_space_cache(trans, root);
1088 if (ret)
1089 return ret;
1090
1091 /* run_qgroups might have added some more refs */ 1091 /* run_qgroups might have added some more refs */
1092 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1092 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1093 if (ret) 1093 if (ret)
1094 return ret; 1094 return ret;
1095 1095again:
1096 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 1096 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
1097 next = fs_info->dirty_cowonly_roots.next; 1097 next = fs_info->dirty_cowonly_roots.next;
1098 list_del_init(next); 1098 list_del_init(next);
1099 root = list_entry(next, struct btrfs_root, dirty_list); 1099 root = list_entry(next, struct btrfs_root, dirty_list);
1100 clear_bit(BTRFS_ROOT_DIRTY, &root->state);
1100 1101
1101 if (root != fs_info->extent_root) 1102 if (root != fs_info->extent_root)
1102 list_add_tail(&root->dirty_list, 1103 list_add_tail(&root->dirty_list,
@@ -1104,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1104 ret = update_cowonly_root(trans, root); 1105 ret = update_cowonly_root(trans, root);
1105 if (ret) 1106 if (ret)
1106 return ret; 1107 return ret;
1108 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1109 if (ret)
1110 return ret;
1111 }
1112
1113 while (!list_empty(dirty_bgs)) {
1114 ret = btrfs_write_dirty_block_groups(trans, root);
1115 if (ret)
1116 return ret;
1117 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1118 if (ret)
1119 return ret;
1107 } 1120 }
1108 1121
1122 if (!list_empty(&fs_info->dirty_cowonly_roots))
1123 goto again;
1124
1109 list_add_tail(&fs_info->extent_root->dirty_list, 1125 list_add_tail(&fs_info->extent_root->dirty_list,
1110 &trans->transaction->switch_commits); 1126 &trans->transaction->switch_commits);
1111 btrfs_after_dev_replace_commit(fs_info); 1127 btrfs_after_dev_replace_commit(fs_info);
@@ -1803,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1803 1819
1804 wait_for_commit(root, cur_trans); 1820 wait_for_commit(root, cur_trans);
1805 1821
1822 if (unlikely(cur_trans->aborted))
1823 ret = cur_trans->aborted;
1824
1806 btrfs_put_transaction(cur_trans); 1825 btrfs_put_transaction(cur_trans);
1807 1826
1808 return ret; 1827 return ret;
@@ -1983,6 +2002,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1983 switch_commit_roots(cur_trans, root->fs_info); 2002 switch_commit_roots(cur_trans, root->fs_info);
1984 2003
1985 assert_qgroups_uptodate(trans); 2004 assert_qgroups_uptodate(trans);
2005 ASSERT(list_empty(&cur_trans->dirty_bgs));
1986 update_super_roots(root); 2006 update_super_roots(root);
1987 2007
1988 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 2008 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
@@ -2026,6 +2046,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
2026 2046
2027 btrfs_finish_extent_commit(trans, root); 2047 btrfs_finish_extent_commit(trans, root);
2028 2048
2049 if (cur_trans->have_free_bgs)
2050 btrfs_clear_space_info_full(root->fs_info);
2051
2029 root->fs_info->last_trans_committed = cur_trans->transid; 2052 root->fs_info->last_trans_committed = cur_trans->transid;
2030 /* 2053 /*
2031 * We needn't acquire the lock here because there is no other task 2054 * We needn't acquire the lock here because there is no other task
@@ -2118,7 +2141,7 @@ void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
2118 unsigned long prev; 2141 unsigned long prev;
2119 unsigned long bit; 2142 unsigned long bit;
2120 2143
2121 prev = cmpxchg(&fs_info->pending_changes, 0, 0); 2144 prev = xchg(&fs_info->pending_changes, 0);
2122 if (!prev) 2145 if (!prev)
2123 return; 2146 return;
2124 2147
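
Condensed, the control flow commit_cowonly_roots() gains above is a fixed-point loop: writing dirty block groups generates delayed refs that can dirty cow-only roots again, so both lists are drained until neither refills (sketch of the shape, error handling elided):

again:
	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		/* update_cowonly_root() + btrfs_run_delayed_refs() */
	}
	while (!list_empty(&trans->transaction->dirty_bgs)) {
		btrfs_write_dirty_block_groups(trans, root);
		btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	}
	if (!list_empty(&fs_info->dirty_cowonly_roots))
		goto again;
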
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 00ed29c4b3f9..937050a2b68e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -47,6 +47,11 @@ struct btrfs_transaction {
47 atomic_t num_writers; 47 atomic_t num_writers;
48 atomic_t use_count; 48 atomic_t use_count;
49 49
50 /*
51 * true if block groups were freed in this transaction
52 */
53 int have_free_bgs;
54
50 /* Be protected by fs_info->trans_lock when we want to change it. */ 55 /* Be protected by fs_info->trans_lock when we want to change it. */
51 enum btrfs_trans_state state; 56 enum btrfs_trans_state state;
52 struct list_head list; 57 struct list_head list;
@@ -58,6 +63,8 @@ struct btrfs_transaction {
58 struct list_head pending_chunks; 63 struct list_head pending_chunks;
59 struct list_head pending_ordered; 64 struct list_head pending_ordered;
60 struct list_head switch_commits; 65 struct list_head switch_commits;
66 struct list_head dirty_bgs;
67 spinlock_t dirty_bgs_lock;
61 struct btrfs_delayed_ref_root delayed_refs; 68 struct btrfs_delayed_ref_root delayed_refs;
62 int aborted; 69 int aborted;
63}; 70};
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a02da16f2be..c5b8ba37f88e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
453insert: 453insert:
454 btrfs_release_path(path); 454 btrfs_release_path(path);
455 /* try to insert the key into the destination tree */ 455 /* try to insert the key into the destination tree */
456 path->skip_release_on_error = 1;
456 ret = btrfs_insert_empty_item(trans, root, path, 457 ret = btrfs_insert_empty_item(trans, root, path,
457 key, item_size); 458 key, item_size);
459 path->skip_release_on_error = 0;
458 460
459 /* make sure any existing item is the correct size */ 461 /* make sure any existing item is the correct size */
460 if (ret == -EEXIST) { 462 if (ret == -EEXIST || ret == -EOVERFLOW) {
461 u32 found_size; 463 u32 found_size;
462 found_size = btrfs_item_size_nr(path->nodes[0], 464 found_size = btrfs_item_size_nr(path->nodes[0],
463 path->slots[0]); 465 path->slots[0]);
@@ -488,8 +490,20 @@ insert:
488 src_item = (struct btrfs_inode_item *)src_ptr; 490 src_item = (struct btrfs_inode_item *)src_ptr;
489 dst_item = (struct btrfs_inode_item *)dst_ptr; 491 dst_item = (struct btrfs_inode_item *)dst_ptr;
490 492
491 if (btrfs_inode_generation(eb, src_item) == 0) 493 if (btrfs_inode_generation(eb, src_item) == 0) {
494 struct extent_buffer *dst_eb = path->nodes[0];
495
496 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497 S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
498 struct btrfs_map_token token;
499 u64 ino_size = btrfs_inode_size(eb, src_item);
500
501 btrfs_init_map_token(&token);
502 btrfs_set_token_inode_size(dst_eb, dst_item,
503 ino_size, &token);
504 }
492 goto no_copy; 505 goto no_copy;
506 }
493 507
494 if (overwrite_root && 508 if (overwrite_root &&
495 S_ISDIR(btrfs_inode_mode(eb, src_item)) && 509 S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
@@ -844,7 +858,7 @@ out:
844static noinline int backref_in_log(struct btrfs_root *log, 858static noinline int backref_in_log(struct btrfs_root *log,
845 struct btrfs_key *key, 859 struct btrfs_key *key,
846 u64 ref_objectid, 860 u64 ref_objectid,
847 char *name, int namelen) 861 const char *name, int namelen)
848{ 862{
849 struct btrfs_path *path; 863 struct btrfs_path *path;
850 struct btrfs_inode_ref *ref; 864 struct btrfs_inode_ref *ref;
@@ -998,7 +1012,7 @@ again:
998 base = btrfs_item_ptr_offset(leaf, path->slots[0]); 1012 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
999 1013
1000 while (cur_offset < item_size) { 1014 while (cur_offset < item_size) {
1001 extref = (struct btrfs_inode_extref *)base + cur_offset; 1015 extref = (struct btrfs_inode_extref *)(base + cur_offset);
1002 1016
1003 victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 1017 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
1004 1018
@@ -1254,13 +1268,14 @@ out:
1254} 1268}
1255 1269
1256static int insert_orphan_item(struct btrfs_trans_handle *trans, 1270static int insert_orphan_item(struct btrfs_trans_handle *trans,
1257 struct btrfs_root *root, u64 offset) 1271 struct btrfs_root *root, u64 ino)
1258{ 1272{
1259 int ret; 1273 int ret;
1260 ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID, 1274
1261 offset, BTRFS_ORPHAN_ITEM_KEY, NULL); 1275 ret = btrfs_insert_orphan_item(trans, root, ino);
1262 if (ret > 0) 1276 if (ret == -EEXIST)
1263 ret = btrfs_insert_orphan_item(trans, root, offset); 1277 ret = 0;
1278
1264 return ret; 1279 return ret;
1265} 1280}
1266 1281
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
1287 leaf = path->nodes[0]; 1302 leaf = path->nodes[0];
1288 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1303 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1289 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 1304 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1305 cur_offset = 0;
1290 1306
1291 while (cur_offset < item_size) { 1307 while (cur_offset < item_size) {
1292 extref = (struct btrfs_inode_extref *) (ptr + cur_offset); 1308 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
1302 } 1318 }
1303 btrfs_release_path(path); 1319 btrfs_release_path(path);
1304 1320
1305 if (ret < 0) 1321 if (ret < 0 && ret != -ENOENT)
1306 return ret; 1322 return ret;
1307 return nlink; 1323 return nlink;
1308} 1324}
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1394 nlink = ret; 1410 nlink = ret;
1395 1411
1396 ret = count_inode_extrefs(root, inode, path); 1412 ret = count_inode_extrefs(root, inode, path);
1397 if (ret == -ENOENT)
1398 ret = 0;
1399
1400 if (ret < 0) 1413 if (ret < 0)
1401 goto out; 1414 goto out;
1402 1415
@@ -1557,6 +1570,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1557} 1570}
1558 1571
1559/* 1572/*
1573 * Return true if an inode reference exists in the log for the given name,
1574 * inode and parent inode.
1575 */
1576static bool name_in_log_ref(struct btrfs_root *log_root,
1577 const char *name, const int name_len,
1578 const u64 dirid, const u64 ino)
1579{
1580 struct btrfs_key search_key;
1581
1582 search_key.objectid = ino;
1583 search_key.type = BTRFS_INODE_REF_KEY;
1584 search_key.offset = dirid;
1585 if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1586 return true;
1587
1588 search_key.type = BTRFS_INODE_EXTREF_KEY;
1589 search_key.offset = btrfs_extref_hash(dirid, name, name_len);
1590 if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1591 return true;
1592
1593 return false;
1594}
1595
1596/*
1560 * take a single entry in a log directory item and replay it into 1597 * take a single entry in a log directory item and replay it into
1561 * the subvolume. 1598 * the subvolume.
1562 * 1599 *
@@ -1666,10 +1703,17 @@ out:
1666 return ret; 1703 return ret;
1667 1704
1668insert: 1705insert:
1706 if (name_in_log_ref(root->log_root, name, name_len,
1707 key->objectid, log_key.objectid)) {
1708 /* The dentry will be added later. */
1709 ret = 0;
1710 update_size = false;
1711 goto out;
1712 }
1669 btrfs_release_path(path); 1713 btrfs_release_path(path);
1670 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1714 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1671 name, name_len, log_type, &log_key); 1715 name, name_len, log_type, &log_key);
1672 if (ret && ret != -ENOENT) 1716 if (ret && ret != -ENOENT && ret != -EEXIST)
1673 goto out; 1717 goto out;
1674 update_size = false; 1718 update_size = false;
1675 ret = 0; 1719 ret = 0;
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2164 parent = path->nodes[*level]; 2208 parent = path->nodes[*level];
2165 root_owner = btrfs_header_owner(parent); 2209 root_owner = btrfs_header_owner(parent);
2166 2210
2167 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 2211 next = btrfs_find_create_tree_block(root, bytenr);
2168 if (!next) 2212 if (!next)
2169 return -ENOMEM; 2213 return -ENOMEM;
2170 2214
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
2416 mutex_unlock(&root->log_mutex); 2460 mutex_unlock(&root->log_mutex);
2417 if (atomic_read(&root->log_writers)) 2461 if (atomic_read(&root->log_writers))
2418 schedule(); 2462 schedule();
2419 mutex_lock(&root->log_mutex);
2420 finish_wait(&root->log_writer_wait, &wait); 2463 finish_wait(&root->log_writer_wait, &wait);
2464 mutex_lock(&root->log_mutex);
2421 } 2465 }
2422} 2466}
2423 2467
@@ -2591,6 +2635,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2591 } 2635 }
2592 2636
2593 if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { 2637 if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
2638 blk_finish_plug(&plug);
2594 mutex_unlock(&log_root_tree->log_mutex); 2639 mutex_unlock(&log_root_tree->log_mutex);
2595 ret = root_log_ctx.log_ret; 2640 ret = root_log_ctx.log_ret;
2596 goto out; 2641 goto out;
@@ -3218,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
3218static void fill_inode_item(struct btrfs_trans_handle *trans, 3263static void fill_inode_item(struct btrfs_trans_handle *trans,
3219 struct extent_buffer *leaf, 3264 struct extent_buffer *leaf,
3220 struct btrfs_inode_item *item, 3265 struct btrfs_inode_item *item,
3221 struct inode *inode, int log_inode_only) 3266 struct inode *inode, int log_inode_only,
3267 u64 logged_isize)
3222{ 3268{
3223 struct btrfs_map_token token; 3269 struct btrfs_map_token token;
3224 3270
@@ -3231,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3231 * to say 'update this inode with these values' 3277 * to say 'update this inode with these values'
3232 */ 3278 */
3233 btrfs_set_token_inode_generation(leaf, item, 0, &token); 3279 btrfs_set_token_inode_generation(leaf, item, 0, &token);
3234 btrfs_set_token_inode_size(leaf, item, 0, &token); 3280 btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
3235 } else { 3281 } else {
3236 btrfs_set_token_inode_generation(leaf, item, 3282 btrfs_set_token_inode_generation(leaf, item,
3237 BTRFS_I(inode)->generation, 3283 BTRFS_I(inode)->generation,
@@ -3244,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3244 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 3290 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3245 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 3291 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3246 3292
3247 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), 3293 btrfs_set_token_timespec_sec(leaf, &item->atime,
3248 inode->i_atime.tv_sec, &token); 3294 inode->i_atime.tv_sec, &token);
3249 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), 3295 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3250 inode->i_atime.tv_nsec, &token); 3296 inode->i_atime.tv_nsec, &token);
3251 3297
3252 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), 3298 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3253 inode->i_mtime.tv_sec, &token); 3299 inode->i_mtime.tv_sec, &token);
3254 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), 3300 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3255 inode->i_mtime.tv_nsec, &token); 3301 inode->i_mtime.tv_nsec, &token);
3256 3302
3257 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), 3303 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3258 inode->i_ctime.tv_sec, &token); 3304 inode->i_ctime.tv_sec, &token);
3259 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), 3305 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3260 inode->i_ctime.tv_nsec, &token); 3306 inode->i_ctime.tv_nsec, &token);
3261 3307
3262 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 3308 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
@@ -3283,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
3283 return ret; 3329 return ret;
3284 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3330 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3285 struct btrfs_inode_item); 3331 struct btrfs_inode_item);
3286 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); 3332 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
3287 btrfs_release_path(path); 3333 btrfs_release_path(path);
3288 return 0; 3334 return 0;
3289} 3335}
@@ -3292,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3292 struct inode *inode, 3338 struct inode *inode,
3293 struct btrfs_path *dst_path, 3339 struct btrfs_path *dst_path,
3294 struct btrfs_path *src_path, u64 *last_extent, 3340 struct btrfs_path *src_path, u64 *last_extent,
3295 int start_slot, int nr, int inode_only) 3341 int start_slot, int nr, int inode_only,
3342 u64 logged_isize)
3296{ 3343{
3297 unsigned long src_offset; 3344 unsigned long src_offset;
3298 unsigned long dst_offset; 3345 unsigned long dst_offset;
@@ -3349,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3349 dst_path->slots[0], 3396 dst_path->slots[0],
3350 struct btrfs_inode_item); 3397 struct btrfs_inode_item);
3351 fill_inode_item(trans, dst_path->nodes[0], inode_item, 3398 fill_inode_item(trans, dst_path->nodes[0], inode_item,
3352 inode, inode_only == LOG_INODE_EXISTS); 3399 inode, inode_only == LOG_INODE_EXISTS,
3400 logged_isize);
3353 } else { 3401 } else {
3354 copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 3402 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
3355 src_offset, ins_sizes[i]); 3403 src_offset, ins_sizes[i]);
@@ -3901,6 +3949,33 @@ process:
3901 return ret; 3949 return ret;
3902} 3950}
3903 3951
3952static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3953 struct btrfs_path *path, u64 *size_ret)
3954{
3955 struct btrfs_key key;
3956 int ret;
3957
3958 key.objectid = btrfs_ino(inode);
3959 key.type = BTRFS_INODE_ITEM_KEY;
3960 key.offset = 0;
3961
3962 ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
3963 if (ret < 0) {
3964 return ret;
3965 } else if (ret > 0) {
3966 *size_ret = i_size_read(inode);
3967 } else {
3968 struct btrfs_inode_item *item;
3969
3970 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3971 struct btrfs_inode_item);
3972 *size_ret = btrfs_inode_size(path->nodes[0], item);
3973 }
3974
3975 btrfs_release_path(path);
3976 return 0;
3977}
3978
3904/* log a single inode in the tree log. 3979/* log a single inode in the tree log.
3905 * At least one parent directory for this inode must exist in the tree 3980 * At least one parent directory for this inode must exist in the tree
3906 * or be logged already. 3981 * or be logged already.
@@ -3938,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3938 bool fast_search = false; 4013 bool fast_search = false;
3939 u64 ino = btrfs_ino(inode); 4014 u64 ino = btrfs_ino(inode);
3940 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4015 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4016 u64 logged_isize = 0;
3941 4017
3942 path = btrfs_alloc_path(); 4018 path = btrfs_alloc_path();
3943 if (!path) 4019 if (!path)
@@ -3965,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3965 max_key.type = (u8)-1; 4041 max_key.type = (u8)-1;
3966 max_key.offset = (u64)-1; 4042 max_key.offset = (u64)-1;
3967 4043
3968 /* Only run delayed items if we are a dir or a new file */ 4044 /*
4045 * Only run delayed items if we are a dir or a new file.
4046 * Otherwise commit the delayed inode only, which is needed in
4047 * order for the log replay code to mark inodes for link count
4048 * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
4049 */
3969 if (S_ISDIR(inode->i_mode) || 4050 if (S_ISDIR(inode->i_mode) ||
3970 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { 4051 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed)
3971 ret = btrfs_commit_inode_delayed_items(trans, inode); 4052 ret = btrfs_commit_inode_delayed_items(trans, inode);
3972 if (ret) { 4053 else
3973 btrfs_free_path(path); 4054 ret = btrfs_commit_inode_delayed_inode(inode);
3974 btrfs_free_path(dst_path); 4055
3975 return ret; 4056 if (ret) {
3976 } 4057 btrfs_free_path(path);
4058 btrfs_free_path(dst_path);
4059 return ret;
3977 } 4060 }
3978 4061
3979 mutex_lock(&BTRFS_I(inode)->log_mutex); 4062 mutex_lock(&BTRFS_I(inode)->log_mutex);
@@ -3987,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3987 if (S_ISDIR(inode->i_mode)) { 4070 if (S_ISDIR(inode->i_mode)) {
3988 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4071 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
3989 4072
3990 if (inode_only == LOG_INODE_EXISTS) 4073 if (inode_only == LOG_INODE_EXISTS) {
3991 max_key_type = BTRFS_XATTR_ITEM_KEY; 4074 max_key_type = BTRFS_INODE_EXTREF_KEY;
4075 max_key.type = max_key_type;
4076 }
3992 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4077 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
3993 } else { 4078 } else {
3994 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4079 if (inode_only == LOG_INODE_EXISTS) {
3995 &BTRFS_I(inode)->runtime_flags)) { 4080 /*
3996 clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4081 * Make sure the new inode item we write to the log has
3997 &BTRFS_I(inode)->runtime_flags); 4082 * the same isize as the current one (if it exists).
3998 ret = btrfs_truncate_inode_items(trans, log, 4083 * This is necessary to prevent data loss after log
3999 inode, 0, 0); 4084 * replay, and also to prevent doing a wrong expanding
4000 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4085 * truncate - for e.g. create file, write 4K into offset
4001 &BTRFS_I(inode)->runtime_flags) || 4086 * 0, fsync, write 4K into offset 4096, add hard link,
4087 * fsync some other file (to sync log), power fail - if
4088 * we use the inode's current i_size, after log replay
4089 * we get an 8Kb file, with the last 4Kb extent as a hole
4090 * (zeroes), as if an expanding truncate happened,
4091 * instead of getting a file of 4Kb only.
4092 */
4093 err = logged_inode_size(log, inode, path,
4094 &logged_isize);
4095 if (err)
4096 goto out_unlock;
4097 }
4098 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4099 &BTRFS_I(inode)->runtime_flags)) {
4100 if (inode_only == LOG_INODE_EXISTS) {
4101 max_key.type = BTRFS_INODE_EXTREF_KEY;
4102 ret = drop_objectid_items(trans, log, path, ino,
4103 max_key.type);
4104 } else {
4105 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4106 &BTRFS_I(inode)->runtime_flags);
4107 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4108 &BTRFS_I(inode)->runtime_flags);
4109 ret = btrfs_truncate_inode_items(trans, log,
4110 inode, 0, 0);
4111 }
4112 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
4113 &BTRFS_I(inode)->runtime_flags) ||
4002 inode_only == LOG_INODE_EXISTS) { 4114 inode_only == LOG_INODE_EXISTS) {
4003 if (inode_only == LOG_INODE_ALL) 4115 if (inode_only == LOG_INODE_ALL) {
4116 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4117 &BTRFS_I(inode)->runtime_flags);
4004 fast_search = true; 4118 fast_search = true;
4005 max_key.type = BTRFS_XATTR_ITEM_KEY; 4119 max_key.type = BTRFS_XATTR_ITEM_KEY;
4120 } else {
4121 max_key.type = BTRFS_INODE_EXTREF_KEY;
4122 }
4006 ret = drop_objectid_items(trans, log, path, ino, 4123 ret = drop_objectid_items(trans, log, path, ino,
4007 max_key.type); 4124 max_key.type);
4008 } else { 4125 } else {
@@ -4046,7 +4163,8 @@ again:
4046 } 4163 }
4047 4164
4048 ret = copy_items(trans, inode, dst_path, path, &last_extent, 4165 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4049 ins_start_slot, ins_nr, inode_only); 4166 ins_start_slot, ins_nr, inode_only,
4167 logged_isize);
4050 if (ret < 0) { 4168 if (ret < 0) {
4051 err = ret; 4169 err = ret;
4052 goto out_unlock; 4170 goto out_unlock;
@@ -4070,7 +4188,7 @@ next_slot:
4070 if (ins_nr) { 4188 if (ins_nr) {
4071 ret = copy_items(trans, inode, dst_path, path, 4189 ret = copy_items(trans, inode, dst_path, path,
4072 &last_extent, ins_start_slot, 4190 &last_extent, ins_start_slot,
4073 ins_nr, inode_only); 4191 ins_nr, inode_only, logged_isize);
4074 if (ret < 0) { 4192 if (ret < 0) {
4075 err = ret; 4193 err = ret;
4076 goto out_unlock; 4194 goto out_unlock;
@@ -4091,7 +4209,8 @@ next_slot:
4091 } 4209 }
4092 if (ins_nr) { 4210 if (ins_nr) {
4093 ret = copy_items(trans, inode, dst_path, path, &last_extent, 4211 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4094 ins_start_slot, ins_nr, inode_only); 4212 ins_start_slot, ins_nr, inode_only,
4213 logged_isize);
4095 if (ret < 0) { 4214 if (ret < 0) {
4096 err = ret; 4215 err = ret;
4097 goto out_unlock; 4216 goto out_unlock;
@@ -4272,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4272 struct dentry *old_parent = NULL; 4391 struct dentry *old_parent = NULL;
4273 int ret = 0; 4392 int ret = 0;
4274 u64 last_committed = root->fs_info->last_trans_committed; 4393 u64 last_committed = root->fs_info->last_trans_committed;
4394 const struct dentry * const first_parent = parent;
4395 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4396 last_committed);
4275 4397
4276 sb = inode->i_sb; 4398 sb = inode->i_sb;
4277 4399
@@ -4327,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4327 goto end_trans; 4449 goto end_trans;
4328 } 4450 }
4329 4451
4330 inode_only = LOG_INODE_EXISTS;
4331 while (1) { 4452 while (1) {
4332 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4453 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4333 break; 4454 break;
@@ -4336,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4336 if (root != BTRFS_I(inode)->root) 4457 if (root != BTRFS_I(inode)->root)
4337 break; 4458 break;
4338 4459
4460 /*
4461 * On unlink we must make sure our immediate parent directory
4462 * inode is fully logged. This is to prevent leaving dangling
4463 * directory index entries and a wrong directory inode's i_size.
4464 * Not doing so can result in a directory being impossible to
4465 * delete after log replay (rmdir will always fail with error
4466 * -ENOTEMPTY).
4467 */
4468 if (did_unlink && parent == first_parent)
4469 inode_only = LOG_INODE_ALL;
4470 else
4471 inode_only = LOG_INODE_EXISTS;
4472
4339 if (BTRFS_I(inode)->generation > 4473 if (BTRFS_I(inode)->generation >
4340 root->fs_info->last_trans_committed) { 4474 root->fs_info->last_trans_committed ||
4475 inode_only == LOG_INODE_ALL) {
4341 ret = btrfs_log_inode(trans, root, inode, inode_only, 4476 ret = btrfs_log_inode(trans, root, inode, inode_only,
4342 0, LLONG_MAX, ctx); 4477 0, LLONG_MAX, ctx);
4343 if (ret) 4478 if (ret)
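
The data loss scenario the long comment above describes, laid out as a timeline (a restatement, not new behavior):

/*
 *   create file               i_size = 0
 *   write 4K at offset 0      i_size = 4K
 *   fsync                     log stores isize 4K
 *   write 4K at offset 4096   i_size = 8K, not yet fsynced
 *   add hard link, fsync some other file
 *                             inode logged as LOG_INODE_EXISTS
 *   power failure
 *
 * If that LOG_INODE_EXISTS pass had written the in-memory i_size
 * (8K), replay would build an 8K file whose last 4K is a hole, as
 * if an expanding truncate had run. Re-reading the isize already
 * stored in the log keeps replay at 4K:
 */
if (inode_only == LOG_INODE_EXISTS) {
	err = logged_inode_size(log, inode, path, &logged_isize);
	if (err)
		goto out_unlock;
}
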
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 50c5a8762aed..8222f6f74147 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1310,6 +1310,8 @@ again:
1310 if (ret) { 1310 if (ret) {
1311 btrfs_error(root->fs_info, ret, 1311 btrfs_error(root->fs_info, ret,
1312 "Failed to remove dev extent item"); 1312 "Failed to remove dev extent item");
1313 } else {
1314 trans->transaction->have_free_bgs = 1;
1313 } 1315 }
1314out: 1316out:
1315 btrfs_free_path(path); 1317 btrfs_free_path(path);
@@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
4196 4198
4197static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) 4199static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4198{ 4200{
4199 if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) 4201 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4200 return; 4202 return;
4201 4203
4202 btrfs_set_fs_incompat(info, RAID56); 4204 btrfs_set_fs_incompat(info, RAID56);
@@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
4803 4805
4804 BUG_ON(em->start > logical || em->start + em->len < logical); 4806 BUG_ON(em->start > logical || em->start + em->len < logical);
4805 map = (struct map_lookup *)em->bdev; 4807 map = (struct map_lookup *)em->bdev;
4806 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 4808 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
4807 BTRFS_BLOCK_GROUP_RAID6)) {
4808 len = map->stripe_len * nr_data_stripes(map); 4809 len = map->stripe_len * nr_data_stripes(map);
4809 }
4810 free_extent_map(em); 4810 free_extent_map(em);
4811 return len; 4811 return len;
4812} 4812}
@@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
4826 4826
4827 BUG_ON(em->start > logical || em->start + em->len < logical); 4827 BUG_ON(em->start > logical || em->start + em->len < logical);
4828 map = (struct map_lookup *)em->bdev; 4828 map = (struct map_lookup *)em->bdev;
4829 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 4829 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
4830 BTRFS_BLOCK_GROUP_RAID6))
4831 ret = 1; 4830 ret = 1;
4832 free_extent_map(em); 4831 free_extent_map(em);
4833 return ret; 4832 return ret;
@@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b)
4876} 4875}
4877 4876
4878/* Bubble-sort the stripe set to put the parity/syndrome stripes last */ 4877/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
4879static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) 4878static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
4880{ 4879{
4881 struct btrfs_bio_stripe s; 4880 struct btrfs_bio_stripe s;
4882 int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
4883 int i; 4881 int i;
4884 u64 l; 4882 u64 l;
4885 int again = 1; 4883 int again = 1;
4886 int m;
4887 4884
4888 while (again) { 4885 while (again) {
4889 again = 0; 4886 again = 0;
4890 for (i = 0; i < real_stripes - 1; i++) { 4887 for (i = 0; i < num_stripes - 1; i++) {
4891 if (parity_smaller(raid_map[i], raid_map[i+1])) { 4888 if (parity_smaller(bbio->raid_map[i],
4889 bbio->raid_map[i+1])) {
4892 s = bbio->stripes[i]; 4890 s = bbio->stripes[i];
4893 l = raid_map[i]; 4891 l = bbio->raid_map[i];
4894 bbio->stripes[i] = bbio->stripes[i+1]; 4892 bbio->stripes[i] = bbio->stripes[i+1];
4895 raid_map[i] = raid_map[i+1]; 4893 bbio->raid_map[i] = bbio->raid_map[i+1];
4896 bbio->stripes[i+1] = s; 4894 bbio->stripes[i+1] = s;
4897 raid_map[i+1] = l; 4895 bbio->raid_map[i+1] = l;
4898
4899 if (bbio->tgtdev_map) {
4900 m = bbio->tgtdev_map[i];
4901 bbio->tgtdev_map[i] =
4902 bbio->tgtdev_map[i + 1];
4903 bbio->tgtdev_map[i + 1] = m;
4904 }
4905 4896
4906 again = 1; 4897 again = 1;
4907 } 4898 }
@@ -4909,10 +4900,48 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
4909 } 4900 }
4910} 4901}
4911 4902
4903static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
4904{
4905 struct btrfs_bio *bbio = kzalloc(
4906 /* the size of the btrfs_bio */
4907 sizeof(struct btrfs_bio) +
4908 /* plus the variable array for the stripes */
4909 sizeof(struct btrfs_bio_stripe) * (total_stripes) +
4910 /* plus the variable array for the tgt dev */
4911 sizeof(int) * (real_stripes) +
4912 /*
4913 * plus the raid_map, which includes both the tgt dev
4914 * and the stripes
4915 */
4916 sizeof(u64) * (total_stripes),
4917 GFP_NOFS);
4918 if (!bbio)
4919 return NULL;
4920
4921 atomic_set(&bbio->error, 0);
4922 atomic_set(&bbio->refs, 1);
4923
4924 return bbio;
4925}
4926
4927void btrfs_get_bbio(struct btrfs_bio *bbio)
4928{
4929 WARN_ON(!atomic_read(&bbio->refs));
4930 atomic_inc(&bbio->refs);
4931}
4932
4933void btrfs_put_bbio(struct btrfs_bio *bbio)
4934{
4935 if (!bbio)
4936 return;
4937 if (atomic_dec_and_test(&bbio->refs))
4938 kfree(bbio);
4939}
4940
4912static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 4941static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4913 u64 logical, u64 *length, 4942 u64 logical, u64 *length,
4914 struct btrfs_bio **bbio_ret, 4943 struct btrfs_bio **bbio_ret,
4915 int mirror_num, u64 **raid_map_ret) 4944 int mirror_num, int need_raid_map)
4916{ 4945{
4917 struct extent_map *em; 4946 struct extent_map *em;
4918 struct map_lookup *map; 4947 struct map_lookup *map;
@@ -4925,7 +4954,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4925 u64 stripe_nr_orig; 4954 u64 stripe_nr_orig;
4926 u64 stripe_nr_end; 4955 u64 stripe_nr_end;
4927 u64 stripe_len; 4956 u64 stripe_len;
4928 u64 *raid_map = NULL;
4929 int stripe_index; 4957 int stripe_index;
4930 int i; 4958 int i;
4931 int ret = 0; 4959 int ret = 0;
@@ -4976,7 +5004,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4976 stripe_offset = offset - stripe_offset; 5004 stripe_offset = offset - stripe_offset;
4977 5005
4978 /* if we're here for raid56, we need to know the stripe aligned start */ 5006 /* if we're here for raid56, we need to know the stripe aligned start */
4979 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { 5007 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
4980 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); 5008 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
4981 raid56_full_stripe_start = offset; 5009 raid56_full_stripe_start = offset;
4982 5010
@@ -4989,8 +5017,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4989 5017
4990 if (rw & REQ_DISCARD) { 5018 if (rw & REQ_DISCARD) {
4991 /* we don't discard raid56 yet */ 5019 /* we don't discard raid56 yet */
4992 if (map->type & 5020 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
4993 (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
4994 ret = -EOPNOTSUPP; 5021 ret = -EOPNOTSUPP;
4995 goto out; 5022 goto out;
4996 } 5023 }
@@ -5000,7 +5027,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5000 /* For writes to RAID[56], allow a full stripeset across all disks. 5027 /* For writes to RAID[56], allow a full stripeset across all disks.
5001 For other RAID types and for RAID[56] reads, just allow a single 5028 For other RAID types and for RAID[56] reads, just allow a single
5002 stripe (on a single disk). */ 5029 stripe (on a single disk). */
5003 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) && 5030 if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
5004 (rw & REQ_WRITE)) { 5031 (rw & REQ_WRITE)) {
5005 max_len = stripe_len * nr_data_stripes(map) - 5032 max_len = stripe_len * nr_data_stripes(map) -
5006 (offset - raid56_full_stripe_start); 5033 (offset - raid56_full_stripe_start);
@@ -5047,7 +5074,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5047 u64 physical_of_found = 0; 5074 u64 physical_of_found = 0;
5048 5075
5049 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, 5076 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
5050 logical, &tmp_length, &tmp_bbio, 0, NULL); 5077 logical, &tmp_length, &tmp_bbio, 0, 0);
5051 if (ret) { 5078 if (ret) {
5052 WARN_ON(tmp_bbio != NULL); 5079 WARN_ON(tmp_bbio != NULL);
5053 goto out; 5080 goto out;
@@ -5061,7 +5088,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5061 * is not left of the left cursor 5088 * is not left of the left cursor
5062 */ 5089 */
5063 ret = -EIO; 5090 ret = -EIO;
5064 kfree(tmp_bbio); 5091 btrfs_put_bbio(tmp_bbio);
5065 goto out; 5092 goto out;
5066 } 5093 }
5067 5094
@@ -5096,11 +5123,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5096 } else { 5123 } else {
5097 WARN_ON(1); 5124 WARN_ON(1);
5098 ret = -EIO; 5125 ret = -EIO;
5099 kfree(tmp_bbio); 5126 btrfs_put_bbio(tmp_bbio);
5100 goto out; 5127 goto out;
5101 } 5128 }
5102 5129
5103 kfree(tmp_bbio); 5130 btrfs_put_bbio(tmp_bbio);
5104 } else if (mirror_num > map->num_stripes) { 5131 } else if (mirror_num > map->num_stripes) {
5105 mirror_num = 0; 5132 mirror_num = 0;
5106 } 5133 }
@@ -5166,15 +5193,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5166 mirror_num = stripe_index - old_stripe_index + 1; 5193 mirror_num = stripe_index - old_stripe_index + 1;
5167 } 5194 }
5168 5195
5169 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 5196 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5170 BTRFS_BLOCK_GROUP_RAID6)) { 5197 if (need_raid_map &&
5171 u64 tmp;
5172
5173 if (raid_map_ret &&
5174 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || 5198 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5175 mirror_num > 1)) { 5199 mirror_num > 1)) {
5176 int i, rot;
5177
5178 /* push stripe_nr back to the start of the full stripe */ 5200 /* push stripe_nr back to the start of the full stripe */
5179 stripe_nr = raid56_full_stripe_start; 5201 stripe_nr = raid56_full_stripe_start;
5180 do_div(stripe_nr, stripe_len * nr_data_stripes(map)); 5202 do_div(stripe_nr, stripe_len * nr_data_stripes(map));
@@ -5183,32 +5205,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5183 num_stripes = map->num_stripes; 5205 num_stripes = map->num_stripes;
5184 max_errors = nr_parity_stripes(map); 5206 max_errors = nr_parity_stripes(map);
5185 5207
5186 raid_map = kmalloc_array(num_stripes, sizeof(u64),
5187 GFP_NOFS);
5188 if (!raid_map) {
5189 ret = -ENOMEM;
5190 goto out;
5191 }
5192
5193 /* Work out the disk rotation on this stripe-set */
5194 tmp = stripe_nr;
5195 rot = do_div(tmp, num_stripes);
5196
5197 /* Fill in the logical address of each stripe */
5198 tmp = stripe_nr * nr_data_stripes(map);
5199 for (i = 0; i < nr_data_stripes(map); i++)
5200 raid_map[(i+rot) % num_stripes] =
5201 em->start + (tmp + i) * map->stripe_len;
5202
5203 raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
5204 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5205 raid_map[(i+rot+1) % num_stripes] =
5206 RAID6_Q_STRIPE;
5207
5208 *length = map->stripe_len; 5208 *length = map->stripe_len;
5209 stripe_index = 0; 5209 stripe_index = 0;
5210 stripe_offset = 0; 5210 stripe_offset = 0;
5211 } else { 5211 } else {
5212 u64 tmp;
5213
5212 /* 5214 /*
5213 * Mirror #0 or #1 means the original data block. 5215 * Mirror #0 or #1 means the original data block.
5214 * Mirror #2 is RAID5 parity block. 5216 * Mirror #2 is RAID5 parity block.
@@ -5246,17 +5248,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5246 tgtdev_indexes = num_stripes; 5248 tgtdev_indexes = num_stripes;
5247 } 5249 }
5248 5250
5249 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), 5251 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
5250 GFP_NOFS);
5251 if (!bbio) { 5252 if (!bbio) {
5252 kfree(raid_map);
5253 ret = -ENOMEM; 5253 ret = -ENOMEM;
5254 goto out; 5254 goto out;
5255 } 5255 }
5256 atomic_set(&bbio->error, 0);
5257 if (dev_replace_is_ongoing) 5256 if (dev_replace_is_ongoing)
5258 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); 5257 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
5259 5258
5259 /* build raid_map */
5260 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
5261 need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5262 mirror_num > 1)) {
5263 u64 tmp;
5264 int i, rot;
5265
5266 bbio->raid_map = (u64 *)((void *)bbio->stripes +
5267 sizeof(struct btrfs_bio_stripe) *
5268 num_alloc_stripes +
5269 sizeof(int) * tgtdev_indexes);
5270
5271 /* Work out the disk rotation on this stripe-set */
5272 tmp = stripe_nr;
5273 rot = do_div(tmp, num_stripes);
5274
5275 /* Fill in the logical address of each stripe */
5276 tmp = stripe_nr * nr_data_stripes(map);
5277 for (i = 0; i < nr_data_stripes(map); i++)
5278 bbio->raid_map[(i+rot) % num_stripes] =
5279 em->start + (tmp + i) * map->stripe_len;
5280
5281 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
5282 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5283 bbio->raid_map[(i+rot+1) % num_stripes] =
5284 RAID6_Q_STRIPE;
5285 }
5286
5260 if (rw & REQ_DISCARD) { 5287 if (rw & REQ_DISCARD) {
5261 int factor = 0; 5288 int factor = 0;
5262 int sub_stripes = 0; 5289 int sub_stripes = 0;
@@ -5340,6 +5367,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5340 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) 5367 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
5341 max_errors = btrfs_chunk_max_errors(map); 5368 max_errors = btrfs_chunk_max_errors(map);
5342 5369
5370 if (bbio->raid_map)
5371 sort_parity_stripes(bbio, num_stripes);
5372
5343 tgtdev_indexes = 0; 5373 tgtdev_indexes = 0;
5344 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && 5374 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
5345 dev_replace->tgtdev != NULL) { 5375 dev_replace->tgtdev != NULL) {
@@ -5427,6 +5457,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5427 } 5457 }
5428 5458
5429 *bbio_ret = bbio; 5459 *bbio_ret = bbio;
5460 bbio->map_type = map->type;
5430 bbio->num_stripes = num_stripes; 5461 bbio->num_stripes = num_stripes;
5431 bbio->max_errors = max_errors; 5462 bbio->max_errors = max_errors;
5432 bbio->mirror_num = mirror_num; 5463 bbio->mirror_num = mirror_num;
@@ -5443,10 +5474,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5443 bbio->stripes[0].physical = physical_to_patch_in_first_stripe; 5474 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
5444 bbio->mirror_num = map->num_stripes + 1; 5475 bbio->mirror_num = map->num_stripes + 1;
5445 } 5476 }
5446 if (raid_map) {
5447 sort_parity_stripes(bbio, raid_map);
5448 *raid_map_ret = raid_map;
5449 }
5450out: 5477out:
5451 if (dev_replace_is_ongoing) 5478 if (dev_replace_is_ongoing)
5452 btrfs_dev_replace_unlock(dev_replace); 5479 btrfs_dev_replace_unlock(dev_replace);
@@ -5459,17 +5486,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5459 struct btrfs_bio **bbio_ret, int mirror_num) 5486 struct btrfs_bio **bbio_ret, int mirror_num)
5460{ 5487{
5461 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, 5488 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
5462 mirror_num, NULL); 5489 mirror_num, 0);
5463} 5490}
5464 5491
5465/* For Scrub/replace */ 5492/* For Scrub/replace */
5466int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, 5493int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
5467 u64 logical, u64 *length, 5494 u64 logical, u64 *length,
5468 struct btrfs_bio **bbio_ret, int mirror_num, 5495 struct btrfs_bio **bbio_ret, int mirror_num,
5469 u64 **raid_map_ret) 5496 int need_raid_map)
5470{ 5497{
5471 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, 5498 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
5472 mirror_num, raid_map_ret); 5499 mirror_num, need_raid_map);
5473} 5500}
5474 5501
5475int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 5502int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -5511,8 +5538,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5511 do_div(length, map->num_stripes / map->sub_stripes); 5538 do_div(length, map->num_stripes / map->sub_stripes);
5512 else if (map->type & BTRFS_BLOCK_GROUP_RAID0) 5539 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5513 do_div(length, map->num_stripes); 5540 do_div(length, map->num_stripes);
5514 else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 5541 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5515 BTRFS_BLOCK_GROUP_RAID6)) {
5516 do_div(length, nr_data_stripes(map)); 5542 do_div(length, nr_data_stripes(map));
5517 rmap_len = map->stripe_len * nr_data_stripes(map); 5543 rmap_len = map->stripe_len * nr_data_stripes(map);
5518 } 5544 }
@@ -5565,7 +5591,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
5565 bio_endio_nodec(bio, err); 5591 bio_endio_nodec(bio, err);
5566 else 5592 else
5567 bio_endio(bio, err); 5593 bio_endio(bio, err);
5568 kfree(bbio); 5594 btrfs_put_bbio(bbio);
5569} 5595}
5570 5596
5571static void btrfs_end_bio(struct bio *bio, int err) 5597static void btrfs_end_bio(struct bio *bio, int err)
@@ -5808,7 +5834,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5808 u64 logical = (u64)bio->bi_iter.bi_sector << 9; 5834 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
5809 u64 length = 0; 5835 u64 length = 0;
5810 u64 map_length; 5836 u64 map_length;
5811 u64 *raid_map = NULL;
5812 int ret; 5837 int ret;
5813 int dev_nr = 0; 5838 int dev_nr = 0;
5814 int total_devs = 1; 5839 int total_devs = 1;
@@ -5819,7 +5844,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5819 5844
5820 btrfs_bio_counter_inc_blocked(root->fs_info); 5845 btrfs_bio_counter_inc_blocked(root->fs_info);
5821 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, 5846 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
5822 mirror_num, &raid_map); 5847 mirror_num, 1);
5823 if (ret) { 5848 if (ret) {
5824 btrfs_bio_counter_dec(root->fs_info); 5849 btrfs_bio_counter_dec(root->fs_info);
5825 return ret; 5850 return ret;
@@ -5832,15 +5857,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5832 bbio->fs_info = root->fs_info; 5857 bbio->fs_info = root->fs_info;
5833 atomic_set(&bbio->stripes_pending, bbio->num_stripes); 5858 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
5834 5859
5835 if (raid_map) { 5860 if (bbio->raid_map) {
5836 /* In this case, map_length has been set to the length of 5861 /* In this case, map_length has been set to the length of
5837 a single stripe; not the whole write */ 5862 a single stripe; not the whole write */
5838 if (rw & WRITE) { 5863 if (rw & WRITE) {
5839 ret = raid56_parity_write(root, bio, bbio, 5864 ret = raid56_parity_write(root, bio, bbio, map_length);
5840 raid_map, map_length);
5841 } else { 5865 } else {
5842 ret = raid56_parity_recover(root, bio, bbio, 5866 ret = raid56_parity_recover(root, bio, bbio, map_length,
5843 raid_map, map_length,
5844 mirror_num, 1); 5867 mirror_num, 1);
5845 } 5868 }
5846 5869
@@ -6238,17 +6261,22 @@ int btrfs_read_sys_array(struct btrfs_root *root)
6238 struct extent_buffer *sb; 6261 struct extent_buffer *sb;
6239 struct btrfs_disk_key *disk_key; 6262 struct btrfs_disk_key *disk_key;
6240 struct btrfs_chunk *chunk; 6263 struct btrfs_chunk *chunk;
6241 u8 *ptr; 6264 u8 *array_ptr;
6242 unsigned long sb_ptr; 6265 unsigned long sb_array_offset;
6243 int ret = 0; 6266 int ret = 0;
6244 u32 num_stripes; 6267 u32 num_stripes;
6245 u32 array_size; 6268 u32 array_size;
6246 u32 len = 0; 6269 u32 len = 0;
6247 u32 cur; 6270 u32 cur_offset;
6248 struct btrfs_key key; 6271 struct btrfs_key key;
6249 6272
6250 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, 6273 ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
6251 BTRFS_SUPER_INFO_SIZE); 6274 /*
6275 * This will create an extent buffer of nodesize; the superblock size
6276 * is fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
6277 * overallocate, but we can keep it as-is since only the first page is used.
6278 */
6279 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
6252 if (!sb) 6280 if (!sb)
6253 return -ENOMEM; 6281 return -ENOMEM;
6254 btrfs_set_buffer_uptodate(sb); 6282 btrfs_set_buffer_uptodate(sb);
@@ -6271,35 +6299,56 @@ int btrfs_read_sys_array(struct btrfs_root *root)
6271 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 6299 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
6272 array_size = btrfs_super_sys_array_size(super_copy); 6300 array_size = btrfs_super_sys_array_size(super_copy);
6273 6301
6274 ptr = super_copy->sys_chunk_array; 6302 array_ptr = super_copy->sys_chunk_array;
6275 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); 6303 sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
6276 cur = 0; 6304 cur_offset = 0;
6305
6306 while (cur_offset < array_size) {
6307 disk_key = (struct btrfs_disk_key *)array_ptr;
6308 len = sizeof(*disk_key);
6309 if (cur_offset + len > array_size)
6310 goto out_short_read;
6277 6311
6278 while (cur < array_size) {
6279 disk_key = (struct btrfs_disk_key *)ptr;
6280 btrfs_disk_key_to_cpu(&key, disk_key); 6312 btrfs_disk_key_to_cpu(&key, disk_key);
6281 6313
6282 len = sizeof(*disk_key); ptr += len; 6314 array_ptr += len;
6283 sb_ptr += len; 6315 sb_array_offset += len;
6284 cur += len; 6316 cur_offset += len;
6285 6317
6286 if (key.type == BTRFS_CHUNK_ITEM_KEY) { 6318 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6287 chunk = (struct btrfs_chunk *)sb_ptr; 6319 chunk = (struct btrfs_chunk *)sb_array_offset;
6320 /*
6321 * At least one btrfs_chunk with one stripe must be
6322 * present; the exact stripe count check comes afterwards
6323 */
6324 len = btrfs_chunk_item_size(1);
6325 if (cur_offset + len > array_size)
6326 goto out_short_read;
6327
6328 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6329 len = btrfs_chunk_item_size(num_stripes);
6330 if (cur_offset + len > array_size)
6331 goto out_short_read;
6332
6288 ret = read_one_chunk(root, &key, sb, chunk); 6333 ret = read_one_chunk(root, &key, sb, chunk);
6289 if (ret) 6334 if (ret)
6290 break; 6335 break;
6291 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6292 len = btrfs_chunk_item_size(num_stripes);
6293 } else { 6336 } else {
6294 ret = -EIO; 6337 ret = -EIO;
6295 break; 6338 break;
6296 } 6339 }
6297 ptr += len; 6340 array_ptr += len;
6298 sb_ptr += len; 6341 sb_array_offset += len;
6299 cur += len; 6342 cur_offset += len;
6300 } 6343 }
6301 free_extent_buffer(sb); 6344 free_extent_buffer(sb);
6302 return ret; 6345 return ret;
6346
6347out_short_read:
6348 printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
6349 len, cur_offset);
6350 free_extent_buffer(sb);
6351 return -EIO;
6303} 6352}
6304 6353
6305int btrfs_read_chunk_tree(struct btrfs_root *root) 6354int btrfs_read_chunk_tree(struct btrfs_root *root)
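
The single allocation alloc_btrfs_bio() makes carries three trailing arrays; their order is what lets __btrfs_map_block() locate raid_map with plain pointer arithmetic instead of a second kmalloc (annotated restatement of the scheme above):

/*
 * Layout of the kzalloc in alloc_btrfs_bio():
 *
 *   struct btrfs_bio                                 header
 *   struct btrfs_bio_stripe stripes[total_stripes]   flexible array
 *   int tgtdev_map[real_stripes]                     replace targets
 *   u64 raid_map[total_stripes]                      stripe logicals
 */
bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
bbio->raid_map = (u64 *)((void *)bbio->stripes +
			 sizeof(struct btrfs_bio_stripe) * num_alloc_stripes +
			 sizeof(int) * tgtdev_indexes);
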
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d6fe73c0f4a2..83069dec6898 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -295,8 +295,10 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
295#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) 295#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0)
296 296
297struct btrfs_bio { 297struct btrfs_bio {
298 atomic_t refs;
298 atomic_t stripes_pending; 299 atomic_t stripes_pending;
299 struct btrfs_fs_info *fs_info; 300 struct btrfs_fs_info *fs_info;
301 u64 map_type; /* get from map_lookup->type */
300 bio_end_io_t *end_io; 302 bio_end_io_t *end_io;
301 struct bio *orig_bio; 303 struct bio *orig_bio;
302 unsigned long flags; 304 unsigned long flags;
@@ -307,6 +309,12 @@ struct btrfs_bio {
307 int mirror_num; 309 int mirror_num;
308 int num_tgtdevs; 310 int num_tgtdevs;
309 int *tgtdev_map; 311 int *tgtdev_map;
312 /*
313 * logical block numbers for the start of each stripe
314 * The last one or two are p/q. These are sorted,
315 * so raid_map[0] is the start of our full stripe
316 */
317 u64 *raid_map;
310 struct btrfs_bio_stripe stripes[]; 318 struct btrfs_bio_stripe stripes[];
311}; 319};
312 320
@@ -388,19 +396,15 @@ struct btrfs_balance_control {
388 396
389int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 397int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
390 u64 end, u64 *length); 398 u64 end, u64 *length);
391 399void btrfs_get_bbio(struct btrfs_bio *bbio);
392#define btrfs_bio_size(total_stripes, real_stripes) \ 400void btrfs_put_bbio(struct btrfs_bio *bbio);
393 (sizeof(struct btrfs_bio) + \
394 (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \
395 (sizeof(int) * (real_stripes)))
396
397int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 401int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
398 u64 logical, u64 *length, 402 u64 logical, u64 *length,
399 struct btrfs_bio **bbio_ret, int mirror_num); 403 struct btrfs_bio **bbio_ret, int mirror_num);
400int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, 404int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
401 u64 logical, u64 *length, 405 u64 logical, u64 *length,
402 struct btrfs_bio **bbio_ret, int mirror_num, 406 struct btrfs_bio **bbio_ret, int mirror_num,
403 u64 **raid_map_ret); 407 int need_raid_map);
404int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 408int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
405 u64 chunk_start, u64 physical, u64 devid, 409 u64 chunk_start, u64 physical, u64 devid,
406 u64 **logical, int *naddrs, int *stripe_len); 410 u64 **logical, int *naddrs, int *stripe_len);
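
A hedged usage sketch of the new refcounting pair that replaces the old bare kfree(bbio): each holder pairs a get with a put, and the final put frees the whole allocation, raid_map included.

struct btrfs_bio *bbio = NULL;
int ret;

ret = btrfs_map_block(fs_info, READ, logical, &length, &bbio, 0);
if (ret)
	return ret;

btrfs_get_bbio(bbio);	/* a second holder takes a reference */
btrfs_put_bbio(bbio);	/* second holder done */
btrfs_put_bbio(bbio);	/* initial reference; frees bbio here */
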
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..883b93623bc5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
111 name, name_len, -1); 111 name, name_len, -1);
112 if (!di && (flags & XATTR_REPLACE)) 112 if (!di && (flags & XATTR_REPLACE))
113 ret = -ENODATA; 113 ret = -ENODATA;
114 else if (IS_ERR(di))
115 ret = PTR_ERR(di);
114 else if (di) 116 else if (di)
115 ret = btrfs_delete_one_dir_name(trans, root, path, di); 117 ret = btrfs_delete_one_dir_name(trans, root, path, di);
116 goto out; 118 goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
127 ASSERT(mutex_is_locked(&inode->i_mutex)); 129 ASSERT(mutex_is_locked(&inode->i_mutex));
128 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), 130 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
129 name, name_len, 0); 131 name, name_len, 0);
130 if (!di) { 132 if (!di)
131 ret = -ENODATA; 133 ret = -ENODATA;
134 else if (IS_ERR(di))
135 ret = PTR_ERR(di);
136 if (ret)
132 goto out; 137 goto out;
133 }
134 btrfs_release_path(path); 138 btrfs_release_path(path);
135 di = NULL; 139 di = NULL;
136 } 140 }
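
The xattr fix is an instance of a general pattern: a lookup that returns NULL for "not found" but ERR_PTR() for hard failures needs a three-way check before the pointer is used (sketch of the pattern, mirroring the second hunk):

di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
			name, name_len, 0);
if (!di)
	ret = -ENODATA;		/* attribute does not exist */
else if (IS_ERR(di))
	ret = PTR_ERR(di);	/* the lookup itself failed */
if (ret)
	goto out;		/* never dereference di on error */
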