author	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-19 17:36:00 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-19 17:36:00 -0500
commit	2b9fb532d4168e8974fe49709e2c4c8d5352a64c (patch)
tree	610cbe2d1bb32e28db135a767f158ade31452e2e
parent	4533f6e27a366ecc3da4876074ebfe0cc0ea4f0f (diff)
parent	a742994aa2e271eb8cd8e043d276515ec858ed73 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fixup some long standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and others

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6.  It's not clear yet if this is just made easier to
  trigger with this pull or if its a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
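[Editor's note: the ENOSPC work in this pull (see the BTRFS_MAX_EXTENT_SIZE hunks
in ctree.h and extent-tree.c below) starts charging delalloc reservations one
outstanding extent per 128 MiB chunk, rounded up, rather than one per write.
The following is a minimal userspace sketch of that arithmetic; it is not kernel
code, and only the constant and the round-up division are taken from the diff.]

    #include <stdio.h>
    #include <stdint.h>

    /* From the ctree.h hunk below: the accounting granularity for delalloc. */
    #define BTRFS_MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

    /*
     * Mirrors the div64_u64() round-up that drop_outstanding_extent() gains
     * in the extent-tree.c hunk: a byte range is charged one reserved extent
     * per BTRFS_MAX_EXTENT_SIZE chunk it spans.
     */
    static unsigned int num_outstanding_extents(uint64_t num_bytes)
    {
            return (unsigned int)((num_bytes + BTRFS_MAX_EXTENT_SIZE - 1) /
                                  BTRFS_MAX_EXTENT_SIZE);
    }

    int main(void)
    {
            /* A 300 MiB delalloc range spans three 128 MiB accounting units. */
            printf("%u\n", num_outstanding_extents(300ULL * 1024 * 1024));
            return 0;
    }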
-rw-r--r--  fs/btrfs/backref.c                    |  28
-rw-r--r--  fs/btrfs/backref.h                    |   3
-rw-r--r--  fs/btrfs/btrfs_inode.h                |   3
-rw-r--r--  fs/btrfs/ctree.c                      |  55
-rw-r--r--  fs/btrfs/ctree.h                      |  39
-rw-r--r--  fs/btrfs/delayed-inode.c              |  38
-rw-r--r--  fs/btrfs/dev-replace.c                |  25
-rw-r--r--  fs/btrfs/disk-io.c                    | 102
-rw-r--r--  fs/btrfs/disk-io.h                    |   6
-rw-r--r--  fs/btrfs/extent-tree.c                | 250
-rw-r--r--  fs/btrfs/extent_io.c                  |  87
-rw-r--r--  fs/btrfs/extent_io.h                  |  65
-rw-r--r--  fs/btrfs/free-space-cache.c           |  13
-rw-r--r--  fs/btrfs/inode-item.c                 |   9
-rw-r--r--  fs/btrfs/inode.c                      | 156
-rw-r--r--  fs/btrfs/qgroup.c                     |   3
-rw-r--r--  fs/btrfs/raid56.c                     | 103
-rw-r--r--  fs/btrfs/raid56.h                     |  11
-rw-r--r--  fs/btrfs/reada.c                      |  19
-rw-r--r--  fs/btrfs/relocation.c                 |  12
-rw-r--r--  fs/btrfs/scrub.c                      | 309
-rw-r--r--  fs/btrfs/send.c                       |   9
-rw-r--r--  fs/btrfs/super.c                      |   6
-rw-r--r--  fs/btrfs/sysfs.c                      |  10
-rw-r--r--  fs/btrfs/tests/extent-buffer-tests.c  |   2
-rw-r--r--  fs/btrfs/tests/extent-io-tests.c      |   3
-rw-r--r--  fs/btrfs/tests/inode-tests.c          |   4
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c         |  23
-rw-r--r--  fs/btrfs/transaction.c                |  27
-rw-r--r--  fs/btrfs/transaction.h                |   7
-rw-r--r--  fs/btrfs/tree-log.c                   | 234
-rw-r--r--  fs/btrfs/volumes.c                    | 242
-rw-r--r--  fs/btrfs/volumes.h                    |  18
-rw-r--r--  include/uapi/linux/btrfs.h            |   3
34 files changed, 1063 insertions(+), 861 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8729cf68d2fe..f55721ff9385 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1246,25 +1246,6 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-/*
- * this makes the path point to (inum INODE_ITEM ioff)
- */
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-			struct btrfs_path *path)
-{
-	struct btrfs_key key;
-	return btrfs_find_item(fs_root, path, inum, ioff,
-			BTRFS_INODE_ITEM_KEY, &key);
-}
-
-static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-			  struct btrfs_path *path,
-			  struct btrfs_key *found_key)
-{
-	return btrfs_find_item(fs_root, path, inum, ioff,
-			BTRFS_INODE_REF_KEY, found_key);
-}
-
 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			  u64 start_off, struct btrfs_path *path,
 			  struct btrfs_inode_extref **ret_extref,
@@ -1374,7 +1355,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
 		}
-		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+		ret = btrfs_find_item(fs_root, path, parent, 0,
+				BTRFS_INODE_REF_KEY, &found_key);
 		if (ret > 0)
 			ret = -ENOENT;
 		if (ret)
@@ -1727,8 +1709,10 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 	struct btrfs_key found_key;
 
 	while (!ret) {
-		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
-				     &found_key);
+		ret = btrfs_find_item(fs_root, path, inum,
+				parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
+				&found_key);
+
 		if (ret < 0)
 			break;
 		if (ret) {
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 2a1ac6bfc724..9c41fbac3009 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -32,9 +32,6 @@ struct inode_fs_paths {
 typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
 		void *ctx);
 
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-		    struct btrfs_path *path);
-
 int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 			struct btrfs_path *path, struct btrfs_key *found_key,
 			u64 *flags);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4aadadcfab20..de5e4f2adfea 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -185,6 +185,9 @@ struct btrfs_inode {
 
 	struct btrfs_delayed_node *delayed_node;
 
+	/* File creation time. */
+	struct timespec i_otime;
+
 	struct inode vfs_inode;
 };
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 14a72ed14ef7..993642199326 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -213,11 +213,19 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
  */
 static void add_root_to_dirty_list(struct btrfs_root *root)
 {
+	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
+	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
+		return;
+
 	spin_lock(&root->fs_info->trans_lock);
-	if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
-	    list_empty(&root->dirty_list)) {
-		list_add(&root->dirty_list,
-			 &root->fs_info->dirty_cowonly_roots);
+	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
+		/* Want the extent tree to be the last on the list */
+		if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
+			list_move_tail(&root->dirty_list,
+				       &root->fs_info->dirty_cowonly_roots);
+		else
+			list_move(&root->dirty_list,
+				  &root->fs_info->dirty_cowonly_roots);
 	}
 	spin_unlock(&root->fs_info->trans_lock);
 }
@@ -1363,8 +1371,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 
 	if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
 		BUG_ON(tm->slot != 0);
-		eb_rewin = alloc_dummy_extent_buffer(eb->start,
-						fs_info->tree_root->nodesize);
+		eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
 		if (!eb_rewin) {
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
@@ -1444,7 +1451,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	} else if (old_root) {
 		btrfs_tree_read_unlock(eb_root);
 		free_extent_buffer(eb_root);
-		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+		eb = alloc_dummy_extent_buffer(root->fs_info, logical);
 	} else {
 		btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
 		eb = btrfs_clone_extent_buffer(eb_root);
@@ -2282,7 +2289,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			readahead_tree_block(root, search, blocksize);
+			readahead_tree_block(root, search);
 			nread += blocksize;
 		}
 		nscan++;
@@ -2301,7 +2308,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	u64 gen;
 	u64 block1 = 0;
 	u64 block2 = 0;
-	int blocksize;
 
 	parent = path->nodes[level + 1];
 	if (!parent)
@@ -2309,7 +2315,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(parent);
 	slot = path->slots[level + 1];
-	blocksize = root->nodesize;
 
 	if (slot > 0) {
 		block1 = btrfs_node_blockptr(parent, slot - 1);
@@ -2334,9 +2339,9 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	}
 
 	if (block1)
-		readahead_tree_block(root, block1, blocksize);
+		readahead_tree_block(root, block1);
 	if (block2)
-		readahead_tree_block(root, block2, blocksize);
+		readahead_tree_block(root, block2);
 }
 
 
@@ -2609,32 +2614,24 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
 	return 0;
 }
 
-int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
 		u64 iobjectid, u64 ioff, u8 key_type,
 		struct btrfs_key *found_key)
 {
 	int ret;
 	struct btrfs_key key;
 	struct extent_buffer *eb;
-	struct btrfs_path *path;
+
+	ASSERT(path);
+	ASSERT(found_key);
 
 	key.type = key_type;
 	key.objectid = iobjectid;
 	key.offset = ioff;
 
-	if (found_path == NULL) {
-		path = btrfs_alloc_path();
-		if (!path)
-			return -ENOMEM;
-	} else
-		path = found_path;
-
 	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
-	if ((ret < 0) || (found_key == NULL)) {
-		if (path != found_path)
-			btrfs_free_path(path);
+	if (ret < 0)
 		return ret;
-	}
 
 	eb = path->nodes[0];
 	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
@@ -3383,7 +3380,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = BTRFS_WRITE_LOCK;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -4356,13 +4353,15 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 	path->search_for_split = 1;
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 	path->search_for_split = 0;
+	if (ret > 0)
+		ret = -EAGAIN;
 	if (ret < 0)
 		goto err;
 
 	ret = -EAGAIN;
 	leaf = path->nodes[0];
-	/* if our item isn't there or got smaller, return now */
-	if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+	/* if our item isn't there, return now */
+	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
 		goto err;
 
 	/* the leaf has changed, it now has room. return now */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0b180708bf79..84c3b00f3de8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 
 #define BTRFS_DIRTY_METADATA_THRESH	(32 * 1024 * 1024)
 
+#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
@@ -1020,6 +1022,9 @@ enum btrfs_raid_types {
 					 BTRFS_BLOCK_GROUP_RAID6 |   \
 					 BTRFS_BLOCK_GROUP_DUP |     \
 					 BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
+					 BTRFS_BLOCK_GROUP_RAID6)
+
 /*
  * We need a bit for restriper to be able to tell when chunks of type
  * SINGLE are available.  This "extended" profile format is used in
@@ -1239,7 +1244,6 @@ enum btrfs_disk_cache_state {
 	BTRFS_DC_ERROR		= 1,
 	BTRFS_DC_CLEAR		= 2,
 	BTRFS_DC_SETUP		= 3,
-	BTRFS_DC_NEED_WRITE	= 4,
 };
 
 struct btrfs_caching_control {
@@ -1277,7 +1281,6 @@ struct btrfs_block_group_cache {
 	unsigned long full_stripe_len;
 
 	unsigned int ro:1;
-	unsigned int dirty:1;
 	unsigned int iref:1;
 	unsigned int has_caching_ctl:1;
 	unsigned int removed:1;
@@ -1315,6 +1318,9 @@ struct btrfs_block_group_cache {
 	struct list_head ro_list;
 
 	atomic_t trimming;
+
+	/* For dirty block groups */
+	struct list_head dirty_list;
 };
 
 /* delayed seq elem */
@@ -1741,6 +1747,7 @@ struct btrfs_fs_info {
 
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
+	struct mutex unused_bg_unpin_mutex;
 
 	/* For btrfs to record security options */
 	struct security_mnt_opts security_opts;
@@ -1776,6 +1783,7 @@ struct btrfs_subvolume_writers {
 #define BTRFS_ROOT_DEFRAG_RUNNING	6
 #define BTRFS_ROOT_FORCE_COW		7
 #define BTRFS_ROOT_MULTI_LOG_TASKS	8
+#define BTRFS_ROOT_DIRTY		9
 
 /*
  * in ram representation of the tree.  extent_root is used for all allocations
@@ -1794,8 +1802,6 @@ struct btrfs_root {
 	struct btrfs_fs_info *fs_info;
 	struct extent_io_tree dirty_log_pages;
 
-	struct kobject root_kobj;
-	struct completion kobj_unregister;
 	struct mutex objectid_mutex;
 
 	spinlock_t accounting_lock;
@@ -2465,31 +2471,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-static inline struct btrfs_timespec *
-btrfs_inode_atime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, atime);
-	return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, mtime);
-	return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, ctime);
-	return (struct btrfs_timespec *)ptr;
-}
-
 BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
 BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index de4e70fb3cbb..82f0c7c95474 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1755,27 +1755,31 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
 	btrfs_set_stack_inode_block_group(inode_item, 0);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->atime,
 				     inode->i_atime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->atime,
 				      inode->i_atime.tv_nsec);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->mtime,
 				     inode->i_mtime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->mtime,
 				      inode->i_mtime.tv_nsec);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->ctime,
 				     inode->i_ctime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
 				      inode->i_ctime.tv_nsec);
+
+	btrfs_set_stack_timespec_sec(&inode_item->otime,
+				     BTRFS_I(inode)->i_otime.tv_sec);
+	btrfs_set_stack_timespec_nsec(&inode_item->otime,
+				      BTRFS_I(inode)->i_otime.tv_nsec);
 }
 
 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 {
 	struct btrfs_delayed_node *delayed_node;
 	struct btrfs_inode_item *inode_item;
-	struct btrfs_timespec *tspec;
 
 	delayed_node = btrfs_get_delayed_node(inode);
 	if (!delayed_node)
@@ -1802,17 +1806,19 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	*rdev = btrfs_stack_inode_rdev(inode_item);
 	BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
 
-	tspec = btrfs_inode_atime(inode_item);
-	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
+	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
+
+	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
+	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
 
-	tspec = btrfs_inode_mtime(inode_item);
-	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
+	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
 
-	tspec = btrfs_inode_ctime(inode_item);
-	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	BTRFS_I(inode)->i_otime.tv_sec =
+		btrfs_stack_timespec_sec(&inode_item->otime);
+	BTRFS_I(inode)->i_otime.tv_nsec =
+		btrfs_stack_timespec_nsec(&inode_item->otime);
 
 	inode->i_generation = BTRFS_I(inode)->generation;
 	BTRFS_I(inode)->index_cnt = (u64)-1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ca6a3a3b6b6c..5ec03d999c37 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -440,18 +440,9 @@ leave:
  */
 static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
 {
-	s64 writers;
-	DEFINE_WAIT(wait);
-
 	set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
-	do {
-		prepare_to_wait(&fs_info->replace_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		writers = percpu_counter_sum(&fs_info->bio_counter);
-		if (writers)
-			schedule();
-		finish_wait(&fs_info->replace_wait, &wait);
-	} while (writers);
+	wait_event(fs_info->replace_wait, !percpu_counter_sum(
+		   &fs_info->bio_counter));
 }
 
 /*
@@ -932,15 +923,15 @@ void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
 {
-	DEFINE_WAIT(wait);
-again:
-	percpu_counter_inc(&fs_info->bio_counter);
-	if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
+	while (1) {
+		percpu_counter_inc(&fs_info->bio_counter);
+		if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
+				     &fs_info->fs_state)))
+			break;
+
 		btrfs_bio_counter_dec(fs_info);
 		wait_event(fs_info->replace_wait,
 			   !test_bit(BTRFS_FS_STATE_DEV_REPLACING,
 				     &fs_info->fs_state));
-		goto again;
 	}
-
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1afb18226da8..f79f38542a73 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 			memcpy(&found, result, csum_size);
 
 			read_extent_buffer(buf, &val, 0, csum_size);
-			printk_ratelimited(KERN_INFO
+			printk_ratelimited(KERN_WARNING
 				"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
 				"level %d\n",
 				root->fs_info->sb->s_id, buf->start,
@@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 		ret = 0;
 		goto out;
 	}
-	printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
+	printk_ratelimited(KERN_ERR
+	    "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
 			eb->fs_info->sb->s_id, eb->start,
 			parent_transid, btrfs_header_generation(eb));
 	ret = 1;
@@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != eb->start) {
-		printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
+		printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
 			       "%llu %llu\n",
 			       eb->fs_info->sb->s_id, found_start, eb->start);
 		ret = -EIO;
 		goto err;
 	}
 	if (check_tree_block_fsid(root, eb)) {
-		printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
+		printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
 			       eb->fs_info->sb->s_id, eb->start);
 		ret = -EIO;
 		goto err;
 	}
 	found_level = btrfs_header_level(eb);
 	if (found_level >= BTRFS_MAX_LEVEL) {
-		btrfs_info(root->fs_info, "bad tree block level %d",
+		btrfs_err(root->fs_info, "bad tree block level %d",
 			   (int)btrfs_header_level(eb));
 		ret = -EIO;
 		goto err;
@@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
 	.set_page_dirty = btree_set_page_dirty,
 };
 
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
 {
 	struct extent_buffer *buf = NULL;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
 	free_extent_buffer(buf);
 }
 
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb)
 {
 	struct extent_buffer *buf = NULL;
@@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 	struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
 	int ret;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return 0;
 
@@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 }
 
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
-						 u64 bytenr, u32 blocksize)
+						 u64 bytenr)
 {
 	if (btrfs_test_is_dummy_root(root))
-		return alloc_test_extent_buffer(root->fs_info, bytenr,
-						blocksize);
-	return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
+		return alloc_test_extent_buffer(root->fs_info, bytenr);
+	return alloc_extent_buffer(root->fs_info, bytenr);
 }
 
 
@@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 	struct extent_buffer *buf = NULL;
 	int ret;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return NULL;
 
@@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
 	memset(&root->root_key, 0, sizeof(root->root_key));
 	memset(&root->root_item, 0, sizeof(root->root_item));
 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
-	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
 	if (fs_info)
 		root->defrag_trans_start = fs_info->generation;
 	else
 		root->defrag_trans_start = 0;
-	init_completion(&root->kobj_unregister);
 	root->root_key.objectid = objectid;
 	root->anon_dev = 0;
 
@@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 				     bool check_ref)
 {
 	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
 	int ret;
 
 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@@ -1669,8 +1669,17 @@ again:
 	if (ret)
 		goto fail;
 
-	ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
-			location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	key.type = BTRFS_ORPHAN_ITEM_KEY;
+	key.offset = location->objectid;
+
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+	btrfs_free_path(path);
 	if (ret < 0)
 		goto fail;
 	if (ret == 0)
@@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->qgroup_op_lock);
 	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
+	mutex_init(&fs_info->unused_bg_unpin_mutex);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
@@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb,
 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
 	if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
-		printk(KERN_ERR "BTRFS: has skinny extents\n");
+		printk(KERN_INFO "BTRFS: has skinny extents\n");
 
 	/*
 	 * flag our filesystem as having big metadata blocks if
@@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb,
 	 */
 	if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
 	    (sectorsize != nodesize)) {
-		printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
+		printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
 				"are not allowed for mixed block groups on %s\n",
 				sb->s_id);
 		goto fail_alloc;
@@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb,
 	sb->s_blocksize_bits = blksize_bits(sectorsize);
 
 	if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-		printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
+		printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
 
 	if (sectorsize != PAGE_SIZE) {
-		printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
+		printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
 		       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
 		goto fail_sb_buffer;
 	}
@@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb,
 	ret = btrfs_read_sys_array(tree_root);
 	mutex_unlock(&fs_info->chunk_mutex);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to read the system "
+		printk(KERN_ERR "BTRFS: failed to read the system "
 		       "array on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
@@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb,
 					   generation);
 	if (!chunk_root->node ||
 	    !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
-		printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb,
 
 	ret = btrfs_read_chunk_tree(chunk_root);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb,
 	btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
 	if (!fs_devices->latest_bdev) {
-		printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2765,7 +2775,7 @@ retry_root_backup:
 
 	ret = btrfs_recover_balance(fs_info);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to recover balance\n");
+		printk(KERN_ERR "BTRFS: failed to recover balance\n");
 		goto fail_block_groups;
 	}
 
@@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
 				btrfs_super_log_root(sb));
 
+	/*
+	 * Check the lower bound, the alignment and other constraints are
+	 * checked later.
+	 */
+	if (btrfs_super_nodesize(sb) < 4096) {
+		printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
+				btrfs_super_nodesize(sb));
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sectorsize(sb) < 4096) {
+		printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
+				btrfs_super_sectorsize(sb));
+		ret = -EINVAL;
+	}
+
 	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
 		printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
 				fs_info->fsid, sb->dev_item.fsid);
@@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	if (btrfs_super_num_devices(sb) > (1UL << 31))
 		printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
 				btrfs_super_num_devices(sb));
+	if (btrfs_super_num_devices(sb) == 0) {
+		printk(KERN_ERR "BTRFS: number of devices is 0\n");
+		ret = -EINVAL;
+	}
 
 	if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
 		printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@@ -3881,6 +3910,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	}
 
 	/*
+	 * Obvious sys_chunk_array corruptions, it must hold at least one key
+	 * and one chunk
+	 */
+	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
+				btrfs_super_sys_array_size(sb),
+				BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+			+ sizeof(struct btrfs_chunk)) {
+		printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+				btrfs_super_sys_array_size(sb),
+				sizeof(struct btrfs_disk_key)
+				+ sizeof(struct btrfs_chunk));
+		ret = -EINVAL;
+	}
+
+	/*
 	 * The generation is a global counter, we'll trust it more than the others
 	 * but it's still possible that it's the one that's wrong.
 	 */
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 414651821fb3..27d44c0fd236 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -46,11 +46,11 @@ struct btrfs_fs_devices;
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 				      u64 parent_transid);
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize);
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr);
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb);
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
-						   u64 bytenr, u32 blocksize);
+						   u64 bytenr);
 void clean_tree_block(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, struct extent_buffer *buf);
 int open_ctree(struct super_block *sb,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a684086c3c81..571f402d3fc4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -74,8 +74,9 @@ enum {
74 RESERVE_ALLOC_NO_ACCOUNT = 2, 74 RESERVE_ALLOC_NO_ACCOUNT = 2,
75}; 75};
76 76
77static int update_block_group(struct btrfs_root *root, 77static int update_block_group(struct btrfs_trans_handle *trans,
78 u64 bytenr, u64 num_bytes, int alloc); 78 struct btrfs_root *root, u64 bytenr,
79 u64 num_bytes, int alloc);
79static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 80static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
80 struct btrfs_root *root, 81 struct btrfs_root *root,
81 u64 bytenr, u64 num_bytes, u64 parent, 82 u64 bytenr, u64 num_bytes, u64 parent,
@@ -1925,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1925 */ 1926 */
1926 ret = 0; 1927 ret = 0;
1927 } 1928 }
1928 kfree(bbio); 1929 btrfs_put_bbio(bbio);
1929 } 1930 }
1930 1931
1931 if (actual_bytes) 1932 if (actual_bytes)
@@ -2768,7 +2769,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2768 struct btrfs_delayed_ref_head *head; 2769 struct btrfs_delayed_ref_head *head;
2769 int ret; 2770 int ret;
2770 int run_all = count == (unsigned long)-1; 2771 int run_all = count == (unsigned long)-1;
2771 int run_most = 0;
2772 2772
2773 /* We'll clean this up in btrfs_cleanup_transaction */ 2773 /* We'll clean this up in btrfs_cleanup_transaction */
2774 if (trans->aborted) 2774 if (trans->aborted)
@@ -2778,10 +2778,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2778 root = root->fs_info->tree_root; 2778 root = root->fs_info->tree_root;
2779 2779
2780 delayed_refs = &trans->transaction->delayed_refs; 2780 delayed_refs = &trans->transaction->delayed_refs;
2781 if (count == 0) { 2781 if (count == 0)
2782 count = atomic_read(&delayed_refs->num_entries) * 2; 2782 count = atomic_read(&delayed_refs->num_entries) * 2;
2783 run_most = 1;
2784 }
2785 2783
2786again: 2784again:
2787#ifdef SCRAMBLE_DELAYED_REFS 2785#ifdef SCRAMBLE_DELAYED_REFS
@@ -3315,120 +3313,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3315 struct btrfs_root *root) 3313 struct btrfs_root *root)
3316{ 3314{
3317 struct btrfs_block_group_cache *cache; 3315 struct btrfs_block_group_cache *cache;
3318 int err = 0; 3316 struct btrfs_transaction *cur_trans = trans->transaction;
3317 int ret = 0;
3319 struct btrfs_path *path; 3318 struct btrfs_path *path;
3320 u64 last = 0; 3319
3320 if (list_empty(&cur_trans->dirty_bgs))
3321 return 0;
3321 3322
3322 path = btrfs_alloc_path(); 3323 path = btrfs_alloc_path();
3323 if (!path) 3324 if (!path)
3324 return -ENOMEM; 3325 return -ENOMEM;
3325 3326
3326again: 3327 /*
3327 while (1) { 3328 * We don't need the lock here since we are protected by the transaction
3328 cache = btrfs_lookup_first_block_group(root->fs_info, last); 3329 * commit. We want to do the cache_save_setup first and then run the
3329 while (cache) { 3330 * delayed refs to make sure we have the best chance at doing this all
3330 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 3331 * in one shot.
3331 break; 3332 */
3332 cache = next_block_group(root, cache); 3333 while (!list_empty(&cur_trans->dirty_bgs)) {
3333 } 3334 cache = list_first_entry(&cur_trans->dirty_bgs,
3334 if (!cache) { 3335 struct btrfs_block_group_cache,
3335 if (last == 0) 3336 dirty_list);
3336 break; 3337 list_del_init(&cache->dirty_list);
3337 last = 0; 3338 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3338 continue; 3339 cache_save_setup(cache, trans, path);
3339 } 3340 if (!ret)
3340 err = cache_save_setup(cache, trans, path); 3341 ret = btrfs_run_delayed_refs(trans, root,
3341 last = cache->key.objectid + cache->key.offset; 3342 (unsigned long) -1);
3342 btrfs_put_block_group(cache); 3343 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
3343 } 3344 btrfs_write_out_cache(root, trans, cache, path);
3344 3345 if (!ret)
3345 while (1) { 3346 ret = write_one_cache_group(trans, root, path, cache);
3346 if (last == 0) {
3347 err = btrfs_run_delayed_refs(trans, root,
3348 (unsigned long)-1);
3349 if (err) /* File system offline */
3350 goto out;
3351 }
3352
3353 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3354 while (cache) {
3355 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
3356 btrfs_put_block_group(cache);
3357 goto again;
3358 }
3359
3360 if (cache->dirty)
3361 break;
3362 cache = next_block_group(root, cache);
3363 }
3364 if (!cache) {
3365 if (last == 0)
3366 break;
3367 last = 0;
3368 continue;
3369 }
3370
3371 if (cache->disk_cache_state == BTRFS_DC_SETUP)
3372 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
3373 cache->dirty = 0;
3374 last = cache->key.objectid + cache->key.offset;
3375
3376 err = write_one_cache_group(trans, root, path, cache);
3377 btrfs_put_block_group(cache);
3378 if (err) /* File system offline */
3379 goto out;
3380 }
3381
3382 while (1) {
3383 /*
3384 * I don't think this is needed since we're just marking our
3385 * preallocated extent as written, but just in case it can't
3386 * hurt.
3387 */
3388 if (last == 0) {
3389 err = btrfs_run_delayed_refs(trans, root,
3390 (unsigned long)-1);
3391 if (err) /* File system offline */
3392 goto out;
3393 }
3394
3395 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3396 while (cache) {
3397 /*
3398 * Really this shouldn't happen, but it could if we
3399 * couldn't write the entire preallocated extent and
3400 * splitting the extent resulted in a new block.
3401 */
3402 if (cache->dirty) {
3403 btrfs_put_block_group(cache);
3404 goto again;
3405 }
3406 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3407 break;
3408 cache = next_block_group(root, cache);
3409 }
3410 if (!cache) {
3411 if (last == 0)
3412 break;
3413 last = 0;
3414 continue;
3415 }
3416
3417 err = btrfs_write_out_cache(root, trans, cache, path);
3418
3419 /*
3420 * If we didn't have an error then the cache state is still
3421 * NEED_WRITE, so we can set it to WRITTEN.
3422 */
3423 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3424 cache->disk_cache_state = BTRFS_DC_WRITTEN;
3425 last = cache->key.objectid + cache->key.offset;
3426 btrfs_put_block_group(cache); 3347 btrfs_put_block_group(cache);
3427 } 3348 }
3428out:
3429 3349
3430 btrfs_free_path(path); 3350 btrfs_free_path(path);
3431 return err; 3351 return ret;
3432} 3352}
3433 3353
3434int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 3354int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -5043,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
5043/** 4963/**
5044 * drop_outstanding_extent - drop an outstanding extent 4964 * drop_outstanding_extent - drop an outstanding extent
5045 * @inode: the inode we're dropping the extent for 4965 * @inode: the inode we're dropping the extent for
4966 * @num_bytes: the number of bytes we're relaseing.
5046 * 4967 *
5047 * This is called when we are freeing up an outstanding extent, either called 4968 * This is called when we are freeing up an outstanding extent, either called
5048 * after an error or after an extent is written. This will return the number of 4969 * after an error or after an extent is written. This will return the number of
5049 * reserved extents that need to be freed. This must be called with 4970 * reserved extents that need to be freed. This must be called with
5050 * BTRFS_I(inode)->lock held. 4971 * BTRFS_I(inode)->lock held.
5051 */ 4972 */
5052static unsigned drop_outstanding_extent(struct inode *inode) 4973static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
5053{ 4974{
5054 unsigned drop_inode_space = 0; 4975 unsigned drop_inode_space = 0;
5055 unsigned dropped_extents = 0; 4976 unsigned dropped_extents = 0;
4977 unsigned num_extents = 0;
5056 4978
5057 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 4979 num_extents = (unsigned)div64_u64(num_bytes +
5058 BTRFS_I(inode)->outstanding_extents--; 4980 BTRFS_MAX_EXTENT_SIZE - 1,
4981 BTRFS_MAX_EXTENT_SIZE);
4982 ASSERT(num_extents);
4983 ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
4984 BTRFS_I(inode)->outstanding_extents -= num_extents;
5059 4985
5060 if (BTRFS_I(inode)->outstanding_extents == 0 && 4986 if (BTRFS_I(inode)->outstanding_extents == 0 &&
5061 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, 4987 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5226,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5226 5152
5227out_fail: 5153out_fail:
5228 spin_lock(&BTRFS_I(inode)->lock); 5154 spin_lock(&BTRFS_I(inode)->lock);
5229 dropped = drop_outstanding_extent(inode); 5155 dropped = drop_outstanding_extent(inode, num_bytes);
5230 /* 5156 /*
5231 * If the inodes csum_bytes is the same as the original 5157 * If the inodes csum_bytes is the same as the original
5232 * csum_bytes then we know we haven't raced with any free()ers 5158 * csum_bytes then we know we haven't raced with any free()ers
@@ -5305,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5305 5231
5306 num_bytes = ALIGN(num_bytes, root->sectorsize); 5232 num_bytes = ALIGN(num_bytes, root->sectorsize);
5307 spin_lock(&BTRFS_I(inode)->lock); 5233 spin_lock(&BTRFS_I(inode)->lock);
5308 dropped = drop_outstanding_extent(inode); 5234 dropped = drop_outstanding_extent(inode, num_bytes);
5309 5235
5310 if (num_bytes) 5236 if (num_bytes)
5311 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 5237 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
@@ -5375,8 +5301,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
5375 btrfs_free_reserved_data_space(inode, num_bytes); 5301 btrfs_free_reserved_data_space(inode, num_bytes);
5376} 5302}
5377 5303
5378static int update_block_group(struct btrfs_root *root, 5304static int update_block_group(struct btrfs_trans_handle *trans,
5379 u64 bytenr, u64 num_bytes, int alloc) 5305 struct btrfs_root *root, u64 bytenr,
5306 u64 num_bytes, int alloc)
5380{ 5307{
5381 struct btrfs_block_group_cache *cache = NULL; 5308 struct btrfs_block_group_cache *cache = NULL;
5382 struct btrfs_fs_info *info = root->fs_info; 5309 struct btrfs_fs_info *info = root->fs_info;
@@ -5414,6 +5341,14 @@ static int update_block_group(struct btrfs_root *root,
5414 if (!alloc && cache->cached == BTRFS_CACHE_NO) 5341 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5415 cache_block_group(cache, 1); 5342 cache_block_group(cache, 1);
5416 5343
5344 spin_lock(&trans->transaction->dirty_bgs_lock);
5345 if (list_empty(&cache->dirty_list)) {
5346 list_add_tail(&cache->dirty_list,
5347 &trans->transaction->dirty_bgs);
5348 btrfs_get_block_group(cache);
5349 }
5350 spin_unlock(&trans->transaction->dirty_bgs_lock);
5351
5417 byte_in_group = bytenr - cache->key.objectid; 5352 byte_in_group = bytenr - cache->key.objectid;
5418 WARN_ON(byte_in_group > cache->key.offset); 5353 WARN_ON(byte_in_group > cache->key.offset);
5419 5354
@@ -5424,7 +5359,6 @@ static int update_block_group(struct btrfs_root *root,
5424 cache->disk_cache_state < BTRFS_DC_CLEAR) 5359 cache->disk_cache_state < BTRFS_DC_CLEAR)
5425 cache->disk_cache_state = BTRFS_DC_CLEAR; 5360 cache->disk_cache_state = BTRFS_DC_CLEAR;
5426 5361
5427 cache->dirty = 1;
5428 old_val = btrfs_block_group_used(&cache->item); 5362 old_val = btrfs_block_group_used(&cache->item);
5429 num_bytes = min(total, cache->key.offset - byte_in_group); 5363 num_bytes = min(total, cache->key.offset - byte_in_group);
5430 if (alloc) { 5364 if (alloc) {
@@ -5807,10 +5741,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5807 unpin = &fs_info->freed_extents[0]; 5741 unpin = &fs_info->freed_extents[0];
5808 5742
5809 while (1) { 5743 while (1) {
5744 mutex_lock(&fs_info->unused_bg_unpin_mutex);
5810 ret = find_first_extent_bit(unpin, 0, &start, &end, 5745 ret = find_first_extent_bit(unpin, 0, &start, &end,
5811 EXTENT_DIRTY, NULL); 5746 EXTENT_DIRTY, NULL);
5812 if (ret) 5747 if (ret) {
5748 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
5813 break; 5749 break;
5750 }
5814 5751
5815 if (btrfs_test_opt(root, DISCARD)) 5752 if (btrfs_test_opt(root, DISCARD))
5816 ret = btrfs_discard_extent(root, start, 5753 ret = btrfs_discard_extent(root, start,
@@ -5818,6 +5755,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5818 5755
5819 clear_extent_dirty(unpin, start, end, GFP_NOFS); 5756 clear_extent_dirty(unpin, start, end, GFP_NOFS);
5820 unpin_extent_range(root, start, end, true); 5757 unpin_extent_range(root, start, end, true);
5758 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
5821 cond_resched(); 5759 cond_resched();
5822 } 5760 }
5823 5761
@@ -6103,7 +6041,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	ret = update_block_group(root, bytenr, num_bytes, 0);
+	ret = update_block_group(trans, root, bytenr, num_bytes, 0);
 	if (ret) {
 		btrfs_abort_transaction(trans, extent_root, ret);
 		goto out;
@@ -6205,7 +6143,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			   struct extent_buffer *buf,
 			   u64 parent, int last_ref)
 {
-	struct btrfs_block_group_cache *cache = NULL;
 	int pin = 1;
 	int ret;
 
@@ -6221,17 +6158,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 	if (!last_ref)
 		return;
 
-	cache = btrfs_lookup_block_group(root->fs_info, buf->start);
-
 	if (btrfs_header_generation(buf) == trans->transid) {
+		struct btrfs_block_group_cache *cache;
+
 		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
 			ret = check_ref_cleanup(trans, root, buf->start);
 			if (!ret)
 				goto out;
 		}
 
+		cache = btrfs_lookup_block_group(root->fs_info, buf->start);
+
 		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 			pin_down_extent(root, cache, buf->start, buf->len, 1);
+			btrfs_put_block_group(cache);
 			goto out;
 		}
 
@@ -6239,6 +6179,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 		btrfs_add_free_space(cache, buf->start, buf->len);
 		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+		btrfs_put_block_group(cache);
 		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
 		pin = 0;
 	}
@@ -6253,7 +6194,6 @@ out:
 	 * anymore.
 	 */
 	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
-	btrfs_put_block_group(cache);
 }
 
 /* Can return -ENOMEM */
@@ -7063,7 +7003,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
-	ret = update_block_group(root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
 			ins->objectid, ins->offset);
@@ -7152,7 +7092,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	ret = update_block_group(root, ins->objectid, root->nodesize, 1);
+	ret = update_block_group(trans, root, ins->objectid, root->nodesize,
+				 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
 			ins->objectid, ins->offset);
@@ -7217,11 +7158,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 
 static struct extent_buffer *
 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		      u64 bytenr, u32 blocksize, int level)
+		      u64 bytenr, int level)
 {
 	struct extent_buffer *buf;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 	btrfs_set_header_generation(buf, trans->transid);
@@ -7340,7 +7281,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
 	if (btrfs_test_is_dummy_root(root)) {
 		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
-					    blocksize, level);
+					    level);
 		if (!IS_ERR(buf))
 			root->alloc_bytenr += blocksize;
 		return buf;
@@ -7357,8 +7298,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		return ERR_PTR(ret);
 	}
 
-	buf = btrfs_init_new_buffer(trans, root, ins.objectid,
-				    blocksize, level);
+	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
 	BUG_ON(IS_ERR(buf)); /* -ENOMEM */
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
@@ -7487,7 +7427,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 			continue;
 		}
 reada:
-		readahead_tree_block(root, bytenr, blocksize);
+		readahead_tree_block(root, bytenr);
 		nread++;
 	}
 	wc->reada_slot = slot;
@@ -7828,7 +7768,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
 	next = btrfs_find_tree_block(root, bytenr);
 	if (!next) {
-		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+		next = btrfs_find_create_tree_block(root, bytenr);
 		if (!next)
 			return -ENOMEM;
 		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
@@ -8548,14 +8488,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
 
-	alloc_flags = update_block_group_flags(root, cache->flags);
-	if (alloc_flags != cache->flags) {
-		ret = do_chunk_alloc(trans, root, alloc_flags,
-				     CHUNK_ALLOC_FORCE);
-		if (ret < 0)
-			goto out;
-	}
-
 	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
@@ -8566,6 +8498,11 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 		goto out;
 	ret = set_block_group_ro(cache, 0);
 out:
+	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+		alloc_flags = update_block_group_flags(root, cache->flags);
+		check_system_chunk(trans, root, alloc_flags);
+	}
+
 	btrfs_end_transaction(trans, root);
 	return ret;
 }
@@ -9005,6 +8942,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
 	INIT_LIST_HEAD(&cache->cluster_list);
 	INIT_LIST_HEAD(&cache->bg_list);
 	INIT_LIST_HEAD(&cache->ro_list);
+	INIT_LIST_HEAD(&cache->dirty_list);
 	btrfs_init_free_space_ctl(cache);
 	atomic_set(&cache->trimming, 0);
 
@@ -9068,9 +9006,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		 * b) Setting 'dirty flag' makes sure that we flush
 		 *    the new space cache info onto disk.
 		 */
-		cache->disk_cache_state = BTRFS_DC_CLEAR;
 		if (btrfs_test_opt(root, SPACE_CACHE))
-			cache->dirty = 1;
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
 	}
 
 	read_extent_buffer(leaf, &cache->item,
@@ -9460,6 +9397,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		}
 	}
 
+	spin_lock(&trans->transaction->dirty_bgs_lock);
+	if (!list_empty(&block_group->dirty_list)) {
+		list_del_init(&block_group->dirty_list);
+		btrfs_put_block_group(block_group);
+	}
+	spin_unlock(&trans->transaction->dirty_bgs_lock);
+
 	btrfs_remove_free_space_cache(block_group);
 
 	spin_lock(&block_group->space_info->lock);
@@ -9611,7 +9555,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		 * Want to do this before we do anything else so we can recover
 		 * properly if we fail to join the transaction.
 		 */
-		trans = btrfs_join_transaction(root);
+		/* 1 for btrfs_orphan_reserve_metadata() */
+		trans = btrfs_start_transaction(root, 1);
 		if (IS_ERR(trans)) {
 			btrfs_set_block_group_rw(root, block_group);
 			ret = PTR_ERR(trans);
@@ -9624,18 +9569,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		 */
 		start = block_group->key.objectid;
 		end = start + block_group->key.offset - 1;
+		/*
+		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
+		 * btrfs_finish_extent_commit(). If we are at transaction N,
+		 * another task might be running finish_extent_commit() for the
+		 * previous transaction N - 1, and have seen a range belonging
+		 * to the block group in freed_extents[] before we were able to
+		 * clear the whole block group range from freed_extents[]. This
+		 * means that task can lookup for the block group after we
+		 * unpinned it from freed_extents[] and removed it, leading to
+		 * a BUG_ON() at btrfs_unpin_extent_range().
+		 */
+		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
 				  EXTENT_DIRTY, GFP_NOFS);
 		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			btrfs_set_block_group_rw(root, block_group);
 			goto end_trans;
 		}
 		ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
 				  EXTENT_DIRTY, GFP_NOFS);
 		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			btrfs_set_block_group_rw(root, block_group);
 			goto end_trans;
 		}
+		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 
 		/* Reset pinned so btrfs_put_block_group doesn't complain */
 		block_group->pinned = 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c73df6a7c9b6..c7233ff1d533 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -64,7 +64,7 @@ void btrfs_leak_debug_check(void)
 
 	while (!list_empty(&states)) {
 		state = list_entry(states.next, struct extent_state, leak_list);
-		pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n",
+		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
 		       state->start, state->end, state->state,
 		       extent_state_in_tree(state),
 		       atomic_read(&state->refs));
@@ -396,21 +396,21 @@ static void merge_state(struct extent_io_tree *tree,
 }
 
 static void set_state_cb(struct extent_io_tree *tree,
-			 struct extent_state *state, unsigned long *bits)
+			 struct extent_state *state, unsigned *bits)
 {
 	if (tree->ops && tree->ops->set_bit_hook)
 		tree->ops->set_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void clear_state_cb(struct extent_io_tree *tree,
-			   struct extent_state *state, unsigned long *bits)
+			   struct extent_state *state, unsigned *bits)
 {
 	if (tree->ops && tree->ops->clear_bit_hook)
 		tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void set_state_bits(struct extent_io_tree *tree,
-			   struct extent_state *state, unsigned long *bits);
+			   struct extent_state *state, unsigned *bits);
 
 /*
  * insert an extent_state struct into the tree. 'bits' are set on the
@@ -426,7 +426,7 @@ static int insert_state(struct extent_io_tree *tree,
 			struct extent_state *state, u64 start, u64 end,
 			struct rb_node ***p,
 			struct rb_node **parent,
-			unsigned long *bits)
+			unsigned *bits)
 {
 	struct rb_node *node;
 
@@ -511,10 +511,10 @@ static struct extent_state *next_state(struct extent_state *state)
  */
 static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
 					    struct extent_state *state,
-					    unsigned long *bits, int wake)
+					    unsigned *bits, int wake)
 {
 	struct extent_state *next;
-	unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
+	unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
 
 	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
 		u64 range = state->end - state->start + 1;
@@ -570,7 +570,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
  * This takes the tree lock, and returns 0 on success and < 0 on error.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		     unsigned long bits, int wake, int delete,
+		     unsigned bits, int wake, int delete,
 		     struct extent_state **cached_state,
 		     gfp_t mask)
 {
@@ -789,9 +789,9 @@ out:
 
 static void set_state_bits(struct extent_io_tree *tree,
 			   struct extent_state *state,
-			   unsigned long *bits)
+			   unsigned *bits)
 {
-	unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
+	unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
 
 	set_state_cb(tree, state, bits);
 	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -803,7 +803,7 @@ static void set_state_bits(struct extent_io_tree *tree,
 
 static void cache_state_if_flags(struct extent_state *state,
 				 struct extent_state **cached_ptr,
-				 const u64 flags)
+				 unsigned flags)
 {
 	if (cached_ptr && !(*cached_ptr)) {
 		if (!flags || (state->state & flags)) {
@@ -833,7 +833,7 @@ static void cache_state(struct extent_state *state,
 
 static int __must_check
 __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		 unsigned long bits, unsigned long exclusive_bits,
+		 unsigned bits, unsigned exclusive_bits,
 		 u64 *failed_start, struct extent_state **cached_state,
 		 gfp_t mask)
 {
@@ -1034,7 +1034,7 @@ search_again:
 }
 
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   unsigned long bits, u64 * failed_start,
+		   unsigned bits, u64 * failed_start,
 		   struct extent_state **cached_state, gfp_t mask)
 {
 	return __set_extent_bit(tree, start, end, bits, 0, failed_start,
@@ -1060,7 +1060,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       unsigned long bits, unsigned long clear_bits,
+		       unsigned bits, unsigned clear_bits,
 		       struct extent_state **cached_state, gfp_t mask)
 {
 	struct extent_state *state;
@@ -1268,14 +1268,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		    unsigned long bits, gfp_t mask)
+		    unsigned bits, gfp_t mask)
 {
 	return set_extent_bit(tree, start, end, bits, NULL,
 			      NULL, mask);
 }
 
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		      unsigned long bits, gfp_t mask)
+		      unsigned bits, gfp_t mask)
 {
 	return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
 }
@@ -1330,10 +1330,11 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
  * us if waiting is desired.
  */
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		     unsigned long bits, struct extent_state **cached_state)
+		     unsigned bits, struct extent_state **cached_state)
 {
 	int err;
 	u64 failed_start;
+
 	while (1) {
 		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
 				       EXTENT_LOCKED, &failed_start,
@@ -1440,7 +1441,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
  */
 static struct extent_state *
 find_first_extent_bit_state(struct extent_io_tree *tree,
-			    u64 start, unsigned long bits)
+			    u64 start, unsigned bits)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1474,7 +1475,7 @@ out:
  * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, unsigned long bits,
+			  u64 *start_ret, u64 *end_ret, unsigned bits,
 			  struct extent_state **cached_state)
 {
 	struct extent_state *state;
@@ -1753,7 +1754,7 @@ out_failed:
 
 int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
 				 struct page *locked_page,
-				 unsigned long clear_bits,
+				 unsigned clear_bits,
 				 unsigned long page_ops)
 {
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@@ -1810,7 +1811,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
  */
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end, u64 max_bytes,
-		     unsigned long bits, int contig)
+		     unsigned bits, int contig)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1928,7 +1929,7 @@ out:
  * range is found set.
  */
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   unsigned long bits, int filled, struct extent_state *cached)
+		   unsigned bits, int filled, struct extent_state *cached)
 {
 	struct extent_state *state = NULL;
 	struct rb_node *node;
@@ -2057,7 +2058,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
 	sector = bbio->stripes[mirror_num-1].physical >> 9;
 	bio->bi_iter.bi_sector = sector;
 	dev = bbio->stripes[mirror_num-1].dev;
-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 	if (!dev || !dev->bdev || !dev->writeable) {
 		bio_put(bio);
 		return -EIO;
@@ -2816,8 +2817,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	    bio_add_page(bio, page, page_size, offset) < page_size) {
 		ret = submit_one_bio(rw, bio, mirror_num,
 				     prev_bio_flags);
-		if (ret < 0)
+		if (ret < 0) {
+			*bio_ret = NULL;
 			return ret;
+		}
 		bio = NULL;
 	} else {
 		return 0;
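
The fix above also clears *bio_ret when submit_one_bio() fails: the submit path consumes the bio either way, so leaving the caller's pointer populated would invite a use-after-free on the next attempt. A generic user-space sketch of that out-parameter hygiene (the names here are hypothetical, not btrfs API):

	#include <stdio.h>
	#include <stdlib.h>

	struct buf { int data; };

	/* consumes and frees the buffer; may report failure afterwards */
	static int submit(struct buf *b)
	{
		free(b);
		return -1;	/* simulate an I/O submission error */
	}

	static int flush_pending(struct buf **pending)
	{
		int ret = 0;

		if (*pending) {
			ret = submit(*pending);
			/* the buffer is gone either way: never leave a
			 * stale pointer behind for the caller */
			*pending = NULL;
		}
		return ret;
	}

	int main(void)
	{
		struct buf *pending = malloc(sizeof(*pending));

		if (flush_pending(&pending) < 0)
			printf("submit failed, pending=%p (safe)\n",
			       (void *)pending);
		return 0;
	}
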
@@ -3239,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
 					       page,
 					       &delalloc_start,
 					       &delalloc_end,
-					       128 * 1024 * 1024);
+					       BTRFS_MAX_EXTENT_SIZE);
 		if (nr_delalloc == 0) {
 			delalloc_start = delalloc_end + 1;
 			continue;
@@ -4598,11 +4601,11 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 
 static struct extent_buffer *
 __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
-		      unsigned long len, gfp_t mask)
+		      unsigned long len)
 {
 	struct extent_buffer *eb = NULL;
 
-	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS);
 	if (eb == NULL)
 		return NULL;
 	eb->start = start;
@@ -4643,7 +4646,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
 	struct extent_buffer *new;
 	unsigned long num_pages = num_extent_pages(src->start, src->len);
 
-	new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
+	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
 	if (new == NULL)
 		return NULL;
 
@@ -4666,13 +4669,26 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
 	return new;
 }
 
-struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+						u64 start)
 {
 	struct extent_buffer *eb;
-	unsigned long num_pages = num_extent_pages(0, len);
+	unsigned long len;
+	unsigned long num_pages;
 	unsigned long i;
 
-	eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
+	if (!fs_info) {
+		/*
+		 * Called only from tests that don't always have a fs_info
+		 * available, but we know that nodesize is 4096
+		 */
+		len = 4096;
+	} else {
+		len = fs_info->tree_root->nodesize;
+	}
+	num_pages = num_extent_pages(0, len);
+
+	eb = __alloc_extent_buffer(fs_info, start, len);
 	if (!eb)
 		return NULL;
 
@@ -4762,7 +4778,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
-					       u64 start, unsigned long len)
+					       u64 start)
 {
 	struct extent_buffer *eb, *exists = NULL;
 	int ret;
@@ -4770,7 +4786,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 	eb = find_extent_buffer(fs_info, start);
 	if (eb)
 		return eb;
-	eb = alloc_dummy_extent_buffer(start, len);
+	eb = alloc_dummy_extent_buffer(fs_info, start);
 	if (!eb)
 		return NULL;
 	eb->fs_info = fs_info;
@@ -4808,8 +4824,9 @@ free_eb:
 #endif
 
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
-					  u64 start, unsigned long len)
+					  u64 start)
 {
+	unsigned long len = fs_info->tree_root->nodesize;
 	unsigned long num_pages = num_extent_pages(start, len);
 	unsigned long i;
 	unsigned long index = start >> PAGE_CACHE_SHIFT;
@@ -4824,7 +4841,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (eb)
 		return eb;
 
-	eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
+	eb = __alloc_extent_buffer(fs_info, start, len);
 	if (!eb)
 		return NULL;
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index ece9ce87edff..695b0ccfb755 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -4,22 +4,22 @@
 #include <linux/rbtree.h>
 
 /* bits for the extent state */
-#define EXTENT_DIRTY 1
-#define EXTENT_WRITEBACK (1 << 1)
-#define EXTENT_UPTODATE (1 << 2)
-#define EXTENT_LOCKED (1 << 3)
-#define EXTENT_NEW (1 << 4)
-#define EXTENT_DELALLOC (1 << 5)
-#define EXTENT_DEFRAG (1 << 6)
-#define EXTENT_BOUNDARY (1 << 9)
-#define EXTENT_NODATASUM (1 << 10)
-#define EXTENT_DO_ACCOUNTING (1 << 11)
-#define EXTENT_FIRST_DELALLOC (1 << 12)
-#define EXTENT_NEED_WAIT (1 << 13)
-#define EXTENT_DAMAGED (1 << 14)
-#define EXTENT_NORESERVE (1 << 15)
+#define EXTENT_DIRTY (1U << 0)
+#define EXTENT_WRITEBACK (1U << 1)
+#define EXTENT_UPTODATE (1U << 2)
+#define EXTENT_LOCKED (1U << 3)
+#define EXTENT_NEW (1U << 4)
+#define EXTENT_DELALLOC (1U << 5)
+#define EXTENT_DEFRAG (1U << 6)
+#define EXTENT_BOUNDARY (1U << 9)
+#define EXTENT_NODATASUM (1U << 10)
+#define EXTENT_DO_ACCOUNTING (1U << 11)
+#define EXTENT_FIRST_DELALLOC (1U << 12)
+#define EXTENT_NEED_WAIT (1U << 13)
+#define EXTENT_DAMAGED (1U << 14)
+#define EXTENT_NORESERVE (1U << 15)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
 /*
  * flags for bio submission. The high bits indicate the compression
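
Since the extent-state bits now live in a plain unsigned int rather than unsigned long, the masks are spelled 1U << n: every constant is an unsigned 32-bit value, so storing, complementing, and comparing them against the narrower field involves no sign extension. A tiny stand-alone illustration (hypothetical flag names, not the btrfs ones):

	#include <stdio.h>

	#define FLAG_DIRTY	(1U << 0)	/* unsigned literal, as above */
	#define FLAG_WRITEBACK	(1U << 1)
	#define FLAG_LOCKED	(1U << 3)

	int main(void)
	{
		unsigned state = 0;	/* like the shrunken extent_state.state */

		state |= FLAG_DIRTY | FLAG_LOCKED;
		state &= ~FLAG_LOCKED;	/* ~ on an unsigned mask is well-defined */
		printf("dirty=%d writeback=%d\n",
		       !!(state & FLAG_DIRTY), !!(state & FLAG_WRITEBACK));
		return 0;
	}
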
@@ -81,9 +81,9 @@ struct extent_io_ops {
 	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
 				      struct extent_state *state, int uptodate);
 	void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
-			     unsigned long *bits);
+			     unsigned *bits);
 	void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
-			       unsigned long *bits);
+			       unsigned *bits);
 	void (*merge_extent_hook)(struct inode *inode,
 				  struct extent_state *new,
 				  struct extent_state *other);
@@ -108,7 +108,7 @@ struct extent_state {
 	/* ADD NEW ELEMENTS AFTER THIS */
 	wait_queue_head_t wq;
 	atomic_t refs;
-	unsigned long state;
+	unsigned state;
 
 	/* for use by the FS */
 	u64 private;
@@ -188,7 +188,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 int try_release_extent_buffer(struct page *page);
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		     unsigned long bits, struct extent_state **cached);
+		     unsigned bits, struct extent_state **cached);
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
 			 struct extent_state **cached, gfp_t mask);
@@ -202,21 +202,21 @@ void extent_io_exit(void);
 
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end,
-		     u64 max_bytes, unsigned long bits, int contig);
+		     u64 max_bytes, unsigned bits, int contig);
 
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   unsigned long bits, int filled,
+		   unsigned bits, int filled,
 		   struct extent_state *cached_state);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		      unsigned long bits, gfp_t mask);
+		      unsigned bits, gfp_t mask);
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		     unsigned long bits, int wake, int delete,
+		     unsigned bits, int wake, int delete,
 		     struct extent_state **cached, gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-		    unsigned long bits, gfp_t mask);
+		    unsigned bits, gfp_t mask);
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   unsigned long bits, u64 *failed_start,
+		   unsigned bits, u64 *failed_start,
 		   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
 			struct extent_state **cached_state, gfp_t mask);
@@ -229,14 +229,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask);
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       unsigned long bits, unsigned long clear_bits,
+		       unsigned bits, unsigned clear_bits,
 		       struct extent_state **cached_state, gfp_t mask);
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			struct extent_state **cached_state, gfp_t mask);
 int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
 		      struct extent_state **cached_state, gfp_t mask);
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, unsigned long bits,
+			  u64 *start_ret, u64 *end_ret, unsigned bits,
 			  struct extent_state **cached_state);
 int extent_invalidatepage(struct extent_io_tree *tree,
 			  struct page *page, unsigned long offset);
@@ -262,8 +262,9 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
 void set_page_extent_mapped(struct page *page);
 
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
-					  u64 start, unsigned long len);
-struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
+					  u64 start);
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+						u64 start);
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
 struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 					 u64 start);
@@ -322,7 +323,7 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
 				 struct page *locked_page,
-				 unsigned long bits_to_clear,
+				 unsigned bits_to_clear,
 				 unsigned long page_ops);
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
@@ -377,5 +378,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
 			      u64 *end, u64 max_bytes);
 #endif
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
-					       u64 start, unsigned long len);
+					       u64 start);
 #endif
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d6c03f7f136b..a71978578fa7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -651,15 +651,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 	struct io_ctl io_ctl;
 	struct btrfs_key key;
 	struct btrfs_free_space *e, *n;
-	struct list_head bitmaps;
+	LIST_HEAD(bitmaps);
 	u64 num_entries;
 	u64 num_bitmaps;
 	u64 generation;
 	u8 type;
 	int ret = 0;
 
-	INIT_LIST_HEAD(&bitmaps);
-
 	/* Nothing in the space cache, goodbye */
 	if (!i_size_read(inode))
 		return 0;
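
The cleanup above swaps a two-step declare-then-INIT_LIST_HEAD() for the LIST_HEAD() declarator, which declares the on-stack list head already pointing at itself. A user-space sketch of the idiom, with the kernel macros re-implemented locally since this is not kernel code:

	#include <stdio.h>

	struct list_head { struct list_head *next, *prev; };

	/* same shape as the kernel macro: declare and self-initialize */
	#define LIST_HEAD(name) struct list_head name = { &(name), &(name) }

	static int list_empty(const struct list_head *head)
	{
		return head->next == head;
	}

	int main(void)
	{
		LIST_HEAD(bitmaps);	/* no separate INIT_LIST_HEAD() call */

		printf("empty: %d\n", list_empty(&bitmaps));
		return 0;
	}
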
@@ -1243,6 +1241,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct inode *inode;
 	int ret = 0;
+	enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
 
 	root = root->fs_info->tree_root;
 
@@ -1266,9 +1265,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
 				      path, block_group->key.objectid);
 	if (ret) {
-		spin_lock(&block_group->lock);
-		block_group->disk_cache_state = BTRFS_DC_ERROR;
-		spin_unlock(&block_group->lock);
+		dcs = BTRFS_DC_ERROR;
 		ret = 0;
 #ifdef DEBUG
 		btrfs_err(root->fs_info,
@@ -1277,6 +1274,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 #endif
 	}
 
+	spin_lock(&block_group->lock);
+	block_group->disk_cache_state = dcs;
+	spin_unlock(&block_group->lock);
 	iput(inode);
 	return ret;
 }
@@ -2903,7 +2903,6 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
 	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
 				 min_bytes);
 
-	INIT_LIST_HEAD(&bitmaps);
 	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
 				      bytes + empty_size,
 				      cont1_bytes, min_bytes);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 8ffa4783cbf4..265e03c73f4d 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -344,6 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	path->leave_spinning = 1;
+	path->skip_release_on_error = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &key,
 				      ins_len);
 	if (ret == -EEXIST) {
@@ -362,8 +363,12 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 		ptr = (unsigned long)(ref + 1);
 		ret = 0;
 	} else if (ret < 0) {
-		if (ret == -EOVERFLOW)
-			ret = -EMLINK;
+		if (ret == -EOVERFLOW) {
+			if (find_name_in_backref(path, name, name_len, &ref))
+				ret = -EEXIST;
+			else
+				ret = -EMLINK;
+		}
 		goto out;
 	} else {
 		ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 54bcf639d1cf..a85c23dfcddb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 static void btrfs_split_extent_hook(struct inode *inode,
 				    struct extent_state *orig, u64 split)
 {
+	u64 size;
+
 	/* not delalloc, ignore it */
 	if (!(orig->state & EXTENT_DELALLOC))
 		return;
 
+	size = orig->end - orig->start + 1;
+	if (size > BTRFS_MAX_EXTENT_SIZE) {
+		u64 num_extents;
+		u64 new_size;
+
+		/*
+		 * We need the largest size of the remaining extent to see if we
+		 * need to add a new outstanding extent. Think of the following
+		 * case
+		 *
+		 * [MAX_EXTENT_SIZEx2 - 4k][4k]
+		 *
+		 * The new_size would just be 4k and we'd think we had enough
+		 * outstanding extents for this if we only took one side of the
+		 * split, same goes for the other direction. We need to see if
+		 * the larger size still is the same amount of extents as the
+		 * original size, because if it is we need to add a new
+		 * outstanding extent. But if we split up and the larger size
+		 * is less than the original then we are good to go since we've
+		 * already accounted for the extra extent in our original
+		 * accounting.
+		 */
+		new_size = orig->end - split + 1;
+		if ((split - orig->start) > new_size)
+			new_size = split - orig->start;
+
+		num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+					BTRFS_MAX_EXTENT_SIZE);
+		if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+			      BTRFS_MAX_EXTENT_SIZE) < num_extents)
+			return;
+	}
+
 	spin_lock(&BTRFS_I(inode)->lock);
 	BTRFS_I(inode)->outstanding_extents++;
 	spin_unlock(&BTRFS_I(inode)->lock);
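
The accounting above relies on round-up division — div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE) — to count how many max-sized pieces a delalloc range needs. A worked sketch of the comment's [MAX_EXTENT_SIZEx2 - 4k][4k] case, using the 128M value that BTRFS_MAX_EXTENT_SIZE replaced elsewhere in this pull (plain C with a stand-in constant):

	#include <stdio.h>
	#include <stdint.h>

	#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)	/* stand-in for BTRFS_MAX_EXTENT_SIZE */

	/* round-up division, same shape as the div64_u64() calls above */
	static uint64_t nr_extents(uint64_t size)
	{
		return (size + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
	}

	int main(void)
	{
		uint64_t orig = 2 * MAX_EXTENT_SIZE;	/* needs 2 extents */
		uint64_t tail = 4096;			/* the 4k piece */
		uint64_t large = orig - tail;		/* larger remainder */

		/* prints 2, 2, 1: the larger side still needs 2 extents,
		 * so splitting off the 4k tail adds one more outstanding
		 * extent -- exactly the case the hook checks for */
		printf("%llu %llu %llu\n",
		       (unsigned long long)nr_extents(orig),
		       (unsigned long long)nr_extents(large),
		       (unsigned long long)nr_extents(tail));
		return 0;
	}
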
@@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode,
 				    struct extent_state *new,
 				    struct extent_state *other)
 {
+	u64 new_size, old_size;
+	u64 num_extents;
+
 	/* not delalloc, ignore it */
 	if (!(other->state & EXTENT_DELALLOC))
 		return;
 
+	old_size = other->end - other->start + 1;
+	new_size = old_size + (new->end - new->start + 1);
+
+	/* we're not bigger than the max, unreserve the space and go */
+	if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->outstanding_extents--;
+		spin_unlock(&BTRFS_I(inode)->lock);
+		return;
+	}
+
+	/*
+	 * If we grew by another max_extent, just return, we want to keep that
+	 * reserved amount.
+	 */
+	num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+				BTRFS_MAX_EXTENT_SIZE);
+	if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+		      BTRFS_MAX_EXTENT_SIZE) > num_extents)
+		return;
+
 	spin_lock(&BTRFS_I(inode)->lock);
 	BTRFS_I(inode)->outstanding_extents--;
 	spin_unlock(&BTRFS_I(inode)->lock);
@@ -1604,7 +1663,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
  * have pending delalloc work to be done.
  */
 static void btrfs_set_bit_hook(struct inode *inode,
-			       struct extent_state *state, unsigned long *bits)
+			       struct extent_state *state, unsigned *bits)
 {
 
 	if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@@ -1645,9 +1704,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
  */
 static void btrfs_clear_bit_hook(struct inode *inode,
 				 struct extent_state *state,
-				 unsigned long *bits)
+				 unsigned *bits)
 {
 	u64 len = state->end + 1 - state->start;
+	u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
+				    BTRFS_MAX_EXTENT_SIZE);
 
 	spin_lock(&BTRFS_I(inode)->lock);
 	if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 		*bits &= ~EXTENT_FIRST_DELALLOC;
 	} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
 		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents--;
+		BTRFS_I(inode)->outstanding_extents -= num_extents;
 		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 
@@ -2945,7 +3006,7 @@ static int __readpage_endio_check(struct inode *inode,
 	return 0;
 zeroit:
 	if (__ratelimit(&_rs))
-		btrfs_info(BTRFS_I(inode)->root->fs_info,
+		btrfs_warn(BTRFS_I(inode)->root->fs_info,
 			   "csum failed ino %llu off %llu csum %u expected csum %u",
 			   btrfs_ino(inode), start, csum, csum_expected);
 	memset(kaddr + pgoff, 1, len);
@@ -3407,7 +3468,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 
 out:
 	if (ret)
-		btrfs_crit(root->fs_info,
+		btrfs_err(root->fs_info,
 			   "could not do orphan cleanup %d", ret);
 	btrfs_free_path(path);
 	return ret;
@@ -3490,7 +3551,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_inode_item *inode_item;
-	struct btrfs_timespec *tspec;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_key location;
 	unsigned long ptr;
@@ -3527,17 +3587,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
 	btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
 
-	tspec = btrfs_inode_atime(inode_item);
-	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
-	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
+	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
+
+	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
+	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
 
-	tspec = btrfs_inode_mtime(inode_item);
-	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
-	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
+	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
 
-	tspec = btrfs_inode_ctime(inode_item);
-	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
-	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+	BTRFS_I(inode)->i_otime.tv_sec =
+		btrfs_timespec_sec(leaf, &inode_item->otime);
+	BTRFS_I(inode)->i_otime.tv_nsec =
+		btrfs_timespec_nsec(leaf, &inode_item->otime);
 
 	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
 	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
@@ -3656,21 +3718,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
 	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
 
-	btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+	btrfs_set_token_timespec_sec(leaf, &item->atime,
 				     inode->i_atime.tv_sec, &token);
-	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+	btrfs_set_token_timespec_nsec(leaf, &item->atime,
 				      inode->i_atime.tv_nsec, &token);
 
-	btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+	btrfs_set_token_timespec_sec(leaf, &item->mtime,
 				     inode->i_mtime.tv_sec, &token);
-	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+	btrfs_set_token_timespec_nsec(leaf, &item->mtime,
 				      inode->i_mtime.tv_nsec, &token);
 
-	btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+	btrfs_set_token_timespec_sec(leaf, &item->ctime,
 				     inode->i_ctime.tv_sec, &token);
-	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+	btrfs_set_token_timespec_nsec(leaf, &item->ctime,
 				      inode->i_ctime.tv_nsec, &token);
 
+	btrfs_set_token_timespec_sec(leaf, &item->otime,
+				     BTRFS_I(inode)->i_otime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, &item->otime,
+				      BTRFS_I(inode)->i_otime.tv_nsec, &token);
+
 	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
 				     &token);
 	btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
@@ -5007,6 +5074,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
 	struct btrfs_root *new_root;
 	struct btrfs_root_ref *ref;
 	struct extent_buffer *leaf;
+	struct btrfs_key key;
 	int ret;
 	int err = 0;
 
@@ -5017,9 +5085,12 @@ static int fixup_tree_root_location(struct btrfs_root *root,
 	}
 
 	err = -ENOENT;
-	ret = btrfs_find_item(root->fs_info->tree_root, path,
-			      BTRFS_I(dir)->root->root_key.objectid,
-			      location->objectid, BTRFS_ROOT_REF_KEY, NULL);
+	key.objectid = BTRFS_I(dir)->root->root_key.objectid;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = location->objectid;
+
+	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
+				0, 0);
 	if (ret) {
 		if (ret < 0)
 			err = ret;
@@ -5258,7 +5329,10 @@ static struct inode *new_simple_dir(struct super_block *s,
 	inode->i_op = &btrfs_dir_ro_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_mtime = CURRENT_TIME;
+	inode->i_atime = inode->i_mtime;
+	inode->i_ctime = inode->i_mtime;
+	BTRFS_I(inode)->i_otime = inode->i_mtime;
 
 	return inode;
 }
@@ -5826,7 +5900,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
 	inode_init_owner(inode, dir, mode);
 	inode_set_bytes(inode, 0);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+	inode->i_mtime = CURRENT_TIME;
+	inode->i_atime = inode->i_mtime;
+	inode->i_ctime = inode->i_mtime;
+	BTRFS_I(inode)->i_otime = inode->i_mtime;
+
 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				    struct btrfs_inode_item);
 	memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
@@ -7134,11 +7213,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	u64 start = iblock << inode->i_blkbits;
 	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
+	u64 orig_len = len;
 	int unlock_bits = EXTENT_LOCKED;
 	int ret = 0;
 
 	if (create)
-		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+		unlock_bits |= EXTENT_DIRTY;
 	else
 		len = min_t(u64, len, root->sectorsize);
 
@@ -7269,14 +7349,12 @@ unlock:
 		if (start + len > i_size_read(inode))
 			i_size_write(inode, start + len);
 
-		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents++;
-		spin_unlock(&BTRFS_I(inode)->lock);
-
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-				     lockstart + len - 1, EXTENT_DELALLOC, NULL,
-				     &cached_state, GFP_NOFS);
-		BUG_ON(ret);
+		if (len < orig_len) {
+			spin_lock(&BTRFS_I(inode)->lock);
+			BTRFS_I(inode)->outstanding_extents++;
+			spin_unlock(&BTRFS_I(inode)->lock);
+		}
+		btrfs_free_reserved_data_space(inode, len);
 	}
 
 	/*
@@ -7805,8 +7883,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	}
 
 	/* async crcs make it difficult to collect full stripe writes. */
-	if (btrfs_get_alloc_profile(root, 1) &
-	    (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
+	if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		async_submit = 0;
 	else
 		async_submit = 1;
@@ -8053,8 +8130,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode,
 						     count - (size_t)ret);
-		else
-			btrfs_delalloc_release_metadata(inode, 0);
 	}
 out:
 	if (wakeup)
@@ -8575,6 +8650,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 
 	ei->delayed_node = NULL;
 
+	ei->i_otime.tv_sec = 0;
+	ei->i_otime.tv_nsec = 0;
+
 	inode = &ei->vfs_inode;
 	extent_map_tree_init(&ei->extent_tree);
 	extent_io_tree_init(&ei->io_tree, &inode->i_data);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 48b60dbf807f..97159a8e91d4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1431,9 +1431,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1431 qgroup = u64_to_ptr(unode->aux); 1431 qgroup = u64_to_ptr(unode->aux);
1432 qgroup->rfer += sign * oper->num_bytes; 1432 qgroup->rfer += sign * oper->num_bytes;
1433 qgroup->rfer_cmpr += sign * oper->num_bytes; 1433 qgroup->rfer_cmpr += sign * oper->num_bytes;
1434 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1434 qgroup->excl += sign * oper->num_bytes; 1435 qgroup->excl += sign * oper->num_bytes;
1435 if (sign < 0)
1436 WARN_ON(qgroup->excl < oper->num_bytes);
1437 qgroup->excl_cmpr += sign * oper->num_bytes; 1436 qgroup->excl_cmpr += sign * oper->num_bytes;
1438 qgroup_dirty(fs_info, qgroup); 1437 qgroup_dirty(fs_info, qgroup);
1439 1438
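
Moving the WARN_ON above the update matters because qgroup->excl is unsigned: once a negative delta has been applied, the value has already wrapped, and comparing it against oper->num_bytes afterwards no longer detects the underflow. The corrected ordering, sketched:

    #include <stdio.h>

    /* Detect an impending underflow while the old value is still
     * visible, then apply the signed delta (wrapping arithmetic
     * matches the kernel's unsigned update). */
    static void apply_excl_delta(unsigned long long *excl,
                                 long long sign, unsigned long long bytes)
    {
        if (sign < 0 && *excl < bytes)
            fprintf(stderr, "warn: qgroup excl would underflow\n");
        *excl += sign * bytes;
    }
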
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 8ab2a17bbba8..5264858ed768 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -58,15 +58,6 @@
58 */ 58 */
59#define RBIO_CACHE_READY_BIT 3 59#define RBIO_CACHE_READY_BIT 3
60 60
61/*
62 * bbio and raid_map are managed by the caller, so we shouldn't free
63 * them here. And besides that, all rbios with this flag should not
64 * be cached, because we need raid_map to check whether the rbios'
65 * stripes are the same or not, but it is very likely that the caller
66 * has freed raid_map, so don't cache those rbios.
67 */
68#define RBIO_HOLD_BBIO_MAP_BIT 4
69
70#define RBIO_CACHE_SIZE 1024 61#define RBIO_CACHE_SIZE 1024
71 62
72enum btrfs_rbio_ops { 63enum btrfs_rbio_ops {
@@ -79,13 +70,6 @@ struct btrfs_raid_bio {
79 struct btrfs_fs_info *fs_info; 70 struct btrfs_fs_info *fs_info;
80 struct btrfs_bio *bbio; 71 struct btrfs_bio *bbio;
81 72
82 /*
83 * logical block numbers for the start of each stripe
84 * The last one or two are p/q. These are sorted,
85 * so raid_map[0] is the start of our full stripe
86 */
87 u64 *raid_map;
88
89 /* while we're doing rmw on a stripe 73 /* while we're doing rmw on a stripe
90 * we put it into a hash table so we can 74 * we put it into a hash table so we can
91 * lock the stripe and merge more rbios 75 * lock the stripe and merge more rbios
@@ -303,7 +287,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
303 */ 287 */
304static int rbio_bucket(struct btrfs_raid_bio *rbio) 288static int rbio_bucket(struct btrfs_raid_bio *rbio)
305{ 289{
306 u64 num = rbio->raid_map[0]; 290 u64 num = rbio->bbio->raid_map[0];
307 291
308 /* 292 /*
309 * we shift down quite a bit. We're using byte 293 * we shift down quite a bit. We're using byte
@@ -606,8 +590,8 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
606 test_bit(RBIO_CACHE_BIT, &cur->flags)) 590 test_bit(RBIO_CACHE_BIT, &cur->flags))
607 return 0; 591 return 0;
608 592
609 if (last->raid_map[0] != 593 if (last->bbio->raid_map[0] !=
610 cur->raid_map[0]) 594 cur->bbio->raid_map[0])
611 return 0; 595 return 0;
612 596
613 /* we can't merge with different operations */ 597 /* we can't merge with different operations */
@@ -689,7 +673,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
689 spin_lock_irqsave(&h->lock, flags); 673 spin_lock_irqsave(&h->lock, flags);
690 list_for_each_entry(cur, &h->hash_list, hash_list) { 674 list_for_each_entry(cur, &h->hash_list, hash_list) {
691 walk++; 675 walk++;
692 if (cur->raid_map[0] == rbio->raid_map[0]) { 676 if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
693 spin_lock(&cur->bio_list_lock); 677 spin_lock(&cur->bio_list_lock);
694 678
695 /* can we steal this cached rbio's pages? */ 679 /* can we steal this cached rbio's pages? */
@@ -841,21 +825,6 @@ done_nolock:
841 remove_rbio_from_cache(rbio); 825 remove_rbio_from_cache(rbio);
842} 826}
843 827
844static inline void
845__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
846{
847 if (need) {
848 kfree(raid_map);
849 kfree(bbio);
850 }
851}
852
853static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
854{
855 __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
856 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
857}
858
859static void __free_raid_bio(struct btrfs_raid_bio *rbio) 828static void __free_raid_bio(struct btrfs_raid_bio *rbio)
860{ 829{
861 int i; 830 int i;
@@ -875,8 +844,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
875 } 844 }
876 } 845 }
877 846
878 free_bbio_and_raid_map(rbio); 847 btrfs_put_bbio(rbio->bbio);
879
880 kfree(rbio); 848 kfree(rbio);
881} 849}
882 850
@@ -985,8 +953,7 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
985 * this does not allocate any pages for rbio->pages. 953 * this does not allocate any pages for rbio->pages.
986 */ 954 */
987static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, 955static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
988 struct btrfs_bio *bbio, u64 *raid_map, 956 struct btrfs_bio *bbio, u64 stripe_len)
989 u64 stripe_len)
990{ 957{
991 struct btrfs_raid_bio *rbio; 958 struct btrfs_raid_bio *rbio;
992 int nr_data = 0; 959 int nr_data = 0;
@@ -1007,7 +974,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
1007 INIT_LIST_HEAD(&rbio->stripe_cache); 974 INIT_LIST_HEAD(&rbio->stripe_cache);
1008 INIT_LIST_HEAD(&rbio->hash_list); 975 INIT_LIST_HEAD(&rbio->hash_list);
1009 rbio->bbio = bbio; 976 rbio->bbio = bbio;
1010 rbio->raid_map = raid_map;
1011 rbio->fs_info = root->fs_info; 977 rbio->fs_info = root->fs_info;
1012 rbio->stripe_len = stripe_len; 978 rbio->stripe_len = stripe_len;
1013 rbio->nr_pages = num_pages; 979 rbio->nr_pages = num_pages;
@@ -1028,10 +994,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
1028 rbio->bio_pages = p + sizeof(struct page *) * num_pages; 994 rbio->bio_pages = p + sizeof(struct page *) * num_pages;
1029 rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; 995 rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
1030 996
1031 if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) 997 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
998 nr_data = real_stripes - 1;
999 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1032 nr_data = real_stripes - 2; 1000 nr_data = real_stripes - 2;
1033 else 1001 else
1034 nr_data = real_stripes - 1; 1002 BUG();
1035 1003
1036 rbio->nr_data = nr_data; 1004 rbio->nr_data = nr_data;
1037 return rbio; 1005 return rbio;
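
With raid_map folded into btrfs_bio, the number of data stripes is now derived from the block-group profile instead of probing the map's trailing sentinel: RAID5 carries one parity stripe, RAID6 two. A sketch of the derivation, with booleans standing in for the flag tests:

    #include <assert.h>

    static int nr_data_stripes(int is_raid5, int is_raid6, int real_stripes)
    {
        if (is_raid5)
            return real_stripes - 1;    /* one parity stripe (P) */
        if (is_raid6)
            return real_stripes - 2;    /* two parity stripes (P, Q) */
        assert(0 && "rbio allocated for a non-RAID56 profile");
        return -1;
    }
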
@@ -1182,7 +1150,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1182 spin_lock_irq(&rbio->bio_list_lock); 1150 spin_lock_irq(&rbio->bio_list_lock);
1183 bio_list_for_each(bio, &rbio->bio_list) { 1151 bio_list_for_each(bio, &rbio->bio_list) {
1184 start = (u64)bio->bi_iter.bi_sector << 9; 1152 start = (u64)bio->bi_iter.bi_sector << 9;
1185 stripe_offset = start - rbio->raid_map[0]; 1153 stripe_offset = start - rbio->bbio->raid_map[0];
1186 page_index = stripe_offset >> PAGE_CACHE_SHIFT; 1154 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1187 1155
1188 for (i = 0; i < bio->bi_vcnt; i++) { 1156 for (i = 0; i < bio->bi_vcnt; i++) {
@@ -1402,7 +1370,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1402 logical <<= 9; 1370 logical <<= 9;
1403 1371
1404 for (i = 0; i < rbio->nr_data; i++) { 1372 for (i = 0; i < rbio->nr_data; i++) {
1405 stripe_start = rbio->raid_map[i]; 1373 stripe_start = rbio->bbio->raid_map[i];
1406 if (logical >= stripe_start && 1374 if (logical >= stripe_start &&
1407 logical < stripe_start + rbio->stripe_len) { 1375 logical < stripe_start + rbio->stripe_len) {
1408 return i; 1376 return i;
@@ -1776,17 +1744,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1776 * our main entry point for writes from the rest of the FS. 1744 * our main entry point for writes from the rest of the FS.
1777 */ 1745 */
1778int raid56_parity_write(struct btrfs_root *root, struct bio *bio, 1746int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1779 struct btrfs_bio *bbio, u64 *raid_map, 1747 struct btrfs_bio *bbio, u64 stripe_len)
1780 u64 stripe_len)
1781{ 1748{
1782 struct btrfs_raid_bio *rbio; 1749 struct btrfs_raid_bio *rbio;
1783 struct btrfs_plug_cb *plug = NULL; 1750 struct btrfs_plug_cb *plug = NULL;
1784 struct blk_plug_cb *cb; 1751 struct blk_plug_cb *cb;
1785 int ret; 1752 int ret;
1786 1753
1787 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1754 rbio = alloc_rbio(root, bbio, stripe_len);
1788 if (IS_ERR(rbio)) { 1755 if (IS_ERR(rbio)) {
1789 __free_bbio_and_raid_map(bbio, raid_map, 1); 1756 btrfs_put_bbio(bbio);
1790 return PTR_ERR(rbio); 1757 return PTR_ERR(rbio);
1791 } 1758 }
1792 bio_list_add(&rbio->bio_list, bio); 1759 bio_list_add(&rbio->bio_list, bio);
@@ -1885,9 +1852,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1885 } 1852 }
1886 1853
1887 /* all raid6 handling here */ 1854 /* all raid6 handling here */
1888 if (rbio->raid_map[rbio->real_stripes - 1] == 1855 if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
1889 RAID6_Q_STRIPE) {
1890
1891 /* 1856 /*
1892 * single failure, rebuild from parity raid5 1857 * single failure, rebuild from parity raid5
1893 * style 1858 * style
@@ -1922,8 +1887,9 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1922 * here due to a crc mismatch and we can't give them the 1887 * here due to a crc mismatch and we can't give them the
1923 * data they want 1888 * data they want
1924 */ 1889 */
1925 if (rbio->raid_map[failb] == RAID6_Q_STRIPE) { 1890 if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
1926 if (rbio->raid_map[faila] == RAID5_P_STRIPE) { 1891 if (rbio->bbio->raid_map[faila] ==
1892 RAID5_P_STRIPE) {
1927 err = -EIO; 1893 err = -EIO;
1928 goto cleanup; 1894 goto cleanup;
1929 } 1895 }
@@ -1934,7 +1900,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1934 goto pstripe; 1900 goto pstripe;
1935 } 1901 }
1936 1902
1937 if (rbio->raid_map[failb] == RAID5_P_STRIPE) { 1903 if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
1938 raid6_datap_recov(rbio->real_stripes, 1904 raid6_datap_recov(rbio->real_stripes,
1939 PAGE_SIZE, faila, pointers); 1905 PAGE_SIZE, faila, pointers);
1940 } else { 1906 } else {
@@ -2001,8 +1967,7 @@ cleanup:
2001 1967
2002cleanup_io: 1968cleanup_io:
2003 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 1969 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
2004 if (err == 0 && 1970 if (err == 0)
2005 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
2006 cache_rbio_pages(rbio); 1971 cache_rbio_pages(rbio);
2007 else 1972 else
2008 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 1973 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -2156,15 +2121,16 @@ cleanup:
2156 * of the drive. 2121 * of the drive.
2157 */ 2122 */
2158int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 2123int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2159 struct btrfs_bio *bbio, u64 *raid_map, 2124 struct btrfs_bio *bbio, u64 stripe_len,
2160 u64 stripe_len, int mirror_num, int generic_io) 2125 int mirror_num, int generic_io)
2161{ 2126{
2162 struct btrfs_raid_bio *rbio; 2127 struct btrfs_raid_bio *rbio;
2163 int ret; 2128 int ret;
2164 2129
2165 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2130 rbio = alloc_rbio(root, bbio, stripe_len);
2166 if (IS_ERR(rbio)) { 2131 if (IS_ERR(rbio)) {
2167 __free_bbio_and_raid_map(bbio, raid_map, generic_io); 2132 if (generic_io)
2133 btrfs_put_bbio(bbio);
2168 return PTR_ERR(rbio); 2134 return PTR_ERR(rbio);
2169 } 2135 }
2170 2136
@@ -2175,7 +2141,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2175 rbio->faila = find_logical_bio_stripe(rbio, bio); 2141 rbio->faila = find_logical_bio_stripe(rbio, bio);
2176 if (rbio->faila == -1) { 2142 if (rbio->faila == -1) {
2177 BUG(); 2143 BUG();
2178 __free_bbio_and_raid_map(bbio, raid_map, generic_io); 2144 if (generic_io)
2145 btrfs_put_bbio(bbio);
2179 kfree(rbio); 2146 kfree(rbio);
2180 return -EIO; 2147 return -EIO;
2181 } 2148 }
@@ -2184,7 +2151,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2184 btrfs_bio_counter_inc_noblocked(root->fs_info); 2151 btrfs_bio_counter_inc_noblocked(root->fs_info);
2185 rbio->generic_bio_cnt = 1; 2152 rbio->generic_bio_cnt = 1;
2186 } else { 2153 } else {
2187 set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); 2154 btrfs_get_bbio(bbio);
2188 } 2155 }
2189 2156
2190 /* 2157 /*
@@ -2240,14 +2207,14 @@ static void read_rebuild_work(struct btrfs_work *work)
2240 2207
2241struct btrfs_raid_bio * 2208struct btrfs_raid_bio *
2242raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, 2209raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
2243 struct btrfs_bio *bbio, u64 *raid_map, 2210 struct btrfs_bio *bbio, u64 stripe_len,
2244 u64 stripe_len, struct btrfs_device *scrub_dev, 2211 struct btrfs_device *scrub_dev,
2245 unsigned long *dbitmap, int stripe_nsectors) 2212 unsigned long *dbitmap, int stripe_nsectors)
2246{ 2213{
2247 struct btrfs_raid_bio *rbio; 2214 struct btrfs_raid_bio *rbio;
2248 int i; 2215 int i;
2249 2216
2250 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2217 rbio = alloc_rbio(root, bbio, stripe_len);
2251 if (IS_ERR(rbio)) 2218 if (IS_ERR(rbio))
2252 return NULL; 2219 return NULL;
2253 bio_list_add(&rbio->bio_list, bio); 2220 bio_list_add(&rbio->bio_list, bio);
@@ -2279,10 +2246,10 @@ void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
2279 int stripe_offset; 2246 int stripe_offset;
2280 int index; 2247 int index;
2281 2248
2282 ASSERT(logical >= rbio->raid_map[0]); 2249 ASSERT(logical >= rbio->bbio->raid_map[0]);
2283 ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + 2250 ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
2284 rbio->stripe_len * rbio->nr_data); 2251 rbio->stripe_len * rbio->nr_data);
2285 stripe_offset = (int)(logical - rbio->raid_map[0]); 2252 stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
2286 index = stripe_offset >> PAGE_CACHE_SHIFT; 2253 index = stripe_offset >> PAGE_CACHE_SHIFT;
2287 rbio->bio_pages[index] = page; 2254 rbio->bio_pages[index] = page;
2288} 2255}
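
The recurring theme in this file: raid_map now lives inside btrfs_bio, and the ad-hoc RBIO_HOLD_BBIO_MAP_BIT plus kfree() pairs are replaced by a get/put reference count, so whoever drops the last reference frees the map and the bbio together. A minimal userspace sketch of the discipline (names mirror the kernel's but this is an analogue, not the kernel code):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct bbio {
        atomic_int refs;
        /* stripe map, raid_map[], ... */
    };

    static struct bbio *bbio_alloc(void)
    {
        struct bbio *b = calloc(1, sizeof(*b));
        if (b)
            atomic_init(&b->refs, 1);
        return b;
    }

    static void bbio_get(struct bbio *b)
    {
        atomic_fetch_add(&b->refs, 1);
    }

    static void bbio_put(struct bbio *b)
    {
        if (b && atomic_fetch_sub(&b->refs, 1) == 1)
            free(b);    /* last reference frees map and bbio together */
    }

Note the ownership rule visible in the hunks above: raid56_parity_write() consumes the caller's reference on failure, while raid56_parity_recover() does so only for generic_io callers; the scrub path keeps its own reference and the rbio takes an extra one via btrfs_get_bbio().
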
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 31d4a157b5e3..2b5d7977d83b 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -43,16 +43,15 @@ struct btrfs_raid_bio;
43struct btrfs_device; 43struct btrfs_device;
44 44
45int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 45int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
46 struct btrfs_bio *bbio, u64 *raid_map, 46 struct btrfs_bio *bbio, u64 stripe_len,
47 u64 stripe_len, int mirror_num, int generic_io); 47 int mirror_num, int generic_io);
48int raid56_parity_write(struct btrfs_root *root, struct bio *bio, 48int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
49 struct btrfs_bio *bbio, u64 *raid_map, 49 struct btrfs_bio *bbio, u64 stripe_len);
50 u64 stripe_len);
51 50
52struct btrfs_raid_bio * 51struct btrfs_raid_bio *
53raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, 52raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
54 struct btrfs_bio *bbio, u64 *raid_map, 53 struct btrfs_bio *bbio, u64 stripe_len,
55 u64 stripe_len, struct btrfs_device *scrub_dev, 54 struct btrfs_device *scrub_dev,
56 unsigned long *dbitmap, int stripe_nsectors); 55 unsigned long *dbitmap, int stripe_nsectors);
57void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, 56void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
58 struct page *page, u64 logical); 57 struct page *page, u64 logical);
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index b63ae20618fb..0e7beea92b4c 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -66,7 +66,6 @@ struct reada_extctl {
66struct reada_extent { 66struct reada_extent {
67 u64 logical; 67 u64 logical;
68 struct btrfs_key top; 68 struct btrfs_key top;
69 u32 blocksize;
70 int err; 69 int err;
71 struct list_head extctl; 70 struct list_head extctl;
72 int refcnt; 71 int refcnt;
@@ -349,7 +348,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
349 348
350 blocksize = root->nodesize; 349 blocksize = root->nodesize;
351 re->logical = logical; 350 re->logical = logical;
352 re->blocksize = blocksize;
353 re->top = *top; 351 re->top = *top;
354 INIT_LIST_HEAD(&re->extctl); 352 INIT_LIST_HEAD(&re->extctl);
355 spin_lock_init(&re->lock); 353 spin_lock_init(&re->lock);
@@ -463,7 +461,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
463 spin_unlock(&fs_info->reada_lock); 461 spin_unlock(&fs_info->reada_lock);
464 btrfs_dev_replace_unlock(&fs_info->dev_replace); 462 btrfs_dev_replace_unlock(&fs_info->dev_replace);
465 463
466 kfree(bbio); 464 btrfs_put_bbio(bbio);
467 return re; 465 return re;
468 466
469error: 467error:
@@ -488,7 +486,7 @@ error:
488 kref_put(&zone->refcnt, reada_zone_release); 486 kref_put(&zone->refcnt, reada_zone_release);
489 spin_unlock(&fs_info->reada_lock); 487 spin_unlock(&fs_info->reada_lock);
490 } 488 }
491 kfree(bbio); 489 btrfs_put_bbio(bbio);
492 kfree(re); 490 kfree(re);
493 return re_exist; 491 return re_exist;
494} 492}
@@ -660,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
660 int mirror_num = 0; 658 int mirror_num = 0;
661 struct extent_buffer *eb = NULL; 659 struct extent_buffer *eb = NULL;
662 u64 logical; 660 u64 logical;
663 u32 blocksize;
664 int ret; 661 int ret;
665 int i; 662 int i;
666 int need_kick = 0; 663 int need_kick = 0;
@@ -694,7 +691,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
694 spin_unlock(&fs_info->reada_lock); 691 spin_unlock(&fs_info->reada_lock);
695 return 0; 692 return 0;
696 } 693 }
697 dev->reada_next = re->logical + re->blocksize; 694 dev->reada_next = re->logical + fs_info->tree_root->nodesize;
698 re->refcnt++; 695 re->refcnt++;
699 696
700 spin_unlock(&fs_info->reada_lock); 697 spin_unlock(&fs_info->reada_lock);
@@ -709,7 +706,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
709 } 706 }
710 } 707 }
711 logical = re->logical; 708 logical = re->logical;
712 blocksize = re->blocksize;
713 709
714 spin_lock(&re->lock); 710 spin_lock(&re->lock);
715 if (re->scheduled_for == NULL) { 711 if (re->scheduled_for == NULL) {
@@ -724,8 +720,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
724 return 0; 720 return 0;
725 721
726 atomic_inc(&dev->reada_in_flight); 722 atomic_inc(&dev->reada_in_flight);
727 ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize, 723 ret = reada_tree_block_flagged(fs_info->extent_root, logical,
728 mirror_num, &eb); 724 mirror_num, &eb);
729 if (ret) 725 if (ret)
730 __readahead_hook(fs_info->extent_root, NULL, logical, ret); 726 __readahead_hook(fs_info->extent_root, NULL, logical, ret);
731 else if (eb) 727 else if (eb)
@@ -851,7 +847,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
851 break; 847 break;
852 printk(KERN_DEBUG 848 printk(KERN_DEBUG
853 " re: logical %llu size %u empty %d for %lld", 849 " re: logical %llu size %u empty %d for %lld",
854 re->logical, re->blocksize, 850 re->logical, fs_info->tree_root->nodesize,
855 list_empty(&re->extctl), re->scheduled_for ? 851 list_empty(&re->extctl), re->scheduled_for ?
856 re->scheduled_for->devid : -1); 852 re->scheduled_for->devid : -1);
857 853
@@ -886,7 +882,8 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
886 } 882 }
887 printk(KERN_DEBUG 883 printk(KERN_DEBUG
888 "re: logical %llu size %u list empty %d for %lld", 884 "re: logical %llu size %u list empty %d for %lld",
889 re->logical, re->blocksize, list_empty(&re->extctl), 885 re->logical, fs_info->tree_root->nodesize,
886 list_empty(&re->extctl),
890 re->scheduled_for ? re->scheduled_for->devid : -1); 887 re->scheduled_for ? re->scheduled_for->devid : -1);
891 for (i = 0; i < re->nzones; ++i) { 888 for (i = 0; i < re->nzones; ++i) {
892 printk(KERN_CONT " zone %llu-%llu devs", 889 printk(KERN_CONT " zone %llu-%llu devs",
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 74257d6436ad..d83085381bcc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2855,9 +2855,10 @@ static void update_processed_blocks(struct reloc_control *rc,
2855 } 2855 }
2856} 2856}
2857 2857
2858static int tree_block_processed(u64 bytenr, u32 blocksize, 2858static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2859 struct reloc_control *rc)
2860{ 2859{
2860 u32 blocksize = rc->extent_root->nodesize;
2861
2861 if (test_range_bit(&rc->processed_blocks, bytenr, 2862 if (test_range_bit(&rc->processed_blocks, bytenr,
2862 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2863 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2863 return 1; 2864 return 1;
@@ -2965,8 +2966,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2965 while (rb_node) { 2966 while (rb_node) {
2966 block = rb_entry(rb_node, struct tree_block, rb_node); 2967 block = rb_entry(rb_node, struct tree_block, rb_node);
2967 if (!block->key_ready) 2968 if (!block->key_ready)
2968 readahead_tree_block(rc->extent_root, block->bytenr, 2969 readahead_tree_block(rc->extent_root, block->bytenr);
2969 block->key.objectid);
2970 rb_node = rb_next(rb_node); 2970 rb_node = rb_next(rb_node);
2971 } 2971 }
2972 2972
@@ -3353,7 +3353,7 @@ static int __add_tree_block(struct reloc_control *rc,
3353 bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, 3353 bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info,
3354 SKINNY_METADATA); 3354 SKINNY_METADATA);
3355 3355
3356 if (tree_block_processed(bytenr, blocksize, rc)) 3356 if (tree_block_processed(bytenr, rc))
3357 return 0; 3357 return 0;
3358 3358
3359 if (tree_search(blocks, bytenr)) 3359 if (tree_search(blocks, bytenr))
@@ -3611,7 +3611,7 @@ static int find_data_references(struct reloc_control *rc,
3611 if (added) 3611 if (added)
3612 goto next; 3612 goto next;
3613 3613
3614 if (!tree_block_processed(leaf->start, leaf->len, rc)) { 3614 if (!tree_block_processed(leaf->start, rc)) {
3615 block = kmalloc(sizeof(*block), GFP_NOFS); 3615 block = kmalloc(sizeof(*block), GFP_NOFS);
3616 if (!block) { 3616 if (!block) {
3617 err = -ENOMEM; 3617 err = -ENOMEM;
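
Both this hunk and the reada.c changes above rely on all btrfs metadata blocks sharing a single size (the nodesize), so caching it per read-ahead extent or threading it through as a parameter is redundant. The shape of the cleanup, sketched with stand-in names:

    struct ctl {
        unsigned int nodesize;      /* one size for all tree blocks */
    };

    /* before: tree_block_range(u64 bytenr, u32 blocksize, ...) */
    static void tree_block_range(unsigned long long bytenr,
                                 const struct ctl *ctl,
                                 unsigned long long *start,
                                 unsigned long long *end)
    {
        *start = bytenr;
        *end = bytenr + ctl->nodesize - 1;  /* was: caller-passed size */
    }
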
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index e427cb7ee12c..ec57687c9a4d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -66,7 +66,6 @@ struct scrub_ctx;
66struct scrub_recover { 66struct scrub_recover {
67 atomic_t refs; 67 atomic_t refs;
68 struct btrfs_bio *bbio; 68 struct btrfs_bio *bbio;
69 u64 *raid_map;
70 u64 map_length; 69 u64 map_length;
71}; 70};
72 71
@@ -80,7 +79,7 @@ struct scrub_page {
80 u64 logical; 79 u64 logical;
81 u64 physical; 80 u64 physical;
82 u64 physical_for_dev_replace; 81 u64 physical_for_dev_replace;
83 atomic_t ref_count; 82 atomic_t refs;
84 struct { 83 struct {
85 unsigned int mirror_num:8; 84 unsigned int mirror_num:8;
86 unsigned int have_csum:1; 85 unsigned int have_csum:1;
@@ -113,7 +112,7 @@ struct scrub_block {
113 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; 112 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
114 int page_count; 113 int page_count;
115 atomic_t outstanding_pages; 114 atomic_t outstanding_pages;
116 atomic_t ref_count; /* free mem on transition to zero */ 115 atomic_t refs; /* free mem on transition to zero */
117 struct scrub_ctx *sctx; 116 struct scrub_ctx *sctx;
118 struct scrub_parity *sparity; 117 struct scrub_parity *sparity;
119 struct { 118 struct {
@@ -142,7 +141,7 @@ struct scrub_parity {
142 141
143 int stripe_len; 142 int stripe_len;
144 143
145 atomic_t ref_count; 144 atomic_t refs;
146 145
147 struct list_head spages; 146 struct list_head spages;
148 147
@@ -194,6 +193,15 @@ struct scrub_ctx {
194 */ 193 */
195 struct btrfs_scrub_progress stat; 194 struct btrfs_scrub_progress stat;
196 spinlock_t stat_lock; 195 spinlock_t stat_lock;
196
197 /*
198 * Use a ref counter to avoid use-after-free issues. Scrub workers
199 * decrement bios_in_flight and workers_pending and then do a wakeup
200 * on the list_wait wait queue. We must ensure the main scrub task
201 * doesn't free the scrub context before or while the workers are
202 * doing the wakeup() call.
203 */
204 atomic_t refs;
197}; 205};
198 206
199struct scrub_fixup_nodatasum { 207struct scrub_fixup_nodatasum {
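
The comment added above documents the race being fixed: a worker's final wakeup on list_wait still dereferences the scrub context, so the main scrub task must not free it on its own schedule. The fix threads a reference through every in-flight bio and pending worker and frees only on the last scrub_put_ctx(). A self-contained userspace analogue (pthreads in place of kernel wait queues; all names illustrative):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct sctx {
        atomic_int refs;
        atomic_int bios_in_flight;
        pthread_mutex_t lock;
        pthread_cond_t list_wait;
    };

    static void put_ctx(struct sctx *c)
    {
        if (atomic_fetch_sub(&c->refs, 1) == 1) {
            pthread_cond_destroy(&c->list_wait);
            pthread_mutex_destroy(&c->lock);
            free(c);
        }
    }

    /* Worker completion: decrement, wake the waiter, then drop the
     * reference taken when the bio was issued. The wakeup still
     * touches *c, which is exactly why the put must come last. */
    static void *bio_done(void *arg)
    {
        struct sctx *c = arg;

        pthread_mutex_lock(&c->lock);
        atomic_fetch_sub(&c->bios_in_flight, 1);
        pthread_cond_broadcast(&c->list_wait);
        pthread_mutex_unlock(&c->lock);
        put_ctx(c);
        return NULL;
    }

    int main(void)
    {
        struct sctx *c = calloc(1, sizeof(*c));
        pthread_t t;

        if (!c)
            return 1;
        atomic_init(&c->refs, 2);            /* main + one worker */
        atomic_init(&c->bios_in_flight, 1);
        pthread_mutex_init(&c->lock, NULL);
        pthread_cond_init(&c->list_wait, NULL);

        pthread_create(&t, NULL, bio_done, c);

        pthread_mutex_lock(&c->lock);
        while (atomic_load(&c->bios_in_flight) > 0)
            pthread_cond_wait(&c->list_wait, &c->lock);
        pthread_mutex_unlock(&c->lock);

        pthread_join(&t, NULL);
        put_ctx(c);                          /* last put frees */
        return 0;
    }
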
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
236static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); 244static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
237static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); 245static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
238static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); 246static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
239static int scrub_setup_recheck_block(struct scrub_ctx *sctx, 247static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
240 struct btrfs_fs_info *fs_info,
241 struct scrub_block *original_sblock,
242 u64 length, u64 logical,
243 struct scrub_block *sblocks_for_recheck); 248 struct scrub_block *sblocks_for_recheck);
244static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 249static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
245 struct scrub_block *sblock, int is_metadata, 250 struct scrub_block *sblock, int is_metadata,
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
251 const u8 *csum, u64 generation, 256 const u8 *csum, u64 generation,
252 u16 csum_size); 257 u16 csum_size);
253static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, 258static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
254 struct scrub_block *sblock_good, 259 struct scrub_block *sblock_good);
255 int force_write);
256static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, 260static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
257 struct scrub_block *sblock_good, 261 struct scrub_block *sblock_good,
258 int page_num, int force_write); 262 int page_num, int force_write);
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
302static void copy_nocow_pages_worker(struct btrfs_work *work); 306static void copy_nocow_pages_worker(struct btrfs_work *work);
303static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 307static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
304static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 308static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
309static void scrub_put_ctx(struct scrub_ctx *sctx);
305 310
306 311
307static void scrub_pending_bio_inc(struct scrub_ctx *sctx) 312static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
308{ 313{
314 atomic_inc(&sctx->refs);
309 atomic_inc(&sctx->bios_in_flight); 315 atomic_inc(&sctx->bios_in_flight);
310} 316}
311 317
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
313{ 319{
314 atomic_dec(&sctx->bios_in_flight); 320 atomic_dec(&sctx->bios_in_flight);
315 wake_up(&sctx->list_wait); 321 wake_up(&sctx->list_wait);
322 scrub_put_ctx(sctx);
316} 323}
317 324
318static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) 325static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
346{ 353{
347 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; 354 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
348 355
356 atomic_inc(&sctx->refs);
349 /* 357 /*
350 * increment scrubs_running to prevent cancel requests from 358 * increment scrubs_running to prevent cancel requests from
351 * completing as long as a worker is running. we must also 359 * completing as long as a worker is running. we must also
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
388 atomic_dec(&sctx->workers_pending); 396 atomic_dec(&sctx->workers_pending);
389 wake_up(&fs_info->scrub_pause_wait); 397 wake_up(&fs_info->scrub_pause_wait);
390 wake_up(&sctx->list_wait); 398 wake_up(&sctx->list_wait);
399 scrub_put_ctx(sctx);
391} 400}
392 401
393static void scrub_free_csums(struct scrub_ctx *sctx) 402static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
433 kfree(sctx); 442 kfree(sctx);
434} 443}
435 444
445static void scrub_put_ctx(struct scrub_ctx *sctx)
446{
447 if (atomic_dec_and_test(&sctx->refs))
448 scrub_free_ctx(sctx);
449}
450
436static noinline_for_stack 451static noinline_for_stack
437struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) 452struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
438{ 453{
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
457 sctx = kzalloc(sizeof(*sctx), GFP_NOFS); 472 sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
458 if (!sctx) 473 if (!sctx)
459 goto nomem; 474 goto nomem;
475 atomic_set(&sctx->refs, 1);
460 sctx->is_dev_replace = is_dev_replace; 476 sctx->is_dev_replace = is_dev_replace;
461 sctx->pages_per_rd_bio = pages_per_rd_bio; 477 sctx->pages_per_rd_bio = pages_per_rd_bio;
462 sctx->curr = -1; 478 sctx->curr = -1;
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
520 struct inode_fs_paths *ipath = NULL; 536 struct inode_fs_paths *ipath = NULL;
521 struct btrfs_root *local_root; 537 struct btrfs_root *local_root;
522 struct btrfs_key root_key; 538 struct btrfs_key root_key;
539 struct btrfs_key key;
523 540
524 root_key.objectid = root; 541 root_key.objectid = root;
525 root_key.type = BTRFS_ROOT_ITEM_KEY; 542 root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
530 goto err; 547 goto err;
531 } 548 }
532 549
533 ret = inode_item_info(inum, 0, local_root, swarn->path); 550 /*
551 * this makes the path point to (inum INODE_ITEM ioff)
552 */
553 key.objectid = inum;
554 key.type = BTRFS_INODE_ITEM_KEY;
555 key.offset = 0;
556
557 ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
534 if (ret) { 558 if (ret) {
535 btrfs_release_path(swarn->path); 559 btrfs_release_path(swarn->path);
536 goto err; 560 goto err;
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
848static inline void scrub_put_recover(struct scrub_recover *recover) 872static inline void scrub_put_recover(struct scrub_recover *recover)
849{ 873{
850 if (atomic_dec_and_test(&recover->refs)) { 874 if (atomic_dec_and_test(&recover->refs)) {
851 kfree(recover->bbio); 875 btrfs_put_bbio(recover->bbio);
852 kfree(recover->raid_map);
853 kfree(recover); 876 kfree(recover);
854 } 877 }
855} 878}
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
955 } 978 }
956 979
957 /* setup the context, map the logical blocks and alloc the pages */ 980 /* setup the context, map the logical blocks and alloc the pages */
958 ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length, 981 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
959 logical, sblocks_for_recheck);
960 if (ret) { 982 if (ret) {
961 spin_lock(&sctx->stat_lock); 983 spin_lock(&sctx->stat_lock);
962 sctx->stat.read_errors++; 984 sctx->stat.read_errors++;
@@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
1030 if (!is_metadata && !have_csum) { 1052 if (!is_metadata && !have_csum) {
1031 struct scrub_fixup_nodatasum *fixup_nodatasum; 1053 struct scrub_fixup_nodatasum *fixup_nodatasum;
1032 1054
1033nodatasum_case:
1034 WARN_ON(sctx->is_dev_replace); 1055 WARN_ON(sctx->is_dev_replace);
1035 1056
1057nodatasum_case:
1058
1036 /* 1059 /*
1037 * !is_metadata and !have_csum, this means that the data 1060 * !is_metadata and !have_csum, this means that the data
1038 * might not be COW'ed, that it might be modified 1061 * might not be COW'ed, that it might be modified
@@ -1091,76 +1114,20 @@ nodatasum_case:
1091 sblock_other->no_io_error_seen) { 1114 sblock_other->no_io_error_seen) {
1092 if (sctx->is_dev_replace) { 1115 if (sctx->is_dev_replace) {
1093 scrub_write_block_to_dev_replace(sblock_other); 1116 scrub_write_block_to_dev_replace(sblock_other);
1117 goto corrected_error;
1094 } else { 1118 } else {
1095 int force_write = is_metadata || have_csum;
1096
1097 ret = scrub_repair_block_from_good_copy( 1119 ret = scrub_repair_block_from_good_copy(
1098 sblock_bad, sblock_other, 1120 sblock_bad, sblock_other);
1099 force_write); 1121 if (!ret)
1122 goto corrected_error;
1100 } 1123 }
1101 if (0 == ret)
1102 goto corrected_error;
1103 } 1124 }
1104 } 1125 }
1105 1126
1106 /* 1127 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1107 * for dev_replace, pick good pages and write to the target device. 1128 goto did_not_correct_error;
1108 */
1109 if (sctx->is_dev_replace) {
1110 success = 1;
1111 for (page_num = 0; page_num < sblock_bad->page_count;
1112 page_num++) {
1113 int sub_success;
1114
1115 sub_success = 0;
1116 for (mirror_index = 0;
1117 mirror_index < BTRFS_MAX_MIRRORS &&
1118 sblocks_for_recheck[mirror_index].page_count > 0;
1119 mirror_index++) {
1120 struct scrub_block *sblock_other =
1121 sblocks_for_recheck + mirror_index;
1122 struct scrub_page *page_other =
1123 sblock_other->pagev[page_num];
1124
1125 if (!page_other->io_error) {
1126 ret = scrub_write_page_to_dev_replace(
1127 sblock_other, page_num);
1128 if (ret == 0) {
1129 /* succeeded for this page */
1130 sub_success = 1;
1131 break;
1132 } else {
1133 btrfs_dev_replace_stats_inc(
1134 &sctx->dev_root->
1135 fs_info->dev_replace.
1136 num_write_errors);
1137 }
1138 }
1139 }
1140
1141 if (!sub_success) {
1142 /*
1143 * did not find a mirror to fetch the page
1144 * from. scrub_write_page_to_dev_replace()
1145 * handles this case (page->io_error), by
1146 * filling the block with zeros before
1147 * submitting the write request
1148 */
1149 success = 0;
1150 ret = scrub_write_page_to_dev_replace(
1151 sblock_bad, page_num);
1152 if (ret)
1153 btrfs_dev_replace_stats_inc(
1154 &sctx->dev_root->fs_info->
1155 dev_replace.num_write_errors);
1156 }
1157 }
1158
1159 goto out;
1160 }
1161 1129
1162 /* 1130 /*
1163 * for regular scrub, repair those pages that are errored.
1164 * In case of I/O errors in the area that is supposed to be 1131 * In case of I/O errors in the area that is supposed to be
1165 * repaired, continue by picking good copies of those pages. 1132 * repaired, continue by picking good copies of those pages.
1166 * Select the good pages from mirrors to rewrite bad pages from 1133 * Select the good pages from mirrors to rewrite bad pages from
@@ -1184,44 +1151,64 @@ nodatasum_case:
1184 * mirror, even if other 512 byte sectors in the same PAGE_SIZE 1151 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
1185 * area are unreadable. 1152 * area are unreadable.
1186 */ 1153 */
1187
1188 /* can only fix I/O errors from here on */
1189 if (sblock_bad->no_io_error_seen)
1190 goto did_not_correct_error;
1191
1192 success = 1; 1154 success = 1;
1193 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { 1155 for (page_num = 0; page_num < sblock_bad->page_count;
1156 page_num++) {
1194 struct scrub_page *page_bad = sblock_bad->pagev[page_num]; 1157 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1158 struct scrub_block *sblock_other = NULL;
1195 1159
1196 if (!page_bad->io_error) 1160 /* skip no-io-error page in scrub */
1161 if (!page_bad->io_error && !sctx->is_dev_replace)
1197 continue; 1162 continue;
1198 1163
1199 for (mirror_index = 0; 1164 /* try to find no-io-error page in mirrors */
1200 mirror_index < BTRFS_MAX_MIRRORS && 1165 if (page_bad->io_error) {
1201 sblocks_for_recheck[mirror_index].page_count > 0; 1166 for (mirror_index = 0;
1202 mirror_index++) { 1167 mirror_index < BTRFS_MAX_MIRRORS &&
1203 struct scrub_block *sblock_other = sblocks_for_recheck + 1168 sblocks_for_recheck[mirror_index].page_count > 0;
1204 mirror_index; 1169 mirror_index++) {
1205 struct scrub_page *page_other = sblock_other->pagev[ 1170 if (!sblocks_for_recheck[mirror_index].
1206 page_num]; 1171 pagev[page_num]->io_error) {
1207 1172 sblock_other = sblocks_for_recheck +
1208 if (!page_other->io_error) { 1173 mirror_index;
1209 ret = scrub_repair_page_from_good_copy( 1174 break;
1210 sblock_bad, sblock_other, page_num, 0);
1211 if (0 == ret) {
1212 page_bad->io_error = 0;
1213 break; /* succeeded for this page */
1214 } 1175 }
1215 } 1176 }
1177 if (!sblock_other)
1178 success = 0;
1216 } 1179 }
1217 1180
1218 if (page_bad->io_error) { 1181 if (sctx->is_dev_replace) {
1219 /* did not find a mirror to copy the page from */ 1182 /*
1220 success = 0; 1183 * did not find a mirror to fetch the page
1184 * from. scrub_write_page_to_dev_replace()
1185 * handles this case (page->io_error), by
1186 * filling the block with zeros before
1187 * submitting the write request
1188 */
1189 if (!sblock_other)
1190 sblock_other = sblock_bad;
1191
1192 if (scrub_write_page_to_dev_replace(sblock_other,
1193 page_num) != 0) {
1194 btrfs_dev_replace_stats_inc(
1195 &sctx->dev_root->
1196 fs_info->dev_replace.
1197 num_write_errors);
1198 success = 0;
1199 }
1200 } else if (sblock_other) {
1201 ret = scrub_repair_page_from_good_copy(sblock_bad,
1202 sblock_other,
1203 page_num, 0);
1204 if (0 == ret)
1205 page_bad->io_error = 0;
1206 else
1207 success = 0;
1221 } 1208 }
1222 } 1209 }
1223 1210
1224 if (success) { 1211 if (success && !sctx->is_dev_replace) {
1225 if (is_metadata || have_csum) { 1212 if (is_metadata || have_csum) {
1226 /* 1213 /*
1227 * need to verify the checksum now that all 1214 * need to verify the checksum now that all
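
The rewrite above folds the previously separate dev-replace path into the common per-page loop: find a mirror whose copy of the page read cleanly; in dev-replace mode write the good copy (or the bad page, zero-filled) to the target; in plain scrub mode copy the good page over the bad one. The control flow, reduced to a compilable sketch with stubbed actions:

    enum repair_mode { MODE_SCRUB, MODE_DEV_REPLACE };

    static int write_to_target(int have_good_mirror)
    {
        (void)have_good_mirror;     /* bad pages go out zero-filled */
        return 0;
    }

    static int copy_good_over_bad(void)
    {
        return 0;
    }

    static int repair_one_page(enum repair_mode mode, int page_io_error,
                               int have_good_mirror)
    {
        /* plain scrub only acts on pages that actually failed I/O */
        if (!page_io_error && mode == MODE_SCRUB)
            return 0;

        if (mode == MODE_DEV_REPLACE)
            return write_to_target(have_good_mirror);

        if (have_good_mirror)
            return copy_good_over_bad();    /* clears page->io_error */

        return -1;                          /* uncorrectable page */
    }
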
@@ -1288,19 +1275,18 @@ out:
1288 return 0; 1275 return 0;
1289} 1276}
1290 1277
1291static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map) 1278static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1292{ 1279{
1293 if (raid_map) { 1280 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1294 if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) 1281 return 2;
1295 return 3; 1282 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1296 else 1283 return 3;
1297 return 2; 1284 else
1298 } else {
1299 return (int)bbio->num_stripes; 1285 return (int)bbio->num_stripes;
1300 }
1301} 1286}
1302 1287
1303static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, 1288static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1289 u64 *raid_map,
1304 u64 mapped_length, 1290 u64 mapped_length,
1305 int nstripes, int mirror, 1291 int nstripes, int mirror,
1306 int *stripe_index, 1292 int *stripe_index,
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
1308{ 1294{
1309 int i; 1295 int i;
1310 1296
1311 if (raid_map) { 1297 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
1312 /* RAID5/6 */ 1298 /* RAID5/6 */
1313 for (i = 0; i < nstripes; i++) { 1299 for (i = 0; i < nstripes; i++) {
1314 if (raid_map[i] == RAID6_Q_STRIPE || 1300 if (raid_map[i] == RAID6_Q_STRIPE ||
@@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
1329 } 1315 }
1330} 1316}
1331 1317
1332static int scrub_setup_recheck_block(struct scrub_ctx *sctx, 1318static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1333 struct btrfs_fs_info *fs_info,
1334 struct scrub_block *original_sblock,
1335 u64 length, u64 logical,
1336 struct scrub_block *sblocks_for_recheck) 1319 struct scrub_block *sblocks_for_recheck)
1337{ 1320{
1321 struct scrub_ctx *sctx = original_sblock->sctx;
1322 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
1323 u64 length = original_sblock->page_count * PAGE_SIZE;
1324 u64 logical = original_sblock->pagev[0]->logical;
1338 struct scrub_recover *recover; 1325 struct scrub_recover *recover;
1339 struct btrfs_bio *bbio; 1326 struct btrfs_bio *bbio;
1340 u64 *raid_map;
1341 u64 sublen; 1327 u64 sublen;
1342 u64 mapped_length; 1328 u64 mapped_length;
1343 u64 stripe_offset; 1329 u64 stripe_offset;
1344 int stripe_index; 1330 int stripe_index;
1345 int page_index; 1331 int page_index = 0;
1346 int mirror_index; 1332 int mirror_index;
1347 int nmirrors; 1333 int nmirrors;
1348 int ret; 1334 int ret;
1349 1335
1350 /* 1336 /*
1351 * note: the two members ref_count and outstanding_pages 1337 * note: the two members refs and outstanding_pages
1352 * are not used (and not set) in the blocks that are used for 1338 * are not used (and not set) in the blocks that are used for
1353 * the recheck procedure 1339 * the recheck procedure
1354 */ 1340 */
1355 1341
1356 page_index = 0;
1357 while (length > 0) { 1342 while (length > 0) {
1358 sublen = min_t(u64, length, PAGE_SIZE); 1343 sublen = min_t(u64, length, PAGE_SIZE);
1359 mapped_length = sublen; 1344 mapped_length = sublen;
1360 bbio = NULL; 1345 bbio = NULL;
1361 raid_map = NULL;
1362 1346
1363 /* 1347 /*
1364 * with a length of PAGE_SIZE, each returned stripe 1348 * with a length of PAGE_SIZE, each returned stripe
1365 * represents one mirror 1349 * represents one mirror
1366 */ 1350 */
1367 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, 1351 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
1368 &mapped_length, &bbio, 0, &raid_map); 1352 &mapped_length, &bbio, 0, 1);
1369 if (ret || !bbio || mapped_length < sublen) { 1353 if (ret || !bbio || mapped_length < sublen) {
1370 kfree(bbio); 1354 btrfs_put_bbio(bbio);
1371 kfree(raid_map);
1372 return -EIO; 1355 return -EIO;
1373 } 1356 }
1374 1357
1375 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); 1358 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1376 if (!recover) { 1359 if (!recover) {
1377 kfree(bbio); 1360 btrfs_put_bbio(bbio);
1378 kfree(raid_map);
1379 return -ENOMEM; 1361 return -ENOMEM;
1380 } 1362 }
1381 1363
1382 atomic_set(&recover->refs, 1); 1364 atomic_set(&recover->refs, 1);
1383 recover->bbio = bbio; 1365 recover->bbio = bbio;
1384 recover->raid_map = raid_map;
1385 recover->map_length = mapped_length; 1366 recover->map_length = mapped_length;
1386 1367
1387 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); 1368 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1388 1369
1389 nmirrors = scrub_nr_raid_mirrors(bbio, raid_map); 1370 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1371
1390 for (mirror_index = 0; mirror_index < nmirrors; 1372 for (mirror_index = 0; mirror_index < nmirrors;
1391 mirror_index++) { 1373 mirror_index++) {
1392 struct scrub_block *sblock; 1374 struct scrub_block *sblock;
1393 struct scrub_page *page; 1375 struct scrub_page *page;
1394 1376
1395 if (mirror_index >= BTRFS_MAX_MIRRORS)
1396 continue;
1397
1398 sblock = sblocks_for_recheck + mirror_index; 1377 sblock = sblocks_for_recheck + mirror_index;
1399 sblock->sctx = sctx; 1378 sblock->sctx = sctx;
1400 page = kzalloc(sizeof(*page), GFP_NOFS); 1379 page = kzalloc(sizeof(*page), GFP_NOFS);
@@ -1410,9 +1389,12 @@ leave_nomem:
1410 sblock->pagev[page_index] = page; 1389 sblock->pagev[page_index] = page;
1411 page->logical = logical; 1390 page->logical = logical;
1412 1391
1413 scrub_stripe_index_and_offset(logical, raid_map, 1392 scrub_stripe_index_and_offset(logical,
1393 bbio->map_type,
1394 bbio->raid_map,
1414 mapped_length, 1395 mapped_length,
1415 bbio->num_stripes, 1396 bbio->num_stripes -
1397 bbio->num_tgtdevs,
1416 mirror_index, 1398 mirror_index,
1417 &stripe_index, 1399 &stripe_index,
1418 &stripe_offset); 1400 &stripe_offset);
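
scrub_stripe_index_and_offset() now takes the profile explicitly: for RAID5/6 it walks raid_map, skipping the P/Q sentinels, to translate a logical address into a (data stripe, offset within stripe) pair; for mirrored profiles the mirror number is the stripe index and the offset is zero. Passing bbio->num_stripes - bbio->num_tgtdevs keeps stripes appended for a running dev-replace from being counted as mirrors. The RAID56 arithmetic boils down to, roughly (hypothetical helper; sentinel skipping elided):

    /* For a logical address inside a full stripe that starts at
     * raid_map[0], with data stripes laid out stripe_len apart: */
    static void stripe_index_and_offset(unsigned long long logical,
                                        unsigned long long full_stripe_start,
                                        unsigned long long stripe_len,
                                        int *stripe_index,
                                        unsigned long long *stripe_offset)
    {
        unsigned long long off = logical - full_stripe_start;

        *stripe_index  = (int)(off / stripe_len);   /* which data stripe */
        *stripe_offset = off % stripe_len;          /* offset inside it */
    }
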
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)
1458 1440
1459static inline int scrub_is_page_on_raid56(struct scrub_page *page) 1441static inline int scrub_is_page_on_raid56(struct scrub_page *page)
1460{ 1442{
1461 return page->recover && page->recover->raid_map; 1443 return page->recover &&
1444 (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
1462} 1445}
1463 1446
1464static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, 1447static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1475 bio->bi_end_io = scrub_bio_wait_endio; 1458 bio->bi_end_io = scrub_bio_wait_endio;
1476 1459
1477 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, 1460 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
1478 page->recover->raid_map,
1479 page->recover->map_length, 1461 page->recover->map_length,
1480 page->mirror_num, 0); 1462 page->mirror_num, 0);
1481 if (ret) 1463 if (ret)
@@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1615} 1597}
1616 1598
1617static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, 1599static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1618 struct scrub_block *sblock_good, 1600 struct scrub_block *sblock_good)
1619 int force_write)
1620{ 1601{
1621 int page_num; 1602 int page_num;
1622 int ret = 0; 1603 int ret = 0;
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1626 1607
1627 ret_sub = scrub_repair_page_from_good_copy(sblock_bad, 1608 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1628 sblock_good, 1609 sblock_good,
1629 page_num, 1610 page_num, 1);
1630 force_write);
1631 if (ret_sub) 1611 if (ret_sub)
1632 ret = ret_sub; 1612 ret = ret_sub;
1633 } 1613 }
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
2067 2047
2068static void scrub_block_get(struct scrub_block *sblock) 2048static void scrub_block_get(struct scrub_block *sblock)
2069{ 2049{
2070 atomic_inc(&sblock->ref_count); 2050 atomic_inc(&sblock->refs);
2071} 2051}
2072 2052
2073static void scrub_block_put(struct scrub_block *sblock) 2053static void scrub_block_put(struct scrub_block *sblock)
2074{ 2054{
2075 if (atomic_dec_and_test(&sblock->ref_count)) { 2055 if (atomic_dec_and_test(&sblock->refs)) {
2076 int i; 2056 int i;
2077 2057
2078 if (sblock->sparity) 2058 if (sblock->sparity)
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)
2086 2066
2087static void scrub_page_get(struct scrub_page *spage) 2067static void scrub_page_get(struct scrub_page *spage)
2088{ 2068{
2089 atomic_inc(&spage->ref_count); 2069 atomic_inc(&spage->refs);
2090} 2070}
2091 2071
2092static void scrub_page_put(struct scrub_page *spage) 2072static void scrub_page_put(struct scrub_page *spage)
2093{ 2073{
2094 if (atomic_dec_and_test(&spage->ref_count)) { 2074 if (atomic_dec_and_test(&spage->refs)) {
2095 if (spage->page) 2075 if (spage->page)
2096 __free_page(spage->page); 2076 __free_page(spage->page);
2097 kfree(spage); 2077 kfree(spage);
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2217 2197
2218 /* one ref inside this function, plus one for each page added to 2198 /* one ref inside this function, plus one for each page added to
2219 * a bio later on */ 2199 * a bio later on */
2220 atomic_set(&sblock->ref_count, 1); 2200 atomic_set(&sblock->refs, 1);
2221 sblock->sctx = sctx; 2201 sblock->sctx = sctx;
2222 sblock->no_io_error_seen = 1; 2202 sblock->no_io_error_seen = 1;
2223 2203
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
2510 2490
2511 /* one ref inside this function, plus one for each page added to 2491 /* one ref inside this function, plus one for each page added to
2512 * a bio later on */ 2492 * a bio later on */
2513 atomic_set(&sblock->ref_count, 1); 2493 atomic_set(&sblock->refs, 1);
2514 sblock->sctx = sctx; 2494 sblock->sctx = sctx;
2515 sblock->no_io_error_seen = 1; 2495 sblock->no_io_error_seen = 1;
2516 sblock->sparity = sparity; 2496 sblock->sparity = sparity;
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2705 struct btrfs_raid_bio *rbio; 2685 struct btrfs_raid_bio *rbio;
2706 struct scrub_page *spage; 2686 struct scrub_page *spage;
2707 struct btrfs_bio *bbio = NULL; 2687 struct btrfs_bio *bbio = NULL;
2708 u64 *raid_map = NULL;
2709 u64 length; 2688 u64 length;
2710 int ret; 2689 int ret;
2711 2690
@@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2716 length = sparity->logic_end - sparity->logic_start + 1; 2695 length = sparity->logic_end - sparity->logic_start + 1;
2717 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, 2696 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2718 sparity->logic_start, 2697 sparity->logic_start,
2719 &length, &bbio, 0, &raid_map); 2698 &length, &bbio, 0, 1);
2720 if (ret || !bbio || !raid_map) 2699 if (ret || !bbio || !bbio->raid_map)
2721 goto bbio_out; 2700 goto bbio_out;
2722 2701
2723 bio = btrfs_io_bio_alloc(GFP_NOFS, 0); 2702 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2729 bio->bi_end_io = scrub_parity_bio_endio; 2708 bio->bi_end_io = scrub_parity_bio_endio;
2730 2709
2731 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, 2710 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2732 raid_map, length, 2711 length, sparity->scrub_dev,
2733 sparity->scrub_dev,
2734 sparity->dbitmap, 2712 sparity->dbitmap,
2735 sparity->nsectors); 2713 sparity->nsectors);
2736 if (!rbio) 2714 if (!rbio)
@@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2747rbio_out: 2725rbio_out:
2748 bio_put(bio); 2726 bio_put(bio);
2749bbio_out: 2727bbio_out:
2750 kfree(bbio); 2728 btrfs_put_bbio(bbio);
2751 kfree(raid_map);
2752 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, 2729 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2753 sparity->nsectors); 2730 sparity->nsectors);
2754 spin_lock(&sctx->stat_lock); 2731 spin_lock(&sctx->stat_lock);
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
2765 2742
2766static void scrub_parity_get(struct scrub_parity *sparity) 2743static void scrub_parity_get(struct scrub_parity *sparity)
2767{ 2744{
2768 atomic_inc(&sparity->ref_count); 2745 atomic_inc(&sparity->refs);
2769} 2746}
2770 2747
2771static void scrub_parity_put(struct scrub_parity *sparity) 2748static void scrub_parity_put(struct scrub_parity *sparity)
2772{ 2749{
2773 if (!atomic_dec_and_test(&sparity->ref_count)) 2750 if (!atomic_dec_and_test(&sparity->refs))
2774 return; 2751 return;
2775 2752
2776 scrub_parity_check_and_repair(sparity); 2753 scrub_parity_check_and_repair(sparity);
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2820 sparity->scrub_dev = sdev; 2797 sparity->scrub_dev = sdev;
2821 sparity->logic_start = logic_start; 2798 sparity->logic_start = logic_start;
2822 sparity->logic_end = logic_end; 2799 sparity->logic_end = logic_end;
2823 atomic_set(&sparity->ref_count, 1); 2800 atomic_set(&sparity->refs, 1);
2824 INIT_LIST_HEAD(&sparity->spages); 2801 INIT_LIST_HEAD(&sparity->spages);
2825 sparity->dbitmap = sparity->bitmap; 2802 sparity->dbitmap = sparity->bitmap;
2826 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; 2803 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3037 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 3014 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3038 increment = map->stripe_len; 3015 increment = map->stripe_len;
3039 mirror_num = num % map->num_stripes + 1; 3016 mirror_num = num % map->num_stripes + 1;
3040 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3017 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3041 BTRFS_BLOCK_GROUP_RAID6)) {
3042 get_raid56_logic_offset(physical, num, map, &offset, NULL); 3018 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3043 increment = map->stripe_len * nr_data_stripes(map); 3019 increment = map->stripe_len * nr_data_stripes(map);
3044 mirror_num = 1; 3020 mirror_num = 1;
@@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3074 */ 3050 */
3075 logical = base + offset; 3051 logical = base + offset;
3076 physical_end = physical + nstripes * map->stripe_len; 3052 physical_end = physical + nstripes * map->stripe_len;
3077 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3053 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3078 BTRFS_BLOCK_GROUP_RAID6)) {
3079 get_raid56_logic_offset(physical_end, num, 3054 get_raid56_logic_offset(physical_end, num,
3080 map, &logic_end, NULL); 3055 map, &logic_end, NULL);
3081 logic_end += base; 3056 logic_end += base;
@@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3121 ret = 0; 3096 ret = 0;
3122 while (physical < physical_end) { 3097 while (physical < physical_end) {
3123 /* for raid56, we skip parity stripe */ 3098 /* for raid56, we skip parity stripe */
3124 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3099 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3125 BTRFS_BLOCK_GROUP_RAID6)) {
3126 ret = get_raid56_logic_offset(physical, num, 3100 ret = get_raid56_logic_offset(physical, num,
3127 map, &logical, &stripe_logical); 3101 map, &logical, &stripe_logical);
3128 logical += base; 3102 logical += base;
@@ -3280,8 +3254,7 @@ again:
3280 scrub_free_csums(sctx); 3254 scrub_free_csums(sctx);
3281 if (extent_logical + extent_len < 3255 if (extent_logical + extent_len <
3282 key.objectid + bytes) { 3256 key.objectid + bytes) {
3283 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3257 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3284 BTRFS_BLOCK_GROUP_RAID6)) {
3285 /* 3258 /*
3286 * loop until we find next data stripe 3259 * loop until we find next data stripe
3287 * or we have finished all stripes. 3260 * or we have finished all stripes.
@@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3775 scrub_workers_put(fs_info); 3748 scrub_workers_put(fs_info);
3776 mutex_unlock(&fs_info->scrub_lock); 3749 mutex_unlock(&fs_info->scrub_lock);
3777 3750
3778 scrub_free_ctx(sctx); 3751 scrub_put_ctx(sctx);
3779 3752
3780 return ret; 3753 return ret;
3781} 3754}
@@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3881 &mapped_length, &bbio, 0); 3854 &mapped_length, &bbio, 0);
3882 if (ret || !bbio || mapped_length < extent_len || 3855 if (ret || !bbio || mapped_length < extent_len ||
3883 !bbio->stripes[0].dev->bdev) { 3856 !bbio->stripes[0].dev->bdev) {
3884 kfree(bbio); 3857 btrfs_put_bbio(bbio);
3885 return; 3858 return;
3886 } 3859 }
3887 3860
3888 *extent_physical = bbio->stripes[0].physical; 3861 *extent_physical = bbio->stripes[0].physical;
3889 *extent_mirror_num = bbio->mirror_num; 3862 *extent_mirror_num = bbio->mirror_num;
3890 *extent_dev = bbio->stripes[0].dev; 3863 *extent_dev = bbio->stripes[0].dev;
3891 kfree(bbio); 3864 btrfs_put_bbio(bbio);
3892} 3865}
3893 3866
3894static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, 3867static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 804432dbc351..fe5857223515 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2471,12 +2471,9 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
2471 if (ret < 0) 2471 if (ret < 0)
2472 goto out; 2472 goto out;
2473 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2473 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2474 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, 2474 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
2475 btrfs_inode_atime(ii)); 2475 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
2476 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, 2476 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
2477 btrfs_inode_mtime(ii));
2478 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2479 btrfs_inode_ctime(ii));
2480 /* TODO Add otime support when the otime patches get into upstream */ 2477 /* TODO Add otime support when the otime patches get into upstream */
2481 2478
2482 ret = send_cmd(sctx); 2479 ret = send_cmd(sctx);
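struct btrfs_inode_item has always embedded its btrfs_timespec members; this cleanup drops the pointer-returning btrfs_inode_atime()-style helpers in favor of taking the member address directly (the fill_inode_item() hunks in tree-log.c below make the same substitution). Trimmed stand-in types to show the shape:

    #include <stdint.h>

    struct demo_timespec { uint64_t sec; uint32_t nsec; };
    struct demo_inode_item { struct demo_timespec atime; };

    /* callers now write &ii->atime where they used to call an accessor */
    static const struct demo_timespec *atime_ptr(const struct demo_inode_item *ii)
    {
            return &ii->atime;
    }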
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6f49b2872a64..05fef198ff94 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1958,11 +1958,6 @@ static int btrfs_freeze(struct super_block *sb)
1958 return btrfs_commit_transaction(trans, root); 1958 return btrfs_commit_transaction(trans, root);
1959} 1959}
1960 1960
1961static int btrfs_unfreeze(struct super_block *sb)
1962{
1963 return 0;
1964}
1965
1966static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 1961static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1967{ 1962{
1968 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 1963 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2011,7 +2006,6 @@ static const struct super_operations btrfs_super_ops = {
2011 .statfs = btrfs_statfs, 2006 .statfs = btrfs_statfs,
2012 .remount_fs = btrfs_remount, 2007 .remount_fs = btrfs_remount,
2013 .freeze_fs = btrfs_freeze, 2008 .freeze_fs = btrfs_freeze,
2014 .unfreeze_fs = btrfs_unfreeze,
2015}; 2009};
2016 2010
2017static const struct file_operations btrfs_ctl_fops = { 2011static const struct file_operations btrfs_ctl_fops = {
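Deleting the empty btrfs_unfreeze() and its .unfreeze_fs hook works because the VFS only invokes the hook when it is non-NULL, so a stub that returns 0 is dead weight. A simplified sketch of that caller-side convention, with hypothetical names:

    struct super_block;
    struct super_operations {
            int (*unfreeze_fs)(struct super_block *);
    };

    /* absence of the optional hook means success */
    static int thaw_helper(struct super_block *sb,
                           const struct super_operations *op)
    {
            if (op->unfreeze_fs)
                    return op->unfreeze_fs(sb);
            return 0;
    }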
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 92db3f648df4..94edb0a2a026 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -733,10 +733,18 @@ int btrfs_init_sysfs(void)
733 733
734 ret = btrfs_init_debugfs(); 734 ret = btrfs_init_debugfs();
735 if (ret) 735 if (ret)
736 return ret; 736 goto out1;
737 737
738 init_feature_attrs(); 738 init_feature_attrs();
739 ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); 739 ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
740 if (ret)
741 goto out2;
742
743 return 0;
744out2:
745 debugfs_remove_recursive(btrfs_debugfs_root_dentry);
746out1:
747 kset_unregister(btrfs_kset);
740 748
741 return ret; 749 return ret;
742} 750}
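The btrfs_init_sysfs() fix ("add missing cleanup on sysfs init failure" in the merge summary) converts bare error returns into the kernel's standard unwind ladder: each later failure jumps to a label that tears down everything set up before it, in reverse order. The same shape generically, with hypothetical init/teardown pairs:

    int init_a(void); void teardown_a(void);
    int init_b(void); void teardown_b(void);
    int init_c(void);

    int demo_init_three(void)
    {
            int ret;

            ret = init_a();
            if (ret)
                    return ret;     /* nothing to unwind yet */
            ret = init_b();
            if (ret)
                    goto undo_a;
            ret = init_c();
            if (ret)
                    goto undo_b;
            return 0;

    undo_b:
            teardown_b();           /* reverse order of setup */
    undo_a:
            teardown_a();
            return ret;
    }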
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index cc286ce97d1e..f51963a8f929 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -53,7 +53,7 @@ static int test_btrfs_split_item(void)
53 return -ENOMEM; 53 return -ENOMEM;
54 } 54 }
55 55
56 path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096); 56 path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
57 if (!eb) { 57 if (!eb) {
58 test_msg("Could not allocate dummy buffer\n"); 58 test_msg("Could not allocate dummy buffer\n");
59 ret = -ENOMEM; 59 ret = -ENOMEM;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 7e99c2f98dd0..9e9f2368177d 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -258,8 +258,7 @@ static int test_find_delalloc(void)
258 } 258 }
259 ret = 0; 259 ret = 0;
260out_bits: 260out_bits:
261 clear_extent_bits(&tmp, 0, total_dirty - 1, 261 clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
262 (unsigned long)-1, GFP_NOFS);
263out: 262out:
264 if (locked_page) 263 if (locked_page)
265 page_cache_release(locked_page); 264 page_cache_release(locked_page);
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 3ae0f5b8bb80..a116b55ce788 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -255,7 +255,7 @@ static noinline int test_btrfs_get_extent(void)
255 goto out; 255 goto out;
256 } 256 }
257 257
258 root->node = alloc_dummy_extent_buffer(0, 4096); 258 root->node = alloc_dummy_extent_buffer(NULL, 4096);
259 if (!root->node) { 259 if (!root->node) {
260 test_msg("Couldn't allocate dummy buffer\n"); 260 test_msg("Couldn't allocate dummy buffer\n");
261 goto out; 261 goto out;
@@ -843,7 +843,7 @@ static int test_hole_first(void)
843 goto out; 843 goto out;
844 } 844 }
845 845
846 root->node = alloc_dummy_extent_buffer(0, 4096); 846 root->node = alloc_dummy_extent_buffer(NULL, 4096);
847 if (!root->node) { 847 if (!root->node) {
848 test_msg("Couldn't allocate dummy buffer\n"); 848 test_msg("Couldn't allocate dummy buffer\n");
849 goto out; 849 goto out;
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ec3dcb202357..73f299ebdabb 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -404,12 +404,22 @@ int btrfs_test_qgroups(void)
404 ret = -ENOMEM; 404 ret = -ENOMEM;
405 goto out; 405 goto out;
406 } 406 }
407 /* We are using this root as our extent root */
408 root->fs_info->extent_root = root;
409
410 /*
411 * Some of the paths we test assume we have a filled out fs_info, so we
412 * just need to add the root in there so we don't panic.
413 */
414 root->fs_info->tree_root = root;
415 root->fs_info->quota_root = root;
416 root->fs_info->quota_enabled = 1;
407 417
408 /* 418 /*
409 * Can't use bytenr 0, some things freak out 419 * Can't use bytenr 0, some things freak out
410 * *cough*backref walking code*cough* 420 * *cough*backref walking code*cough*
411 */ 421 */
412 root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096); 422 root->node = alloc_test_extent_buffer(root->fs_info, 4096);
413 if (!root->node) { 423 if (!root->node) {
414 test_msg("Couldn't allocate dummy buffer\n"); 424 test_msg("Couldn't allocate dummy buffer\n");
415 ret = -ENOMEM; 425 ret = -ENOMEM;
@@ -448,17 +458,6 @@ int btrfs_test_qgroups(void)
448 goto out; 458 goto out;
449 } 459 }
450 460
451 /* We are using this root as our extent root */
452 root->fs_info->extent_root = root;
453
454 /*
455 * Some of the paths we test assume we have a filled out fs_info, so we
456 * just need to add the root in there so we don't panic.
457 */
458 root->fs_info->tree_root = root;
459 root->fs_info->quota_root = root;
460 root->fs_info->quota_enabled = 1;
461
462 test_msg("Running qgroup tests\n"); 461 test_msg("Running qgroup tests\n");
463 ret = test_no_shared_qgroup(root); 462 ret = test_no_shared_qgroup(root);
464 if (ret) 463 if (ret)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e88b59d13439..7e80f32550a6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -220,6 +220,7 @@ loop:
220 * commit the transaction. 220 * commit the transaction.
221 */ 221 */
222 atomic_set(&cur_trans->use_count, 2); 222 atomic_set(&cur_trans->use_count, 2);
223 cur_trans->have_free_bgs = 0;
223 cur_trans->start_time = get_seconds(); 224 cur_trans->start_time = get_seconds();
224 225
225 cur_trans->delayed_refs.href_root = RB_ROOT; 226 cur_trans->delayed_refs.href_root = RB_ROOT;
@@ -248,6 +249,8 @@ loop:
248 INIT_LIST_HEAD(&cur_trans->pending_chunks); 249 INIT_LIST_HEAD(&cur_trans->pending_chunks);
249 INIT_LIST_HEAD(&cur_trans->switch_commits); 250 INIT_LIST_HEAD(&cur_trans->switch_commits);
250 INIT_LIST_HEAD(&cur_trans->pending_ordered); 251 INIT_LIST_HEAD(&cur_trans->pending_ordered);
252 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
253 spin_lock_init(&cur_trans->dirty_bgs_lock);
251 list_add_tail(&cur_trans->list, &fs_info->trans_list); 254 list_add_tail(&cur_trans->list, &fs_info->trans_list);
252 extent_io_tree_init(&cur_trans->dirty_pages, 255 extent_io_tree_init(&cur_trans->dirty_pages,
253 fs_info->btree_inode->i_mapping); 256 fs_info->btree_inode->i_mapping);
@@ -1020,6 +1023,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1020 u64 old_root_bytenr; 1023 u64 old_root_bytenr;
1021 u64 old_root_used; 1024 u64 old_root_used;
1022 struct btrfs_root *tree_root = root->fs_info->tree_root; 1025 struct btrfs_root *tree_root = root->fs_info->tree_root;
1026 bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
1023 1027
1024 old_root_used = btrfs_root_used(&root->root_item); 1028 old_root_used = btrfs_root_used(&root->root_item);
1025 btrfs_write_dirty_block_groups(trans, root); 1029 btrfs_write_dirty_block_groups(trans, root);
@@ -1027,7 +1031,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1027 while (1) { 1031 while (1) {
1028 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1032 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1029 if (old_root_bytenr == root->node->start && 1033 if (old_root_bytenr == root->node->start &&
1030 old_root_used == btrfs_root_used(&root->root_item)) 1034 old_root_used == btrfs_root_used(&root->root_item) &&
1035 (!extent_root ||
1036 list_empty(&trans->transaction->dirty_bgs)))
1031 break; 1037 break;
1032 1038
1033 btrfs_set_root_node(&root->root_item, root->node); 1039 btrfs_set_root_node(&root->root_item, root->node);
@@ -1038,7 +1044,15 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1038 return ret; 1044 return ret;
1039 1045
1040 old_root_used = btrfs_root_used(&root->root_item); 1046 old_root_used = btrfs_root_used(&root->root_item);
1041 ret = btrfs_write_dirty_block_groups(trans, root); 1047 if (extent_root) {
1048 ret = btrfs_write_dirty_block_groups(trans, root);
1049 if (ret)
1050 return ret;
1051 }
1052 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1053 if (ret)
1054 return ret;
1055 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1042 if (ret) 1056 if (ret)
1043 return ret; 1057 return ret;
1044 } 1058 }
@@ -1061,10 +1075,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1061 struct extent_buffer *eb; 1075 struct extent_buffer *eb;
1062 int ret; 1076 int ret;
1063 1077
1064 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1065 if (ret)
1066 return ret;
1067
1068 eb = btrfs_lock_root_node(fs_info->tree_root); 1078 eb = btrfs_lock_root_node(fs_info->tree_root);
1069 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 1079 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
1070 0, &eb); 1080 0, &eb);
@@ -1097,6 +1107,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1097 next = fs_info->dirty_cowonly_roots.next; 1107 next = fs_info->dirty_cowonly_roots.next;
1098 list_del_init(next); 1108 list_del_init(next);
1099 root = list_entry(next, struct btrfs_root, dirty_list); 1109 root = list_entry(next, struct btrfs_root, dirty_list);
1110 clear_bit(BTRFS_ROOT_DIRTY, &root->state);
1100 1111
1101 if (root != fs_info->extent_root) 1112 if (root != fs_info->extent_root)
1102 list_add_tail(&root->dirty_list, 1113 list_add_tail(&root->dirty_list,
@@ -1983,6 +1994,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1983 switch_commit_roots(cur_trans, root->fs_info); 1994 switch_commit_roots(cur_trans, root->fs_info);
1984 1995
1985 assert_qgroups_uptodate(trans); 1996 assert_qgroups_uptodate(trans);
1997 ASSERT(list_empty(&cur_trans->dirty_bgs));
1986 update_super_roots(root); 1998 update_super_roots(root);
1987 1999
1988 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 2000 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
@@ -2026,6 +2038,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
2026 2038
2027 btrfs_finish_extent_commit(trans, root); 2039 btrfs_finish_extent_commit(trans, root);
2028 2040
2041 if (cur_trans->have_free_bgs)
2042 btrfs_clear_space_info_full(root->fs_info);
2043
2029 root->fs_info->last_trans_committed = cur_trans->transid; 2044 root->fs_info->last_trans_committed = cur_trans->transid;
2030 /* 2045 /*
2031 * We needn't acquire the lock here because there is no other task 2046 * We needn't acquire the lock here because there is no other task
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 00ed29c4b3f9..937050a2b68e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -47,6 +47,11 @@ struct btrfs_transaction {
47 atomic_t num_writers; 47 atomic_t num_writers;
48 atomic_t use_count; 48 atomic_t use_count;
49 49
50 /*
51 * true if there are free bg operations in this transaction
52 */
53 int have_free_bgs;
54
50 /* Be protected by fs_info->trans_lock when we want to change it. */ 55 /* Be protected by fs_info->trans_lock when we want to change it. */
51 enum btrfs_trans_state state; 56 enum btrfs_trans_state state;
52 struct list_head list; 57 struct list_head list;
@@ -58,6 +63,8 @@ struct btrfs_transaction {
58 struct list_head pending_chunks; 63 struct list_head pending_chunks;
59 struct list_head pending_ordered; 64 struct list_head pending_ordered;
60 struct list_head switch_commits; 65 struct list_head switch_commits;
66 struct list_head dirty_bgs;
67 spinlock_t dirty_bgs_lock;
61 struct btrfs_delayed_ref_root delayed_refs; 68 struct btrfs_delayed_ref_root delayed_refs;
62 int aborted; 69 int aborted;
63}; 70};
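The two new fields travel as a pair: dirty_bgs is only touched under dirty_bgs_lock, and both are initialized together in the transaction.c hunk above. The pattern in isolation, reduced to the relevant fields:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_trans {
            struct list_head dirty_bgs;   /* block groups awaiting writeback */
            spinlock_t dirty_bgs_lock;    /* guards the list */
    };

    static void demo_trans_init(struct demo_trans *t)
    {
            INIT_LIST_HEAD(&t->dirty_bgs);
            spin_lock_init(&t->dirty_bgs_lock);
    }

    static void demo_trans_track(struct demo_trans *t, struct list_head *bg)
    {
            spin_lock(&t->dirty_bgs_lock);
            list_add_tail(bg, &t->dirty_bgs);
            spin_unlock(&t->dirty_bgs_lock);
    }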
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1a9585d4380a..9a37f8b39bae 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
453insert: 453insert:
454 btrfs_release_path(path); 454 btrfs_release_path(path);
455 /* try to insert the key into the destination tree */ 455 /* try to insert the key into the destination tree */
456 path->skip_release_on_error = 1;
456 ret = btrfs_insert_empty_item(trans, root, path, 457 ret = btrfs_insert_empty_item(trans, root, path,
457 key, item_size); 458 key, item_size);
459 path->skip_release_on_error = 0;
458 460
459 /* make sure any existing item is the correct size */ 461 /* make sure any existing item is the correct size */
460 if (ret == -EEXIST) { 462 if (ret == -EEXIST || ret == -EOVERFLOW) {
461 u32 found_size; 463 u32 found_size;
462 found_size = btrfs_item_size_nr(path->nodes[0], 464 found_size = btrfs_item_size_nr(path->nodes[0],
463 path->slots[0]); 465 path->slots[0]);
@@ -488,8 +490,20 @@ insert:
488 src_item = (struct btrfs_inode_item *)src_ptr; 490 src_item = (struct btrfs_inode_item *)src_ptr;
489 dst_item = (struct btrfs_inode_item *)dst_ptr; 491 dst_item = (struct btrfs_inode_item *)dst_ptr;
490 492
491 if (btrfs_inode_generation(eb, src_item) == 0) 493 if (btrfs_inode_generation(eb, src_item) == 0) {
494 struct extent_buffer *dst_eb = path->nodes[0];
495
496 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497 S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
498 struct btrfs_map_token token;
499 u64 ino_size = btrfs_inode_size(eb, src_item);
500
501 btrfs_init_map_token(&token);
502 btrfs_set_token_inode_size(dst_eb, dst_item,
503 ino_size, &token);
504 }
492 goto no_copy; 505 goto no_copy;
506 }
493 507
494 if (overwrite_root && 508 if (overwrite_root &&
495 S_ISDIR(btrfs_inode_mode(eb, src_item)) && 509 S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
@@ -844,7 +858,7 @@ out:
844static noinline int backref_in_log(struct btrfs_root *log, 858static noinline int backref_in_log(struct btrfs_root *log,
845 struct btrfs_key *key, 859 struct btrfs_key *key,
846 u64 ref_objectid, 860 u64 ref_objectid,
847 char *name, int namelen) 861 const char *name, int namelen)
848{ 862{
849 struct btrfs_path *path; 863 struct btrfs_path *path;
850 struct btrfs_inode_ref *ref; 864 struct btrfs_inode_ref *ref;
@@ -1254,13 +1268,14 @@ out:
1254} 1268}
1255 1269
1256static int insert_orphan_item(struct btrfs_trans_handle *trans, 1270static int insert_orphan_item(struct btrfs_trans_handle *trans,
1257 struct btrfs_root *root, u64 offset) 1271 struct btrfs_root *root, u64 ino)
1258{ 1272{
1259 int ret; 1273 int ret;
1260 ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID, 1274
1261 offset, BTRFS_ORPHAN_ITEM_KEY, NULL); 1275 ret = btrfs_insert_orphan_item(trans, root, ino);
1262 if (ret > 0) 1276 if (ret == -EEXIST)
1263 ret = btrfs_insert_orphan_item(trans, root, offset); 1277 ret = 0;
1278
1264 return ret; 1279 return ret;
1265} 1280}
1266 1281
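The old insert_orphan_item() searched first and inserted only on a miss, two tree walks; the new one inserts unconditionally and treats -EEXIST as success, one walk. The idempotent-insert shape in general, with a hypothetical do_insert():

    #include <linux/errno.h>

    struct ctx;
    int do_insert(struct ctx *c, unsigned long long ino);  /* hypothetical */

    static int insert_idempotent(struct ctx *c, unsigned long long ino)
    {
            int ret = do_insert(c, ino);

            if (ret == -EEXIST)     /* already present: fine */
                    ret = 0;
            return ret;
    }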
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
1287 leaf = path->nodes[0]; 1302 leaf = path->nodes[0];
1288 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1303 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1289 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 1304 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1305 cur_offset = 0;
1290 1306
1291 while (cur_offset < item_size) { 1307 while (cur_offset < item_size) {
1292 extref = (struct btrfs_inode_extref *) (ptr + cur_offset); 1308 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
1302 } 1318 }
1303 btrfs_release_path(path); 1319 btrfs_release_path(path);
1304 1320
1305 if (ret < 0) 1321 if (ret < 0 && ret != -ENOENT)
1306 return ret; 1322 return ret;
1307 return nlink; 1323 return nlink;
1308} 1324}
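Two small hardenings land in count_inode_extrefs(): the walk cursor is now zeroed explicitly before the loop, and -ENOENT (no extref item at all) is absorbed here rather than by the caller (see the fixup_inode_link_count() hunk below). The cursor-reset bug class in miniature; the record layout is illustrative, not the on-disk btrfs_inode_extref:

    #include <stddef.h>

    struct demo_extref { unsigned short name_len; /* name bytes follow */ };

    static int count_records(const unsigned char *ptr, size_t item_size)
    {
            size_t cur_offset = 0;  /* the fix: never start from a stale cursor */
            int nlink = 0;

            while (cur_offset < item_size) {
                    const struct demo_extref *e =
                            (const struct demo_extref *)(ptr + cur_offset);

                    cur_offset += sizeof(*e) + e->name_len;
                    nlink++;
            }
            return nlink;
    }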
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1394 nlink = ret; 1410 nlink = ret;
1395 1411
1396 ret = count_inode_extrefs(root, inode, path); 1412 ret = count_inode_extrefs(root, inode, path);
1397 if (ret == -ENOENT)
1398 ret = 0;
1399
1400 if (ret < 0) 1413 if (ret < 0)
1401 goto out; 1414 goto out;
1402 1415
@@ -1557,6 +1570,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1557} 1570}
1558 1571
1559/* 1572/*
1573 * Return true if an inode reference exists in the log for the given name,
1574 * inode and parent inode.
1575 */
1576static bool name_in_log_ref(struct btrfs_root *log_root,
1577 const char *name, const int name_len,
1578 const u64 dirid, const u64 ino)
1579{
1580 struct btrfs_key search_key;
1581
1582 search_key.objectid = ino;
1583 search_key.type = BTRFS_INODE_REF_KEY;
1584 search_key.offset = dirid;
1585 if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1586 return true;
1587
1588 search_key.type = BTRFS_INODE_EXTREF_KEY;
1589 search_key.offset = btrfs_extref_hash(dirid, name, name_len);
1590 if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1591 return true;
1592
1593 return false;
1594}
1595
1596/*
1560 * take a single entry in a log directory item and replay it into 1597 * take a single entry in a log directory item and replay it into
1561 * the subvolume. 1598 * the subvolume.
1562 * 1599 *
@@ -1666,10 +1703,17 @@ out:
1666 return ret; 1703 return ret;
1667 1704
1668insert: 1705insert:
1706 if (name_in_log_ref(root->log_root, name, name_len,
1707 key->objectid, log_key.objectid)) {
1708 /* The dentry will be added later. */
1709 ret = 0;
1710 update_size = false;
1711 goto out;
1712 }
1669 btrfs_release_path(path); 1713 btrfs_release_path(path);
1670 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1714 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1671 name, name_len, log_type, &log_key); 1715 name, name_len, log_type, &log_key);
1672 if (ret && ret != -ENOENT) 1716 if (ret && ret != -ENOENT && ret != -EEXIST)
1673 goto out; 1717 goto out;
1674 update_size = false; 1718 update_size = false;
1675 ret = 0; 1719 ret = 0;
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2164 parent = path->nodes[*level]; 2208 parent = path->nodes[*level];
2165 root_owner = btrfs_header_owner(parent); 2209 root_owner = btrfs_header_owner(parent);
2166 2210
2167 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 2211 next = btrfs_find_create_tree_block(root, bytenr);
2168 if (!next) 2212 if (!next)
2169 return -ENOMEM; 2213 return -ENOMEM;
2170 2214
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
2416 mutex_unlock(&root->log_mutex); 2460 mutex_unlock(&root->log_mutex);
2417 if (atomic_read(&root->log_writers)) 2461 if (atomic_read(&root->log_writers))
2418 schedule(); 2462 schedule();
2419 mutex_lock(&root->log_mutex);
2420 finish_wait(&root->log_writer_wait, &wait); 2463 finish_wait(&root->log_writer_wait, &wait);
2464 mutex_lock(&root->log_mutex);
2421 } 2465 }
2422} 2466}
2423 2467
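The wait_for_writer() change is purely an ordering fix, the "scheduler warning when syncing log" item from the merge summary: finish_wait() must put the task back into TASK_RUNNING before mutex_lock() is allowed to block, otherwise the scheduler warns about a blocking call from a non-running task state. The corrected sequence in canonical form (the caller holds the mutex on entry and exit, as with log_mutex):

    #include <linux/wait.h>
    #include <linux/mutex.h>
    #include <linux/sched.h>

    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
    static DEFINE_MUTEX(demo_lock);

    static void demo_wait(int (*still_busy)(void))
    {
            DEFINE_WAIT(wait);

            prepare_to_wait(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);
            mutex_unlock(&demo_lock);
            if (still_busy())
                    schedule();
            finish_wait(&demo_wq, &wait);   /* TASK_RUNNING restored ... */
            mutex_lock(&demo_lock);         /* ... so it is safe to block */
    }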
@@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
3219static void fill_inode_item(struct btrfs_trans_handle *trans, 3263static void fill_inode_item(struct btrfs_trans_handle *trans,
3220 struct extent_buffer *leaf, 3264 struct extent_buffer *leaf,
3221 struct btrfs_inode_item *item, 3265 struct btrfs_inode_item *item,
3222 struct inode *inode, int log_inode_only) 3266 struct inode *inode, int log_inode_only,
3267 u64 logged_isize)
3223{ 3268{
3224 struct btrfs_map_token token; 3269 struct btrfs_map_token token;
3225 3270
@@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3232 * to say 'update this inode with these values' 3277 * to say 'update this inode with these values'
3233 */ 3278 */
3234 btrfs_set_token_inode_generation(leaf, item, 0, &token); 3279 btrfs_set_token_inode_generation(leaf, item, 0, &token);
3235 btrfs_set_token_inode_size(leaf, item, 0, &token); 3280 btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
3236 } else { 3281 } else {
3237 btrfs_set_token_inode_generation(leaf, item, 3282 btrfs_set_token_inode_generation(leaf, item,
3238 BTRFS_I(inode)->generation, 3283 BTRFS_I(inode)->generation,
@@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3245 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 3290 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3246 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 3291 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3247 3292
3248 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), 3293 btrfs_set_token_timespec_sec(leaf, &item->atime,
3249 inode->i_atime.tv_sec, &token); 3294 inode->i_atime.tv_sec, &token);
3250 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), 3295 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3251 inode->i_atime.tv_nsec, &token); 3296 inode->i_atime.tv_nsec, &token);
3252 3297
3253 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), 3298 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3254 inode->i_mtime.tv_sec, &token); 3299 inode->i_mtime.tv_sec, &token);
3255 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), 3300 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3256 inode->i_mtime.tv_nsec, &token); 3301 inode->i_mtime.tv_nsec, &token);
3257 3302
3258 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), 3303 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3259 inode->i_ctime.tv_sec, &token); 3304 inode->i_ctime.tv_sec, &token);
3260 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), 3305 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3261 inode->i_ctime.tv_nsec, &token); 3306 inode->i_ctime.tv_nsec, &token);
3262 3307
3263 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 3308 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
@@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
3284 return ret; 3329 return ret;
3285 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3330 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3286 struct btrfs_inode_item); 3331 struct btrfs_inode_item);
3287 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); 3332 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
3288 btrfs_release_path(path); 3333 btrfs_release_path(path);
3289 return 0; 3334 return 0;
3290} 3335}
@@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3293 struct inode *inode, 3338 struct inode *inode,
3294 struct btrfs_path *dst_path, 3339 struct btrfs_path *dst_path,
3295 struct btrfs_path *src_path, u64 *last_extent, 3340 struct btrfs_path *src_path, u64 *last_extent,
3296 int start_slot, int nr, int inode_only) 3341 int start_slot, int nr, int inode_only,
3342 u64 logged_isize)
3297{ 3343{
3298 unsigned long src_offset; 3344 unsigned long src_offset;
3299 unsigned long dst_offset; 3345 unsigned long dst_offset;
@@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3350 dst_path->slots[0], 3396 dst_path->slots[0],
3351 struct btrfs_inode_item); 3397 struct btrfs_inode_item);
3352 fill_inode_item(trans, dst_path->nodes[0], inode_item, 3398 fill_inode_item(trans, dst_path->nodes[0], inode_item,
3353 inode, inode_only == LOG_INODE_EXISTS); 3399 inode, inode_only == LOG_INODE_EXISTS,
3400 logged_isize);
3354 } else { 3401 } else {
3355 copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 3402 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
3356 src_offset, ins_sizes[i]); 3403 src_offset, ins_sizes[i]);
@@ -3902,6 +3949,33 @@ process:
3902 return ret; 3949 return ret;
3903} 3950}
3904 3951
3952static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3953 struct btrfs_path *path, u64 *size_ret)
3954{
3955 struct btrfs_key key;
3956 int ret;
3957
3958 key.objectid = btrfs_ino(inode);
3959 key.type = BTRFS_INODE_ITEM_KEY;
3960 key.offset = 0;
3961
3962 ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
3963 if (ret < 0) {
3964 return ret;
3965 } else if (ret > 0) {
3966 *size_ret = i_size_read(inode);
3967 } else {
3968 struct btrfs_inode_item *item;
3969
3970 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3971 struct btrfs_inode_item);
3972 *size_ret = btrfs_inode_size(path->nodes[0], item);
3973 }
3974
3975 btrfs_release_path(path);
3976 return 0;
3977}
3978
3905/* log a single inode in the tree log. 3979/* log a single inode in the tree log.
3906 * At least one parent directory for this inode must exist in the tree 3980 * At least one parent directory for this inode must exist in the tree
3907 * or be logged already. 3981 * or be logged already.
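logged_inode_size() leans on btrfs_search_slot()'s three-way return: negative for a real error, zero when the key sits at path->slots[0], positive when it is absent, which here maps onto "prefer the isize already in the log, else fall back to the in-memory i_size". The consumer shape generically, assuming the btrfs ctree headers; decode_item() is a hypothetical stand-in for the item read:

    #include "ctree.h"      /* btrfs tree API, in-kernel build */

    u64 decode_item(struct btrfs_path *path);   /* hypothetical */

    static int lookup_or_default(struct btrfs_root *root,
                                 struct btrfs_key *key,
                                 struct btrfs_path *path,
                                 u64 *out, u64 fallback)
    {
            int ret = btrfs_search_slot(NULL, root, key, path, 0, 0);

            if (ret < 0)
                    return ret;                 /* I/O, -ENOMEM, ... */
            if (ret > 0)
                    *out = fallback;            /* key absent in this tree */
            else
                    *out = decode_item(path);
            btrfs_release_path(path);
            return 0;
    }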
@@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3939 bool fast_search = false; 4013 bool fast_search = false;
3940 u64 ino = btrfs_ino(inode); 4014 u64 ino = btrfs_ino(inode);
3941 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4015 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4016 u64 logged_isize = 0;
3942 4017
3943 path = btrfs_alloc_path(); 4018 path = btrfs_alloc_path();
3944 if (!path) 4019 if (!path)
@@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3966 max_key.type = (u8)-1; 4041 max_key.type = (u8)-1;
3967 max_key.offset = (u64)-1; 4042 max_key.offset = (u64)-1;
3968 4043
3969 /* Only run delayed items if we are a dir or a new file */ 4044 /*
4045 * Only run delayed items if we are a dir or a new file.
4046 * Otherwise commit the delayed inode only, which is needed in
4047 * order for the log replay code to mark inodes for link count
4048 * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
4049 */
3970 if (S_ISDIR(inode->i_mode) || 4050 if (S_ISDIR(inode->i_mode) ||
3971 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { 4051 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed)
3972 ret = btrfs_commit_inode_delayed_items(trans, inode); 4052 ret = btrfs_commit_inode_delayed_items(trans, inode);
3973 if (ret) { 4053 else
3974 btrfs_free_path(path); 4054 ret = btrfs_commit_inode_delayed_inode(inode);
3975 btrfs_free_path(dst_path); 4055
3976 return ret; 4056 if (ret) {
3977 } 4057 btrfs_free_path(path);
4058 btrfs_free_path(dst_path);
4059 return ret;
3978 } 4060 }
3979 4061
3980 mutex_lock(&BTRFS_I(inode)->log_mutex); 4062 mutex_lock(&BTRFS_I(inode)->log_mutex);
@@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3988 if (S_ISDIR(inode->i_mode)) { 4070 if (S_ISDIR(inode->i_mode)) {
3989 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4071 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
3990 4072
3991 if (inode_only == LOG_INODE_EXISTS) 4073 if (inode_only == LOG_INODE_EXISTS) {
3992 max_key_type = BTRFS_XATTR_ITEM_KEY; 4074 max_key_type = BTRFS_INODE_EXTREF_KEY;
4075 max_key.type = max_key_type;
4076 }
3993 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4077 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
3994 } else { 4078 } else {
3995 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4079 if (inode_only == LOG_INODE_EXISTS) {
3996 &BTRFS_I(inode)->runtime_flags)) { 4080 /*
3997 clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4081 * Make sure the new inode item we write to the log has
3998 &BTRFS_I(inode)->runtime_flags); 4082 * the same isize as the current one (if it exists).
3999 ret = btrfs_truncate_inode_items(trans, log, 4083 * This is necessary to prevent data loss after log
4000 inode, 0, 0); 4084 * replay, and also to prevent doing a wrong expanding
4001 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4085 * truncate - e.g. create file, write 4K into offset
4002 &BTRFS_I(inode)->runtime_flags) || 4086 * 0, fsync, write 4K into offset 4096, add hard link,
4087 * fsync some other file (to sync log), power fail - if
4088 * we use the inode's current i_size, after log replay
4089 * we get an 8Kb file, with the last 4Kb extent as a hole
4090 * (zeroes), as if an expanding truncate happened,
4091 * instead of getting a file of 4Kb only.
4092 */
4093 err = logged_inode_size(log, inode, path,
4094 &logged_isize);
4095 if (err)
4096 goto out_unlock;
4097 }
4098 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4099 &BTRFS_I(inode)->runtime_flags)) {
4100 if (inode_only == LOG_INODE_EXISTS) {
4101 max_key.type = BTRFS_INODE_EXTREF_KEY;
4102 ret = drop_objectid_items(trans, log, path, ino,
4103 max_key.type);
4104 } else {
4105 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4106 &BTRFS_I(inode)->runtime_flags);
4107 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4108 &BTRFS_I(inode)->runtime_flags);
4109 ret = btrfs_truncate_inode_items(trans, log,
4110 inode, 0, 0);
4111 }
4112 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
4113 &BTRFS_I(inode)->runtime_flags) ||
4003 inode_only == LOG_INODE_EXISTS) { 4114 inode_only == LOG_INODE_EXISTS) {
4004 if (inode_only == LOG_INODE_ALL) 4115 if (inode_only == LOG_INODE_ALL) {
4116 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4117 &BTRFS_I(inode)->runtime_flags);
4005 fast_search = true; 4118 fast_search = true;
4006 max_key.type = BTRFS_XATTR_ITEM_KEY; 4119 max_key.type = BTRFS_XATTR_ITEM_KEY;
4120 } else {
4121 max_key.type = BTRFS_INODE_EXTREF_KEY;
4122 }
4007 ret = drop_objectid_items(trans, log, path, ino, 4123 ret = drop_objectid_items(trans, log, path, ino,
4008 max_key.type); 4124 max_key.type);
4009 } else { 4125 } else {
@@ -4047,7 +4163,8 @@ again:
4047 } 4163 }
4048 4164
4049 ret = copy_items(trans, inode, dst_path, path, &last_extent, 4165 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4050 ins_start_slot, ins_nr, inode_only); 4166 ins_start_slot, ins_nr, inode_only,
4167 logged_isize);
4051 if (ret < 0) { 4168 if (ret < 0) {
4052 err = ret; 4169 err = ret;
4053 goto out_unlock; 4170 goto out_unlock;
@@ -4071,7 +4188,7 @@ next_slot:
4071 if (ins_nr) { 4188 if (ins_nr) {
4072 ret = copy_items(trans, inode, dst_path, path, 4189 ret = copy_items(trans, inode, dst_path, path,
4073 &last_extent, ins_start_slot, 4190 &last_extent, ins_start_slot,
4074 ins_nr, inode_only); 4191 ins_nr, inode_only, logged_isize);
4075 if (ret < 0) { 4192 if (ret < 0) {
4076 err = ret; 4193 err = ret;
4077 goto out_unlock; 4194 goto out_unlock;
@@ -4092,7 +4209,8 @@ next_slot:
4092 } 4209 }
4093 if (ins_nr) { 4210 if (ins_nr) {
4094 ret = copy_items(trans, inode, dst_path, path, &last_extent, 4211 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4095 ins_start_slot, ins_nr, inode_only); 4212 ins_start_slot, ins_nr, inode_only,
4213 logged_isize);
4096 if (ret < 0) { 4214 if (ret < 0) {
4097 err = ret; 4215 err = ret;
4098 goto out_unlock; 4216 goto out_unlock;
@@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4273 struct dentry *old_parent = NULL; 4391 struct dentry *old_parent = NULL;
4274 int ret = 0; 4392 int ret = 0;
4275 u64 last_committed = root->fs_info->last_trans_committed; 4393 u64 last_committed = root->fs_info->last_trans_committed;
4394 const struct dentry * const first_parent = parent;
4395 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4396 last_committed);
4276 4397
4277 sb = inode->i_sb; 4398 sb = inode->i_sb;
4278 4399
@@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4328 goto end_trans; 4449 goto end_trans;
4329 } 4450 }
4330 4451
4331 inode_only = LOG_INODE_EXISTS;
4332 while (1) { 4452 while (1) {
4333 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4453 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4334 break; 4454 break;
@@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4337 if (root != BTRFS_I(inode)->root) 4457 if (root != BTRFS_I(inode)->root)
4338 break; 4458 break;
4339 4459
4460 /*
4461 * On unlink we must make sure our immediate parent directory
4462 * inode is fully logged. This is to prevent leaving dangling
4463 * directory index entries and a wrong directory inode's i_size.
4464 * Not doing so can result in a directory being impossible to
4465 * delete after log replay (rmdir will always fail with error
4466 * -ENOTEMPTY).
4467 */
4468 if (did_unlink && parent == first_parent)
4469 inode_only = LOG_INODE_ALL;
4470 else
4471 inode_only = LOG_INODE_EXISTS;
4472
4340 if (BTRFS_I(inode)->generation > 4473 if (BTRFS_I(inode)->generation >
4341 root->fs_info->last_trans_committed) { 4474 root->fs_info->last_trans_committed ||
4475 inode_only == LOG_INODE_ALL) {
4342 ret = btrfs_log_inode(trans, root, inode, inode_only, 4476 ret = btrfs_log_inode(trans, root, inode, inode_only,
4343 0, LLONG_MAX, ctx); 4477 0, LLONG_MAX, ctx);
4344 if (ret) 4478 if (ret)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 50c5a8762aed..cd4d1315aaa9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1310,6 +1310,8 @@ again:
1310 if (ret) { 1310 if (ret) {
1311 btrfs_error(root->fs_info, ret, 1311 btrfs_error(root->fs_info, ret,
1312 "Failed to remove dev extent item"); 1312 "Failed to remove dev extent item");
1313 } else {
1314 trans->transaction->have_free_bgs = 1;
1313 } 1315 }
1314out: 1316out:
1315 btrfs_free_path(path); 1317 btrfs_free_path(path);
@@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
4196 4198
4197static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) 4199static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4198{ 4200{
4199 if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) 4201 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4200 return; 4202 return;
4201 4203
4202 btrfs_set_fs_incompat(info, RAID56); 4204 btrfs_set_fs_incompat(info, RAID56);
@@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
4803 4805
4804 BUG_ON(em->start > logical || em->start + em->len < logical); 4806 BUG_ON(em->start > logical || em->start + em->len < logical);
4805 map = (struct map_lookup *)em->bdev; 4807 map = (struct map_lookup *)em->bdev;
4806 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 4808 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
4807 BTRFS_BLOCK_GROUP_RAID6)) {
4808 len = map->stripe_len * nr_data_stripes(map); 4809 len = map->stripe_len * nr_data_stripes(map);
4809 }
4810 free_extent_map(em); 4810 free_extent_map(em);
4811 return len; 4811 return len;
4812} 4812}
@@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
4826 4826
4827 BUG_ON(em->start > logical || em->start + em->len < logical); 4827 BUG_ON(em->start > logical || em->start + em->len < logical);
4828 map = (struct map_lookup *)em->bdev; 4828 map = (struct map_lookup *)em->bdev;
4829 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 4829 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
4830 BTRFS_BLOCK_GROUP_RAID6))
4831 ret = 1; 4830 ret = 1;
4832 free_extent_map(em); 4831 free_extent_map(em);
4833 return ret; 4832 return ret;
@@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b)
4876} 4875}
4877 4876
4878/* Bubble-sort the stripe set to put the parity/syndrome stripes last */ 4877/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
4879static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) 4878static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
4880{ 4879{
4881 struct btrfs_bio_stripe s; 4880 struct btrfs_bio_stripe s;
4882 int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
4883 int i; 4881 int i;
4884 u64 l; 4882 u64 l;
4885 int again = 1; 4883 int again = 1;
4886 int m;
4887 4884
4888 while (again) { 4885 while (again) {
4889 again = 0; 4886 again = 0;
4890 for (i = 0; i < real_stripes - 1; i++) { 4887 for (i = 0; i < num_stripes - 1; i++) {
4891 if (parity_smaller(raid_map[i], raid_map[i+1])) { 4888 if (parity_smaller(bbio->raid_map[i],
4889 bbio->raid_map[i+1])) {
4892 s = bbio->stripes[i]; 4890 s = bbio->stripes[i];
4893 l = raid_map[i]; 4891 l = bbio->raid_map[i];
4894 bbio->stripes[i] = bbio->stripes[i+1]; 4892 bbio->stripes[i] = bbio->stripes[i+1];
4895 raid_map[i] = raid_map[i+1]; 4893 bbio->raid_map[i] = bbio->raid_map[i+1];
4896 bbio->stripes[i+1] = s; 4894 bbio->stripes[i+1] = s;
4897 raid_map[i+1] = l; 4895 bbio->raid_map[i+1] = l;
4898
4899 if (bbio->tgtdev_map) {
4900 m = bbio->tgtdev_map[i];
4901 bbio->tgtdev_map[i] =
4902 bbio->tgtdev_map[i + 1];
4903 bbio->tgtdev_map[i + 1] = m;
4904 }
4905 4896
4906 again = 1; 4897 again = 1;
4907 } 4898 }
@@ -4909,10 +4900,41 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
4909 } 4900 }
4910} 4901}
4911 4902
4903static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
4904{
4905 struct btrfs_bio *bbio = kzalloc(
4906 sizeof(struct btrfs_bio) +
4907 sizeof(struct btrfs_bio_stripe) * (total_stripes) +
4908 sizeof(int) * (real_stripes) +
4909 sizeof(u64) * (real_stripes),
4910 GFP_NOFS);
4911 if (!bbio)
4912 return NULL;
4913
4914 atomic_set(&bbio->error, 0);
4915 atomic_set(&bbio->refs, 1);
4916
4917 return bbio;
4918}
4919
4920void btrfs_get_bbio(struct btrfs_bio *bbio)
4921{
4922 WARN_ON(!atomic_read(&bbio->refs));
4923 atomic_inc(&bbio->refs);
4924}
4925
4926void btrfs_put_bbio(struct btrfs_bio *bbio)
4927{
4928 if (!bbio)
4929 return;
4930 if (atomic_dec_and_test(&bbio->refs))
4931 kfree(bbio);
4932}
4933
4912static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 4934static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4913 u64 logical, u64 *length, 4935 u64 logical, u64 *length,
4914 struct btrfs_bio **bbio_ret, 4936 struct btrfs_bio **bbio_ret,
4915 int mirror_num, u64 **raid_map_ret) 4937 int mirror_num, int need_raid_map)
4916{ 4938{
4917 struct extent_map *em; 4939 struct extent_map *em;
4918 struct map_lookup *map; 4940 struct map_lookup *map;
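alloc_btrfs_bio(), btrfs_get_bbio() and btrfs_put_bbio() move struct btrfs_bio from single-owner kfree() to reference counting; that is what lets scrub and the end-io path share one bbio safely, per the use-after-free fix in this pull. The idiom in miniature:

    #include <linux/slab.h>
    #include <linux/atomic.h>
    #include <linux/bug.h>

    struct refbox {
            atomic_t refs;
            /* payload would follow, as in btrfs_bio */
    };

    static struct refbox *refbox_alloc(void)
    {
            struct refbox *b = kzalloc(sizeof(*b), GFP_NOFS);

            if (b)
                    atomic_set(&b->refs, 1);    /* creator owns one ref */
            return b;
    }

    static void refbox_get(struct refbox *b)
    {
            WARN_ON(!atomic_read(&b->refs));    /* catch revived objects */
            atomic_inc(&b->refs);
    }

    static void refbox_put(struct refbox *b)
    {
            if (b && atomic_dec_and_test(&b->refs))
                    kfree(b);                   /* last put frees */
    }

Note also that the raid_map and tgtdev_map now live inside the same allocation as the stripes, so a single put releases everything.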
@@ -4925,7 +4947,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4925 u64 stripe_nr_orig; 4947 u64 stripe_nr_orig;
4926 u64 stripe_nr_end; 4948 u64 stripe_nr_end;
4927 u64 stripe_len; 4949 u64 stripe_len;
4928 u64 *raid_map = NULL;
4929 int stripe_index; 4950 int stripe_index;
4930 int i; 4951 int i;
4931 int ret = 0; 4952 int ret = 0;
@@ -4976,7 +4997,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4976 stripe_offset = offset - stripe_offset; 4997 stripe_offset = offset - stripe_offset;
4977 4998
4978 /* if we're here for raid56, we need to know the stripe aligned start */ 4999 /* if we're here for raid56, we need to know the stripe aligned start */
4979 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { 5000 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
4980 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); 5001 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
4981 raid56_full_stripe_start = offset; 5002 raid56_full_stripe_start = offset;
4982 5003
@@ -4989,8 +5010,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4989 5010
4990 if (rw & REQ_DISCARD) { 5011 if (rw & REQ_DISCARD) {
4991 /* we don't discard raid56 yet */ 5012 /* we don't discard raid56 yet */
4992 if (map->type & 5013 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
4993 (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
4994 ret = -EOPNOTSUPP; 5014 ret = -EOPNOTSUPP;
4995 goto out; 5015 goto out;
4996 } 5016 }
@@ -5000,7 +5020,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5000 /* For writes to RAID[56], allow a full stripeset across all disks. 5020 /* For writes to RAID[56], allow a full stripeset across all disks.
5001 For other RAID types and for RAID[56] reads, just allow a single 5021 For other RAID types and for RAID[56] reads, just allow a single
5002 stripe (on a single disk). */ 5022 stripe (on a single disk). */
5003 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) && 5023 if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
5004 (rw & REQ_WRITE)) { 5024 (rw & REQ_WRITE)) {
5005 max_len = stripe_len * nr_data_stripes(map) - 5025 max_len = stripe_len * nr_data_stripes(map) -
5006 (offset - raid56_full_stripe_start); 5026 (offset - raid56_full_stripe_start);
@@ -5047,7 +5067,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5047 u64 physical_of_found = 0; 5067 u64 physical_of_found = 0;
5048 5068
5049 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, 5069 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
5050 logical, &tmp_length, &tmp_bbio, 0, NULL); 5070 logical, &tmp_length, &tmp_bbio, 0, 0);
5051 if (ret) { 5071 if (ret) {
5052 WARN_ON(tmp_bbio != NULL); 5072 WARN_ON(tmp_bbio != NULL);
5053 goto out; 5073 goto out;
@@ -5061,7 +5081,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5061 * is not left of the left cursor 5081 * is not left of the left cursor
5062 */ 5082 */
5063 ret = -EIO; 5083 ret = -EIO;
5064 kfree(tmp_bbio); 5084 btrfs_put_bbio(tmp_bbio);
5065 goto out; 5085 goto out;
5066 } 5086 }
5067 5087
@@ -5096,11 +5116,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5096 } else { 5116 } else {
5097 WARN_ON(1); 5117 WARN_ON(1);
5098 ret = -EIO; 5118 ret = -EIO;
5099 kfree(tmp_bbio); 5119 btrfs_put_bbio(tmp_bbio);
5100 goto out; 5120 goto out;
5101 } 5121 }
5102 5122
5103 kfree(tmp_bbio); 5123 btrfs_put_bbio(tmp_bbio);
5104 } else if (mirror_num > map->num_stripes) { 5124 } else if (mirror_num > map->num_stripes) {
5105 mirror_num = 0; 5125 mirror_num = 0;
5106 } 5126 }
@@ -5166,15 +5186,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5166 mirror_num = stripe_index - old_stripe_index + 1; 5186 mirror_num = stripe_index - old_stripe_index + 1;
5167 } 5187 }
5168 5188
5169 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 5189 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5170 BTRFS_BLOCK_GROUP_RAID6)) { 5190 if (need_raid_map &&
5171 u64 tmp;
5172
5173 if (raid_map_ret &&
5174 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || 5191 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5175 mirror_num > 1)) { 5192 mirror_num > 1)) {
5176 int i, rot;
5177
5178 /* push stripe_nr back to the start of the full stripe */ 5193 /* push stripe_nr back to the start of the full stripe */
5179 stripe_nr = raid56_full_stripe_start; 5194 stripe_nr = raid56_full_stripe_start;
5180 do_div(stripe_nr, stripe_len * nr_data_stripes(map)); 5195 do_div(stripe_nr, stripe_len * nr_data_stripes(map));
@@ -5183,32 +5198,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5183 num_stripes = map->num_stripes; 5198 num_stripes = map->num_stripes;
5184 max_errors = nr_parity_stripes(map); 5199 max_errors = nr_parity_stripes(map);
5185 5200
5186 raid_map = kmalloc_array(num_stripes, sizeof(u64),
5187 GFP_NOFS);
5188 if (!raid_map) {
5189 ret = -ENOMEM;
5190 goto out;
5191 }
5192
5193 /* Work out the disk rotation on this stripe-set */
5194 tmp = stripe_nr;
5195 rot = do_div(tmp, num_stripes);
5196
5197 /* Fill in the logical address of each stripe */
5198 tmp = stripe_nr * nr_data_stripes(map);
5199 for (i = 0; i < nr_data_stripes(map); i++)
5200 raid_map[(i+rot) % num_stripes] =
5201 em->start + (tmp + i) * map->stripe_len;
5202
5203 raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
5204 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5205 raid_map[(i+rot+1) % num_stripes] =
5206 RAID6_Q_STRIPE;
5207
5208 *length = map->stripe_len; 5201 *length = map->stripe_len;
5209 stripe_index = 0; 5202 stripe_index = 0;
5210 stripe_offset = 0; 5203 stripe_offset = 0;
5211 } else { 5204 } else {
5205 u64 tmp;
5206
5212 /* 5207 /*
5213 * Mirror #0 or #1 means the original data block. 5208 * Mirror #0 or #1 means the original data block.
5214 * Mirror #2 is RAID5 parity block. 5209 * Mirror #2 is RAID5 parity block.
@@ -5246,17 +5241,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5246 tgtdev_indexes = num_stripes; 5241 tgtdev_indexes = num_stripes;
5247 } 5242 }
5248 5243
5249 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), 5244 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
5250 GFP_NOFS);
5251 if (!bbio) { 5245 if (!bbio) {
5252 kfree(raid_map);
5253 ret = -ENOMEM; 5246 ret = -ENOMEM;
5254 goto out; 5247 goto out;
5255 } 5248 }
5256 atomic_set(&bbio->error, 0);
5257 if (dev_replace_is_ongoing) 5249 if (dev_replace_is_ongoing)
5258 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); 5250 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
5259 5251
5252 /* build raid_map */
5253 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
5254 need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5255 mirror_num > 1)) {
5256 u64 tmp;
5257 int i, rot;
5258
5259 bbio->raid_map = (u64 *)((void *)bbio->stripes +
5260 sizeof(struct btrfs_bio_stripe) *
5261 num_alloc_stripes +
5262 sizeof(int) * tgtdev_indexes);
5263
5264 /* Work out the disk rotation on this stripe-set */
5265 tmp = stripe_nr;
5266 rot = do_div(tmp, num_stripes);
5267
5268 /* Fill in the logical address of each stripe */
5269 tmp = stripe_nr * nr_data_stripes(map);
5270 for (i = 0; i < nr_data_stripes(map); i++)
5271 bbio->raid_map[(i+rot) % num_stripes] =
5272 em->start + (tmp + i) * map->stripe_len;
5273
5274 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
5275 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5276 bbio->raid_map[(i+rot+1) % num_stripes] =
5277 RAID6_Q_STRIPE;
5278 }
5279
5260 if (rw & REQ_DISCARD) { 5280 if (rw & REQ_DISCARD) {
5261 int factor = 0; 5281 int factor = 0;
5262 int sub_stripes = 0; 5282 int sub_stripes = 0;
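The relocated raid_map construction is a rotation fill: rot = stripe_nr % num_stripes selects which device holds the first data stripe of this full stripe, the remaining data stripes follow in order, and P (plus Q for RAID6) takes the slots after them. A standalone illustration with made-up geometry; the sentinel stands in for RAID5_P_STRIPE:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const int num_stripes = 4;              /* RAID5: 3 data + P */
            const int nr_data = num_stripes - 1;
            const uint64_t stripe_len = 64 * 1024;
            const uint64_t chunk_start = 1 << 20;   /* em->start stand-in */
            const uint64_t P_STRIPE = (uint64_t)-2; /* parity sentinel */
            uint64_t stripe_nr = 5;                 /* which full stripe */
            uint64_t raid_map[4];
            uint64_t tmp = stripe_nr * nr_data;
            int rot = (int)(stripe_nr % num_stripes);
            int i;

            for (i = 0; i < nr_data; i++)
                    raid_map[(i + rot) % num_stripes] =
                            chunk_start + (tmp + i) * stripe_len;
            raid_map[(i + rot) % num_stripes] = P_STRIPE;

            for (i = 0; i < num_stripes; i++)
                    printf("slot %d -> %#llx\n", i,
                           (unsigned long long)raid_map[i]);
            return 0;
    }

Because the parity sentinels are the largest u64 values, the later sort_parity_stripes() pass naturally pushes P and Q to the end of the stripe array.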
@@ -5340,6 +5360,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5340 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) 5360 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
5341 max_errors = btrfs_chunk_max_errors(map); 5361 max_errors = btrfs_chunk_max_errors(map);
5342 5362
5363 if (bbio->raid_map)
5364 sort_parity_stripes(bbio, num_stripes);
5365
5343 tgtdev_indexes = 0; 5366 tgtdev_indexes = 0;
5344 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && 5367 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
5345 dev_replace->tgtdev != NULL) { 5368 dev_replace->tgtdev != NULL) {
@@ -5427,6 +5450,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5427 } 5450 }
5428 5451
5429 *bbio_ret = bbio; 5452 *bbio_ret = bbio;
5453 bbio->map_type = map->type;
5430 bbio->num_stripes = num_stripes; 5454 bbio->num_stripes = num_stripes;
5431 bbio->max_errors = max_errors; 5455 bbio->max_errors = max_errors;
5432 bbio->mirror_num = mirror_num; 5456 bbio->mirror_num = mirror_num;
@@ -5443,10 +5467,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5443 bbio->stripes[0].physical = physical_to_patch_in_first_stripe; 5467 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
5444 bbio->mirror_num = map->num_stripes + 1; 5468 bbio->mirror_num = map->num_stripes + 1;
5445 } 5469 }
5446 if (raid_map) {
5447 sort_parity_stripes(bbio, raid_map);
5448 *raid_map_ret = raid_map;
5449 }
5450out: 5470out:
5451 if (dev_replace_is_ongoing) 5471 if (dev_replace_is_ongoing)
5452 btrfs_dev_replace_unlock(dev_replace); 5472 btrfs_dev_replace_unlock(dev_replace);
@@ -5459,17 +5479,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5459 struct btrfs_bio **bbio_ret, int mirror_num) 5479 struct btrfs_bio **bbio_ret, int mirror_num)
5460{ 5480{
5461 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, 5481 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
5462 mirror_num, NULL); 5482 mirror_num, 0);
5463} 5483}
5464 5484
5465/* For Scrub/replace */ 5485/* For Scrub/replace */
5466int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, 5486int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
5467 u64 logical, u64 *length, 5487 u64 logical, u64 *length,
5468 struct btrfs_bio **bbio_ret, int mirror_num, 5488 struct btrfs_bio **bbio_ret, int mirror_num,
5469 u64 **raid_map_ret) 5489 int need_raid_map)
5470{ 5490{
5471 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, 5491 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
5472 mirror_num, raid_map_ret); 5492 mirror_num, need_raid_map);
5473} 5493}
5474 5494
5475int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 5495int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -5511,8 +5531,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5511 do_div(length, map->num_stripes / map->sub_stripes); 5531 do_div(length, map->num_stripes / map->sub_stripes);
5512 else if (map->type & BTRFS_BLOCK_GROUP_RAID0) 5532 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5513 do_div(length, map->num_stripes); 5533 do_div(length, map->num_stripes);
5514 else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 5534 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5515 BTRFS_BLOCK_GROUP_RAID6)) {
5516 do_div(length, nr_data_stripes(map)); 5535 do_div(length, nr_data_stripes(map));
5517 rmap_len = map->stripe_len * nr_data_stripes(map); 5536 rmap_len = map->stripe_len * nr_data_stripes(map);
5518 } 5537 }
@@ -5565,7 +5584,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
5565 bio_endio_nodec(bio, err); 5584 bio_endio_nodec(bio, err);
5566 else 5585 else
5567 bio_endio(bio, err); 5586 bio_endio(bio, err);
5568 kfree(bbio); 5587 btrfs_put_bbio(bbio);
5569} 5588}
5570 5589
5571static void btrfs_end_bio(struct bio *bio, int err) 5590static void btrfs_end_bio(struct bio *bio, int err)
@@ -5808,7 +5827,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5808 u64 logical = (u64)bio->bi_iter.bi_sector << 9; 5827 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
5809 u64 length = 0; 5828 u64 length = 0;
5810 u64 map_length; 5829 u64 map_length;
5811 u64 *raid_map = NULL;
5812 int ret; 5830 int ret;
5813 int dev_nr = 0; 5831 int dev_nr = 0;
5814 int total_devs = 1; 5832 int total_devs = 1;
@@ -5819,7 +5837,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5819 5837
5820 btrfs_bio_counter_inc_blocked(root->fs_info); 5838 btrfs_bio_counter_inc_blocked(root->fs_info);
5821 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, 5839 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
5822 mirror_num, &raid_map); 5840 mirror_num, 1);
5823 if (ret) { 5841 if (ret) {
5824 btrfs_bio_counter_dec(root->fs_info); 5842 btrfs_bio_counter_dec(root->fs_info);
5825 return ret; 5843 return ret;
@@ -5832,15 +5850,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5832 bbio->fs_info = root->fs_info; 5850 bbio->fs_info = root->fs_info;
5833 atomic_set(&bbio->stripes_pending, bbio->num_stripes); 5851 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
5834 5852
5835 if (raid_map) { 5853 if (bbio->raid_map) {
5836 /* In this case, map_length has been set to the length of 5854 /* In this case, map_length has been set to the length of
5837 a single stripe; not the whole write */ 5855 a single stripe; not the whole write */
5838 if (rw & WRITE) { 5856 if (rw & WRITE) {
5839 ret = raid56_parity_write(root, bio, bbio, 5857 ret = raid56_parity_write(root, bio, bbio, map_length);
5840 raid_map, map_length);
5841 } else { 5858 } else {
5842 ret = raid56_parity_recover(root, bio, bbio, 5859 ret = raid56_parity_recover(root, bio, bbio, map_length,
5843 raid_map, map_length,
5844 mirror_num, 1); 5860 mirror_num, 1);
5845 } 5861 }
5846 5862
@@ -6238,17 +6254,22 @@ int btrfs_read_sys_array(struct btrfs_root *root)
6238 struct extent_buffer *sb; 6254 struct extent_buffer *sb;
6239 struct btrfs_disk_key *disk_key; 6255 struct btrfs_disk_key *disk_key;
6240 struct btrfs_chunk *chunk; 6256 struct btrfs_chunk *chunk;
6241 u8 *ptr; 6257 u8 *array_ptr;
6242 unsigned long sb_ptr; 6258 unsigned long sb_array_offset;
6243 int ret = 0; 6259 int ret = 0;
6244 u32 num_stripes; 6260 u32 num_stripes;
6245 u32 array_size; 6261 u32 array_size;
6246 u32 len = 0; 6262 u32 len = 0;
6247 u32 cur; 6263 u32 cur_offset;
6248 struct btrfs_key key; 6264 struct btrfs_key key;
6249 6265
6250 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, 6266 ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
6251 BTRFS_SUPER_INFO_SIZE); 6267 /*
6268 * This will create extent buffer of nodesize, superblock size is
6269 * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
6270 * overallocate but we can keep it as-is, only the first page is used.
6271 */
6272 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
6252 if (!sb) 6273 if (!sb)
6253 return -ENOMEM; 6274 return -ENOMEM;
6254 btrfs_set_buffer_uptodate(sb); 6275 btrfs_set_buffer_uptodate(sb);
@@ -6271,35 +6292,56 @@ int btrfs_read_sys_array(struct btrfs_root *root)
6271 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 6292 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
6272 array_size = btrfs_super_sys_array_size(super_copy); 6293 array_size = btrfs_super_sys_array_size(super_copy);
6273 6294
6274 ptr = super_copy->sys_chunk_array; 6295 array_ptr = super_copy->sys_chunk_array;
6275 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); 6296 sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
6276 cur = 0; 6297 cur_offset = 0;
6298
6299 while (cur_offset < array_size) {
6300 disk_key = (struct btrfs_disk_key *)array_ptr;
6301 len = sizeof(*disk_key);
6302 if (cur_offset + len > array_size)
6303 goto out_short_read;
6277 6304
6278 while (cur < array_size) {
6279 disk_key = (struct btrfs_disk_key *)ptr;
6280 btrfs_disk_key_to_cpu(&key, disk_key); 6305 btrfs_disk_key_to_cpu(&key, disk_key);
6281 6306
6282 len = sizeof(*disk_key); ptr += len; 6307 array_ptr += len;
6283 sb_ptr += len; 6308 sb_array_offset += len;
6284 cur += len; 6309 cur_offset += len;
6285 6310
6286 if (key.type == BTRFS_CHUNK_ITEM_KEY) { 6311 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6287 chunk = (struct btrfs_chunk *)sb_ptr; 6312 chunk = (struct btrfs_chunk *)sb_array_offset;
6313 /*
6314 * At least one btrfs_chunk with one stripe must be
6315 * present, exact stripe count check comes afterwards
6316 */
6317 len = btrfs_chunk_item_size(1);
6318 if (cur_offset + len > array_size)
6319 goto out_short_read;
6320
6321 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6322 len = btrfs_chunk_item_size(num_stripes);
6323 if (cur_offset + len > array_size)
6324 goto out_short_read;
6325
6288 ret = read_one_chunk(root, &key, sb, chunk); 6326 ret = read_one_chunk(root, &key, sb, chunk);
6289 if (ret) 6327 if (ret)
6290 break; 6328 break;
6291 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6292 len = btrfs_chunk_item_size(num_stripes);
6293 } else { 6329 } else {
6294 ret = -EIO; 6330 ret = -EIO;
6295 break; 6331 break;
6296 } 6332 }
6297 ptr += len; 6333 array_ptr += len;
6298 sb_ptr += len; 6334 sb_array_offset += len;
6299 cur += len; 6335 cur_offset += len;
6300 } 6336 }
6301 free_extent_buffer(sb); 6337 free_extent_buffer(sb);
6302 return ret; 6338 return ret;
6339
6340out_short_read:
6341 printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
6342 len, cur_offset);
6343 free_extent_buffer(sb);
6344 return -EIO;
6303} 6345}
6304 6346
6305int btrfs_read_chunk_tree(struct btrfs_root *root) 6347int btrfs_read_chunk_tree(struct btrfs_root *root)
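The rewritten btrfs_read_sys_array() loop is a two-step bounds check for variable-length records: verify that the fixed part fits before reading it, then, once the stripe count is known, verify that the whole record fits before parsing it. The same shape over a plain byte buffer (the record layout is illustrative, and a production parser would also guard the offset additions against overflow):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct rec_hdr {
            uint32_t num_items;     /* illustrative, not the btrfs layout */
    };

    static int parse_array(const uint8_t *buf, size_t size)
    {
            size_t off = 0;

            while (off < size) {
                    struct rec_hdr h;
                    size_t len = sizeof(h);

                    if (off + len > size)
                            return -1;      /* short read: header */
                    memcpy(&h, buf + off, len);

                    /* fixed part fits; now size the variable part */
                    len += (size_t)h.num_items * 8;
                    if (off + len > size)
                            return -1;      /* short read: payload */

                    /* ... consume the record here ... */
                    off += len;
            }
            return 0;
    }

This mirrors the out_short_read path above: any record that claims to extend past array_size aborts the walk with -EIO instead of reading past the buffer.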
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d6fe73c0f4a2..83069dec6898 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -295,8 +295,10 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
295#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) 295#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0)
296 296
297struct btrfs_bio { 297struct btrfs_bio {
298 atomic_t refs;
298 atomic_t stripes_pending; 299 atomic_t stripes_pending;
299 struct btrfs_fs_info *fs_info; 300 struct btrfs_fs_info *fs_info;
301 u64 map_type; /* get from map_lookup->type */
300 bio_end_io_t *end_io; 302 bio_end_io_t *end_io;
301 struct bio *orig_bio; 303 struct bio *orig_bio;
302 unsigned long flags; 304 unsigned long flags;
@@ -307,6 +309,12 @@ struct btrfs_bio {
307 int mirror_num; 309 int mirror_num;
308 int num_tgtdevs; 310 int num_tgtdevs;
309 int *tgtdev_map; 311 int *tgtdev_map;
312 /*
313 * logical block numbers for the start of each stripe
314 * The last one or two are p/q. These are sorted,
315 * so raid_map[0] is the start of our full stripe
316 */
317 u64 *raid_map;
310 struct btrfs_bio_stripe stripes[]; 318 struct btrfs_bio_stripe stripes[];
311}; 319};
312 320
@@ -388,19 +396,15 @@ struct btrfs_balance_control {
388 396
389int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 397int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
390 u64 end, u64 *length); 398 u64 end, u64 *length);
391 399void btrfs_get_bbio(struct btrfs_bio *bbio);
392#define btrfs_bio_size(total_stripes, real_stripes) \ 400void btrfs_put_bbio(struct btrfs_bio *bbio);
393 (sizeof(struct btrfs_bio) + \
394 (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \
395 (sizeof(int) * (real_stripes)))
396
397int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 401int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
398 u64 logical, u64 *length, 402 u64 logical, u64 *length,
399 struct btrfs_bio **bbio_ret, int mirror_num); 403 struct btrfs_bio **bbio_ret, int mirror_num);
400int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, 404int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
401 u64 logical, u64 *length, 405 u64 logical, u64 *length,
402 struct btrfs_bio **bbio_ret, int mirror_num, 406 struct btrfs_bio **bbio_ret, int mirror_num,
403 u64 **raid_map_ret); 407 int need_raid_map);
404int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 408int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
405 u64 chunk_start, u64 physical, u64 devid, 409 u64 chunk_start, u64 physical, u64 devid,
406 u64 **logical, int *naddrs, int *stripe_len); 410 u64 **logical, int *naddrs, int *stripe_len);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 611e1c5893b4..b6dec05c7196 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -495,8 +495,7 @@ struct btrfs_ioctl_send_args {
495 495
496/* Error codes as returned by the kernel */ 496/* Error codes as returned by the kernel */
497enum btrfs_err_code { 497enum btrfs_err_code {
498 notused, 498 BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
499 BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
500 BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, 499 BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
501 BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, 500 BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
502 BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, 501 BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,