diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-19 17:36:00 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-19 17:36:00 -0500 |
commit | 2b9fb532d4168e8974fe49709e2c4c8d5352a64c (patch) | |
tree | 610cbe2d1bb32e28db135a767f158ade31452e2e | |
parent | 4533f6e27a366ecc3da4876074ebfe0cc0ea4f0f (diff) | |
parent | a742994aa2e271eb8cd8e043d276515ec858ed73 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This pull is mostly cleanups and fixes:
- The raid5/6 cleanups from Zhao Lei fixup some long standing warts
in the code and add improvements on top of the scrubbing support
from 3.19.
- Josef has round one of our ENOSPC fixes coming from large btrfs
clusters here at FB.
- Dave Sterba continues a long series of cleanups (thanks Dave), and
Filipe continues hammering on corner cases in fsync and others
This all was held up a little trying to track down a use-after-free in
btrfs raid5/6. It's not clear yet if this is just made easier to
trigger with this pull or if its a new bug from the raid5/6 cleanups.
Dave Sterba is the only one to trigger it so far, but he has a
consistent way to reproduce, so we'll get it nailed shortly"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
Btrfs: don't remove extents and xattrs when logging new names
Btrfs: fix fsync data loss after adding hard link to inode
Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
Btrfs: account for large extents with enospc
Btrfs: don't set and clear delalloc for O_DIRECT writes
Btrfs: only adjust outstanding_extents when we do a short write
btrfs: Fix out-of-space bug
Btrfs: scrub, fix sleep in atomic context
Btrfs: fix scheduler warning when syncing log
Btrfs: Remove unnecessary placeholder in btrfs_err_code
btrfs: cleanup init for list in free-space-cache
btrfs: delete chunk allocation attemp when setting block group ro
btrfs: clear bio reference after submit_one_bio()
Btrfs: fix scrub race leading to use-after-free
Btrfs: add missing cleanup on sysfs init failure
Btrfs: fix race between transaction commit and empty block group removal
btrfs: add more checks to btrfs_read_sys_array
btrfs: cleanup, rename a few variables in btrfs_read_sys_array
btrfs: add checks for sys_chunk_array sizes
btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
...
34 files changed, 1063 insertions, 861 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 8729cf68d2fe..f55721ff9385 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -1246,25 +1246,6 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans, | |||
1246 | return ret; | 1246 | return ret; |
1247 | } | 1247 | } |
1248 | 1248 | ||
1249 | /* | ||
1250 | * this makes the path point to (inum INODE_ITEM ioff) | ||
1251 | */ | ||
1252 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | ||
1253 | struct btrfs_path *path) | ||
1254 | { | ||
1255 | struct btrfs_key key; | ||
1256 | return btrfs_find_item(fs_root, path, inum, ioff, | ||
1257 | BTRFS_INODE_ITEM_KEY, &key); | ||
1258 | } | ||
1259 | |||
1260 | static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | ||
1261 | struct btrfs_path *path, | ||
1262 | struct btrfs_key *found_key) | ||
1263 | { | ||
1264 | return btrfs_find_item(fs_root, path, inum, ioff, | ||
1265 | BTRFS_INODE_REF_KEY, found_key); | ||
1266 | } | ||
1267 | |||
1268 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | 1249 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, |
1269 | u64 start_off, struct btrfs_path *path, | 1250 | u64 start_off, struct btrfs_path *path, |
1270 | struct btrfs_inode_extref **ret_extref, | 1251 | struct btrfs_inode_extref **ret_extref, |
@@ -1374,7 +1355,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1374 | btrfs_tree_read_unlock_blocking(eb); | 1355 | btrfs_tree_read_unlock_blocking(eb); |
1375 | free_extent_buffer(eb); | 1356 | free_extent_buffer(eb); |
1376 | } | 1357 | } |
1377 | ret = inode_ref_info(parent, 0, fs_root, path, &found_key); | 1358 | ret = btrfs_find_item(fs_root, path, parent, 0, |
1359 | BTRFS_INODE_REF_KEY, &found_key); | ||
1378 | if (ret > 0) | 1360 | if (ret > 0) |
1379 | ret = -ENOENT; | 1361 | ret = -ENOENT; |
1380 | if (ret) | 1362 | if (ret) |
@@ -1727,8 +1709,10 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, | |||
1727 | struct btrfs_key found_key; | 1709 | struct btrfs_key found_key; |
1728 | 1710 | ||
1729 | while (!ret) { | 1711 | while (!ret) { |
1730 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, | 1712 | ret = btrfs_find_item(fs_root, path, inum, |
1731 | &found_key); | 1713 | parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY, |
1714 | &found_key); | ||
1715 | |||
1732 | if (ret < 0) | 1716 | if (ret < 0) |
1733 | break; | 1717 | break; |
1734 | if (ret) { | 1718 | if (ret) { |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 2a1ac6bfc724..9c41fbac3009 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -32,9 +32,6 @@ struct inode_fs_paths { | |||
32 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, | 32 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, |
33 | void *ctx); | 33 | void *ctx); |
34 | 34 | ||
35 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | ||
36 | struct btrfs_path *path); | ||
37 | |||
38 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 35 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
39 | struct btrfs_path *path, struct btrfs_key *found_key, | 36 | struct btrfs_path *path, struct btrfs_key *found_key, |
40 | u64 *flags); | 37 | u64 *flags); |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 4aadadcfab20..de5e4f2adfea 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -185,6 +185,9 @@ struct btrfs_inode { | |||
185 | 185 | ||
186 | struct btrfs_delayed_node *delayed_node; | 186 | struct btrfs_delayed_node *delayed_node; |
187 | 187 | ||
188 | /* File creation time. */ | ||
189 | struct timespec i_otime; | ||
190 | |||
188 | struct inode vfs_inode; | 191 | struct inode vfs_inode; |
189 | }; | 192 | }; |
190 | 193 | ||
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 14a72ed14ef7..993642199326 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -213,11 +213,19 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) | |||
213 | */ | 213 | */ |
214 | static void add_root_to_dirty_list(struct btrfs_root *root) | 214 | static void add_root_to_dirty_list(struct btrfs_root *root) |
215 | { | 215 | { |
216 | if (test_bit(BTRFS_ROOT_DIRTY, &root->state) || | ||
217 | !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state)) | ||
218 | return; | ||
219 | |||
216 | spin_lock(&root->fs_info->trans_lock); | 220 | spin_lock(&root->fs_info->trans_lock); |
217 | if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) && | 221 | if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) { |
218 | list_empty(&root->dirty_list)) { | 222 | /* Want the extent tree to be the last on the list */ |
219 | list_add(&root->dirty_list, | 223 | if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID) |
220 | &root->fs_info->dirty_cowonly_roots); | 224 | list_move_tail(&root->dirty_list, |
225 | &root->fs_info->dirty_cowonly_roots); | ||
226 | else | ||
227 | list_move(&root->dirty_list, | ||
228 | &root->fs_info->dirty_cowonly_roots); | ||
221 | } | 229 | } |
222 | spin_unlock(&root->fs_info->trans_lock); | 230 | spin_unlock(&root->fs_info->trans_lock); |
223 | } | 231 | } |
@@ -1363,8 +1371,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
1363 | 1371 | ||
1364 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | 1372 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { |
1365 | BUG_ON(tm->slot != 0); | 1373 | BUG_ON(tm->slot != 0); |
1366 | eb_rewin = alloc_dummy_extent_buffer(eb->start, | 1374 | eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); |
1367 | fs_info->tree_root->nodesize); | ||
1368 | if (!eb_rewin) { | 1375 | if (!eb_rewin) { |
1369 | btrfs_tree_read_unlock_blocking(eb); | 1376 | btrfs_tree_read_unlock_blocking(eb); |
1370 | free_extent_buffer(eb); | 1377 | free_extent_buffer(eb); |
@@ -1444,7 +1451,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
1444 | } else if (old_root) { | 1451 | } else if (old_root) { |
1445 | btrfs_tree_read_unlock(eb_root); | 1452 | btrfs_tree_read_unlock(eb_root); |
1446 | free_extent_buffer(eb_root); | 1453 | free_extent_buffer(eb_root); |
1447 | eb = alloc_dummy_extent_buffer(logical, root->nodesize); | 1454 | eb = alloc_dummy_extent_buffer(root->fs_info, logical); |
1448 | } else { | 1455 | } else { |
1449 | btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); | 1456 | btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); |
1450 | eb = btrfs_clone_extent_buffer(eb_root); | 1457 | eb = btrfs_clone_extent_buffer(eb_root); |
@@ -2282,7 +2289,7 @@ static void reada_for_search(struct btrfs_root *root, | |||
2282 | if ((search <= target && target - search <= 65536) || | 2289 | if ((search <= target && target - search <= 65536) || |
2283 | (search > target && search - target <= 65536)) { | 2290 | (search > target && search - target <= 65536)) { |
2284 | gen = btrfs_node_ptr_generation(node, nr); | 2291 | gen = btrfs_node_ptr_generation(node, nr); |
2285 | readahead_tree_block(root, search, blocksize); | 2292 | readahead_tree_block(root, search); |
2286 | nread += blocksize; | 2293 | nread += blocksize; |
2287 | } | 2294 | } |
2288 | nscan++; | 2295 | nscan++; |
@@ -2301,7 +2308,6 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2301 | u64 gen; | 2308 | u64 gen; |
2302 | u64 block1 = 0; | 2309 | u64 block1 = 0; |
2303 | u64 block2 = 0; | 2310 | u64 block2 = 0; |
2304 | int blocksize; | ||
2305 | 2311 | ||
2306 | parent = path->nodes[level + 1]; | 2312 | parent = path->nodes[level + 1]; |
2307 | if (!parent) | 2313 | if (!parent) |
@@ -2309,7 +2315,6 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2309 | 2315 | ||
2310 | nritems = btrfs_header_nritems(parent); | 2316 | nritems = btrfs_header_nritems(parent); |
2311 | slot = path->slots[level + 1]; | 2317 | slot = path->slots[level + 1]; |
2312 | blocksize = root->nodesize; | ||
2313 | 2318 | ||
2314 | if (slot > 0) { | 2319 | if (slot > 0) { |
2315 | block1 = btrfs_node_blockptr(parent, slot - 1); | 2320 | block1 = btrfs_node_blockptr(parent, slot - 1); |
@@ -2334,9 +2339,9 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2334 | } | 2339 | } |
2335 | 2340 | ||
2336 | if (block1) | 2341 | if (block1) |
2337 | readahead_tree_block(root, block1, blocksize); | 2342 | readahead_tree_block(root, block1); |
2338 | if (block2) | 2343 | if (block2) |
2339 | readahead_tree_block(root, block2, blocksize); | 2344 | readahead_tree_block(root, block2); |
2340 | } | 2345 | } |
2341 | 2346 | ||
2342 | 2347 | ||
@@ -2609,32 +2614,24 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key, | |||
2609 | return 0; | 2614 | return 0; |
2610 | } | 2615 | } |
2611 | 2616 | ||
2612 | int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, | 2617 | int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, |
2613 | u64 iobjectid, u64 ioff, u8 key_type, | 2618 | u64 iobjectid, u64 ioff, u8 key_type, |
2614 | struct btrfs_key *found_key) | 2619 | struct btrfs_key *found_key) |
2615 | { | 2620 | { |
2616 | int ret; | 2621 | int ret; |
2617 | struct btrfs_key key; | 2622 | struct btrfs_key key; |
2618 | struct extent_buffer *eb; | 2623 | struct extent_buffer *eb; |
2619 | struct btrfs_path *path; | 2624 | |
2625 | ASSERT(path); | ||
2626 | ASSERT(found_key); | ||
2620 | 2627 | ||
2621 | key.type = key_type; | 2628 | key.type = key_type; |
2622 | key.objectid = iobjectid; | 2629 | key.objectid = iobjectid; |
2623 | key.offset = ioff; | 2630 | key.offset = ioff; |
2624 | 2631 | ||
2625 | if (found_path == NULL) { | ||
2626 | path = btrfs_alloc_path(); | ||
2627 | if (!path) | ||
2628 | return -ENOMEM; | ||
2629 | } else | ||
2630 | path = found_path; | ||
2631 | |||
2632 | ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); | 2632 | ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); |
2633 | if ((ret < 0) || (found_key == NULL)) { | 2633 | if (ret < 0) |
2634 | if (path != found_path) | ||
2635 | btrfs_free_path(path); | ||
2636 | return ret; | 2634 | return ret; |
2637 | } | ||
2638 | 2635 | ||
2639 | eb = path->nodes[0]; | 2636 | eb = path->nodes[0]; |
2640 | if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { | 2637 | if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { |
@@ -3383,7 +3380,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
3383 | add_root_to_dirty_list(root); | 3380 | add_root_to_dirty_list(root); |
3384 | extent_buffer_get(c); | 3381 | extent_buffer_get(c); |
3385 | path->nodes[level] = c; | 3382 | path->nodes[level] = c; |
3386 | path->locks[level] = BTRFS_WRITE_LOCK; | 3383 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
3387 | path->slots[level] = 0; | 3384 | path->slots[level] = 0; |
3388 | return 0; | 3385 | return 0; |
3389 | } | 3386 | } |
@@ -4356,13 +4353,15 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
4356 | path->search_for_split = 1; | 4353 | path->search_for_split = 1; |
4357 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 4354 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); |
4358 | path->search_for_split = 0; | 4355 | path->search_for_split = 0; |
4356 | if (ret > 0) | ||
4357 | ret = -EAGAIN; | ||
4359 | if (ret < 0) | 4358 | if (ret < 0) |
4360 | goto err; | 4359 | goto err; |
4361 | 4360 | ||
4362 | ret = -EAGAIN; | 4361 | ret = -EAGAIN; |
4363 | leaf = path->nodes[0]; | 4362 | leaf = path->nodes[0]; |
4364 | /* if our item isn't there or got smaller, return now */ | 4363 | /* if our item isn't there, return now */ |
4365 | if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) | 4364 | if (item_size != btrfs_item_size_nr(leaf, path->slots[0])) |
4366 | goto err; | 4365 | goto err; |
4367 | 4366 | ||
4368 | /* the leaf has changed, it now has room. return now */ | 4367 | /* the leaf has changed, it now has room. return now */ |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0b180708bf79..84c3b00f3de8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
198 | 198 | ||
199 | #define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) | 199 | #define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) |
200 | 200 | ||
201 | #define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024) | ||
202 | |||
201 | /* | 203 | /* |
202 | * The key defines the order in the tree, and so it also defines (optimal) | 204 | * The key defines the order in the tree, and so it also defines (optimal) |
203 | * block layout. | 205 | * block layout. |
@@ -1020,6 +1022,9 @@ enum btrfs_raid_types { | |||
1020 | BTRFS_BLOCK_GROUP_RAID6 | \ | 1022 | BTRFS_BLOCK_GROUP_RAID6 | \ |
1021 | BTRFS_BLOCK_GROUP_DUP | \ | 1023 | BTRFS_BLOCK_GROUP_DUP | \ |
1022 | BTRFS_BLOCK_GROUP_RAID10) | 1024 | BTRFS_BLOCK_GROUP_RAID10) |
1025 | #define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ | ||
1026 | BTRFS_BLOCK_GROUP_RAID6) | ||
1027 | |||
1023 | /* | 1028 | /* |
1024 | * We need a bit for restriper to be able to tell when chunks of type | 1029 | * We need a bit for restriper to be able to tell when chunks of type |
1025 | * SINGLE are available. This "extended" profile format is used in | 1030 | * SINGLE are available. This "extended" profile format is used in |
@@ -1239,7 +1244,6 @@ enum btrfs_disk_cache_state { | |||
1239 | BTRFS_DC_ERROR = 1, | 1244 | BTRFS_DC_ERROR = 1, |
1240 | BTRFS_DC_CLEAR = 2, | 1245 | BTRFS_DC_CLEAR = 2, |
1241 | BTRFS_DC_SETUP = 3, | 1246 | BTRFS_DC_SETUP = 3, |
1242 | BTRFS_DC_NEED_WRITE = 4, | ||
1243 | }; | 1247 | }; |
1244 | 1248 | ||
1245 | struct btrfs_caching_control { | 1249 | struct btrfs_caching_control { |
@@ -1277,7 +1281,6 @@ struct btrfs_block_group_cache { | |||
1277 | unsigned long full_stripe_len; | 1281 | unsigned long full_stripe_len; |
1278 | 1282 | ||
1279 | unsigned int ro:1; | 1283 | unsigned int ro:1; |
1280 | unsigned int dirty:1; | ||
1281 | unsigned int iref:1; | 1284 | unsigned int iref:1; |
1282 | unsigned int has_caching_ctl:1; | 1285 | unsigned int has_caching_ctl:1; |
1283 | unsigned int removed:1; | 1286 | unsigned int removed:1; |
@@ -1315,6 +1318,9 @@ struct btrfs_block_group_cache { | |||
1315 | struct list_head ro_list; | 1318 | struct list_head ro_list; |
1316 | 1319 | ||
1317 | atomic_t trimming; | 1320 | atomic_t trimming; |
1321 | |||
1322 | /* For dirty block groups */ | ||
1323 | struct list_head dirty_list; | ||
1318 | }; | 1324 | }; |
1319 | 1325 | ||
1320 | /* delayed seq elem */ | 1326 | /* delayed seq elem */ |
@@ -1741,6 +1747,7 @@ struct btrfs_fs_info { | |||
1741 | 1747 | ||
1742 | spinlock_t unused_bgs_lock; | 1748 | spinlock_t unused_bgs_lock; |
1743 | struct list_head unused_bgs; | 1749 | struct list_head unused_bgs; |
1750 | struct mutex unused_bg_unpin_mutex; | ||
1744 | 1751 | ||
1745 | /* For btrfs to record security options */ | 1752 | /* For btrfs to record security options */ |
1746 | struct security_mnt_opts security_opts; | 1753 | struct security_mnt_opts security_opts; |
@@ -1776,6 +1783,7 @@ struct btrfs_subvolume_writers { | |||
1776 | #define BTRFS_ROOT_DEFRAG_RUNNING 6 | 1783 | #define BTRFS_ROOT_DEFRAG_RUNNING 6 |
1777 | #define BTRFS_ROOT_FORCE_COW 7 | 1784 | #define BTRFS_ROOT_FORCE_COW 7 |
1778 | #define BTRFS_ROOT_MULTI_LOG_TASKS 8 | 1785 | #define BTRFS_ROOT_MULTI_LOG_TASKS 8 |
1786 | #define BTRFS_ROOT_DIRTY 9 | ||
1779 | 1787 | ||
1780 | /* | 1788 | /* |
1781 | * in ram representation of the tree. extent_root is used for all allocations | 1789 | * in ram representation of the tree. extent_root is used for all allocations |
@@ -1794,8 +1802,6 @@ struct btrfs_root { | |||
1794 | struct btrfs_fs_info *fs_info; | 1802 | struct btrfs_fs_info *fs_info; |
1795 | struct extent_io_tree dirty_log_pages; | 1803 | struct extent_io_tree dirty_log_pages; |
1796 | 1804 | ||
1797 | struct kobject root_kobj; | ||
1798 | struct completion kobj_unregister; | ||
1799 | struct mutex objectid_mutex; | 1805 | struct mutex objectid_mutex; |
1800 | 1806 | ||
1801 | spinlock_t accounting_lock; | 1807 | spinlock_t accounting_lock; |
@@ -2465,31 +2471,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); | |||
2465 | BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); | 2471 | BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); |
2466 | BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); | 2472 | BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); |
2467 | BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); | 2473 | BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); |
2468 | |||
2469 | static inline struct btrfs_timespec * | ||
2470 | btrfs_inode_atime(struct btrfs_inode_item *inode_item) | ||
2471 | { | ||
2472 | unsigned long ptr = (unsigned long)inode_item; | ||
2473 | ptr += offsetof(struct btrfs_inode_item, atime); | ||
2474 | return (struct btrfs_timespec *)ptr; | ||
2475 | } | ||
2476 | |||
2477 | static inline struct btrfs_timespec * | ||
2478 | btrfs_inode_mtime(struct btrfs_inode_item *inode_item) | ||
2479 | { | ||
2480 | unsigned long ptr = (unsigned long)inode_item; | ||
2481 | ptr += offsetof(struct btrfs_inode_item, mtime); | ||
2482 | return (struct btrfs_timespec *)ptr; | ||
2483 | } | ||
2484 | |||
2485 | static inline struct btrfs_timespec * | ||
2486 | btrfs_inode_ctime(struct btrfs_inode_item *inode_item) | ||
2487 | { | ||
2488 | unsigned long ptr = (unsigned long)inode_item; | ||
2489 | ptr += offsetof(struct btrfs_inode_item, ctime); | ||
2490 | return (struct btrfs_timespec *)ptr; | ||
2491 | } | ||
2492 | |||
2493 | BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); | 2474 | BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); |
2494 | BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); | 2475 | BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); |
2495 | BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); | 2476 | BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index de4e70fb3cbb..82f0c7c95474 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1755,27 +1755,31 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
1755 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | 1755 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); |
1756 | btrfs_set_stack_inode_block_group(inode_item, 0); | 1756 | btrfs_set_stack_inode_block_group(inode_item, 0); |
1757 | 1757 | ||
1758 | btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), | 1758 | btrfs_set_stack_timespec_sec(&inode_item->atime, |
1759 | inode->i_atime.tv_sec); | 1759 | inode->i_atime.tv_sec); |
1760 | btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item), | 1760 | btrfs_set_stack_timespec_nsec(&inode_item->atime, |
1761 | inode->i_atime.tv_nsec); | 1761 | inode->i_atime.tv_nsec); |
1762 | 1762 | ||
1763 | btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item), | 1763 | btrfs_set_stack_timespec_sec(&inode_item->mtime, |
1764 | inode->i_mtime.tv_sec); | 1764 | inode->i_mtime.tv_sec); |
1765 | btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item), | 1765 | btrfs_set_stack_timespec_nsec(&inode_item->mtime, |
1766 | inode->i_mtime.tv_nsec); | 1766 | inode->i_mtime.tv_nsec); |
1767 | 1767 | ||
1768 | btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item), | 1768 | btrfs_set_stack_timespec_sec(&inode_item->ctime, |
1769 | inode->i_ctime.tv_sec); | 1769 | inode->i_ctime.tv_sec); |
1770 | btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item), | 1770 | btrfs_set_stack_timespec_nsec(&inode_item->ctime, |
1771 | inode->i_ctime.tv_nsec); | 1771 | inode->i_ctime.tv_nsec); |
1772 | |||
1773 | btrfs_set_stack_timespec_sec(&inode_item->otime, | ||
1774 | BTRFS_I(inode)->i_otime.tv_sec); | ||
1775 | btrfs_set_stack_timespec_nsec(&inode_item->otime, | ||
1776 | BTRFS_I(inode)->i_otime.tv_nsec); | ||
1772 | } | 1777 | } |
1773 | 1778 | ||
1774 | int btrfs_fill_inode(struct inode *inode, u32 *rdev) | 1779 | int btrfs_fill_inode(struct inode *inode, u32 *rdev) |
1775 | { | 1780 | { |
1776 | struct btrfs_delayed_node *delayed_node; | 1781 | struct btrfs_delayed_node *delayed_node; |
1777 | struct btrfs_inode_item *inode_item; | 1782 | struct btrfs_inode_item *inode_item; |
1778 | struct btrfs_timespec *tspec; | ||
1779 | 1783 | ||
1780 | delayed_node = btrfs_get_delayed_node(inode); | 1784 | delayed_node = btrfs_get_delayed_node(inode); |
1781 | if (!delayed_node) | 1785 | if (!delayed_node) |
@@ -1802,17 +1806,19 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) | |||
1802 | *rdev = btrfs_stack_inode_rdev(inode_item); | 1806 | *rdev = btrfs_stack_inode_rdev(inode_item); |
1803 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); | 1807 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); |
1804 | 1808 | ||
1805 | tspec = btrfs_inode_atime(inode_item); | 1809 | inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime); |
1806 | inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec); | 1810 | inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime); |
1807 | inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec); | 1811 | |
1812 | inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime); | ||
1813 | inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime); | ||
1808 | 1814 | ||
1809 | tspec = btrfs_inode_mtime(inode_item); | 1815 | inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime); |
1810 | inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec); | 1816 | inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime); |
1811 | inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec); | ||
1812 | 1817 | ||
1813 | tspec = btrfs_inode_ctime(inode_item); | 1818 | BTRFS_I(inode)->i_otime.tv_sec = |
1814 | inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec); | 1819 | btrfs_stack_timespec_sec(&inode_item->otime); |
1815 | inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec); | 1820 | BTRFS_I(inode)->i_otime.tv_nsec = |
1821 | btrfs_stack_timespec_nsec(&inode_item->otime); | ||
1816 | 1822 | ||
1817 | inode->i_generation = BTRFS_I(inode)->generation; | 1823 | inode->i_generation = BTRFS_I(inode)->generation; |
1818 | BTRFS_I(inode)->index_cnt = (u64)-1; | 1824 | BTRFS_I(inode)->index_cnt = (u64)-1; |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index ca6a3a3b6b6c..5ec03d999c37 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -440,18 +440,9 @@ leave: | |||
440 | */ | 440 | */ |
441 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) | 441 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) |
442 | { | 442 | { |
443 | s64 writers; | ||
444 | DEFINE_WAIT(wait); | ||
445 | |||
446 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | 443 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); |
447 | do { | 444 | wait_event(fs_info->replace_wait, !percpu_counter_sum( |
448 | prepare_to_wait(&fs_info->replace_wait, &wait, | 445 | &fs_info->bio_counter)); |
449 | TASK_UNINTERRUPTIBLE); | ||
450 | writers = percpu_counter_sum(&fs_info->bio_counter); | ||
451 | if (writers) | ||
452 | schedule(); | ||
453 | finish_wait(&fs_info->replace_wait, &wait); | ||
454 | } while (writers); | ||
455 | } | 446 | } |
456 | 447 | ||
457 | /* | 448 | /* |
@@ -932,15 +923,15 @@ void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount) | |||
932 | 923 | ||
933 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) | 924 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) |
934 | { | 925 | { |
935 | DEFINE_WAIT(wait); | 926 | while (1) { |
936 | again: | 927 | percpu_counter_inc(&fs_info->bio_counter); |
937 | percpu_counter_inc(&fs_info->bio_counter); | 928 | if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING, |
938 | if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { | 929 | &fs_info->fs_state))) |
930 | break; | ||
931 | |||
939 | btrfs_bio_counter_dec(fs_info); | 932 | btrfs_bio_counter_dec(fs_info); |
940 | wait_event(fs_info->replace_wait, | 933 | wait_event(fs_info->replace_wait, |
941 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, | 934 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, |
942 | &fs_info->fs_state)); | 935 | &fs_info->fs_state)); |
943 | goto again; | ||
944 | } | 936 | } |
945 | |||
946 | } | 937 | } |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1afb18226da8..f79f38542a73 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
318 | memcpy(&found, result, csum_size); | 318 | memcpy(&found, result, csum_size); |
319 | 319 | ||
320 | read_extent_buffer(buf, &val, 0, csum_size); | 320 | read_extent_buffer(buf, &val, 0, csum_size); |
321 | printk_ratelimited(KERN_INFO | 321 | printk_ratelimited(KERN_WARNING |
322 | "BTRFS: %s checksum verify failed on %llu wanted %X found %X " | 322 | "BTRFS: %s checksum verify failed on %llu wanted %X found %X " |
323 | "level %d\n", | 323 | "level %d\n", |
324 | root->fs_info->sb->s_id, buf->start, | 324 | root->fs_info->sb->s_id, buf->start, |
@@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
367 | ret = 0; | 367 | ret = 0; |
368 | goto out; | 368 | goto out; |
369 | } | 369 | } |
370 | printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", | 370 | printk_ratelimited(KERN_ERR |
371 | "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", | ||
371 | eb->fs_info->sb->s_id, eb->start, | 372 | eb->fs_info->sb->s_id, eb->start, |
372 | parent_transid, btrfs_header_generation(eb)); | 373 | parent_transid, btrfs_header_generation(eb)); |
373 | ret = 1; | 374 | ret = 1; |
@@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
633 | 634 | ||
634 | found_start = btrfs_header_bytenr(eb); | 635 | found_start = btrfs_header_bytenr(eb); |
635 | if (found_start != eb->start) { | 636 | if (found_start != eb->start) { |
636 | printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start " | 637 | printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start " |
637 | "%llu %llu\n", | 638 | "%llu %llu\n", |
638 | eb->fs_info->sb->s_id, found_start, eb->start); | 639 | eb->fs_info->sb->s_id, found_start, eb->start); |
639 | ret = -EIO; | 640 | ret = -EIO; |
640 | goto err; | 641 | goto err; |
641 | } | 642 | } |
642 | if (check_tree_block_fsid(root, eb)) { | 643 | if (check_tree_block_fsid(root, eb)) { |
643 | printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n", | 644 | printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", |
644 | eb->fs_info->sb->s_id, eb->start); | 645 | eb->fs_info->sb->s_id, eb->start); |
645 | ret = -EIO; | 646 | ret = -EIO; |
646 | goto err; | 647 | goto err; |
647 | } | 648 | } |
648 | found_level = btrfs_header_level(eb); | 649 | found_level = btrfs_header_level(eb); |
649 | if (found_level >= BTRFS_MAX_LEVEL) { | 650 | if (found_level >= BTRFS_MAX_LEVEL) { |
650 | btrfs_info(root->fs_info, "bad tree block level %d", | 651 | btrfs_err(root->fs_info, "bad tree block level %d", |
651 | (int)btrfs_header_level(eb)); | 652 | (int)btrfs_header_level(eb)); |
652 | ret = -EIO; | 653 | ret = -EIO; |
653 | goto err; | 654 | goto err; |
@@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = { | |||
1073 | .set_page_dirty = btree_set_page_dirty, | 1074 | .set_page_dirty = btree_set_page_dirty, |
1074 | }; | 1075 | }; |
1075 | 1076 | ||
1076 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) | 1077 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr) |
1077 | { | 1078 | { |
1078 | struct extent_buffer *buf = NULL; | 1079 | struct extent_buffer *buf = NULL; |
1079 | struct inode *btree_inode = root->fs_info->btree_inode; | 1080 | struct inode *btree_inode = root->fs_info->btree_inode; |
1080 | 1081 | ||
1081 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1082 | buf = btrfs_find_create_tree_block(root, bytenr); |
1082 | if (!buf) | 1083 | if (!buf) |
1083 | return; | 1084 | return; |
1084 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | 1085 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, |
@@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) | |||
1086 | free_extent_buffer(buf); | 1087 | free_extent_buffer(buf); |
1087 | } | 1088 | } |
1088 | 1089 | ||
1089 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 1090 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, |
1090 | int mirror_num, struct extent_buffer **eb) | 1091 | int mirror_num, struct extent_buffer **eb) |
1091 | { | 1092 | { |
1092 | struct extent_buffer *buf = NULL; | 1093 | struct extent_buffer *buf = NULL; |
@@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | |||
1094 | struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; | 1095 | struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; |
1095 | int ret; | 1096 | int ret; |
1096 | 1097 | ||
1097 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1098 | buf = btrfs_find_create_tree_block(root, bytenr); |
1098 | if (!buf) | 1099 | if (!buf) |
1099 | return 0; | 1100 | return 0; |
1100 | 1101 | ||
@@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | |||
1125 | } | 1126 | } |
1126 | 1127 | ||
1127 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 1128 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
1128 | u64 bytenr, u32 blocksize) | 1129 | u64 bytenr) |
1129 | { | 1130 | { |
1130 | if (btrfs_test_is_dummy_root(root)) | 1131 | if (btrfs_test_is_dummy_root(root)) |
1131 | return alloc_test_extent_buffer(root->fs_info, bytenr, | 1132 | return alloc_test_extent_buffer(root->fs_info, bytenr); |
1132 | blocksize); | 1133 | return alloc_extent_buffer(root->fs_info, bytenr); |
1133 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); | ||
1134 | } | 1134 | } |
1135 | 1135 | ||
1136 | 1136 | ||
@@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
1152 | struct extent_buffer *buf = NULL; | 1152 | struct extent_buffer *buf = NULL; |
1153 | int ret; | 1153 | int ret; |
1154 | 1154 | ||
1155 | buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize); | 1155 | buf = btrfs_find_create_tree_block(root, bytenr); |
1156 | if (!buf) | 1156 | if (!buf) |
1157 | return NULL; | 1157 | return NULL; |
1158 | 1158 | ||
@@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, | |||
1275 | memset(&root->root_key, 0, sizeof(root->root_key)); | 1275 | memset(&root->root_key, 0, sizeof(root->root_key)); |
1276 | memset(&root->root_item, 0, sizeof(root->root_item)); | 1276 | memset(&root->root_item, 0, sizeof(root->root_item)); |
1277 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); | 1277 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); |
1278 | memset(&root->root_kobj, 0, sizeof(root->root_kobj)); | ||
1279 | if (fs_info) | 1278 | if (fs_info) |
1280 | root->defrag_trans_start = fs_info->generation; | 1279 | root->defrag_trans_start = fs_info->generation; |
1281 | else | 1280 | else |
1282 | root->defrag_trans_start = 0; | 1281 | root->defrag_trans_start = 0; |
1283 | init_completion(&root->kobj_unregister); | ||
1284 | root->root_key.objectid = objectid; | 1282 | root->root_key.objectid = objectid; |
1285 | root->anon_dev = 0; | 1283 | root->anon_dev = 0; |
1286 | 1284 | ||
@@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, | |||
1630 | bool check_ref) | 1628 | bool check_ref) |
1631 | { | 1629 | { |
1632 | struct btrfs_root *root; | 1630 | struct btrfs_root *root; |
1631 | struct btrfs_path *path; | ||
1632 | struct btrfs_key key; | ||
1633 | int ret; | 1633 | int ret; |
1634 | 1634 | ||
1635 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | 1635 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) |
@@ -1669,8 +1669,17 @@ again: | |||
1669 | if (ret) | 1669 | if (ret) |
1670 | goto fail; | 1670 | goto fail; |
1671 | 1671 | ||
1672 | ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID, | 1672 | path = btrfs_alloc_path(); |
1673 | location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL); | 1673 | if (!path) { |
1674 | ret = -ENOMEM; | ||
1675 | goto fail; | ||
1676 | } | ||
1677 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
1678 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
1679 | key.offset = location->objectid; | ||
1680 | |||
1681 | ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); | ||
1682 | btrfs_free_path(path); | ||
1674 | if (ret < 0) | 1683 | if (ret < 0) |
1675 | goto fail; | 1684 | goto fail; |
1676 | if (ret == 0) | 1685 | if (ret == 0) |
@@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb, | |||
2232 | spin_lock_init(&fs_info->qgroup_op_lock); | 2241 | spin_lock_init(&fs_info->qgroup_op_lock); |
2233 | spin_lock_init(&fs_info->buffer_lock); | 2242 | spin_lock_init(&fs_info->buffer_lock); |
2234 | spin_lock_init(&fs_info->unused_bgs_lock); | 2243 | spin_lock_init(&fs_info->unused_bgs_lock); |
2244 | mutex_init(&fs_info->unused_bg_unpin_mutex); | ||
2235 | rwlock_init(&fs_info->tree_mod_log_lock); | 2245 | rwlock_init(&fs_info->tree_mod_log_lock); |
2236 | mutex_init(&fs_info->reloc_mutex); | 2246 | mutex_init(&fs_info->reloc_mutex); |
2237 | mutex_init(&fs_info->delalloc_root_mutex); | 2247 | mutex_init(&fs_info->delalloc_root_mutex); |
@@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb, | |||
2496 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | 2506 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
2497 | 2507 | ||
2498 | if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) | 2508 | if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) |
2499 | printk(KERN_ERR "BTRFS: has skinny extents\n"); | 2509 | printk(KERN_INFO "BTRFS: has skinny extents\n"); |
2500 | 2510 | ||
2501 | /* | 2511 | /* |
2502 | * flag our filesystem as having big metadata blocks if | 2512 | * flag our filesystem as having big metadata blocks if |
@@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb, | |||
2520 | */ | 2530 | */ |
2521 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && | 2531 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && |
2522 | (sectorsize != nodesize)) { | 2532 | (sectorsize != nodesize)) { |
2523 | printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " | 2533 | printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes " |
2524 | "are not allowed for mixed block groups on %s\n", | 2534 | "are not allowed for mixed block groups on %s\n", |
2525 | sb->s_id); | 2535 | sb->s_id); |
2526 | goto fail_alloc; | 2536 | goto fail_alloc; |
@@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb, | |||
2628 | sb->s_blocksize_bits = blksize_bits(sectorsize); | 2638 | sb->s_blocksize_bits = blksize_bits(sectorsize); |
2629 | 2639 | ||
2630 | if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { | 2640 | if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { |
2631 | printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id); | 2641 | printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id); |
2632 | goto fail_sb_buffer; | 2642 | goto fail_sb_buffer; |
2633 | } | 2643 | } |
2634 | 2644 | ||
2635 | if (sectorsize != PAGE_SIZE) { | 2645 | if (sectorsize != PAGE_SIZE) { |
2636 | printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) " | 2646 | printk(KERN_ERR "BTRFS: incompatible sector size (%lu) " |
2637 | "found on %s\n", (unsigned long)sectorsize, sb->s_id); | 2647 | "found on %s\n", (unsigned long)sectorsize, sb->s_id); |
2638 | goto fail_sb_buffer; | 2648 | goto fail_sb_buffer; |
2639 | } | 2649 | } |
@@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb, | |||
2642 | ret = btrfs_read_sys_array(tree_root); | 2652 | ret = btrfs_read_sys_array(tree_root); |
2643 | mutex_unlock(&fs_info->chunk_mutex); | 2653 | mutex_unlock(&fs_info->chunk_mutex); |
2644 | if (ret) { | 2654 | if (ret) { |
2645 | printk(KERN_WARNING "BTRFS: failed to read the system " | 2655 | printk(KERN_ERR "BTRFS: failed to read the system " |
2646 | "array on %s\n", sb->s_id); | 2656 | "array on %s\n", sb->s_id); |
2647 | goto fail_sb_buffer; | 2657 | goto fail_sb_buffer; |
2648 | } | 2658 | } |
@@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb, | |||
2657 | generation); | 2667 | generation); |
2658 | if (!chunk_root->node || | 2668 | if (!chunk_root->node || |
2659 | !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | 2669 | !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { |
2660 | printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", | 2670 | printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", |
2661 | sb->s_id); | 2671 | sb->s_id); |
2662 | goto fail_tree_roots; | 2672 | goto fail_tree_roots; |
2663 | } | 2673 | } |
@@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb, | |||
2669 | 2679 | ||
2670 | ret = btrfs_read_chunk_tree(chunk_root); | 2680 | ret = btrfs_read_chunk_tree(chunk_root); |
2671 | if (ret) { | 2681 | if (ret) { |
2672 | printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n", | 2682 | printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n", |
2673 | sb->s_id); | 2683 | sb->s_id); |
2674 | goto fail_tree_roots; | 2684 | goto fail_tree_roots; |
2675 | } | 2685 | } |
@@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb, | |||
2681 | btrfs_close_extra_devices(fs_info, fs_devices, 0); | 2691 | btrfs_close_extra_devices(fs_info, fs_devices, 0); |
2682 | 2692 | ||
2683 | if (!fs_devices->latest_bdev) { | 2693 | if (!fs_devices->latest_bdev) { |
2684 | printk(KERN_CRIT "BTRFS: failed to read devices on %s\n", | 2694 | printk(KERN_ERR "BTRFS: failed to read devices on %s\n", |
2685 | sb->s_id); | 2695 | sb->s_id); |
2686 | goto fail_tree_roots; | 2696 | goto fail_tree_roots; |
2687 | } | 2697 | } |
@@ -2765,7 +2775,7 @@ retry_root_backup: | |||
2765 | 2775 | ||
2766 | ret = btrfs_recover_balance(fs_info); | 2776 | ret = btrfs_recover_balance(fs_info); |
2767 | if (ret) { | 2777 | if (ret) { |
2768 | printk(KERN_WARNING "BTRFS: failed to recover balance\n"); | 2778 | printk(KERN_ERR "BTRFS: failed to recover balance\n"); |
2769 | goto fail_block_groups; | 2779 | goto fail_block_groups; |
2770 | } | 2780 | } |
2771 | 2781 | ||
@@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3860 | printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", | 3870 | printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", |
3861 | btrfs_super_log_root(sb)); | 3871 | btrfs_super_log_root(sb)); |
3862 | 3872 | ||
3873 | /* | ||
3874 | * Check the lower bound, the alignment and other constraints are | ||
3875 | * checked later. | ||
3876 | */ | ||
3877 | if (btrfs_super_nodesize(sb) < 4096) { | ||
3878 | printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n", | ||
3879 | btrfs_super_nodesize(sb)); | ||
3880 | ret = -EINVAL; | ||
3881 | } | ||
3882 | if (btrfs_super_sectorsize(sb) < 4096) { | ||
3883 | printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n", | ||
3884 | btrfs_super_sectorsize(sb)); | ||
3885 | ret = -EINVAL; | ||
3886 | } | ||
3887 | |||
3863 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { | 3888 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { |
3864 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", | 3889 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", |
3865 | fs_info->fsid, sb->dev_item.fsid); | 3890 | fs_info->fsid, sb->dev_item.fsid); |
@@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3873 | if (btrfs_super_num_devices(sb) > (1UL << 31)) | 3898 | if (btrfs_super_num_devices(sb) > (1UL << 31)) |
3874 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", | 3899 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", |
3875 | btrfs_super_num_devices(sb)); | 3900 | btrfs_super_num_devices(sb)); |
3901 | if (btrfs_super_num_devices(sb) == 0) { | ||
3902 | printk(KERN_ERR "BTRFS: number of devices is 0\n"); | ||
3903 | ret = -EINVAL; | ||
3904 | } | ||
3876 | 3905 | ||
3877 | if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { | 3906 | if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { |
3878 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", | 3907 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", |
@@ -3881,6 +3910,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3881 | } | 3910 | } |
3882 | 3911 | ||
3883 | /* | 3912 | /* |
3913 | * Obvious sys_chunk_array corruptions, it must hold at least one key | ||
3914 | * and one chunk | ||
3915 | */ | ||
3916 | if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { | ||
3917 | printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n", | ||
3918 | btrfs_super_sys_array_size(sb), | ||
3919 | BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); | ||
3920 | ret = -EINVAL; | ||
3921 | } | ||
3922 | if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) | ||
3923 | + sizeof(struct btrfs_chunk)) { | ||
3924 | printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", | ||
3925 | btrfs_super_sys_array_size(sb), | ||
3926 | sizeof(struct btrfs_disk_key) | ||
3927 | + sizeof(struct btrfs_chunk)); | ||
3928 | ret = -EINVAL; | ||
3929 | } | ||
3930 | |||
3931 | /* | ||
3884 | * The generation is a global counter, we'll trust it more than the others | 3932 | * The generation is a global counter, we'll trust it more than the others |
3885 | * but it's still possible that it's the one that's wrong. | 3933 | * but it's still possible that it's the one that's wrong. |
3886 | */ | 3934 | */ |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 414651821fb3..27d44c0fd236 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -46,11 +46,11 @@ struct btrfs_fs_devices; | |||
46 | 46 | ||
47 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 47 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
48 | u64 parent_transid); | 48 | u64 parent_transid); |
49 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); | 49 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr); |
50 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 50 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, |
51 | int mirror_num, struct extent_buffer **eb); | 51 | int mirror_num, struct extent_buffer **eb); |
52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
53 | u64 bytenr, u32 blocksize); | 53 | u64 bytenr); |
54 | void clean_tree_block(struct btrfs_trans_handle *trans, | 54 | void clean_tree_block(struct btrfs_trans_handle *trans, |
55 | struct btrfs_root *root, struct extent_buffer *buf); | 55 | struct btrfs_root *root, struct extent_buffer *buf); |
56 | int open_ctree(struct super_block *sb, | 56 | int open_ctree(struct super_block *sb, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a684086c3c81..571f402d3fc4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -74,8 +74,9 @@ enum { | |||
74 | RESERVE_ALLOC_NO_ACCOUNT = 2, | 74 | RESERVE_ALLOC_NO_ACCOUNT = 2, |
75 | }; | 75 | }; |
76 | 76 | ||
77 | static int update_block_group(struct btrfs_root *root, | 77 | static int update_block_group(struct btrfs_trans_handle *trans, |
78 | u64 bytenr, u64 num_bytes, int alloc); | 78 | struct btrfs_root *root, u64 bytenr, |
79 | u64 num_bytes, int alloc); | ||
79 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 80 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
80 | struct btrfs_root *root, | 81 | struct btrfs_root *root, |
81 | u64 bytenr, u64 num_bytes, u64 parent, | 82 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -1925,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1925 | */ | 1926 | */ |
1926 | ret = 0; | 1927 | ret = 0; |
1927 | } | 1928 | } |
1928 | kfree(bbio); | 1929 | btrfs_put_bbio(bbio); |
1929 | } | 1930 | } |
1930 | 1931 | ||
1931 | if (actual_bytes) | 1932 | if (actual_bytes) |
@@ -2768,7 +2769,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2768 | struct btrfs_delayed_ref_head *head; | 2769 | struct btrfs_delayed_ref_head *head; |
2769 | int ret; | 2770 | int ret; |
2770 | int run_all = count == (unsigned long)-1; | 2771 | int run_all = count == (unsigned long)-1; |
2771 | int run_most = 0; | ||
2772 | 2772 | ||
2773 | /* We'll clean this up in btrfs_cleanup_transaction */ | 2773 | /* We'll clean this up in btrfs_cleanup_transaction */ |
2774 | if (trans->aborted) | 2774 | if (trans->aborted) |
@@ -2778,10 +2778,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2778 | root = root->fs_info->tree_root; | 2778 | root = root->fs_info->tree_root; |
2779 | 2779 | ||
2780 | delayed_refs = &trans->transaction->delayed_refs; | 2780 | delayed_refs = &trans->transaction->delayed_refs; |
2781 | if (count == 0) { | 2781 | if (count == 0) |
2782 | count = atomic_read(&delayed_refs->num_entries) * 2; | 2782 | count = atomic_read(&delayed_refs->num_entries) * 2; |
2783 | run_most = 1; | ||
2784 | } | ||
2785 | 2783 | ||
2786 | again: | 2784 | again: |
2787 | #ifdef SCRAMBLE_DELAYED_REFS | 2785 | #ifdef SCRAMBLE_DELAYED_REFS |
@@ -3315,120 +3313,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
3315 | struct btrfs_root *root) | 3313 | struct btrfs_root *root) |
3316 | { | 3314 | { |
3317 | struct btrfs_block_group_cache *cache; | 3315 | struct btrfs_block_group_cache *cache; |
3318 | int err = 0; | 3316 | struct btrfs_transaction *cur_trans = trans->transaction; |
3317 | int ret = 0; | ||
3319 | struct btrfs_path *path; | 3318 | struct btrfs_path *path; |
3320 | u64 last = 0; | 3319 | |
3320 | if (list_empty(&cur_trans->dirty_bgs)) | ||
3321 | return 0; | ||
3321 | 3322 | ||
3322 | path = btrfs_alloc_path(); | 3323 | path = btrfs_alloc_path(); |
3323 | if (!path) | 3324 | if (!path) |
3324 | return -ENOMEM; | 3325 | return -ENOMEM; |
3325 | 3326 | ||
3326 | again: | 3327 | /* |
3327 | while (1) { | 3328 | * We don't need the lock here since we are protected by the transaction |
3328 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | 3329 | * commit. We want to do the cache_save_setup first and then run the |
3329 | while (cache) { | 3330 | * delayed refs to make sure we have the best chance at doing this all |
3330 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) | 3331 | * in one shot. |
3331 | break; | 3332 | */ |
3332 | cache = next_block_group(root, cache); | 3333 | while (!list_empty(&cur_trans->dirty_bgs)) { |
3333 | } | 3334 | cache = list_first_entry(&cur_trans->dirty_bgs, |
3334 | if (!cache) { | 3335 | struct btrfs_block_group_cache, |
3335 | if (last == 0) | 3336 | dirty_list); |
3336 | break; | 3337 | list_del_init(&cache->dirty_list); |
3337 | last = 0; | 3338 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) |
3338 | continue; | 3339 | cache_save_setup(cache, trans, path); |
3339 | } | 3340 | if (!ret) |
3340 | err = cache_save_setup(cache, trans, path); | 3341 | ret = btrfs_run_delayed_refs(trans, root, |
3341 | last = cache->key.objectid + cache->key.offset; | 3342 | (unsigned long) -1); |
3342 | btrfs_put_block_group(cache); | 3343 | if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) |
3343 | } | 3344 | btrfs_write_out_cache(root, trans, cache, path); |
3344 | 3345 | if (!ret) | |
3345 | while (1) { | 3346 | ret = write_one_cache_group(trans, root, path, cache); |
3346 | if (last == 0) { | ||
3347 | err = btrfs_run_delayed_refs(trans, root, | ||
3348 | (unsigned long)-1); | ||
3349 | if (err) /* File system offline */ | ||
3350 | goto out; | ||
3351 | } | ||
3352 | |||
3353 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
3354 | while (cache) { | ||
3355 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) { | ||
3356 | btrfs_put_block_group(cache); | ||
3357 | goto again; | ||
3358 | } | ||
3359 | |||
3360 | if (cache->dirty) | ||
3361 | break; | ||
3362 | cache = next_block_group(root, cache); | ||
3363 | } | ||
3364 | if (!cache) { | ||
3365 | if (last == 0) | ||
3366 | break; | ||
3367 | last = 0; | ||
3368 | continue; | ||
3369 | } | ||
3370 | |||
3371 | if (cache->disk_cache_state == BTRFS_DC_SETUP) | ||
3372 | cache->disk_cache_state = BTRFS_DC_NEED_WRITE; | ||
3373 | cache->dirty = 0; | ||
3374 | last = cache->key.objectid + cache->key.offset; | ||
3375 | |||
3376 | err = write_one_cache_group(trans, root, path, cache); | ||
3377 | btrfs_put_block_group(cache); | ||
3378 | if (err) /* File system offline */ | ||
3379 | goto out; | ||
3380 | } | ||
3381 | |||
3382 | while (1) { | ||
3383 | /* | ||
3384 | * I don't think this is needed since we're just marking our | ||
3385 | * preallocated extent as written, but just in case it can't | ||
3386 | * hurt. | ||
3387 | */ | ||
3388 | if (last == 0) { | ||
3389 | err = btrfs_run_delayed_refs(trans, root, | ||
3390 | (unsigned long)-1); | ||
3391 | if (err) /* File system offline */ | ||
3392 | goto out; | ||
3393 | } | ||
3394 | |||
3395 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
3396 | while (cache) { | ||
3397 | /* | ||
3398 | * Really this shouldn't happen, but it could if we | ||
3399 | * couldn't write the entire preallocated extent and | ||
3400 | * splitting the extent resulted in a new block. | ||
3401 | */ | ||
3402 | if (cache->dirty) { | ||
3403 | btrfs_put_block_group(cache); | ||
3404 | goto again; | ||
3405 | } | ||
3406 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
3407 | break; | ||
3408 | cache = next_block_group(root, cache); | ||
3409 | } | ||
3410 | if (!cache) { | ||
3411 | if (last == 0) | ||
3412 | break; | ||
3413 | last = 0; | ||
3414 | continue; | ||
3415 | } | ||
3416 | |||
3417 | err = btrfs_write_out_cache(root, trans, cache, path); | ||
3418 | |||
3419 | /* | ||
3420 | * If we didn't have an error then the cache state is still | ||
3421 | * NEED_WRITE, so we can set it to WRITTEN. | ||
3422 | */ | ||
3423 | if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
3424 | cache->disk_cache_state = BTRFS_DC_WRITTEN; | ||
3425 | last = cache->key.objectid + cache->key.offset; | ||
3426 | btrfs_put_block_group(cache); | 3347 | btrfs_put_block_group(cache); |
3427 | } | 3348 | } |
3428 | out: | ||
3429 | 3349 | ||
3430 | btrfs_free_path(path); | 3350 | btrfs_free_path(path); |
3431 | return err; | 3351 | return ret; |
3432 | } | 3352 | } |
3433 | 3353 | ||
3434 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | 3354 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) |
@@ -5043,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root, | |||
5043 | /** | 4963 | /** |
5044 | * drop_outstanding_extent - drop an outstanding extent | 4964 | * drop_outstanding_extent - drop an outstanding extent |
5045 | * @inode: the inode we're dropping the extent for | 4965 | * @inode: the inode we're dropping the extent for |
4966 | * @num_bytes: the number of bytes we're relaseing. | ||
5046 | * | 4967 | * |
5047 | * This is called when we are freeing up an outstanding extent, either called | 4968 | * This is called when we are freeing up an outstanding extent, either called |
5048 | * after an error or after an extent is written. This will return the number of | 4969 | * after an error or after an extent is written. This will return the number of |
5049 | * reserved extents that need to be freed. This must be called with | 4970 | * reserved extents that need to be freed. This must be called with |
5050 | * BTRFS_I(inode)->lock held. | 4971 | * BTRFS_I(inode)->lock held. |
5051 | */ | 4972 | */ |
5052 | static unsigned drop_outstanding_extent(struct inode *inode) | 4973 | static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) |
5053 | { | 4974 | { |
5054 | unsigned drop_inode_space = 0; | 4975 | unsigned drop_inode_space = 0; |
5055 | unsigned dropped_extents = 0; | 4976 | unsigned dropped_extents = 0; |
4977 | unsigned num_extents = 0; | ||
5056 | 4978 | ||
5057 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | 4979 | num_extents = (unsigned)div64_u64(num_bytes + |
5058 | BTRFS_I(inode)->outstanding_extents--; | 4980 | BTRFS_MAX_EXTENT_SIZE - 1, |
4981 | BTRFS_MAX_EXTENT_SIZE); | ||
4982 | ASSERT(num_extents); | ||
4983 | ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents); | ||
4984 | BTRFS_I(inode)->outstanding_extents -= num_extents; | ||
5059 | 4985 | ||
5060 | if (BTRFS_I(inode)->outstanding_extents == 0 && | 4986 | if (BTRFS_I(inode)->outstanding_extents == 0 && |
5061 | test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | 4987 | test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
@@ -5226,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5226 | 5152 | ||
5227 | out_fail: | 5153 | out_fail: |
5228 | spin_lock(&BTRFS_I(inode)->lock); | 5154 | spin_lock(&BTRFS_I(inode)->lock); |
5229 | dropped = drop_outstanding_extent(inode); | 5155 | dropped = drop_outstanding_extent(inode, num_bytes); |
5230 | /* | 5156 | /* |
5231 | * If the inodes csum_bytes is the same as the original | 5157 | * If the inodes csum_bytes is the same as the original |
5232 | * csum_bytes then we know we haven't raced with any free()ers | 5158 | * csum_bytes then we know we haven't raced with any free()ers |
@@ -5305,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
5305 | 5231 | ||
5306 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 5232 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
5307 | spin_lock(&BTRFS_I(inode)->lock); | 5233 | spin_lock(&BTRFS_I(inode)->lock); |
5308 | dropped = drop_outstanding_extent(inode); | 5234 | dropped = drop_outstanding_extent(inode, num_bytes); |
5309 | 5235 | ||
5310 | if (num_bytes) | 5236 | if (num_bytes) |
5311 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | 5237 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); |
@@ -5375,8 +5301,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | |||
5375 | btrfs_free_reserved_data_space(inode, num_bytes); | 5301 | btrfs_free_reserved_data_space(inode, num_bytes); |
5376 | } | 5302 | } |
5377 | 5303 | ||
5378 | static int update_block_group(struct btrfs_root *root, | 5304 | static int update_block_group(struct btrfs_trans_handle *trans, |
5379 | u64 bytenr, u64 num_bytes, int alloc) | 5305 | struct btrfs_root *root, u64 bytenr, |
5306 | u64 num_bytes, int alloc) | ||
5380 | { | 5307 | { |
5381 | struct btrfs_block_group_cache *cache = NULL; | 5308 | struct btrfs_block_group_cache *cache = NULL; |
5382 | struct btrfs_fs_info *info = root->fs_info; | 5309 | struct btrfs_fs_info *info = root->fs_info; |
@@ -5414,6 +5341,14 @@ static int update_block_group(struct btrfs_root *root, | |||
5414 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | 5341 | if (!alloc && cache->cached == BTRFS_CACHE_NO) |
5415 | cache_block_group(cache, 1); | 5342 | cache_block_group(cache, 1); |
5416 | 5343 | ||
5344 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
5345 | if (list_empty(&cache->dirty_list)) { | ||
5346 | list_add_tail(&cache->dirty_list, | ||
5347 | &trans->transaction->dirty_bgs); | ||
5348 | btrfs_get_block_group(cache); | ||
5349 | } | ||
5350 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
5351 | |||
5417 | byte_in_group = bytenr - cache->key.objectid; | 5352 | byte_in_group = bytenr - cache->key.objectid; |
5418 | WARN_ON(byte_in_group > cache->key.offset); | 5353 | WARN_ON(byte_in_group > cache->key.offset); |
5419 | 5354 | ||
@@ -5424,7 +5359,6 @@ static int update_block_group(struct btrfs_root *root, | |||
5424 | cache->disk_cache_state < BTRFS_DC_CLEAR) | 5359 | cache->disk_cache_state < BTRFS_DC_CLEAR) |
5425 | cache->disk_cache_state = BTRFS_DC_CLEAR; | 5360 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
5426 | 5361 | ||
5427 | cache->dirty = 1; | ||
5428 | old_val = btrfs_block_group_used(&cache->item); | 5362 | old_val = btrfs_block_group_used(&cache->item); |
5429 | num_bytes = min(total, cache->key.offset - byte_in_group); | 5363 | num_bytes = min(total, cache->key.offset - byte_in_group); |
5430 | if (alloc) { | 5364 | if (alloc) { |
@@ -5807,10 +5741,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
5807 | unpin = &fs_info->freed_extents[0]; | 5741 | unpin = &fs_info->freed_extents[0]; |
5808 | 5742 | ||
5809 | while (1) { | 5743 | while (1) { |
5744 | mutex_lock(&fs_info->unused_bg_unpin_mutex); | ||
5810 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 5745 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
5811 | EXTENT_DIRTY, NULL); | 5746 | EXTENT_DIRTY, NULL); |
5812 | if (ret) | 5747 | if (ret) { |
5748 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); | ||
5813 | break; | 5749 | break; |
5750 | } | ||
5814 | 5751 | ||
5815 | if (btrfs_test_opt(root, DISCARD)) | 5752 | if (btrfs_test_opt(root, DISCARD)) |
5816 | ret = btrfs_discard_extent(root, start, | 5753 | ret = btrfs_discard_extent(root, start, |
@@ -5818,6 +5755,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
5818 | 5755 | ||
5819 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 5756 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
5820 | unpin_extent_range(root, start, end, true); | 5757 | unpin_extent_range(root, start, end, true); |
5758 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); | ||
5821 | cond_resched(); | 5759 | cond_resched(); |
5822 | } | 5760 | } |
5823 | 5761 | ||
@@ -6103,7 +6041,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
6103 | } | 6041 | } |
6104 | } | 6042 | } |
6105 | 6043 | ||
6106 | ret = update_block_group(root, bytenr, num_bytes, 0); | 6044 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
6107 | if (ret) { | 6045 | if (ret) { |
6108 | btrfs_abort_transaction(trans, extent_root, ret); | 6046 | btrfs_abort_transaction(trans, extent_root, ret); |
6109 | goto out; | 6047 | goto out; |
@@ -6205,7 +6143,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
6205 | struct extent_buffer *buf, | 6143 | struct extent_buffer *buf, |
6206 | u64 parent, int last_ref) | 6144 | u64 parent, int last_ref) |
6207 | { | 6145 | { |
6208 | struct btrfs_block_group_cache *cache = NULL; | ||
6209 | int pin = 1; | 6146 | int pin = 1; |
6210 | int ret; | 6147 | int ret; |
6211 | 6148 | ||
@@ -6221,17 +6158,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
6221 | if (!last_ref) | 6158 | if (!last_ref) |
6222 | return; | 6159 | return; |
6223 | 6160 | ||
6224 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
6225 | |||
6226 | if (btrfs_header_generation(buf) == trans->transid) { | 6161 | if (btrfs_header_generation(buf) == trans->transid) { |
6162 | struct btrfs_block_group_cache *cache; | ||
6163 | |||
6227 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 6164 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
6228 | ret = check_ref_cleanup(trans, root, buf->start); | 6165 | ret = check_ref_cleanup(trans, root, buf->start); |
6229 | if (!ret) | 6166 | if (!ret) |
6230 | goto out; | 6167 | goto out; |
6231 | } | 6168 | } |
6232 | 6169 | ||
6170 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
6171 | |||
6233 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 6172 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
6234 | pin_down_extent(root, cache, buf->start, buf->len, 1); | 6173 | pin_down_extent(root, cache, buf->start, buf->len, 1); |
6174 | btrfs_put_block_group(cache); | ||
6235 | goto out; | 6175 | goto out; |
6236 | } | 6176 | } |
6237 | 6177 | ||
@@ -6239,6 +6179,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
6239 | 6179 | ||
6240 | btrfs_add_free_space(cache, buf->start, buf->len); | 6180 | btrfs_add_free_space(cache, buf->start, buf->len); |
6241 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); | 6181 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); |
6182 | btrfs_put_block_group(cache); | ||
6242 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); | 6183 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); |
6243 | pin = 0; | 6184 | pin = 0; |
6244 | } | 6185 | } |
@@ -6253,7 +6194,6 @@ out: | |||
6253 | * anymore. | 6194 | * anymore. |
6254 | */ | 6195 | */ |
6255 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | 6196 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); |
6256 | btrfs_put_block_group(cache); | ||
6257 | } | 6197 | } |
6258 | 6198 | ||
6259 | /* Can return -ENOMEM */ | 6199 | /* Can return -ENOMEM */ |
@@ -7063,7 +7003,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
7063 | if (ret) | 7003 | if (ret) |
7064 | return ret; | 7004 | return ret; |
7065 | 7005 | ||
7066 | ret = update_block_group(root, ins->objectid, ins->offset, 1); | 7006 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
7067 | if (ret) { /* -ENOENT, logic error */ | 7007 | if (ret) { /* -ENOENT, logic error */ |
7068 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 7008 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
7069 | ins->objectid, ins->offset); | 7009 | ins->objectid, ins->offset); |
@@ -7152,7 +7092,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7152 | return ret; | 7092 | return ret; |
7153 | } | 7093 | } |
7154 | 7094 | ||
7155 | ret = update_block_group(root, ins->objectid, root->nodesize, 1); | 7095 | ret = update_block_group(trans, root, ins->objectid, root->nodesize, |
7096 | 1); | ||
7156 | if (ret) { /* -ENOENT, logic error */ | 7097 | if (ret) { /* -ENOENT, logic error */ |
7157 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 7098 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
7158 | ins->objectid, ins->offset); | 7099 | ins->objectid, ins->offset); |
@@ -7217,11 +7158,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
7217 | 7158 | ||
7218 | static struct extent_buffer * | 7159 | static struct extent_buffer * |
7219 | btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 7160 | btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
7220 | u64 bytenr, u32 blocksize, int level) | 7161 | u64 bytenr, int level) |
7221 | { | 7162 | { |
7222 | struct extent_buffer *buf; | 7163 | struct extent_buffer *buf; |
7223 | 7164 | ||
7224 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 7165 | buf = btrfs_find_create_tree_block(root, bytenr); |
7225 | if (!buf) | 7166 | if (!buf) |
7226 | return ERR_PTR(-ENOMEM); | 7167 | return ERR_PTR(-ENOMEM); |
7227 | btrfs_set_header_generation(buf, trans->transid); | 7168 | btrfs_set_header_generation(buf, trans->transid); |
@@ -7340,7 +7281,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
7340 | 7281 | ||
7341 | if (btrfs_test_is_dummy_root(root)) { | 7282 | if (btrfs_test_is_dummy_root(root)) { |
7342 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, | 7283 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, |
7343 | blocksize, level); | 7284 | level); |
7344 | if (!IS_ERR(buf)) | 7285 | if (!IS_ERR(buf)) |
7345 | root->alloc_bytenr += blocksize; | 7286 | root->alloc_bytenr += blocksize; |
7346 | return buf; | 7287 | return buf; |
@@ -7357,8 +7298,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
7357 | return ERR_PTR(ret); | 7298 | return ERR_PTR(ret); |
7358 | } | 7299 | } |
7359 | 7300 | ||
7360 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 7301 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); |
7361 | blocksize, level); | ||
7362 | BUG_ON(IS_ERR(buf)); /* -ENOMEM */ | 7302 | BUG_ON(IS_ERR(buf)); /* -ENOMEM */ |
7363 | 7303 | ||
7364 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | 7304 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { |
@@ -7487,7 +7427,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
7487 | continue; | 7427 | continue; |
7488 | } | 7428 | } |
7489 | reada: | 7429 | reada: |
7490 | readahead_tree_block(root, bytenr, blocksize); | 7430 | readahead_tree_block(root, bytenr); |
7491 | nread++; | 7431 | nread++; |
7492 | } | 7432 | } |
7493 | wc->reada_slot = slot; | 7433 | wc->reada_slot = slot; |
@@ -7828,7 +7768,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
7828 | 7768 | ||
7829 | next = btrfs_find_tree_block(root, bytenr); | 7769 | next = btrfs_find_tree_block(root, bytenr); |
7830 | if (!next) { | 7770 | if (!next) { |
7831 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 7771 | next = btrfs_find_create_tree_block(root, bytenr); |
7832 | if (!next) | 7772 | if (!next) |
7833 | return -ENOMEM; | 7773 | return -ENOMEM; |
7834 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, | 7774 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, |
@@ -8548,14 +8488,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
8548 | if (IS_ERR(trans)) | 8488 | if (IS_ERR(trans)) |
8549 | return PTR_ERR(trans); | 8489 | return PTR_ERR(trans); |
8550 | 8490 | ||
8551 | alloc_flags = update_block_group_flags(root, cache->flags); | ||
8552 | if (alloc_flags != cache->flags) { | ||
8553 | ret = do_chunk_alloc(trans, root, alloc_flags, | ||
8554 | CHUNK_ALLOC_FORCE); | ||
8555 | if (ret < 0) | ||
8556 | goto out; | ||
8557 | } | ||
8558 | |||
8559 | ret = set_block_group_ro(cache, 0); | 8491 | ret = set_block_group_ro(cache, 0); |
8560 | if (!ret) | 8492 | if (!ret) |
8561 | goto out; | 8493 | goto out; |
@@ -8566,6 +8498,11 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
8566 | goto out; | 8498 | goto out; |
8567 | ret = set_block_group_ro(cache, 0); | 8499 | ret = set_block_group_ro(cache, 0); |
8568 | out: | 8500 | out: |
8501 | if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { | ||
8502 | alloc_flags = update_block_group_flags(root, cache->flags); | ||
8503 | check_system_chunk(trans, root, alloc_flags); | ||
8504 | } | ||
8505 | |||
8569 | btrfs_end_transaction(trans, root); | 8506 | btrfs_end_transaction(trans, root); |
8570 | return ret; | 8507 | return ret; |
8571 | } | 8508 | } |
@@ -9005,6 +8942,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) | |||
9005 | INIT_LIST_HEAD(&cache->cluster_list); | 8942 | INIT_LIST_HEAD(&cache->cluster_list); |
9006 | INIT_LIST_HEAD(&cache->bg_list); | 8943 | INIT_LIST_HEAD(&cache->bg_list); |
9007 | INIT_LIST_HEAD(&cache->ro_list); | 8944 | INIT_LIST_HEAD(&cache->ro_list); |
8945 | INIT_LIST_HEAD(&cache->dirty_list); | ||
9008 | btrfs_init_free_space_ctl(cache); | 8946 | btrfs_init_free_space_ctl(cache); |
9009 | atomic_set(&cache->trimming, 0); | 8947 | atomic_set(&cache->trimming, 0); |
9010 | 8948 | ||
@@ -9068,9 +9006,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
9068 | * b) Setting 'dirty flag' makes sure that we flush | 9006 | * b) Setting 'dirty flag' makes sure that we flush |
9069 | * the new space cache info onto disk. | 9007 | * the new space cache info onto disk. |
9070 | */ | 9008 | */ |
9071 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
9072 | if (btrfs_test_opt(root, SPACE_CACHE)) | 9009 | if (btrfs_test_opt(root, SPACE_CACHE)) |
9073 | cache->dirty = 1; | 9010 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
9074 | } | 9011 | } |
9075 | 9012 | ||
9076 | read_extent_buffer(leaf, &cache->item, | 9013 | read_extent_buffer(leaf, &cache->item, |
@@ -9460,6 +9397,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9460 | } | 9397 | } |
9461 | } | 9398 | } |
9462 | 9399 | ||
9400 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
9401 | if (!list_empty(&block_group->dirty_list)) { | ||
9402 | list_del_init(&block_group->dirty_list); | ||
9403 | btrfs_put_block_group(block_group); | ||
9404 | } | ||
9405 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
9406 | |||
9463 | btrfs_remove_free_space_cache(block_group); | 9407 | btrfs_remove_free_space_cache(block_group); |
9464 | 9408 | ||
9465 | spin_lock(&block_group->space_info->lock); | 9409 | spin_lock(&block_group->space_info->lock); |
@@ -9611,7 +9555,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9611 | * Want to do this before we do anything else so we can recover | 9555 | * Want to do this before we do anything else so we can recover |
9612 | * properly if we fail to join the transaction. | 9556 | * properly if we fail to join the transaction. |
9613 | */ | 9557 | */ |
9614 | trans = btrfs_join_transaction(root); | 9558 | /* 1 for btrfs_orphan_reserve_metadata() */ |
9559 | trans = btrfs_start_transaction(root, 1); | ||
9615 | if (IS_ERR(trans)) { | 9560 | if (IS_ERR(trans)) { |
9616 | btrfs_set_block_group_rw(root, block_group); | 9561 | btrfs_set_block_group_rw(root, block_group); |
9617 | ret = PTR_ERR(trans); | 9562 | ret = PTR_ERR(trans); |
@@ -9624,18 +9569,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9624 | */ | 9569 | */ |
9625 | start = block_group->key.objectid; | 9570 | start = block_group->key.objectid; |
9626 | end = start + block_group->key.offset - 1; | 9571 | end = start + block_group->key.offset - 1; |
9572 | /* | ||
9573 | * Hold the unused_bg_unpin_mutex lock to avoid racing with | ||
9574 | * btrfs_finish_extent_commit(). If we are at transaction N, | ||
9575 | * another task might be running finish_extent_commit() for the | ||
9576 | * previous transaction N - 1, and have seen a range belonging | ||
9577 | * to the block group in freed_extents[] before we were able to | ||
9578 | * clear the whole block group range from freed_extents[]. This | ||
9579 | * means that task can lookup for the block group after we | ||
9580 | * unpinned it from freed_extents[] and removed it, leading to | ||
9581 | * a BUG_ON() at btrfs_unpin_extent_range(). | ||
9582 | */ | ||
9583 | mutex_lock(&fs_info->unused_bg_unpin_mutex); | ||
9627 | ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, | 9584 | ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, |
9628 | EXTENT_DIRTY, GFP_NOFS); | 9585 | EXTENT_DIRTY, GFP_NOFS); |
9629 | if (ret) { | 9586 | if (ret) { |
9587 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); | ||
9630 | btrfs_set_block_group_rw(root, block_group); | 9588 | btrfs_set_block_group_rw(root, block_group); |
9631 | goto end_trans; | 9589 | goto end_trans; |
9632 | } | 9590 | } |
9633 | ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, | 9591 | ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, |
9634 | EXTENT_DIRTY, GFP_NOFS); | 9592 | EXTENT_DIRTY, GFP_NOFS); |
9635 | if (ret) { | 9593 | if (ret) { |
9594 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); | ||
9636 | btrfs_set_block_group_rw(root, block_group); | 9595 | btrfs_set_block_group_rw(root, block_group); |
9637 | goto end_trans; | 9596 | goto end_trans; |
9638 | } | 9597 | } |
9598 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); | ||
9639 | 9599 | ||
9640 | /* Reset pinned so btrfs_put_block_group doesn't complain */ | 9600 | /* Reset pinned so btrfs_put_block_group doesn't complain */ |
9641 | block_group->pinned = 0; | 9601 | block_group->pinned = 0; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c73df6a7c9b6..c7233ff1d533 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -64,7 +64,7 @@ void btrfs_leak_debug_check(void) | |||
64 | 64 | ||
65 | while (!list_empty(&states)) { | 65 | while (!list_empty(&states)) { |
66 | state = list_entry(states.next, struct extent_state, leak_list); | 66 | state = list_entry(states.next, struct extent_state, leak_list); |
67 | pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n", | 67 | pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n", |
68 | state->start, state->end, state->state, | 68 | state->start, state->end, state->state, |
69 | extent_state_in_tree(state), | 69 | extent_state_in_tree(state), |
70 | atomic_read(&state->refs)); | 70 | atomic_read(&state->refs)); |
@@ -396,21 +396,21 @@ static void merge_state(struct extent_io_tree *tree, | |||
396 | } | 396 | } |
397 | 397 | ||
398 | static void set_state_cb(struct extent_io_tree *tree, | 398 | static void set_state_cb(struct extent_io_tree *tree, |
399 | struct extent_state *state, unsigned long *bits) | 399 | struct extent_state *state, unsigned *bits) |
400 | { | 400 | { |
401 | if (tree->ops && tree->ops->set_bit_hook) | 401 | if (tree->ops && tree->ops->set_bit_hook) |
402 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); | 402 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); |
403 | } | 403 | } |
404 | 404 | ||
405 | static void clear_state_cb(struct extent_io_tree *tree, | 405 | static void clear_state_cb(struct extent_io_tree *tree, |
406 | struct extent_state *state, unsigned long *bits) | 406 | struct extent_state *state, unsigned *bits) |
407 | { | 407 | { |
408 | if (tree->ops && tree->ops->clear_bit_hook) | 408 | if (tree->ops && tree->ops->clear_bit_hook) |
409 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 409 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
410 | } | 410 | } |
411 | 411 | ||
412 | static void set_state_bits(struct extent_io_tree *tree, | 412 | static void set_state_bits(struct extent_io_tree *tree, |
413 | struct extent_state *state, unsigned long *bits); | 413 | struct extent_state *state, unsigned *bits); |
414 | 414 | ||
415 | /* | 415 | /* |
416 | * insert an extent_state struct into the tree. 'bits' are set on the | 416 | * insert an extent_state struct into the tree. 'bits' are set on the |
@@ -426,7 +426,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
426 | struct extent_state *state, u64 start, u64 end, | 426 | struct extent_state *state, u64 start, u64 end, |
427 | struct rb_node ***p, | 427 | struct rb_node ***p, |
428 | struct rb_node **parent, | 428 | struct rb_node **parent, |
429 | unsigned long *bits) | 429 | unsigned *bits) |
430 | { | 430 | { |
431 | struct rb_node *node; | 431 | struct rb_node *node; |
432 | 432 | ||
@@ -511,10 +511,10 @@ static struct extent_state *next_state(struct extent_state *state) | |||
511 | */ | 511 | */ |
512 | static struct extent_state *clear_state_bit(struct extent_io_tree *tree, | 512 | static struct extent_state *clear_state_bit(struct extent_io_tree *tree, |
513 | struct extent_state *state, | 513 | struct extent_state *state, |
514 | unsigned long *bits, int wake) | 514 | unsigned *bits, int wake) |
515 | { | 515 | { |
516 | struct extent_state *next; | 516 | struct extent_state *next; |
517 | unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS; | 517 | unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS; |
518 | 518 | ||
519 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 519 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
520 | u64 range = state->end - state->start + 1; | 520 | u64 range = state->end - state->start + 1; |
@@ -570,7 +570,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err) | |||
570 | * This takes the tree lock, and returns 0 on success and < 0 on error. | 570 | * This takes the tree lock, and returns 0 on success and < 0 on error. |
571 | */ | 571 | */ |
572 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 572 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
573 | unsigned long bits, int wake, int delete, | 573 | unsigned bits, int wake, int delete, |
574 | struct extent_state **cached_state, | 574 | struct extent_state **cached_state, |
575 | gfp_t mask) | 575 | gfp_t mask) |
576 | { | 576 | { |
@@ -789,9 +789,9 @@ out: | |||
789 | 789 | ||
790 | static void set_state_bits(struct extent_io_tree *tree, | 790 | static void set_state_bits(struct extent_io_tree *tree, |
791 | struct extent_state *state, | 791 | struct extent_state *state, |
792 | unsigned long *bits) | 792 | unsigned *bits) |
793 | { | 793 | { |
794 | unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS; | 794 | unsigned bits_to_set = *bits & ~EXTENT_CTLBITS; |
795 | 795 | ||
796 | set_state_cb(tree, state, bits); | 796 | set_state_cb(tree, state, bits); |
797 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 797 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
@@ -803,7 +803,7 @@ static void set_state_bits(struct extent_io_tree *tree, | |||
803 | 803 | ||
804 | static void cache_state_if_flags(struct extent_state *state, | 804 | static void cache_state_if_flags(struct extent_state *state, |
805 | struct extent_state **cached_ptr, | 805 | struct extent_state **cached_ptr, |
806 | const u64 flags) | 806 | unsigned flags) |
807 | { | 807 | { |
808 | if (cached_ptr && !(*cached_ptr)) { | 808 | if (cached_ptr && !(*cached_ptr)) { |
809 | if (!flags || (state->state & flags)) { | 809 | if (!flags || (state->state & flags)) { |
@@ -833,7 +833,7 @@ static void cache_state(struct extent_state *state, | |||
833 | 833 | ||
834 | static int __must_check | 834 | static int __must_check |
835 | __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 835 | __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
836 | unsigned long bits, unsigned long exclusive_bits, | 836 | unsigned bits, unsigned exclusive_bits, |
837 | u64 *failed_start, struct extent_state **cached_state, | 837 | u64 *failed_start, struct extent_state **cached_state, |
838 | gfp_t mask) | 838 | gfp_t mask) |
839 | { | 839 | { |
@@ -1034,7 +1034,7 @@ search_again: | |||
1034 | } | 1034 | } |
1035 | 1035 | ||
1036 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1036 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
1037 | unsigned long bits, u64 * failed_start, | 1037 | unsigned bits, u64 * failed_start, |
1038 | struct extent_state **cached_state, gfp_t mask) | 1038 | struct extent_state **cached_state, gfp_t mask) |
1039 | { | 1039 | { |
1040 | return __set_extent_bit(tree, start, end, bits, 0, failed_start, | 1040 | return __set_extent_bit(tree, start, end, bits, 0, failed_start, |
@@ -1060,7 +1060,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1060 | * boundary bits like LOCK. | 1060 | * boundary bits like LOCK. |
1061 | */ | 1061 | */ |
1062 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1062 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
1063 | unsigned long bits, unsigned long clear_bits, | 1063 | unsigned bits, unsigned clear_bits, |
1064 | struct extent_state **cached_state, gfp_t mask) | 1064 | struct extent_state **cached_state, gfp_t mask) |
1065 | { | 1065 | { |
1066 | struct extent_state *state; | 1066 | struct extent_state *state; |
@@ -1268,14 +1268,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
1268 | } | 1268 | } |
1269 | 1269 | ||
1270 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 1270 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
1271 | unsigned long bits, gfp_t mask) | 1271 | unsigned bits, gfp_t mask) |
1272 | { | 1272 | { |
1273 | return set_extent_bit(tree, start, end, bits, NULL, | 1273 | return set_extent_bit(tree, start, end, bits, NULL, |
1274 | NULL, mask); | 1274 | NULL, mask); |
1275 | } | 1275 | } |
1276 | 1276 | ||
1277 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 1277 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
1278 | unsigned long bits, gfp_t mask) | 1278 | unsigned bits, gfp_t mask) |
1279 | { | 1279 | { |
1280 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); | 1280 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); |
1281 | } | 1281 | } |
@@ -1330,10 +1330,11 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | |||
1330 | * us if waiting is desired. | 1330 | * us if waiting is desired. |
1331 | */ | 1331 | */ |
1332 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 1332 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
1333 | unsigned long bits, struct extent_state **cached_state) | 1333 | unsigned bits, struct extent_state **cached_state) |
1334 | { | 1334 | { |
1335 | int err; | 1335 | int err; |
1336 | u64 failed_start; | 1336 | u64 failed_start; |
1337 | |||
1337 | while (1) { | 1338 | while (1) { |
1338 | err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, | 1339 | err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, |
1339 | EXTENT_LOCKED, &failed_start, | 1340 | EXTENT_LOCKED, &failed_start, |
@@ -1440,7 +1441,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
1440 | */ | 1441 | */ |
1441 | static struct extent_state * | 1442 | static struct extent_state * |
1442 | find_first_extent_bit_state(struct extent_io_tree *tree, | 1443 | find_first_extent_bit_state(struct extent_io_tree *tree, |
1443 | u64 start, unsigned long bits) | 1444 | u64 start, unsigned bits) |
1444 | { | 1445 | { |
1445 | struct rb_node *node; | 1446 | struct rb_node *node; |
1446 | struct extent_state *state; | 1447 | struct extent_state *state; |
@@ -1474,7 +1475,7 @@ out: | |||
1474 | * If nothing was found, 1 is returned. If found something, return 0. | 1475 | * If nothing was found, 1 is returned. If found something, return 0. |
1475 | */ | 1476 | */ |
1476 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1477 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
1477 | u64 *start_ret, u64 *end_ret, unsigned long bits, | 1478 | u64 *start_ret, u64 *end_ret, unsigned bits, |
1478 | struct extent_state **cached_state) | 1479 | struct extent_state **cached_state) |
1479 | { | 1480 | { |
1480 | struct extent_state *state; | 1481 | struct extent_state *state; |
@@ -1753,7 +1754,7 @@ out_failed: | |||
1753 | 1754 | ||
1754 | int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, | 1755 | int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, |
1755 | struct page *locked_page, | 1756 | struct page *locked_page, |
1756 | unsigned long clear_bits, | 1757 | unsigned clear_bits, |
1757 | unsigned long page_ops) | 1758 | unsigned long page_ops) |
1758 | { | 1759 | { |
1759 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 1760 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
@@ -1810,7 +1811,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, | |||
1810 | */ | 1811 | */ |
1811 | u64 count_range_bits(struct extent_io_tree *tree, | 1812 | u64 count_range_bits(struct extent_io_tree *tree, |
1812 | u64 *start, u64 search_end, u64 max_bytes, | 1813 | u64 *start, u64 search_end, u64 max_bytes, |
1813 | unsigned long bits, int contig) | 1814 | unsigned bits, int contig) |
1814 | { | 1815 | { |
1815 | struct rb_node *node; | 1816 | struct rb_node *node; |
1816 | struct extent_state *state; | 1817 | struct extent_state *state; |
@@ -1928,7 +1929,7 @@ out: | |||
1928 | * range is found set. | 1929 | * range is found set. |
1929 | */ | 1930 | */ |
1930 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1931 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
1931 | unsigned long bits, int filled, struct extent_state *cached) | 1932 | unsigned bits, int filled, struct extent_state *cached) |
1932 | { | 1933 | { |
1933 | struct extent_state *state = NULL; | 1934 | struct extent_state *state = NULL; |
1934 | struct rb_node *node; | 1935 | struct rb_node *node; |
@@ -2057,7 +2058,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, | |||
2057 | sector = bbio->stripes[mirror_num-1].physical >> 9; | 2058 | sector = bbio->stripes[mirror_num-1].physical >> 9; |
2058 | bio->bi_iter.bi_sector = sector; | 2059 | bio->bi_iter.bi_sector = sector; |
2059 | dev = bbio->stripes[mirror_num-1].dev; | 2060 | dev = bbio->stripes[mirror_num-1].dev; |
2060 | kfree(bbio); | 2061 | btrfs_put_bbio(bbio); |
2061 | if (!dev || !dev->bdev || !dev->writeable) { | 2062 | if (!dev || !dev->bdev || !dev->writeable) { |
2062 | bio_put(bio); | 2063 | bio_put(bio); |
2063 | return -EIO; | 2064 | return -EIO; |
@@ -2816,8 +2817,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
2816 | bio_add_page(bio, page, page_size, offset) < page_size) { | 2817 | bio_add_page(bio, page, page_size, offset) < page_size) { |
2817 | ret = submit_one_bio(rw, bio, mirror_num, | 2818 | ret = submit_one_bio(rw, bio, mirror_num, |
2818 | prev_bio_flags); | 2819 | prev_bio_flags); |
2819 | if (ret < 0) | 2820 | if (ret < 0) { |
2821 | *bio_ret = NULL; | ||
2820 | return ret; | 2822 | return ret; |
2823 | } | ||
2821 | bio = NULL; | 2824 | bio = NULL; |
2822 | } else { | 2825 | } else { |
2823 | return 0; | 2826 | return 0; |
@@ -3239,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, | |||
3239 | page, | 3242 | page, |
3240 | &delalloc_start, | 3243 | &delalloc_start, |
3241 | &delalloc_end, | 3244 | &delalloc_end, |
3242 | 128 * 1024 * 1024); | 3245 | BTRFS_MAX_EXTENT_SIZE); |
3243 | if (nr_delalloc == 0) { | 3246 | if (nr_delalloc == 0) { |
3244 | delalloc_start = delalloc_end + 1; | 3247 | delalloc_start = delalloc_end + 1; |
3245 | continue; | 3248 | continue; |
@@ -4598,11 +4601,11 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) | |||
4598 | 4601 | ||
4599 | static struct extent_buffer * | 4602 | static struct extent_buffer * |
4600 | __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, | 4603 | __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, |
4601 | unsigned long len, gfp_t mask) | 4604 | unsigned long len) |
4602 | { | 4605 | { |
4603 | struct extent_buffer *eb = NULL; | 4606 | struct extent_buffer *eb = NULL; |
4604 | 4607 | ||
4605 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 4608 | eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS); |
4606 | if (eb == NULL) | 4609 | if (eb == NULL) |
4607 | return NULL; | 4610 | return NULL; |
4608 | eb->start = start; | 4611 | eb->start = start; |
@@ -4643,7 +4646,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) | |||
4643 | struct extent_buffer *new; | 4646 | struct extent_buffer *new; |
4644 | unsigned long num_pages = num_extent_pages(src->start, src->len); | 4647 | unsigned long num_pages = num_extent_pages(src->start, src->len); |
4645 | 4648 | ||
4646 | new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); | 4649 | new = __alloc_extent_buffer(src->fs_info, src->start, src->len); |
4647 | if (new == NULL) | 4650 | if (new == NULL) |
4648 | return NULL; | 4651 | return NULL; |
4649 | 4652 | ||
@@ -4666,13 +4669,26 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) | |||
4666 | return new; | 4669 | return new; |
4667 | } | 4670 | } |
4668 | 4671 | ||
4669 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | 4672 | struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, |
4673 | u64 start) | ||
4670 | { | 4674 | { |
4671 | struct extent_buffer *eb; | 4675 | struct extent_buffer *eb; |
4672 | unsigned long num_pages = num_extent_pages(0, len); | 4676 | unsigned long len; |
4677 | unsigned long num_pages; | ||
4673 | unsigned long i; | 4678 | unsigned long i; |
4674 | 4679 | ||
4675 | eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); | 4680 | if (!fs_info) { |
4681 | /* | ||
4682 | * Called only from tests that don't always have a fs_info | ||
4683 | * available, but we know that nodesize is 4096 | ||
4684 | */ | ||
4685 | len = 4096; | ||
4686 | } else { | ||
4687 | len = fs_info->tree_root->nodesize; | ||
4688 | } | ||
4689 | num_pages = num_extent_pages(0, len); | ||
4690 | |||
4691 | eb = __alloc_extent_buffer(fs_info, start, len); | ||
4676 | if (!eb) | 4692 | if (!eb) |
4677 | return NULL; | 4693 | return NULL; |
4678 | 4694 | ||
@@ -4762,7 +4778,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4762 | 4778 | ||
4763 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 4779 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
4764 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | 4780 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
4765 | u64 start, unsigned long len) | 4781 | u64 start) |
4766 | { | 4782 | { |
4767 | struct extent_buffer *eb, *exists = NULL; | 4783 | struct extent_buffer *eb, *exists = NULL; |
4768 | int ret; | 4784 | int ret; |
@@ -4770,7 +4786,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4770 | eb = find_extent_buffer(fs_info, start); | 4786 | eb = find_extent_buffer(fs_info, start); |
4771 | if (eb) | 4787 | if (eb) |
4772 | return eb; | 4788 | return eb; |
4773 | eb = alloc_dummy_extent_buffer(start, len); | 4789 | eb = alloc_dummy_extent_buffer(fs_info, start); |
4774 | if (!eb) | 4790 | if (!eb) |
4775 | return NULL; | 4791 | return NULL; |
4776 | eb->fs_info = fs_info; | 4792 | eb->fs_info = fs_info; |
@@ -4808,8 +4824,9 @@ free_eb: | |||
4808 | #endif | 4824 | #endif |
4809 | 4825 | ||
4810 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | 4826 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
4811 | u64 start, unsigned long len) | 4827 | u64 start) |
4812 | { | 4828 | { |
4829 | unsigned long len = fs_info->tree_root->nodesize; | ||
4813 | unsigned long num_pages = num_extent_pages(start, len); | 4830 | unsigned long num_pages = num_extent_pages(start, len); |
4814 | unsigned long i; | 4831 | unsigned long i; |
4815 | unsigned long index = start >> PAGE_CACHE_SHIFT; | 4832 | unsigned long index = start >> PAGE_CACHE_SHIFT; |
@@ -4824,7 +4841,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4824 | if (eb) | 4841 | if (eb) |
4825 | return eb; | 4842 | return eb; |
4826 | 4843 | ||
4827 | eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS); | 4844 | eb = __alloc_extent_buffer(fs_info, start, len); |
4828 | if (!eb) | 4845 | if (!eb) |
4829 | return NULL; | 4846 | return NULL; |
4830 | 4847 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ece9ce87edff..695b0ccfb755 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -4,22 +4,22 @@ | |||
4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
5 | 5 | ||
6 | /* bits for the extent state */ | 6 | /* bits for the extent state */ |
7 | #define EXTENT_DIRTY 1 | 7 | #define EXTENT_DIRTY (1U << 0) |
8 | #define EXTENT_WRITEBACK (1 << 1) | 8 | #define EXTENT_WRITEBACK (1U << 1) |
9 | #define EXTENT_UPTODATE (1 << 2) | 9 | #define EXTENT_UPTODATE (1U << 2) |
10 | #define EXTENT_LOCKED (1 << 3) | 10 | #define EXTENT_LOCKED (1U << 3) |
11 | #define EXTENT_NEW (1 << 4) | 11 | #define EXTENT_NEW (1U << 4) |
12 | #define EXTENT_DELALLOC (1 << 5) | 12 | #define EXTENT_DELALLOC (1U << 5) |
13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1U << 6) |
14 | #define EXTENT_BOUNDARY (1 << 9) | 14 | #define EXTENT_BOUNDARY (1U << 9) |
15 | #define EXTENT_NODATASUM (1 << 10) | 15 | #define EXTENT_NODATASUM (1U << 10) |
16 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 16 | #define EXTENT_DO_ACCOUNTING (1U << 11) |
17 | #define EXTENT_FIRST_DELALLOC (1 << 12) | 17 | #define EXTENT_FIRST_DELALLOC (1U << 12) |
18 | #define EXTENT_NEED_WAIT (1 << 13) | 18 | #define EXTENT_NEED_WAIT (1U << 13) |
19 | #define EXTENT_DAMAGED (1 << 14) | 19 | #define EXTENT_DAMAGED (1U << 14) |
20 | #define EXTENT_NORESERVE (1 << 15) | 20 | #define EXTENT_NORESERVE (1U << 15) |
21 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 21 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
22 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 22 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * flags for bio submission. The high bits indicate the compression | 25 | * flags for bio submission. The high bits indicate the compression |
@@ -81,9 +81,9 @@ struct extent_io_ops { | |||
81 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 81 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
82 | struct extent_state *state, int uptodate); | 82 | struct extent_state *state, int uptodate); |
83 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, | 83 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
84 | unsigned long *bits); | 84 | unsigned *bits); |
85 | void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 85 | void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
86 | unsigned long *bits); | 86 | unsigned *bits); |
87 | void (*merge_extent_hook)(struct inode *inode, | 87 | void (*merge_extent_hook)(struct inode *inode, |
88 | struct extent_state *new, | 88 | struct extent_state *new, |
89 | struct extent_state *other); | 89 | struct extent_state *other); |
@@ -108,7 +108,7 @@ struct extent_state { | |||
108 | /* ADD NEW ELEMENTS AFTER THIS */ | 108 | /* ADD NEW ELEMENTS AFTER THIS */ |
109 | wait_queue_head_t wq; | 109 | wait_queue_head_t wq; |
110 | atomic_t refs; | 110 | atomic_t refs; |
111 | unsigned long state; | 111 | unsigned state; |
112 | 112 | ||
113 | /* for use by the FS */ | 113 | /* for use by the FS */ |
114 | u64 private; | 114 | u64 private; |
@@ -188,7 +188,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
188 | int try_release_extent_buffer(struct page *page); | 188 | int try_release_extent_buffer(struct page *page); |
189 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); | 189 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); |
190 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 190 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
191 | unsigned long bits, struct extent_state **cached); | 191 | unsigned bits, struct extent_state **cached); |
192 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); | 192 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); |
193 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | 193 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, |
194 | struct extent_state **cached, gfp_t mask); | 194 | struct extent_state **cached, gfp_t mask); |
@@ -202,21 +202,21 @@ void extent_io_exit(void); | |||
202 | 202 | ||
203 | u64 count_range_bits(struct extent_io_tree *tree, | 203 | u64 count_range_bits(struct extent_io_tree *tree, |
204 | u64 *start, u64 search_end, | 204 | u64 *start, u64 search_end, |
205 | u64 max_bytes, unsigned long bits, int contig); | 205 | u64 max_bytes, unsigned bits, int contig); |
206 | 206 | ||
207 | void free_extent_state(struct extent_state *state); | 207 | void free_extent_state(struct extent_state *state); |
208 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 208 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
209 | unsigned long bits, int filled, | 209 | unsigned bits, int filled, |
210 | struct extent_state *cached_state); | 210 | struct extent_state *cached_state); |
211 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 211 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
212 | unsigned long bits, gfp_t mask); | 212 | unsigned bits, gfp_t mask); |
213 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 213 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
214 | unsigned long bits, int wake, int delete, | 214 | unsigned bits, int wake, int delete, |
215 | struct extent_state **cached, gfp_t mask); | 215 | struct extent_state **cached, gfp_t mask); |
216 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 216 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
217 | unsigned long bits, gfp_t mask); | 217 | unsigned bits, gfp_t mask); |
218 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 218 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
219 | unsigned long bits, u64 *failed_start, | 219 | unsigned bits, u64 *failed_start, |
220 | struct extent_state **cached_state, gfp_t mask); | 220 | struct extent_state **cached_state, gfp_t mask); |
221 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 221 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
222 | struct extent_state **cached_state, gfp_t mask); | 222 | struct extent_state **cached_state, gfp_t mask); |
@@ -229,14 +229,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
229 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 229 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
230 | gfp_t mask); | 230 | gfp_t mask); |
231 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 231 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
232 | unsigned long bits, unsigned long clear_bits, | 232 | unsigned bits, unsigned clear_bits, |
233 | struct extent_state **cached_state, gfp_t mask); | 233 | struct extent_state **cached_state, gfp_t mask); |
234 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 234 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
235 | struct extent_state **cached_state, gfp_t mask); | 235 | struct extent_state **cached_state, gfp_t mask); |
236 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | 236 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, |
237 | struct extent_state **cached_state, gfp_t mask); | 237 | struct extent_state **cached_state, gfp_t mask); |
238 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 238 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
239 | u64 *start_ret, u64 *end_ret, unsigned long bits, | 239 | u64 *start_ret, u64 *end_ret, unsigned bits, |
240 | struct extent_state **cached_state); | 240 | struct extent_state **cached_state); |
241 | int extent_invalidatepage(struct extent_io_tree *tree, | 241 | int extent_invalidatepage(struct extent_io_tree *tree, |
242 | struct page *page, unsigned long offset); | 242 | struct page *page, unsigned long offset); |
@@ -262,8 +262,9 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); | |||
262 | void set_page_extent_mapped(struct page *page); | 262 | void set_page_extent_mapped(struct page *page); |
263 | 263 | ||
264 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | 264 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
265 | u64 start, unsigned long len); | 265 | u64 start); |
266 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); | 266 | struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, |
267 | u64 start); | ||
267 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); | 268 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); |
268 | struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, | 269 | struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, |
269 | u64 start); | 270 | u64 start); |
@@ -322,7 +323,7 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); | |||
322 | int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); | 323 | int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); |
323 | int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, | 324 | int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, |
324 | struct page *locked_page, | 325 | struct page *locked_page, |
325 | unsigned long bits_to_clear, | 326 | unsigned bits_to_clear, |
326 | unsigned long page_ops); | 327 | unsigned long page_ops); |
327 | struct bio * | 328 | struct bio * |
328 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 329 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
@@ -377,5 +378,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
377 | u64 *end, u64 max_bytes); | 378 | u64 *end, u64 max_bytes); |
378 | #endif | 379 | #endif |
379 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | 380 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
380 | u64 start, unsigned long len); | 381 | u64 start); |
381 | #endif | 382 | #endif |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d6c03f7f136b..a71978578fa7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -651,15 +651,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
651 | struct io_ctl io_ctl; | 651 | struct io_ctl io_ctl; |
652 | struct btrfs_key key; | 652 | struct btrfs_key key; |
653 | struct btrfs_free_space *e, *n; | 653 | struct btrfs_free_space *e, *n; |
654 | struct list_head bitmaps; | 654 | LIST_HEAD(bitmaps); |
655 | u64 num_entries; | 655 | u64 num_entries; |
656 | u64 num_bitmaps; | 656 | u64 num_bitmaps; |
657 | u64 generation; | 657 | u64 generation; |
658 | u8 type; | 658 | u8 type; |
659 | int ret = 0; | 659 | int ret = 0; |
660 | 660 | ||
661 | INIT_LIST_HEAD(&bitmaps); | ||
662 | |||
663 | /* Nothing in the space cache, goodbye */ | 661 | /* Nothing in the space cache, goodbye */ |
664 | if (!i_size_read(inode)) | 662 | if (!i_size_read(inode)) |
665 | return 0; | 663 | return 0; |
@@ -1243,6 +1241,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
1243 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 1241 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
1244 | struct inode *inode; | 1242 | struct inode *inode; |
1245 | int ret = 0; | 1243 | int ret = 0; |
1244 | enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN; | ||
1246 | 1245 | ||
1247 | root = root->fs_info->tree_root; | 1246 | root = root->fs_info->tree_root; |
1248 | 1247 | ||
@@ -1266,9 +1265,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
1266 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, | 1265 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, |
1267 | path, block_group->key.objectid); | 1266 | path, block_group->key.objectid); |
1268 | if (ret) { | 1267 | if (ret) { |
1269 | spin_lock(&block_group->lock); | 1268 | dcs = BTRFS_DC_ERROR; |
1270 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
1271 | spin_unlock(&block_group->lock); | ||
1272 | ret = 0; | 1269 | ret = 0; |
1273 | #ifdef DEBUG | 1270 | #ifdef DEBUG |
1274 | btrfs_err(root->fs_info, | 1271 | btrfs_err(root->fs_info, |
@@ -1277,6 +1274,9 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
1277 | #endif | 1274 | #endif |
1278 | } | 1275 | } |
1279 | 1276 | ||
1277 | spin_lock(&block_group->lock); | ||
1278 | block_group->disk_cache_state = dcs; | ||
1279 | spin_unlock(&block_group->lock); | ||
1280 | iput(inode); | 1280 | iput(inode); |
1281 | return ret; | 1281 | return ret; |
1282 | } | 1282 | } |
@@ -2903,7 +2903,6 @@ int btrfs_find_space_cluster(struct btrfs_root *root, | |||
2903 | trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, | 2903 | trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, |
2904 | min_bytes); | 2904 | min_bytes); |
2905 | 2905 | ||
2906 | INIT_LIST_HEAD(&bitmaps); | ||
2907 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, | 2906 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, |
2908 | bytes + empty_size, | 2907 | bytes + empty_size, |
2909 | cont1_bytes, min_bytes); | 2908 | cont1_bytes, min_bytes); |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 8ffa4783cbf4..265e03c73f4d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -344,6 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
344 | return -ENOMEM; | 344 | return -ENOMEM; |
345 | 345 | ||
346 | path->leave_spinning = 1; | 346 | path->leave_spinning = 1; |
347 | path->skip_release_on_error = 1; | ||
347 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 348 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
348 | ins_len); | 349 | ins_len); |
349 | if (ret == -EEXIST) { | 350 | if (ret == -EEXIST) { |
@@ -362,8 +363,12 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
362 | ptr = (unsigned long)(ref + 1); | 363 | ptr = (unsigned long)(ref + 1); |
363 | ret = 0; | 364 | ret = 0; |
364 | } else if (ret < 0) { | 365 | } else if (ret < 0) { |
365 | if (ret == -EOVERFLOW) | 366 | if (ret == -EOVERFLOW) { |
366 | ret = -EMLINK; | 367 | if (find_name_in_backref(path, name, name_len, &ref)) |
368 | ret = -EEXIST; | ||
369 | else | ||
370 | ret = -EMLINK; | ||
371 | } | ||
367 | goto out; | 372 | goto out; |
368 | } else { | 373 | } else { |
369 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 374 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 54bcf639d1cf..a85c23dfcddb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1530 | static void btrfs_split_extent_hook(struct inode *inode, | 1530 | static void btrfs_split_extent_hook(struct inode *inode, |
1531 | struct extent_state *orig, u64 split) | 1531 | struct extent_state *orig, u64 split) |
1532 | { | 1532 | { |
1533 | u64 size; | ||
1534 | |||
1533 | /* not delalloc, ignore it */ | 1535 | /* not delalloc, ignore it */ |
1534 | if (!(orig->state & EXTENT_DELALLOC)) | 1536 | if (!(orig->state & EXTENT_DELALLOC)) |
1535 | return; | 1537 | return; |
1536 | 1538 | ||
1539 | size = orig->end - orig->start + 1; | ||
1540 | if (size > BTRFS_MAX_EXTENT_SIZE) { | ||
1541 | u64 num_extents; | ||
1542 | u64 new_size; | ||
1543 | |||
1544 | /* | ||
1545 | * We need the largest size of the remaining extent to see if we | ||
1546 | * need to add a new outstanding extent. Think of the following | ||
1547 | * case | ||
1548 | * | ||
1549 | * [MAX_EXTENT_SIZEx2 - 4k][4k] | ||
1550 | * | ||
1551 | * The new_size would just be 4k and we'd think we had enough | ||
1552 | * outstanding extents for this if we only took one side of the | ||
1553 | * split, same goes for the other direction. We need to see if | ||
1554 | * the larger size still is the same amount of extents as the | ||
1555 | * original size, because if it is we need to add a new | ||
1556 | * outstanding extent. But if we split up and the larger size | ||
1557 | * is less than the original then we are good to go since we've | ||
1558 | * already accounted for the extra extent in our original | ||
1559 | * accounting. | ||
1560 | */ | ||
1561 | new_size = orig->end - split + 1; | ||
1562 | if ((split - orig->start) > new_size) | ||
1563 | new_size = split - orig->start; | ||
1564 | |||
1565 | num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, | ||
1566 | BTRFS_MAX_EXTENT_SIZE); | ||
1567 | if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, | ||
1568 | BTRFS_MAX_EXTENT_SIZE) < num_extents) | ||
1569 | return; | ||
1570 | } | ||
1571 | |||
1537 | spin_lock(&BTRFS_I(inode)->lock); | 1572 | spin_lock(&BTRFS_I(inode)->lock); |
1538 | BTRFS_I(inode)->outstanding_extents++; | 1573 | BTRFS_I(inode)->outstanding_extents++; |
1539 | spin_unlock(&BTRFS_I(inode)->lock); | 1574 | spin_unlock(&BTRFS_I(inode)->lock); |
@@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode, | |||
1549 | struct extent_state *new, | 1584 | struct extent_state *new, |
1550 | struct extent_state *other) | 1585 | struct extent_state *other) |
1551 | { | 1586 | { |
1587 | u64 new_size, old_size; | ||
1588 | u64 num_extents; | ||
1589 | |||
1552 | /* not delalloc, ignore it */ | 1590 | /* not delalloc, ignore it */ |
1553 | if (!(other->state & EXTENT_DELALLOC)) | 1591 | if (!(other->state & EXTENT_DELALLOC)) |
1554 | return; | 1592 | return; |
1555 | 1593 | ||
1594 | old_size = other->end - other->start + 1; | ||
1595 | new_size = old_size + (new->end - new->start + 1); | ||
1596 | |||
1597 | /* we're not bigger than the max, unreserve the space and go */ | ||
1598 | if (new_size <= BTRFS_MAX_EXTENT_SIZE) { | ||
1599 | spin_lock(&BTRFS_I(inode)->lock); | ||
1600 | BTRFS_I(inode)->outstanding_extents--; | ||
1601 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1602 | return; | ||
1603 | } | ||
1604 | |||
1605 | /* | ||
1606 | * If we grew by another max_extent, just return, we want to keep that | ||
1607 | * reserved amount. | ||
1608 | */ | ||
1609 | num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, | ||
1610 | BTRFS_MAX_EXTENT_SIZE); | ||
1611 | if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, | ||
1612 | BTRFS_MAX_EXTENT_SIZE) > num_extents) | ||
1613 | return; | ||
1614 | |||
1556 | spin_lock(&BTRFS_I(inode)->lock); | 1615 | spin_lock(&BTRFS_I(inode)->lock); |
1557 | BTRFS_I(inode)->outstanding_extents--; | 1616 | BTRFS_I(inode)->outstanding_extents--; |
1558 | spin_unlock(&BTRFS_I(inode)->lock); | 1617 | spin_unlock(&BTRFS_I(inode)->lock); |
@@ -1604,7 +1663,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, | |||
1604 | * have pending delalloc work to be done. | 1663 | * have pending delalloc work to be done. |
1605 | */ | 1664 | */ |
1606 | static void btrfs_set_bit_hook(struct inode *inode, | 1665 | static void btrfs_set_bit_hook(struct inode *inode, |
1607 | struct extent_state *state, unsigned long *bits) | 1666 | struct extent_state *state, unsigned *bits) |
1608 | { | 1667 | { |
1609 | 1668 | ||
1610 | if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) | 1669 | if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) |
@@ -1645,9 +1704,11 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1645 | */ | 1704 | */ |
1646 | static void btrfs_clear_bit_hook(struct inode *inode, | 1705 | static void btrfs_clear_bit_hook(struct inode *inode, |
1647 | struct extent_state *state, | 1706 | struct extent_state *state, |
1648 | unsigned long *bits) | 1707 | unsigned *bits) |
1649 | { | 1708 | { |
1650 | u64 len = state->end + 1 - state->start; | 1709 | u64 len = state->end + 1 - state->start; |
1710 | u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1, | ||
1711 | BTRFS_MAX_EXTENT_SIZE); | ||
1651 | 1712 | ||
1652 | spin_lock(&BTRFS_I(inode)->lock); | 1713 | spin_lock(&BTRFS_I(inode)->lock); |
1653 | if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) | 1714 | if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) |
@@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1667 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1728 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1668 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { | 1729 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { |
1669 | spin_lock(&BTRFS_I(inode)->lock); | 1730 | spin_lock(&BTRFS_I(inode)->lock); |
1670 | BTRFS_I(inode)->outstanding_extents--; | 1731 | BTRFS_I(inode)->outstanding_extents -= num_extents; |
1671 | spin_unlock(&BTRFS_I(inode)->lock); | 1732 | spin_unlock(&BTRFS_I(inode)->lock); |
1672 | } | 1733 | } |
1673 | 1734 | ||
@@ -2945,7 +3006,7 @@ static int __readpage_endio_check(struct inode *inode, | |||
2945 | return 0; | 3006 | return 0; |
2946 | zeroit: | 3007 | zeroit: |
2947 | if (__ratelimit(&_rs)) | 3008 | if (__ratelimit(&_rs)) |
2948 | btrfs_info(BTRFS_I(inode)->root->fs_info, | 3009 | btrfs_warn(BTRFS_I(inode)->root->fs_info, |
2949 | "csum failed ino %llu off %llu csum %u expected csum %u", | 3010 | "csum failed ino %llu off %llu csum %u expected csum %u", |
2950 | btrfs_ino(inode), start, csum, csum_expected); | 3011 | btrfs_ino(inode), start, csum, csum_expected); |
2951 | memset(kaddr + pgoff, 1, len); | 3012 | memset(kaddr + pgoff, 1, len); |
@@ -3407,7 +3468,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3407 | 3468 | ||
3408 | out: | 3469 | out: |
3409 | if (ret) | 3470 | if (ret) |
3410 | btrfs_crit(root->fs_info, | 3471 | btrfs_err(root->fs_info, |
3411 | "could not do orphan cleanup %d", ret); | 3472 | "could not do orphan cleanup %d", ret); |
3412 | btrfs_free_path(path); | 3473 | btrfs_free_path(path); |
3413 | return ret; | 3474 | return ret; |
@@ -3490,7 +3551,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
3490 | struct btrfs_path *path; | 3551 | struct btrfs_path *path; |
3491 | struct extent_buffer *leaf; | 3552 | struct extent_buffer *leaf; |
3492 | struct btrfs_inode_item *inode_item; | 3553 | struct btrfs_inode_item *inode_item; |
3493 | struct btrfs_timespec *tspec; | ||
3494 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3554 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3495 | struct btrfs_key location; | 3555 | struct btrfs_key location; |
3496 | unsigned long ptr; | 3556 | unsigned long ptr; |
@@ -3527,17 +3587,19 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
3527 | i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); | 3587 | i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); |
3528 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); | 3588 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); |
3529 | 3589 | ||
3530 | tspec = btrfs_inode_atime(inode_item); | 3590 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); |
3531 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); | 3591 | inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); |
3532 | inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); | 3592 | |
3593 | inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime); | ||
3594 | inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime); | ||
3533 | 3595 | ||
3534 | tspec = btrfs_inode_mtime(inode_item); | 3596 | inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime); |
3535 | inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); | 3597 | inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime); |
3536 | inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); | ||
3537 | 3598 | ||
3538 | tspec = btrfs_inode_ctime(inode_item); | 3599 | BTRFS_I(inode)->i_otime.tv_sec = |
3539 | inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); | 3600 | btrfs_timespec_sec(leaf, &inode_item->otime); |
3540 | inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); | 3601 | BTRFS_I(inode)->i_otime.tv_nsec = |
3602 | btrfs_timespec_nsec(leaf, &inode_item->otime); | ||
3541 | 3603 | ||
3542 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 3604 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
3543 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 3605 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
@@ -3656,21 +3718,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
3656 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); | 3718 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); |
3657 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); | 3719 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); |
3658 | 3720 | ||
3659 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), | 3721 | btrfs_set_token_timespec_sec(leaf, &item->atime, |
3660 | inode->i_atime.tv_sec, &token); | 3722 | inode->i_atime.tv_sec, &token); |
3661 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), | 3723 | btrfs_set_token_timespec_nsec(leaf, &item->atime, |
3662 | inode->i_atime.tv_nsec, &token); | 3724 | inode->i_atime.tv_nsec, &token); |
3663 | 3725 | ||
3664 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), | 3726 | btrfs_set_token_timespec_sec(leaf, &item->mtime, |
3665 | inode->i_mtime.tv_sec, &token); | 3727 | inode->i_mtime.tv_sec, &token); |
3666 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), | 3728 | btrfs_set_token_timespec_nsec(leaf, &item->mtime, |
3667 | inode->i_mtime.tv_nsec, &token); | 3729 | inode->i_mtime.tv_nsec, &token); |
3668 | 3730 | ||
3669 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), | 3731 | btrfs_set_token_timespec_sec(leaf, &item->ctime, |
3670 | inode->i_ctime.tv_sec, &token); | 3732 | inode->i_ctime.tv_sec, &token); |
3671 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), | 3733 | btrfs_set_token_timespec_nsec(leaf, &item->ctime, |
3672 | inode->i_ctime.tv_nsec, &token); | 3734 | inode->i_ctime.tv_nsec, &token); |
3673 | 3735 | ||
3736 | btrfs_set_token_timespec_sec(leaf, &item->otime, | ||
3737 | BTRFS_I(inode)->i_otime.tv_sec, &token); | ||
3738 | btrfs_set_token_timespec_nsec(leaf, &item->otime, | ||
3739 | BTRFS_I(inode)->i_otime.tv_nsec, &token); | ||
3740 | |||
3674 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), | 3741 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), |
3675 | &token); | 3742 | &token); |
3676 | btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, | 3743 | btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, |
@@ -5007,6 +5074,7 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
5007 | struct btrfs_root *new_root; | 5074 | struct btrfs_root *new_root; |
5008 | struct btrfs_root_ref *ref; | 5075 | struct btrfs_root_ref *ref; |
5009 | struct extent_buffer *leaf; | 5076 | struct extent_buffer *leaf; |
5077 | struct btrfs_key key; | ||
5010 | int ret; | 5078 | int ret; |
5011 | int err = 0; | 5079 | int err = 0; |
5012 | 5080 | ||
@@ -5017,9 +5085,12 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
5017 | } | 5085 | } |
5018 | 5086 | ||
5019 | err = -ENOENT; | 5087 | err = -ENOENT; |
5020 | ret = btrfs_find_item(root->fs_info->tree_root, path, | 5088 | key.objectid = BTRFS_I(dir)->root->root_key.objectid; |
5021 | BTRFS_I(dir)->root->root_key.objectid, | 5089 | key.type = BTRFS_ROOT_REF_KEY; |
5022 | location->objectid, BTRFS_ROOT_REF_KEY, NULL); | 5090 | key.offset = location->objectid; |
5091 | |||
5092 | ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path, | ||
5093 | 0, 0); | ||
5023 | if (ret) { | 5094 | if (ret) { |
5024 | if (ret < 0) | 5095 | if (ret < 0) |
5025 | err = ret; | 5096 | err = ret; |
@@ -5258,7 +5329,10 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
5258 | inode->i_op = &btrfs_dir_ro_inode_operations; | 5329 | inode->i_op = &btrfs_dir_ro_inode_operations; |
5259 | inode->i_fop = &simple_dir_operations; | 5330 | inode->i_fop = &simple_dir_operations; |
5260 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; | 5331 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; |
5261 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 5332 | inode->i_mtime = CURRENT_TIME; |
5333 | inode->i_atime = inode->i_mtime; | ||
5334 | inode->i_ctime = inode->i_mtime; | ||
5335 | BTRFS_I(inode)->i_otime = inode->i_mtime; | ||
5262 | 5336 | ||
5263 | return inode; | 5337 | return inode; |
5264 | } | 5338 | } |
@@ -5826,7 +5900,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5826 | 5900 | ||
5827 | inode_init_owner(inode, dir, mode); | 5901 | inode_init_owner(inode, dir, mode); |
5828 | inode_set_bytes(inode, 0); | 5902 | inode_set_bytes(inode, 0); |
5829 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 5903 | |
5904 | inode->i_mtime = CURRENT_TIME; | ||
5905 | inode->i_atime = inode->i_mtime; | ||
5906 | inode->i_ctime = inode->i_mtime; | ||
5907 | BTRFS_I(inode)->i_otime = inode->i_mtime; | ||
5908 | |||
5830 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 5909 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
5831 | struct btrfs_inode_item); | 5910 | struct btrfs_inode_item); |
5832 | memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, | 5911 | memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, |
@@ -7134,11 +7213,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7134 | u64 start = iblock << inode->i_blkbits; | 7213 | u64 start = iblock << inode->i_blkbits; |
7135 | u64 lockstart, lockend; | 7214 | u64 lockstart, lockend; |
7136 | u64 len = bh_result->b_size; | 7215 | u64 len = bh_result->b_size; |
7216 | u64 orig_len = len; | ||
7137 | int unlock_bits = EXTENT_LOCKED; | 7217 | int unlock_bits = EXTENT_LOCKED; |
7138 | int ret = 0; | 7218 | int ret = 0; |
7139 | 7219 | ||
7140 | if (create) | 7220 | if (create) |
7141 | unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; | 7221 | unlock_bits |= EXTENT_DIRTY; |
7142 | else | 7222 | else |
7143 | len = min_t(u64, len, root->sectorsize); | 7223 | len = min_t(u64, len, root->sectorsize); |
7144 | 7224 | ||
@@ -7269,14 +7349,12 @@ unlock: | |||
7269 | if (start + len > i_size_read(inode)) | 7349 | if (start + len > i_size_read(inode)) |
7270 | i_size_write(inode, start + len); | 7350 | i_size_write(inode, start + len); |
7271 | 7351 | ||
7272 | spin_lock(&BTRFS_I(inode)->lock); | 7352 | if (len < orig_len) { |
7273 | BTRFS_I(inode)->outstanding_extents++; | 7353 | spin_lock(&BTRFS_I(inode)->lock); |
7274 | spin_unlock(&BTRFS_I(inode)->lock); | 7354 | BTRFS_I(inode)->outstanding_extents++; |
7275 | 7355 | spin_unlock(&BTRFS_I(inode)->lock); | |
7276 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 7356 | } |
7277 | lockstart + len - 1, EXTENT_DELALLOC, NULL, | 7357 | btrfs_free_reserved_data_space(inode, len); |
7278 | &cached_state, GFP_NOFS); | ||
7279 | BUG_ON(ret); | ||
7280 | } | 7358 | } |
7281 | 7359 | ||
7282 | /* | 7360 | /* |
@@ -7805,8 +7883,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7805 | } | 7883 | } |
7806 | 7884 | ||
7807 | /* async crcs make it difficult to collect full stripe writes. */ | 7885 | /* async crcs make it difficult to collect full stripe writes. */ |
7808 | if (btrfs_get_alloc_profile(root, 1) & | 7886 | if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK) |
7809 | (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) | ||
7810 | async_submit = 0; | 7887 | async_submit = 0; |
7811 | else | 7888 | else |
7812 | async_submit = 1; | 7889 | async_submit = 1; |
@@ -8053,8 +8130,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
8053 | else if (ret >= 0 && (size_t)ret < count) | 8130 | else if (ret >= 0 && (size_t)ret < count) |
8054 | btrfs_delalloc_release_space(inode, | 8131 | btrfs_delalloc_release_space(inode, |
8055 | count - (size_t)ret); | 8132 | count - (size_t)ret); |
8056 | else | ||
8057 | btrfs_delalloc_release_metadata(inode, 0); | ||
8058 | } | 8133 | } |
8059 | out: | 8134 | out: |
8060 | if (wakeup) | 8135 | if (wakeup) |
@@ -8575,6 +8650,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
8575 | 8650 | ||
8576 | ei->delayed_node = NULL; | 8651 | ei->delayed_node = NULL; |
8577 | 8652 | ||
8653 | ei->i_otime.tv_sec = 0; | ||
8654 | ei->i_otime.tv_nsec = 0; | ||
8655 | |||
8578 | inode = &ei->vfs_inode; | 8656 | inode = &ei->vfs_inode; |
8579 | extent_map_tree_init(&ei->extent_tree); | 8657 | extent_map_tree_init(&ei->extent_tree); |
8580 | extent_io_tree_init(&ei->io_tree, &inode->i_data); | 8658 | extent_io_tree_init(&ei->io_tree, &inode->i_data); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 48b60dbf807f..97159a8e91d4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1431,9 +1431,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | |||
1431 | qgroup = u64_to_ptr(unode->aux); | 1431 | qgroup = u64_to_ptr(unode->aux); |
1432 | qgroup->rfer += sign * oper->num_bytes; | 1432 | qgroup->rfer += sign * oper->num_bytes; |
1433 | qgroup->rfer_cmpr += sign * oper->num_bytes; | 1433 | qgroup->rfer_cmpr += sign * oper->num_bytes; |
1434 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | ||
1434 | qgroup->excl += sign * oper->num_bytes; | 1435 | qgroup->excl += sign * oper->num_bytes; |
1435 | if (sign < 0) | ||
1436 | WARN_ON(qgroup->excl < oper->num_bytes); | ||
1437 | qgroup->excl_cmpr += sign * oper->num_bytes; | 1436 | qgroup->excl_cmpr += sign * oper->num_bytes; |
1438 | qgroup_dirty(fs_info, qgroup); | 1437 | qgroup_dirty(fs_info, qgroup); |
1439 | 1438 | ||
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 8ab2a17bbba8..5264858ed768 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -58,15 +58,6 @@ | |||
58 | */ | 58 | */ |
59 | #define RBIO_CACHE_READY_BIT 3 | 59 | #define RBIO_CACHE_READY_BIT 3 |
60 | 60 | ||
61 | /* | ||
62 | * bbio and raid_map is managed by the caller, so we shouldn't free | ||
63 | * them here. And besides that, all rbios with this flag should not | ||
64 | * be cached, because we need raid_map to check the rbios' stripe | ||
65 | * is the same or not, but it is very likely that the caller has | ||
66 | * free raid_map, so don't cache those rbios. | ||
67 | */ | ||
68 | #define RBIO_HOLD_BBIO_MAP_BIT 4 | ||
69 | |||
70 | #define RBIO_CACHE_SIZE 1024 | 61 | #define RBIO_CACHE_SIZE 1024 |
71 | 62 | ||
72 | enum btrfs_rbio_ops { | 63 | enum btrfs_rbio_ops { |
@@ -79,13 +70,6 @@ struct btrfs_raid_bio { | |||
79 | struct btrfs_fs_info *fs_info; | 70 | struct btrfs_fs_info *fs_info; |
80 | struct btrfs_bio *bbio; | 71 | struct btrfs_bio *bbio; |
81 | 72 | ||
82 | /* | ||
83 | * logical block numbers for the start of each stripe | ||
84 | * The last one or two are p/q. These are sorted, | ||
85 | * so raid_map[0] is the start of our full stripe | ||
86 | */ | ||
87 | u64 *raid_map; | ||
88 | |||
89 | /* while we're doing rmw on a stripe | 73 | /* while we're doing rmw on a stripe |
90 | * we put it into a hash table so we can | 74 | * we put it into a hash table so we can |
91 | * lock the stripe and merge more rbios | 75 | * lock the stripe and merge more rbios |
@@ -303,7 +287,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) | |||
303 | */ | 287 | */ |
304 | static int rbio_bucket(struct btrfs_raid_bio *rbio) | 288 | static int rbio_bucket(struct btrfs_raid_bio *rbio) |
305 | { | 289 | { |
306 | u64 num = rbio->raid_map[0]; | 290 | u64 num = rbio->bbio->raid_map[0]; |
307 | 291 | ||
308 | /* | 292 | /* |
309 | * we shift down quite a bit. We're using byte | 293 | * we shift down quite a bit. We're using byte |
@@ -606,8 +590,8 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, | |||
606 | test_bit(RBIO_CACHE_BIT, &cur->flags)) | 590 | test_bit(RBIO_CACHE_BIT, &cur->flags)) |
607 | return 0; | 591 | return 0; |
608 | 592 | ||
609 | if (last->raid_map[0] != | 593 | if (last->bbio->raid_map[0] != |
610 | cur->raid_map[0]) | 594 | cur->bbio->raid_map[0]) |
611 | return 0; | 595 | return 0; |
612 | 596 | ||
613 | /* we can't merge with different operations */ | 597 | /* we can't merge with different operations */ |
@@ -689,7 +673,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) | |||
689 | spin_lock_irqsave(&h->lock, flags); | 673 | spin_lock_irqsave(&h->lock, flags); |
690 | list_for_each_entry(cur, &h->hash_list, hash_list) { | 674 | list_for_each_entry(cur, &h->hash_list, hash_list) { |
691 | walk++; | 675 | walk++; |
692 | if (cur->raid_map[0] == rbio->raid_map[0]) { | 676 | if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) { |
693 | spin_lock(&cur->bio_list_lock); | 677 | spin_lock(&cur->bio_list_lock); |
694 | 678 | ||
695 | /* can we steal this cached rbio's pages? */ | 679 | /* can we steal this cached rbio's pages? */ |
@@ -841,21 +825,6 @@ done_nolock: | |||
841 | remove_rbio_from_cache(rbio); | 825 | remove_rbio_from_cache(rbio); |
842 | } | 826 | } |
843 | 827 | ||
844 | static inline void | ||
845 | __free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need) | ||
846 | { | ||
847 | if (need) { | ||
848 | kfree(raid_map); | ||
849 | kfree(bbio); | ||
850 | } | ||
851 | } | ||
852 | |||
853 | static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio) | ||
854 | { | ||
855 | __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map, | ||
856 | !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)); | ||
857 | } | ||
858 | |||
859 | static void __free_raid_bio(struct btrfs_raid_bio *rbio) | 828 | static void __free_raid_bio(struct btrfs_raid_bio *rbio) |
860 | { | 829 | { |
861 | int i; | 830 | int i; |
@@ -875,8 +844,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) | |||
875 | } | 844 | } |
876 | } | 845 | } |
877 | 846 | ||
878 | free_bbio_and_raid_map(rbio); | 847 | btrfs_put_bbio(rbio->bbio); |
879 | |||
880 | kfree(rbio); | 848 | kfree(rbio); |
881 | } | 849 | } |
882 | 850 | ||
@@ -985,8 +953,7 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) | |||
985 | * this does not allocate any pages for rbio->pages. | 953 | * this does not allocate any pages for rbio->pages. |
986 | */ | 954 | */ |
987 | static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | 955 | static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, |
988 | struct btrfs_bio *bbio, u64 *raid_map, | 956 | struct btrfs_bio *bbio, u64 stripe_len) |
989 | u64 stripe_len) | ||
990 | { | 957 | { |
991 | struct btrfs_raid_bio *rbio; | 958 | struct btrfs_raid_bio *rbio; |
992 | int nr_data = 0; | 959 | int nr_data = 0; |
@@ -1007,7 +974,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
1007 | INIT_LIST_HEAD(&rbio->stripe_cache); | 974 | INIT_LIST_HEAD(&rbio->stripe_cache); |
1008 | INIT_LIST_HEAD(&rbio->hash_list); | 975 | INIT_LIST_HEAD(&rbio->hash_list); |
1009 | rbio->bbio = bbio; | 976 | rbio->bbio = bbio; |
1010 | rbio->raid_map = raid_map; | ||
1011 | rbio->fs_info = root->fs_info; | 977 | rbio->fs_info = root->fs_info; |
1012 | rbio->stripe_len = stripe_len; | 978 | rbio->stripe_len = stripe_len; |
1013 | rbio->nr_pages = num_pages; | 979 | rbio->nr_pages = num_pages; |
@@ -1028,10 +994,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
1028 | rbio->bio_pages = p + sizeof(struct page *) * num_pages; | 994 | rbio->bio_pages = p + sizeof(struct page *) * num_pages; |
1029 | rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; | 995 | rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; |
1030 | 996 | ||
1031 | if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) | 997 | if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) |
998 | nr_data = real_stripes - 1; | ||
999 | else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) | ||
1032 | nr_data = real_stripes - 2; | 1000 | nr_data = real_stripes - 2; |
1033 | else | 1001 | else |
1034 | nr_data = real_stripes - 1; | 1002 | BUG(); |
1035 | 1003 | ||
1036 | rbio->nr_data = nr_data; | 1004 | rbio->nr_data = nr_data; |
1037 | return rbio; | 1005 | return rbio; |
@@ -1182,7 +1150,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) | |||
1182 | spin_lock_irq(&rbio->bio_list_lock); | 1150 | spin_lock_irq(&rbio->bio_list_lock); |
1183 | bio_list_for_each(bio, &rbio->bio_list) { | 1151 | bio_list_for_each(bio, &rbio->bio_list) { |
1184 | start = (u64)bio->bi_iter.bi_sector << 9; | 1152 | start = (u64)bio->bi_iter.bi_sector << 9; |
1185 | stripe_offset = start - rbio->raid_map[0]; | 1153 | stripe_offset = start - rbio->bbio->raid_map[0]; |
1186 | page_index = stripe_offset >> PAGE_CACHE_SHIFT; | 1154 | page_index = stripe_offset >> PAGE_CACHE_SHIFT; |
1187 | 1155 | ||
1188 | for (i = 0; i < bio->bi_vcnt; i++) { | 1156 | for (i = 0; i < bio->bi_vcnt; i++) { |
@@ -1402,7 +1370,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, | |||
1402 | logical <<= 9; | 1370 | logical <<= 9; |
1403 | 1371 | ||
1404 | for (i = 0; i < rbio->nr_data; i++) { | 1372 | for (i = 0; i < rbio->nr_data; i++) { |
1405 | stripe_start = rbio->raid_map[i]; | 1373 | stripe_start = rbio->bbio->raid_map[i]; |
1406 | if (logical >= stripe_start && | 1374 | if (logical >= stripe_start && |
1407 | logical < stripe_start + rbio->stripe_len) { | 1375 | logical < stripe_start + rbio->stripe_len) { |
1408 | return i; | 1376 | return i; |
@@ -1776,17 +1744,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
1776 | * our main entry point for writes from the rest of the FS. | 1744 | * our main entry point for writes from the rest of the FS. |
1777 | */ | 1745 | */ |
1778 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | 1746 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, |
1779 | struct btrfs_bio *bbio, u64 *raid_map, | 1747 | struct btrfs_bio *bbio, u64 stripe_len) |
1780 | u64 stripe_len) | ||
1781 | { | 1748 | { |
1782 | struct btrfs_raid_bio *rbio; | 1749 | struct btrfs_raid_bio *rbio; |
1783 | struct btrfs_plug_cb *plug = NULL; | 1750 | struct btrfs_plug_cb *plug = NULL; |
1784 | struct blk_plug_cb *cb; | 1751 | struct blk_plug_cb *cb; |
1785 | int ret; | 1752 | int ret; |
1786 | 1753 | ||
1787 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 1754 | rbio = alloc_rbio(root, bbio, stripe_len); |
1788 | if (IS_ERR(rbio)) { | 1755 | if (IS_ERR(rbio)) { |
1789 | __free_bbio_and_raid_map(bbio, raid_map, 1); | 1756 | btrfs_put_bbio(bbio); |
1790 | return PTR_ERR(rbio); | 1757 | return PTR_ERR(rbio); |
1791 | } | 1758 | } |
1792 | bio_list_add(&rbio->bio_list, bio); | 1759 | bio_list_add(&rbio->bio_list, bio); |
@@ -1885,9 +1852,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1885 | } | 1852 | } |
1886 | 1853 | ||
1887 | /* all raid6 handling here */ | 1854 | /* all raid6 handling here */ |
1888 | if (rbio->raid_map[rbio->real_stripes - 1] == | 1855 | if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) { |
1889 | RAID6_Q_STRIPE) { | ||
1890 | |||
1891 | /* | 1856 | /* |
1892 | * single failure, rebuild from parity raid5 | 1857 | * single failure, rebuild from parity raid5 |
1893 | * style | 1858 | * style |
@@ -1922,8 +1887,9 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1922 | * here due to a crc mismatch and we can't give them the | 1887 | * here due to a crc mismatch and we can't give them the |
1923 | * data they want | 1888 | * data they want |
1924 | */ | 1889 | */ |
1925 | if (rbio->raid_map[failb] == RAID6_Q_STRIPE) { | 1890 | if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { |
1926 | if (rbio->raid_map[faila] == RAID5_P_STRIPE) { | 1891 | if (rbio->bbio->raid_map[faila] == |
1892 | RAID5_P_STRIPE) { | ||
1927 | err = -EIO; | 1893 | err = -EIO; |
1928 | goto cleanup; | 1894 | goto cleanup; |
1929 | } | 1895 | } |
@@ -1934,7 +1900,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1934 | goto pstripe; | 1900 | goto pstripe; |
1935 | } | 1901 | } |
1936 | 1902 | ||
1937 | if (rbio->raid_map[failb] == RAID5_P_STRIPE) { | 1903 | if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) { |
1938 | raid6_datap_recov(rbio->real_stripes, | 1904 | raid6_datap_recov(rbio->real_stripes, |
1939 | PAGE_SIZE, faila, pointers); | 1905 | PAGE_SIZE, faila, pointers); |
1940 | } else { | 1906 | } else { |
@@ -2001,8 +1967,7 @@ cleanup: | |||
2001 | 1967 | ||
2002 | cleanup_io: | 1968 | cleanup_io: |
2003 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { | 1969 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { |
2004 | if (err == 0 && | 1970 | if (err == 0) |
2005 | !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)) | ||
2006 | cache_rbio_pages(rbio); | 1971 | cache_rbio_pages(rbio); |
2007 | else | 1972 | else |
2008 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); | 1973 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); |
@@ -2156,15 +2121,16 @@ cleanup: | |||
2156 | * of the drive. | 2121 | * of the drive. |
2157 | */ | 2122 | */ |
2158 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | 2123 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, |
2159 | struct btrfs_bio *bbio, u64 *raid_map, | 2124 | struct btrfs_bio *bbio, u64 stripe_len, |
2160 | u64 stripe_len, int mirror_num, int generic_io) | 2125 | int mirror_num, int generic_io) |
2161 | { | 2126 | { |
2162 | struct btrfs_raid_bio *rbio; | 2127 | struct btrfs_raid_bio *rbio; |
2163 | int ret; | 2128 | int ret; |
2164 | 2129 | ||
2165 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 2130 | rbio = alloc_rbio(root, bbio, stripe_len); |
2166 | if (IS_ERR(rbio)) { | 2131 | if (IS_ERR(rbio)) { |
2167 | __free_bbio_and_raid_map(bbio, raid_map, generic_io); | 2132 | if (generic_io) |
2133 | btrfs_put_bbio(bbio); | ||
2168 | return PTR_ERR(rbio); | 2134 | return PTR_ERR(rbio); |
2169 | } | 2135 | } |
2170 | 2136 | ||
@@ -2175,7 +2141,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | |||
2175 | rbio->faila = find_logical_bio_stripe(rbio, bio); | 2141 | rbio->faila = find_logical_bio_stripe(rbio, bio); |
2176 | if (rbio->faila == -1) { | 2142 | if (rbio->faila == -1) { |
2177 | BUG(); | 2143 | BUG(); |
2178 | __free_bbio_and_raid_map(bbio, raid_map, generic_io); | 2144 | if (generic_io) |
2145 | btrfs_put_bbio(bbio); | ||
2179 | kfree(rbio); | 2146 | kfree(rbio); |
2180 | return -EIO; | 2147 | return -EIO; |
2181 | } | 2148 | } |
@@ -2184,7 +2151,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | |||
2184 | btrfs_bio_counter_inc_noblocked(root->fs_info); | 2151 | btrfs_bio_counter_inc_noblocked(root->fs_info); |
2185 | rbio->generic_bio_cnt = 1; | 2152 | rbio->generic_bio_cnt = 1; |
2186 | } else { | 2153 | } else { |
2187 | set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); | 2154 | btrfs_get_bbio(bbio); |
2188 | } | 2155 | } |
2189 | 2156 | ||
2190 | /* | 2157 | /* |
@@ -2240,14 +2207,14 @@ static void read_rebuild_work(struct btrfs_work *work) | |||
2240 | 2207 | ||
2241 | struct btrfs_raid_bio * | 2208 | struct btrfs_raid_bio * |
2242 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, | 2209 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, |
2243 | struct btrfs_bio *bbio, u64 *raid_map, | 2210 | struct btrfs_bio *bbio, u64 stripe_len, |
2244 | u64 stripe_len, struct btrfs_device *scrub_dev, | 2211 | struct btrfs_device *scrub_dev, |
2245 | unsigned long *dbitmap, int stripe_nsectors) | 2212 | unsigned long *dbitmap, int stripe_nsectors) |
2246 | { | 2213 | { |
2247 | struct btrfs_raid_bio *rbio; | 2214 | struct btrfs_raid_bio *rbio; |
2248 | int i; | 2215 | int i; |
2249 | 2216 | ||
2250 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 2217 | rbio = alloc_rbio(root, bbio, stripe_len); |
2251 | if (IS_ERR(rbio)) | 2218 | if (IS_ERR(rbio)) |
2252 | return NULL; | 2219 | return NULL; |
2253 | bio_list_add(&rbio->bio_list, bio); | 2220 | bio_list_add(&rbio->bio_list, bio); |
@@ -2279,10 +2246,10 @@ void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, | |||
2279 | int stripe_offset; | 2246 | int stripe_offset; |
2280 | int index; | 2247 | int index; |
2281 | 2248 | ||
2282 | ASSERT(logical >= rbio->raid_map[0]); | 2249 | ASSERT(logical >= rbio->bbio->raid_map[0]); |
2283 | ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + | 2250 | ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + |
2284 | rbio->stripe_len * rbio->nr_data); | 2251 | rbio->stripe_len * rbio->nr_data); |
2285 | stripe_offset = (int)(logical - rbio->raid_map[0]); | 2252 | stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); |
2286 | index = stripe_offset >> PAGE_CACHE_SHIFT; | 2253 | index = stripe_offset >> PAGE_CACHE_SHIFT; |
2287 | rbio->bio_pages[index] = page; | 2254 | rbio->bio_pages[index] = page; |
2288 | } | 2255 | } |
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 31d4a157b5e3..2b5d7977d83b 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h | |||
@@ -43,16 +43,15 @@ struct btrfs_raid_bio; | |||
43 | struct btrfs_device; | 43 | struct btrfs_device; |
44 | 44 | ||
45 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | 45 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, |
46 | struct btrfs_bio *bbio, u64 *raid_map, | 46 | struct btrfs_bio *bbio, u64 stripe_len, |
47 | u64 stripe_len, int mirror_num, int generic_io); | 47 | int mirror_num, int generic_io); |
48 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | 48 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, |
49 | struct btrfs_bio *bbio, u64 *raid_map, | 49 | struct btrfs_bio *bbio, u64 stripe_len); |
50 | u64 stripe_len); | ||
51 | 50 | ||
52 | struct btrfs_raid_bio * | 51 | struct btrfs_raid_bio * |
53 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, | 52 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, |
54 | struct btrfs_bio *bbio, u64 *raid_map, | 53 | struct btrfs_bio *bbio, u64 stripe_len, |
55 | u64 stripe_len, struct btrfs_device *scrub_dev, | 54 | struct btrfs_device *scrub_dev, |
56 | unsigned long *dbitmap, int stripe_nsectors); | 55 | unsigned long *dbitmap, int stripe_nsectors); |
57 | void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, | 56 | void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, |
58 | struct page *page, u64 logical); | 57 | struct page *page, u64 logical); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index b63ae20618fb..0e7beea92b4c 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -66,7 +66,6 @@ struct reada_extctl { | |||
66 | struct reada_extent { | 66 | struct reada_extent { |
67 | u64 logical; | 67 | u64 logical; |
68 | struct btrfs_key top; | 68 | struct btrfs_key top; |
69 | u32 blocksize; | ||
70 | int err; | 69 | int err; |
71 | struct list_head extctl; | 70 | struct list_head extctl; |
72 | int refcnt; | 71 | int refcnt; |
@@ -349,7 +348,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
349 | 348 | ||
350 | blocksize = root->nodesize; | 349 | blocksize = root->nodesize; |
351 | re->logical = logical; | 350 | re->logical = logical; |
352 | re->blocksize = blocksize; | ||
353 | re->top = *top; | 351 | re->top = *top; |
354 | INIT_LIST_HEAD(&re->extctl); | 352 | INIT_LIST_HEAD(&re->extctl); |
355 | spin_lock_init(&re->lock); | 353 | spin_lock_init(&re->lock); |
@@ -463,7 +461,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
463 | spin_unlock(&fs_info->reada_lock); | 461 | spin_unlock(&fs_info->reada_lock); |
464 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 462 | btrfs_dev_replace_unlock(&fs_info->dev_replace); |
465 | 463 | ||
466 | kfree(bbio); | 464 | btrfs_put_bbio(bbio); |
467 | return re; | 465 | return re; |
468 | 466 | ||
469 | error: | 467 | error: |
@@ -488,7 +486,7 @@ error: | |||
488 | kref_put(&zone->refcnt, reada_zone_release); | 486 | kref_put(&zone->refcnt, reada_zone_release); |
489 | spin_unlock(&fs_info->reada_lock); | 487 | spin_unlock(&fs_info->reada_lock); |
490 | } | 488 | } |
491 | kfree(bbio); | 489 | btrfs_put_bbio(bbio); |
492 | kfree(re); | 490 | kfree(re); |
493 | return re_exist; | 491 | return re_exist; |
494 | } | 492 | } |
@@ -660,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
660 | int mirror_num = 0; | 658 | int mirror_num = 0; |
661 | struct extent_buffer *eb = NULL; | 659 | struct extent_buffer *eb = NULL; |
662 | u64 logical; | 660 | u64 logical; |
663 | u32 blocksize; | ||
664 | int ret; | 661 | int ret; |
665 | int i; | 662 | int i; |
666 | int need_kick = 0; | 663 | int need_kick = 0; |
@@ -694,7 +691,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
694 | spin_unlock(&fs_info->reada_lock); | 691 | spin_unlock(&fs_info->reada_lock); |
695 | return 0; | 692 | return 0; |
696 | } | 693 | } |
697 | dev->reada_next = re->logical + re->blocksize; | 694 | dev->reada_next = re->logical + fs_info->tree_root->nodesize; |
698 | re->refcnt++; | 695 | re->refcnt++; |
699 | 696 | ||
700 | spin_unlock(&fs_info->reada_lock); | 697 | spin_unlock(&fs_info->reada_lock); |
@@ -709,7 +706,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
709 | } | 706 | } |
710 | } | 707 | } |
711 | logical = re->logical; | 708 | logical = re->logical; |
712 | blocksize = re->blocksize; | ||
713 | 709 | ||
714 | spin_lock(&re->lock); | 710 | spin_lock(&re->lock); |
715 | if (re->scheduled_for == NULL) { | 711 | if (re->scheduled_for == NULL) { |
@@ -724,8 +720,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
724 | return 0; | 720 | return 0; |
725 | 721 | ||
726 | atomic_inc(&dev->reada_in_flight); | 722 | atomic_inc(&dev->reada_in_flight); |
727 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize, | 723 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, |
728 | mirror_num, &eb); | 724 | mirror_num, &eb); |
729 | if (ret) | 725 | if (ret) |
730 | __readahead_hook(fs_info->extent_root, NULL, logical, ret); | 726 | __readahead_hook(fs_info->extent_root, NULL, logical, ret); |
731 | else if (eb) | 727 | else if (eb) |
@@ -851,7 +847,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) | |||
851 | break; | 847 | break; |
852 | printk(KERN_DEBUG | 848 | printk(KERN_DEBUG |
853 | " re: logical %llu size %u empty %d for %lld", | 849 | " re: logical %llu size %u empty %d for %lld", |
854 | re->logical, re->blocksize, | 850 | re->logical, fs_info->tree_root->nodesize, |
855 | list_empty(&re->extctl), re->scheduled_for ? | 851 | list_empty(&re->extctl), re->scheduled_for ? |
856 | re->scheduled_for->devid : -1); | 852 | re->scheduled_for->devid : -1); |
857 | 853 | ||
@@ -886,7 +882,8 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) | |||
886 | } | 882 | } |
887 | printk(KERN_DEBUG | 883 | printk(KERN_DEBUG |
888 | "re: logical %llu size %u list empty %d for %lld", | 884 | "re: logical %llu size %u list empty %d for %lld", |
889 | re->logical, re->blocksize, list_empty(&re->extctl), | 885 | re->logical, fs_info->tree_root->nodesize, |
886 | list_empty(&re->extctl), | ||
890 | re->scheduled_for ? re->scheduled_for->devid : -1); | 887 | re->scheduled_for ? re->scheduled_for->devid : -1); |
891 | for (i = 0; i < re->nzones; ++i) { | 888 | for (i = 0; i < re->nzones; ++i) { |
892 | printk(KERN_CONT " zone %llu-%llu devs", | 889 | printk(KERN_CONT " zone %llu-%llu devs", |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 74257d6436ad..d83085381bcc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2855,9 +2855,10 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
2855 | } | 2855 | } |
2856 | } | 2856 | } |
2857 | 2857 | ||
2858 | static int tree_block_processed(u64 bytenr, u32 blocksize, | 2858 | static int tree_block_processed(u64 bytenr, struct reloc_control *rc) |
2859 | struct reloc_control *rc) | ||
2860 | { | 2859 | { |
2860 | u32 blocksize = rc->extent_root->nodesize; | ||
2861 | |||
2861 | if (test_range_bit(&rc->processed_blocks, bytenr, | 2862 | if (test_range_bit(&rc->processed_blocks, bytenr, |
2862 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) | 2863 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) |
2863 | return 1; | 2864 | return 1; |
@@ -2965,8 +2966,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2965 | while (rb_node) { | 2966 | while (rb_node) { |
2966 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2967 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2967 | if (!block->key_ready) | 2968 | if (!block->key_ready) |
2968 | readahead_tree_block(rc->extent_root, block->bytenr, | 2969 | readahead_tree_block(rc->extent_root, block->bytenr); |
2969 | block->key.objectid); | ||
2970 | rb_node = rb_next(rb_node); | 2970 | rb_node = rb_next(rb_node); |
2971 | } | 2971 | } |
2972 | 2972 | ||
@@ -3353,7 +3353,7 @@ static int __add_tree_block(struct reloc_control *rc, | |||
3353 | bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, | 3353 | bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, |
3354 | SKINNY_METADATA); | 3354 | SKINNY_METADATA); |
3355 | 3355 | ||
3356 | if (tree_block_processed(bytenr, blocksize, rc)) | 3356 | if (tree_block_processed(bytenr, rc)) |
3357 | return 0; | 3357 | return 0; |
3358 | 3358 | ||
3359 | if (tree_search(blocks, bytenr)) | 3359 | if (tree_search(blocks, bytenr)) |
@@ -3611,7 +3611,7 @@ static int find_data_references(struct reloc_control *rc, | |||
3611 | if (added) | 3611 | if (added) |
3612 | goto next; | 3612 | goto next; |
3613 | 3613 | ||
3614 | if (!tree_block_processed(leaf->start, leaf->len, rc)) { | 3614 | if (!tree_block_processed(leaf->start, rc)) { |
3615 | block = kmalloc(sizeof(*block), GFP_NOFS); | 3615 | block = kmalloc(sizeof(*block), GFP_NOFS); |
3616 | if (!block) { | 3616 | if (!block) { |
3617 | err = -ENOMEM; | 3617 | err = -ENOMEM; |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index e427cb7ee12c..ec57687c9a4d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -66,7 +66,6 @@ struct scrub_ctx; | |||
66 | struct scrub_recover { | 66 | struct scrub_recover { |
67 | atomic_t refs; | 67 | atomic_t refs; |
68 | struct btrfs_bio *bbio; | 68 | struct btrfs_bio *bbio; |
69 | u64 *raid_map; | ||
70 | u64 map_length; | 69 | u64 map_length; |
71 | }; | 70 | }; |
72 | 71 | ||
@@ -80,7 +79,7 @@ struct scrub_page { | |||
80 | u64 logical; | 79 | u64 logical; |
81 | u64 physical; | 80 | u64 physical; |
82 | u64 physical_for_dev_replace; | 81 | u64 physical_for_dev_replace; |
83 | atomic_t ref_count; | 82 | atomic_t refs; |
84 | struct { | 83 | struct { |
85 | unsigned int mirror_num:8; | 84 | unsigned int mirror_num:8; |
86 | unsigned int have_csum:1; | 85 | unsigned int have_csum:1; |
@@ -113,7 +112,7 @@ struct scrub_block { | |||
113 | struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; | 112 | struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; |
114 | int page_count; | 113 | int page_count; |
115 | atomic_t outstanding_pages; | 114 | atomic_t outstanding_pages; |
116 | atomic_t ref_count; /* free mem on transition to zero */ | 115 | atomic_t refs; /* free mem on transition to zero */ |
117 | struct scrub_ctx *sctx; | 116 | struct scrub_ctx *sctx; |
118 | struct scrub_parity *sparity; | 117 | struct scrub_parity *sparity; |
119 | struct { | 118 | struct { |
@@ -142,7 +141,7 @@ struct scrub_parity { | |||
142 | 141 | ||
143 | int stripe_len; | 142 | int stripe_len; |
144 | 143 | ||
145 | atomic_t ref_count; | 144 | atomic_t refs; |
146 | 145 | ||
147 | struct list_head spages; | 146 | struct list_head spages; |
148 | 147 | ||
@@ -194,6 +193,15 @@ struct scrub_ctx { | |||
194 | */ | 193 | */ |
195 | struct btrfs_scrub_progress stat; | 194 | struct btrfs_scrub_progress stat; |
196 | spinlock_t stat_lock; | 195 | spinlock_t stat_lock; |
196 | |||
197 | /* | ||
198 | * Use a ref counter to avoid use-after-free issues. Scrub workers | ||
199 | * decrement bios_in_flight and workers_pending and then do a wakeup | ||
200 | * on the list_wait wait queue. We must ensure the main scrub task | ||
201 | * doesn't free the scrub context before or while the workers are | ||
202 | * doing the wakeup() call. | ||
203 | */ | ||
204 | atomic_t refs; | ||
197 | }; | 205 | }; |
198 | 206 | ||
199 | struct scrub_fixup_nodatasum { | 207 | struct scrub_fixup_nodatasum { |
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx); | |||
236 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); | 244 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); |
237 | static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); | 245 | static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); |
238 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); | 246 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); |
239 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | 247 | static int scrub_setup_recheck_block(struct scrub_block *original_sblock, |
240 | struct btrfs_fs_info *fs_info, | ||
241 | struct scrub_block *original_sblock, | ||
242 | u64 length, u64 logical, | ||
243 | struct scrub_block *sblocks_for_recheck); | 248 | struct scrub_block *sblocks_for_recheck); |
244 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | 249 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
245 | struct scrub_block *sblock, int is_metadata, | 250 | struct scrub_block *sblock, int is_metadata, |
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
251 | const u8 *csum, u64 generation, | 256 | const u8 *csum, u64 generation, |
252 | u16 csum_size); | 257 | u16 csum_size); |
253 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | 258 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, |
254 | struct scrub_block *sblock_good, | 259 | struct scrub_block *sblock_good); |
255 | int force_write); | ||
256 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | 260 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, |
257 | struct scrub_block *sblock_good, | 261 | struct scrub_block *sblock_good, |
258 | int page_num, int force_write); | 262 | int page_num, int force_write); |
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
302 | static void copy_nocow_pages_worker(struct btrfs_work *work); | 306 | static void copy_nocow_pages_worker(struct btrfs_work *work); |
303 | static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); | 307 | static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); |
304 | static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); | 308 | static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); |
309 | static void scrub_put_ctx(struct scrub_ctx *sctx); | ||
305 | 310 | ||
306 | 311 | ||
307 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx) | 312 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx) |
308 | { | 313 | { |
314 | atomic_inc(&sctx->refs); | ||
309 | atomic_inc(&sctx->bios_in_flight); | 315 | atomic_inc(&sctx->bios_in_flight); |
310 | } | 316 | } |
311 | 317 | ||
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx) | |||
313 | { | 319 | { |
314 | atomic_dec(&sctx->bios_in_flight); | 320 | atomic_dec(&sctx->bios_in_flight); |
315 | wake_up(&sctx->list_wait); | 321 | wake_up(&sctx->list_wait); |
322 | scrub_put_ctx(sctx); | ||
316 | } | 323 | } |
317 | 324 | ||
318 | static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) | 325 | static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) |
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | |||
346 | { | 353 | { |
347 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | 354 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
348 | 355 | ||
356 | atomic_inc(&sctx->refs); | ||
349 | /* | 357 | /* |
350 | * increment scrubs_running to prevent cancel requests from | 358 | * increment scrubs_running to prevent cancel requests from |
351 | * completing as long as a worker is running. we must also | 359 | * completing as long as a worker is running. we must also |
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) | |||
388 | atomic_dec(&sctx->workers_pending); | 396 | atomic_dec(&sctx->workers_pending); |
389 | wake_up(&fs_info->scrub_pause_wait); | 397 | wake_up(&fs_info->scrub_pause_wait); |
390 | wake_up(&sctx->list_wait); | 398 | wake_up(&sctx->list_wait); |
399 | scrub_put_ctx(sctx); | ||
391 | } | 400 | } |
392 | 401 | ||
393 | static void scrub_free_csums(struct scrub_ctx *sctx) | 402 | static void scrub_free_csums(struct scrub_ctx *sctx) |
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) | |||
433 | kfree(sctx); | 442 | kfree(sctx); |
434 | } | 443 | } |
435 | 444 | ||
445 | static void scrub_put_ctx(struct scrub_ctx *sctx) | ||
446 | { | ||
447 | if (atomic_dec_and_test(&sctx->refs)) | ||
448 | scrub_free_ctx(sctx); | ||
449 | } | ||
450 | |||
436 | static noinline_for_stack | 451 | static noinline_for_stack |
437 | struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | 452 | struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) |
438 | { | 453 | { |
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
457 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); | 472 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); |
458 | if (!sctx) | 473 | if (!sctx) |
459 | goto nomem; | 474 | goto nomem; |
475 | atomic_set(&sctx->refs, 1); | ||
460 | sctx->is_dev_replace = is_dev_replace; | 476 | sctx->is_dev_replace = is_dev_replace; |
461 | sctx->pages_per_rd_bio = pages_per_rd_bio; | 477 | sctx->pages_per_rd_bio = pages_per_rd_bio; |
462 | sctx->curr = -1; | 478 | sctx->curr = -1; |
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
520 | struct inode_fs_paths *ipath = NULL; | 536 | struct inode_fs_paths *ipath = NULL; |
521 | struct btrfs_root *local_root; | 537 | struct btrfs_root *local_root; |
522 | struct btrfs_key root_key; | 538 | struct btrfs_key root_key; |
539 | struct btrfs_key key; | ||
523 | 540 | ||
524 | root_key.objectid = root; | 541 | root_key.objectid = root; |
525 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 542 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
530 | goto err; | 547 | goto err; |
531 | } | 548 | } |
532 | 549 | ||
533 | ret = inode_item_info(inum, 0, local_root, swarn->path); | 550 | /* |
551 | * this makes the path point to (inum INODE_ITEM ioff) | ||
552 | */ | ||
553 | key.objectid = inum; | ||
554 | key.type = BTRFS_INODE_ITEM_KEY; | ||
555 | key.offset = 0; | ||
556 | |||
557 | ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0); | ||
534 | if (ret) { | 558 | if (ret) { |
535 | btrfs_release_path(swarn->path); | 559 | btrfs_release_path(swarn->path); |
536 | goto err; | 560 | goto err; |
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover) | |||
848 | static inline void scrub_put_recover(struct scrub_recover *recover) | 872 | static inline void scrub_put_recover(struct scrub_recover *recover) |
849 | { | 873 | { |
850 | if (atomic_dec_and_test(&recover->refs)) { | 874 | if (atomic_dec_and_test(&recover->refs)) { |
851 | kfree(recover->bbio); | 875 | btrfs_put_bbio(recover->bbio); |
852 | kfree(recover->raid_map); | ||
853 | kfree(recover); | 876 | kfree(recover); |
854 | } | 877 | } |
855 | } | 878 | } |
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
955 | } | 978 | } |
956 | 979 | ||
957 | /* setup the context, map the logical blocks and alloc the pages */ | 980 | /* setup the context, map the logical blocks and alloc the pages */ |
958 | ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length, | 981 | ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck); |
959 | logical, sblocks_for_recheck); | ||
960 | if (ret) { | 982 | if (ret) { |
961 | spin_lock(&sctx->stat_lock); | 983 | spin_lock(&sctx->stat_lock); |
962 | sctx->stat.read_errors++; | 984 | sctx->stat.read_errors++; |
@@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
1030 | if (!is_metadata && !have_csum) { | 1052 | if (!is_metadata && !have_csum) { |
1031 | struct scrub_fixup_nodatasum *fixup_nodatasum; | 1053 | struct scrub_fixup_nodatasum *fixup_nodatasum; |
1032 | 1054 | ||
1033 | nodatasum_case: | ||
1034 | WARN_ON(sctx->is_dev_replace); | 1055 | WARN_ON(sctx->is_dev_replace); |
1035 | 1056 | ||
1057 | nodatasum_case: | ||
1058 | |||
1036 | /* | 1059 | /* |
1037 | * !is_metadata and !have_csum, this means that the data | 1060 | * !is_metadata and !have_csum, this means that the data |
1038 | * might not be COW'ed, that it might be modified | 1061 | * might not be COW'ed, that it might be modified |
@@ -1091,76 +1114,20 @@ nodatasum_case: | |||
1091 | sblock_other->no_io_error_seen) { | 1114 | sblock_other->no_io_error_seen) { |
1092 | if (sctx->is_dev_replace) { | 1115 | if (sctx->is_dev_replace) { |
1093 | scrub_write_block_to_dev_replace(sblock_other); | 1116 | scrub_write_block_to_dev_replace(sblock_other); |
1117 | goto corrected_error; | ||
1094 | } else { | 1118 | } else { |
1095 | int force_write = is_metadata || have_csum; | ||
1096 | |||
1097 | ret = scrub_repair_block_from_good_copy( | 1119 | ret = scrub_repair_block_from_good_copy( |
1098 | sblock_bad, sblock_other, | 1120 | sblock_bad, sblock_other); |
1099 | force_write); | 1121 | if (!ret) |
1122 | goto corrected_error; | ||
1100 | } | 1123 | } |
1101 | if (0 == ret) | ||
1102 | goto corrected_error; | ||
1103 | } | 1124 | } |
1104 | } | 1125 | } |
1105 | 1126 | ||
1106 | /* | 1127 | if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace) |
1107 | * for dev_replace, pick good pages and write to the target device. | 1128 | goto did_not_correct_error; |
1108 | */ | ||
1109 | if (sctx->is_dev_replace) { | ||
1110 | success = 1; | ||
1111 | for (page_num = 0; page_num < sblock_bad->page_count; | ||
1112 | page_num++) { | ||
1113 | int sub_success; | ||
1114 | |||
1115 | sub_success = 0; | ||
1116 | for (mirror_index = 0; | ||
1117 | mirror_index < BTRFS_MAX_MIRRORS && | ||
1118 | sblocks_for_recheck[mirror_index].page_count > 0; | ||
1119 | mirror_index++) { | ||
1120 | struct scrub_block *sblock_other = | ||
1121 | sblocks_for_recheck + mirror_index; | ||
1122 | struct scrub_page *page_other = | ||
1123 | sblock_other->pagev[page_num]; | ||
1124 | |||
1125 | if (!page_other->io_error) { | ||
1126 | ret = scrub_write_page_to_dev_replace( | ||
1127 | sblock_other, page_num); | ||
1128 | if (ret == 0) { | ||
1129 | /* succeeded for this page */ | ||
1130 | sub_success = 1; | ||
1131 | break; | ||
1132 | } else { | ||
1133 | btrfs_dev_replace_stats_inc( | ||
1134 | &sctx->dev_root-> | ||
1135 | fs_info->dev_replace. | ||
1136 | num_write_errors); | ||
1137 | } | ||
1138 | } | ||
1139 | } | ||
1140 | |||
1141 | if (!sub_success) { | ||
1142 | /* | ||
1143 | * did not find a mirror to fetch the page | ||
1144 | * from. scrub_write_page_to_dev_replace() | ||
1145 | * handles this case (page->io_error), by | ||
1146 | * filling the block with zeros before | ||
1147 | * submitting the write request | ||
1148 | */ | ||
1149 | success = 0; | ||
1150 | ret = scrub_write_page_to_dev_replace( | ||
1151 | sblock_bad, page_num); | ||
1152 | if (ret) | ||
1153 | btrfs_dev_replace_stats_inc( | ||
1154 | &sctx->dev_root->fs_info-> | ||
1155 | dev_replace.num_write_errors); | ||
1156 | } | ||
1157 | } | ||
1158 | |||
1159 | goto out; | ||
1160 | } | ||
1161 | 1129 | ||
1162 | /* | 1130 | /* |
1163 | * for regular scrub, repair those pages that are errored. | ||
1164 | * In case of I/O errors in the area that is supposed to be | 1131 | * In case of I/O errors in the area that is supposed to be |
1165 | * repaired, continue by picking good copies of those pages. | 1132 | * repaired, continue by picking good copies of those pages. |
1166 | * Select the good pages from mirrors to rewrite bad pages from | 1133 | * Select the good pages from mirrors to rewrite bad pages from |
@@ -1184,44 +1151,64 @@ nodatasum_case: | |||
1184 | * mirror, even if other 512 byte sectors in the same PAGE_SIZE | 1151 | * mirror, even if other 512 byte sectors in the same PAGE_SIZE |
1185 | * area are unreadable. | 1152 | * area are unreadable. |
1186 | */ | 1153 | */ |
1187 | |||
1188 | /* can only fix I/O errors from here on */ | ||
1189 | if (sblock_bad->no_io_error_seen) | ||
1190 | goto did_not_correct_error; | ||
1191 | |||
1192 | success = 1; | 1154 | success = 1; |
1193 | for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { | 1155 | for (page_num = 0; page_num < sblock_bad->page_count; |
1156 | page_num++) { | ||
1194 | struct scrub_page *page_bad = sblock_bad->pagev[page_num]; | 1157 | struct scrub_page *page_bad = sblock_bad->pagev[page_num]; |
1158 | struct scrub_block *sblock_other = NULL; | ||
1195 | 1159 | ||
1196 | if (!page_bad->io_error) | 1160 | /* skip no-io-error page in scrub */ |
1161 | if (!page_bad->io_error && !sctx->is_dev_replace) | ||
1197 | continue; | 1162 | continue; |
1198 | 1163 | ||
1199 | for (mirror_index = 0; | 1164 | /* try to find no-io-error page in mirrors */ |
1200 | mirror_index < BTRFS_MAX_MIRRORS && | 1165 | if (page_bad->io_error) { |
1201 | sblocks_for_recheck[mirror_index].page_count > 0; | 1166 | for (mirror_index = 0; |
1202 | mirror_index++) { | 1167 | mirror_index < BTRFS_MAX_MIRRORS && |
1203 | struct scrub_block *sblock_other = sblocks_for_recheck + | 1168 | sblocks_for_recheck[mirror_index].page_count > 0; |
1204 | mirror_index; | 1169 | mirror_index++) { |
1205 | struct scrub_page *page_other = sblock_other->pagev[ | 1170 | if (!sblocks_for_recheck[mirror_index]. |
1206 | page_num]; | 1171 | pagev[page_num]->io_error) { |
1207 | 1172 | sblock_other = sblocks_for_recheck + | |
1208 | if (!page_other->io_error) { | 1173 | mirror_index; |
1209 | ret = scrub_repair_page_from_good_copy( | 1174 | break; |
1210 | sblock_bad, sblock_other, page_num, 0); | ||
1211 | if (0 == ret) { | ||
1212 | page_bad->io_error = 0; | ||
1213 | break; /* succeeded for this page */ | ||
1214 | } | 1175 | } |
1215 | } | 1176 | } |
1177 | if (!sblock_other) | ||
1178 | success = 0; | ||
1216 | } | 1179 | } |
1217 | 1180 | ||
1218 | if (page_bad->io_error) { | 1181 | if (sctx->is_dev_replace) { |
1219 | /* did not find a mirror to copy the page from */ | 1182 | /* |
1220 | success = 0; | 1183 | * did not find a mirror to fetch the page |
1184 | * from. scrub_write_page_to_dev_replace() | ||
1185 | * handles this case (page->io_error), by | ||
1186 | * filling the block with zeros before | ||
1187 | * submitting the write request | ||
1188 | */ | ||
1189 | if (!sblock_other) | ||
1190 | sblock_other = sblock_bad; | ||
1191 | |||
1192 | if (scrub_write_page_to_dev_replace(sblock_other, | ||
1193 | page_num) != 0) { | ||
1194 | btrfs_dev_replace_stats_inc( | ||
1195 | &sctx->dev_root-> | ||
1196 | fs_info->dev_replace. | ||
1197 | num_write_errors); | ||
1198 | success = 0; | ||
1199 | } | ||
1200 | } else if (sblock_other) { | ||
1201 | ret = scrub_repair_page_from_good_copy(sblock_bad, | ||
1202 | sblock_other, | ||
1203 | page_num, 0); | ||
1204 | if (0 == ret) | ||
1205 | page_bad->io_error = 0; | ||
1206 | else | ||
1207 | success = 0; | ||
1221 | } | 1208 | } |
1222 | } | 1209 | } |
1223 | 1210 | ||
1224 | if (success) { | 1211 | if (success && !sctx->is_dev_replace) { |
1225 | if (is_metadata || have_csum) { | 1212 | if (is_metadata || have_csum) { |
1226 | /* | 1213 | /* |
1227 | * need to verify the checksum now that all | 1214 | * need to verify the checksum now that all |
@@ -1288,19 +1275,18 @@ out: | |||
1288 | return 0; | 1275 | return 0; |
1289 | } | 1276 | } |
1290 | 1277 | ||
1291 | static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map) | 1278 | static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio) |
1292 | { | 1279 | { |
1293 | if (raid_map) { | 1280 | if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) |
1294 | if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) | 1281 | return 2; |
1295 | return 3; | 1282 | else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) |
1296 | else | 1283 | return 3; |
1297 | return 2; | 1284 | else |
1298 | } else { | ||
1299 | return (int)bbio->num_stripes; | 1285 | return (int)bbio->num_stripes; |
1300 | } | ||
1301 | } | 1286 | } |
1302 | 1287 | ||
1303 | static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, | 1288 | static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, |
1289 | u64 *raid_map, | ||
1304 | u64 mapped_length, | 1290 | u64 mapped_length, |
1305 | int nstripes, int mirror, | 1291 | int nstripes, int mirror, |
1306 | int *stripe_index, | 1292 | int *stripe_index, |
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, | |||
1308 | { | 1294 | { |
1309 | int i; | 1295 | int i; |
1310 | 1296 | ||
1311 | if (raid_map) { | 1297 | if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
1312 | /* RAID5/6 */ | 1298 | /* RAID5/6 */ |
1313 | for (i = 0; i < nstripes; i++) { | 1299 | for (i = 0; i < nstripes; i++) { |
1314 | if (raid_map[i] == RAID6_Q_STRIPE || | 1300 | if (raid_map[i] == RAID6_Q_STRIPE || |
@@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, | |||
1329 | } | 1315 | } |
1330 | } | 1316 | } |
1331 | 1317 | ||
1332 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | 1318 | static int scrub_setup_recheck_block(struct scrub_block *original_sblock, |
1333 | struct btrfs_fs_info *fs_info, | ||
1334 | struct scrub_block *original_sblock, | ||
1335 | u64 length, u64 logical, | ||
1336 | struct scrub_block *sblocks_for_recheck) | 1319 | struct scrub_block *sblocks_for_recheck) |
1337 | { | 1320 | { |
1321 | struct scrub_ctx *sctx = original_sblock->sctx; | ||
1322 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
1323 | u64 length = original_sblock->page_count * PAGE_SIZE; | ||
1324 | u64 logical = original_sblock->pagev[0]->logical; | ||
1338 | struct scrub_recover *recover; | 1325 | struct scrub_recover *recover; |
1339 | struct btrfs_bio *bbio; | 1326 | struct btrfs_bio *bbio; |
1340 | u64 *raid_map; | ||
1341 | u64 sublen; | 1327 | u64 sublen; |
1342 | u64 mapped_length; | 1328 | u64 mapped_length; |
1343 | u64 stripe_offset; | 1329 | u64 stripe_offset; |
1344 | int stripe_index; | 1330 | int stripe_index; |
1345 | int page_index; | 1331 | int page_index = 0; |
1346 | int mirror_index; | 1332 | int mirror_index; |
1347 | int nmirrors; | 1333 | int nmirrors; |
1348 | int ret; | 1334 | int ret; |
1349 | 1335 | ||
1350 | /* | 1336 | /* |
1351 | * note: the two members ref_count and outstanding_pages | 1337 | * note: the two members refs and outstanding_pages |
1352 | * are not used (and not set) in the blocks that are used for | 1338 | * are not used (and not set) in the blocks that are used for |
1353 | * the recheck procedure | 1339 | * the recheck procedure |
1354 | */ | 1340 | */ |
1355 | 1341 | ||
1356 | page_index = 0; | ||
1357 | while (length > 0) { | 1342 | while (length > 0) { |
1358 | sublen = min_t(u64, length, PAGE_SIZE); | 1343 | sublen = min_t(u64, length, PAGE_SIZE); |
1359 | mapped_length = sublen; | 1344 | mapped_length = sublen; |
1360 | bbio = NULL; | 1345 | bbio = NULL; |
1361 | raid_map = NULL; | ||
1362 | 1346 | ||
1363 | /* | 1347 | /* |
1364 | * with a length of PAGE_SIZE, each returned stripe | 1348 | * with a length of PAGE_SIZE, each returned stripe |
1365 | * represents one mirror | 1349 | * represents one mirror |
1366 | */ | 1350 | */ |
1367 | ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, | 1351 | ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, |
1368 | &mapped_length, &bbio, 0, &raid_map); | 1352 | &mapped_length, &bbio, 0, 1); |
1369 | if (ret || !bbio || mapped_length < sublen) { | 1353 | if (ret || !bbio || mapped_length < sublen) { |
1370 | kfree(bbio); | 1354 | btrfs_put_bbio(bbio); |
1371 | kfree(raid_map); | ||
1372 | return -EIO; | 1355 | return -EIO; |
1373 | } | 1356 | } |
1374 | 1357 | ||
1375 | recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); | 1358 | recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); |
1376 | if (!recover) { | 1359 | if (!recover) { |
1377 | kfree(bbio); | 1360 | btrfs_put_bbio(bbio); |
1378 | kfree(raid_map); | ||
1379 | return -ENOMEM; | 1361 | return -ENOMEM; |
1380 | } | 1362 | } |
1381 | 1363 | ||
1382 | atomic_set(&recover->refs, 1); | 1364 | atomic_set(&recover->refs, 1); |
1383 | recover->bbio = bbio; | 1365 | recover->bbio = bbio; |
1384 | recover->raid_map = raid_map; | ||
1385 | recover->map_length = mapped_length; | 1366 | recover->map_length = mapped_length; |
1386 | 1367 | ||
1387 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); | 1368 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); |
1388 | 1369 | ||
1389 | nmirrors = scrub_nr_raid_mirrors(bbio, raid_map); | 1370 | nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS); |
1371 | |||
1390 | for (mirror_index = 0; mirror_index < nmirrors; | 1372 | for (mirror_index = 0; mirror_index < nmirrors; |
1391 | mirror_index++) { | 1373 | mirror_index++) { |
1392 | struct scrub_block *sblock; | 1374 | struct scrub_block *sblock; |
1393 | struct scrub_page *page; | 1375 | struct scrub_page *page; |
1394 | 1376 | ||
1395 | if (mirror_index >= BTRFS_MAX_MIRRORS) | ||
1396 | continue; | ||
1397 | |||
1398 | sblock = sblocks_for_recheck + mirror_index; | 1377 | sblock = sblocks_for_recheck + mirror_index; |
1399 | sblock->sctx = sctx; | 1378 | sblock->sctx = sctx; |
1400 | page = kzalloc(sizeof(*page), GFP_NOFS); | 1379 | page = kzalloc(sizeof(*page), GFP_NOFS); |
@@ -1410,9 +1389,12 @@ leave_nomem: | |||
1410 | sblock->pagev[page_index] = page; | 1389 | sblock->pagev[page_index] = page; |
1411 | page->logical = logical; | 1390 | page->logical = logical; |
1412 | 1391 | ||
1413 | scrub_stripe_index_and_offset(logical, raid_map, | 1392 | scrub_stripe_index_and_offset(logical, |
1393 | bbio->map_type, | ||
1394 | bbio->raid_map, | ||
1414 | mapped_length, | 1395 | mapped_length, |
1415 | bbio->num_stripes, | 1396 | bbio->num_stripes - |
1397 | bbio->num_tgtdevs, | ||
1416 | mirror_index, | 1398 | mirror_index, |
1417 | &stripe_index, | 1399 | &stripe_index, |
1418 | &stripe_offset); | 1400 | &stripe_offset); |
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error) | |||
1458 | 1440 | ||
1459 | static inline int scrub_is_page_on_raid56(struct scrub_page *page) | 1441 | static inline int scrub_is_page_on_raid56(struct scrub_page *page) |
1460 | { | 1442 | { |
1461 | return page->recover && page->recover->raid_map; | 1443 | return page->recover && |
1444 | (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); | ||
1462 | } | 1445 | } |
1463 | 1446 | ||
1464 | static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, | 1447 | static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, |
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, | |||
1475 | bio->bi_end_io = scrub_bio_wait_endio; | 1458 | bio->bi_end_io = scrub_bio_wait_endio; |
1476 | 1459 | ||
1477 | ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, | 1460 | ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, |
1478 | page->recover->raid_map, | ||
1479 | page->recover->map_length, | 1461 | page->recover->map_length, |
1480 | page->mirror_num, 0); | 1462 | page->mirror_num, 0); |
1481 | if (ret) | 1463 | if (ret) |
@@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
1615 | } | 1597 | } |
1616 | 1598 | ||
1617 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | 1599 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, |
1618 | struct scrub_block *sblock_good, | 1600 | struct scrub_block *sblock_good) |
1619 | int force_write) | ||
1620 | { | 1601 | { |
1621 | int page_num; | 1602 | int page_num; |
1622 | int ret = 0; | 1603 | int ret = 0; |
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | |||
1626 | 1607 | ||
1627 | ret_sub = scrub_repair_page_from_good_copy(sblock_bad, | 1608 | ret_sub = scrub_repair_page_from_good_copy(sblock_bad, |
1628 | sblock_good, | 1609 | sblock_good, |
1629 | page_num, | 1610 | page_num, 1); |
1630 | force_write); | ||
1631 | if (ret_sub) | 1611 | if (ret_sub) |
1632 | ret = ret_sub; | 1612 | ret = ret_sub; |
1633 | } | 1613 | } |
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
2067 | 2047 | ||
2068 | static void scrub_block_get(struct scrub_block *sblock) | 2048 | static void scrub_block_get(struct scrub_block *sblock) |
2069 | { | 2049 | { |
2070 | atomic_inc(&sblock->ref_count); | 2050 | atomic_inc(&sblock->refs); |
2071 | } | 2051 | } |
2072 | 2052 | ||
2073 | static void scrub_block_put(struct scrub_block *sblock) | 2053 | static void scrub_block_put(struct scrub_block *sblock) |
2074 | { | 2054 | { |
2075 | if (atomic_dec_and_test(&sblock->ref_count)) { | 2055 | if (atomic_dec_and_test(&sblock->refs)) { |
2076 | int i; | 2056 | int i; |
2077 | 2057 | ||
2078 | if (sblock->sparity) | 2058 | if (sblock->sparity) |
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock) | |||
2086 | 2066 | ||
2087 | static void scrub_page_get(struct scrub_page *spage) | 2067 | static void scrub_page_get(struct scrub_page *spage) |
2088 | { | 2068 | { |
2089 | atomic_inc(&spage->ref_count); | 2069 | atomic_inc(&spage->refs); |
2090 | } | 2070 | } |
2091 | 2071 | ||
2092 | static void scrub_page_put(struct scrub_page *spage) | 2072 | static void scrub_page_put(struct scrub_page *spage) |
2093 | { | 2073 | { |
2094 | if (atomic_dec_and_test(&spage->ref_count)) { | 2074 | if (atomic_dec_and_test(&spage->refs)) { |
2095 | if (spage->page) | 2075 | if (spage->page) |
2096 | __free_page(spage->page); | 2076 | __free_page(spage->page); |
2097 | kfree(spage); | 2077 | kfree(spage); |
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2217 | 2197 | ||
2218 | /* one ref inside this function, plus one for each page added to | 2198 | /* one ref inside this function, plus one for each page added to |
2219 | * a bio later on */ | 2199 | * a bio later on */ |
2220 | atomic_set(&sblock->ref_count, 1); | 2200 | atomic_set(&sblock->refs, 1); |
2221 | sblock->sctx = sctx; | 2201 | sblock->sctx = sctx; |
2222 | sblock->no_io_error_seen = 1; | 2202 | sblock->no_io_error_seen = 1; |
2223 | 2203 | ||
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, | |||
2510 | 2490 | ||
2511 | /* one ref inside this function, plus one for each page added to | 2491 | /* one ref inside this function, plus one for each page added to |
2512 | * a bio later on */ | 2492 | * a bio later on */ |
2513 | atomic_set(&sblock->ref_count, 1); | 2493 | atomic_set(&sblock->refs, 1); |
2514 | sblock->sctx = sctx; | 2494 | sblock->sctx = sctx; |
2515 | sblock->no_io_error_seen = 1; | 2495 | sblock->no_io_error_seen = 1; |
2516 | sblock->sparity = sparity; | 2496 | sblock->sparity = sparity; |
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | |||
2705 | struct btrfs_raid_bio *rbio; | 2685 | struct btrfs_raid_bio *rbio; |
2706 | struct scrub_page *spage; | 2686 | struct scrub_page *spage; |
2707 | struct btrfs_bio *bbio = NULL; | 2687 | struct btrfs_bio *bbio = NULL; |
2708 | u64 *raid_map = NULL; | ||
2709 | u64 length; | 2688 | u64 length; |
2710 | int ret; | 2689 | int ret; |
2711 | 2690 | ||
@@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | |||
2716 | length = sparity->logic_end - sparity->logic_start + 1; | 2695 | length = sparity->logic_end - sparity->logic_start + 1; |
2717 | ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, | 2696 | ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, |
2718 | sparity->logic_start, | 2697 | sparity->logic_start, |
2719 | &length, &bbio, 0, &raid_map); | 2698 | &length, &bbio, 0, 1); |
2720 | if (ret || !bbio || !raid_map) | 2699 | if (ret || !bbio || !bbio->raid_map) |
2721 | goto bbio_out; | 2700 | goto bbio_out; |
2722 | 2701 | ||
2723 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | 2702 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); |
@@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | |||
2729 | bio->bi_end_io = scrub_parity_bio_endio; | 2708 | bio->bi_end_io = scrub_parity_bio_endio; |
2730 | 2709 | ||
2731 | rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, | 2710 | rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, |
2732 | raid_map, length, | 2711 | length, sparity->scrub_dev, |
2733 | sparity->scrub_dev, | ||
2734 | sparity->dbitmap, | 2712 | sparity->dbitmap, |
2735 | sparity->nsectors); | 2713 | sparity->nsectors); |
2736 | if (!rbio) | 2714 | if (!rbio) |
@@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | |||
2747 | rbio_out: | 2725 | rbio_out: |
2748 | bio_put(bio); | 2726 | bio_put(bio); |
2749 | bbio_out: | 2727 | bbio_out: |
2750 | kfree(bbio); | 2728 | btrfs_put_bbio(bbio); |
2751 | kfree(raid_map); | ||
2752 | bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, | 2729 | bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, |
2753 | sparity->nsectors); | 2730 | sparity->nsectors); |
2754 | spin_lock(&sctx->stat_lock); | 2731 | spin_lock(&sctx->stat_lock); |
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors) | |||
2765 | 2742 | ||
2766 | static void scrub_parity_get(struct scrub_parity *sparity) | 2743 | static void scrub_parity_get(struct scrub_parity *sparity) |
2767 | { | 2744 | { |
2768 | atomic_inc(&sparity->ref_count); | 2745 | atomic_inc(&sparity->refs); |
2769 | } | 2746 | } |
2770 | 2747 | ||
2771 | static void scrub_parity_put(struct scrub_parity *sparity) | 2748 | static void scrub_parity_put(struct scrub_parity *sparity) |
2772 | { | 2749 | { |
2773 | if (!atomic_dec_and_test(&sparity->ref_count)) | 2750 | if (!atomic_dec_and_test(&sparity->refs)) |
2774 | return; | 2751 | return; |
2775 | 2752 | ||
2776 | scrub_parity_check_and_repair(sparity); | 2753 | scrub_parity_check_and_repair(sparity); |
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, | |||
2820 | sparity->scrub_dev = sdev; | 2797 | sparity->scrub_dev = sdev; |
2821 | sparity->logic_start = logic_start; | 2798 | sparity->logic_start = logic_start; |
2822 | sparity->logic_end = logic_end; | 2799 | sparity->logic_end = logic_end; |
2823 | atomic_set(&sparity->ref_count, 1); | 2800 | atomic_set(&sparity->refs, 1); |
2824 | INIT_LIST_HEAD(&sparity->spages); | 2801 | INIT_LIST_HEAD(&sparity->spages); |
2825 | sparity->dbitmap = sparity->bitmap; | 2802 | sparity->dbitmap = sparity->bitmap; |
2826 | sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; | 2803 | sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; |
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3037 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3014 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
3038 | increment = map->stripe_len; | 3015 | increment = map->stripe_len; |
3039 | mirror_num = num % map->num_stripes + 1; | 3016 | mirror_num = num % map->num_stripes + 1; |
3040 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3017 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
3041 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
3042 | get_raid56_logic_offset(physical, num, map, &offset, NULL); | 3018 | get_raid56_logic_offset(physical, num, map, &offset, NULL); |
3043 | increment = map->stripe_len * nr_data_stripes(map); | 3019 | increment = map->stripe_len * nr_data_stripes(map); |
3044 | mirror_num = 1; | 3020 | mirror_num = 1; |
@@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3074 | */ | 3050 | */ |
3075 | logical = base + offset; | 3051 | logical = base + offset; |
3076 | physical_end = physical + nstripes * map->stripe_len; | 3052 | physical_end = physical + nstripes * map->stripe_len; |
3077 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3053 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
3078 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
3079 | get_raid56_logic_offset(physical_end, num, | 3054 | get_raid56_logic_offset(physical_end, num, |
3080 | map, &logic_end, NULL); | 3055 | map, &logic_end, NULL); |
3081 | logic_end += base; | 3056 | logic_end += base; |
@@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3121 | ret = 0; | 3096 | ret = 0; |
3122 | while (physical < physical_end) { | 3097 | while (physical < physical_end) { |
3123 | /* for raid56, we skip parity stripe */ | 3098 | /* for raid56, we skip parity stripe */ |
3124 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3099 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
3125 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
3126 | ret = get_raid56_logic_offset(physical, num, | 3100 | ret = get_raid56_logic_offset(physical, num, |
3127 | map, &logical, &stripe_logical); | 3101 | map, &logical, &stripe_logical); |
3128 | logical += base; | 3102 | logical += base; |
@@ -3280,8 +3254,7 @@ again: | |||
3280 | scrub_free_csums(sctx); | 3254 | scrub_free_csums(sctx); |
3281 | if (extent_logical + extent_len < | 3255 | if (extent_logical + extent_len < |
3282 | key.objectid + bytes) { | 3256 | key.objectid + bytes) { |
3283 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3257 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
3284 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
3285 | /* | 3258 | /* |
3286 | * loop until we find next data stripe | 3259 | * loop until we find next data stripe |
3287 | * or we have finished all stripes. | 3260 | * or we have finished all stripes. |
@@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
3775 | scrub_workers_put(fs_info); | 3748 | scrub_workers_put(fs_info); |
3776 | mutex_unlock(&fs_info->scrub_lock); | 3749 | mutex_unlock(&fs_info->scrub_lock); |
3777 | 3750 | ||
3778 | scrub_free_ctx(sctx); | 3751 | scrub_put_ctx(sctx); |
3779 | 3752 | ||
3780 | return ret; | 3753 | return ret; |
3781 | } | 3754 | } |
@@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | |||
3881 | &mapped_length, &bbio, 0); | 3854 | &mapped_length, &bbio, 0); |
3882 | if (ret || !bbio || mapped_length < extent_len || | 3855 | if (ret || !bbio || mapped_length < extent_len || |
3883 | !bbio->stripes[0].dev->bdev) { | 3856 | !bbio->stripes[0].dev->bdev) { |
3884 | kfree(bbio); | 3857 | btrfs_put_bbio(bbio); |
3885 | return; | 3858 | return; |
3886 | } | 3859 | } |
3887 | 3860 | ||
3888 | *extent_physical = bbio->stripes[0].physical; | 3861 | *extent_physical = bbio->stripes[0].physical; |
3889 | *extent_mirror_num = bbio->mirror_num; | 3862 | *extent_mirror_num = bbio->mirror_num; |
3890 | *extent_dev = bbio->stripes[0].dev; | 3863 | *extent_dev = bbio->stripes[0].dev; |
3891 | kfree(bbio); | 3864 | btrfs_put_bbio(bbio); |
3892 | } | 3865 | } |
3893 | 3866 | ||
3894 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | 3867 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 804432dbc351..fe5857223515 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -2471,12 +2471,9 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
2471 | if (ret < 0) | 2471 | if (ret < 0) |
2472 | goto out; | 2472 | goto out; |
2473 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 2473 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
2474 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, | 2474 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); |
2475 | btrfs_inode_atime(ii)); | 2475 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); |
2476 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, | 2476 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime); |
2477 | btrfs_inode_mtime(ii)); | ||
2478 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, | ||
2479 | btrfs_inode_ctime(ii)); | ||
2480 | /* TODO Add otime support when the otime patches get into upstream */ | 2477 | /* TODO Add otime support when the otime patches get into upstream */ |
2481 | 2478 | ||
2482 | ret = send_cmd(sctx); | 2479 | ret = send_cmd(sctx); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6f49b2872a64..05fef198ff94 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -1958,11 +1958,6 @@ static int btrfs_freeze(struct super_block *sb) | |||
1958 | return btrfs_commit_transaction(trans, root); | 1958 | return btrfs_commit_transaction(trans, root); |
1959 | } | 1959 | } |
1960 | 1960 | ||
1961 | static int btrfs_unfreeze(struct super_block *sb) | ||
1962 | { | ||
1963 | return 0; | ||
1964 | } | ||
1965 | |||
1966 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) | 1961 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) |
1967 | { | 1962 | { |
1968 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); | 1963 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); |
@@ -2011,7 +2006,6 @@ static const struct super_operations btrfs_super_ops = { | |||
2011 | .statfs = btrfs_statfs, | 2006 | .statfs = btrfs_statfs, |
2012 | .remount_fs = btrfs_remount, | 2007 | .remount_fs = btrfs_remount, |
2013 | .freeze_fs = btrfs_freeze, | 2008 | .freeze_fs = btrfs_freeze, |
2014 | .unfreeze_fs = btrfs_unfreeze, | ||
2015 | }; | 2009 | }; |
2016 | 2010 | ||
2017 | static const struct file_operations btrfs_ctl_fops = { | 2011 | static const struct file_operations btrfs_ctl_fops = { |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 92db3f648df4..94edb0a2a026 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -733,10 +733,18 @@ int btrfs_init_sysfs(void) | |||
733 | 733 | ||
734 | ret = btrfs_init_debugfs(); | 734 | ret = btrfs_init_debugfs(); |
735 | if (ret) | 735 | if (ret) |
736 | return ret; | 736 | goto out1; |
737 | 737 | ||
738 | init_feature_attrs(); | 738 | init_feature_attrs(); |
739 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 739 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
740 | if (ret) | ||
741 | goto out2; | ||
742 | |||
743 | return 0; | ||
744 | out2: | ||
745 | debugfs_remove_recursive(btrfs_debugfs_root_dentry); | ||
746 | out1: | ||
747 | kset_unregister(btrfs_kset); | ||
740 | 748 | ||
741 | return ret; | 749 | return ret; |
742 | } | 750 | } |
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index cc286ce97d1e..f51963a8f929 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c | |||
@@ -53,7 +53,7 @@ static int test_btrfs_split_item(void) | |||
53 | return -ENOMEM; | 53 | return -ENOMEM; |
54 | } | 54 | } |
55 | 55 | ||
56 | path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096); | 56 | path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096); |
57 | if (!eb) { | 57 | if (!eb) { |
58 | test_msg("Could not allocate dummy buffer\n"); | 58 | test_msg("Could not allocate dummy buffer\n"); |
59 | ret = -ENOMEM; | 59 | ret = -ENOMEM; |
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 7e99c2f98dd0..9e9f2368177d 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c | |||
@@ -258,8 +258,7 @@ static int test_find_delalloc(void) | |||
258 | } | 258 | } |
259 | ret = 0; | 259 | ret = 0; |
260 | out_bits: | 260 | out_bits: |
261 | clear_extent_bits(&tmp, 0, total_dirty - 1, | 261 | clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS); |
262 | (unsigned long)-1, GFP_NOFS); | ||
263 | out: | 262 | out: |
264 | if (locked_page) | 263 | if (locked_page) |
265 | page_cache_release(locked_page); | 264 | page_cache_release(locked_page); |
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 3ae0f5b8bb80..a116b55ce788 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
@@ -255,7 +255,7 @@ static noinline int test_btrfs_get_extent(void) | |||
255 | goto out; | 255 | goto out; |
256 | } | 256 | } |
257 | 257 | ||
258 | root->node = alloc_dummy_extent_buffer(0, 4096); | 258 | root->node = alloc_dummy_extent_buffer(NULL, 4096); |
259 | if (!root->node) { | 259 | if (!root->node) { |
260 | test_msg("Couldn't allocate dummy buffer\n"); | 260 | test_msg("Couldn't allocate dummy buffer\n"); |
261 | goto out; | 261 | goto out; |
@@ -843,7 +843,7 @@ static int test_hole_first(void) | |||
843 | goto out; | 843 | goto out; |
844 | } | 844 | } |
845 | 845 | ||
846 | root->node = alloc_dummy_extent_buffer(0, 4096); | 846 | root->node = alloc_dummy_extent_buffer(NULL, 4096); |
847 | if (!root->node) { | 847 | if (!root->node) { |
848 | test_msg("Couldn't allocate dummy buffer\n"); | 848 | test_msg("Couldn't allocate dummy buffer\n"); |
849 | goto out; | 849 | goto out; |
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index ec3dcb202357..73f299ebdabb 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c | |||
@@ -404,12 +404,22 @@ int btrfs_test_qgroups(void) | |||
404 | ret = -ENOMEM; | 404 | ret = -ENOMEM; |
405 | goto out; | 405 | goto out; |
406 | } | 406 | } |
407 | /* We are using this root as our extent root */ | ||
408 | root->fs_info->extent_root = root; | ||
409 | |||
410 | /* | ||
411 | * Some of the paths we test assume we have a filled out fs_info, so we | ||
412 | * just need to add the root in there so we don't panic. | ||
413 | */ | ||
414 | root->fs_info->tree_root = root; | ||
415 | root->fs_info->quota_root = root; | ||
416 | root->fs_info->quota_enabled = 1; | ||
407 | 417 | ||
408 | /* | 418 | /* |
409 | * Can't use bytenr 0, some things freak out | 419 | * Can't use bytenr 0, some things freak out |
410 | * *cough*backref walking code*cough* | 420 | * *cough*backref walking code*cough* |
411 | */ | 421 | */ |
412 | root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096); | 422 | root->node = alloc_test_extent_buffer(root->fs_info, 4096); |
413 | if (!root->node) { | 423 | if (!root->node) { |
414 | test_msg("Couldn't allocate dummy buffer\n"); | 424 | test_msg("Couldn't allocate dummy buffer\n"); |
415 | ret = -ENOMEM; | 425 | ret = -ENOMEM; |
@@ -448,17 +458,6 @@ int btrfs_test_qgroups(void) | |||
448 | goto out; | 458 | goto out; |
449 | } | 459 | } |
450 | 460 | ||
451 | /* We are using this root as our extent root */ | ||
452 | root->fs_info->extent_root = root; | ||
453 | |||
454 | /* | ||
455 | * Some of the paths we test assume we have a filled out fs_info, so we | ||
456 | * just need to addt he root in there so we don't panic. | ||
457 | */ | ||
458 | root->fs_info->tree_root = root; | ||
459 | root->fs_info->quota_root = root; | ||
460 | root->fs_info->quota_enabled = 1; | ||
461 | |||
462 | test_msg("Running qgroup tests\n"); | 461 | test_msg("Running qgroup tests\n"); |
463 | ret = test_no_shared_qgroup(root); | 462 | ret = test_no_shared_qgroup(root); |
464 | if (ret) | 463 | if (ret) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e88b59d13439..7e80f32550a6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -220,6 +220,7 @@ loop: | |||
220 | * commit the transaction. | 220 | * commit the transaction. |
221 | */ | 221 | */ |
222 | atomic_set(&cur_trans->use_count, 2); | 222 | atomic_set(&cur_trans->use_count, 2); |
223 | cur_trans->have_free_bgs = 0; | ||
223 | cur_trans->start_time = get_seconds(); | 224 | cur_trans->start_time = get_seconds(); |
224 | 225 | ||
225 | cur_trans->delayed_refs.href_root = RB_ROOT; | 226 | cur_trans->delayed_refs.href_root = RB_ROOT; |
@@ -248,6 +249,8 @@ loop: | |||
248 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | 249 | INIT_LIST_HEAD(&cur_trans->pending_chunks); |
249 | INIT_LIST_HEAD(&cur_trans->switch_commits); | 250 | INIT_LIST_HEAD(&cur_trans->switch_commits); |
250 | INIT_LIST_HEAD(&cur_trans->pending_ordered); | 251 | INIT_LIST_HEAD(&cur_trans->pending_ordered); |
252 | INIT_LIST_HEAD(&cur_trans->dirty_bgs); | ||
253 | spin_lock_init(&cur_trans->dirty_bgs_lock); | ||
251 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 254 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
252 | extent_io_tree_init(&cur_trans->dirty_pages, | 255 | extent_io_tree_init(&cur_trans->dirty_pages, |
253 | fs_info->btree_inode->i_mapping); | 256 | fs_info->btree_inode->i_mapping); |
@@ -1020,6 +1023,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
1020 | u64 old_root_bytenr; | 1023 | u64 old_root_bytenr; |
1021 | u64 old_root_used; | 1024 | u64 old_root_used; |
1022 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 1025 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
1026 | bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID); | ||
1023 | 1027 | ||
1024 | old_root_used = btrfs_root_used(&root->root_item); | 1028 | old_root_used = btrfs_root_used(&root->root_item); |
1025 | btrfs_write_dirty_block_groups(trans, root); | 1029 | btrfs_write_dirty_block_groups(trans, root); |
@@ -1027,7 +1031,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
1027 | while (1) { | 1031 | while (1) { |
1028 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 1032 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
1029 | if (old_root_bytenr == root->node->start && | 1033 | if (old_root_bytenr == root->node->start && |
1030 | old_root_used == btrfs_root_used(&root->root_item)) | 1034 | old_root_used == btrfs_root_used(&root->root_item) && |
1035 | (!extent_root || | ||
1036 | list_empty(&trans->transaction->dirty_bgs))) | ||
1031 | break; | 1037 | break; |
1032 | 1038 | ||
1033 | btrfs_set_root_node(&root->root_item, root->node); | 1039 | btrfs_set_root_node(&root->root_item, root->node); |
@@ -1038,7 +1044,15 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
1038 | return ret; | 1044 | return ret; |
1039 | 1045 | ||
1040 | old_root_used = btrfs_root_used(&root->root_item); | 1046 | old_root_used = btrfs_root_used(&root->root_item); |
1041 | ret = btrfs_write_dirty_block_groups(trans, root); | 1047 | if (extent_root) { |
1048 | ret = btrfs_write_dirty_block_groups(trans, root); | ||
1049 | if (ret) | ||
1050 | return ret; | ||
1051 | } | ||
1052 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1053 | if (ret) | ||
1054 | return ret; | ||
1055 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1042 | if (ret) | 1056 | if (ret) |
1043 | return ret; | 1057 | return ret; |
1044 | } | 1058 | } |
@@ -1061,10 +1075,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
1061 | struct extent_buffer *eb; | 1075 | struct extent_buffer *eb; |
1062 | int ret; | 1076 | int ret; |
1063 | 1077 | ||
1064 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1065 | if (ret) | ||
1066 | return ret; | ||
1067 | |||
1068 | eb = btrfs_lock_root_node(fs_info->tree_root); | 1078 | eb = btrfs_lock_root_node(fs_info->tree_root); |
1069 | ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, | 1079 | ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, |
1070 | 0, &eb); | 1080 | 0, &eb); |
@@ -1097,6 +1107,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
1097 | next = fs_info->dirty_cowonly_roots.next; | 1107 | next = fs_info->dirty_cowonly_roots.next; |
1098 | list_del_init(next); | 1108 | list_del_init(next); |
1099 | root = list_entry(next, struct btrfs_root, dirty_list); | 1109 | root = list_entry(next, struct btrfs_root, dirty_list); |
1110 | clear_bit(BTRFS_ROOT_DIRTY, &root->state); | ||
1100 | 1111 | ||
1101 | if (root != fs_info->extent_root) | 1112 | if (root != fs_info->extent_root) |
1102 | list_add_tail(&root->dirty_list, | 1113 | list_add_tail(&root->dirty_list, |
@@ -1983,6 +1994,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1983 | switch_commit_roots(cur_trans, root->fs_info); | 1994 | switch_commit_roots(cur_trans, root->fs_info); |
1984 | 1995 | ||
1985 | assert_qgroups_uptodate(trans); | 1996 | assert_qgroups_uptodate(trans); |
1997 | ASSERT(list_empty(&cur_trans->dirty_bgs)); | ||
1986 | update_super_roots(root); | 1998 | update_super_roots(root); |
1987 | 1999 | ||
1988 | btrfs_set_super_log_root(root->fs_info->super_copy, 0); | 2000 | btrfs_set_super_log_root(root->fs_info->super_copy, 0); |
@@ -2026,6 +2038,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
2026 | 2038 | ||
2027 | btrfs_finish_extent_commit(trans, root); | 2039 | btrfs_finish_extent_commit(trans, root); |
2028 | 2040 | ||
2041 | if (cur_trans->have_free_bgs) | ||
2042 | btrfs_clear_space_info_full(root->fs_info); | ||
2043 | |||
2029 | root->fs_info->last_trans_committed = cur_trans->transid; | 2044 | root->fs_info->last_trans_committed = cur_trans->transid; |
2030 | /* | 2045 | /* |
2031 | * We needn't acquire the lock here because there is no other task | 2046 | * We needn't acquire the lock here because there is no other task |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 00ed29c4b3f9..937050a2b68e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -47,6 +47,11 @@ struct btrfs_transaction { | |||
47 | atomic_t num_writers; | 47 | atomic_t num_writers; |
48 | atomic_t use_count; | 48 | atomic_t use_count; |
49 | 49 | ||
50 | /* | ||
51 | * true if there is free bgs operations in this transaction | ||
52 | */ | ||
53 | int have_free_bgs; | ||
54 | |||
50 | /* Be protected by fs_info->trans_lock when we want to change it. */ | 55 | /* Be protected by fs_info->trans_lock when we want to change it. */ |
51 | enum btrfs_trans_state state; | 56 | enum btrfs_trans_state state; |
52 | struct list_head list; | 57 | struct list_head list; |
@@ -58,6 +63,8 @@ struct btrfs_transaction { | |||
58 | struct list_head pending_chunks; | 63 | struct list_head pending_chunks; |
59 | struct list_head pending_ordered; | 64 | struct list_head pending_ordered; |
60 | struct list_head switch_commits; | 65 | struct list_head switch_commits; |
66 | struct list_head dirty_bgs; | ||
67 | spinlock_t dirty_bgs_lock; | ||
61 | struct btrfs_delayed_ref_root delayed_refs; | 68 | struct btrfs_delayed_ref_root delayed_refs; |
62 | int aborted; | 69 | int aborted; |
63 | }; | 70 | }; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1a9585d4380a..9a37f8b39bae 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, | |||
453 | insert: | 453 | insert: |
454 | btrfs_release_path(path); | 454 | btrfs_release_path(path); |
455 | /* try to insert the key into the destination tree */ | 455 | /* try to insert the key into the destination tree */ |
456 | path->skip_release_on_error = 1; | ||
456 | ret = btrfs_insert_empty_item(trans, root, path, | 457 | ret = btrfs_insert_empty_item(trans, root, path, |
457 | key, item_size); | 458 | key, item_size); |
459 | path->skip_release_on_error = 0; | ||
458 | 460 | ||
459 | /* make sure any existing item is the correct size */ | 461 | /* make sure any existing item is the correct size */ |
460 | if (ret == -EEXIST) { | 462 | if (ret == -EEXIST || ret == -EOVERFLOW) { |
461 | u32 found_size; | 463 | u32 found_size; |
462 | found_size = btrfs_item_size_nr(path->nodes[0], | 464 | found_size = btrfs_item_size_nr(path->nodes[0], |
463 | path->slots[0]); | 465 | path->slots[0]); |
@@ -488,8 +490,20 @@ insert: | |||
488 | src_item = (struct btrfs_inode_item *)src_ptr; | 490 | src_item = (struct btrfs_inode_item *)src_ptr; |
489 | dst_item = (struct btrfs_inode_item *)dst_ptr; | 491 | dst_item = (struct btrfs_inode_item *)dst_ptr; |
490 | 492 | ||
491 | if (btrfs_inode_generation(eb, src_item) == 0) | 493 | if (btrfs_inode_generation(eb, src_item) == 0) { |
494 | struct extent_buffer *dst_eb = path->nodes[0]; | ||
495 | |||
496 | if (S_ISREG(btrfs_inode_mode(eb, src_item)) && | ||
497 | S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { | ||
498 | struct btrfs_map_token token; | ||
499 | u64 ino_size = btrfs_inode_size(eb, src_item); | ||
500 | |||
501 | btrfs_init_map_token(&token); | ||
502 | btrfs_set_token_inode_size(dst_eb, dst_item, | ||
503 | ino_size, &token); | ||
504 | } | ||
492 | goto no_copy; | 505 | goto no_copy; |
506 | } | ||
493 | 507 | ||
494 | if (overwrite_root && | 508 | if (overwrite_root && |
495 | S_ISDIR(btrfs_inode_mode(eb, src_item)) && | 509 | S_ISDIR(btrfs_inode_mode(eb, src_item)) && |
@@ -844,7 +858,7 @@ out: | |||
844 | static noinline int backref_in_log(struct btrfs_root *log, | 858 | static noinline int backref_in_log(struct btrfs_root *log, |
845 | struct btrfs_key *key, | 859 | struct btrfs_key *key, |
846 | u64 ref_objectid, | 860 | u64 ref_objectid, |
847 | char *name, int namelen) | 861 | const char *name, int namelen) |
848 | { | 862 | { |
849 | struct btrfs_path *path; | 863 | struct btrfs_path *path; |
850 | struct btrfs_inode_ref *ref; | 864 | struct btrfs_inode_ref *ref; |
@@ -1254,13 +1268,14 @@ out: | |||
1254 | } | 1268 | } |
1255 | 1269 | ||
1256 | static int insert_orphan_item(struct btrfs_trans_handle *trans, | 1270 | static int insert_orphan_item(struct btrfs_trans_handle *trans, |
1257 | struct btrfs_root *root, u64 offset) | 1271 | struct btrfs_root *root, u64 ino) |
1258 | { | 1272 | { |
1259 | int ret; | 1273 | int ret; |
1260 | ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID, | 1274 | |
1261 | offset, BTRFS_ORPHAN_ITEM_KEY, NULL); | 1275 | ret = btrfs_insert_orphan_item(trans, root, ino); |
1262 | if (ret > 0) | 1276 | if (ret == -EEXIST) |
1263 | ret = btrfs_insert_orphan_item(trans, root, offset); | 1277 | ret = 0; |
1278 | |||
1264 | return ret; | 1279 | return ret; |
1265 | } | 1280 | } |
1266 | 1281 | ||
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root, | |||
1287 | leaf = path->nodes[0]; | 1302 | leaf = path->nodes[0]; |
1288 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | 1303 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
1289 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | 1304 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
1305 | cur_offset = 0; | ||
1290 | 1306 | ||
1291 | while (cur_offset < item_size) { | 1307 | while (cur_offset < item_size) { |
1292 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); | 1308 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root, | |||
1302 | } | 1318 | } |
1303 | btrfs_release_path(path); | 1319 | btrfs_release_path(path); |
1304 | 1320 | ||
1305 | if (ret < 0) | 1321 | if (ret < 0 && ret != -ENOENT) |
1306 | return ret; | 1322 | return ret; |
1307 | return nlink; | 1323 | return nlink; |
1308 | } | 1324 | } |
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1394 | nlink = ret; | 1410 | nlink = ret; |
1395 | 1411 | ||
1396 | ret = count_inode_extrefs(root, inode, path); | 1412 | ret = count_inode_extrefs(root, inode, path); |
1397 | if (ret == -ENOENT) | ||
1398 | ret = 0; | ||
1399 | |||
1400 | if (ret < 0) | 1413 | if (ret < 0) |
1401 | goto out; | 1414 | goto out; |
1402 | 1415 | ||
@@ -1557,6 +1570,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, | |||
1557 | } | 1570 | } |
1558 | 1571 | ||
1559 | /* | 1572 | /* |
1573 | * Return true if an inode reference exists in the log for the given name, | ||
1574 | * inode and parent inode. | ||
1575 | */ | ||
1576 | static bool name_in_log_ref(struct btrfs_root *log_root, | ||
1577 | const char *name, const int name_len, | ||
1578 | const u64 dirid, const u64 ino) | ||
1579 | { | ||
1580 | struct btrfs_key search_key; | ||
1581 | |||
1582 | search_key.objectid = ino; | ||
1583 | search_key.type = BTRFS_INODE_REF_KEY; | ||
1584 | search_key.offset = dirid; | ||
1585 | if (backref_in_log(log_root, &search_key, dirid, name, name_len)) | ||
1586 | return true; | ||
1587 | |||
1588 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
1589 | search_key.offset = btrfs_extref_hash(dirid, name, name_len); | ||
1590 | if (backref_in_log(log_root, &search_key, dirid, name, name_len)) | ||
1591 | return true; | ||
1592 | |||
1593 | return false; | ||
1594 | } | ||
1595 | |||
1596 | /* | ||
1560 | * take a single entry in a log directory item and replay it into | 1597 | * take a single entry in a log directory item and replay it into |
1561 | * the subvolume. | 1598 | * the subvolume. |
1562 | * | 1599 | * |
@@ -1666,10 +1703,17 @@ out: | |||
1666 | return ret; | 1703 | return ret; |
1667 | 1704 | ||
1668 | insert: | 1705 | insert: |
1706 | if (name_in_log_ref(root->log_root, name, name_len, | ||
1707 | key->objectid, log_key.objectid)) { | ||
1708 | /* The dentry will be added later. */ | ||
1709 | ret = 0; | ||
1710 | update_size = false; | ||
1711 | goto out; | ||
1712 | } | ||
1669 | btrfs_release_path(path); | 1713 | btrfs_release_path(path); |
1670 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | 1714 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, |
1671 | name, name_len, log_type, &log_key); | 1715 | name, name_len, log_type, &log_key); |
1672 | if (ret && ret != -ENOENT) | 1716 | if (ret && ret != -ENOENT && ret != -EEXIST) |
1673 | goto out; | 1717 | goto out; |
1674 | update_size = false; | 1718 | update_size = false; |
1675 | ret = 0; | 1719 | ret = 0; |
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
2164 | parent = path->nodes[*level]; | 2208 | parent = path->nodes[*level]; |
2165 | root_owner = btrfs_header_owner(parent); | 2209 | root_owner = btrfs_header_owner(parent); |
2166 | 2210 | ||
2167 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 2211 | next = btrfs_find_create_tree_block(root, bytenr); |
2168 | if (!next) | 2212 | if (!next) |
2169 | return -ENOMEM; | 2213 | return -ENOMEM; |
2170 | 2214 | ||
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
2416 | mutex_unlock(&root->log_mutex); | 2460 | mutex_unlock(&root->log_mutex); |
2417 | if (atomic_read(&root->log_writers)) | 2461 | if (atomic_read(&root->log_writers)) |
2418 | schedule(); | 2462 | schedule(); |
2419 | mutex_lock(&root->log_mutex); | ||
2420 | finish_wait(&root->log_writer_wait, &wait); | 2463 | finish_wait(&root->log_writer_wait, &wait); |
2464 | mutex_lock(&root->log_mutex); | ||
2421 | } | 2465 | } |
2422 | } | 2466 | } |
2423 | 2467 | ||
@@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
3219 | static void fill_inode_item(struct btrfs_trans_handle *trans, | 3263 | static void fill_inode_item(struct btrfs_trans_handle *trans, |
3220 | struct extent_buffer *leaf, | 3264 | struct extent_buffer *leaf, |
3221 | struct btrfs_inode_item *item, | 3265 | struct btrfs_inode_item *item, |
3222 | struct inode *inode, int log_inode_only) | 3266 | struct inode *inode, int log_inode_only, |
3267 | u64 logged_isize) | ||
3223 | { | 3268 | { |
3224 | struct btrfs_map_token token; | 3269 | struct btrfs_map_token token; |
3225 | 3270 | ||
@@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
3232 | * to say 'update this inode with these values' | 3277 | * to say 'update this inode with these values' |
3233 | */ | 3278 | */ |
3234 | btrfs_set_token_inode_generation(leaf, item, 0, &token); | 3279 | btrfs_set_token_inode_generation(leaf, item, 0, &token); |
3235 | btrfs_set_token_inode_size(leaf, item, 0, &token); | 3280 | btrfs_set_token_inode_size(leaf, item, logged_isize, &token); |
3236 | } else { | 3281 | } else { |
3237 | btrfs_set_token_inode_generation(leaf, item, | 3282 | btrfs_set_token_inode_generation(leaf, item, |
3238 | BTRFS_I(inode)->generation, | 3283 | BTRFS_I(inode)->generation, |
@@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
3245 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); | 3290 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); |
3246 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); | 3291 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); |
3247 | 3292 | ||
3248 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), | 3293 | btrfs_set_token_timespec_sec(leaf, &item->atime, |
3249 | inode->i_atime.tv_sec, &token); | 3294 | inode->i_atime.tv_sec, &token); |
3250 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), | 3295 | btrfs_set_token_timespec_nsec(leaf, &item->atime, |
3251 | inode->i_atime.tv_nsec, &token); | 3296 | inode->i_atime.tv_nsec, &token); |
3252 | 3297 | ||
3253 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), | 3298 | btrfs_set_token_timespec_sec(leaf, &item->mtime, |
3254 | inode->i_mtime.tv_sec, &token); | 3299 | inode->i_mtime.tv_sec, &token); |
3255 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), | 3300 | btrfs_set_token_timespec_nsec(leaf, &item->mtime, |
3256 | inode->i_mtime.tv_nsec, &token); | 3301 | inode->i_mtime.tv_nsec, &token); |
3257 | 3302 | ||
3258 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), | 3303 | btrfs_set_token_timespec_sec(leaf, &item->ctime, |
3259 | inode->i_ctime.tv_sec, &token); | 3304 | inode->i_ctime.tv_sec, &token); |
3260 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), | 3305 | btrfs_set_token_timespec_nsec(leaf, &item->ctime, |
3261 | inode->i_ctime.tv_nsec, &token); | 3306 | inode->i_ctime.tv_nsec, &token); |
3262 | 3307 | ||
3263 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), | 3308 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), |
@@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans, | |||
3284 | return ret; | 3329 | return ret; |
3285 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3330 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3286 | struct btrfs_inode_item); | 3331 | struct btrfs_inode_item); |
3287 | fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); | 3332 | fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0); |
3288 | btrfs_release_path(path); | 3333 | btrfs_release_path(path); |
3289 | return 0; | 3334 | return 0; |
3290 | } | 3335 | } |
@@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3293 | struct inode *inode, | 3338 | struct inode *inode, |
3294 | struct btrfs_path *dst_path, | 3339 | struct btrfs_path *dst_path, |
3295 | struct btrfs_path *src_path, u64 *last_extent, | 3340 | struct btrfs_path *src_path, u64 *last_extent, |
3296 | int start_slot, int nr, int inode_only) | 3341 | int start_slot, int nr, int inode_only, |
3342 | u64 logged_isize) | ||
3297 | { | 3343 | { |
3298 | unsigned long src_offset; | 3344 | unsigned long src_offset; |
3299 | unsigned long dst_offset; | 3345 | unsigned long dst_offset; |
@@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3350 | dst_path->slots[0], | 3396 | dst_path->slots[0], |
3351 | struct btrfs_inode_item); | 3397 | struct btrfs_inode_item); |
3352 | fill_inode_item(trans, dst_path->nodes[0], inode_item, | 3398 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
3353 | inode, inode_only == LOG_INODE_EXISTS); | 3399 | inode, inode_only == LOG_INODE_EXISTS, |
3400 | logged_isize); | ||
3354 | } else { | 3401 | } else { |
3355 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3402 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
3356 | src_offset, ins_sizes[i]); | 3403 | src_offset, ins_sizes[i]); |
@@ -3902,6 +3949,33 @@ process: | |||
3902 | return ret; | 3949 | return ret; |
3903 | } | 3950 | } |
3904 | 3951 | ||
3952 | static int logged_inode_size(struct btrfs_root *log, struct inode *inode, | ||
3953 | struct btrfs_path *path, u64 *size_ret) | ||
3954 | { | ||
3955 | struct btrfs_key key; | ||
3956 | int ret; | ||
3957 | |||
3958 | key.objectid = btrfs_ino(inode); | ||
3959 | key.type = BTRFS_INODE_ITEM_KEY; | ||
3960 | key.offset = 0; | ||
3961 | |||
3962 | ret = btrfs_search_slot(NULL, log, &key, path, 0, 0); | ||
3963 | if (ret < 0) { | ||
3964 | return ret; | ||
3965 | } else if (ret > 0) { | ||
3966 | *size_ret = i_size_read(inode); | ||
3967 | } else { | ||
3968 | struct btrfs_inode_item *item; | ||
3969 | |||
3970 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3971 | struct btrfs_inode_item); | ||
3972 | *size_ret = btrfs_inode_size(path->nodes[0], item); | ||
3973 | } | ||
3974 | |||
3975 | btrfs_release_path(path); | ||
3976 | return 0; | ||
3977 | } | ||
3978 | |||
3905 | /* log a single inode in the tree log. | 3979 | /* log a single inode in the tree log. |
3906 | * At least one parent directory for this inode must exist in the tree | 3980 | * At least one parent directory for this inode must exist in the tree |
3907 | * or be logged already. | 3981 | * or be logged already. |
@@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3939 | bool fast_search = false; | 4013 | bool fast_search = false; |
3940 | u64 ino = btrfs_ino(inode); | 4014 | u64 ino = btrfs_ino(inode); |
3941 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 4015 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4016 | u64 logged_isize = 0; | ||
3942 | 4017 | ||
3943 | path = btrfs_alloc_path(); | 4018 | path = btrfs_alloc_path(); |
3944 | if (!path) | 4019 | if (!path) |
@@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3966 | max_key.type = (u8)-1; | 4041 | max_key.type = (u8)-1; |
3967 | max_key.offset = (u64)-1; | 4042 | max_key.offset = (u64)-1; |
3968 | 4043 | ||
3969 | /* Only run delayed items if we are a dir or a new file */ | 4044 | /* |
4045 | * Only run delayed items if we are a dir or a new file. | ||
4046 | * Otherwise commit the delayed inode only, which is needed in | ||
4047 | * order for the log replay code to mark inodes for link count | ||
4048 | * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items). | ||
4049 | */ | ||
3970 | if (S_ISDIR(inode->i_mode) || | 4050 | if (S_ISDIR(inode->i_mode) || |
3971 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { | 4051 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) |
3972 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 4052 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
3973 | if (ret) { | 4053 | else |
3974 | btrfs_free_path(path); | 4054 | ret = btrfs_commit_inode_delayed_inode(inode); |
3975 | btrfs_free_path(dst_path); | 4055 | |
3976 | return ret; | 4056 | if (ret) { |
3977 | } | 4057 | btrfs_free_path(path); |
4058 | btrfs_free_path(dst_path); | ||
4059 | return ret; | ||
3978 | } | 4060 | } |
3979 | 4061 | ||
3980 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 4062 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
@@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3988 | if (S_ISDIR(inode->i_mode)) { | 4070 | if (S_ISDIR(inode->i_mode)) { |
3989 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; | 4071 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; |
3990 | 4072 | ||
3991 | if (inode_only == LOG_INODE_EXISTS) | 4073 | if (inode_only == LOG_INODE_EXISTS) { |
3992 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 4074 | max_key_type = BTRFS_INODE_EXTREF_KEY; |
4075 | max_key.type = max_key_type; | ||
4076 | } | ||
3993 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 4077 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
3994 | } else { | 4078 | } else { |
3995 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 4079 | if (inode_only == LOG_INODE_EXISTS) { |
3996 | &BTRFS_I(inode)->runtime_flags)) { | 4080 | /* |
3997 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 4081 | * Make sure the new inode item we write to the log has |
3998 | &BTRFS_I(inode)->runtime_flags); | 4082 | * the same isize as the current one (if it exists). |
3999 | ret = btrfs_truncate_inode_items(trans, log, | 4083 | * This is necessary to prevent data loss after log |
4000 | inode, 0, 0); | 4084 | * replay, and also to prevent doing a wrong expanding |
4001 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 4085 | * truncate - for e.g. create file, write 4K into offset |
4002 | &BTRFS_I(inode)->runtime_flags) || | 4086 | * 0, fsync, write 4K into offset 4096, add hard link, |
4087 | * fsync some other file (to sync log), power fail - if | ||
4088 | * we use the inode's current i_size, after log replay | ||
4089 | * we get a 8Kb file, with the last 4Kb extent as a hole | ||
4090 | * (zeroes), as if an expanding truncate happened, | ||
4091 | * instead of getting a file of 4Kb only. | ||
4092 | */ | ||
4093 | err = logged_inode_size(log, inode, path, | ||
4094 | &logged_isize); | ||
4095 | if (err) | ||
4096 | goto out_unlock; | ||
4097 | } | ||
4098 | if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
4099 | &BTRFS_I(inode)->runtime_flags)) { | ||
4100 | if (inode_only == LOG_INODE_EXISTS) { | ||
4101 | max_key.type = BTRFS_INODE_EXTREF_KEY; | ||
4102 | ret = drop_objectid_items(trans, log, path, ino, | ||
4103 | max_key.type); | ||
4104 | } else { | ||
4105 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
4106 | &BTRFS_I(inode)->runtime_flags); | ||
4107 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4108 | &BTRFS_I(inode)->runtime_flags); | ||
4109 | ret = btrfs_truncate_inode_items(trans, log, | ||
4110 | inode, 0, 0); | ||
4111 | } | ||
4112 | } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4113 | &BTRFS_I(inode)->runtime_flags) || | ||
4003 | inode_only == LOG_INODE_EXISTS) { | 4114 | inode_only == LOG_INODE_EXISTS) { |
4004 | if (inode_only == LOG_INODE_ALL) | 4115 | if (inode_only == LOG_INODE_ALL) { |
4116 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4117 | &BTRFS_I(inode)->runtime_flags); | ||
4005 | fast_search = true; | 4118 | fast_search = true; |
4006 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 4119 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
4120 | } else { | ||
4121 | max_key.type = BTRFS_INODE_EXTREF_KEY; | ||
4122 | } | ||
4007 | ret = drop_objectid_items(trans, log, path, ino, | 4123 | ret = drop_objectid_items(trans, log, path, ino, |
4008 | max_key.type); | 4124 | max_key.type); |
4009 | } else { | 4125 | } else { |
@@ -4047,7 +4163,8 @@ again: | |||
4047 | } | 4163 | } |
4048 | 4164 | ||
4049 | ret = copy_items(trans, inode, dst_path, path, &last_extent, | 4165 | ret = copy_items(trans, inode, dst_path, path, &last_extent, |
4050 | ins_start_slot, ins_nr, inode_only); | 4166 | ins_start_slot, ins_nr, inode_only, |
4167 | logged_isize); | ||
4051 | if (ret < 0) { | 4168 | if (ret < 0) { |
4052 | err = ret; | 4169 | err = ret; |
4053 | goto out_unlock; | 4170 | goto out_unlock; |
@@ -4071,7 +4188,7 @@ next_slot: | |||
4071 | if (ins_nr) { | 4188 | if (ins_nr) { |
4072 | ret = copy_items(trans, inode, dst_path, path, | 4189 | ret = copy_items(trans, inode, dst_path, path, |
4073 | &last_extent, ins_start_slot, | 4190 | &last_extent, ins_start_slot, |
4074 | ins_nr, inode_only); | 4191 | ins_nr, inode_only, logged_isize); |
4075 | if (ret < 0) { | 4192 | if (ret < 0) { |
4076 | err = ret; | 4193 | err = ret; |
4077 | goto out_unlock; | 4194 | goto out_unlock; |
@@ -4092,7 +4209,8 @@ next_slot: | |||
4092 | } | 4209 | } |
4093 | if (ins_nr) { | 4210 | if (ins_nr) { |
4094 | ret = copy_items(trans, inode, dst_path, path, &last_extent, | 4211 | ret = copy_items(trans, inode, dst_path, path, &last_extent, |
4095 | ins_start_slot, ins_nr, inode_only); | 4212 | ins_start_slot, ins_nr, inode_only, |
4213 | logged_isize); | ||
4096 | if (ret < 0) { | 4214 | if (ret < 0) { |
4097 | err = ret; | 4215 | err = ret; |
4098 | goto out_unlock; | 4216 | goto out_unlock; |
@@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4273 | struct dentry *old_parent = NULL; | 4391 | struct dentry *old_parent = NULL; |
4274 | int ret = 0; | 4392 | int ret = 0; |
4275 | u64 last_committed = root->fs_info->last_trans_committed; | 4393 | u64 last_committed = root->fs_info->last_trans_committed; |
4394 | const struct dentry * const first_parent = parent; | ||
4395 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > | ||
4396 | last_committed); | ||
4276 | 4397 | ||
4277 | sb = inode->i_sb; | 4398 | sb = inode->i_sb; |
4278 | 4399 | ||
@@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4328 | goto end_trans; | 4449 | goto end_trans; |
4329 | } | 4450 | } |
4330 | 4451 | ||
4331 | inode_only = LOG_INODE_EXISTS; | ||
4332 | while (1) { | 4452 | while (1) { |
4333 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 4453 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
4334 | break; | 4454 | break; |
@@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4337 | if (root != BTRFS_I(inode)->root) | 4457 | if (root != BTRFS_I(inode)->root) |
4338 | break; | 4458 | break; |
4339 | 4459 | ||
4460 | /* | ||
4461 | * On unlink we must make sure our immediate parent directory | ||
4462 | * inode is fully logged. This is to prevent leaving dangling | ||
4463 | * directory index entries and a wrong directory inode's i_size. | ||
4464 | * Not doing so can result in a directory being impossible to | ||
4465 | * delete after log replay (rmdir will always fail with error | ||
4466 | * -ENOTEMPTY). | ||
4467 | */ | ||
4468 | if (did_unlink && parent == first_parent) | ||
4469 | inode_only = LOG_INODE_ALL; | ||
4470 | else | ||
4471 | inode_only = LOG_INODE_EXISTS; | ||
4472 | |||
4340 | if (BTRFS_I(inode)->generation > | 4473 | if (BTRFS_I(inode)->generation > |
4341 | root->fs_info->last_trans_committed) { | 4474 | root->fs_info->last_trans_committed || |
4475 | inode_only == LOG_INODE_ALL) { | ||
4342 | ret = btrfs_log_inode(trans, root, inode, inode_only, | 4476 | ret = btrfs_log_inode(trans, root, inode, inode_only, |
4343 | 0, LLONG_MAX, ctx); | 4477 | 0, LLONG_MAX, ctx); |
4344 | if (ret) | 4478 | if (ret) |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 50c5a8762aed..cd4d1315aaa9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1310,6 +1310,8 @@ again: | |||
1310 | if (ret) { | 1310 | if (ret) { |
1311 | btrfs_error(root->fs_info, ret, | 1311 | btrfs_error(root->fs_info, ret, |
1312 | "Failed to remove dev extent item"); | 1312 | "Failed to remove dev extent item"); |
1313 | } else { | ||
1314 | trans->transaction->have_free_bgs = 1; | ||
1313 | } | 1315 | } |
1314 | out: | 1316 | out: |
1315 | btrfs_free_path(path); | 1317 | btrfs_free_path(path); |
@@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) | |||
4196 | 4198 | ||
4197 | static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) | 4199 | static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) |
4198 | { | 4200 | { |
4199 | if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) | 4201 | if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) |
4200 | return; | 4202 | return; |
4201 | 4203 | ||
4202 | btrfs_set_fs_incompat(info, RAID56); | 4204 | btrfs_set_fs_incompat(info, RAID56); |
@@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, | |||
4803 | 4805 | ||
4804 | BUG_ON(em->start > logical || em->start + em->len < logical); | 4806 | BUG_ON(em->start > logical || em->start + em->len < logical); |
4805 | map = (struct map_lookup *)em->bdev; | 4807 | map = (struct map_lookup *)em->bdev; |
4806 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 4808 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) |
4807 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
4808 | len = map->stripe_len * nr_data_stripes(map); | 4809 | len = map->stripe_len * nr_data_stripes(map); |
4809 | } | ||
4810 | free_extent_map(em); | 4810 | free_extent_map(em); |
4811 | return len; | 4811 | return len; |
4812 | } | 4812 | } |
@@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, | |||
4826 | 4826 | ||
4827 | BUG_ON(em->start > logical || em->start + em->len < logical); | 4827 | BUG_ON(em->start > logical || em->start + em->len < logical); |
4828 | map = (struct map_lookup *)em->bdev; | 4828 | map = (struct map_lookup *)em->bdev; |
4829 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 4829 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) |
4830 | BTRFS_BLOCK_GROUP_RAID6)) | ||
4831 | ret = 1; | 4830 | ret = 1; |
4832 | free_extent_map(em); | 4831 | free_extent_map(em); |
4833 | return ret; | 4832 | return ret; |
@@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b) | |||
4876 | } | 4875 | } |
4877 | 4876 | ||
4878 | /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ | 4877 | /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ |
4879 | static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) | 4878 | static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) |
4880 | { | 4879 | { |
4881 | struct btrfs_bio_stripe s; | 4880 | struct btrfs_bio_stripe s; |
4882 | int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; | ||
4883 | int i; | 4881 | int i; |
4884 | u64 l; | 4882 | u64 l; |
4885 | int again = 1; | 4883 | int again = 1; |
4886 | int m; | ||
4887 | 4884 | ||
4888 | while (again) { | 4885 | while (again) { |
4889 | again = 0; | 4886 | again = 0; |
4890 | for (i = 0; i < real_stripes - 1; i++) { | 4887 | for (i = 0; i < num_stripes - 1; i++) { |
4891 | if (parity_smaller(raid_map[i], raid_map[i+1])) { | 4888 | if (parity_smaller(bbio->raid_map[i], |
4889 | bbio->raid_map[i+1])) { | ||
4892 | s = bbio->stripes[i]; | 4890 | s = bbio->stripes[i]; |
4893 | l = raid_map[i]; | 4891 | l = bbio->raid_map[i]; |
4894 | bbio->stripes[i] = bbio->stripes[i+1]; | 4892 | bbio->stripes[i] = bbio->stripes[i+1]; |
4895 | raid_map[i] = raid_map[i+1]; | 4893 | bbio->raid_map[i] = bbio->raid_map[i+1]; |
4896 | bbio->stripes[i+1] = s; | 4894 | bbio->stripes[i+1] = s; |
4897 | raid_map[i+1] = l; | 4895 | bbio->raid_map[i+1] = l; |
4898 | |||
4899 | if (bbio->tgtdev_map) { | ||
4900 | m = bbio->tgtdev_map[i]; | ||
4901 | bbio->tgtdev_map[i] = | ||
4902 | bbio->tgtdev_map[i + 1]; | ||
4903 | bbio->tgtdev_map[i + 1] = m; | ||
4904 | } | ||
4905 | 4896 | ||
4906 | again = 1; | 4897 | again = 1; |
4907 | } | 4898 | } |
@@ -4909,10 +4900,41 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) | |||
4909 | } | 4900 | } |
4910 | } | 4901 | } |
4911 | 4902 | ||
4903 | static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) | ||
4904 | { | ||
4905 | struct btrfs_bio *bbio = kzalloc( | ||
4906 | sizeof(struct btrfs_bio) + | ||
4907 | sizeof(struct btrfs_bio_stripe) * (total_stripes) + | ||
4908 | sizeof(int) * (real_stripes) + | ||
4909 | sizeof(u64) * (real_stripes), | ||
4910 | GFP_NOFS); | ||
4911 | if (!bbio) | ||
4912 | return NULL; | ||
4913 | |||
4914 | atomic_set(&bbio->error, 0); | ||
4915 | atomic_set(&bbio->refs, 1); | ||
4916 | |||
4917 | return bbio; | ||
4918 | } | ||
4919 | |||
4920 | void btrfs_get_bbio(struct btrfs_bio *bbio) | ||
4921 | { | ||
4922 | WARN_ON(!atomic_read(&bbio->refs)); | ||
4923 | atomic_inc(&bbio->refs); | ||
4924 | } | ||
4925 | |||
4926 | void btrfs_put_bbio(struct btrfs_bio *bbio) | ||
4927 | { | ||
4928 | if (!bbio) | ||
4929 | return; | ||
4930 | if (atomic_dec_and_test(&bbio->refs)) | ||
4931 | kfree(bbio); | ||
4932 | } | ||
4933 | |||
4912 | static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | 4934 | static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
4913 | u64 logical, u64 *length, | 4935 | u64 logical, u64 *length, |
4914 | struct btrfs_bio **bbio_ret, | 4936 | struct btrfs_bio **bbio_ret, |
4915 | int mirror_num, u64 **raid_map_ret) | 4937 | int mirror_num, int need_raid_map) |
4916 | { | 4938 | { |
4917 | struct extent_map *em; | 4939 | struct extent_map *em; |
4918 | struct map_lookup *map; | 4940 | struct map_lookup *map; |
@@ -4925,7 +4947,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4925 | u64 stripe_nr_orig; | 4947 | u64 stripe_nr_orig; |
4926 | u64 stripe_nr_end; | 4948 | u64 stripe_nr_end; |
4927 | u64 stripe_len; | 4949 | u64 stripe_len; |
4928 | u64 *raid_map = NULL; | ||
4929 | int stripe_index; | 4950 | int stripe_index; |
4930 | int i; | 4951 | int i; |
4931 | int ret = 0; | 4952 | int ret = 0; |
@@ -4976,7 +4997,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4976 | stripe_offset = offset - stripe_offset; | 4997 | stripe_offset = offset - stripe_offset; |
4977 | 4998 | ||
4978 | /* if we're here for raid56, we need to know the stripe aligned start */ | 4999 | /* if we're here for raid56, we need to know the stripe aligned start */ |
4979 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { | 5000 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
4980 | unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); | 5001 | unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); |
4981 | raid56_full_stripe_start = offset; | 5002 | raid56_full_stripe_start = offset; |
4982 | 5003 | ||
@@ -4989,8 +5010,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4989 | 5010 | ||
4990 | if (rw & REQ_DISCARD) { | 5011 | if (rw & REQ_DISCARD) { |
4991 | /* we don't discard raid56 yet */ | 5012 | /* we don't discard raid56 yet */ |
4992 | if (map->type & | 5013 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
4993 | (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
4994 | ret = -EOPNOTSUPP; | 5014 | ret = -EOPNOTSUPP; |
4995 | goto out; | 5015 | goto out; |
4996 | } | 5016 | } |
@@ -5000,7 +5020,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5000 | /* For writes to RAID[56], allow a full stripeset across all disks. | 5020 | /* For writes to RAID[56], allow a full stripeset across all disks. |
5001 | For other RAID types and for RAID[56] reads, just allow a single | 5021 | For other RAID types and for RAID[56] reads, just allow a single |
5002 | stripe (on a single disk). */ | 5022 | stripe (on a single disk). */ |
5003 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) && | 5023 | if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && |
5004 | (rw & REQ_WRITE)) { | 5024 | (rw & REQ_WRITE)) { |
5005 | max_len = stripe_len * nr_data_stripes(map) - | 5025 | max_len = stripe_len * nr_data_stripes(map) - |
5006 | (offset - raid56_full_stripe_start); | 5026 | (offset - raid56_full_stripe_start); |
@@ -5047,7 +5067,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5047 | u64 physical_of_found = 0; | 5067 | u64 physical_of_found = 0; |
5048 | 5068 | ||
5049 | ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, | 5069 | ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, |
5050 | logical, &tmp_length, &tmp_bbio, 0, NULL); | 5070 | logical, &tmp_length, &tmp_bbio, 0, 0); |
5051 | if (ret) { | 5071 | if (ret) { |
5052 | WARN_ON(tmp_bbio != NULL); | 5072 | WARN_ON(tmp_bbio != NULL); |
5053 | goto out; | 5073 | goto out; |
@@ -5061,7 +5081,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5061 | * is not left of the left cursor | 5081 | * is not left of the left cursor |
5062 | */ | 5082 | */ |
5063 | ret = -EIO; | 5083 | ret = -EIO; |
5064 | kfree(tmp_bbio); | 5084 | btrfs_put_bbio(tmp_bbio); |
5065 | goto out; | 5085 | goto out; |
5066 | } | 5086 | } |
5067 | 5087 | ||
@@ -5096,11 +5116,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5096 | } else { | 5116 | } else { |
5097 | WARN_ON(1); | 5117 | WARN_ON(1); |
5098 | ret = -EIO; | 5118 | ret = -EIO; |
5099 | kfree(tmp_bbio); | 5119 | btrfs_put_bbio(tmp_bbio); |
5100 | goto out; | 5120 | goto out; |
5101 | } | 5121 | } |
5102 | 5122 | ||
5103 | kfree(tmp_bbio); | 5123 | btrfs_put_bbio(tmp_bbio); |
5104 | } else if (mirror_num > map->num_stripes) { | 5124 | } else if (mirror_num > map->num_stripes) { |
5105 | mirror_num = 0; | 5125 | mirror_num = 0; |
5106 | } | 5126 | } |
@@ -5166,15 +5186,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5166 | mirror_num = stripe_index - old_stripe_index + 1; | 5186 | mirror_num = stripe_index - old_stripe_index + 1; |
5167 | } | 5187 | } |
5168 | 5188 | ||
5169 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 5189 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
5170 | BTRFS_BLOCK_GROUP_RAID6)) { | 5190 | if (need_raid_map && |
5171 | u64 tmp; | ||
5172 | |||
5173 | if (raid_map_ret && | ||
5174 | ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || | 5191 | ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || |
5175 | mirror_num > 1)) { | 5192 | mirror_num > 1)) { |
5176 | int i, rot; | ||
5177 | |||
5178 | /* push stripe_nr back to the start of the full stripe */ | 5193 | /* push stripe_nr back to the start of the full stripe */ |
5179 | stripe_nr = raid56_full_stripe_start; | 5194 | stripe_nr = raid56_full_stripe_start; |
5180 | do_div(stripe_nr, stripe_len * nr_data_stripes(map)); | 5195 | do_div(stripe_nr, stripe_len * nr_data_stripes(map)); |
@@ -5183,32 +5198,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5183 | num_stripes = map->num_stripes; | 5198 | num_stripes = map->num_stripes; |
5184 | max_errors = nr_parity_stripes(map); | 5199 | max_errors = nr_parity_stripes(map); |
5185 | 5200 | ||
5186 | raid_map = kmalloc_array(num_stripes, sizeof(u64), | ||
5187 | GFP_NOFS); | ||
5188 | if (!raid_map) { | ||
5189 | ret = -ENOMEM; | ||
5190 | goto out; | ||
5191 | } | ||
5192 | |||
5193 | /* Work out the disk rotation on this stripe-set */ | ||
5194 | tmp = stripe_nr; | ||
5195 | rot = do_div(tmp, num_stripes); | ||
5196 | |||
5197 | /* Fill in the logical address of each stripe */ | ||
5198 | tmp = stripe_nr * nr_data_stripes(map); | ||
5199 | for (i = 0; i < nr_data_stripes(map); i++) | ||
5200 | raid_map[(i+rot) % num_stripes] = | ||
5201 | em->start + (tmp + i) * map->stripe_len; | ||
5202 | |||
5203 | raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE; | ||
5204 | if (map->type & BTRFS_BLOCK_GROUP_RAID6) | ||
5205 | raid_map[(i+rot+1) % num_stripes] = | ||
5206 | RAID6_Q_STRIPE; | ||
5207 | |||
5208 | *length = map->stripe_len; | 5201 | *length = map->stripe_len; |
5209 | stripe_index = 0; | 5202 | stripe_index = 0; |
5210 | stripe_offset = 0; | 5203 | stripe_offset = 0; |
5211 | } else { | 5204 | } else { |
5205 | u64 tmp; | ||
5206 | |||
5212 | /* | 5207 | /* |
5213 | * Mirror #0 or #1 means the original data block. | 5208 | * Mirror #0 or #1 means the original data block. |
5214 | * Mirror #2 is RAID5 parity block. | 5209 | * Mirror #2 is RAID5 parity block. |
@@ -5246,17 +5241,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5246 | tgtdev_indexes = num_stripes; | 5241 | tgtdev_indexes = num_stripes; |
5247 | } | 5242 | } |
5248 | 5243 | ||
5249 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), | 5244 | bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes); |
5250 | GFP_NOFS); | ||
5251 | if (!bbio) { | 5245 | if (!bbio) { |
5252 | kfree(raid_map); | ||
5253 | ret = -ENOMEM; | 5246 | ret = -ENOMEM; |
5254 | goto out; | 5247 | goto out; |
5255 | } | 5248 | } |
5256 | atomic_set(&bbio->error, 0); | ||
5257 | if (dev_replace_is_ongoing) | 5249 | if (dev_replace_is_ongoing) |
5258 | bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); | 5250 | bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); |
5259 | 5251 | ||
5252 | /* build raid_map */ | ||
5253 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && | ||
5254 | need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || | ||
5255 | mirror_num > 1)) { | ||
5256 | u64 tmp; | ||
5257 | int i, rot; | ||
5258 | |||
5259 | bbio->raid_map = (u64 *)((void *)bbio->stripes + | ||
5260 | sizeof(struct btrfs_bio_stripe) * | ||
5261 | num_alloc_stripes + | ||
5262 | sizeof(int) * tgtdev_indexes); | ||
5263 | |||
5264 | /* Work out the disk rotation on this stripe-set */ | ||
5265 | tmp = stripe_nr; | ||
5266 | rot = do_div(tmp, num_stripes); | ||
5267 | |||
5268 | /* Fill in the logical address of each stripe */ | ||
5269 | tmp = stripe_nr * nr_data_stripes(map); | ||
5270 | for (i = 0; i < nr_data_stripes(map); i++) | ||
5271 | bbio->raid_map[(i+rot) % num_stripes] = | ||
5272 | em->start + (tmp + i) * map->stripe_len; | ||
5273 | |||
5274 | bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE; | ||
5275 | if (map->type & BTRFS_BLOCK_GROUP_RAID6) | ||
5276 | bbio->raid_map[(i+rot+1) % num_stripes] = | ||
5277 | RAID6_Q_STRIPE; | ||
5278 | } | ||
5279 | |||
5260 | if (rw & REQ_DISCARD) { | 5280 | if (rw & REQ_DISCARD) { |
5261 | int factor = 0; | 5281 | int factor = 0; |
5262 | int sub_stripes = 0; | 5282 | int sub_stripes = 0; |
@@ -5340,6 +5360,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5340 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) | 5360 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
5341 | max_errors = btrfs_chunk_max_errors(map); | 5361 | max_errors = btrfs_chunk_max_errors(map); |
5342 | 5362 | ||
5363 | if (bbio->raid_map) | ||
5364 | sort_parity_stripes(bbio, num_stripes); | ||
5365 | |||
5343 | tgtdev_indexes = 0; | 5366 | tgtdev_indexes = 0; |
5344 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | 5367 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && |
5345 | dev_replace->tgtdev != NULL) { | 5368 | dev_replace->tgtdev != NULL) { |
@@ -5427,6 +5450,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5427 | } | 5450 | } |
5428 | 5451 | ||
5429 | *bbio_ret = bbio; | 5452 | *bbio_ret = bbio; |
5453 | bbio->map_type = map->type; | ||
5430 | bbio->num_stripes = num_stripes; | 5454 | bbio->num_stripes = num_stripes; |
5431 | bbio->max_errors = max_errors; | 5455 | bbio->max_errors = max_errors; |
5432 | bbio->mirror_num = mirror_num; | 5456 | bbio->mirror_num = mirror_num; |
@@ -5443,10 +5467,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5443 | bbio->stripes[0].physical = physical_to_patch_in_first_stripe; | 5467 | bbio->stripes[0].physical = physical_to_patch_in_first_stripe; |
5444 | bbio->mirror_num = map->num_stripes + 1; | 5468 | bbio->mirror_num = map->num_stripes + 1; |
5445 | } | 5469 | } |
5446 | if (raid_map) { | ||
5447 | sort_parity_stripes(bbio, raid_map); | ||
5448 | *raid_map_ret = raid_map; | ||
5449 | } | ||
5450 | out: | 5470 | out: |
5451 | if (dev_replace_is_ongoing) | 5471 | if (dev_replace_is_ongoing) |
5452 | btrfs_dev_replace_unlock(dev_replace); | 5472 | btrfs_dev_replace_unlock(dev_replace); |
@@ -5459,17 +5479,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5459 | struct btrfs_bio **bbio_ret, int mirror_num) | 5479 | struct btrfs_bio **bbio_ret, int mirror_num) |
5460 | { | 5480 | { |
5461 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, | 5481 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, |
5462 | mirror_num, NULL); | 5482 | mirror_num, 0); |
5463 | } | 5483 | } |
5464 | 5484 | ||
5465 | /* For Scrub/replace */ | 5485 | /* For Scrub/replace */ |
5466 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, | 5486 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, |
5467 | u64 logical, u64 *length, | 5487 | u64 logical, u64 *length, |
5468 | struct btrfs_bio **bbio_ret, int mirror_num, | 5488 | struct btrfs_bio **bbio_ret, int mirror_num, |
5469 | u64 **raid_map_ret) | 5489 | int need_raid_map) |
5470 | { | 5490 | { |
5471 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, | 5491 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, |
5472 | mirror_num, raid_map_ret); | 5492 | mirror_num, need_raid_map); |
5473 | } | 5493 | } |
5474 | 5494 | ||
5475 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 5495 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
@@ -5511,8 +5531,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
5511 | do_div(length, map->num_stripes / map->sub_stripes); | 5531 | do_div(length, map->num_stripes / map->sub_stripes); |
5512 | else if (map->type & BTRFS_BLOCK_GROUP_RAID0) | 5532 | else if (map->type & BTRFS_BLOCK_GROUP_RAID0) |
5513 | do_div(length, map->num_stripes); | 5533 | do_div(length, map->num_stripes); |
5514 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 5534 | else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
5515 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
5516 | do_div(length, nr_data_stripes(map)); | 5535 | do_div(length, nr_data_stripes(map)); |
5517 | rmap_len = map->stripe_len * nr_data_stripes(map); | 5536 | rmap_len = map->stripe_len * nr_data_stripes(map); |
5518 | } | 5537 | } |
@@ -5565,7 +5584,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e | |||
5565 | bio_endio_nodec(bio, err); | 5584 | bio_endio_nodec(bio, err); |
5566 | else | 5585 | else |
5567 | bio_endio(bio, err); | 5586 | bio_endio(bio, err); |
5568 | kfree(bbio); | 5587 | btrfs_put_bbio(bbio); |
5569 | } | 5588 | } |
5570 | 5589 | ||
5571 | static void btrfs_end_bio(struct bio *bio, int err) | 5590 | static void btrfs_end_bio(struct bio *bio, int err) |
@@ -5808,7 +5827,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5808 | u64 logical = (u64)bio->bi_iter.bi_sector << 9; | 5827 | u64 logical = (u64)bio->bi_iter.bi_sector << 9; |
5809 | u64 length = 0; | 5828 | u64 length = 0; |
5810 | u64 map_length; | 5829 | u64 map_length; |
5811 | u64 *raid_map = NULL; | ||
5812 | int ret; | 5830 | int ret; |
5813 | int dev_nr = 0; | 5831 | int dev_nr = 0; |
5814 | int total_devs = 1; | 5832 | int total_devs = 1; |
@@ -5819,7 +5837,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5819 | 5837 | ||
5820 | btrfs_bio_counter_inc_blocked(root->fs_info); | 5838 | btrfs_bio_counter_inc_blocked(root->fs_info); |
5821 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, | 5839 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
5822 | mirror_num, &raid_map); | 5840 | mirror_num, 1); |
5823 | if (ret) { | 5841 | if (ret) { |
5824 | btrfs_bio_counter_dec(root->fs_info); | 5842 | btrfs_bio_counter_dec(root->fs_info); |
5825 | return ret; | 5843 | return ret; |
@@ -5832,15 +5850,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5832 | bbio->fs_info = root->fs_info; | 5850 | bbio->fs_info = root->fs_info; |
5833 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 5851 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
5834 | 5852 | ||
5835 | if (raid_map) { | 5853 | if (bbio->raid_map) { |
5836 | /* In this case, map_length has been set to the length of | 5854 | /* In this case, map_length has been set to the length of |
5837 | a single stripe; not the whole write */ | 5855 | a single stripe; not the whole write */ |
5838 | if (rw & WRITE) { | 5856 | if (rw & WRITE) { |
5839 | ret = raid56_parity_write(root, bio, bbio, | 5857 | ret = raid56_parity_write(root, bio, bbio, map_length); |
5840 | raid_map, map_length); | ||
5841 | } else { | 5858 | } else { |
5842 | ret = raid56_parity_recover(root, bio, bbio, | 5859 | ret = raid56_parity_recover(root, bio, bbio, map_length, |
5843 | raid_map, map_length, | ||
5844 | mirror_num, 1); | 5860 | mirror_num, 1); |
5845 | } | 5861 | } |
5846 | 5862 | ||
@@ -6238,17 +6254,22 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
6238 | struct extent_buffer *sb; | 6254 | struct extent_buffer *sb; |
6239 | struct btrfs_disk_key *disk_key; | 6255 | struct btrfs_disk_key *disk_key; |
6240 | struct btrfs_chunk *chunk; | 6256 | struct btrfs_chunk *chunk; |
6241 | u8 *ptr; | 6257 | u8 *array_ptr; |
6242 | unsigned long sb_ptr; | 6258 | unsigned long sb_array_offset; |
6243 | int ret = 0; | 6259 | int ret = 0; |
6244 | u32 num_stripes; | 6260 | u32 num_stripes; |
6245 | u32 array_size; | 6261 | u32 array_size; |
6246 | u32 len = 0; | 6262 | u32 len = 0; |
6247 | u32 cur; | 6263 | u32 cur_offset; |
6248 | struct btrfs_key key; | 6264 | struct btrfs_key key; |
6249 | 6265 | ||
6250 | sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, | 6266 | ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); |
6251 | BTRFS_SUPER_INFO_SIZE); | 6267 | /* |
6268 | * This will create extent buffer of nodesize, superblock size is | ||
6269 | * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will | ||
6270 | * overallocate but we can keep it as-is, only the first page is used. | ||
6271 | */ | ||
6272 | sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); | ||
6252 | if (!sb) | 6273 | if (!sb) |
6253 | return -ENOMEM; | 6274 | return -ENOMEM; |
6254 | btrfs_set_buffer_uptodate(sb); | 6275 | btrfs_set_buffer_uptodate(sb); |
@@ -6271,35 +6292,56 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
6271 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); | 6292 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); |
6272 | array_size = btrfs_super_sys_array_size(super_copy); | 6293 | array_size = btrfs_super_sys_array_size(super_copy); |
6273 | 6294 | ||
6274 | ptr = super_copy->sys_chunk_array; | 6295 | array_ptr = super_copy->sys_chunk_array; |
6275 | sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); | 6296 | sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array); |
6276 | cur = 0; | 6297 | cur_offset = 0; |
6298 | |||
6299 | while (cur_offset < array_size) { | ||
6300 | disk_key = (struct btrfs_disk_key *)array_ptr; | ||
6301 | len = sizeof(*disk_key); | ||
6302 | if (cur_offset + len > array_size) | ||
6303 | goto out_short_read; | ||
6277 | 6304 | ||
6278 | while (cur < array_size) { | ||
6279 | disk_key = (struct btrfs_disk_key *)ptr; | ||
6280 | btrfs_disk_key_to_cpu(&key, disk_key); | 6305 | btrfs_disk_key_to_cpu(&key, disk_key); |
6281 | 6306 | ||
6282 | len = sizeof(*disk_key); ptr += len; | 6307 | array_ptr += len; |
6283 | sb_ptr += len; | 6308 | sb_array_offset += len; |
6284 | cur += len; | 6309 | cur_offset += len; |
6285 | 6310 | ||
6286 | if (key.type == BTRFS_CHUNK_ITEM_KEY) { | 6311 | if (key.type == BTRFS_CHUNK_ITEM_KEY) { |
6287 | chunk = (struct btrfs_chunk *)sb_ptr; | 6312 | chunk = (struct btrfs_chunk *)sb_array_offset; |
6313 | /* | ||
6314 | * At least one btrfs_chunk with one stripe must be | ||
6315 | * present, exact stripe count check comes afterwards | ||
6316 | */ | ||
6317 | len = btrfs_chunk_item_size(1); | ||
6318 | if (cur_offset + len > array_size) | ||
6319 | goto out_short_read; | ||
6320 | |||
6321 | num_stripes = btrfs_chunk_num_stripes(sb, chunk); | ||
6322 | len = btrfs_chunk_item_size(num_stripes); | ||
6323 | if (cur_offset + len > array_size) | ||
6324 | goto out_short_read; | ||
6325 | |||
6288 | ret = read_one_chunk(root, &key, sb, chunk); | 6326 | ret = read_one_chunk(root, &key, sb, chunk); |
6289 | if (ret) | 6327 | if (ret) |
6290 | break; | 6328 | break; |
6291 | num_stripes = btrfs_chunk_num_stripes(sb, chunk); | ||
6292 | len = btrfs_chunk_item_size(num_stripes); | ||
6293 | } else { | 6329 | } else { |
6294 | ret = -EIO; | 6330 | ret = -EIO; |
6295 | break; | 6331 | break; |
6296 | } | 6332 | } |
6297 | ptr += len; | 6333 | array_ptr += len; |
6298 | sb_ptr += len; | 6334 | sb_array_offset += len; |
6299 | cur += len; | 6335 | cur_offset += len; |
6300 | } | 6336 | } |
6301 | free_extent_buffer(sb); | 6337 | free_extent_buffer(sb); |
6302 | return ret; | 6338 | return ret; |
6339 | |||
6340 | out_short_read: | ||
6341 | printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n", | ||
6342 | len, cur_offset); | ||
6343 | free_extent_buffer(sb); | ||
6344 | return -EIO; | ||
6303 | } | 6345 | } |
6304 | 6346 | ||
6305 | int btrfs_read_chunk_tree(struct btrfs_root *root) | 6347 | int btrfs_read_chunk_tree(struct btrfs_root *root) |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d6fe73c0f4a2..83069dec6898 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -295,8 +295,10 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | |||
295 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) | 295 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) |
296 | 296 | ||
297 | struct btrfs_bio { | 297 | struct btrfs_bio { |
298 | atomic_t refs; | ||
298 | atomic_t stripes_pending; | 299 | atomic_t stripes_pending; |
299 | struct btrfs_fs_info *fs_info; | 300 | struct btrfs_fs_info *fs_info; |
301 | u64 map_type; /* get from map_lookup->type */ | ||
300 | bio_end_io_t *end_io; | 302 | bio_end_io_t *end_io; |
301 | struct bio *orig_bio; | 303 | struct bio *orig_bio; |
302 | unsigned long flags; | 304 | unsigned long flags; |
@@ -307,6 +309,12 @@ struct btrfs_bio { | |||
307 | int mirror_num; | 309 | int mirror_num; |
308 | int num_tgtdevs; | 310 | int num_tgtdevs; |
309 | int *tgtdev_map; | 311 | int *tgtdev_map; |
312 | /* | ||
313 | * logical block numbers for the start of each stripe | ||
314 | * The last one or two are p/q. These are sorted, | ||
315 | * so raid_map[0] is the start of our full stripe | ||
316 | */ | ||
317 | u64 *raid_map; | ||
310 | struct btrfs_bio_stripe stripes[]; | 318 | struct btrfs_bio_stripe stripes[]; |
311 | }; | 319 | }; |
312 | 320 | ||
@@ -388,19 +396,15 @@ struct btrfs_balance_control { | |||
388 | 396 | ||
389 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | 397 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, |
390 | u64 end, u64 *length); | 398 | u64 end, u64 *length); |
391 | 399 | void btrfs_get_bbio(struct btrfs_bio *bbio); | |
392 | #define btrfs_bio_size(total_stripes, real_stripes) \ | 400 | void btrfs_put_bbio(struct btrfs_bio *bbio); |
393 | (sizeof(struct btrfs_bio) + \ | ||
394 | (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \ | ||
395 | (sizeof(int) * (real_stripes))) | ||
396 | |||
397 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | 401 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
398 | u64 logical, u64 *length, | 402 | u64 logical, u64 *length, |
399 | struct btrfs_bio **bbio_ret, int mirror_num); | 403 | struct btrfs_bio **bbio_ret, int mirror_num); |
400 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, | 404 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, |
401 | u64 logical, u64 *length, | 405 | u64 logical, u64 *length, |
402 | struct btrfs_bio **bbio_ret, int mirror_num, | 406 | struct btrfs_bio **bbio_ret, int mirror_num, |
403 | u64 **raid_map_ret); | 407 | int need_raid_map); |
404 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 408 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
405 | u64 chunk_start, u64 physical, u64 devid, | 409 | u64 chunk_start, u64 physical, u64 devid, |
406 | u64 **logical, int *naddrs, int *stripe_len); | 410 | u64 **logical, int *naddrs, int *stripe_len); |
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 611e1c5893b4..b6dec05c7196 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h | |||
@@ -495,8 +495,7 @@ struct btrfs_ioctl_send_args { | |||
495 | 495 | ||
496 | /* Error codes as returned by the kernel */ | 496 | /* Error codes as returned by the kernel */ |
497 | enum btrfs_err_code { | 497 | enum btrfs_err_code { |
498 | notused, | 498 | BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, |
499 | BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET, | ||
500 | BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, | 499 | BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, |
501 | BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, | 500 | BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, |
502 | BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, | 501 | BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, |