aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-13 19:30:29 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-13 19:30:29 -0500
commite75cdf9898132f521df98a3ce1c280a2f85d360a (patch)
tree4f6a24cf7ab56b962cc8bead17895521b5985dc6
parentca4ba96e02e932a0c9997a40fd51253b5b2d0f9d (diff)
parentd5f2e33b92b10b8007be50b570a27e9bacfb4c3a (diff)
Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes and cleanups from Chris Mason: "Some of this got cherry-picked from a github repo this week, but I verified the patches. We have three small scrub cleanups and a collection of fixes" * 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: btrfs: Use fs_info directly in btrfs_delete_unused_bgs btrfs: Fix lost-data-profile caused by balance bg btrfs: Fix lost-data-profile caused by auto removing bg btrfs: Remove len argument from scrub_find_csum btrfs: Reduce unnecessary arguments in scrub_recheck_block btrfs: Use scrub_checksum_data and scrub_checksum_tree_block for scrub_recheck_block_checksum btrfs: Reset sblock->xxx_error stats before calling scrub_recheck_block_checksum btrfs: scrub: setup all fields for sblock_to_check btrfs: scrub: set error stats when tree block spanning stripes Btrfs: fix race when listing an inode's xattrs Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow Btrfs: fix race leading to incorrect item deletion when dropping extents Btrfs: fix sleeping inside atomic context in qgroup rescan worker Btrfs: fix race waiting for qgroup rescan worker btrfs: qgroup: exit the rescan worker during umount Btrfs: fix extent accounting for partial direct IO writes
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c11
-rw-r--r--fs/btrfs/file.c16
-rw-r--r--fs/btrfs/inode.c62
-rw-r--r--fs/btrfs/qgroup.c13
-rw-r--r--fs/btrfs/scrub.c179
-rw-r--r--fs/btrfs/volumes.c21
-rw-r--r--fs/btrfs/xattr.c4
8 files changed, 163 insertions, 146 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 640598c0d0e7..974be09e7556 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root)
3780 fs_info->closing = 1; 3780 fs_info->closing = 1;
3781 smp_mb(); 3781 smp_mb();
3782 3782
3783 /* wait for the qgroup rescan worker to stop */
3784 btrfs_qgroup_wait_for_completion(fs_info);
3785
3783 /* wait for the uuid_scan task to finish */ 3786 /* wait for the uuid_scan task to finish */
3784 down(&fs_info->uuid_tree_rescan_sem); 3787 down(&fs_info->uuid_tree_rescan_sem);
3785 /* avoid complaints from lockdep et al., set sem back to initial state */ 3788
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 99a8e57da8a1..acf3ed11cfb6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10279,22 +10279,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10279 block_group = list_first_entry(&fs_info->unused_bgs, 10279 block_group = list_first_entry(&fs_info->unused_bgs,
10280 struct btrfs_block_group_cache, 10280 struct btrfs_block_group_cache,
10281 bg_list); 10281 bg_list);
10282 space_info = block_group->space_info;
10283 list_del_init(&block_group->bg_list); 10282 list_del_init(&block_group->bg_list);
10283
10284 space_info = block_group->space_info;
10285
10284 if (ret || btrfs_mixed_space_info(space_info)) { 10286 if (ret || btrfs_mixed_space_info(space_info)) {
10285 btrfs_put_block_group(block_group); 10287 btrfs_put_block_group(block_group);
10286 continue; 10288 continue;
10287 } 10289 }
10288 spin_unlock(&fs_info->unused_bgs_lock); 10290 spin_unlock(&fs_info->unused_bgs_lock);
10289 10291
10290 mutex_lock(&root->fs_info->delete_unused_bgs_mutex); 10292 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10291 10293
10292 /* Don't want to race with allocators so take the groups_sem */ 10294 /* Don't want to race with allocators so take the groups_sem */
10293 down_write(&space_info->groups_sem); 10295 down_write(&space_info->groups_sem);
10294 spin_lock(&block_group->lock); 10296 spin_lock(&block_group->lock);
10295 if (block_group->reserved || 10297 if (block_group->reserved ||
10296 btrfs_block_group_used(&block_group->item) || 10298 btrfs_block_group_used(&block_group->item) ||
10297 block_group->ro) { 10299 block_group->ro ||
10300 list_is_singular(&block_group->list)) {
10298 /* 10301 /*
10299 * We want to bail if we made new allocations or have 10302 * We want to bail if we made new allocations or have
10300 * outstanding allocations in this block group. We do 10303 * outstanding allocations in this block group. We do
@@ -10410,7 +10413,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10410end_trans: 10413end_trans:
10411 btrfs_end_transaction(trans, root); 10414 btrfs_end_transaction(trans, root);
10412next: 10415next:
10413 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 10416 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
10414 btrfs_put_block_group(block_group); 10417 btrfs_put_block_group(block_group);
10415 spin_lock(&fs_info->unused_bgs_lock); 10418 spin_lock(&fs_info->unused_bgs_lock);
10416 } 10419 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 6bd5ce9d75f0..977e715f0bf2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -756,8 +756,16 @@ next_slot:
756 } 756 }
757 757
758 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 758 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
759 if (key.objectid > ino || 759
760 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 760 if (key.objectid > ino)
761 break;
762 if (WARN_ON_ONCE(key.objectid < ino) ||
763 key.type < BTRFS_EXTENT_DATA_KEY) {
764 ASSERT(del_nr == 0);
765 path->slots[0]++;
766 goto next_slot;
767 }
768 if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
761 break; 769 break;
762 770
763 fi = btrfs_item_ptr(leaf, path->slots[0], 771 fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -776,8 +784,8 @@ next_slot:
776 btrfs_file_extent_inline_len(leaf, 784 btrfs_file_extent_inline_len(leaf,
777 path->slots[0], fi); 785 path->slots[0], fi);
778 } else { 786 } else {
779 WARN_ON(1); 787 /* can't happen */
780 extent_end = search_start; 788 BUG();
781 } 789 }
782 790
783 /* 791 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0e4f2bfcc37d..994490d5fa64 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1304,8 +1304,14 @@ next_slot:
1304 num_bytes = 0; 1304 num_bytes = 0;
1305 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1305 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1306 1306
1307 if (found_key.objectid > ino || 1307 if (found_key.objectid > ino)
1308 found_key.type > BTRFS_EXTENT_DATA_KEY || 1308 break;
1309 if (WARN_ON_ONCE(found_key.objectid < ino) ||
1310 found_key.type < BTRFS_EXTENT_DATA_KEY) {
1311 path->slots[0]++;
1312 goto next_slot;
1313 }
1314 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
1309 found_key.offset > end) 1315 found_key.offset > end)
1310 break; 1316 break;
1311 1317
@@ -7503,6 +7509,28 @@ struct btrfs_dio_data {
7503 u64 reserve; 7509 u64 reserve;
7504}; 7510};
7505 7511
7512static void adjust_dio_outstanding_extents(struct inode *inode,
7513 struct btrfs_dio_data *dio_data,
7514 const u64 len)
7515{
7516 unsigned num_extents;
7517
7518 num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
7519 BTRFS_MAX_EXTENT_SIZE);
7520 /*
7521 * If we have an outstanding_extents count still set then we're
7522 * within our reservation, otherwise we need to adjust our inode
7523 * counter appropriately.
7524 */
7525 if (dio_data->outstanding_extents) {
7526 dio_data->outstanding_extents -= num_extents;
7527 } else {
7528 spin_lock(&BTRFS_I(inode)->lock);
7529 BTRFS_I(inode)->outstanding_extents += num_extents;
7530 spin_unlock(&BTRFS_I(inode)->lock);
7531 }
7532}
7533
7506static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, 7534static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7507 struct buffer_head *bh_result, int create) 7535 struct buffer_head *bh_result, int create)
7508{ 7536{
@@ -7538,8 +7566,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7538 * If this errors out it's because we couldn't invalidate pagecache for 7566 * If this errors out it's because we couldn't invalidate pagecache for
7539 * this range and we need to fallback to buffered. 7567 * this range and we need to fallback to buffered.
7540 */ 7568 */
7541 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) 7569 if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
7542 return -ENOTBLK; 7570 create)) {
7571 ret = -ENOTBLK;
7572 goto err;
7573 }
7543 7574
7544 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 7575 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
7545 if (IS_ERR(em)) { 7576 if (IS_ERR(em)) {
@@ -7657,19 +7688,7 @@ unlock:
7657 if (start + len > i_size_read(inode)) 7688 if (start + len > i_size_read(inode))
7658 i_size_write(inode, start + len); 7689 i_size_write(inode, start + len);
7659 7690
7660 /* 7691 adjust_dio_outstanding_extents(inode, dio_data, len);
7661 * If we have an outstanding_extents count still set then we're
7662 * within our reservation, otherwise we need to adjust our inode
7663 * counter appropriately.
7664 */
7665 if (dio_data->outstanding_extents) {
7666 (dio_data->outstanding_extents)--;
7667 } else {
7668 spin_lock(&BTRFS_I(inode)->lock);
7669 BTRFS_I(inode)->outstanding_extents++;
7670 spin_unlock(&BTRFS_I(inode)->lock);
7671 }
7672
7673 btrfs_free_reserved_data_space(inode, start, len); 7692 btrfs_free_reserved_data_space(inode, start, len);
7674 WARN_ON(dio_data->reserve < len); 7693 WARN_ON(dio_data->reserve < len);
7675 dio_data->reserve -= len; 7694 dio_data->reserve -= len;
@@ -7696,8 +7715,17 @@ unlock:
7696unlock_err: 7715unlock_err:
7697 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7716 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7698 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7717 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7718err:
7699 if (dio_data) 7719 if (dio_data)
7700 current->journal_info = dio_data; 7720 current->journal_info = dio_data;
7721 /*
7722 * Compensate the delalloc release we do in btrfs_direct_IO() when we
7723 * write less data than expected, so that we don't underflow our inode's
7724 * outstanding extents counter.
7725 */
7726 if (create && dio_data)
7727 adjust_dio_outstanding_extents(inode, dio_data, len);
7728
7701 return ret; 7729 return ret;
7702} 7730}
7703 7731
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 46476c226395..93e12c18ffd7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2198,7 +2198,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2198 int slot; 2198 int slot;
2199 int ret; 2199 int ret;
2200 2200
2201 path->leave_spinning = 1;
2202 mutex_lock(&fs_info->qgroup_rescan_lock); 2201 mutex_lock(&fs_info->qgroup_rescan_lock);
2203 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2202 ret = btrfs_search_slot_for_read(fs_info->extent_root,
2204 &fs_info->qgroup_rescan_progress, 2203 &fs_info->qgroup_rescan_progress,
@@ -2286,7 +2285,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2286 goto out; 2285 goto out;
2287 2286
2288 err = 0; 2287 err = 0;
2289 while (!err) { 2288 while (!err && !btrfs_fs_closing(fs_info)) {
2290 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2289 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2291 if (IS_ERR(trans)) { 2290 if (IS_ERR(trans)) {
2292 err = PTR_ERR(trans); 2291 err = PTR_ERR(trans);
@@ -2307,7 +2306,8 @@ out:
2307 btrfs_free_path(path); 2306 btrfs_free_path(path);
2308 2307
2309 mutex_lock(&fs_info->qgroup_rescan_lock); 2308 mutex_lock(&fs_info->qgroup_rescan_lock);
2310 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2309 if (!btrfs_fs_closing(fs_info))
2310 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2311 2311
2312 if (err > 0 && 2312 if (err > 0 &&
2313 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2313 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
@@ -2336,7 +2336,9 @@ out:
2336 } 2336 }
2337 btrfs_end_transaction(trans, fs_info->quota_root); 2337 btrfs_end_transaction(trans, fs_info->quota_root);
2338 2338
2339 if (err >= 0) { 2339 if (btrfs_fs_closing(fs_info)) {
2340 btrfs_info(fs_info, "qgroup scan paused");
2341 } else if (err >= 0) {
2340 btrfs_info(fs_info, "qgroup scan completed%s", 2342 btrfs_info(fs_info, "qgroup scan completed%s",
2341 err > 0 ? " (inconsistency flag cleared)" : ""); 2343 err > 0 ? " (inconsistency flag cleared)" : "");
2342 } else { 2344 } else {
@@ -2384,12 +2386,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2384 memset(&fs_info->qgroup_rescan_progress, 0, 2386 memset(&fs_info->qgroup_rescan_progress, 0,
2385 sizeof(fs_info->qgroup_rescan_progress)); 2387 sizeof(fs_info->qgroup_rescan_progress));
2386 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2388 fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2389 init_completion(&fs_info->qgroup_rescan_completion);
2387 2390
2388 spin_unlock(&fs_info->qgroup_lock); 2391 spin_unlock(&fs_info->qgroup_lock);
2389 mutex_unlock(&fs_info->qgroup_rescan_lock); 2392 mutex_unlock(&fs_info->qgroup_rescan_lock);
2390 2393
2391 init_completion(&fs_info->qgroup_rescan_completion);
2392
2393 memset(&fs_info->qgroup_rescan_work, 0, 2394 memset(&fs_info->qgroup_rescan_work, 0,
2394 sizeof(fs_info->qgroup_rescan_work)); 2395 sizeof(fs_info->qgroup_rescan_work));
2395 btrfs_init_work(&fs_info->qgroup_rescan_work, 2396 btrfs_init_work(&fs_info->qgroup_rescan_work,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 550de89a8661..2907a77fb1f6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -248,14 +248,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
248static int scrub_setup_recheck_block(struct scrub_block *original_sblock, 248static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
249 struct scrub_block *sblocks_for_recheck); 249 struct scrub_block *sblocks_for_recheck);
250static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 250static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
251 struct scrub_block *sblock, int is_metadata, 251 struct scrub_block *sblock,
252 int have_csum, u8 *csum, u64 generation, 252 int retry_failed_mirror);
253 u16 csum_size, int retry_failed_mirror); 253static void scrub_recheck_block_checksum(struct scrub_block *sblock);
254static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
255 struct scrub_block *sblock,
256 int is_metadata, int have_csum,
257 const u8 *csum, u64 generation,
258 u16 csum_size);
259static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, 254static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
260 struct scrub_block *sblock_good); 255 struct scrub_block *sblock_good);
261static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, 256static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
@@ -889,11 +884,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
889 struct btrfs_fs_info *fs_info; 884 struct btrfs_fs_info *fs_info;
890 u64 length; 885 u64 length;
891 u64 logical; 886 u64 logical;
892 u64 generation;
893 unsigned int failed_mirror_index; 887 unsigned int failed_mirror_index;
894 unsigned int is_metadata; 888 unsigned int is_metadata;
895 unsigned int have_csum; 889 unsigned int have_csum;
896 u8 *csum;
897 struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */ 890 struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
898 struct scrub_block *sblock_bad; 891 struct scrub_block *sblock_bad;
899 int ret; 892 int ret;
@@ -918,13 +911,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
918 } 911 }
919 length = sblock_to_check->page_count * PAGE_SIZE; 912 length = sblock_to_check->page_count * PAGE_SIZE;
920 logical = sblock_to_check->pagev[0]->logical; 913 logical = sblock_to_check->pagev[0]->logical;
921 generation = sblock_to_check->pagev[0]->generation;
922 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); 914 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
923 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; 915 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
924 is_metadata = !(sblock_to_check->pagev[0]->flags & 916 is_metadata = !(sblock_to_check->pagev[0]->flags &
925 BTRFS_EXTENT_FLAG_DATA); 917 BTRFS_EXTENT_FLAG_DATA);
926 have_csum = sblock_to_check->pagev[0]->have_csum; 918 have_csum = sblock_to_check->pagev[0]->have_csum;
927 csum = sblock_to_check->pagev[0]->csum;
928 dev = sblock_to_check->pagev[0]->dev; 919 dev = sblock_to_check->pagev[0]->dev;
929 920
930 if (sctx->is_dev_replace && !is_metadata && !have_csum) { 921 if (sctx->is_dev_replace && !is_metadata && !have_csum) {
@@ -987,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
987 sblock_bad = sblocks_for_recheck + failed_mirror_index; 978 sblock_bad = sblocks_for_recheck + failed_mirror_index;
988 979
989 /* build and submit the bios for the failed mirror, check checksums */ 980 /* build and submit the bios for the failed mirror, check checksums */
990 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, 981 scrub_recheck_block(fs_info, sblock_bad, 1);
991 csum, generation, sctx->csum_size, 1);
992 982
993 if (!sblock_bad->header_error && !sblock_bad->checksum_error && 983 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
994 sblock_bad->no_io_error_seen) { 984 sblock_bad->no_io_error_seen) {
@@ -1101,9 +1091,7 @@ nodatasum_case:
1101 sblock_other = sblocks_for_recheck + mirror_index; 1091 sblock_other = sblocks_for_recheck + mirror_index;
1102 1092
1103 /* build and submit the bios, check checksums */ 1093 /* build and submit the bios, check checksums */
1104 scrub_recheck_block(fs_info, sblock_other, is_metadata, 1094 scrub_recheck_block(fs_info, sblock_other, 0);
1105 have_csum, csum, generation,
1106 sctx->csum_size, 0);
1107 1095
1108 if (!sblock_other->header_error && 1096 if (!sblock_other->header_error &&
1109 !sblock_other->checksum_error && 1097 !sblock_other->checksum_error &&
@@ -1215,9 +1203,7 @@ nodatasum_case:
1215 * is verified, but most likely the data comes out 1203 * is verified, but most likely the data comes out
1216 * of the page cache. 1204 * of the page cache.
1217 */ 1205 */
1218 scrub_recheck_block(fs_info, sblock_bad, 1206 scrub_recheck_block(fs_info, sblock_bad, 1);
1219 is_metadata, have_csum, csum,
1220 generation, sctx->csum_size, 1);
1221 if (!sblock_bad->header_error && 1207 if (!sblock_bad->header_error &&
1222 !sblock_bad->checksum_error && 1208 !sblock_bad->checksum_error &&
1223 sblock_bad->no_io_error_seen) 1209 sblock_bad->no_io_error_seen)
@@ -1318,6 +1304,9 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1318 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; 1304 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
1319 u64 length = original_sblock->page_count * PAGE_SIZE; 1305 u64 length = original_sblock->page_count * PAGE_SIZE;
1320 u64 logical = original_sblock->pagev[0]->logical; 1306 u64 logical = original_sblock->pagev[0]->logical;
1307 u64 generation = original_sblock->pagev[0]->generation;
1308 u64 flags = original_sblock->pagev[0]->flags;
1309 u64 have_csum = original_sblock->pagev[0]->have_csum;
1321 struct scrub_recover *recover; 1310 struct scrub_recover *recover;
1322 struct btrfs_bio *bbio; 1311 struct btrfs_bio *bbio;
1323 u64 sublen; 1312 u64 sublen;
@@ -1372,6 +1361,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1372 1361
1373 sblock = sblocks_for_recheck + mirror_index; 1362 sblock = sblocks_for_recheck + mirror_index;
1374 sblock->sctx = sctx; 1363 sblock->sctx = sctx;
1364
1375 page = kzalloc(sizeof(*page), GFP_NOFS); 1365 page = kzalloc(sizeof(*page), GFP_NOFS);
1376 if (!page) { 1366 if (!page) {
1377leave_nomem: 1367leave_nomem:
@@ -1383,7 +1373,15 @@ leave_nomem:
1383 } 1373 }
1384 scrub_page_get(page); 1374 scrub_page_get(page);
1385 sblock->pagev[page_index] = page; 1375 sblock->pagev[page_index] = page;
1376 page->sblock = sblock;
1377 page->flags = flags;
1378 page->generation = generation;
1386 page->logical = logical; 1379 page->logical = logical;
1380 page->have_csum = have_csum;
1381 if (have_csum)
1382 memcpy(page->csum,
1383 original_sblock->pagev[0]->csum,
1384 sctx->csum_size);
1387 1385
1388 scrub_stripe_index_and_offset(logical, 1386 scrub_stripe_index_and_offset(logical,
1389 bbio->map_type, 1387 bbio->map_type,
@@ -1474,15 +1472,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1474 * the pages that are errored in the just handled mirror can be repaired. 1472 * the pages that are errored in the just handled mirror can be repaired.
1475 */ 1473 */
1476static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 1474static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1477 struct scrub_block *sblock, int is_metadata, 1475 struct scrub_block *sblock,
1478 int have_csum, u8 *csum, u64 generation, 1476 int retry_failed_mirror)
1479 u16 csum_size, int retry_failed_mirror)
1480{ 1477{
1481 int page_num; 1478 int page_num;
1482 1479
1483 sblock->no_io_error_seen = 1; 1480 sblock->no_io_error_seen = 1;
1484 sblock->header_error = 0;
1485 sblock->checksum_error = 0;
1486 1481
1487 for (page_num = 0; page_num < sblock->page_count; page_num++) { 1482 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1488 struct bio *bio; 1483 struct bio *bio;
@@ -1518,9 +1513,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1518 } 1513 }
1519 1514
1520 if (sblock->no_io_error_seen) 1515 if (sblock->no_io_error_seen)
1521 scrub_recheck_block_checksum(fs_info, sblock, is_metadata, 1516 scrub_recheck_block_checksum(sblock);
1522 have_csum, csum, generation,
1523 csum_size);
1524 1517
1525 return; 1518 return;
1526} 1519}
@@ -1535,61 +1528,16 @@ static inline int scrub_check_fsid(u8 fsid[],
1535 return !ret; 1528 return !ret;
1536} 1529}
1537 1530
1538static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, 1531static void scrub_recheck_block_checksum(struct scrub_block *sblock)
1539 struct scrub_block *sblock,
1540 int is_metadata, int have_csum,
1541 const u8 *csum, u64 generation,
1542 u16 csum_size)
1543{ 1532{
1544 int page_num; 1533 sblock->header_error = 0;
1545 u8 calculated_csum[BTRFS_CSUM_SIZE]; 1534 sblock->checksum_error = 0;
1546 u32 crc = ~(u32)0; 1535 sblock->generation_error = 0;
1547 void *mapped_buffer;
1548
1549 WARN_ON(!sblock->pagev[0]->page);
1550 if (is_metadata) {
1551 struct btrfs_header *h;
1552
1553 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1554 h = (struct btrfs_header *)mapped_buffer;
1555
1556 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
1557 !scrub_check_fsid(h->fsid, sblock->pagev[0]) ||
1558 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1559 BTRFS_UUID_SIZE)) {
1560 sblock->header_error = 1;
1561 } else if (generation != btrfs_stack_header_generation(h)) {
1562 sblock->header_error = 1;
1563 sblock->generation_error = 1;
1564 }
1565 csum = h->csum;
1566 } else {
1567 if (!have_csum)
1568 return;
1569
1570 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1571 }
1572
1573 for (page_num = 0;;) {
1574 if (page_num == 0 && is_metadata)
1575 crc = btrfs_csum_data(
1576 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1577 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1578 else
1579 crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
1580
1581 kunmap_atomic(mapped_buffer);
1582 page_num++;
1583 if (page_num >= sblock->page_count)
1584 break;
1585 WARN_ON(!sblock->pagev[page_num]->page);
1586
1587 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
1588 }
1589 1536
1590 btrfs_csum_final(crc, calculated_csum); 1537 if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
1591 if (memcmp(calculated_csum, csum, csum_size)) 1538 scrub_checksum_data(sblock);
1592 sblock->checksum_error = 1; 1539 else
1540 scrub_checksum_tree_block(sblock);
1593} 1541}
1594 1542
1595static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, 1543static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
@@ -1833,6 +1781,18 @@ static int scrub_checksum(struct scrub_block *sblock)
1833 u64 flags; 1781 u64 flags;
1834 int ret; 1782 int ret;
1835 1783
1784 /*
1785 * No need to initialize these stats currently,
1786 * because this function only uses the return value
1787 * instead of these stats values.
1788 *
1789 * Todo:
1790 * always use stats
1791 */
1792 sblock->header_error = 0;
1793 sblock->generation_error = 0;
1794 sblock->checksum_error = 0;
1795
1836 WARN_ON(sblock->page_count < 1); 1796 WARN_ON(sblock->page_count < 1);
1837 flags = sblock->pagev[0]->flags; 1797 flags = sblock->pagev[0]->flags;
1838 ret = 0; 1798 ret = 0;
@@ -1858,7 +1818,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1858 struct page *page; 1818 struct page *page;
1859 void *buffer; 1819 void *buffer;
1860 u32 crc = ~(u32)0; 1820 u32 crc = ~(u32)0;
1861 int fail = 0;
1862 u64 len; 1821 u64 len;
1863 int index; 1822 int index;
1864 1823
@@ -1889,9 +1848,9 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1889 1848
1890 btrfs_csum_final(crc, csum); 1849 btrfs_csum_final(crc, csum);
1891 if (memcmp(csum, on_disk_csum, sctx->csum_size)) 1850 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1892 fail = 1; 1851 sblock->checksum_error = 1;
1893 1852
1894 return fail; 1853 return sblock->checksum_error;
1895} 1854}
1896 1855
1897static int scrub_checksum_tree_block(struct scrub_block *sblock) 1856static int scrub_checksum_tree_block(struct scrub_block *sblock)
@@ -1907,8 +1866,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1907 u64 mapped_size; 1866 u64 mapped_size;
1908 void *p; 1867 void *p;
1909 u32 crc = ~(u32)0; 1868 u32 crc = ~(u32)0;
1910 int fail = 0;
1911 int crc_fail = 0;
1912 u64 len; 1869 u64 len;
1913 int index; 1870 int index;
1914 1871
@@ -1923,19 +1880,20 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1923 * a) don't have an extent buffer and 1880 * a) don't have an extent buffer and
1924 * b) the page is already kmapped 1881 * b) the page is already kmapped
1925 */ 1882 */
1926
1927 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) 1883 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1928 ++fail; 1884 sblock->header_error = 1;
1929 1885
1930 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) 1886 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
1931 ++fail; 1887 sblock->header_error = 1;
1888 sblock->generation_error = 1;
1889 }
1932 1890
1933 if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) 1891 if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
1934 ++fail; 1892 sblock->header_error = 1;
1935 1893
1936 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, 1894 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1937 BTRFS_UUID_SIZE)) 1895 BTRFS_UUID_SIZE))
1938 ++fail; 1896 sblock->header_error = 1;
1939 1897
1940 len = sctx->nodesize - BTRFS_CSUM_SIZE; 1898 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1941 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; 1899 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
@@ -1960,9 +1918,9 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1960 1918
1961 btrfs_csum_final(crc, calculated_csum); 1919 btrfs_csum_final(crc, calculated_csum);
1962 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) 1920 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1963 ++crc_fail; 1921 sblock->checksum_error = 1;
1964 1922
1965 return fail || crc_fail; 1923 return sblock->header_error || sblock->checksum_error;
1966} 1924}
1967 1925
1968static int scrub_checksum_super(struct scrub_block *sblock) 1926static int scrub_checksum_super(struct scrub_block *sblock)
@@ -2176,39 +2134,27 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work)
2176{ 2134{
2177 struct scrub_block *sblock = container_of(work, struct scrub_block, work); 2135 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2178 struct scrub_ctx *sctx = sblock->sctx; 2136 struct scrub_ctx *sctx = sblock->sctx;
2179 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2180 unsigned int is_metadata;
2181 unsigned int have_csum;
2182 u8 *csum;
2183 u64 generation;
2184 u64 logical; 2137 u64 logical;
2185 struct btrfs_device *dev; 2138 struct btrfs_device *dev;
2186 2139
2187 is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA);
2188 have_csum = sblock->pagev[0]->have_csum;
2189 csum = sblock->pagev[0]->csum;
2190 generation = sblock->pagev[0]->generation;
2191 logical = sblock->pagev[0]->logical; 2140 logical = sblock->pagev[0]->logical;
2192 dev = sblock->pagev[0]->dev; 2141 dev = sblock->pagev[0]->dev;
2193 2142
2194 if (sblock->no_io_error_seen) { 2143 if (sblock->no_io_error_seen)
2195 scrub_recheck_block_checksum(fs_info, sblock, is_metadata, 2144 scrub_recheck_block_checksum(sblock);
2196 have_csum, csum, generation,
2197 sctx->csum_size);
2198 }
2199 2145
2200 if (!sblock->no_io_error_seen) { 2146 if (!sblock->no_io_error_seen) {
2201 spin_lock(&sctx->stat_lock); 2147 spin_lock(&sctx->stat_lock);
2202 sctx->stat.read_errors++; 2148 sctx->stat.read_errors++;
2203 spin_unlock(&sctx->stat_lock); 2149 spin_unlock(&sctx->stat_lock);
2204 btrfs_err_rl_in_rcu(fs_info, 2150 btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
2205 "IO error rebuilding logical %llu for dev %s", 2151 "IO error rebuilding logical %llu for dev %s",
2206 logical, rcu_str_deref(dev->name)); 2152 logical, rcu_str_deref(dev->name));
2207 } else if (sblock->header_error || sblock->checksum_error) { 2153 } else if (sblock->header_error || sblock->checksum_error) {
2208 spin_lock(&sctx->stat_lock); 2154 spin_lock(&sctx->stat_lock);
2209 sctx->stat.uncorrectable_errors++; 2155 sctx->stat.uncorrectable_errors++;
2210 spin_unlock(&sctx->stat_lock); 2156 spin_unlock(&sctx->stat_lock);
2211 btrfs_err_rl_in_rcu(fs_info, 2157 btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
2212 "failed to rebuild valid logical %llu for dev %s", 2158 "failed to rebuild valid logical %llu for dev %s",
2213 logical, rcu_str_deref(dev->name)); 2159 logical, rcu_str_deref(dev->name));
2214 } else { 2160 } else {
@@ -2500,8 +2446,7 @@ static void scrub_block_complete(struct scrub_block *sblock)
2500 } 2446 }
2501} 2447}
2502 2448
2503static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, 2449static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2504 u8 *csum)
2505{ 2450{
2506 struct btrfs_ordered_sum *sum = NULL; 2451 struct btrfs_ordered_sum *sum = NULL;
2507 unsigned long index; 2452 unsigned long index;
@@ -2565,7 +2510,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2565 2510
2566 if (flags & BTRFS_EXTENT_FLAG_DATA) { 2511 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2567 /* push csums to sbio */ 2512 /* push csums to sbio */
2568 have_csum = scrub_find_csum(sctx, logical, l, csum); 2513 have_csum = scrub_find_csum(sctx, logical, csum);
2569 if (have_csum == 0) 2514 if (have_csum == 0)
2570 ++sctx->stat.no_csum; 2515 ++sctx->stat.no_csum;
2571 if (sctx->is_dev_replace && !have_csum) { 2516 if (sctx->is_dev_replace && !have_csum) {
@@ -2703,7 +2648,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
2703 2648
2704 if (flags & BTRFS_EXTENT_FLAG_DATA) { 2649 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2705 /* push csums to sbio */ 2650 /* push csums to sbio */
2706 have_csum = scrub_find_csum(sctx, logical, l, csum); 2651 have_csum = scrub_find_csum(sctx, logical, csum);
2707 if (have_csum == 0) 2652 if (have_csum == 0)
2708 goto skip; 2653 goto skip;
2709 } 2654 }
@@ -3012,6 +2957,9 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
3012 logic_start + map->stripe_len)) { 2957 logic_start + map->stripe_len)) {
3013 btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu", 2958 btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3014 key.objectid, logic_start); 2959 key.objectid, logic_start);
2960 spin_lock(&sctx->stat_lock);
2961 sctx->stat.uncorrectable_errors++;
2962 spin_unlock(&sctx->stat_lock);
3015 goto next; 2963 goto next;
3016 } 2964 }
3017again: 2965again:
@@ -3361,6 +3309,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3361 "scrub: tree block %llu spanning " 3309 "scrub: tree block %llu spanning "
3362 "stripes, ignored. logical=%llu", 3310 "stripes, ignored. logical=%llu",
3363 key.objectid, logical); 3311 key.objectid, logical);
3312 spin_lock(&sctx->stat_lock);
3313 sctx->stat.uncorrectable_errors++;
3314 spin_unlock(&sctx->stat_lock);
3364 goto next; 3315 goto next;
3365 } 3316 }
3366 3317
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9b2dafa5ba59..a6df8fdc1312 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3400,6 +3400,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
3400 u32 count_data = 0; 3400 u32 count_data = 0;
3401 u32 count_meta = 0; 3401 u32 count_meta = 0;
3402 u32 count_sys = 0; 3402 u32 count_sys = 0;
3403 int chunk_reserved = 0;
3403 3404
3404 /* step one make some room on all the devices */ 3405 /* step one make some room on all the devices */
3405 devices = &fs_info->fs_devices->devices; 3406 devices = &fs_info->fs_devices->devices;
@@ -3501,6 +3502,7 @@ again:
3501 3502
3502 ret = should_balance_chunk(chunk_root, leaf, chunk, 3503 ret = should_balance_chunk(chunk_root, leaf, chunk,
3503 found_key.offset); 3504 found_key.offset);
3505
3504 btrfs_release_path(path); 3506 btrfs_release_path(path);
3505 if (!ret) { 3507 if (!ret) {
3506 mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3508 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3537,6 +3539,25 @@ again:
3537 goto loop; 3539 goto loop;
3538 } 3540 }
3539 3541
3542 if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
3543 trans = btrfs_start_transaction(chunk_root, 0);
3544 if (IS_ERR(trans)) {
3545 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3546 ret = PTR_ERR(trans);
3547 goto error;
3548 }
3549
3550 ret = btrfs_force_chunk_alloc(trans, chunk_root,
3551 BTRFS_BLOCK_GROUP_DATA);
3552 if (ret < 0) {
3553 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3554 goto error;
3555 }
3556
3557 btrfs_end_transaction(trans, chunk_root);
3558 chunk_reserved = 1;
3559 }
3560
3540 ret = btrfs_relocate_chunk(chunk_root, 3561 ret = btrfs_relocate_chunk(chunk_root,
3541 found_key.offset); 3562 found_key.offset);
3542 mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3563 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 6f518c90e1c1..1fcd7b6e7564 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
313 /* check to make sure this item is what we want */ 313 /* check to make sure this item is what we want */
314 if (found_key.objectid != key.objectid) 314 if (found_key.objectid != key.objectid)
315 break; 315 break;
316 if (found_key.type != BTRFS_XATTR_ITEM_KEY) 316 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
317 break; 317 break;
318 if (found_key.type < BTRFS_XATTR_ITEM_KEY)
319 goto next;
318 320
319 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 321 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
320 if (verify_dir_item(root, leaf, di)) 322 if (verify_dir_item(root, leaf, di))