aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2018-03-28 21:08:11 -0400
committerDavid Sterba <dsterba@suse.com>2018-03-30 20:01:06 -0400
commit581c1760415c48cca9349b198bba52dd38750765 (patch)
tree90e5aa035bfcab9a05fdec79eaa4ce90dc7aa267
parent3c0efdf03b2d127f0e40e30db4e7aa0429b1b79a (diff)
btrfs: Validate child tree block's level and first key
We have several reports of node pointers that point to incorrect child tree blocks, which may even have the wrong owner and level while still having a valid generation and checksum. Although btrfs check can handle this and prints an error message like: leaf parent key incorrect 60670574592, the kernel does not have sufficient checks for this type of corruption. At least add such a check to read_tree_block() and btrfs_read_buffer(), which gain two new parameters, @level and @first_key, to verify the child tree block. The new @level check is mandatory, and all call sites have been modified to extract the expected level from their call chain. The @first_key check is optional; the following call sites skip it: 1) Root node/leaf: as ROOT_ITEM doesn't contain the first key, skip the @first_key check. 2) Direct backref: only the parent bytenr and level are known and we would need to resolve the key all by ourselves, so skip the @first_key check. Another note about this verification: it needs extra info from the nodeptr or ROOT_ITEM, so it can't fit into the current tree-checker framework, which is limited to the node/leaf boundary. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/backref.c6
-rw-r--r--fs/btrfs/ctree.c28
-rw-r--r--fs/btrfs/disk-io.c95
-rw-r--r--fs/btrfs/disk-io.h8
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/print-tree.c10
-rw-r--r--fs/btrfs/qgroup.c7
-rw-r--r--fs/btrfs/ref-verify.c7
-rw-r--r--fs/btrfs/relocation.c21
-rw-r--r--fs/btrfs/tree-log.c28
10 files changed, 170 insertions, 46 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6007dd6b799e..571024bc632e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -738,7 +738,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
738 BUG_ON(ref->key_for_search.type); 738 BUG_ON(ref->key_for_search.type);
739 BUG_ON(!ref->wanted_disk_byte); 739 BUG_ON(!ref->wanted_disk_byte);
740 740
741 eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0); 741 eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
742 ref->level - 1, NULL);
742 if (IS_ERR(eb)) { 743 if (IS_ERR(eb)) {
743 free_pref(ref); 744 free_pref(ref);
744 return PTR_ERR(eb); 745 return PTR_ERR(eb);
@@ -1288,7 +1289,8 @@ again:
1288 ref->level == 0) { 1289 ref->level == 0) {
1289 struct extent_buffer *eb; 1290 struct extent_buffer *eb;
1290 1291
1291 eb = read_tree_block(fs_info, ref->parent, 0); 1292 eb = read_tree_block(fs_info, ref->parent, 0,
1293 ref->level, NULL);
1292 if (IS_ERR(eb)) { 1294 if (IS_ERR(eb)) {
1293 ret = PTR_ERR(eb); 1295 ret = PTR_ERR(eb);
1294 goto out; 1296 goto out;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1ef6b67f893a..7c8faeb868f4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1354,6 +1354,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1354 struct tree_mod_root *old_root = NULL; 1354 struct tree_mod_root *old_root = NULL;
1355 u64 old_generation = 0; 1355 u64 old_generation = 0;
1356 u64 logical; 1356 u64 logical;
1357 int level;
1357 1358
1358 eb_root = btrfs_read_lock_root_node(root); 1359 eb_root = btrfs_read_lock_root_node(root);
1359 tm = __tree_mod_log_oldest_root(eb_root, time_seq); 1360 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
@@ -1364,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1364 old_root = &tm->old_root; 1365 old_root = &tm->old_root;
1365 old_generation = tm->generation; 1366 old_generation = tm->generation;
1366 logical = old_root->logical; 1367 logical = old_root->logical;
1368 level = old_root->level;
1367 } else { 1369 } else {
1368 logical = eb_root->start; 1370 logical = eb_root->start;
1371 level = btrfs_header_level(eb_root);
1369 } 1372 }
1370 1373
1371 tm = tree_mod_log_search(fs_info, logical, time_seq); 1374 tm = tree_mod_log_search(fs_info, logical, time_seq);
1372 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 1375 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1373 btrfs_tree_read_unlock(eb_root); 1376 btrfs_tree_read_unlock(eb_root);
1374 free_extent_buffer(eb_root); 1377 free_extent_buffer(eb_root);
1375 old = read_tree_block(fs_info, logical, 0); 1378 old = read_tree_block(fs_info, logical, 0, level, NULL);
1376 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { 1379 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
1377 if (!IS_ERR(old)) 1380 if (!IS_ERR(old))
1378 free_extent_buffer(old); 1381 free_extent_buffer(old);
@@ -1592,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1592 btrfs_set_lock_blocking(parent); 1595 btrfs_set_lock_blocking(parent);
1593 1596
1594 for (i = start_slot; i <= end_slot; i++) { 1597 for (i = start_slot; i <= end_slot; i++) {
1598 struct btrfs_key first_key;
1595 int close = 1; 1599 int close = 1;
1596 1600
1597 btrfs_node_key(parent, &disk_key, i); 1601 btrfs_node_key(parent, &disk_key, i);
@@ -1601,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1601 progress_passed = 1; 1605 progress_passed = 1;
1602 blocknr = btrfs_node_blockptr(parent, i); 1606 blocknr = btrfs_node_blockptr(parent, i);
1603 gen = btrfs_node_ptr_generation(parent, i); 1607 gen = btrfs_node_ptr_generation(parent, i);
1608 btrfs_node_key_to_cpu(parent, &first_key, i);
1604 if (last_block == 0) 1609 if (last_block == 0)
1605 last_block = blocknr; 1610 last_block = blocknr;
1606 1611
@@ -1624,7 +1629,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1624 uptodate = 0; 1629 uptodate = 0;
1625 if (!cur || !uptodate) { 1630 if (!cur || !uptodate) {
1626 if (!cur) { 1631 if (!cur) {
1627 cur = read_tree_block(fs_info, blocknr, gen); 1632 cur = read_tree_block(fs_info, blocknr, gen,
1633 parent_level - 1,
1634 &first_key);
1628 if (IS_ERR(cur)) { 1635 if (IS_ERR(cur)) {
1629 return PTR_ERR(cur); 1636 return PTR_ERR(cur);
1630 } else if (!extent_buffer_uptodate(cur)) { 1637 } else if (!extent_buffer_uptodate(cur)) {
@@ -1632,7 +1639,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1632 return -EIO; 1639 return -EIO;
1633 } 1640 }
1634 } else if (!uptodate) { 1641 } else if (!uptodate) {
1635 err = btrfs_read_buffer(cur, gen); 1642 err = btrfs_read_buffer(cur, gen,
1643 parent_level - 1,&first_key);
1636 if (err) { 1644 if (err) {
1637 free_extent_buffer(cur); 1645 free_extent_buffer(cur);
1638 return err; 1646 return err;
@@ -1785,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
1785{ 1793{
1786 int level = btrfs_header_level(parent); 1794 int level = btrfs_header_level(parent);
1787 struct extent_buffer *eb; 1795 struct extent_buffer *eb;
1796 struct btrfs_key first_key;
1788 1797
1789 if (slot < 0 || slot >= btrfs_header_nritems(parent)) 1798 if (slot < 0 || slot >= btrfs_header_nritems(parent))
1790 return ERR_PTR(-ENOENT); 1799 return ERR_PTR(-ENOENT);
1791 1800
1792 BUG_ON(level == 0); 1801 BUG_ON(level == 0);
1793 1802
1803 btrfs_node_key_to_cpu(parent, &first_key, slot);
1794 eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), 1804 eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
1795 btrfs_node_ptr_generation(parent, slot)); 1805 btrfs_node_ptr_generation(parent, slot),
1806 level - 1, &first_key);
1796 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { 1807 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
1797 free_extent_buffer(eb); 1808 free_extent_buffer(eb);
1798 eb = ERR_PTR(-EIO); 1809 eb = ERR_PTR(-EIO);
@@ -2388,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2388 u64 gen; 2399 u64 gen;
2389 struct extent_buffer *b = *eb_ret; 2400 struct extent_buffer *b = *eb_ret;
2390 struct extent_buffer *tmp; 2401 struct extent_buffer *tmp;
2402 struct btrfs_key first_key;
2391 int ret; 2403 int ret;
2404 int parent_level;
2392 2405
2393 blocknr = btrfs_node_blockptr(b, slot); 2406 blocknr = btrfs_node_blockptr(b, slot);
2394 gen = btrfs_node_ptr_generation(b, slot); 2407 gen = btrfs_node_ptr_generation(b, slot);
2408 parent_level = btrfs_header_level(b);
2409 btrfs_node_key_to_cpu(b, &first_key, slot);
2395 2410
2396 tmp = find_extent_buffer(fs_info, blocknr); 2411 tmp = find_extent_buffer(fs_info, blocknr);
2397 if (tmp) { 2412 if (tmp) {
@@ -2410,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2410 btrfs_set_path_blocking(p); 2425 btrfs_set_path_blocking(p);
2411 2426
2412 /* now we're allowed to do a blocking uptodate check */ 2427 /* now we're allowed to do a blocking uptodate check */
2413 ret = btrfs_read_buffer(tmp, gen); 2428 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
2414 if (!ret) { 2429 if (!ret) {
2415 *eb_ret = tmp; 2430 *eb_ret = tmp;
2416 return 0; 2431 return 0;
@@ -2437,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2437 btrfs_release_path(p); 2452 btrfs_release_path(p);
2438 2453
2439 ret = -EAGAIN; 2454 ret = -EAGAIN;
2440 tmp = read_tree_block(fs_info, blocknr, 0); 2455 tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
2456 &first_key);
2441 if (!IS_ERR(tmp)) { 2457 if (!IS_ERR(tmp)) {
2442 /* 2458 /*
2443 * If the read above didn't mark this buffer up to date, 2459 * If the read above didn't mark this buffer up to date,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 269374261e36..a2f3a0c67a99 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -427,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
427 return ret; 427 return ret;
428} 428}
429 429
430static int verify_level_key(struct btrfs_fs_info *fs_info,
431 struct extent_buffer *eb, int level,
432 struct btrfs_key *first_key)
433{
434 int found_level;
435 struct btrfs_key found_key;
436 int ret;
437
438 found_level = btrfs_header_level(eb);
439 if (found_level != level) {
440#ifdef CONFIG_BTRFS_DEBUG
441 WARN_ON(1);
442 btrfs_err(fs_info,
443"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
444 eb->start, level, found_level);
445#endif
446 return -EIO;
447 }
448
449 if (!first_key)
450 return 0;
451
452 if (found_level)
453 btrfs_node_key_to_cpu(eb, &found_key, 0);
454 else
455 btrfs_item_key_to_cpu(eb, &found_key, 0);
456 ret = btrfs_comp_cpu_keys(first_key, &found_key);
457
458#ifdef CONFIG_BTRFS_DEBUG
459 if (ret) {
460 WARN_ON(1);
461 btrfs_err(fs_info,
462"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
463 eb->start, first_key->objectid, first_key->type,
464 first_key->offset, found_key.objectid,
465 found_key.type, found_key.offset);
466 }
467#endif
468 return ret;
469}
470
430/* 471/*
431 * helper to read a given tree block, doing retries as required when 472 * helper to read a given tree block, doing retries as required when
432 * the checksums don't match and we have alternate mirrors to try. 473 * the checksums don't match and we have alternate mirrors to try.
474 *
475 * @parent_transid: expected transid, skip check if 0
476 * @level: expected level, mandatory check
477 * @first_key: expected key of first slot, skip check if NULL
433 */ 478 */
434static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, 479static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
435 struct extent_buffer *eb, 480 struct extent_buffer *eb,
436 u64 parent_transid) 481 u64 parent_transid, int level,
482 struct btrfs_key *first_key)
437{ 483{
438 struct extent_io_tree *io_tree; 484 struct extent_io_tree *io_tree;
439 int failed = 0; 485 int failed = 0;
@@ -448,11 +494,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
448 ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, 494 ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
449 mirror_num); 495 mirror_num);
450 if (!ret) { 496 if (!ret) {
451 if (!verify_parent_transid(io_tree, eb, 497 if (verify_parent_transid(io_tree, eb,
452 parent_transid, 0)) 498 parent_transid, 0))
453 break;
454 else
455 ret = -EIO; 499 ret = -EIO;
500 else if (verify_level_key(fs_info, eb, level,
501 first_key))
502 ret = -EUCLEAN;
503 else
504 break;
456 } 505 }
457 506
458 /* 507 /*
@@ -460,7 +509,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
460 * there is no reason to read the other copies, they won't be 509 * there is no reason to read the other copies, they won't be
461 * any less wrong. 510 * any less wrong.
462 */ 511 */
463 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) 512 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
513 ret == -EUCLEAN)
464 break; 514 break;
465 515
466 num_copies = btrfs_num_copies(fs_info, 516 num_copies = btrfs_num_copies(fs_info,
@@ -1049,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
1049 buf->start, buf->start + buf->len - 1); 1099 buf->start, buf->start + buf->len - 1);
1050} 1100}
1051 1101
1102/*
1103 * Read tree block at logical address @bytenr and do various basic but critical
1104 * verification.
1105 *
1106 * @parent_transid: expected transid of this tree block, skip check if 0
1107 * @level: expected level, mandatory check
1108 * @first_key: expected key in slot 0, skip check if NULL
1109 */
1052struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, 1110struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
1053 u64 parent_transid) 1111 u64 parent_transid, int level,
1112 struct btrfs_key *first_key)
1054{ 1113{
1055 struct extent_buffer *buf = NULL; 1114 struct extent_buffer *buf = NULL;
1056 int ret; 1115 int ret;
@@ -1059,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
1059 if (IS_ERR(buf)) 1118 if (IS_ERR(buf))
1060 return buf; 1119 return buf;
1061 1120
1062 ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid); 1121 ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
1122 level, first_key);
1063 if (ret) { 1123 if (ret) {
1064 free_extent_buffer(buf); 1124 free_extent_buffer(buf);
1065 return ERR_PTR(ret); 1125 return ERR_PTR(ret);
@@ -1388,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1388 struct btrfs_path *path; 1448 struct btrfs_path *path;
1389 u64 generation; 1449 u64 generation;
1390 int ret; 1450 int ret;
1451 int level;
1391 1452
1392 path = btrfs_alloc_path(); 1453 path = btrfs_alloc_path();
1393 if (!path) 1454 if (!path)
@@ -1410,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1410 } 1471 }
1411 1472
1412 generation = btrfs_root_generation(&root->root_item); 1473 generation = btrfs_root_generation(&root->root_item);
1474 level = btrfs_root_level(&root->root_item);
1413 root->node = read_tree_block(fs_info, 1475 root->node = read_tree_block(fs_info,
1414 btrfs_root_bytenr(&root->root_item), 1476 btrfs_root_bytenr(&root->root_item),
1415 generation); 1477 generation, level, NULL);
1416 if (IS_ERR(root->node)) { 1478 if (IS_ERR(root->node)) {
1417 ret = PTR_ERR(root->node); 1479 ret = PTR_ERR(root->node);
1418 goto find_fail; 1480 goto find_fail;
@@ -2261,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2261 struct btrfs_root *log_tree_root; 2323 struct btrfs_root *log_tree_root;
2262 struct btrfs_super_block *disk_super = fs_info->super_copy; 2324 struct btrfs_super_block *disk_super = fs_info->super_copy;
2263 u64 bytenr = btrfs_super_log_root(disk_super); 2325 u64 bytenr = btrfs_super_log_root(disk_super);
2326 int level = btrfs_super_log_root_level(disk_super);
2264 2327
2265 if (fs_devices->rw_devices == 0) { 2328 if (fs_devices->rw_devices == 0) {
2266 btrfs_warn(fs_info, "log replay required on RO media"); 2329 btrfs_warn(fs_info, "log replay required on RO media");
@@ -2274,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2274 __setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); 2337 __setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
2275 2338
2276 log_tree_root->node = read_tree_block(fs_info, bytenr, 2339 log_tree_root->node = read_tree_block(fs_info, bytenr,
2277 fs_info->generation + 1); 2340 fs_info->generation + 1,
2341 level, NULL);
2278 if (IS_ERR(log_tree_root->node)) { 2342 if (IS_ERR(log_tree_root->node)) {
2279 btrfs_warn(fs_info, "failed to read log tree"); 2343 btrfs_warn(fs_info, "failed to read log tree");
2280 ret = PTR_ERR(log_tree_root->node); 2344 ret = PTR_ERR(log_tree_root->node);
@@ -2390,6 +2454,7 @@ int open_ctree(struct super_block *sb,
2390 int num_backups_tried = 0; 2454 int num_backups_tried = 0;
2391 int backup_index = 0; 2455 int backup_index = 0;
2392 int clear_free_space_tree = 0; 2456 int clear_free_space_tree = 0;
2457 int level;
2393 2458
2394 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); 2459 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
2395 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); 2460 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2725,12 +2790,13 @@ int open_ctree(struct super_block *sb,
2725 } 2790 }
2726 2791
2727 generation = btrfs_super_chunk_root_generation(disk_super); 2792 generation = btrfs_super_chunk_root_generation(disk_super);
2793 level = btrfs_super_chunk_root_level(disk_super);
2728 2794
2729 __setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); 2795 __setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
2730 2796
2731 chunk_root->node = read_tree_block(fs_info, 2797 chunk_root->node = read_tree_block(fs_info,
2732 btrfs_super_chunk_root(disk_super), 2798 btrfs_super_chunk_root(disk_super),
2733 generation); 2799 generation, level, NULL);
2734 if (IS_ERR(chunk_root->node) || 2800 if (IS_ERR(chunk_root->node) ||
2735 !extent_buffer_uptodate(chunk_root->node)) { 2801 !extent_buffer_uptodate(chunk_root->node)) {
2736 btrfs_err(fs_info, "failed to read chunk root"); 2802 btrfs_err(fs_info, "failed to read chunk root");
@@ -2764,10 +2830,11 @@ int open_ctree(struct super_block *sb,
2764 2830
2765retry_root_backup: 2831retry_root_backup:
2766 generation = btrfs_super_generation(disk_super); 2832 generation = btrfs_super_generation(disk_super);
2833 level = btrfs_super_root_level(disk_super);
2767 2834
2768 tree_root->node = read_tree_block(fs_info, 2835 tree_root->node = read_tree_block(fs_info,
2769 btrfs_super_root(disk_super), 2836 btrfs_super_root(disk_super),
2770 generation); 2837 generation, level, NULL);
2771 if (IS_ERR(tree_root->node) || 2838 if (IS_ERR(tree_root->node) ||
2772 !extent_buffer_uptodate(tree_root->node)) { 2839 !extent_buffer_uptodate(tree_root->node)) {
2773 btrfs_warn(fs_info, "failed to read tree root"); 2840 btrfs_warn(fs_info, "failed to read tree root");
@@ -3887,12 +3954,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
3887 __btrfs_btree_balance_dirty(fs_info, 0); 3954 __btrfs_btree_balance_dirty(fs_info, 0);
3888} 3955}
3889 3956
3890int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 3957int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
3958 struct btrfs_key *first_key)
3891{ 3959{
3892 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; 3960 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3893 struct btrfs_fs_info *fs_info = root->fs_info; 3961 struct btrfs_fs_info *fs_info = root->fs_info;
3894 3962
3895 return btree_read_extent_buffer_pages(fs_info, buf, parent_transid); 3963 return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
3964 level, first_key);
3896} 3965}
3897 3966
3898static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) 3967static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 70a88d61b547..453ea9f5d4e9 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,8 +52,9 @@ static inline u64 btrfs_sb_offset(int mirror)
52struct btrfs_device; 52struct btrfs_device;
53struct btrfs_fs_devices; 53struct btrfs_fs_devices;
54 54
55struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, 55struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
56 u64 bytenr, u64 parent_transid); 56 u64 parent_transid, int level,
57 struct btrfs_key *first_key);
57void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr); 58void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
58int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, 59int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
59 int mirror_num, struct extent_buffer **eb); 60 int mirror_num, struct extent_buffer **eb);
@@ -123,7 +124,8 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
123void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 124void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
124int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, 125int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
125 int atomic); 126 int atomic);
126int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 127int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
128 struct btrfs_key *first_key);
127u32 btrfs_csum_data(const char *data, u32 seed, size_t len); 129u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
128void btrfs_csum_final(u32 crc, u8 *result); 130void btrfs_csum_final(u32 crc, u8 *result);
129blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 131blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6b07202385d3..72f6c03445b6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8710,6 +8710,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8710 u64 parent; 8710 u64 parent;
8711 u32 blocksize; 8711 u32 blocksize;
8712 struct btrfs_key key; 8712 struct btrfs_key key;
8713 struct btrfs_key first_key;
8713 struct extent_buffer *next; 8714 struct extent_buffer *next;
8714 int level = wc->level; 8715 int level = wc->level;
8715 int reada = 0; 8716 int reada = 0;
@@ -8730,6 +8731,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8730 } 8731 }
8731 8732
8732 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 8733 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8734 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
8735 path->slots[level]);
8733 blocksize = fs_info->nodesize; 8736 blocksize = fs_info->nodesize;
8734 8737
8735 next = find_extent_buffer(fs_info, bytenr); 8738 next = find_extent_buffer(fs_info, bytenr);
@@ -8794,7 +8797,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8794 if (!next) { 8797 if (!next) {
8795 if (reada && level == 1) 8798 if (reada && level == 1)
8796 reada_walk_down(trans, root, wc, path); 8799 reada_walk_down(trans, root, wc, path);
8797 next = read_tree_block(fs_info, bytenr, generation); 8800 next = read_tree_block(fs_info, bytenr, generation, level - 1,
8801 &first_key);
8798 if (IS_ERR(next)) { 8802 if (IS_ERR(next)) {
8799 return PTR_ERR(next); 8803 return PTR_ERR(next);
8800 } else if (!extent_buffer_uptodate(next)) { 8804 } else if (!extent_buffer_uptodate(next)) {
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 569205e651c7..4a8770485f77 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -365,9 +365,13 @@ void btrfs_print_tree(struct extent_buffer *c)
365 btrfs_node_blockptr(c, i)); 365 btrfs_node_blockptr(c, i));
366 } 366 }
367 for (i = 0; i < nr; i++) { 367 for (i = 0; i < nr; i++) {
368 struct extent_buffer *next = read_tree_block(fs_info, 368 struct btrfs_key first_key;
369 btrfs_node_blockptr(c, i), 369 struct extent_buffer *next;
370 btrfs_node_ptr_generation(c, i)); 370
371 btrfs_node_key_to_cpu(c, &first_key, i);
372 next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
373 btrfs_node_ptr_generation(c, i),
374 level - 1, &first_key);
371 if (IS_ERR(next)) { 375 if (IS_ERR(next)) {
372 continue; 376 continue;
373 } else if (!extent_buffer_uptodate(next)) { 377 } else if (!extent_buffer_uptodate(next)) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 6b715d6d3c94..875df02ffaee 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1684,7 +1684,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
1684 return 0; 1684 return 0;
1685 1685
1686 if (!extent_buffer_uptodate(root_eb)) { 1686 if (!extent_buffer_uptodate(root_eb)) {
1687 ret = btrfs_read_buffer(root_eb, root_gen); 1687 ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
1688 if (ret) 1688 if (ret)
1689 goto out; 1689 goto out;
1690 } 1690 }
@@ -1715,6 +1715,7 @@ walk_down:
1715 level = root_level; 1715 level = root_level;
1716 while (level >= 0) { 1716 while (level >= 0) {
1717 if (path->nodes[level] == NULL) { 1717 if (path->nodes[level] == NULL) {
1718 struct btrfs_key first_key;
1718 int parent_slot; 1719 int parent_slot;
1719 u64 child_gen; 1720 u64 child_gen;
1720 u64 child_bytenr; 1721 u64 child_bytenr;
@@ -1727,8 +1728,10 @@ walk_down:
1727 parent_slot = path->slots[level + 1]; 1728 parent_slot = path->slots[level + 1];
1728 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1729 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
1729 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1730 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
1731 btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
1730 1732
1731 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1733 eb = read_tree_block(fs_info, child_bytenr, child_gen,
1734 level, &first_key);
1732 if (IS_ERR(eb)) { 1735 if (IS_ERR(eb)) {
1733 ret = PTR_ERR(eb); 1736 ret = PTR_ERR(eb);
1734 goto out; 1737 goto out;
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 171f3cce30e6..35fab67dcbe8 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -579,11 +579,16 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
579 579
580 while (level >= 0) { 580 while (level >= 0) {
581 if (level) { 581 if (level) {
582 struct btrfs_key first_key;
583
582 block_bytenr = btrfs_node_blockptr(path->nodes[level], 584 block_bytenr = btrfs_node_blockptr(path->nodes[level],
583 path->slots[level]); 585 path->slots[level]);
584 gen = btrfs_node_ptr_generation(path->nodes[level], 586 gen = btrfs_node_ptr_generation(path->nodes[level],
585 path->slots[level]); 587 path->slots[level]);
586 eb = read_tree_block(fs_info, block_bytenr, gen); 588 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
589 path->slots[level]);
590 eb = read_tree_block(fs_info, block_bytenr, gen,
591 level - 1, &first_key);
587 if (IS_ERR(eb)) 592 if (IS_ERR(eb))
588 return PTR_ERR(eb); 593 return PTR_ERR(eb);
589 if (!extent_buffer_uptodate(eb)) { 594 if (!extent_buffer_uptodate(eb)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index e61e1ee9af9a..4874c09f6d3c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1839,6 +1839,8 @@ again:
1839 1839
1840 parent = eb; 1840 parent = eb;
1841 while (1) { 1841 while (1) {
1842 struct btrfs_key first_key;
1843
1842 level = btrfs_header_level(parent); 1844 level = btrfs_header_level(parent);
1843 BUG_ON(level < lowest_level); 1845 BUG_ON(level < lowest_level);
1844 1846
@@ -1852,6 +1854,7 @@ again:
1852 old_bytenr = btrfs_node_blockptr(parent, slot); 1854 old_bytenr = btrfs_node_blockptr(parent, slot);
1853 blocksize = fs_info->nodesize; 1855 blocksize = fs_info->nodesize;
1854 old_ptr_gen = btrfs_node_ptr_generation(parent, slot); 1856 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1857 btrfs_node_key_to_cpu(parent, &key, slot);
1855 1858
1856 if (level <= max_level) { 1859 if (level <= max_level) {
1857 eb = path->nodes[level]; 1860 eb = path->nodes[level];
@@ -1876,7 +1879,8 @@ again:
1876 break; 1879 break;
1877 } 1880 }
1878 1881
1879 eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen); 1882 eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
1883 level - 1, &first_key);
1880 if (IS_ERR(eb)) { 1884 if (IS_ERR(eb)) {
1881 ret = PTR_ERR(eb); 1885 ret = PTR_ERR(eb);
1882 break; 1886 break;
@@ -2036,6 +2040,8 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
2036 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 2040 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
2037 2041
2038 for (i = *level; i > 0; i--) { 2042 for (i = *level; i > 0; i--) {
2043 struct btrfs_key first_key;
2044
2039 eb = path->nodes[i]; 2045 eb = path->nodes[i];
2040 nritems = btrfs_header_nritems(eb); 2046 nritems = btrfs_header_nritems(eb);
2041 while (path->slots[i] < nritems) { 2047 while (path->slots[i] < nritems) {
@@ -2056,7 +2062,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
2056 } 2062 }
2057 2063
2058 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 2064 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
2059 eb = read_tree_block(fs_info, bytenr, ptr_gen); 2065 btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
2066 eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
2067 &first_key);
2060 if (IS_ERR(eb)) { 2068 if (IS_ERR(eb)) {
2061 return PTR_ERR(eb); 2069 return PTR_ERR(eb);
2062 } else if (!extent_buffer_uptodate(eb)) { 2070 } else if (!extent_buffer_uptodate(eb)) {
@@ -2714,6 +2722,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2714 path->lowest_level = node->level + 1; 2722 path->lowest_level = node->level + 1;
2715 rc->backref_cache.path[node->level] = node; 2723 rc->backref_cache.path[node->level] = node;
2716 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2724 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2725 struct btrfs_key first_key;
2726
2717 cond_resched(); 2727 cond_resched();
2718 2728
2719 upper = edge->node[UPPER]; 2729 upper = edge->node[UPPER];
@@ -2779,7 +2789,9 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2779 2789
2780 blocksize = root->fs_info->nodesize; 2790 blocksize = root->fs_info->nodesize;
2781 generation = btrfs_node_ptr_generation(upper->eb, slot); 2791 generation = btrfs_node_ptr_generation(upper->eb, slot);
2782 eb = read_tree_block(fs_info, bytenr, generation); 2792 btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
2793 eb = read_tree_block(fs_info, bytenr, generation,
2794 upper->level - 1, &first_key);
2783 if (IS_ERR(eb)) { 2795 if (IS_ERR(eb)) {
2784 err = PTR_ERR(eb); 2796 err = PTR_ERR(eb);
2785 goto next; 2797 goto next;
@@ -2944,7 +2956,8 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2944 struct extent_buffer *eb; 2956 struct extent_buffer *eb;
2945 2957
2946 BUG_ON(block->key_ready); 2958 BUG_ON(block->key_ready);
2947 eb = read_tree_block(fs_info, block->bytenr, block->key.offset); 2959 eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
2960 block->level, NULL);
2948 if (IS_ERR(eb)) { 2961 if (IS_ERR(eb)) {
2949 return PTR_ERR(eb); 2962 return PTR_ERR(eb);
2950 } else if (!extent_buffer_uptodate(eb)) { 2963 } else if (!extent_buffer_uptodate(eb)) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 70afd1085033..c91babc6aa4b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -286,7 +286,7 @@ struct walk_control {
286 * inside it 286 * inside it
287 */ 287 */
288 int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, 288 int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
289 struct walk_control *wc, u64 gen); 289 struct walk_control *wc, u64 gen, int level);
290}; 290};
291 291
292/* 292/*
@@ -294,7 +294,7 @@ struct walk_control {
294 */ 294 */
295static int process_one_buffer(struct btrfs_root *log, 295static int process_one_buffer(struct btrfs_root *log,
296 struct extent_buffer *eb, 296 struct extent_buffer *eb,
297 struct walk_control *wc, u64 gen) 297 struct walk_control *wc, u64 gen, int level)
298{ 298{
299 struct btrfs_fs_info *fs_info = log->fs_info; 299 struct btrfs_fs_info *fs_info = log->fs_info;
300 int ret = 0; 300 int ret = 0;
@@ -304,7 +304,7 @@ static int process_one_buffer(struct btrfs_root *log,
304 * pin down any logged extents, so we have to read the block. 304 * pin down any logged extents, so we have to read the block.
305 */ 305 */
306 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { 306 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
307 ret = btrfs_read_buffer(eb, gen); 307 ret = btrfs_read_buffer(eb, gen, level, NULL);
308 if (ret) 308 if (ret)
309 return ret; 309 return ret;
310 } 310 }
@@ -2406,17 +2406,16 @@ out:
2406 * back refs). 2406 * back refs).
2407 */ 2407 */
2408static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, 2408static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2409 struct walk_control *wc, u64 gen) 2409 struct walk_control *wc, u64 gen, int level)
2410{ 2410{
2411 int nritems; 2411 int nritems;
2412 struct btrfs_path *path; 2412 struct btrfs_path *path;
2413 struct btrfs_root *root = wc->replay_dest; 2413 struct btrfs_root *root = wc->replay_dest;
2414 struct btrfs_key key; 2414 struct btrfs_key key;
2415 int level;
2416 int i; 2415 int i;
2417 int ret; 2416 int ret;
2418 2417
2419 ret = btrfs_read_buffer(eb, gen); 2418 ret = btrfs_read_buffer(eb, gen, level, NULL);
2420 if (ret) 2419 if (ret)
2421 return ret; 2420 return ret;
2422 2421
@@ -2533,6 +2532,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2533 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2532 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2534 2533
2535 while (*level > 0) { 2534 while (*level > 0) {
2535 struct btrfs_key first_key;
2536
2536 WARN_ON(*level < 0); 2537 WARN_ON(*level < 0);
2537 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2538 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2538 cur = path->nodes[*level]; 2539 cur = path->nodes[*level];
@@ -2545,6 +2546,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2545 2546
2546 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 2547 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2547 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 2548 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2549 btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
2548 blocksize = fs_info->nodesize; 2550 blocksize = fs_info->nodesize;
2549 2551
2550 parent = path->nodes[*level]; 2552 parent = path->nodes[*level];
@@ -2555,7 +2557,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2555 return PTR_ERR(next); 2557 return PTR_ERR(next);
2556 2558
2557 if (*level == 1) { 2559 if (*level == 1) {
2558 ret = wc->process_func(root, next, wc, ptr_gen); 2560 ret = wc->process_func(root, next, wc, ptr_gen,
2561 *level - 1);
2559 if (ret) { 2562 if (ret) {
2560 free_extent_buffer(next); 2563 free_extent_buffer(next);
2561 return ret; 2564 return ret;
@@ -2563,7 +2566,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2563 2566
2564 path->slots[*level]++; 2567 path->slots[*level]++;
2565 if (wc->free) { 2568 if (wc->free) {
2566 ret = btrfs_read_buffer(next, ptr_gen); 2569 ret = btrfs_read_buffer(next, ptr_gen,
2570 *level - 1, &first_key);
2567 if (ret) { 2571 if (ret) {
2568 free_extent_buffer(next); 2572 free_extent_buffer(next);
2569 return ret; 2573 return ret;
@@ -2593,7 +2597,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2593 free_extent_buffer(next); 2597 free_extent_buffer(next);
2594 continue; 2598 continue;
2595 } 2599 }
2596 ret = btrfs_read_buffer(next, ptr_gen); 2600 ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key);
2597 if (ret) { 2601 if (ret) {
2598 free_extent_buffer(next); 2602 free_extent_buffer(next);
2599 return ret; 2603 return ret;
@@ -2643,7 +2647,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2643 2647
2644 root_owner = btrfs_header_owner(parent); 2648 root_owner = btrfs_header_owner(parent);
2645 ret = wc->process_func(root, path->nodes[*level], wc, 2649 ret = wc->process_func(root, path->nodes[*level], wc,
2646 btrfs_header_generation(path->nodes[*level])); 2650 btrfs_header_generation(path->nodes[*level]),
2651 *level);
2647 if (ret) 2652 if (ret)
2648 return ret; 2653 return ret;
2649 2654
@@ -2725,7 +2730,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2725 /* was the root node processed? if not, catch it here */ 2730 /* was the root node processed? if not, catch it here */
2726 if (path->nodes[orig_level]) { 2731 if (path->nodes[orig_level]) {
2727 ret = wc->process_func(log, path->nodes[orig_level], wc, 2732 ret = wc->process_func(log, path->nodes[orig_level], wc,
2728 btrfs_header_generation(path->nodes[orig_level])); 2733 btrfs_header_generation(path->nodes[orig_level]),
2734 orig_level);
2729 if (ret) 2735 if (ret)
2730 goto out; 2736 goto out;
2731 if (wc->free) { 2737 if (wc->free) {