aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2015-02-19 17:36:00 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2015-02-19 17:36:00 -0500
commit: 2b9fb532d4168e8974fe49709e2c4c8d5352a64c (patch)
tree: 610cbe2d1bb32e28db135a767f158ade31452e2e /fs/btrfs/disk-io.c
parent: 4533f6e27a366ecc3da4876074ebfe0cc0ea4f0f (diff)
parent: a742994aa2e271eb8cd8e043d276515ec858ed73 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fix up some long-standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and others.

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6. It's not clear yet if this is just made easier to
  trigger with this pull or if it's a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 102
1 files changed, 75 insertions, 27 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1afb18226da8..f79f38542a73 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
318 memcpy(&found, result, csum_size); 318 memcpy(&found, result, csum_size);
319 319
320 read_extent_buffer(buf, &val, 0, csum_size); 320 read_extent_buffer(buf, &val, 0, csum_size);
321 printk_ratelimited(KERN_INFO 321 printk_ratelimited(KERN_WARNING
322 "BTRFS: %s checksum verify failed on %llu wanted %X found %X " 322 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
323 "level %d\n", 323 "level %d\n",
324 root->fs_info->sb->s_id, buf->start, 324 root->fs_info->sb->s_id, buf->start,
@@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
367 ret = 0; 367 ret = 0;
368 goto out; 368 goto out;
369 } 369 }
370 printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", 370 printk_ratelimited(KERN_ERR
371 "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
371 eb->fs_info->sb->s_id, eb->start, 372 eb->fs_info->sb->s_id, eb->start,
372 parent_transid, btrfs_header_generation(eb)); 373 parent_transid, btrfs_header_generation(eb));
373 ret = 1; 374 ret = 1;
@@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
633 634
634 found_start = btrfs_header_bytenr(eb); 635 found_start = btrfs_header_bytenr(eb);
635 if (found_start != eb->start) { 636 if (found_start != eb->start) {
636 printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start " 637 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
637 "%llu %llu\n", 638 "%llu %llu\n",
638 eb->fs_info->sb->s_id, found_start, eb->start); 639 eb->fs_info->sb->s_id, found_start, eb->start);
639 ret = -EIO; 640 ret = -EIO;
640 goto err; 641 goto err;
641 } 642 }
642 if (check_tree_block_fsid(root, eb)) { 643 if (check_tree_block_fsid(root, eb)) {
643 printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n", 644 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
644 eb->fs_info->sb->s_id, eb->start); 645 eb->fs_info->sb->s_id, eb->start);
645 ret = -EIO; 646 ret = -EIO;
646 goto err; 647 goto err;
647 } 648 }
648 found_level = btrfs_header_level(eb); 649 found_level = btrfs_header_level(eb);
649 if (found_level >= BTRFS_MAX_LEVEL) { 650 if (found_level >= BTRFS_MAX_LEVEL) {
650 btrfs_info(root->fs_info, "bad tree block level %d", 651 btrfs_err(root->fs_info, "bad tree block level %d",
651 (int)btrfs_header_level(eb)); 652 (int)btrfs_header_level(eb));
652 ret = -EIO; 653 ret = -EIO;
653 goto err; 654 goto err;
@@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
1073 .set_page_dirty = btree_set_page_dirty, 1074 .set_page_dirty = btree_set_page_dirty,
1074}; 1075};
1075 1076
1076void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) 1077void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
1077{ 1078{
1078 struct extent_buffer *buf = NULL; 1079 struct extent_buffer *buf = NULL;
1079 struct inode *btree_inode = root->fs_info->btree_inode; 1080 struct inode *btree_inode = root->fs_info->btree_inode;
1080 1081
1081 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 1082 buf = btrfs_find_create_tree_block(root, bytenr);
1082 if (!buf) 1083 if (!buf)
1083 return; 1084 return;
1084 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, 1085 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
1086 free_extent_buffer(buf); 1087 free_extent_buffer(buf);
1087} 1088}
1088 1089
1089int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, 1090int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
1090 int mirror_num, struct extent_buffer **eb) 1091 int mirror_num, struct extent_buffer **eb)
1091{ 1092{
1092 struct extent_buffer *buf = NULL; 1093 struct extent_buffer *buf = NULL;
@@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1094 struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; 1095 struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
1095 int ret; 1096 int ret;
1096 1097
1097 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 1098 buf = btrfs_find_create_tree_block(root, bytenr);
1098 if (!buf) 1099 if (!buf)
1099 return 0; 1100 return 0;
1100 1101
@@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
1125} 1126}
1126 1127
1127struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 1128struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1128 u64 bytenr, u32 blocksize) 1129 u64 bytenr)
1129{ 1130{
1130 if (btrfs_test_is_dummy_root(root)) 1131 if (btrfs_test_is_dummy_root(root))
1131 return alloc_test_extent_buffer(root->fs_info, bytenr, 1132 return alloc_test_extent_buffer(root->fs_info, bytenr);
1132 blocksize); 1133 return alloc_extent_buffer(root->fs_info, bytenr);
1133 return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
1134} 1134}
1135 1135
1136 1136
@@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1152 struct extent_buffer *buf = NULL; 1152 struct extent_buffer *buf = NULL;
1153 int ret; 1153 int ret;
1154 1154
1155 buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize); 1155 buf = btrfs_find_create_tree_block(root, bytenr);
1156 if (!buf) 1156 if (!buf)
1157 return NULL; 1157 return NULL;
1158 1158
@@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
1275 memset(&root->root_key, 0, sizeof(root->root_key)); 1275 memset(&root->root_key, 0, sizeof(root->root_key));
1276 memset(&root->root_item, 0, sizeof(root->root_item)); 1276 memset(&root->root_item, 0, sizeof(root->root_item));
1277 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 1277 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
1278 memset(&root->root_kobj, 0, sizeof(root->root_kobj));
1279 if (fs_info) 1278 if (fs_info)
1280 root->defrag_trans_start = fs_info->generation; 1279 root->defrag_trans_start = fs_info->generation;
1281 else 1280 else
1282 root->defrag_trans_start = 0; 1281 root->defrag_trans_start = 0;
1283 init_completion(&root->kobj_unregister);
1284 root->root_key.objectid = objectid; 1282 root->root_key.objectid = objectid;
1285 root->anon_dev = 0; 1283 root->anon_dev = 0;
1286 1284
@@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
1630 bool check_ref) 1628 bool check_ref)
1631{ 1629{
1632 struct btrfs_root *root; 1630 struct btrfs_root *root;
1631 struct btrfs_path *path;
1632 struct btrfs_key key;
1633 int ret; 1633 int ret;
1634 1634
1635 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) 1635 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@@ -1669,8 +1669,17 @@ again:
1669 if (ret) 1669 if (ret)
1670 goto fail; 1670 goto fail;
1671 1671
1672 ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID, 1672 path = btrfs_alloc_path();
1673 location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL); 1673 if (!path) {
1674 ret = -ENOMEM;
1675 goto fail;
1676 }
1677 key.objectid = BTRFS_ORPHAN_OBJECTID;
1678 key.type = BTRFS_ORPHAN_ITEM_KEY;
1679 key.offset = location->objectid;
1680
1681 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
1682 btrfs_free_path(path);
1674 if (ret < 0) 1683 if (ret < 0)
1675 goto fail; 1684 goto fail;
1676 if (ret == 0) 1685 if (ret == 0)
@@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb,
2232 spin_lock_init(&fs_info->qgroup_op_lock); 2241 spin_lock_init(&fs_info->qgroup_op_lock);
2233 spin_lock_init(&fs_info->buffer_lock); 2242 spin_lock_init(&fs_info->buffer_lock);
2234 spin_lock_init(&fs_info->unused_bgs_lock); 2243 spin_lock_init(&fs_info->unused_bgs_lock);
2244 mutex_init(&fs_info->unused_bg_unpin_mutex);
2235 rwlock_init(&fs_info->tree_mod_log_lock); 2245 rwlock_init(&fs_info->tree_mod_log_lock);
2236 mutex_init(&fs_info->reloc_mutex); 2246 mutex_init(&fs_info->reloc_mutex);
2237 mutex_init(&fs_info->delalloc_root_mutex); 2247 mutex_init(&fs_info->delalloc_root_mutex);
@@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb,
2496 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2506 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2497 2507
2498 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 2508 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
2499 printk(KERN_ERR "BTRFS: has skinny extents\n"); 2509 printk(KERN_INFO "BTRFS: has skinny extents\n");
2500 2510
2501 /* 2511 /*
2502 * flag our filesystem as having big metadata blocks if 2512 * flag our filesystem as having big metadata blocks if
@@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb,
2520 */ 2530 */
2521 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && 2531 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2522 (sectorsize != nodesize)) { 2532 (sectorsize != nodesize)) {
2523 printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " 2533 printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
2524 "are not allowed for mixed block groups on %s\n", 2534 "are not allowed for mixed block groups on %s\n",
2525 sb->s_id); 2535 sb->s_id);
2526 goto fail_alloc; 2536 goto fail_alloc;
@@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb,
2628 sb->s_blocksize_bits = blksize_bits(sectorsize); 2638 sb->s_blocksize_bits = blksize_bits(sectorsize);
2629 2639
2630 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { 2640 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
2631 printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id); 2641 printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
2632 goto fail_sb_buffer; 2642 goto fail_sb_buffer;
2633 } 2643 }
2634 2644
2635 if (sectorsize != PAGE_SIZE) { 2645 if (sectorsize != PAGE_SIZE) {
2636 printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) " 2646 printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
2637 "found on %s\n", (unsigned long)sectorsize, sb->s_id); 2647 "found on %s\n", (unsigned long)sectorsize, sb->s_id);
2638 goto fail_sb_buffer; 2648 goto fail_sb_buffer;
2639 } 2649 }
@@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb,
2642 ret = btrfs_read_sys_array(tree_root); 2652 ret = btrfs_read_sys_array(tree_root);
2643 mutex_unlock(&fs_info->chunk_mutex); 2653 mutex_unlock(&fs_info->chunk_mutex);
2644 if (ret) { 2654 if (ret) {
2645 printk(KERN_WARNING "BTRFS: failed to read the system " 2655 printk(KERN_ERR "BTRFS: failed to read the system "
2646 "array on %s\n", sb->s_id); 2656 "array on %s\n", sb->s_id);
2647 goto fail_sb_buffer; 2657 goto fail_sb_buffer;
2648 } 2658 }
@@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb,
2657 generation); 2667 generation);
2658 if (!chunk_root->node || 2668 if (!chunk_root->node ||
2659 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2669 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2660 printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", 2670 printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
2661 sb->s_id); 2671 sb->s_id);
2662 goto fail_tree_roots; 2672 goto fail_tree_roots;
2663 } 2673 }
@@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb,
2669 2679
2670 ret = btrfs_read_chunk_tree(chunk_root); 2680 ret = btrfs_read_chunk_tree(chunk_root);
2671 if (ret) { 2681 if (ret) {
2672 printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n", 2682 printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
2673 sb->s_id); 2683 sb->s_id);
2674 goto fail_tree_roots; 2684 goto fail_tree_roots;
2675 } 2685 }
@@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb,
2681 btrfs_close_extra_devices(fs_info, fs_devices, 0); 2691 btrfs_close_extra_devices(fs_info, fs_devices, 0);
2682 2692
2683 if (!fs_devices->latest_bdev) { 2693 if (!fs_devices->latest_bdev) {
2684 printk(KERN_CRIT "BTRFS: failed to read devices on %s\n", 2694 printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
2685 sb->s_id); 2695 sb->s_id);
2686 goto fail_tree_roots; 2696 goto fail_tree_roots;
2687 } 2697 }
@@ -2765,7 +2775,7 @@ retry_root_backup:
2765 2775
2766 ret = btrfs_recover_balance(fs_info); 2776 ret = btrfs_recover_balance(fs_info);
2767 if (ret) { 2777 if (ret) {
2768 printk(KERN_WARNING "BTRFS: failed to recover balance\n"); 2778 printk(KERN_ERR "BTRFS: failed to recover balance\n");
2769 goto fail_block_groups; 2779 goto fail_block_groups;
2770 } 2780 }
2771 2781
@@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3860 printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", 3870 printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
3861 btrfs_super_log_root(sb)); 3871 btrfs_super_log_root(sb));
3862 3872
3873 /*
3874 * Check the lower bound, the alignment and other constraints are
3875 * checked later.
3876 */
3877 if (btrfs_super_nodesize(sb) < 4096) {
3878 printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
3879 btrfs_super_nodesize(sb));
3880 ret = -EINVAL;
3881 }
3882 if (btrfs_super_sectorsize(sb) < 4096) {
3883 printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
3884 btrfs_super_sectorsize(sb));
3885 ret = -EINVAL;
3886 }
3887
3863 if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { 3888 if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
3864 printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", 3889 printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
3865 fs_info->fsid, sb->dev_item.fsid); 3890 fs_info->fsid, sb->dev_item.fsid);
@@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3873 if (btrfs_super_num_devices(sb) > (1UL << 31)) 3898 if (btrfs_super_num_devices(sb) > (1UL << 31))
3874 printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", 3899 printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
3875 btrfs_super_num_devices(sb)); 3900 btrfs_super_num_devices(sb));
3901 if (btrfs_super_num_devices(sb) == 0) {
3902 printk(KERN_ERR "BTRFS: number of devices is 0\n");
3903 ret = -EINVAL;
3904 }
3876 3905
3877 if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { 3906 if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
3878 printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", 3907 printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@@ -3881,6 +3910,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3881 } 3910 }
3882 3911
3883 /* 3912 /*
3913 * Obvious sys_chunk_array corruptions, it must hold at least one key
3914 * and one chunk
3915 */
3916 if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
3917 printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
3918 btrfs_super_sys_array_size(sb),
3919 BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
3920 ret = -EINVAL;
3921 }
3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
3923 + sizeof(struct btrfs_chunk)) {
3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
3925 btrfs_super_sys_array_size(sb),
3926 sizeof(struct btrfs_disk_key)
3927 + sizeof(struct btrfs_chunk));
3928 ret = -EINVAL;
3929 }
3930
3931 /*
3884 * The generation is a global counter, we'll trust it more than the others 3932 * The generation is a global counter, we'll trust it more than the others
3885 * but it's still possible that it's the one that's wrong. 3933 * but it's still possible that it's the one that's wrong.
3886 */ 3934 */