aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c211
1 files changed, 125 insertions, 86 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100b07f021b4..d7a7315bd031 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <asm/unaligned.h>
32#include "compat.h" 33#include "compat.h"
33#include "ctree.h" 34#include "ctree.h"
34#include "disk-io.h" 35#include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
198 199
199void btrfs_csum_final(u32 crc, char *result) 200void btrfs_csum_final(u32 crc, char *result)
200{ 201{
201 *(__le32 *)result = ~cpu_to_le32(crc); 202 put_unaligned_le32(~crc, result);
202} 203}
203 204
204/* 205/*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
323 int num_copies = 0; 324 int num_copies = 0;
324 int mirror_num = 0; 325 int mirror_num = 0;
325 326
327 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
326 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 328 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
327 while (1) { 329 while (1) {
328 ret = read_extent_buffer_pages(io_tree, eb, start, 1, 330 ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
331 !verify_parent_transid(io_tree, eb, parent_transid)) 333 !verify_parent_transid(io_tree, eb, parent_transid))
332 return ret; 334 return ret;
333 335
336 /*
337 * This buffer's crc is fine, but its contents are corrupted, so
338 * there is no reason to read the other copies, they won't be
339 * any less wrong.
340 */
341 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
342 return ret;
343
334 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 344 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
335 eb->start, eb->len); 345 eb->start, eb->len);
336 if (num_copies == 1) 346 if (num_copies == 1)
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
419 return ret; 429 return ret;
420} 430}
421 431
432#define CORRUPT(reason, eb, root, slot) \
433 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
434 "root=%llu, slot=%d\n", reason, \
435 (unsigned long long)btrfs_header_bytenr(eb), \
436 (unsigned long long)root->objectid, slot)
437
438static noinline int check_leaf(struct btrfs_root *root,
439 struct extent_buffer *leaf)
440{
441 struct btrfs_key key;
442 struct btrfs_key leaf_key;
443 u32 nritems = btrfs_header_nritems(leaf);
444 int slot;
445
446 if (nritems == 0)
447 return 0;
448
449 /* Check the 0 item */
450 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
451 BTRFS_LEAF_DATA_SIZE(root)) {
452 CORRUPT("invalid item offset size pair", leaf, root, 0);
453 return -EIO;
454 }
455
456 /*
457 * Check to make sure each item's keys are in the correct order and their
458 * offsets make sense. We only have to loop through nritems-1 because
459 * we check the current slot against the next slot, which verifies the
460 * next slot's offset+size makes sense and that the current slot's
461 * offset is correct.
462 */
463 for (slot = 0; slot < nritems - 1; slot++) {
464 btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
465 btrfs_item_key_to_cpu(leaf, &key, slot + 1);
466
467 /* Make sure the keys are in the right order */
468 if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
469 CORRUPT("bad key order", leaf, root, slot);
470 return -EIO;
471 }
472
473 /*
474 * Make sure the offset and ends are right, remember that the
475 * item data starts at the end of the leaf and grows towards the
476 * front.
477 */
478 if (btrfs_item_offset_nr(leaf, slot) !=
479 btrfs_item_end_nr(leaf, slot + 1)) {
480 CORRUPT("slot offset bad", leaf, root, slot);
481 return -EIO;
482 }
483
484 /*
485 * Check to make sure that we don't point outside of the leaf,
486 * just in case all the items are consistent with each other, but
487 * all point outside of the leaf.
488 */
489 if (btrfs_item_end_nr(leaf, slot) >
490 BTRFS_LEAF_DATA_SIZE(root)) {
491 CORRUPT("slot end outside of leaf", leaf, root, slot);
492 return -EIO;
493 }
494 }
495
496 return 0;
497}
498
422#ifdef CONFIG_DEBUG_LOCK_ALLOC 499#ifdef CONFIG_DEBUG_LOCK_ALLOC
423void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) 500void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
424{ 501{
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
485 btrfs_set_buffer_lockdep_class(eb, found_level); 562 btrfs_set_buffer_lockdep_class(eb, found_level);
486 563
487 ret = csum_tree_block(root, eb, 1); 564 ret = csum_tree_block(root, eb, 1);
488 if (ret) 565 if (ret) {
566 ret = -EIO;
567 goto err;
568 }
569
570 /*
571 * If this is a leaf block and it is corrupt, set the corrupt bit so
572 * that we don't try and read the other copies of this block, just
573 * return -EIO.
574 */
575 if (found_level == 0 && check_leaf(root, eb)) {
576 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
489 ret = -EIO; 577 ret = -EIO;
578 }
490 579
491 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 580 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
492 end = eb->start + end - 1; 581 end = eb->start + end - 1;
@@ -847,7 +936,6 @@ static const struct address_space_operations btree_aops = {
847 .writepages = btree_writepages, 936 .writepages = btree_writepages,
848 .releasepage = btree_releasepage, 937 .releasepage = btree_releasepage,
849 .invalidatepage = btree_invalidatepage, 938 .invalidatepage = btree_invalidatepage,
850 .sync_page = block_sync_page,
851#ifdef CONFIG_MIGRATION 939#ifdef CONFIG_MIGRATION
852 .migratepage = btree_migratepage, 940 .migratepage = btree_migratepage,
853#endif 941#endif
@@ -1160,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1160 root, fs_info, location->objectid); 1248 root, fs_info, location->objectid);
1161 1249
1162 path = btrfs_alloc_path(); 1250 path = btrfs_alloc_path();
1163 BUG_ON(!path); 1251 if (!path) {
1252 kfree(root);
1253 return ERR_PTR(-ENOMEM);
1254 }
1164 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); 1255 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1165 if (ret == 0) { 1256 if (ret == 0) {
1166 l = path->nodes[0]; 1257 l = path->nodes[0];
@@ -1331,82 +1422,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1331} 1422}
1332 1423
1333/* 1424/*
1334 * this unplugs every device on the box, and it is only used when page
1335 * is null
1336 */
1337static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1338{
1339 struct btrfs_device *device;
1340 struct btrfs_fs_info *info;
1341
1342 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1343 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1344 if (!device->bdev)
1345 continue;
1346
1347 bdi = blk_get_backing_dev_info(device->bdev);
1348 if (bdi->unplug_io_fn)
1349 bdi->unplug_io_fn(bdi, page);
1350 }
1351}
1352
1353static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1354{
1355 struct inode *inode;
1356 struct extent_map_tree *em_tree;
1357 struct extent_map *em;
1358 struct address_space *mapping;
1359 u64 offset;
1360
1361 /* the generic O_DIRECT read code does this */
1362 if (1 || !page) {
1363 __unplug_io_fn(bdi, page);
1364 return;
1365 }
1366
1367 /*
1368 * page->mapping may change at any time. Get a consistent copy
1369 * and use that for everything below
1370 */
1371 smp_mb();
1372 mapping = page->mapping;
1373 if (!mapping)
1374 return;
1375
1376 inode = mapping->host;
1377
1378 /*
1379 * don't do the expensive searching for a small number of
1380 * devices
1381 */
1382 if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
1383 __unplug_io_fn(bdi, page);
1384 return;
1385 }
1386
1387 offset = page_offset(page);
1388
1389 em_tree = &BTRFS_I(inode)->extent_tree;
1390 read_lock(&em_tree->lock);
1391 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1392 read_unlock(&em_tree->lock);
1393 if (!em) {
1394 __unplug_io_fn(bdi, page);
1395 return;
1396 }
1397
1398 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1399 free_extent_map(em);
1400 __unplug_io_fn(bdi, page);
1401 return;
1402 }
1403 offset = offset - em->start;
1404 btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
1405 em->block_start + offset, page);
1406 free_extent_map(em);
1407}
1408
1409/*
1410 * If this fails, caller must call bdi_destroy() to get rid of the 1425 * If this fails, caller must call bdi_destroy() to get rid of the
1411 * bdi again. 1426 * bdi again.
1412 */ 1427 */
@@ -1420,8 +1435,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1420 return err; 1435 return err;
1421 1436
1422 bdi->ra_pages = default_backing_dev_info.ra_pages; 1437 bdi->ra_pages = default_backing_dev_info.ra_pages;
1423 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1424 bdi->unplug_io_data = info;
1425 bdi->congested_fn = btrfs_congested_fn; 1438 bdi->congested_fn = btrfs_congested_fn;
1426 bdi->congested_data = info; 1439 bdi->congested_data = info;
1427 return 0; 1440 return 0;
@@ -1632,6 +1645,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1632 goto fail_bdi; 1645 goto fail_bdi;
1633 } 1646 }
1634 1647
1648 fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
1649
1635 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1650 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1636 INIT_LIST_HEAD(&fs_info->trans_list); 1651 INIT_LIST_HEAD(&fs_info->trans_list);
1637 INIT_LIST_HEAD(&fs_info->dead_roots); 1652 INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1762,6 +1777,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1762 1777
1763 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 1778 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1764 1779
1780 /*
1781 * In the long term, we'll store the compression type in the super
1782 * block, and it'll be used for per file compression control.
1783 */
1784 fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
1785
1765 ret = btrfs_parse_options(tree_root, options); 1786 ret = btrfs_parse_options(tree_root, options);
1766 if (ret) { 1787 if (ret) {
1767 err = ret; 1788 err = ret;
@@ -1967,6 +1988,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1967 fs_info->metadata_alloc_profile = (u64)-1; 1988 fs_info->metadata_alloc_profile = (u64)-1;
1968 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; 1989 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
1969 1990
1991 ret = btrfs_init_space_info(fs_info);
1992 if (ret) {
1993 printk(KERN_ERR "Failed to initial space info: %d\n", ret);
1994 goto fail_block_groups;
1995 }
1996
1970 ret = btrfs_read_block_groups(extent_root); 1997 ret = btrfs_read_block_groups(extent_root);
1971 if (ret) { 1998 if (ret) {
1972 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 1999 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -2058,9 +2085,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2058 2085
2059 if (!(sb->s_flags & MS_RDONLY)) { 2086 if (!(sb->s_flags & MS_RDONLY)) {
2060 down_read(&fs_info->cleanup_work_sem); 2087 down_read(&fs_info->cleanup_work_sem);
2061 btrfs_orphan_cleanup(fs_info->fs_root); 2088 err = btrfs_orphan_cleanup(fs_info->fs_root);
2062 btrfs_orphan_cleanup(fs_info->tree_root); 2089 if (!err)
2090 err = btrfs_orphan_cleanup(fs_info->tree_root);
2063 up_read(&fs_info->cleanup_work_sem); 2091 up_read(&fs_info->cleanup_work_sem);
2092 if (err) {
2093 close_ctree(tree_root);
2094 return ERR_PTR(err);
2095 }
2064 } 2096 }
2065 2097
2066 return tree_root; 2098 return tree_root;
@@ -2435,8 +2467,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2435 2467
2436 root_objectid = gang[ret - 1]->root_key.objectid + 1; 2468 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2437 for (i = 0; i < ret; i++) { 2469 for (i = 0; i < ret; i++) {
2470 int err;
2471
2438 root_objectid = gang[i]->root_key.objectid; 2472 root_objectid = gang[i]->root_key.objectid;
2439 btrfs_orphan_cleanup(gang[i]); 2473 err = btrfs_orphan_cleanup(gang[i]);
2474 if (err)
2475 return err;
2440 } 2476 }
2441 root_objectid++; 2477 root_objectid++;
2442 } 2478 }
@@ -2947,7 +2983,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
2947 break; 2983 break;
2948 2984
2949 /* opt_discard */ 2985 /* opt_discard */
2950 ret = btrfs_error_discard_extent(root, start, end + 1 - start); 2986 if (btrfs_test_opt(root, DISCARD))
2987 ret = btrfs_error_discard_extent(root, start,
2988 end + 1 - start,
2989 NULL);
2951 2990
2952 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2991 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2953 btrfs_error_unpin_extent_range(root, start, end); 2992 btrfs_error_unpin_extent_range(root, start, end);