aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c234
1 files changed, 143 insertions, 91 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36455d8..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <asm/unaligned.h>
32#include "compat.h" 33#include "compat.h"
33#include "ctree.h" 34#include "ctree.h"
34#include "disk-io.h" 35#include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
198 199
199void btrfs_csum_final(u32 crc, char *result) 200void btrfs_csum_final(u32 crc, char *result)
200{ 201{
201 *(__le32 *)result = ~cpu_to_le32(crc); 202 put_unaligned_le32(~crc, result);
202} 203}
203 204
204/* 205/*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
323 int num_copies = 0; 324 int num_copies = 0;
324 int mirror_num = 0; 325 int mirror_num = 0;
325 326
327 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
326 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 328 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
327 while (1) { 329 while (1) {
328 ret = read_extent_buffer_pages(io_tree, eb, start, 1, 330 ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
331 !verify_parent_transid(io_tree, eb, parent_transid)) 333 !verify_parent_transid(io_tree, eb, parent_transid))
332 return ret; 334 return ret;
333 335
336 /*
337 * This buffer's crc is fine, but its contents are corrupted, so
338 * there is no reason to read the other copies, they won't be
339 * any less wrong.
340 */
341 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
342 return ret;
343
334 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 344 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
335 eb->start, eb->len); 345 eb->start, eb->len);
336 if (num_copies == 1) 346 if (num_copies == 1)
@@ -359,10 +369,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
359 369
360 tree = &BTRFS_I(page->mapping->host)->io_tree; 370 tree = &BTRFS_I(page->mapping->host)->io_tree;
361 371
362 if (page->private == EXTENT_PAGE_PRIVATE) 372 if (page->private == EXTENT_PAGE_PRIVATE) {
373 WARN_ON(1);
363 goto out; 374 goto out;
364 if (!page->private) 375 }
376 if (!page->private) {
377 WARN_ON(1);
365 goto out; 378 goto out;
379 }
366 len = page->private >> 2; 380 len = page->private >> 2;
367 WARN_ON(len == 0); 381 WARN_ON(len == 0);
368 382
@@ -415,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
415 return ret; 429 return ret;
416} 430}
417 431
432#define CORRUPT(reason, eb, root, slot) \
433 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
434 "root=%llu, slot=%d\n", reason, \
435 (unsigned long long)btrfs_header_bytenr(eb), \
436 (unsigned long long)root->objectid, slot)
437
438static noinline int check_leaf(struct btrfs_root *root,
439 struct extent_buffer *leaf)
440{
441 struct btrfs_key key;
442 struct btrfs_key leaf_key;
443 u32 nritems = btrfs_header_nritems(leaf);
444 int slot;
445
446 if (nritems == 0)
447 return 0;
448
449 /* Check the 0 item */
450 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
451 BTRFS_LEAF_DATA_SIZE(root)) {
452 CORRUPT("invalid item offset size pair", leaf, root, 0);
453 return -EIO;
454 }
455
456 /*
457 * Check to make sure each items keys are in the correct order and their
458 * offsets make sense. We only have to loop through nritems-1 because
459 * we check the current slot against the next slot, which verifies the
460 * next slot's offset+size makes sense and that the current's slot
461 * offset is correct.
462 */
463 for (slot = 0; slot < nritems - 1; slot++) {
464 btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
465 btrfs_item_key_to_cpu(leaf, &key, slot + 1);
466
467 /* Make sure the keys are in the right order */
468 if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
469 CORRUPT("bad key order", leaf, root, slot);
470 return -EIO;
471 }
472
473 /*
474 * Make sure the offset and ends are right, remember that the
475 * item data starts at the end of the leaf and grows towards the
476 * front.
477 */
478 if (btrfs_item_offset_nr(leaf, slot) !=
479 btrfs_item_end_nr(leaf, slot + 1)) {
480 CORRUPT("slot offset bad", leaf, root, slot);
481 return -EIO;
482 }
483
484 /*
485 * Check to make sure that we don't point outside of the leaf,
486 * just incase all the items are consistent to eachother, but
487 * all point outside of the leaf.
488 */
489 if (btrfs_item_end_nr(leaf, slot) >
490 BTRFS_LEAF_DATA_SIZE(root)) {
491 CORRUPT("slot end outside of leaf", leaf, root, slot);
492 return -EIO;
493 }
494 }
495
496 return 0;
497}
498
418#ifdef CONFIG_DEBUG_LOCK_ALLOC 499#ifdef CONFIG_DEBUG_LOCK_ALLOC
419void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) 500void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
420{ 501{
@@ -481,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
481 btrfs_set_buffer_lockdep_class(eb, found_level); 562 btrfs_set_buffer_lockdep_class(eb, found_level);
482 563
483 ret = csum_tree_block(root, eb, 1); 564 ret = csum_tree_block(root, eb, 1);
484 if (ret) 565 if (ret) {
566 ret = -EIO;
567 goto err;
568 }
569
570 /*
571 * If this is a leaf block and it is corrupt, set the corrupt bit so
572 * that we don't try and read the other copies of this block, just
573 * return -EIO.
574 */
575 if (found_level == 0 && check_leaf(root, eb)) {
576 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
485 ret = -EIO; 577 ret = -EIO;
578 }
486 579
487 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 580 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
488 end = eb->start + end - 1; 581 end = eb->start + end - 1;
@@ -843,7 +936,6 @@ static const struct address_space_operations btree_aops = {
843 .writepages = btree_writepages, 936 .writepages = btree_writepages,
844 .releasepage = btree_releasepage, 937 .releasepage = btree_releasepage,
845 .invalidatepage = btree_invalidatepage, 938 .invalidatepage = btree_invalidatepage,
846 .sync_page = block_sync_page,
847#ifdef CONFIG_MIGRATION 939#ifdef CONFIG_MIGRATION
848 .migratepage = btree_migratepage, 940 .migratepage = btree_migratepage,
849#endif 941#endif
@@ -1156,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1156 root, fs_info, location->objectid); 1248 root, fs_info, location->objectid);
1157 1249
1158 path = btrfs_alloc_path(); 1250 path = btrfs_alloc_path();
1159 BUG_ON(!path); 1251 if (!path) {
1252 kfree(root);
1253 return ERR_PTR(-ENOMEM);
1254 }
1160 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); 1255 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1161 if (ret == 0) { 1256 if (ret == 0) {
1162 l = path->nodes[0]; 1257 l = path->nodes[0];
@@ -1180,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1180 root->commit_root = btrfs_root_node(root); 1275 root->commit_root = btrfs_root_node(root);
1181 BUG_ON(!root->node); 1276 BUG_ON(!root->node);
1182out: 1277out:
1183 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) 1278 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1184 root->ref_cows = 1; 1279 root->ref_cows = 1;
1280 btrfs_check_and_init_root_item(&root->root_item);
1281 }
1185 1282
1186 return root; 1283 return root;
1187} 1284}
@@ -1327,82 +1424,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1327} 1424}
1328 1425
1329/* 1426/*
1330 * this unplugs every device on the box, and it is only used when page
1331 * is null
1332 */
1333static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1334{
1335 struct btrfs_device *device;
1336 struct btrfs_fs_info *info;
1337
1338 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1339 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1340 if (!device->bdev)
1341 continue;
1342
1343 bdi = blk_get_backing_dev_info(device->bdev);
1344 if (bdi->unplug_io_fn)
1345 bdi->unplug_io_fn(bdi, page);
1346 }
1347}
1348
1349static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1350{
1351 struct inode *inode;
1352 struct extent_map_tree *em_tree;
1353 struct extent_map *em;
1354 struct address_space *mapping;
1355 u64 offset;
1356
1357 /* the generic O_DIRECT read code does this */
1358 if (1 || !page) {
1359 __unplug_io_fn(bdi, page);
1360 return;
1361 }
1362
1363 /*
1364 * page->mapping may change at any time. Get a consistent copy
1365 * and use that for everything below
1366 */
1367 smp_mb();
1368 mapping = page->mapping;
1369 if (!mapping)
1370 return;
1371
1372 inode = mapping->host;
1373
1374 /*
1375 * don't do the expensive searching for a small number of
1376 * devices
1377 */
1378 if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
1379 __unplug_io_fn(bdi, page);
1380 return;
1381 }
1382
1383 offset = page_offset(page);
1384
1385 em_tree = &BTRFS_I(inode)->extent_tree;
1386 read_lock(&em_tree->lock);
1387 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1388 read_unlock(&em_tree->lock);
1389 if (!em) {
1390 __unplug_io_fn(bdi, page);
1391 return;
1392 }
1393
1394 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1395 free_extent_map(em);
1396 __unplug_io_fn(bdi, page);
1397 return;
1398 }
1399 offset = offset - em->start;
1400 btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
1401 em->block_start + offset, page);
1402 free_extent_map(em);
1403}
1404
1405/*
1406 * If this fails, caller must call bdi_destroy() to get rid of the 1427 * If this fails, caller must call bdi_destroy() to get rid of the
1407 * bdi again. 1428 * bdi again.
1408 */ 1429 */
@@ -1416,8 +1437,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1416 return err; 1437 return err;
1417 1438
1418 bdi->ra_pages = default_backing_dev_info.ra_pages; 1439 bdi->ra_pages = default_backing_dev_info.ra_pages;
1419 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1420 bdi->unplug_io_data = info;
1421 bdi->congested_fn = btrfs_congested_fn; 1440 bdi->congested_fn = btrfs_congested_fn;
1422 bdi->congested_data = info; 1441 bdi->congested_data = info;
1423 return 0; 1442 return 0;
@@ -1550,6 +1569,7 @@ static int transaction_kthread(void *arg)
1550 spin_unlock(&root->fs_info->new_trans_lock); 1569 spin_unlock(&root->fs_info->new_trans_lock);
1551 1570
1552 trans = btrfs_join_transaction(root, 1); 1571 trans = btrfs_join_transaction(root, 1);
1572 BUG_ON(IS_ERR(trans));
1553 if (transid == trans->transid) { 1573 if (transid == trans->transid) {
1554 ret = btrfs_commit_transaction(trans, root); 1574 ret = btrfs_commit_transaction(trans, root);
1555 BUG_ON(ret); 1575 BUG_ON(ret);
@@ -1627,6 +1647,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1627 goto fail_bdi; 1647 goto fail_bdi;
1628 } 1648 }
1629 1649
1650 fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
1651
1630 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1652 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1631 INIT_LIST_HEAD(&fs_info->trans_list); 1653 INIT_LIST_HEAD(&fs_info->trans_list);
1632 INIT_LIST_HEAD(&fs_info->dead_roots); 1654 INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1757,6 +1779,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1757 1779
1758 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 1780 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1759 1781
1782 /*
1783 * In the long term, we'll store the compression type in the super
1784 * block, and it'll be used for per file compression control.
1785 */
1786 fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
1787
1760 ret = btrfs_parse_options(tree_root, options); 1788 ret = btrfs_parse_options(tree_root, options);
1761 if (ret) { 1789 if (ret) {
1762 err = ret; 1790 err = ret;
@@ -1962,6 +1990,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1962 fs_info->metadata_alloc_profile = (u64)-1; 1990 fs_info->metadata_alloc_profile = (u64)-1;
1963 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; 1991 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
1964 1992
1993 ret = btrfs_init_space_info(fs_info);
1994 if (ret) {
1995 printk(KERN_ERR "Failed to initial space info: %d\n", ret);
1996 goto fail_block_groups;
1997 }
1998
1965 ret = btrfs_read_block_groups(extent_root); 1999 ret = btrfs_read_block_groups(extent_root);
1966 if (ret) { 2000 if (ret) {
1967 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 2001 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -2053,9 +2087,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2053 2087
2054 if (!(sb->s_flags & MS_RDONLY)) { 2088 if (!(sb->s_flags & MS_RDONLY)) {
2055 down_read(&fs_info->cleanup_work_sem); 2089 down_read(&fs_info->cleanup_work_sem);
2056 btrfs_orphan_cleanup(fs_info->fs_root); 2090 err = btrfs_orphan_cleanup(fs_info->fs_root);
2057 btrfs_orphan_cleanup(fs_info->tree_root); 2091 if (!err)
2092 err = btrfs_orphan_cleanup(fs_info->tree_root);
2058 up_read(&fs_info->cleanup_work_sem); 2093 up_read(&fs_info->cleanup_work_sem);
2094 if (err) {
2095 close_ctree(tree_root);
2096 return ERR_PTR(err);
2097 }
2059 } 2098 }
2060 2099
2061 return tree_root; 2100 return tree_root;
@@ -2430,8 +2469,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2430 2469
2431 root_objectid = gang[ret - 1]->root_key.objectid + 1; 2470 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2432 for (i = 0; i < ret; i++) { 2471 for (i = 0; i < ret; i++) {
2472 int err;
2473
2433 root_objectid = gang[i]->root_key.objectid; 2474 root_objectid = gang[i]->root_key.objectid;
2434 btrfs_orphan_cleanup(gang[i]); 2475 err = btrfs_orphan_cleanup(gang[i]);
2476 if (err)
2477 return err;
2435 } 2478 }
2436 root_objectid++; 2479 root_objectid++;
2437 } 2480 }
@@ -2453,10 +2496,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2453 up_write(&root->fs_info->cleanup_work_sem); 2496 up_write(&root->fs_info->cleanup_work_sem);
2454 2497
2455 trans = btrfs_join_transaction(root, 1); 2498 trans = btrfs_join_transaction(root, 1);
2499 if (IS_ERR(trans))
2500 return PTR_ERR(trans);
2456 ret = btrfs_commit_transaction(trans, root); 2501 ret = btrfs_commit_transaction(trans, root);
2457 BUG_ON(ret); 2502 BUG_ON(ret);
2458 /* run commit again to drop the original snapshot */ 2503 /* run commit again to drop the original snapshot */
2459 trans = btrfs_join_transaction(root, 1); 2504 trans = btrfs_join_transaction(root, 1);
2505 if (IS_ERR(trans))
2506 return PTR_ERR(trans);
2460 btrfs_commit_transaction(trans, root); 2507 btrfs_commit_transaction(trans, root);
2461 ret = btrfs_write_and_wait_transaction(NULL, root); 2508 ret = btrfs_write_and_wait_transaction(NULL, root);
2462 BUG_ON(ret); 2509 BUG_ON(ret);
@@ -2484,7 +2531,7 @@ int close_ctree(struct btrfs_root *root)
2484 * ERROR state on disk. 2531 * ERROR state on disk.
2485 * 2532 *
2486 * 2. when btrfs flips readonly just in btrfs_commit_super, 2533 * 2. when btrfs flips readonly just in btrfs_commit_super,
2487 * and in such case, btrfs cannnot write sb via btrfs_commit_super, 2534 * and in such case, btrfs cannot write sb via btrfs_commit_super,
2488 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, 2535 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
2489 * btrfs will cleanup all FS resources first and write sb then. 2536 * btrfs will cleanup all FS resources first and write sb then.
2490 */ 2537 */
@@ -2554,6 +2601,8 @@ int close_ctree(struct btrfs_root *root)
2554 kfree(fs_info->chunk_root); 2601 kfree(fs_info->chunk_root);
2555 kfree(fs_info->dev_root); 2602 kfree(fs_info->dev_root);
2556 kfree(fs_info->csum_root); 2603 kfree(fs_info->csum_root);
2604 kfree(fs_info);
2605
2557 return 0; 2606 return 0;
2558} 2607}
2559 2608
@@ -2936,7 +2985,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
2936 break; 2985 break;
2937 2986
2938 /* opt_discard */ 2987 /* opt_discard */
2939 ret = btrfs_error_discard_extent(root, start, end + 1 - start); 2988 if (btrfs_test_opt(root, DISCARD))
2989 ret = btrfs_error_discard_extent(root, start,
2990 end + 1 - start,
2991 NULL);
2940 2992
2941 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2993 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2942 btrfs_error_unpin_extent_range(root, start, end); 2994 btrfs_error_unpin_extent_range(root, start, end);
@@ -3005,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3005 btrfs_destroy_pinned_extent(root, 3057 btrfs_destroy_pinned_extent(root,
3006 root->fs_info->pinned_extents); 3058 root->fs_info->pinned_extents);
3007 3059
3008 t->use_count = 0; 3060 atomic_set(&t->use_count, 0);
3009 list_del_init(&t->list); 3061 list_del_init(&t->list);
3010 memset(t, 0, sizeof(*t)); 3062 memset(t, 0, sizeof(*t));
3011 kmem_cache_free(btrfs_transaction_cachep, t); 3063 kmem_cache_free(btrfs_transaction_cachep, t);