diff options
author | Jiri Kosina <jkosina@suse.cz> | 2011-04-26 04:22:15 -0400 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2011-04-26 04:22:59 -0400 |
commit | 07f9479a40cc778bc1462ada11f95b01360ae4ff (patch) | |
tree | 0676cf38df3844004bb3ebfd99dfa67a4a8998f5 /fs/btrfs/disk-io.c | |
parent | 9d5e6bdb3013acfb311ab407eeca0b6a6a3dedbf (diff) | |
parent | cd2e49e90f1cae7726c9a2c54488d881d7f1cd1c (diff) |
Merge branch 'master' into for-next
Fast-forwarded to current state of Linus' tree as there are patches to be
applied for files that didn't exist on the old branch.
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 217 |
1 files changed, 129 insertions, 88 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 100b07f021b4..68c84c8c24bd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | 31 | #include <linux/migrate.h> |
32 | #include <asm/unaligned.h> | ||
32 | #include "compat.h" | 33 | #include "compat.h" |
33 | #include "ctree.h" | 34 | #include "ctree.h" |
34 | #include "disk-io.h" | 35 | #include "disk-io.h" |
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | |||
198 | 199 | ||
199 | void btrfs_csum_final(u32 crc, char *result) | 200 | void btrfs_csum_final(u32 crc, char *result) |
200 | { | 201 | { |
201 | *(__le32 *)result = ~cpu_to_le32(crc); | 202 | put_unaligned_le32(~crc, result); |
202 | } | 203 | } |
203 | 204 | ||
204 | /* | 205 | /* |
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
323 | int num_copies = 0; | 324 | int num_copies = 0; |
324 | int mirror_num = 0; | 325 | int mirror_num = 0; |
325 | 326 | ||
327 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
326 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 328 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
327 | while (1) { | 329 | while (1) { |
328 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 330 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, |
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
331 | !verify_parent_transid(io_tree, eb, parent_transid)) | 333 | !verify_parent_transid(io_tree, eb, parent_transid)) |
332 | return ret; | 334 | return ret; |
333 | 335 | ||
336 | /* | ||
337 | * This buffer's crc is fine, but its contents are corrupted, so | ||
338 | * there is no reason to read the other copies, they won't be | ||
339 | * any less wrong. | ||
340 | */ | ||
341 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | ||
342 | return ret; | ||
343 | |||
334 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 344 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
335 | eb->start, eb->len); | 345 | eb->start, eb->len); |
336 | if (num_copies == 1) | 346 | if (num_copies == 1) |
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
419 | return ret; | 429 | return ret; |
420 | } | 430 | } |
421 | 431 | ||
432 | #define CORRUPT(reason, eb, root, slot) \ | ||
433 | printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ | ||
434 | "root=%llu, slot=%d\n", reason, \ | ||
435 | (unsigned long long)btrfs_header_bytenr(eb), \ | ||
436 | (unsigned long long)root->objectid, slot) | ||
437 | |||
438 | static noinline int check_leaf(struct btrfs_root *root, | ||
439 | struct extent_buffer *leaf) | ||
440 | { | ||
441 | struct btrfs_key key; | ||
442 | struct btrfs_key leaf_key; | ||
443 | u32 nritems = btrfs_header_nritems(leaf); | ||
444 | int slot; | ||
445 | |||
446 | if (nritems == 0) | ||
447 | return 0; | ||
448 | |||
449 | /* Check the 0 item */ | ||
450 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | ||
451 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
452 | CORRUPT("invalid item offset size pair", leaf, root, 0); | ||
453 | return -EIO; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Check to make sure each item's keys are in the correct order and their | |
458 | * offsets make sense. We only have to loop through nritems-1 because | ||
459 | * we check the current slot against the next slot, which verifies the | ||
460 | * next slot's offset+size makes sense and that the current slot's | |
461 | * offset is correct. | ||
462 | */ | ||
463 | for (slot = 0; slot < nritems - 1; slot++) { | ||
464 | btrfs_item_key_to_cpu(leaf, &leaf_key, slot); | ||
465 | btrfs_item_key_to_cpu(leaf, &key, slot + 1); | ||
466 | |||
467 | /* Make sure the keys are in the right order */ | ||
468 | if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { | ||
469 | CORRUPT("bad key order", leaf, root, slot); | ||
470 | return -EIO; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * Make sure the offset and ends are right, remember that the | ||
475 | * item data starts at the end of the leaf and grows towards the | ||
476 | * front. | ||
477 | */ | ||
478 | if (btrfs_item_offset_nr(leaf, slot) != | ||
479 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
480 | CORRUPT("slot offset bad", leaf, root, slot); | ||
481 | return -EIO; | ||
482 | } | ||
483 | |||
484 | /* | ||
485 | * Check to make sure that we don't point outside of the leaf, | ||
486 | * just in case all the items are consistent with each other, but | |
487 | * all point outside of the leaf. | ||
488 | */ | ||
489 | if (btrfs_item_end_nr(leaf, slot) > | ||
490 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
491 | CORRUPT("slot end outside of leaf", leaf, root, slot); | ||
492 | return -EIO; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
422 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 499 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
423 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | 500 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) |
424 | { | 501 | { |
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
485 | btrfs_set_buffer_lockdep_class(eb, found_level); | 562 | btrfs_set_buffer_lockdep_class(eb, found_level); |
486 | 563 | ||
487 | ret = csum_tree_block(root, eb, 1); | 564 | ret = csum_tree_block(root, eb, 1); |
488 | if (ret) | 565 | if (ret) { |
566 | ret = -EIO; | ||
567 | goto err; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * If this is a leaf block and it is corrupt, set the corrupt bit so | ||
572 | * that we don't try and read the other copies of this block, just | ||
573 | * return -EIO. | ||
574 | */ | ||
575 | if (found_level == 0 && check_leaf(root, eb)) { | ||
576 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
489 | ret = -EIO; | 577 | ret = -EIO; |
578 | } | ||
490 | 579 | ||
491 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 580 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
492 | end = eb->start + end - 1; | 581 | end = eb->start + end - 1; |
@@ -847,7 +936,6 @@ static const struct address_space_operations btree_aops = { | |||
847 | .writepages = btree_writepages, | 936 | .writepages = btree_writepages, |
848 | .releasepage = btree_releasepage, | 937 | .releasepage = btree_releasepage, |
849 | .invalidatepage = btree_invalidatepage, | 938 | .invalidatepage = btree_invalidatepage, |
850 | .sync_page = block_sync_page, | ||
851 | #ifdef CONFIG_MIGRATION | 939 | #ifdef CONFIG_MIGRATION |
852 | .migratepage = btree_migratepage, | 940 | .migratepage = btree_migratepage, |
853 | #endif | 941 | #endif |
@@ -1160,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1160 | root, fs_info, location->objectid); | 1248 | root, fs_info, location->objectid); |
1161 | 1249 | ||
1162 | path = btrfs_alloc_path(); | 1250 | path = btrfs_alloc_path(); |
1163 | BUG_ON(!path); | 1251 | if (!path) { |
1252 | kfree(root); | ||
1253 | return ERR_PTR(-ENOMEM); | ||
1254 | } | ||
1164 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1255 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1165 | if (ret == 0) { | 1256 | if (ret == 0) { |
1166 | l = path->nodes[0]; | 1257 | l = path->nodes[0]; |
@@ -1184,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1184 | root->commit_root = btrfs_root_node(root); | 1275 | root->commit_root = btrfs_root_node(root); |
1185 | BUG_ON(!root->node); | 1276 | BUG_ON(!root->node); |
1186 | out: | 1277 | out: |
1187 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) | 1278 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
1188 | root->ref_cows = 1; | 1279 | root->ref_cows = 1; |
1280 | btrfs_check_and_init_root_item(&root->root_item); | ||
1281 | } | ||
1189 | 1282 | ||
1190 | return root; | 1283 | return root; |
1191 | } | 1284 | } |
@@ -1331,82 +1424,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1331 | } | 1424 | } |
1332 | 1425 | ||
1333 | /* | 1426 | /* |
1334 | * this unplugs every device on the box, and it is only used when page | ||
1335 | * is null | ||
1336 | */ | ||
1337 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1338 | { | ||
1339 | struct btrfs_device *device; | ||
1340 | struct btrfs_fs_info *info; | ||
1341 | |||
1342 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | ||
1343 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | ||
1344 | if (!device->bdev) | ||
1345 | continue; | ||
1346 | |||
1347 | bdi = blk_get_backing_dev_info(device->bdev); | ||
1348 | if (bdi->unplug_io_fn) | ||
1349 | bdi->unplug_io_fn(bdi, page); | ||
1350 | } | ||
1351 | } | ||
1352 | |||
1353 | static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1354 | { | ||
1355 | struct inode *inode; | ||
1356 | struct extent_map_tree *em_tree; | ||
1357 | struct extent_map *em; | ||
1358 | struct address_space *mapping; | ||
1359 | u64 offset; | ||
1360 | |||
1361 | /* the generic O_DIRECT read code does this */ | ||
1362 | if (1 || !page) { | ||
1363 | __unplug_io_fn(bdi, page); | ||
1364 | return; | ||
1365 | } | ||
1366 | |||
1367 | /* | ||
1368 | * page->mapping may change at any time. Get a consistent copy | ||
1369 | * and use that for everything below | ||
1370 | */ | ||
1371 | smp_mb(); | ||
1372 | mapping = page->mapping; | ||
1373 | if (!mapping) | ||
1374 | return; | ||
1375 | |||
1376 | inode = mapping->host; | ||
1377 | |||
1378 | /* | ||
1379 | * don't do the expensive searching for a small number of | ||
1380 | * devices | ||
1381 | */ | ||
1382 | if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) { | ||
1383 | __unplug_io_fn(bdi, page); | ||
1384 | return; | ||
1385 | } | ||
1386 | |||
1387 | offset = page_offset(page); | ||
1388 | |||
1389 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
1390 | read_lock(&em_tree->lock); | ||
1391 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
1392 | read_unlock(&em_tree->lock); | ||
1393 | if (!em) { | ||
1394 | __unplug_io_fn(bdi, page); | ||
1395 | return; | ||
1396 | } | ||
1397 | |||
1398 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
1399 | free_extent_map(em); | ||
1400 | __unplug_io_fn(bdi, page); | ||
1401 | return; | ||
1402 | } | ||
1403 | offset = offset - em->start; | ||
1404 | btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
1405 | em->block_start + offset, page); | ||
1406 | free_extent_map(em); | ||
1407 | } | ||
1408 | |||
1409 | /* | ||
1410 | * If this fails, caller must call bdi_destroy() to get rid of the | 1427 | * If this fails, caller must call bdi_destroy() to get rid of the |
1411 | * bdi again. | 1428 | * bdi again. |
1412 | */ | 1429 | */ |
@@ -1420,8 +1437,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1420 | return err; | 1437 | return err; |
1421 | 1438 | ||
1422 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1439 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1423 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | ||
1424 | bdi->unplug_io_data = info; | ||
1425 | bdi->congested_fn = btrfs_congested_fn; | 1440 | bdi->congested_fn = btrfs_congested_fn; |
1426 | bdi->congested_data = info; | 1441 | bdi->congested_data = info; |
1427 | return 0; | 1442 | return 0; |
@@ -1632,6 +1647,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1632 | goto fail_bdi; | 1647 | goto fail_bdi; |
1633 | } | 1648 | } |
1634 | 1649 | ||
1650 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | ||
1651 | |||
1635 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1652 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1636 | INIT_LIST_HEAD(&fs_info->trans_list); | 1653 | INIT_LIST_HEAD(&fs_info->trans_list); |
1637 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1654 | INIT_LIST_HEAD(&fs_info->dead_roots); |
@@ -1762,6 +1779,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1762 | 1779 | ||
1763 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 1780 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
1764 | 1781 | ||
1782 | /* | ||
1783 | * In the long term, we'll store the compression type in the super | ||
1784 | * block, and it'll be used for per file compression control. | ||
1785 | */ | ||
1786 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
1787 | |||
1765 | ret = btrfs_parse_options(tree_root, options); | 1788 | ret = btrfs_parse_options(tree_root, options); |
1766 | if (ret) { | 1789 | if (ret) { |
1767 | err = ret; | 1790 | err = ret; |
@@ -1967,6 +1990,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1967 | fs_info->metadata_alloc_profile = (u64)-1; | 1990 | fs_info->metadata_alloc_profile = (u64)-1; |
1968 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1991 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1969 | 1992 | ||
1993 | ret = btrfs_init_space_info(fs_info); | ||
1994 | if (ret) { | ||
1995 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | ||
1996 | goto fail_block_groups; | ||
1997 | } | ||
1998 | |||
1970 | ret = btrfs_read_block_groups(extent_root); | 1999 | ret = btrfs_read_block_groups(extent_root); |
1971 | if (ret) { | 2000 | if (ret) { |
1972 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 2001 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
@@ -2058,9 +2087,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
2058 | 2087 | ||
2059 | if (!(sb->s_flags & MS_RDONLY)) { | 2088 | if (!(sb->s_flags & MS_RDONLY)) { |
2060 | down_read(&fs_info->cleanup_work_sem); | 2089 | down_read(&fs_info->cleanup_work_sem); |
2061 | btrfs_orphan_cleanup(fs_info->fs_root); | 2090 | err = btrfs_orphan_cleanup(fs_info->fs_root); |
2062 | btrfs_orphan_cleanup(fs_info->tree_root); | 2091 | if (!err) |
2092 | err = btrfs_orphan_cleanup(fs_info->tree_root); | ||
2063 | up_read(&fs_info->cleanup_work_sem); | 2093 | up_read(&fs_info->cleanup_work_sem); |
2094 | if (err) { | ||
2095 | close_ctree(tree_root); | ||
2096 | return ERR_PTR(err); | ||
2097 | } | ||
2064 | } | 2098 | } |
2065 | 2099 | ||
2066 | return tree_root; | 2100 | return tree_root; |
@@ -2435,8 +2469,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2435 | 2469 | ||
2436 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2470 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2437 | for (i = 0; i < ret; i++) { | 2471 | for (i = 0; i < ret; i++) { |
2472 | int err; | ||
2473 | |||
2438 | root_objectid = gang[i]->root_key.objectid; | 2474 | root_objectid = gang[i]->root_key.objectid; |
2439 | btrfs_orphan_cleanup(gang[i]); | 2475 | err = btrfs_orphan_cleanup(gang[i]); |
2476 | if (err) | ||
2477 | return err; | ||
2440 | } | 2478 | } |
2441 | root_objectid++; | 2479 | root_objectid++; |
2442 | } | 2480 | } |
@@ -2947,7 +2985,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | |||
2947 | break; | 2985 | break; |
2948 | 2986 | ||
2949 | /* opt_discard */ | 2987 | /* opt_discard */ |
2950 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | 2988 | if (btrfs_test_opt(root, DISCARD)) |
2989 | ret = btrfs_error_discard_extent(root, start, | ||
2990 | end + 1 - start, | ||
2991 | NULL); | ||
2951 | 2992 | ||
2952 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 2993 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
2953 | btrfs_error_unpin_extent_range(root, start, end); | 2994 | btrfs_error_unpin_extent_range(root, start, end); |
@@ -3016,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3016 | btrfs_destroy_pinned_extent(root, | 3057 | btrfs_destroy_pinned_extent(root, |
3017 | root->fs_info->pinned_extents); | 3058 | root->fs_info->pinned_extents); |
3018 | 3059 | ||
3019 | t->use_count = 0; | 3060 | atomic_set(&t->use_count, 0); |
3020 | list_del_init(&t->list); | 3061 | list_del_init(&t->list); |
3021 | memset(t, 0, sizeof(*t)); | 3062 | memset(t, 0, sizeof(*t)); |
3022 | kmem_cache_free(btrfs_transaction_cachep, t); | 3063 | kmem_cache_free(btrfs_transaction_cachep, t); |