diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 640 |
1 files changed, 545 insertions, 95 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51d2e4de34eb..68c84c8c24bd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | 31 | #include <linux/migrate.h> |
32 | #include <asm/unaligned.h> | ||
32 | #include "compat.h" | 33 | #include "compat.h" |
33 | #include "ctree.h" | 34 | #include "ctree.h" |
34 | #include "disk-io.h" | 35 | #include "disk-io.h" |
@@ -44,6 +45,20 @@ | |||
44 | static struct extent_io_ops btree_extent_io_ops; | 45 | static struct extent_io_ops btree_extent_io_ops; |
45 | static void end_workqueue_fn(struct btrfs_work *work); | 46 | static void end_workqueue_fn(struct btrfs_work *work); |
46 | static void free_fs_root(struct btrfs_root *root); | 47 | static void free_fs_root(struct btrfs_root *root); |
48 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
49 | int read_only); | ||
50 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
51 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
52 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
53 | struct btrfs_root *root); | ||
54 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
55 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
56 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
57 | struct extent_io_tree *dirty_pages, | ||
58 | int mark); | ||
59 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
60 | struct extent_io_tree *pinned_extents); | ||
61 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
47 | 62 | ||
48 | /* | 63 | /* |
49 | * end_io_wq structs are used to do processing in task context when an IO is | 64 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -184,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | |||
184 | 199 | ||
185 | void btrfs_csum_final(u32 crc, char *result) | 200 | void btrfs_csum_final(u32 crc, char *result) |
186 | { | 201 | { |
187 | *(__le32 *)result = ~cpu_to_le32(crc); | 202 | put_unaligned_le32(~crc, result); |
188 | } | 203 | } |
189 | 204 | ||
190 | /* | 205 | /* |
@@ -309,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
309 | int num_copies = 0; | 324 | int num_copies = 0; |
310 | int mirror_num = 0; | 325 | int mirror_num = 0; |
311 | 326 | ||
327 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
312 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 328 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
313 | while (1) { | 329 | while (1) { |
314 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 330 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, |
@@ -317,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
317 | !verify_parent_transid(io_tree, eb, parent_transid)) | 333 | !verify_parent_transid(io_tree, eb, parent_transid)) |
318 | return ret; | 334 | return ret; |
319 | 335 | ||
336 | /* | ||
337 | * This buffer's crc is fine, but its contents are corrupted, so | ||
338 | * there is no reason to read the other copies, they won't be | ||
339 | * any less wrong. | ||
340 | */ | ||
341 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | ||
342 | return ret; | ||
343 | |||
320 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 344 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
321 | eb->start, eb->len); | 345 | eb->start, eb->len); |
322 | if (num_copies == 1) | 346 | if (num_copies == 1) |
@@ -345,14 +369,22 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
345 | 369 | ||
346 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 370 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
347 | 371 | ||
348 | if (page->private == EXTENT_PAGE_PRIVATE) | 372 | if (page->private == EXTENT_PAGE_PRIVATE) { |
373 | WARN_ON(1); | ||
349 | goto out; | 374 | goto out; |
350 | if (!page->private) | 375 | } |
376 | if (!page->private) { | ||
377 | WARN_ON(1); | ||
351 | goto out; | 378 | goto out; |
379 | } | ||
352 | len = page->private >> 2; | 380 | len = page->private >> 2; |
353 | WARN_ON(len == 0); | 381 | WARN_ON(len == 0); |
354 | 382 | ||
355 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 383 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
384 | if (eb == NULL) { | ||
385 | WARN_ON(1); | ||
386 | goto out; | ||
387 | } | ||
356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 388 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
357 | btrfs_header_generation(eb)); | 389 | btrfs_header_generation(eb)); |
358 | BUG_ON(ret); | 390 | BUG_ON(ret); |
@@ -397,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
397 | return ret; | 429 | return ret; |
398 | } | 430 | } |
399 | 431 | ||
432 | #define CORRUPT(reason, eb, root, slot) \ | ||
433 | printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ | ||
434 | "root=%llu, slot=%d\n", reason, \ | ||
435 | (unsigned long long)btrfs_header_bytenr(eb), \ | ||
436 | (unsigned long long)root->objectid, slot) | ||
437 | |||
438 | static noinline int check_leaf(struct btrfs_root *root, | ||
439 | struct extent_buffer *leaf) | ||
440 | { | ||
441 | struct btrfs_key key; | ||
442 | struct btrfs_key leaf_key; | ||
443 | u32 nritems = btrfs_header_nritems(leaf); | ||
444 | int slot; | ||
445 | |||
446 | if (nritems == 0) | ||
447 | return 0; | ||
448 | |||
449 | /* Check the 0 item */ | ||
450 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | ||
451 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
452 | CORRUPT("invalid item offset size pair", leaf, root, 0); | ||
453 | return -EIO; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Check to make sure each items keys are in the correct order and their | ||
458 | * offsets make sense. We only have to loop through nritems-1 because | ||
459 | * we check the current slot against the next slot, which verifies the | ||
460 | * next slot's offset+size makes sense and that the current's slot | ||
461 | * offset is correct. | ||
462 | */ | ||
463 | for (slot = 0; slot < nritems - 1; slot++) { | ||
464 | btrfs_item_key_to_cpu(leaf, &leaf_key, slot); | ||
465 | btrfs_item_key_to_cpu(leaf, &key, slot + 1); | ||
466 | |||
467 | /* Make sure the keys are in the right order */ | ||
468 | if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { | ||
469 | CORRUPT("bad key order", leaf, root, slot); | ||
470 | return -EIO; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * Make sure the offset and ends are right, remember that the | ||
475 | * item data starts at the end of the leaf and grows towards the | ||
476 | * front. | ||
477 | */ | ||
478 | if (btrfs_item_offset_nr(leaf, slot) != | ||
479 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
480 | CORRUPT("slot offset bad", leaf, root, slot); | ||
481 | return -EIO; | ||
482 | } | ||
483 | |||
484 | /* | ||
485 | * Check to make sure that we don't point outside of the leaf, | ||
486 | * just incase all the items are consistent to eachother, but | ||
487 | * all point outside of the leaf. | ||
488 | */ | ||
489 | if (btrfs_item_end_nr(leaf, slot) > | ||
490 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
491 | CORRUPT("slot end outside of leaf", leaf, root, slot); | ||
492 | return -EIO; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
400 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 499 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
401 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | 500 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) |
402 | { | 501 | { |
@@ -427,6 +526,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
427 | WARN_ON(len == 0); | 526 | WARN_ON(len == 0); |
428 | 527 | ||
429 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 528 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
529 | if (eb == NULL) { | ||
530 | ret = -EIO; | ||
531 | goto out; | ||
532 | } | ||
430 | 533 | ||
431 | found_start = btrfs_header_bytenr(eb); | 534 | found_start = btrfs_header_bytenr(eb); |
432 | if (found_start != start) { | 535 | if (found_start != start) { |
@@ -459,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
459 | btrfs_set_buffer_lockdep_class(eb, found_level); | 562 | btrfs_set_buffer_lockdep_class(eb, found_level); |
460 | 563 | ||
461 | ret = csum_tree_block(root, eb, 1); | 564 | ret = csum_tree_block(root, eb, 1); |
462 | if (ret) | 565 | if (ret) { |
566 | ret = -EIO; | ||
567 | goto err; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * If this is a leaf block and it is corrupt, set the corrupt bit so | ||
572 | * that we don't try and read the other copies of this block, just | ||
573 | * return -EIO. | ||
574 | */ | ||
575 | if (found_level == 0 && check_leaf(root, eb)) { | ||
576 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
463 | ret = -EIO; | 577 | ret = -EIO; |
578 | } | ||
464 | 579 | ||
465 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 580 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
466 | end = eb->start + end - 1; | 581 | end = eb->start + end - 1; |
@@ -821,7 +936,6 @@ static const struct address_space_operations btree_aops = { | |||
821 | .writepages = btree_writepages, | 936 | .writepages = btree_writepages, |
822 | .releasepage = btree_releasepage, | 937 | .releasepage = btree_releasepage, |
823 | .invalidatepage = btree_invalidatepage, | 938 | .invalidatepage = btree_invalidatepage, |
824 | .sync_page = block_sync_page, | ||
825 | #ifdef CONFIG_MIGRATION | 939 | #ifdef CONFIG_MIGRATION |
826 | .migratepage = btree_migratepage, | 940 | .migratepage = btree_migratepage, |
827 | #endif | 941 | #endif |
@@ -1134,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1134 | root, fs_info, location->objectid); | 1248 | root, fs_info, location->objectid); |
1135 | 1249 | ||
1136 | path = btrfs_alloc_path(); | 1250 | path = btrfs_alloc_path(); |
1137 | BUG_ON(!path); | 1251 | if (!path) { |
1252 | kfree(root); | ||
1253 | return ERR_PTR(-ENOMEM); | ||
1254 | } | ||
1138 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1255 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1139 | if (ret == 0) { | 1256 | if (ret == 0) { |
1140 | l = path->nodes[0]; | 1257 | l = path->nodes[0]; |
@@ -1145,6 +1262,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1145 | } | 1262 | } |
1146 | btrfs_free_path(path); | 1263 | btrfs_free_path(path); |
1147 | if (ret) { | 1264 | if (ret) { |
1265 | kfree(root); | ||
1148 | if (ret > 0) | 1266 | if (ret > 0) |
1149 | ret = -ENOENT; | 1267 | ret = -ENOENT; |
1150 | return ERR_PTR(ret); | 1268 | return ERR_PTR(ret); |
@@ -1157,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1157 | root->commit_root = btrfs_root_node(root); | 1275 | root->commit_root = btrfs_root_node(root); |
1158 | BUG_ON(!root->node); | 1276 | BUG_ON(!root->node); |
1159 | out: | 1277 | out: |
1160 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) | 1278 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
1161 | root->ref_cows = 1; | 1279 | root->ref_cows = 1; |
1280 | btrfs_check_and_init_root_item(&root->root_item); | ||
1281 | } | ||
1162 | 1282 | ||
1163 | return root; | 1283 | return root; |
1164 | } | 1284 | } |
@@ -1304,82 +1424,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1304 | } | 1424 | } |
1305 | 1425 | ||
1306 | /* | 1426 | /* |
1307 | * this unplugs every device on the box, and it is only used when page | ||
1308 | * is null | ||
1309 | */ | ||
1310 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1311 | { | ||
1312 | struct btrfs_device *device; | ||
1313 | struct btrfs_fs_info *info; | ||
1314 | |||
1315 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | ||
1316 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | ||
1317 | if (!device->bdev) | ||
1318 | continue; | ||
1319 | |||
1320 | bdi = blk_get_backing_dev_info(device->bdev); | ||
1321 | if (bdi->unplug_io_fn) | ||
1322 | bdi->unplug_io_fn(bdi, page); | ||
1323 | } | ||
1324 | } | ||
1325 | |||
1326 | static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1327 | { | ||
1328 | struct inode *inode; | ||
1329 | struct extent_map_tree *em_tree; | ||
1330 | struct extent_map *em; | ||
1331 | struct address_space *mapping; | ||
1332 | u64 offset; | ||
1333 | |||
1334 | /* the generic O_DIRECT read code does this */ | ||
1335 | if (1 || !page) { | ||
1336 | __unplug_io_fn(bdi, page); | ||
1337 | return; | ||
1338 | } | ||
1339 | |||
1340 | /* | ||
1341 | * page->mapping may change at any time. Get a consistent copy | ||
1342 | * and use that for everything below | ||
1343 | */ | ||
1344 | smp_mb(); | ||
1345 | mapping = page->mapping; | ||
1346 | if (!mapping) | ||
1347 | return; | ||
1348 | |||
1349 | inode = mapping->host; | ||
1350 | |||
1351 | /* | ||
1352 | * don't do the expensive searching for a small number of | ||
1353 | * devices | ||
1354 | */ | ||
1355 | if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) { | ||
1356 | __unplug_io_fn(bdi, page); | ||
1357 | return; | ||
1358 | } | ||
1359 | |||
1360 | offset = page_offset(page); | ||
1361 | |||
1362 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
1363 | read_lock(&em_tree->lock); | ||
1364 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
1365 | read_unlock(&em_tree->lock); | ||
1366 | if (!em) { | ||
1367 | __unplug_io_fn(bdi, page); | ||
1368 | return; | ||
1369 | } | ||
1370 | |||
1371 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
1372 | free_extent_map(em); | ||
1373 | __unplug_io_fn(bdi, page); | ||
1374 | return; | ||
1375 | } | ||
1376 | offset = offset - em->start; | ||
1377 | btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
1378 | em->block_start + offset, page); | ||
1379 | free_extent_map(em); | ||
1380 | } | ||
1381 | |||
1382 | /* | ||
1383 | * If this fails, caller must call bdi_destroy() to get rid of the | 1427 | * If this fails, caller must call bdi_destroy() to get rid of the |
1384 | * bdi again. | 1428 | * bdi again. |
1385 | */ | 1429 | */ |
@@ -1393,8 +1437,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1393 | return err; | 1437 | return err; |
1394 | 1438 | ||
1395 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1439 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1396 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | ||
1397 | bdi->unplug_io_data = info; | ||
1398 | bdi->congested_fn = btrfs_congested_fn; | 1440 | bdi->congested_fn = btrfs_congested_fn; |
1399 | bdi->congested_data = info; | 1441 | bdi->congested_data = info; |
1400 | return 0; | 1442 | return 0; |
@@ -1527,6 +1569,7 @@ static int transaction_kthread(void *arg) | |||
1527 | spin_unlock(&root->fs_info->new_trans_lock); | 1569 | spin_unlock(&root->fs_info->new_trans_lock); |
1528 | 1570 | ||
1529 | trans = btrfs_join_transaction(root, 1); | 1571 | trans = btrfs_join_transaction(root, 1); |
1572 | BUG_ON(IS_ERR(trans)); | ||
1530 | if (transid == trans->transid) { | 1573 | if (transid == trans->transid) { |
1531 | ret = btrfs_commit_transaction(trans, root); | 1574 | ret = btrfs_commit_transaction(trans, root); |
1532 | BUG_ON(ret); | 1575 | BUG_ON(ret); |
@@ -1604,6 +1647,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1604 | goto fail_bdi; | 1647 | goto fail_bdi; |
1605 | } | 1648 | } |
1606 | 1649 | ||
1650 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | ||
1651 | |||
1607 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1652 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1608 | INIT_LIST_HEAD(&fs_info->trans_list); | 1653 | INIT_LIST_HEAD(&fs_info->trans_list); |
1609 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1654 | INIT_LIST_HEAD(&fs_info->dead_roots); |
@@ -1713,8 +1758,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1713 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1758 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1714 | 1759 | ||
1715 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1760 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1716 | if (!bh) | 1761 | if (!bh) { |
1762 | err = -EINVAL; | ||
1717 | goto fail_iput; | 1763 | goto fail_iput; |
1764 | } | ||
1718 | 1765 | ||
1719 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1766 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1720 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1767 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1727,6 +1774,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1727 | if (!btrfs_super_root(disk_super)) | 1774 | if (!btrfs_super_root(disk_super)) |
1728 | goto fail_iput; | 1775 | goto fail_iput; |
1729 | 1776 | ||
1777 | /* check FS state, whether FS is broken. */ | ||
1778 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1779 | |||
1780 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1781 | |||
1782 | /* | ||
1783 | * In the long term, we'll store the compression type in the super | ||
1784 | * block, and it'll be used for per file compression control. | ||
1785 | */ | ||
1786 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
1787 | |||
1730 | ret = btrfs_parse_options(tree_root, options); | 1788 | ret = btrfs_parse_options(tree_root, options); |
1731 | if (ret) { | 1789 | if (ret) { |
1732 | err = ret; | 1790 | err = ret; |
@@ -1744,10 +1802,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1744 | } | 1802 | } |
1745 | 1803 | ||
1746 | features = btrfs_super_incompat_flags(disk_super); | 1804 | features = btrfs_super_incompat_flags(disk_super); |
1747 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1805 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1748 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1806 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1749 | btrfs_set_super_incompat_flags(disk_super, features); | 1807 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1750 | } | 1808 | btrfs_set_super_incompat_flags(disk_super, features); |
1751 | 1809 | ||
1752 | features = btrfs_super_compat_ro_flags(disk_super) & | 1810 | features = btrfs_super_compat_ro_flags(disk_super) & |
1753 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1811 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1932,6 +1990,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1932 | fs_info->metadata_alloc_profile = (u64)-1; | 1990 | fs_info->metadata_alloc_profile = (u64)-1; |
1933 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1991 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1934 | 1992 | ||
1993 | ret = btrfs_init_space_info(fs_info); | ||
1994 | if (ret) { | ||
1995 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | ||
1996 | goto fail_block_groups; | ||
1997 | } | ||
1998 | |||
1935 | ret = btrfs_read_block_groups(extent_root); | 1999 | ret = btrfs_read_block_groups(extent_root); |
1936 | if (ret) { | 2000 | if (ret) { |
1937 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 2001 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
@@ -1957,7 +2021,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1957 | btrfs_set_opt(fs_info->mount_opt, SSD); | 2021 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1958 | } | 2022 | } |
1959 | 2023 | ||
1960 | if (btrfs_super_log_root(disk_super) != 0) { | 2024 | /* do not make disk changes in broken FS */ |
2025 | if (btrfs_super_log_root(disk_super) != 0 && | ||
2026 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1961 | u64 bytenr = btrfs_super_log_root(disk_super); | 2027 | u64 bytenr = btrfs_super_log_root(disk_super); |
1962 | 2028 | ||
1963 | if (fs_devices->rw_devices == 0) { | 2029 | if (fs_devices->rw_devices == 0) { |
@@ -2021,9 +2087,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
2021 | 2087 | ||
2022 | if (!(sb->s_flags & MS_RDONLY)) { | 2088 | if (!(sb->s_flags & MS_RDONLY)) { |
2023 | down_read(&fs_info->cleanup_work_sem); | 2089 | down_read(&fs_info->cleanup_work_sem); |
2024 | btrfs_orphan_cleanup(fs_info->fs_root); | 2090 | err = btrfs_orphan_cleanup(fs_info->fs_root); |
2025 | btrfs_orphan_cleanup(fs_info->tree_root); | 2091 | if (!err) |
2092 | err = btrfs_orphan_cleanup(fs_info->tree_root); | ||
2026 | up_read(&fs_info->cleanup_work_sem); | 2093 | up_read(&fs_info->cleanup_work_sem); |
2094 | if (err) { | ||
2095 | close_ctree(tree_root); | ||
2096 | return ERR_PTR(err); | ||
2097 | } | ||
2027 | } | 2098 | } |
2028 | 2099 | ||
2029 | return tree_root; | 2100 | return tree_root; |
@@ -2398,8 +2469,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2398 | 2469 | ||
2399 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2470 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2400 | for (i = 0; i < ret; i++) { | 2471 | for (i = 0; i < ret; i++) { |
2472 | int err; | ||
2473 | |||
2401 | root_objectid = gang[i]->root_key.objectid; | 2474 | root_objectid = gang[i]->root_key.objectid; |
2402 | btrfs_orphan_cleanup(gang[i]); | 2475 | err = btrfs_orphan_cleanup(gang[i]); |
2476 | if (err) | ||
2477 | return err; | ||
2403 | } | 2478 | } |
2404 | root_objectid++; | 2479 | root_objectid++; |
2405 | } | 2480 | } |
@@ -2421,10 +2496,14 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2421 | up_write(&root->fs_info->cleanup_work_sem); | 2496 | up_write(&root->fs_info->cleanup_work_sem); |
2422 | 2497 | ||
2423 | trans = btrfs_join_transaction(root, 1); | 2498 | trans = btrfs_join_transaction(root, 1); |
2499 | if (IS_ERR(trans)) | ||
2500 | return PTR_ERR(trans); | ||
2424 | ret = btrfs_commit_transaction(trans, root); | 2501 | ret = btrfs_commit_transaction(trans, root); |
2425 | BUG_ON(ret); | 2502 | BUG_ON(ret); |
2426 | /* run commit again to drop the original snapshot */ | 2503 | /* run commit again to drop the original snapshot */ |
2427 | trans = btrfs_join_transaction(root, 1); | 2504 | trans = btrfs_join_transaction(root, 1); |
2505 | if (IS_ERR(trans)) | ||
2506 | return PTR_ERR(trans); | ||
2428 | btrfs_commit_transaction(trans, root); | 2507 | btrfs_commit_transaction(trans, root); |
2429 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2508 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2430 | BUG_ON(ret); | 2509 | BUG_ON(ret); |
@@ -2442,8 +2521,28 @@ int close_ctree(struct btrfs_root *root) | |||
2442 | smp_mb(); | 2521 | smp_mb(); |
2443 | 2522 | ||
2444 | btrfs_put_block_group_cache(fs_info); | 2523 | btrfs_put_block_group_cache(fs_info); |
2524 | |||
2525 | /* | ||
2526 | * Here come 2 situations when btrfs is broken to flip readonly: | ||
2527 | * | ||
2528 | * 1. when btrfs flips readonly somewhere else before | ||
2529 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
2530 | * and btrfs will skip to write sb directly to keep | ||
2531 | * ERROR state on disk. | ||
2532 | * | ||
2533 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
2534 | * and in such case, btrfs cannot write sb via btrfs_commit_super, | ||
2535 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
2536 | * btrfs will cleanup all FS resources first and write sb then. | ||
2537 | */ | ||
2445 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2538 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2446 | ret = btrfs_commit_super(root); | 2539 | ret = btrfs_commit_super(root); |
2540 | if (ret) | ||
2541 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2542 | } | ||
2543 | |||
2544 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2545 | ret = btrfs_error_commit_super(root); | ||
2447 | if (ret) | 2546 | if (ret) |
2448 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2547 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2449 | } | 2548 | } |
@@ -2502,6 +2601,8 @@ int close_ctree(struct btrfs_root *root) | |||
2502 | kfree(fs_info->chunk_root); | 2601 | kfree(fs_info->chunk_root); |
2503 | kfree(fs_info->dev_root); | 2602 | kfree(fs_info->dev_root); |
2504 | kfree(fs_info->csum_root); | 2603 | kfree(fs_info->csum_root); |
2604 | kfree(fs_info); | ||
2605 | |||
2505 | return 0; | 2606 | return 0; |
2506 | } | 2607 | } |
2507 | 2608 | ||
@@ -2619,6 +2720,355 @@ out: | |||
2619 | return 0; | 2720 | return 0; |
2620 | } | 2721 | } |
2621 | 2722 | ||
2723 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2724 | int read_only) | ||
2725 | { | ||
2726 | if (read_only) | ||
2727 | return; | ||
2728 | |||
2729 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2730 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2731 | "running btrfsck is recommended\n"); | ||
2732 | } | ||
2733 | |||
2734 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2735 | { | ||
2736 | int ret; | ||
2737 | |||
2738 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2739 | btrfs_run_delayed_iputs(root); | ||
2740 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2741 | |||
2742 | down_write(&root->fs_info->cleanup_work_sem); | ||
2743 | up_write(&root->fs_info->cleanup_work_sem); | ||
2744 | |||
2745 | /* cleanup FS via transaction */ | ||
2746 | btrfs_cleanup_transaction(root); | ||
2747 | |||
2748 | ret = write_ctree_super(NULL, root, 0); | ||
2749 | |||
2750 | return ret; | ||
2751 | } | ||
2752 | |||
2753 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2754 | { | ||
2755 | struct btrfs_inode *btrfs_inode; | ||
2756 | struct list_head splice; | ||
2757 | |||
2758 | INIT_LIST_HEAD(&splice); | ||
2759 | |||
2760 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2761 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2762 | |||
2763 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2764 | while (!list_empty(&splice)) { | ||
2765 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2766 | ordered_operations); | ||
2767 | |||
2768 | list_del_init(&btrfs_inode->ordered_operations); | ||
2769 | |||
2770 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2771 | } | ||
2772 | |||
2773 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2774 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2775 | |||
2776 | return 0; | ||
2777 | } | ||
2778 | |||
2779 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2780 | { | ||
2781 | struct list_head splice; | ||
2782 | struct btrfs_ordered_extent *ordered; | ||
2783 | struct inode *inode; | ||
2784 | |||
2785 | INIT_LIST_HEAD(&splice); | ||
2786 | |||
2787 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2788 | |||
2789 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2790 | while (!list_empty(&splice)) { | ||
2791 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2792 | root_extent_list); | ||
2793 | |||
2794 | list_del_init(&ordered->root_extent_list); | ||
2795 | atomic_inc(&ordered->refs); | ||
2796 | |||
2797 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2798 | inode = igrab(ordered->inode); | ||
2799 | |||
2800 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2801 | if (inode) | ||
2802 | iput(inode); | ||
2803 | |||
2804 | atomic_set(&ordered->refs, 1); | ||
2805 | btrfs_put_ordered_extent(ordered); | ||
2806 | |||
2807 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2808 | } | ||
2809 | |||
2810 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2811 | |||
2812 | return 0; | ||
2813 | } | ||
2814 | |||
2815 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2816 | struct btrfs_root *root) | ||
2817 | { | ||
2818 | struct rb_node *node; | ||
2819 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2820 | struct btrfs_delayed_ref_node *ref; | ||
2821 | int ret = 0; | ||
2822 | |||
2823 | delayed_refs = &trans->delayed_refs; | ||
2824 | |||
2825 | spin_lock(&delayed_refs->lock); | ||
2826 | if (delayed_refs->num_entries == 0) { | ||
2827 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2828 | return ret; | ||
2829 | } | ||
2830 | |||
2831 | node = rb_first(&delayed_refs->root); | ||
2832 | while (node) { | ||
2833 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2834 | node = rb_next(node); | ||
2835 | |||
2836 | ref->in_tree = 0; | ||
2837 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2838 | delayed_refs->num_entries--; | ||
2839 | |||
2840 | atomic_set(&ref->refs, 1); | ||
2841 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2842 | struct btrfs_delayed_ref_head *head; | ||
2843 | |||
2844 | head = btrfs_delayed_node_to_head(ref); | ||
2845 | mutex_lock(&head->mutex); | ||
2846 | kfree(head->extent_op); | ||
2847 | delayed_refs->num_heads--; | ||
2848 | if (list_empty(&head->cluster)) | ||
2849 | delayed_refs->num_heads_ready--; | ||
2850 | list_del_init(&head->cluster); | ||
2851 | mutex_unlock(&head->mutex); | ||
2852 | } | ||
2853 | |||
2854 | spin_unlock(&delayed_refs->lock); | ||
2855 | btrfs_put_delayed_ref(ref); | ||
2856 | |||
2857 | cond_resched(); | ||
2858 | spin_lock(&delayed_refs->lock); | ||
2859 | } | ||
2860 | |||
2861 | spin_unlock(&delayed_refs->lock); | ||
2862 | |||
2863 | return ret; | ||
2864 | } | ||
2865 | |||
2866 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2867 | { | ||
2868 | struct btrfs_pending_snapshot *snapshot; | ||
2869 | struct list_head splice; | ||
2870 | |||
2871 | INIT_LIST_HEAD(&splice); | ||
2872 | |||
2873 | list_splice_init(&t->pending_snapshots, &splice); | ||
2874 | |||
2875 | while (!list_empty(&splice)) { | ||
2876 | snapshot = list_entry(splice.next, | ||
2877 | struct btrfs_pending_snapshot, | ||
2878 | list); | ||
2879 | |||
2880 | list_del_init(&snapshot->list); | ||
2881 | |||
2882 | kfree(snapshot); | ||
2883 | } | ||
2884 | |||
2885 | return 0; | ||
2886 | } | ||
2887 | |||
2888 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2889 | { | ||
2890 | struct btrfs_inode *btrfs_inode; | ||
2891 | struct list_head splice; | ||
2892 | |||
2893 | INIT_LIST_HEAD(&splice); | ||
2894 | |||
2895 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2896 | |||
2897 | spin_lock(&root->fs_info->delalloc_lock); | ||
2898 | |||
2899 | while (!list_empty(&splice)) { | ||
2900 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2901 | delalloc_inodes); | ||
2902 | |||
2903 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2904 | |||
2905 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2906 | } | ||
2907 | |||
2908 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2909 | |||
2910 | return 0; | ||
2911 | } | ||
2912 | |||
2913 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2914 | struct extent_io_tree *dirty_pages, | ||
2915 | int mark) | ||
2916 | { | ||
2917 | int ret; | ||
2918 | struct page *page; | ||
2919 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2920 | struct extent_buffer *eb; | ||
2921 | u64 start = 0; | ||
2922 | u64 end; | ||
2923 | u64 offset; | ||
2924 | unsigned long index; | ||
2925 | |||
2926 | while (1) { | ||
2927 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2928 | mark); | ||
2929 | if (ret) | ||
2930 | break; | ||
2931 | |||
2932 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2933 | while (start <= end) { | ||
2934 | index = start >> PAGE_CACHE_SHIFT; | ||
2935 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2936 | page = find_get_page(btree_inode->i_mapping, index); | ||
2937 | if (!page) | ||
2938 | continue; | ||
2939 | offset = page_offset(page); | ||
2940 | |||
2941 | spin_lock(&dirty_pages->buffer_lock); | ||
2942 | eb = radix_tree_lookup( | ||
2943 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2944 | offset >> PAGE_CACHE_SHIFT); | ||
2945 | spin_unlock(&dirty_pages->buffer_lock); | ||
2946 | if (eb) { | ||
2947 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2948 | &eb->bflags); | ||
2949 | atomic_set(&eb->refs, 1); | ||
2950 | } | ||
2951 | if (PageWriteback(page)) | ||
2952 | end_page_writeback(page); | ||
2953 | |||
2954 | lock_page(page); | ||
2955 | if (PageDirty(page)) { | ||
2956 | clear_page_dirty_for_io(page); | ||
2957 | spin_lock_irq(&page->mapping->tree_lock); | ||
2958 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2959 | page_index(page), | ||
2960 | PAGECACHE_TAG_DIRTY); | ||
2961 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2962 | } | ||
2963 | |||
2964 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2965 | unlock_page(page); | ||
2966 | } | ||
2967 | } | ||
2968 | |||
2969 | return ret; | ||
2970 | } | ||
2971 | |||
2972 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2973 | struct extent_io_tree *pinned_extents) | ||
2974 | { | ||
2975 | struct extent_io_tree *unpin; | ||
2976 | u64 start; | ||
2977 | u64 end; | ||
2978 | int ret; | ||
2979 | |||
2980 | unpin = pinned_extents; | ||
2981 | while (1) { | ||
2982 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
2983 | EXTENT_DIRTY); | ||
2984 | if (ret) | ||
2985 | break; | ||
2986 | |||
2987 | /* opt_discard */ | ||
2988 | if (btrfs_test_opt(root, DISCARD)) | ||
2989 | ret = btrfs_error_discard_extent(root, start, | ||
2990 | end + 1 - start, | ||
2991 | NULL); | ||
2992 | |||
2993 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
2994 | btrfs_error_unpin_extent_range(root, start, end); | ||
2995 | cond_resched(); | ||
2996 | } | ||
2997 | |||
2998 | return 0; | ||
2999 | } | ||
3000 | |||
/*
 * Tear down every transaction still on fs_info->trans_list after the
 * filesystem has hit an unrecoverable error: wake everyone waiting on
 * each transaction, discard its delayed refs, dirty/pinned extents,
 * pending snapshots and delalloc inodes, then free the transaction.
 * Nothing is committed to disk.
 */
static int btrfs_cleanup_transaction(struct btrfs_root *root)
{
	struct btrfs_transaction *t;
	LIST_HEAD(list);

	/* hitting the error-cleanup path at all deserves a stack trace */
	WARN_ON(1);

	mutex_lock(&root->fs_info->trans_mutex);
	mutex_lock(&root->fs_info->transaction_kthread_mutex);

	/* steal the whole transaction list; new ones can't appear while
	 * both mutexes are held */
	list_splice_init(&root->fs_info->trans_list, &list);
	while (!list_empty(&list)) {
		t = list_entry(list.next, struct btrfs_transaction, list);
		/* NOTE(review): list_entry() on a non-empty list never yields
		 * NULL, so this check looks like dead code — confirm */
		if (!t)
			break;

		btrfs_destroy_ordered_operations(root);

		btrfs_destroy_ordered_extents(root);

		btrfs_destroy_delayed_refs(t, root);

		/* give back the space reserved for the dirty metadata */
		btrfs_block_rsv_release(root,
					&root->fs_info->trans_block_rsv,
					t->dirty_pages.dirty_bytes);

		/* FIXME: cleanup wait for commit */
		/* fake the commit state transitions waiters expect, waking
		 * each wait queue in turn */
		t->in_commit = 1;
		t->blocked = 1;
		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
			wake_up(&root->fs_info->transaction_blocked_wait);

		t->blocked = 0;
		if (waitqueue_active(&root->fs_info->transaction_wait))
			wake_up(&root->fs_info->transaction_wait);
		/* drop trans_mutex so the woken waiters can take it and see
		 * the state change, then re-take it for the next step */
		mutex_unlock(&root->fs_info->trans_mutex);

		mutex_lock(&root->fs_info->trans_mutex);
		t->commit_done = 1;
		if (waitqueue_active(&t->commit_wait))
			wake_up(&t->commit_wait);
		mutex_unlock(&root->fs_info->trans_mutex);

		mutex_lock(&root->fs_info->trans_mutex);

		btrfs_destroy_pending_snapshots(t);

		btrfs_destroy_delalloc_inodes(root);

		/* detach the transaction so nobody else can join it */
		spin_lock(&root->fs_info->new_trans_lock);
		root->fs_info->running_transaction = NULL;
		spin_unlock(&root->fs_info->new_trans_lock);

		btrfs_destroy_marked_extents(root, &t->dirty_pages,
					     EXTENT_DIRTY);

		btrfs_destroy_pinned_extent(root,
					    root->fs_info->pinned_extents);

		/* nobody may hold a reference any more; poison and free */
		atomic_set(&t->use_count, 0);
		list_del_init(&t->list);
		memset(t, 0, sizeof(*t));
		kmem_cache_free(btrfs_transaction_cachep, t);
	}

	mutex_unlock(&root->fs_info->transaction_kthread_mutex);
	mutex_unlock(&root->fs_info->trans_mutex);

	return 0;
}
3071 | |||
2622 | static struct extent_io_ops btree_extent_io_ops = { | 3072 | static struct extent_io_ops btree_extent_io_ops = { |
2623 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3073 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2624 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3074 | .readpage_end_io_hook = btree_readpage_end_io_hook, |