diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 189 |
1 files changed, 147 insertions, 42 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 102c176fc29c..632f8f3cc9db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -620,7 +620,7 @@ out: | |||
620 | 620 | ||
621 | static int btree_io_failed_hook(struct bio *failed_bio, | 621 | static int btree_io_failed_hook(struct bio *failed_bio, |
622 | struct page *page, u64 start, u64 end, | 622 | struct page *page, u64 start, u64 end, |
623 | u64 mirror_num, struct extent_state *state) | 623 | int mirror_num, struct extent_state *state) |
624 | { | 624 | { |
625 | struct extent_io_tree *tree; | 625 | struct extent_io_tree *tree; |
626 | unsigned long len; | 626 | unsigned long len; |
@@ -1890,31 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1890 | u64 features; | 1890 | u64 features; |
1891 | struct btrfs_key location; | 1891 | struct btrfs_key location; |
1892 | struct buffer_head *bh; | 1892 | struct buffer_head *bh; |
1893 | struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), | 1893 | struct btrfs_super_block *disk_super; |
1894 | GFP_NOFS); | ||
1895 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | ||
1896 | GFP_NOFS); | ||
1897 | struct btrfs_root *tree_root = btrfs_sb(sb); | 1894 | struct btrfs_root *tree_root = btrfs_sb(sb); |
1898 | struct btrfs_fs_info *fs_info = NULL; | 1895 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1899 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | 1896 | struct btrfs_root *extent_root; |
1900 | GFP_NOFS); | 1897 | struct btrfs_root *csum_root; |
1901 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | 1898 | struct btrfs_root *chunk_root; |
1902 | GFP_NOFS); | 1899 | struct btrfs_root *dev_root; |
1903 | struct btrfs_root *log_tree_root; | 1900 | struct btrfs_root *log_tree_root; |
1904 | |||
1905 | int ret; | 1901 | int ret; |
1906 | int err = -EINVAL; | 1902 | int err = -EINVAL; |
1907 | int num_backups_tried = 0; | 1903 | int num_backups_tried = 0; |
1908 | int backup_index = 0; | 1904 | int backup_index = 0; |
1909 | 1905 | ||
1910 | struct btrfs_super_block *disk_super; | 1906 | extent_root = fs_info->extent_root = |
1907 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1908 | csum_root = fs_info->csum_root = | ||
1909 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1910 | chunk_root = fs_info->chunk_root = | ||
1911 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1912 | dev_root = fs_info->dev_root = | ||
1913 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1911 | 1914 | ||
1912 | if (!extent_root || !tree_root || !tree_root->fs_info || | 1915 | if (!extent_root || !csum_root || !chunk_root || !dev_root) { |
1913 | !chunk_root || !dev_root || !csum_root) { | ||
1914 | err = -ENOMEM; | 1916 | err = -ENOMEM; |
1915 | goto fail; | 1917 | goto fail; |
1916 | } | 1918 | } |
1917 | fs_info = tree_root->fs_info; | ||
1918 | 1919 | ||
1919 | ret = init_srcu_struct(&fs_info->subvol_srcu); | 1920 | ret = init_srcu_struct(&fs_info->subvol_srcu); |
1920 | if (ret) { | 1921 | if (ret) { |
@@ -1954,12 +1955,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1954 | mutex_init(&fs_info->reloc_mutex); | 1955 | mutex_init(&fs_info->reloc_mutex); |
1955 | 1956 | ||
1956 | init_completion(&fs_info->kobj_unregister); | 1957 | init_completion(&fs_info->kobj_unregister); |
1957 | fs_info->tree_root = tree_root; | ||
1958 | fs_info->extent_root = extent_root; | ||
1959 | fs_info->csum_root = csum_root; | ||
1960 | fs_info->chunk_root = chunk_root; | ||
1961 | fs_info->dev_root = dev_root; | ||
1962 | fs_info->fs_devices = fs_devices; | ||
1963 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1958 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1964 | INIT_LIST_HEAD(&fs_info->space_info); | 1959 | INIT_LIST_HEAD(&fs_info->space_info); |
1965 | btrfs_mapping_init(&fs_info->mapping_tree); | 1960 | btrfs_mapping_init(&fs_info->mapping_tree); |
@@ -2465,21 +2460,20 @@ fail_sb_buffer: | |||
2465 | btrfs_stop_workers(&fs_info->caching_workers); | 2460 | btrfs_stop_workers(&fs_info->caching_workers); |
2466 | fail_alloc: | 2461 | fail_alloc: |
2467 | fail_iput: | 2462 | fail_iput: |
2463 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
2464 | |||
2468 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2465 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2469 | iput(fs_info->btree_inode); | 2466 | iput(fs_info->btree_inode); |
2470 | |||
2471 | btrfs_close_devices(fs_info->fs_devices); | ||
2472 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
2473 | fail_bdi: | 2467 | fail_bdi: |
2474 | bdi_destroy(&fs_info->bdi); | 2468 | bdi_destroy(&fs_info->bdi); |
2475 | fail_srcu: | 2469 | fail_srcu: |
2476 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2470 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2477 | fail: | 2471 | fail: |
2472 | btrfs_close_devices(fs_info->fs_devices); | ||
2478 | free_fs_info(fs_info); | 2473 | free_fs_info(fs_info); |
2479 | return ERR_PTR(err); | 2474 | return ERR_PTR(err); |
2480 | 2475 | ||
2481 | recovery_tree_root: | 2476 | recovery_tree_root: |
2482 | |||
2483 | if (!btrfs_test_opt(tree_root, RECOVERY)) | 2477 | if (!btrfs_test_opt(tree_root, RECOVERY)) |
2484 | goto fail_tree_roots; | 2478 | goto fail_tree_roots; |
2485 | 2479 | ||
@@ -2579,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2579 | int errors = 0; | 2573 | int errors = 0; |
2580 | u32 crc; | 2574 | u32 crc; |
2581 | u64 bytenr; | 2575 | u64 bytenr; |
2582 | int last_barrier = 0; | ||
2583 | 2576 | ||
2584 | if (max_mirrors == 0) | 2577 | if (max_mirrors == 0) |
2585 | max_mirrors = BTRFS_SUPER_MIRROR_MAX; | 2578 | max_mirrors = BTRFS_SUPER_MIRROR_MAX; |
2586 | 2579 | ||
2587 | /* make sure only the last submit_bh does a barrier */ | ||
2588 | if (do_barriers) { | ||
2589 | for (i = 0; i < max_mirrors; i++) { | ||
2590 | bytenr = btrfs_sb_offset(i); | ||
2591 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
2592 | device->total_bytes) | ||
2593 | break; | ||
2594 | last_barrier = i; | ||
2595 | } | ||
2596 | } | ||
2597 | |||
2598 | for (i = 0; i < max_mirrors; i++) { | 2580 | for (i = 0; i < max_mirrors; i++) { |
2599 | bytenr = btrfs_sb_offset(i); | 2581 | bytenr = btrfs_sb_offset(i); |
2600 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | 2582 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) |
@@ -2640,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2640 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2622 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2641 | } | 2623 | } |
2642 | 2624 | ||
2643 | if (i == last_barrier && do_barriers) | 2625 | /* |
2644 | ret = submit_bh(WRITE_FLUSH_FUA, bh); | 2626 | * we fua the first super. The others we allow |
2645 | else | 2627 | * to go down lazy. |
2646 | ret = submit_bh(WRITE_SYNC, bh); | 2628 | */ |
2647 | 2629 | ret = submit_bh(WRITE_FUA, bh); | |
2648 | if (ret) | 2630 | if (ret) |
2649 | errors++; | 2631 | errors++; |
2650 | } | 2632 | } |
2651 | return errors < i ? 0 : -1; | 2633 | return errors < i ? 0 : -1; |
2652 | } | 2634 | } |
2653 | 2635 | ||
2636 | /* | ||
2637 | * endio for the write_dev_flush, this will wake anyone waiting | ||
2638 | * for the barrier when it is done | ||
2639 | */ | ||
2640 | static void btrfs_end_empty_barrier(struct bio *bio, int err) | ||
2641 | { | ||
2642 | if (err) { | ||
2643 | if (err == -EOPNOTSUPP) | ||
2644 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
2645 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
2646 | } | ||
2647 | if (bio->bi_private) | ||
2648 | complete(bio->bi_private); | ||
2649 | bio_put(bio); | ||
2650 | } | ||
2651 | |||
2652 | /* | ||
2653 | * trigger flushes for one the devices. If you pass wait == 0, the flushes are | ||
2654 | * sent down. With wait == 1, it waits for the previous flush. | ||
2655 | * | ||
2656 | * any device where the flush fails with eopnotsupp are flagged as not-barrier | ||
2657 | * capable | ||
2658 | */ | ||
2659 | static int write_dev_flush(struct btrfs_device *device, int wait) | ||
2660 | { | ||
2661 | struct bio *bio; | ||
2662 | int ret = 0; | ||
2663 | |||
2664 | if (device->nobarriers) | ||
2665 | return 0; | ||
2666 | |||
2667 | if (wait) { | ||
2668 | bio = device->flush_bio; | ||
2669 | if (!bio) | ||
2670 | return 0; | ||
2671 | |||
2672 | wait_for_completion(&device->flush_wait); | ||
2673 | |||
2674 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) { | ||
2675 | printk("btrfs: disabling barriers on dev %s\n", | ||
2676 | device->name); | ||
2677 | device->nobarriers = 1; | ||
2678 | } | ||
2679 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
2680 | ret = -EIO; | ||
2681 | } | ||
2682 | |||
2683 | /* drop the reference from the wait == 0 run */ | ||
2684 | bio_put(bio); | ||
2685 | device->flush_bio = NULL; | ||
2686 | |||
2687 | return ret; | ||
2688 | } | ||
2689 | |||
2690 | /* | ||
2691 | * one reference for us, and we leave it for the | ||
2692 | * caller | ||
2693 | */ | ||
2694 | device->flush_bio = NULL;; | ||
2695 | bio = bio_alloc(GFP_NOFS, 0); | ||
2696 | if (!bio) | ||
2697 | return -ENOMEM; | ||
2698 | |||
2699 | bio->bi_end_io = btrfs_end_empty_barrier; | ||
2700 | bio->bi_bdev = device->bdev; | ||
2701 | init_completion(&device->flush_wait); | ||
2702 | bio->bi_private = &device->flush_wait; | ||
2703 | device->flush_bio = bio; | ||
2704 | |||
2705 | bio_get(bio); | ||
2706 | submit_bio(WRITE_FLUSH, bio); | ||
2707 | |||
2708 | return 0; | ||
2709 | } | ||
2710 | |||
2711 | /* | ||
2712 | * send an empty flush down to each device in parallel, | ||
2713 | * then wait for them | ||
2714 | */ | ||
2715 | static int barrier_all_devices(struct btrfs_fs_info *info) | ||
2716 | { | ||
2717 | struct list_head *head; | ||
2718 | struct btrfs_device *dev; | ||
2719 | int errors = 0; | ||
2720 | int ret; | ||
2721 | |||
2722 | /* send down all the barriers */ | ||
2723 | head = &info->fs_devices->devices; | ||
2724 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2725 | if (!dev->bdev) { | ||
2726 | errors++; | ||
2727 | continue; | ||
2728 | } | ||
2729 | if (!dev->in_fs_metadata || !dev->writeable) | ||
2730 | continue; | ||
2731 | |||
2732 | ret = write_dev_flush(dev, 0); | ||
2733 | if (ret) | ||
2734 | errors++; | ||
2735 | } | ||
2736 | |||
2737 | /* wait for all the barriers */ | ||
2738 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2739 | if (!dev->bdev) { | ||
2740 | errors++; | ||
2741 | continue; | ||
2742 | } | ||
2743 | if (!dev->in_fs_metadata || !dev->writeable) | ||
2744 | continue; | ||
2745 | |||
2746 | ret = write_dev_flush(dev, 1); | ||
2747 | if (ret) | ||
2748 | errors++; | ||
2749 | } | ||
2750 | if (errors) | ||
2751 | return -EIO; | ||
2752 | return 0; | ||
2753 | } | ||
2754 | |||
2654 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 2755 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
2655 | { | 2756 | { |
2656 | struct list_head *head; | 2757 | struct list_head *head; |
@@ -2672,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2672 | 2773 | ||
2673 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2774 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
2674 | head = &root->fs_info->fs_devices->devices; | 2775 | head = &root->fs_info->fs_devices->devices; |
2776 | |||
2777 | if (do_barriers) | ||
2778 | barrier_all_devices(root->fs_info); | ||
2779 | |||
2675 | list_for_each_entry_rcu(dev, head, dev_list) { | 2780 | list_for_each_entry_rcu(dev, head, dev_list) { |
2676 | if (!dev->bdev) { | 2781 | if (!dev->bdev) { |
2677 | total_errors++; | 2782 | total_errors++; |