Diffstat (limited to 'fs/btrfs/disk-io.c')

 -rw-r--r--   fs/btrfs/disk-io.c   | 223 lines changed
 1 file changed, 168 insertions(+), 55 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 102c176fc29c..f44b3928dc2d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -620,7 +620,7 @@ out:
 
 static int btree_io_failed_hook(struct bio *failed_bio,
                                 struct page *page, u64 start, u64 end,
-                                u64 mirror_num, struct extent_state *state)
+                                int mirror_num, struct extent_state *state)
 {
         struct extent_io_tree *tree;
         unsigned long len;
@@ -1890,31 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         u64 features;
         struct btrfs_key location;
         struct buffer_head *bh;
-        struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
-                                                 GFP_NOFS);
-        struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
-                                               GFP_NOFS);
+        struct btrfs_super_block *disk_super;
         struct btrfs_root *tree_root = btrfs_sb(sb);
-        struct btrfs_fs_info *fs_info = NULL;
-        struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
-                                                GFP_NOFS);
-        struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
-                                              GFP_NOFS);
+        struct btrfs_fs_info *fs_info = tree_root->fs_info;
+        struct btrfs_root *extent_root;
+        struct btrfs_root *csum_root;
+        struct btrfs_root *chunk_root;
+        struct btrfs_root *dev_root;
         struct btrfs_root *log_tree_root;
-
         int ret;
         int err = -EINVAL;
         int num_backups_tried = 0;
         int backup_index = 0;
 
-        struct btrfs_super_block *disk_super;
+        extent_root = fs_info->extent_root =
+                kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+        csum_root = fs_info->csum_root =
+                kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+        chunk_root = fs_info->chunk_root =
+                kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+        dev_root = fs_info->dev_root =
+                kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
 
-        if (!extent_root || !tree_root || !tree_root->fs_info ||
-            !chunk_root || !dev_root || !csum_root) {
+        if (!extent_root || !csum_root || !chunk_root || !dev_root) {
                 err = -ENOMEM;
                 goto fail;
         }
-        fs_info = tree_root->fs_info;
 
         ret = init_srcu_struct(&fs_info->subvol_srcu);
         if (ret) {
@@ -1954,12 +1955,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         mutex_init(&fs_info->reloc_mutex);
 
         init_completion(&fs_info->kobj_unregister);
-        fs_info->tree_root = tree_root;
-        fs_info->extent_root = extent_root;
-        fs_info->csum_root = csum_root;
-        fs_info->chunk_root = chunk_root;
-        fs_info->dev_root = dev_root;
-        fs_info->fs_devices = fs_devices;
         INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
         INIT_LIST_HEAD(&fs_info->space_info);
         btrfs_mapping_init(&fs_info->mapping_tree);
@@ -2199,19 +2194,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         fs_info->endio_meta_write_workers.idle_thresh = 2;
         fs_info->readahead_workers.idle_thresh = 2;
 
-        btrfs_start_workers(&fs_info->workers, 1);
-        btrfs_start_workers(&fs_info->generic_worker, 1);
-        btrfs_start_workers(&fs_info->submit_workers, 1);
-        btrfs_start_workers(&fs_info->delalloc_workers, 1);
-        btrfs_start_workers(&fs_info->fixup_workers, 1);
-        btrfs_start_workers(&fs_info->endio_workers, 1);
-        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
-        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
-        btrfs_start_workers(&fs_info->endio_write_workers, 1);
-        btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
-        btrfs_start_workers(&fs_info->delayed_workers, 1);
-        btrfs_start_workers(&fs_info->caching_workers, 1);
-        btrfs_start_workers(&fs_info->readahead_workers, 1);
+        /*
+         * btrfs_start_workers can really only fail because of ENOMEM so just
+         * return -ENOMEM if any of these fail.
+         */
+        ret = btrfs_start_workers(&fs_info->workers);
+        ret |= btrfs_start_workers(&fs_info->generic_worker);
+        ret |= btrfs_start_workers(&fs_info->submit_workers);
+        ret |= btrfs_start_workers(&fs_info->delalloc_workers);
+        ret |= btrfs_start_workers(&fs_info->fixup_workers);
+        ret |= btrfs_start_workers(&fs_info->endio_workers);
+        ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
+        ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
+        ret |= btrfs_start_workers(&fs_info->endio_write_workers);
+        ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
+        ret |= btrfs_start_workers(&fs_info->delayed_workers);
+        ret |= btrfs_start_workers(&fs_info->caching_workers);
+        ret |= btrfs_start_workers(&fs_info->readahead_workers);
+        if (ret) {
+                ret = -ENOMEM;
+                goto fail_sb_buffer;
+        }
 
         fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
         fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
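The hunk above leans on a small idiom: every btrfs_start_workers() return value is OR-ed into a single ret, and any non-zero result is collapsed to -ENOMEM before jumping to the existing fail_sb_buffer teardown, because the only realistic failure is an allocation failure. A minimal standalone sketch of that accumulate-then-normalize pattern (the helper names below are invented for illustration and are not btrfs symbols):

#include <errno.h>
#include <stdio.h>

/* hypothetical stand-ins for several fallible startup steps */
static int start_pool_a(void) { return 0; }
static int start_pool_b(void) { return 0; }
static int start_pool_c(void) { return -ENOMEM; } /* simulate one failure */

static int start_all_pools(void)
{
        int ret;

        /* accumulate: any non-zero return leaves ret non-zero */
        ret = start_pool_a();
        ret |= start_pool_b();
        ret |= start_pool_c();

        /* normalize: report every failure as -ENOMEM, as open_ctree() does */
        if (ret)
                return -ENOMEM;
        return 0;
}

int main(void)
{
        printf("start_all_pools() = %d\n", start_all_pools());
        return 0;
}

The trade-off is that OR-ing discards which step failed and what its exact error code was, which is acceptable here precisely because every failure is handled the same way.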
@@ -2465,21 +2468,20 @@ fail_sb_buffer:
         btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 fail_iput:
+        btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
         iput(fs_info->btree_inode);
-
-        btrfs_close_devices(fs_info->fs_devices);
-        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 fail_bdi:
         bdi_destroy(&fs_info->bdi);
 fail_srcu:
         cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
+        btrfs_close_devices(fs_info->fs_devices);
         free_fs_info(fs_info);
         return ERR_PTR(err);
 
 recovery_tree_root:
-
         if (!btrfs_test_opt(tree_root, RECOVERY))
                 goto fail_tree_roots;
 
@@ -2579,22 +2581,10 @@ static int write_dev_supers(struct btrfs_device *device,
         int errors = 0;
         u32 crc;
         u64 bytenr;
-        int last_barrier = 0;
 
         if (max_mirrors == 0)
                 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
-        /* make sure only the last submit_bh does a barrier */
-        if (do_barriers) {
-                for (i = 0; i < max_mirrors; i++) {
-                        bytenr = btrfs_sb_offset(i);
-                        if (bytenr + BTRFS_SUPER_INFO_SIZE >=
-                            device->total_bytes)
-                                break;
-                        last_barrier = i;
-                }
-        }
-
         for (i = 0; i < max_mirrors; i++) {
                 bytenr = btrfs_sb_offset(i);
                 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2640,17 +2630,136 @@ static int write_dev_supers(struct btrfs_device *device,
                         bh->b_end_io = btrfs_end_buffer_write_sync;
                 }
 
-                if (i == last_barrier && do_barriers)
-                        ret = submit_bh(WRITE_FLUSH_FUA, bh);
-                else
-                        ret = submit_bh(WRITE_SYNC, bh);
-
+                /*
+                 * we fua the first super.  The others we allow
+                 * to go down lazy.
+                 */
+                ret = submit_bh(WRITE_FUA, bh);
                 if (ret)
                         errors++;
         }
         return errors < i ? 0 : -1;
 }
 
+/*
+ * endio for the write_dev_flush, this will wake anyone waiting
+ * for the barrier when it is done
+ */
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+        if (err) {
+                if (err == -EOPNOTSUPP)
+                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+                clear_bit(BIO_UPTODATE, &bio->bi_flags);
+        }
+        if (bio->bi_private)
+                complete(bio->bi_private);
+        bio_put(bio);
+}
+
+/*
+ * trigger flushes for one of the devices.  If you pass wait == 0, the flushes
+ * are sent down.  With wait == 1, it waits for the previous flush.
+ *
+ * any device where the flush fails with eopnotsupp is flagged as not-barrier
+ * capable
+ */
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+        struct bio *bio;
+        int ret = 0;
+
+        if (device->nobarriers)
+                return 0;
+
+        if (wait) {
+                bio = device->flush_bio;
+                if (!bio)
+                        return 0;
+
+                wait_for_completion(&device->flush_wait);
+
+                if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+                        printk("btrfs: disabling barriers on dev %s\n",
+                               device->name);
+                        device->nobarriers = 1;
+                }
+                if (!bio_flagged(bio, BIO_UPTODATE)) {
+                        ret = -EIO;
+                }
+
+                /* drop the reference from the wait == 0 run */
+                bio_put(bio);
+                device->flush_bio = NULL;
+
+                return ret;
+        }
+
+        /*
+         * one reference for us, and we leave it for the
+         * caller
+         */
+        device->flush_bio = NULL;
+        bio = bio_alloc(GFP_NOFS, 0);
+        if (!bio)
+                return -ENOMEM;
+
+        bio->bi_end_io = btrfs_end_empty_barrier;
+        bio->bi_bdev = device->bdev;
+        init_completion(&device->flush_wait);
+        bio->bi_private = &device->flush_wait;
+        device->flush_bio = bio;
+
+        bio_get(bio);
+        submit_bio(WRITE_FLUSH, bio);
+
+        return 0;
+}
+
+/*
+ * send an empty flush down to each device in parallel,
+ * then wait for them
+ */
+static int barrier_all_devices(struct btrfs_fs_info *info)
+{
+        struct list_head *head;
+        struct btrfs_device *dev;
+        int errors = 0;
+        int ret;
+
+        /* send down all the barriers */
+        head = &info->fs_devices->devices;
+        list_for_each_entry_rcu(dev, head, dev_list) {
+                if (!dev->bdev) {
+                        errors++;
+                        continue;
+                }
+                if (!dev->in_fs_metadata || !dev->writeable)
+                        continue;
+
+                ret = write_dev_flush(dev, 0);
+                if (ret)
+                        errors++;
+        }
+
+        /* wait for all the barriers */
+        list_for_each_entry_rcu(dev, head, dev_list) {
+                if (!dev->bdev) {
+                        errors++;
+                        continue;
+                }
+                if (!dev->in_fs_metadata || !dev->writeable)
+                        continue;
+
+                ret = write_dev_flush(dev, 1);
+                if (ret)
+                        errors++;
+        }
+        if (errors)
+                return -EIO;
+        return 0;
+}
+
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
         struct list_head *head;
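write_dev_flush() in the hunk above is deliberately split into a submit half (wait == 0) and a wait half (wait == 1) so that barrier_all_devices() can first kick off an empty flush on every device and only then block, letting the per-device cache flushes run in parallel instead of one after another. A small userspace analogue of that submit-then-wait shape, with threads standing in for the asynchronous flush bios (all names here are invented for illustration; this is not btrfs code):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct fake_device {
        const char *name;
        pthread_t flusher;
        int result;
        int in_flight;
};

static void *do_flush(void *arg)
{
        struct fake_device *dev = arg;

        /* a real implementation would issue a cache-flush command here */
        dev->result = 0;
        return NULL;
}

/* wait == 0: start the flush; wait == 1: collect the one started earlier */
static int dev_flush(struct fake_device *dev, int wait)
{
        if (!wait) {
                if (pthread_create(&dev->flusher, NULL, do_flush, dev))
                        return -ENOMEM;
                dev->in_flight = 1;
                return 0;
        }
        if (!dev->in_flight)
                return 0;
        pthread_join(dev->flusher, NULL);
        dev->in_flight = 0;
        return dev->result;
}

int main(void)
{
        struct fake_device devs[3] = {
                { .name = "sda" }, { .name = "sdb" }, { .name = "sdc" },
        };
        int errors = 0;
        int i;

        for (i = 0; i < 3; i++)         /* send down all the flushes */
                if (dev_flush(&devs[i], 0))
                        errors++;
        for (i = 0; i < 3; i++)         /* then wait for every one */
                if (dev_flush(&devs[i], 1))
                        errors++;

        printf("%d flush errors\n", errors);
        return errors ? 1 : 0;
}

In the kernel code the same overlap comes from submit_bio(WRITE_FLUSH, bio) returning immediately and btrfs_end_empty_barrier() completing device->flush_wait once the device has actually finished the flush.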
@@ -2672,6 +2781,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
 
         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
         head = &root->fs_info->fs_devices->devices;
+
+        if (do_barriers)
+                barrier_all_devices(root->fs_info);
+
         list_for_each_entry_rcu(dev, head, dev_list) {
                 if (!dev->bdev) {
                         total_errors++;