aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
authorliubo <liubo2009@cn.fujitsu.com>2011-01-06 06:30:25 -0500
committerChris Mason <chris.mason@oracle.com>2011-01-17 15:13:08 -0500
commitacce952b0263825da32cf10489413dec78053347 (patch)
treed934881f247484d7b6917bebc40828600bb6b76c /fs/btrfs/disk-io.c
parent6f88a4403def422bd8e276ddf6863d6ac71435d2 (diff)
Btrfs: forced readonly mounts on errors
This patch comes from the "Forced readonly mounts on errors" ideas. As we know, this is the first step in being more fault tolerant of disk corruptions instead of just using BUG() statements. The major content: - add a framework for generating errors that should result in filesystems going readonly. - keep FS state in the disk super block. - make sure that all resources will be freed and released at umount time. - make sure that after the FS is forced readonly on error, there will be no more disk changes before the FS is corrected. For this, we should stop write operations. After this patch is applied, the conversion from BUG() to such a framework can happen incrementally. Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c391
1 files changed, 389 insertions, 2 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9b1dd4138072..1a3af9e8e0c4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,20 @@
44static struct extent_io_ops btree_extent_io_ops; 44static struct extent_io_ops btree_extent_io_ops;
45static void end_workqueue_fn(struct btrfs_work *work); 45static void end_workqueue_fn(struct btrfs_work *work);
46static void free_fs_root(struct btrfs_root *root); 46static void free_fs_root(struct btrfs_root *root);
47static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
48 int read_only);
49static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
50static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
51static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
52 struct btrfs_root *root);
53static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
54static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
55static int btrfs_destroy_marked_extents(struct btrfs_root *root,
56 struct extent_io_tree *dirty_pages,
57 int mark);
58static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
59 struct extent_io_tree *pinned_extents);
60static int btrfs_cleanup_transaction(struct btrfs_root *root);
47 61
48/* 62/*
49 * end_io_wq structs are used to do processing in task context when an IO is 63 * end_io_wq structs are used to do processing in task context when an IO is
@@ -1738,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1738 if (!btrfs_super_root(disk_super)) 1752 if (!btrfs_super_root(disk_super))
1739 goto fail_iput; 1753 goto fail_iput;
1740 1754
1755 /* check FS state, whether FS is broken. */
1756 fs_info->fs_state |= btrfs_super_flags(disk_super);
1757
1758 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1759
1741 ret = btrfs_parse_options(tree_root, options); 1760 ret = btrfs_parse_options(tree_root, options);
1742 if (ret) { 1761 if (ret) {
1743 err = ret; 1762 err = ret;
@@ -1968,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1968 btrfs_set_opt(fs_info->mount_opt, SSD); 1987 btrfs_set_opt(fs_info->mount_opt, SSD);
1969 } 1988 }
1970 1989
1971 if (btrfs_super_log_root(disk_super) != 0) { 1990 /* do not make disk changes in broken FS */
1991 if (btrfs_super_log_root(disk_super) != 0 &&
1992 !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
1972 u64 bytenr = btrfs_super_log_root(disk_super); 1993 u64 bytenr = btrfs_super_log_root(disk_super);
1973 1994
1974 if (fs_devices->rw_devices == 0) { 1995 if (fs_devices->rw_devices == 0) {
@@ -2464,8 +2485,28 @@ int close_ctree(struct btrfs_root *root)
2464 smp_mb(); 2485 smp_mb();
2465 2486
2466 btrfs_put_block_group_cache(fs_info); 2487 btrfs_put_block_group_cache(fs_info);
2488
2489 /*
2490 * There are two situations in which btrfs is flipped readonly because it is broken:
2491 *
2492 * 1. when btrfs flips readonly somewhere else before
2493 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
2494 * and btrfs will skip to write sb directly to keep
2495 * ERROR state on disk.
2496 *
2497 * 2. when btrfs flips readonly just in btrfs_commit_super,
2498 * and in such case, btrfs cannot write sb via btrfs_commit_super,
2499 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
2500 * btrfs will cleanup all FS resources first and write sb then.
2501 */
2467 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2502 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2468 ret = btrfs_commit_super(root); 2503 ret = btrfs_commit_super(root);
2504 if (ret)
2505 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2506 }
2507
2508 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
2509 ret = btrfs_error_commit_super(root);
2469 if (ret) 2510 if (ret)
2470 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2511 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2471 } 2512 }
@@ -2641,6 +2682,352 @@ out:
2641 return 0; 2682 return 0;
2642} 2683}
2643 2684
2685static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
2686 int read_only)
2687{
2688 if (read_only)
2689 return;
2690
2691 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2692 printk(KERN_WARNING "warning: mount fs with errors, "
2693 "running btrfsck is recommended\n");
2694}
2695
2696int btrfs_error_commit_super(struct btrfs_root *root)
2697{
2698 int ret;
2699
2700 mutex_lock(&root->fs_info->cleaner_mutex);
2701 btrfs_run_delayed_iputs(root);
2702 mutex_unlock(&root->fs_info->cleaner_mutex);
2703
2704 down_write(&root->fs_info->cleanup_work_sem);
2705 up_write(&root->fs_info->cleanup_work_sem);
2706
2707 /* cleanup FS via transaction */
2708 btrfs_cleanup_transaction(root);
2709
2710 ret = write_ctree_super(NULL, root, 0);
2711
2712 return ret;
2713}
2714
2715static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
2716{
2717 struct btrfs_inode *btrfs_inode;
2718 struct list_head splice;
2719
2720 INIT_LIST_HEAD(&splice);
2721
2722 mutex_lock(&root->fs_info->ordered_operations_mutex);
2723 spin_lock(&root->fs_info->ordered_extent_lock);
2724
2725 list_splice_init(&root->fs_info->ordered_operations, &splice);
2726 while (!list_empty(&splice)) {
2727 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2728 ordered_operations);
2729
2730 list_del_init(&btrfs_inode->ordered_operations);
2731
2732 btrfs_invalidate_inodes(btrfs_inode->root);
2733 }
2734
2735 spin_unlock(&root->fs_info->ordered_extent_lock);
2736 mutex_unlock(&root->fs_info->ordered_operations_mutex);
2737
2738 return 0;
2739}
2740
2741static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
2742{
2743 struct list_head splice;
2744 struct btrfs_ordered_extent *ordered;
2745 struct inode *inode;
2746
2747 INIT_LIST_HEAD(&splice);
2748
2749 spin_lock(&root->fs_info->ordered_extent_lock);
2750
2751 list_splice_init(&root->fs_info->ordered_extents, &splice);
2752 while (!list_empty(&splice)) {
2753 ordered = list_entry(splice.next, struct btrfs_ordered_extent,
2754 root_extent_list);
2755
2756 list_del_init(&ordered->root_extent_list);
2757 atomic_inc(&ordered->refs);
2758
2759 /* the inode may be getting freed (in sys_unlink path). */
2760 inode = igrab(ordered->inode);
2761
2762 spin_unlock(&root->fs_info->ordered_extent_lock);
2763 if (inode)
2764 iput(inode);
2765
2766 atomic_set(&ordered->refs, 1);
2767 btrfs_put_ordered_extent(ordered);
2768
2769 spin_lock(&root->fs_info->ordered_extent_lock);
2770 }
2771
2772 spin_unlock(&root->fs_info->ordered_extent_lock);
2773
2774 return 0;
2775}
2776
2777static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
2778 struct btrfs_root *root)
2779{
2780 struct rb_node *node;
2781 struct btrfs_delayed_ref_root *delayed_refs;
2782 struct btrfs_delayed_ref_node *ref;
2783 int ret = 0;
2784
2785 delayed_refs = &trans->delayed_refs;
2786
2787 spin_lock(&delayed_refs->lock);
2788 if (delayed_refs->num_entries == 0) {
2789 printk(KERN_INFO "delayed_refs has NO entry\n");
2790 return ret;
2791 }
2792
2793 node = rb_first(&delayed_refs->root);
2794 while (node) {
2795 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2796 node = rb_next(node);
2797
2798 ref->in_tree = 0;
2799 rb_erase(&ref->rb_node, &delayed_refs->root);
2800 delayed_refs->num_entries--;
2801
2802 atomic_set(&ref->refs, 1);
2803 if (btrfs_delayed_ref_is_head(ref)) {
2804 struct btrfs_delayed_ref_head *head;
2805
2806 head = btrfs_delayed_node_to_head(ref);
2807 mutex_lock(&head->mutex);
2808 kfree(head->extent_op);
2809 delayed_refs->num_heads--;
2810 if (list_empty(&head->cluster))
2811 delayed_refs->num_heads_ready--;
2812 list_del_init(&head->cluster);
2813 mutex_unlock(&head->mutex);
2814 }
2815
2816 spin_unlock(&delayed_refs->lock);
2817 btrfs_put_delayed_ref(ref);
2818
2819 cond_resched();
2820 spin_lock(&delayed_refs->lock);
2821 }
2822
2823 spin_unlock(&delayed_refs->lock);
2824
2825 return ret;
2826}
2827
2828static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
2829{
2830 struct btrfs_pending_snapshot *snapshot;
2831 struct list_head splice;
2832
2833 INIT_LIST_HEAD(&splice);
2834
2835 list_splice_init(&t->pending_snapshots, &splice);
2836
2837 while (!list_empty(&splice)) {
2838 snapshot = list_entry(splice.next,
2839 struct btrfs_pending_snapshot,
2840 list);
2841
2842 list_del_init(&snapshot->list);
2843
2844 kfree(snapshot);
2845 }
2846
2847 return 0;
2848}
2849
2850static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2851{
2852 struct btrfs_inode *btrfs_inode;
2853 struct list_head splice;
2854
2855 INIT_LIST_HEAD(&splice);
2856
2857 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2858
2859 spin_lock(&root->fs_info->delalloc_lock);
2860
2861 while (!list_empty(&splice)) {
2862 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2863 delalloc_inodes);
2864
2865 list_del_init(&btrfs_inode->delalloc_inodes);
2866
2867 btrfs_invalidate_inodes(btrfs_inode->root);
2868 }
2869
2870 spin_unlock(&root->fs_info->delalloc_lock);
2871
2872 return 0;
2873}
2874
2875static int btrfs_destroy_marked_extents(struct btrfs_root *root,
2876 struct extent_io_tree *dirty_pages,
2877 int mark)
2878{
2879 int ret;
2880 struct page *page;
2881 struct inode *btree_inode = root->fs_info->btree_inode;
2882 struct extent_buffer *eb;
2883 u64 start = 0;
2884 u64 end;
2885 u64 offset;
2886 unsigned long index;
2887
2888 while (1) {
2889 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
2890 mark);
2891 if (ret)
2892 break;
2893
2894 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
2895 while (start <= end) {
2896 index = start >> PAGE_CACHE_SHIFT;
2897 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
2898 page = find_get_page(btree_inode->i_mapping, index);
2899 if (!page)
2900 continue;
2901 offset = page_offset(page);
2902
2903 spin_lock(&dirty_pages->buffer_lock);
2904 eb = radix_tree_lookup(
2905 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
2906 offset >> PAGE_CACHE_SHIFT);
2907 spin_unlock(&dirty_pages->buffer_lock);
2908 if (eb) {
2909 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
2910 &eb->bflags);
2911 atomic_set(&eb->refs, 1);
2912 }
2913 if (PageWriteback(page))
2914 end_page_writeback(page);
2915
2916 lock_page(page);
2917 if (PageDirty(page)) {
2918 clear_page_dirty_for_io(page);
2919 spin_lock_irq(&page->mapping->tree_lock);
2920 radix_tree_tag_clear(&page->mapping->page_tree,
2921 page_index(page),
2922 PAGECACHE_TAG_DIRTY);
2923 spin_unlock_irq(&page->mapping->tree_lock);
2924 }
2925
2926 page->mapping->a_ops->invalidatepage(page, 0);
2927 unlock_page(page);
2928 }
2929 }
2930
2931 return ret;
2932}
2933
2934static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
2935 struct extent_io_tree *pinned_extents)
2936{
2937 struct extent_io_tree *unpin;
2938 u64 start;
2939 u64 end;
2940 int ret;
2941
2942 unpin = pinned_extents;
2943 while (1) {
2944 ret = find_first_extent_bit(unpin, 0, &start, &end,
2945 EXTENT_DIRTY);
2946 if (ret)
2947 break;
2948
2949 /* opt_discard */
2950 ret = btrfs_error_discard_extent(root, start, end + 1 - start);
2951
2952 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2953 btrfs_error_unpin_extent_range(root, start, end);
2954 cond_resched();
2955 }
2956
2957 return 0;
2958}
2959
2960static int btrfs_cleanup_transaction(struct btrfs_root *root)
2961{
2962 struct btrfs_transaction *t;
2963 LIST_HEAD(list);
2964
2965 WARN_ON(1);
2966
2967 mutex_lock(&root->fs_info->trans_mutex);
2968 mutex_lock(&root->fs_info->transaction_kthread_mutex);
2969
2970 list_splice_init(&root->fs_info->trans_list, &list);
2971 while (!list_empty(&list)) {
2972 t = list_entry(list.next, struct btrfs_transaction, list);
2973 if (!t)
2974 break;
2975
2976 btrfs_destroy_ordered_operations(root);
2977
2978 btrfs_destroy_ordered_extents(root);
2979
2980 btrfs_destroy_delayed_refs(t, root);
2981
2982 btrfs_block_rsv_release(root,
2983 &root->fs_info->trans_block_rsv,
2984 t->dirty_pages.dirty_bytes);
2985
2986 /* FIXME: cleanup wait for commit */
2987 t->in_commit = 1;
2988 t->blocked = 1;
2989 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
2990 wake_up(&root->fs_info->transaction_blocked_wait);
2991
2992 t->blocked = 0;
2993 if (waitqueue_active(&root->fs_info->transaction_wait))
2994 wake_up(&root->fs_info->transaction_wait);
2995 mutex_unlock(&root->fs_info->trans_mutex);
2996
2997 mutex_lock(&root->fs_info->trans_mutex);
2998 t->commit_done = 1;
2999 if (waitqueue_active(&t->commit_wait))
3000 wake_up(&t->commit_wait);
3001 mutex_unlock(&root->fs_info->trans_mutex);
3002
3003 mutex_lock(&root->fs_info->trans_mutex);
3004
3005 btrfs_destroy_pending_snapshots(t);
3006
3007 btrfs_destroy_delalloc_inodes(root);
3008
3009 spin_lock(&root->fs_info->new_trans_lock);
3010 root->fs_info->running_transaction = NULL;
3011 spin_unlock(&root->fs_info->new_trans_lock);
3012
3013 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3014 EXTENT_DIRTY);
3015
3016 btrfs_destroy_pinned_extent(root,
3017 root->fs_info->pinned_extents);
3018
3019 t->use_count = 0;
3020 list_del_init(&t->list);
3021 memset(t, 0, sizeof(*t));
3022 kmem_cache_free(btrfs_transaction_cachep, t);
3023 }
3024
3025 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3026 mutex_unlock(&root->fs_info->trans_mutex);
3027
3028 return 0;
3029}
3030
2644static struct extent_io_ops btree_extent_io_ops = { 3031static struct extent_io_ops btree_extent_io_ops = {
2645 .write_cache_pages_lock_hook = btree_lock_page_hook, 3032 .write_cache_pages_lock_hook = btree_lock_page_hook,
2646 .readpage_end_io_hook = btree_readpage_end_io_hook, 3033 .readpage_end_io_hook = btree_readpage_end_io_hook,