about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author liubo <liubo2009@cn.fujitsu.com> 2011-01-06 06:30:25 -0500
committer Chris Mason <chris.mason@oracle.com> 2011-01-17 15:13:08 -0500
commit acce952b0263825da32cf10489413dec78053347 (patch)
tree d934881f247484d7b6917bebc40828600bb6b76c /fs
parent 6f88a4403def422bd8e276ddf6863d6ac71435d2 (diff)
Btrfs: forced readonly mounts on errors
This patch comes from the "Forced readonly mounts on errors" idea. It is the first step toward being more tolerant of disk corruption instead of just using BUG() statements.

The major changes:
- add a framework for generating errors that should result in the filesystem going readonly.
- keep the FS state in the on-disk super block.
- make sure that all resources are freed and released at umount time.
- make sure that after the FS is forced readonly on an error, there are no more on-disk changes until the FS is corrected. For this, write operations must be stopped.

After this patch is applied, the conversion from BUG() to this framework can happen incrementally.

Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/ctree.h 24
-rw-r--r-- fs/btrfs/disk-io.c 391
-rw-r--r-- fs/btrfs/disk-io.h 1
-rw-r--r-- fs/btrfs/extent-tree.c 11
-rw-r--r-- fs/btrfs/file.c 11
-rw-r--r-- fs/btrfs/super.c 84
-rw-r--r-- fs/btrfs/transaction.c 3
7 files changed, 523 insertions, 2 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0995f4f68d7a..72195378bef9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
295#define BTRFS_FSID_SIZE 16 295#define BTRFS_FSID_SIZE 16
296#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) 296#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
297#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) 297#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
298
299/*
300 * File system states
301 */
302
303/* Errors detected */
304#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
305
298#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) 306#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
299#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) 307#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
300 308
@@ -1058,6 +1066,9 @@ struct btrfs_fs_info {
1058 unsigned metadata_ratio; 1066 unsigned metadata_ratio;
1059 1067
1060 void *bdev_holder; 1068 void *bdev_holder;
1069
1070 /* filesystem state */
1071 u64 fs_state;
1061}; 1072};
1062 1073
1063/* 1074/*
@@ -2203,6 +2214,11 @@ int btrfs_set_block_group_rw(struct btrfs_root *root,
2203 struct btrfs_block_group_cache *cache); 2214 struct btrfs_block_group_cache *cache);
2204void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2215void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2205u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 2216u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
2217int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes);
2221
2206/* ctree.c */ 2222/* ctree.c */
2207int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2208 int level, int *slot); 2224 int level, int *slot);
@@ -2556,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
2556/* super.c */ 2572/* super.c */
2557int btrfs_parse_options(struct btrfs_root *root, char *options); 2573int btrfs_parse_options(struct btrfs_root *root, char *options);
2558int btrfs_sync_fs(struct super_block *sb, int wait); 2574int btrfs_sync_fs(struct super_block *sb, int wait);
2575void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
2576 unsigned int line, int errno);
2577
2578#define btrfs_std_error(fs_info, errno) \
2579do { \
2580 if ((errno)) \
2581 __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
2582} while (0)
2559 2583
2560/* acl.c */ 2584/* acl.c */
2561#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2585#ifdef CONFIG_BTRFS_FS_POSIX_ACL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9b1dd4138072..1a3af9e8e0c4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,20 @@
44static struct extent_io_ops btree_extent_io_ops; 44static struct extent_io_ops btree_extent_io_ops;
45static void end_workqueue_fn(struct btrfs_work *work); 45static void end_workqueue_fn(struct btrfs_work *work);
46static void free_fs_root(struct btrfs_root *root); 46static void free_fs_root(struct btrfs_root *root);
47static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
48 int read_only);
49static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
50static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
51static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
52 struct btrfs_root *root);
53static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
54static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
55static int btrfs_destroy_marked_extents(struct btrfs_root *root,
56 struct extent_io_tree *dirty_pages,
57 int mark);
58static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
59 struct extent_io_tree *pinned_extents);
60static int btrfs_cleanup_transaction(struct btrfs_root *root);
47 61
48/* 62/*
49 * end_io_wq structs are used to do processing in task context when an IO is 63 * end_io_wq structs are used to do processing in task context when an IO is
@@ -1738,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1738 if (!btrfs_super_root(disk_super)) 1752 if (!btrfs_super_root(disk_super))
1739 goto fail_iput; 1753 goto fail_iput;
1740 1754
1755 /* check FS state, whether FS is broken. */
1756 fs_info->fs_state |= btrfs_super_flags(disk_super);
1757
1758 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1759
1741 ret = btrfs_parse_options(tree_root, options); 1760 ret = btrfs_parse_options(tree_root, options);
1742 if (ret) { 1761 if (ret) {
1743 err = ret; 1762 err = ret;
@@ -1968,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1968 btrfs_set_opt(fs_info->mount_opt, SSD); 1987 btrfs_set_opt(fs_info->mount_opt, SSD);
1969 } 1988 }
1970 1989
1971 if (btrfs_super_log_root(disk_super) != 0) { 1990 /* do not make disk changes in broken FS */
1991 if (btrfs_super_log_root(disk_super) != 0 &&
1992 !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
1972 u64 bytenr = btrfs_super_log_root(disk_super); 1993 u64 bytenr = btrfs_super_log_root(disk_super);
1973 1994
1974 if (fs_devices->rw_devices == 0) { 1995 if (fs_devices->rw_devices == 0) {
@@ -2464,8 +2485,28 @@ int close_ctree(struct btrfs_root *root)
2464 smp_mb(); 2485 smp_mb();
2465 2486
2466 btrfs_put_block_group_cache(fs_info); 2487 btrfs_put_block_group_cache(fs_info);
2488
2489 /*
2490 * Here come 2 situations when btrfs is broken to flip readonly:
2491 *
2492 * 1. when btrfs flips readonly somewhere else before
2493 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
2494 * and btrfs will skip to write sb directly to keep
2495 * ERROR state on disk.
2496 *
2497 * 2. when btrfs flips readonly just in btrfs_commit_super,
2498 * and in such case, btrfs cannot write sb via btrfs_commit_super,
2499 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
2500 * btrfs will cleanup all FS resources first and write sb then.
2501 */
2467 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2502 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2468 ret = btrfs_commit_super(root); 2503 ret = btrfs_commit_super(root);
2504 if (ret)
2505 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2506 }
2507
2508 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
2509 ret = btrfs_error_commit_super(root);
2469 if (ret) 2510 if (ret)
2470 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2511 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2471 } 2512 }
@@ -2641,6 +2682,352 @@ out:
2641 return 0; 2682 return 0;
2642} 2683}
2643 2684
2685static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
2686 int read_only)
2687{
2688 if (read_only)
2689 return;
2690
2691 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2692 printk(KERN_WARNING "warning: mount fs with errors, "
2693 "running btrfsck is recommended\n");
2694}
2695
2696int btrfs_error_commit_super(struct btrfs_root *root)
2697{
2698 int ret;
2699
2700 mutex_lock(&root->fs_info->cleaner_mutex);
2701 btrfs_run_delayed_iputs(root);
2702 mutex_unlock(&root->fs_info->cleaner_mutex);
2703
2704 down_write(&root->fs_info->cleanup_work_sem);
2705 up_write(&root->fs_info->cleanup_work_sem);
2706
2707 /* cleanup FS via transaction */
2708 btrfs_cleanup_transaction(root);
2709
2710 ret = write_ctree_super(NULL, root, 0);
2711
2712 return ret;
2713}
2714
2715static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
2716{
2717 struct btrfs_inode *btrfs_inode;
2718 struct list_head splice;
2719
2720 INIT_LIST_HEAD(&splice);
2721
2722 mutex_lock(&root->fs_info->ordered_operations_mutex);
2723 spin_lock(&root->fs_info->ordered_extent_lock);
2724
2725 list_splice_init(&root->fs_info->ordered_operations, &splice);
2726 while (!list_empty(&splice)) {
2727 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2728 ordered_operations);
2729
2730 list_del_init(&btrfs_inode->ordered_operations);
2731
2732 btrfs_invalidate_inodes(btrfs_inode->root);
2733 }
2734
2735 spin_unlock(&root->fs_info->ordered_extent_lock);
2736 mutex_unlock(&root->fs_info->ordered_operations_mutex);
2737
2738 return 0;
2739}
2740
2741static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
2742{
2743 struct list_head splice;
2744 struct btrfs_ordered_extent *ordered;
2745 struct inode *inode;
2746
2747 INIT_LIST_HEAD(&splice);
2748
2749 spin_lock(&root->fs_info->ordered_extent_lock);
2750
2751 list_splice_init(&root->fs_info->ordered_extents, &splice);
2752 while (!list_empty(&splice)) {
2753 ordered = list_entry(splice.next, struct btrfs_ordered_extent,
2754 root_extent_list);
2755
2756 list_del_init(&ordered->root_extent_list);
2757 atomic_inc(&ordered->refs);
2758
2759 /* the inode may be getting freed (in sys_unlink path). */
2760 inode = igrab(ordered->inode);
2761
2762 spin_unlock(&root->fs_info->ordered_extent_lock);
2763 if (inode)
2764 iput(inode);
2765
2766 atomic_set(&ordered->refs, 1);
2767 btrfs_put_ordered_extent(ordered);
2768
2769 spin_lock(&root->fs_info->ordered_extent_lock);
2770 }
2771
2772 spin_unlock(&root->fs_info->ordered_extent_lock);
2773
2774 return 0;
2775}
2776
2777static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
2778 struct btrfs_root *root)
2779{
2780 struct rb_node *node;
2781 struct btrfs_delayed_ref_root *delayed_refs;
2782 struct btrfs_delayed_ref_node *ref;
2783 int ret = 0;
2784
2785 delayed_refs = &trans->delayed_refs;
2786
2787 spin_lock(&delayed_refs->lock);
2788 if (delayed_refs->num_entries == 0) {
2789 printk(KERN_INFO "delayed_refs has NO entry\n");
2790 return ret;
2791 }
2792
2793 node = rb_first(&delayed_refs->root);
2794 while (node) {
2795 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2796 node = rb_next(node);
2797
2798 ref->in_tree = 0;
2799 rb_erase(&ref->rb_node, &delayed_refs->root);
2800 delayed_refs->num_entries--;
2801
2802 atomic_set(&ref->refs, 1);
2803 if (btrfs_delayed_ref_is_head(ref)) {
2804 struct btrfs_delayed_ref_head *head;
2805
2806 head = btrfs_delayed_node_to_head(ref);
2807 mutex_lock(&head->mutex);
2808 kfree(head->extent_op);
2809 delayed_refs->num_heads--;
2810 if (list_empty(&head->cluster))
2811 delayed_refs->num_heads_ready--;
2812 list_del_init(&head->cluster);
2813 mutex_unlock(&head->mutex);
2814 }
2815
2816 spin_unlock(&delayed_refs->lock);
2817 btrfs_put_delayed_ref(ref);
2818
2819 cond_resched();
2820 spin_lock(&delayed_refs->lock);
2821 }
2822
2823 spin_unlock(&delayed_refs->lock);
2824
2825 return ret;
2826}
2827
2828static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
2829{
2830 struct btrfs_pending_snapshot *snapshot;
2831 struct list_head splice;
2832
2833 INIT_LIST_HEAD(&splice);
2834
2835 list_splice_init(&t->pending_snapshots, &splice);
2836
2837 while (!list_empty(&splice)) {
2838 snapshot = list_entry(splice.next,
2839 struct btrfs_pending_snapshot,
2840 list);
2841
2842 list_del_init(&snapshot->list);
2843
2844 kfree(snapshot);
2845 }
2846
2847 return 0;
2848}
2849
2850static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2851{
2852 struct btrfs_inode *btrfs_inode;
2853 struct list_head splice;
2854
2855 INIT_LIST_HEAD(&splice);
2856
2857 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2858
2859 spin_lock(&root->fs_info->delalloc_lock);
2860
2861 while (!list_empty(&splice)) {
2862 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2863 delalloc_inodes);
2864
2865 list_del_init(&btrfs_inode->delalloc_inodes);
2866
2867 btrfs_invalidate_inodes(btrfs_inode->root);
2868 }
2869
2870 spin_unlock(&root->fs_info->delalloc_lock);
2871
2872 return 0;
2873}
2874
2875static int btrfs_destroy_marked_extents(struct btrfs_root *root,
2876 struct extent_io_tree *dirty_pages,
2877 int mark)
2878{
2879 int ret;
2880 struct page *page;
2881 struct inode *btree_inode = root->fs_info->btree_inode;
2882 struct extent_buffer *eb;
2883 u64 start = 0;
2884 u64 end;
2885 u64 offset;
2886 unsigned long index;
2887
2888 while (1) {
2889 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
2890 mark);
2891 if (ret)
2892 break;
2893
2894 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
2895 while (start <= end) {
2896 index = start >> PAGE_CACHE_SHIFT;
2897 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
2898 page = find_get_page(btree_inode->i_mapping, index);
2899 if (!page)
2900 continue;
2901 offset = page_offset(page);
2902
2903 spin_lock(&dirty_pages->buffer_lock);
2904 eb = radix_tree_lookup(
2905 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
2906 offset >> PAGE_CACHE_SHIFT);
2907 spin_unlock(&dirty_pages->buffer_lock);
2908 if (eb) {
2909 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
2910 &eb->bflags);
2911 atomic_set(&eb->refs, 1);
2912 }
2913 if (PageWriteback(page))
2914 end_page_writeback(page);
2915
2916 lock_page(page);
2917 if (PageDirty(page)) {
2918 clear_page_dirty_for_io(page);
2919 spin_lock_irq(&page->mapping->tree_lock);
2920 radix_tree_tag_clear(&page->mapping->page_tree,
2921 page_index(page),
2922 PAGECACHE_TAG_DIRTY);
2923 spin_unlock_irq(&page->mapping->tree_lock);
2924 }
2925
2926 page->mapping->a_ops->invalidatepage(page, 0);
2927 unlock_page(page);
2928 }
2929 }
2930
2931 return ret;
2932}
2933
2934static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
2935 struct extent_io_tree *pinned_extents)
2936{
2937 struct extent_io_tree *unpin;
2938 u64 start;
2939 u64 end;
2940 int ret;
2941
2942 unpin = pinned_extents;
2943 while (1) {
2944 ret = find_first_extent_bit(unpin, 0, &start, &end,
2945 EXTENT_DIRTY);
2946 if (ret)
2947 break;
2948
2949 /* opt_discard */
2950 ret = btrfs_error_discard_extent(root, start, end + 1 - start);
2951
2952 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2953 btrfs_error_unpin_extent_range(root, start, end);
2954 cond_resched();
2955 }
2956
2957 return 0;
2958}
2959
2960static int btrfs_cleanup_transaction(struct btrfs_root *root)
2961{
2962 struct btrfs_transaction *t;
2963 LIST_HEAD(list);
2964
2965 WARN_ON(1);
2966
2967 mutex_lock(&root->fs_info->trans_mutex);
2968 mutex_lock(&root->fs_info->transaction_kthread_mutex);
2969
2970 list_splice_init(&root->fs_info->trans_list, &list);
2971 while (!list_empty(&list)) {
2972 t = list_entry(list.next, struct btrfs_transaction, list);
2973 if (!t)
2974 break;
2975
2976 btrfs_destroy_ordered_operations(root);
2977
2978 btrfs_destroy_ordered_extents(root);
2979
2980 btrfs_destroy_delayed_refs(t, root);
2981
2982 btrfs_block_rsv_release(root,
2983 &root->fs_info->trans_block_rsv,
2984 t->dirty_pages.dirty_bytes);
2985
2986 /* FIXME: cleanup wait for commit */
2987 t->in_commit = 1;
2988 t->blocked = 1;
2989 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
2990 wake_up(&root->fs_info->transaction_blocked_wait);
2991
2992 t->blocked = 0;
2993 if (waitqueue_active(&root->fs_info->transaction_wait))
2994 wake_up(&root->fs_info->transaction_wait);
2995 mutex_unlock(&root->fs_info->trans_mutex);
2996
2997 mutex_lock(&root->fs_info->trans_mutex);
2998 t->commit_done = 1;
2999 if (waitqueue_active(&t->commit_wait))
3000 wake_up(&t->commit_wait);
3001 mutex_unlock(&root->fs_info->trans_mutex);
3002
3003 mutex_lock(&root->fs_info->trans_mutex);
3004
3005 btrfs_destroy_pending_snapshots(t);
3006
3007 btrfs_destroy_delalloc_inodes(root);
3008
3009 spin_lock(&root->fs_info->new_trans_lock);
3010 root->fs_info->running_transaction = NULL;
3011 spin_unlock(&root->fs_info->new_trans_lock);
3012
3013 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3014 EXTENT_DIRTY);
3015
3016 btrfs_destroy_pinned_extent(root,
3017 root->fs_info->pinned_extents);
3018
3019 t->use_count = 0;
3020 list_del_init(&t->list);
3021 memset(t, 0, sizeof(*t));
3022 kmem_cache_free(btrfs_transaction_cachep, t);
3023 }
3024
3025 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3026 mutex_unlock(&root->fs_info->trans_mutex);
3027
3028 return 0;
3029}
3030
2644static struct extent_io_ops btree_extent_io_ops = { 3031static struct extent_io_ops btree_extent_io_ops = {
2645 .write_cache_pages_lock_hook = btree_lock_page_hook, 3032 .write_cache_pages_lock_hook = btree_lock_page_hook,
2646 .readpage_end_io_hook = btree_readpage_end_io_hook, 3033 .readpage_end_io_hook = btree_readpage_end_io_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 88e825a0bf21..07b20dc2fd95 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
52 struct btrfs_root *root, int max_mirrors); 52 struct btrfs_root *root, int max_mirrors);
53struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); 53struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
54int btrfs_commit_super(struct btrfs_root *root); 54int btrfs_commit_super(struct btrfs_root *root);
55int btrfs_error_commit_super(struct btrfs_root *root);
55struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
56 u64 bytenr, u32 blocksize); 57 u64 bytenr, u32 blocksize);
57struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 58struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 055b837eab19..bcf303204f7f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8642,3 +8642,14 @@ out:
8642 btrfs_free_path(path); 8642 btrfs_free_path(path);
8643 return ret; 8643 return ret;
8644} 8644}
8645
8646int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
8647{
8648 return unpin_extent_range(root, start, end);
8649}
8650
8651int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
8652 u64 num_bytes)
8653{
8654 return btrfs_discard_extent(root, bytenr, num_bytes);
8655}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 05df688c96f4..f903433f5bdf 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -892,6 +892,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
892 if (err) 892 if (err)
893 goto out; 893 goto out;
894 894
895 /*
896 * If BTRFS flips readonly due to some impossible error
897 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
898 * although we have opened a file as writable, we have
899 * to stop this write operation to ensure FS consistency.
900 */
901 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
902 err = -EROFS;
903 goto out;
904 }
905
895 file_update_time(file); 906 file_update_time(file);
896 BTRFS_I(inode)->sequence++; 907 BTRFS_I(inode)->sequence++;
897 908
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2963376e77f4..52e903b0a293 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,90 @@
54 54
55static const struct super_operations btrfs_super_ops; 55static const struct super_operations btrfs_super_ops;
56 56
57static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
58 char nbuf[16])
59{
60 char *errstr = NULL;
61
62 switch (errno) {
63 case -EIO:
64 errstr = "IO failure";
65 break;
66 case -ENOMEM:
67 errstr = "Out of memory";
68 break;
69 case -EROFS:
70 errstr = "Readonly filesystem";
71 break;
72 default:
73 if (nbuf) {
74 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
75 errstr = nbuf;
76 }
77 break;
78 }
79
80 return errstr;
81}
82
83static void __save_error_info(struct btrfs_fs_info *fs_info)
84{
85 /*
86 * today we only save the error info into ram. Long term we'll
87 * also send it down to the disk
88 */
89 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
90}
91
92/* NOTE:
93 * We move write_super stuff at umount in order to avoid deadlock
94 * for umount hold all lock.
95 */
96static void save_error_info(struct btrfs_fs_info *fs_info)
97{
98 __save_error_info(fs_info);
99}
100
101/* btrfs handle error by forcing the filesystem readonly */
102static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
103{
104 struct super_block *sb = fs_info->sb;
105
106 if (sb->s_flags & MS_RDONLY)
107 return;
108
109 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
110 sb->s_flags |= MS_RDONLY;
111 printk(KERN_INFO "btrfs is forced readonly\n");
112 }
113}
114
115/*
116 * __btrfs_std_error decodes expected errors from the caller and
117 * invokes the appropriate error response.
118 */
119void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
120 unsigned int line, int errno)
121{
122 struct super_block *sb = fs_info->sb;
123 char nbuf[16];
124 const char *errstr;
125
126 /*
127 * Special case: if the error is EROFS, and we're already
128 * under MS_RDONLY, then it is safe here.
129 */
130 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
131 return;
132
133 errstr = btrfs_decode_error(fs_info, errno, nbuf);
134 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
135 sb->s_id, function, line, errstr);
136 save_error_info(fs_info);
137
138 btrfs_handle_error(fs_info);
139}
140
57static void btrfs_put_super(struct super_block *sb) 141static void btrfs_put_super(struct super_block *sb)
58{ 142{
59 struct btrfs_root *root = btrfs_sb(sb); 143 struct btrfs_root *root = btrfs_sb(sb);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 29e30d832ec9..bae5c7b8bbe2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181 struct btrfs_trans_handle *h; 181 struct btrfs_trans_handle *h;
182 struct btrfs_transaction *cur_trans; 182 struct btrfs_transaction *cur_trans;
183 int ret; 183 int ret;
184
185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
186 return ERR_PTR(-EROFS);
184again: 187again:
185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 188 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
186 if (!h) 189 if (!h)