aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
author: Stefan Behrens <sbehrens@giantdisaster.de> 2012-08-01 12:56:49 -0400
committer: Chris Mason <chris.mason@fusionio.com> 2012-10-09 09:20:19 -0400
commit: 5af3e8cce8b7ba0a2819e18c9146c8c0b452d479 (patch)
tree: 15c2b4859ea041c09f027ac44de506ac0ffcafa4 /fs/btrfs/disk-io.c
parent: 62856a9b73860cffe2a3d91b069393b88c219aa6 (diff)
Btrfs: make filesystem read-only when submitting barrier fails
So far the return code of barrier_all_devices() is ignored, which means that errors are ignored. The result can be a corrupt, inconsistent filesystem.

This commit adds code to evaluate the return code of barrier_all_devices(). The normal btrfs_error() mechanism is used to switch the filesystem into read-only mode when errors are detected.

In order to decide whether barrier_all_devices() should return error or success, the number of disks that are allowed to fail the barrier submission is calculated. This calculation accounts for the worst RAID level of metadata, system and data. If single, dup or RAID0 is in use, a single disk error is already considered to be fatal. Otherwise a single disk error is tolerated.

The number of disks that may fail the barrier operation is calculated when the filesystem gets mounted, when a balance operation is started and finished, and when devices are added or removed.

Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 109
1 files changed, 96 insertions, 13 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c69995556f61..835523687707 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2505,6 +2505,8 @@ retry_root_backup:
2505 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 2505 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
2506 goto fail_block_groups; 2506 goto fail_block_groups;
2507 } 2507 }
2508 fs_info->num_tolerated_disk_barrier_failures =
2509 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
2508 2510
2509 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 2511 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
2510 "btrfs-cleaner"); 2512 "btrfs-cleaner");
@@ -2888,12 +2890,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
2888 printk_in_rcu("btrfs: disabling barriers on dev %s\n", 2890 printk_in_rcu("btrfs: disabling barriers on dev %s\n",
2889 rcu_str_deref(device->name)); 2891 rcu_str_deref(device->name));
2890 device->nobarriers = 1; 2892 device->nobarriers = 1;
2891 } 2893 } else if (!bio_flagged(bio, BIO_UPTODATE)) {
2892 if (!bio_flagged(bio, BIO_UPTODATE)) {
2893 ret = -EIO; 2894 ret = -EIO;
2894 if (!bio_flagged(bio, BIO_EOPNOTSUPP)) 2895 btrfs_dev_stat_inc_and_print(device,
2895 btrfs_dev_stat_inc_and_print(device, 2896 BTRFS_DEV_STAT_FLUSH_ERRS);
2896 BTRFS_DEV_STAT_FLUSH_ERRS);
2897 } 2897 }
2898 2898
2899 /* drop the reference from the wait == 0 run */ 2899 /* drop the reference from the wait == 0 run */
@@ -2932,14 +2932,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
2932{ 2932{
2933 struct list_head *head; 2933 struct list_head *head;
2934 struct btrfs_device *dev; 2934 struct btrfs_device *dev;
2935 int errors = 0; 2935 int errors_send = 0;
2936 int errors_wait = 0;
2936 int ret; 2937 int ret;
2937 2938
2938 /* send down all the barriers */ 2939 /* send down all the barriers */
2939 head = &info->fs_devices->devices; 2940 head = &info->fs_devices->devices;
2940 list_for_each_entry_rcu(dev, head, dev_list) { 2941 list_for_each_entry_rcu(dev, head, dev_list) {
2941 if (!dev->bdev) { 2942 if (!dev->bdev) {
2942 errors++; 2943 errors_send++;
2943 continue; 2944 continue;
2944 } 2945 }
2945 if (!dev->in_fs_metadata || !dev->writeable) 2946 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2947,13 +2948,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
2947 2948
2948 ret = write_dev_flush(dev, 0); 2949 ret = write_dev_flush(dev, 0);
2949 if (ret) 2950 if (ret)
2950 errors++; 2951 errors_send++;
2951 } 2952 }
2952 2953
2953 /* wait for all the barriers */ 2954 /* wait for all the barriers */
2954 list_for_each_entry_rcu(dev, head, dev_list) { 2955 list_for_each_entry_rcu(dev, head, dev_list) {
2955 if (!dev->bdev) { 2956 if (!dev->bdev) {
2956 errors++; 2957 errors_wait++;
2957 continue; 2958 continue;
2958 } 2959 }
2959 if (!dev->in_fs_metadata || !dev->writeable) 2960 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2961,13 +2962,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
2961 2962
2962 ret = write_dev_flush(dev, 1); 2963 ret = write_dev_flush(dev, 1);
2963 if (ret) 2964 if (ret)
2964 errors++; 2965 errors_wait++;
2965 } 2966 }
2966 if (errors) 2967 if (errors_send > info->num_tolerated_disk_barrier_failures ||
2968 errors_wait > info->num_tolerated_disk_barrier_failures)
2967 return -EIO; 2969 return -EIO;
2968 return 0; 2970 return 0;
2969} 2971}
2970 2972
/*
 * Compute how many devices may fail a barrier submission.
 *
 * The count starts at fs_info->fs_devices->num_devices and is only ever
 * lowered. For each of the data, system, metadata and mixed data+metadata
 * space infos that exists, every non-empty per-profile block group list that
 * reports both total and used bytes is examined:
 *   - DUP, RAID0 or no profile bit set (single) lowers the count to 0;
 *   - otherwise RAID1 or RAID10 lowers the count to 1.
 *
 * Returns the resulting count. If no examined profile lowers it, the initial
 * device count is returned unchanged.
 */
2973int btrfs_calc_num_tolerated_disk_barrier_failures(
2974 struct btrfs_fs_info *fs_info)
2975{
2976 struct btrfs_ioctl_space_info space;
2977 struct btrfs_space_info *sinfo;
2978 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2979 BTRFS_BLOCK_GROUP_SYSTEM,
2980 BTRFS_BLOCK_GROUP_METADATA,
2981 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
 /* must match the number of entries in types[] above */
2982 int num_types = 4;
2983 int i;
2984 int c;
2985 int num_tolerated_disk_barrier_failures =
2986 (int)fs_info->fs_devices->num_devices;
2987
2988 for (i = 0; i < num_types; i++) {
2989 struct btrfs_space_info *tmp;
2990
 /* find the space info whose flags equal types[i] exactly */
2991 sinfo = NULL;
2992 rcu_read_lock();
2993 list_for_each_entry_rcu(tmp, &fs_info->space_info, list) {
2994 if (tmp->flags == types[i]) {
2995 sinfo = tmp;
2996 break;
2997 }
2998 }
2999 rcu_read_unlock();
3000
 /* no space info of this type: nothing to account for */
3001 if (!sinfo)
3002 continue;
3003
 /* groups_sem is held for reading while the block group lists are inspected */
3004 down_read(&sinfo->groups_sem);
3005 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
3006 if (!list_empty(&sinfo->block_groups[c])) {
3007 u64 flags;
3008
3009 btrfs_get_block_group_info(
3010 &sinfo->block_groups[c], &space);
 /* lists reporting no total or no used bytes do not constrain the result */
3011 if (space.total_bytes == 0 ||
3012 space.used_bytes == 0)
3013 continue;
3014 flags = space.flags;
3015 /*
3016 * return
3017 * 0: if dup, single or RAID0 is configured for
3018 * any of metadata, system or data, else
3019 * 1: if RAID5 is configured, or if RAID1 or
3020 * RAID10 is configured and only two mirrors
3021 * are used, else
3022 * 2: if RAID6 is configured, else
3023 * num_mirrors - 1: if RAID1 or RAID10 is
3024 * configured and more than
3025 * 2 mirrors are used.
3026 */
 /*
  * NOTE(review): only the 0 and 1 outcomes are implemented below; no
  * RAID5/RAID6 or num_mirrors - 1 handling is visible in this function.
  * The "> 0" and "> 1" guards mean the value is never raised.
  */
3027 if (num_tolerated_disk_barrier_failures > 0 &&
3028 ((flags & (BTRFS_BLOCK_GROUP_DUP |
3029 BTRFS_BLOCK_GROUP_RAID0)) ||
3030 ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
3031 == 0)))
3032 num_tolerated_disk_barrier_failures = 0;
3033 else if (num_tolerated_disk_barrier_failures > 1
3034 &&
3035 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3036 BTRFS_BLOCK_GROUP_RAID10)))
3037 num_tolerated_disk_barrier_failures = 1;
3038 }
3039 }
3040 up_read(&sinfo->groups_sem);
3041 }
3042
3043 return num_tolerated_disk_barrier_failures;
3044}
3045
2971int write_all_supers(struct btrfs_root *root, int max_mirrors) 3046int write_all_supers(struct btrfs_root *root, int max_mirrors)
2972{ 3047{
2973 struct list_head *head; 3048 struct list_head *head;
@@ -2990,8 +3065,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2990 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 3065 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2991 head = &root->fs_info->fs_devices->devices; 3066 head = &root->fs_info->fs_devices->devices;
2992 3067
2993 if (do_barriers) 3068 if (do_barriers) {
2994 barrier_all_devices(root->fs_info); 3069 ret = barrier_all_devices(root->fs_info);
3070 if (ret) {
3071 mutex_unlock(
3072 &root->fs_info->fs_devices->device_list_mutex);
3073 btrfs_error(root->fs_info, ret,
3074 "errors while submitting device barriers.");
3075 return ret;
3076 }
3077 }
2995 3078
2996 list_for_each_entry_rcu(dev, head, dev_list) { 3079 list_for_each_entry_rcu(dev, head, dev_list) {
2997 if (!dev->bdev) { 3080 if (!dev->bdev) {