diff options
author | Anand Jain <anand.jain@oracle.com> | 2017-05-05 19:17:54 -0400 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2017-06-19 12:25:58 -0400 |
commit | 401b41e5a85a635fd9888ba8969c5006a5dbd399 (patch) | |
tree | b1346f834dba4de59f816e49b72c931cdd351ce9 /fs/btrfs/disk-io.c | |
parent | 6b349dfe80ded8ef06cd67d6b0a795c1fea82cbe (diff) |
btrfs: add framework to handle device flush error as a volume
This adds comments to the flush error handling part of the code, and
hopes to maintain the same logic with a framework which can be used to
handle the errors at the volume level.
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 57 |
1 files changed, 53 insertions, 4 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5f678dcb20e6..bafdd2fe8f88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -3509,6 +3509,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
3509 | if (wait) { | 3509 | if (wait) { |
3510 | bio = device->flush_bio; | 3510 | bio = device->flush_bio; |
3511 | if (!bio) | 3511 | if (!bio) |
3512 | /* | ||
3513 | * This means the alloc has failed with ENOMEM, however | ||
3514 | * here we return 0, as it's not a device error. | ||
3515 | */ | ||
3512 | return 0; | 3516 | return 0; |
3513 | 3517 | ||
3514 | wait_for_completion(&device->flush_wait); | 3518 | wait_for_completion(&device->flush_wait); |
@@ -3548,6 +3552,32 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
3548 | return 0; | 3552 | return 0; |
3549 | } | 3553 | } |
3550 | 3554 | ||
3555 | static int check_barrier_error(struct btrfs_fs_devices *fsdevs) | ||
3556 | { | ||
3557 | int submit_flush_error = 0; | ||
3558 | int dev_flush_error = 0; | ||
3559 | struct btrfs_device *dev; | ||
3560 | int tolerance; | ||
3561 | |||
3562 | list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) { | ||
3563 | if (!dev->bdev) { | ||
3564 | submit_flush_error++; | ||
3565 | dev_flush_error++; | ||
3566 | continue; | ||
3567 | } | ||
3568 | if (dev->last_flush_error == -ENOMEM) | ||
3569 | submit_flush_error++; | ||
3570 | if (dev->last_flush_error && dev->last_flush_error != -ENOMEM) | ||
3571 | dev_flush_error++; | ||
3572 | } | ||
3573 | |||
3574 | tolerance = fsdevs->fs_info->num_tolerated_disk_barrier_failures; | ||
3575 | if (submit_flush_error > tolerance || dev_flush_error > tolerance) | ||
3576 | return -EIO; | ||
3577 | |||
3578 | return 0; | ||
3579 | } | ||
3580 | |||
3551 | /* | 3581 | /* |
3552 | * send an empty flush down to each device in parallel, | 3582 | * send an empty flush down to each device in parallel, |
3553 | * then wait for them | 3583 | * then wait for them |
@@ -3575,6 +3605,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3575 | ret = write_dev_flush(dev, 0); | 3605 | ret = write_dev_flush(dev, 0); |
3576 | if (ret) | 3606 | if (ret) |
3577 | errors_send++; | 3607 | errors_send++; |
3608 | dev->last_flush_error = ret; | ||
3578 | } | 3609 | } |
3579 | 3610 | ||
3580 | /* wait for all the barriers */ | 3611 | /* wait for all the barriers */ |
@@ -3589,12 +3620,30 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3589 | continue; | 3620 | continue; |
3590 | 3621 | ||
3591 | ret = write_dev_flush(dev, 1); | 3622 | ret = write_dev_flush(dev, 1); |
3592 | if (ret) | 3623 | if (ret) { |
3624 | dev->last_flush_error = ret; | ||
3593 | errors_wait++; | 3625 | errors_wait++; |
3626 | } | ||
3627 | } | ||
3628 | |||
3629 | /* | ||
3630 | * Try hard in case of flush. Let's say, in RAID1 we have | ||
3631 | * the following situation | ||
3632 | * dev1: EIO dev2: ENOMEM | ||
3633 | * this is not a fatal error as we hope to recover from | ||
3634 | * ENOMEM in the next attempt to flush. | ||
3635 | * But the following is considered as fatal | ||
3636 | * dev1: ENOMEM dev2: ENOMEM | ||
3637 | * dev1: bdev == NULL dev2: ENOMEM | ||
3638 | */ | ||
3639 | if (errors_send || errors_wait) { | ||
3640 | /* | ||
3641 | * At some point we need the status of all disks | ||
3642 | * to arrive at the volume status. So error checking | ||
3643 | * is being pushed to a separate loop. | ||
3644 | */ | ||
3645 | return check_barrier_error(info->fs_devices); | ||
3594 | } | 3646 | } |
3595 | if (errors_send > info->num_tolerated_disk_barrier_failures || | ||
3596 | errors_wait > info->num_tolerated_disk_barrier_failures) | ||
3597 | return -EIO; | ||
3598 | return 0; | 3647 | return 0; |
3599 | } | 3648 | } |
3600 | 3649 | ||