btrfs: add framework to handle device flush error as a volume

This adds comments to the flush error handling part of the code and aims to keep the existing logic, while adding a framework that can be used to handle the errors at the volume level.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
author: Anand Jain <anand.jain@oracle.com> 2017-05-05 19:17:54 -0400
committer: David Sterba <dsterba@suse.com> 2017-06-19 12:25:58 -0400
commit: 401b41e5a85a635fd9888ba8969c5006a5dbd399 (patch)
tree: b1346f834dba4de59f816e49b72c931cdd351ce9 /fs/btrfs/disk-io.c
parent: 6b349dfe80ded8ef06cd67d6b0a795c1fea82cbe (diff)
1 files changed, 53 insertions, 4 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f678dcb20e6..bafdd2fe8f88 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3509,6 +3509,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
        if (wait) {
                bio = device->flush_bio;
                if (!bio)
+                        /*
+                         * This means the alloc has failed with ENOMEM, however
+                         * here we return 0, as it's not a device error.
+                         */
                        return 0;
                wait_for_completion(&device->flush_wait);
@@ -3548,6 +3552,32 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
        return 0;
 }
+static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
+{       /*
+         * Walk fsdevs->devices and decide whether the recorded flush failures
+         * exceed fs_info->num_tolerated_disk_barrier_failures.  Returns 0 when
+         * both counters below are within that limit, -EIO otherwise.
+         */
+        int submit_flush_error = 0;     /* no bdev, or last_flush_error == -ENOMEM */
+        int dev_flush_error = 0;        /* no bdev, or any other non-zero last_flush_error */
+        struct btrfs_device *dev;
+        int tolerance;
+        list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
+                if (!dev->bdev) {       /* a device without a bdev counts in both tallies */
+                        submit_flush_error++;
+                        dev_flush_error++;
+                        continue;
+                }
+                if (dev->last_flush_error == -ENOMEM)
+                        submit_flush_error++;
+                if (dev->last_flush_error && dev->last_flush_error != -ENOMEM)
+                        dev_flush_error++;
+        }
+        tolerance = fsdevs->fs_info->num_tolerated_disk_barrier_failures;
+        if (submit_flush_error > tolerance || dev_flush_error > tolerance)     /* each tally is checked against the limit on its own */
+                return -EIO;
+        return 0;
+}
 /*
 * send an empty flush down to each device in parallel,
 * then wait for them
@@ -3575,6 +3605,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
                ret = write_dev_flush(dev, 0);
                if (ret)
                        errors_send++;
+                dev->last_flush_error = ret;
        }
        /* wait for all the barriers */
@@ -3589,12 +3620,30 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
                        continue;
                ret = write_dev_flush(dev, 1);
-                if (ret)
+                if (ret) {
+                        dev->last_flush_error = ret;
                        errors_wait++;
+                }
+        }
+        /*
+         * Try hard in case of flush. Let's say, in RAID1 we have
+         * the following situation
+         *  dev1: EIO dev2: ENOMEM
+         * this is not a fatal error as we hope to recover from
+         * ENOMEM in the next attempt to flush.
+         * But the following is considered as fatal
+         *  dev1: ENOMEM dev2: ENOMEM
+         *  dev1: bdev == NULL dev2: ENOMEM
+         */
+        if (errors_send || errors_wait) {
+                /*
+                 * At some point we need the status of all disks
+                 * to arrive at the volume status. So error checking
+                 * is being pushed to a separate loop.
+                 */
+                return check_barrier_error(info->fs_devices);
        }
-        if (errors_send > info->num_tolerated_disk_barrier_failures ||
-            errors_wait > info->num_tolerated_disk_barrier_failures)
-                return -EIO;
        return 0;
 }
author	Anand Jain <anand.jain@oracle.com>	2017-05-05 19:17:54 -0400
committer	David Sterba <dsterba@suse.com>	2017-06-19 12:25:58 -0400
commit	401b41e5a85a635fd9888ba8969c5006a5dbd399 (patch)
tree	b1346f834dba4de59f816e49b72c931cdd351ce9 /fs/btrfs/disk-io.c
parent	6b349dfe80ded8ef06cd67d6b0a795c1fea82cbe (diff)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5f678dcb20e6..bafdd2fe8f88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c
@@ -3509,6 +3509,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3509	if (wait) {	3509	if (wait) {
3510	bio = device->flush_bio;	3510	bio = device->flush_bio;
3511	if (!bio)	3511	if (!bio)
		3512	/*
		3513	* This means the alloc has failed with ENOMEM, however
		3514	* here we return 0, as it's not a device error.
		3515	*/
3512	return 0;	3516	return 0;
3513		3517
3514	wait_for_completion(&device->flush_wait);	3518	wait_for_completion(&device->flush_wait);
@@ -3548,6 +3552,32 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3548	return 0;	3552	return 0;
3549	}	3553	}
3550		3554
		3555	static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
		3556	{
		3557	int submit_flush_error = 0;
		3558	int dev_flush_error = 0;
		3559	struct btrfs_device *dev;
		3560	int tolerance;
		3561
		3562	list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
		3563	if (!dev->bdev) {
		3564	submit_flush_error++;
		3565	dev_flush_error++;
		3566	continue;
		3567	}
		3568	if (dev->last_flush_error == -ENOMEM)
		3569	submit_flush_error++;
		3570	if (dev->last_flush_error && dev->last_flush_error != -ENOMEM)
		3571	dev_flush_error++;
		3572	}
		3573
		3574	tolerance = fsdevs->fs_info->num_tolerated_disk_barrier_failures;
		3575	if (submit_flush_error > tolerance \|\| dev_flush_error > tolerance)
		3576	return -EIO;
		3577
		3578	return 0;
		3579	}
		3580
3551	/*	3581	/*
3552	* send an empty flush down to each device in parallel,	3582	* send an empty flush down to each device in parallel,
3553	* then wait for them	3583	* then wait for them
@@ -3575,6 +3605,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
3575	ret = write_dev_flush(dev, 0);	3605	ret = write_dev_flush(dev, 0);
3576	if (ret)	3606	if (ret)
3577	errors_send++;	3607	errors_send++;
		3608	dev->last_flush_error = ret;
3578	}	3609	}
3579		3610
3580	/* wait for all the barriers */	3611	/* wait for all the barriers */
@@ -3589,12 +3620,30 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
3589	continue;	3620	continue;
3590		3621
3591	ret = write_dev_flush(dev, 1);	3622	ret = write_dev_flush(dev, 1);
3592	if (ret)	3623	if (ret) {
		3624	dev->last_flush_error = ret;
3593	errors_wait++;	3625	errors_wait++;
		3626	}
		3627	}
		3628
		3629	/*
		3630	* Try hard in case of flush. Let's say, in RAID1 we have
		3631	* the following situation
		3632	* dev1: EIO dev2: ENOMEM
		3633	* this is not a fatal error as we hope to recover from
		3634	* ENOMEM in the next attempt to flush.
		3635	* But the following is considered as fatal
		3636	* dev1: ENOMEM dev2: ENOMEM
		3637	* dev1: bdev == NULL dev2: ENOMEM
		3638	*/
		3639	if (errors_send \|\| errors_wait) {
		3640	/*
		3641	* At some point we need the status of all disks
		3642	* to arrive at the volume status. So error checking
		3643	* is being pushed to a separate loop.
		3644	*/
		3645	return check_barrier_error(info->fs_devices);
3594	}	3646	}
3595	if (errors_send > info->num_tolerated_disk_barrier_failures \|\|
3596	errors_wait > info->num_tolerated_disk_barrier_failures)
3597	return -EIO;
3598	return 0;	3647	return 0;
3599	}	3648	}
3600		3649