about summary refs log tree commit diff stats
path: root/drivers/md
diff options
context:
space:
mode:
author	H. Peter Anvin <hpa@zytor.com>	2013-06-12 10:37:43 -0400
committer	NeilBrown <neilb@suse.de>	2013-06-13 00:49:54 -0400
commit	5026d7a9b2f3eb1f9bda66c18ac6bc3036ec9020 (patch)
tree	d87edf6a82c43ec53d43aed19ef6710fac6a1b67 /drivers/md
parent	e2d59925221cd562e07fee38ec8839f7209ae603 (diff)
md/raid1,5,10: Disable WRITE SAME until a recovery strategy is in place
There are cases where the kernel will believe that the WRITE SAME command is supported by a block device which does not, in fact, support WRITE SAME. This currently happens for SATA drivers behind a SAS controller, but there are probably a hundred other ways that can happen, including drive firmware bugs.

After receiving an error for WRITE SAME the block layer will retry the request as a plain write of zeroes, but mdraid will consider the failure as fatal and consider the drive failed. This has the effect that all the mirrors containing a specific set of data are each offlined in very rapid succession resulting in data loss.

However, just bouncing the request back up to the block layer isn't ideal either, because the whole initial request-retry sequence should be inside the write bitmap fence, which probably means that md needs to do its own conversion of WRITE SAME to write zero.

Until the failure scenario has been sorted out, disable WRITE SAME for raid1, raid5, and raid10.

[neilb: added raid5]

This patch is appropriate for any -stable since 3.7 when write_same support was added.

Cc: stable@vger.kernel.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/raid1.c	4
-rw-r--r--	drivers/md/raid10.c	3
-rw-r--r--	drivers/md/raid5.c	4
3 files changed, 6 insertions, 5 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5208e9d1aff0..e02ad4450907 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2837,8 +2837,8 @@ static int run(struct mddev *mddev)
 		return PTR_ERR(conf);
 
 	if (mddev->queue)
-		blk_queue_max_write_same_sectors(mddev->queue,
-						 mddev->chunk_sectors);
+		blk_queue_max_write_same_sectors(mddev->queue, 0);
+
 	rdev_for_each(rdev, mddev) {
 		if (!mddev->gendisk)
 			continue;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index aa9ed304951e..06c2cbe046e2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3651,8 +3651,7 @@ static int run(struct mddev *mddev)
 	if (mddev->queue) {
 		blk_queue_max_discard_sectors(mddev->queue,
 					      mddev->chunk_sectors);
-		blk_queue_max_write_same_sectors(mddev->queue,
-						 mddev->chunk_sectors);
+		blk_queue_max_write_same_sectors(mddev->queue, 0);
 		blk_queue_io_min(mddev->queue, chunk_size);
 		if (conf->geo.raid_disks % conf->geo.near_copies)
 			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4a7be455d6d8..26ee39936a28 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5465,7 +5465,7 @@ static int run(struct mddev *mddev)
 		if (mddev->major_version == 0 &&
 		    mddev->minor_version > 90)
 			rdev->recovery_offset = reshape_offset;
-			
+
 		if (rdev->recovery_offset < reshape_offset) {
 			/* We need to check old and new layout */
 			if (!only_parity(rdev->raid_disk,
@@ -5588,6 +5588,8 @@ static int run(struct mddev *mddev)
 		 */
 		mddev->queue->limits.discard_zeroes_data = 0;
 
+		blk_queue_max_write_same_sectors(mddev->queue, 0);
+
 		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);