author     Shaohua Li <shli@kernel.org>    2012-10-10 22:49:05 -0400
committer  NeilBrown <neilb@suse.de>       2012-10-10 22:49:05 -0400
commit     620125f2bf8ff0c4969b79653b54d7bcc9d40637 (patch)
tree       373257b7e9a236e66bc3ad99cd1d158e7430014e /drivers/md/raid5.c
parent     582e2e056a5c3410174c23f5134e6b00e0db9101 (diff)
MD: raid5 trim support
Discard for raid4/5/6 has limitations. If a discard request is small, we discard on one disk but still have to calculate parity and write the parity disk. Correct parity calculation requires zero_after_discard to be guaranteed, and even then we would be discarding on one disk while writing the others, which wears out the parity disks quickly. This doesn't make sense. An efficient discard for raid4/5/6 should therefore discard all data disks and parity disks, which requires the write pattern to be (A, A+chunk_size, A+chunk_size*2, ...). If A's size is smaller than chunk_size, such a pattern is almost impossible in practice, so this patch only handles the case where A's size equals chunk_size. That is, a discard request must be aligned to the stripe size and its size must be a multiple of the stripe size.

Since we can only handle requests with this specific alignment and size (or the part of a request that fits whole stripes), we can't guarantee zero_after_discard even if zero_after_discard is true in the low-level drives. The block layer doesn't always send down correctly aligned requests even when the correct discard alignment is set, so the unaligned parts must be filtered out.

For raid4/5/6 parity calculation, if the data is 0, the parity is 0. So if zero_after_discard is true for all disks, the data is consistent after a discard; otherwise data might be lost. Consider this scenario: discard a stripe, then write data to one disk and write the parity disk. Until then the stripe could still be inconsistent, depending on whether the new parity is calculated from the other data disks or from the parity disk. If a disk then breaks, we can't restore its data. So this patch only enables discard support if all disks have zero_after_discard.

If a discard fails on one disk, we face a similar inconsistency issue. The patch makes discard follow the same path as a normal write request, so if a discard fails, a resync is scheduled to make the data consistent again. The extra writes aren't ideal, but data consistency is more important.

If a subsequent read/write request hits the raid5 cache of a discarded stripe, the discarded dev pages must be zero-filled so the data stays consistent. This patch therefore always zeroes the dev pages of a discarded stripe. That isn't optimal, because a discard request doesn't need such a payload; the next patch will avoid it.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
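For illustration only (not part of the patch): a minimal user-space C sketch of the full-stripe rounding described above, where only the stripe-aligned middle of a discard request is handled and the unaligned head and tail are dropped. The chunk size, data-disk count and sample request below are made-up values; in the driver this arithmetic lives in make_discard_request() using DIV_ROUND_UP_SECTOR_T() and sector_div().

/* Illustrative sketch with made-up numbers; not kernel code. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t chunk_sectors  = 1024;	/* assumed 512KiB chunk, in 512-byte sectors */
	uint64_t data_disks     = 4;	/* assumed raid_disks - max_degraded */
	uint64_t stripe_sectors = chunk_sectors * data_disks;

	uint64_t req_start = 5000;	/* sample discard request, in sectors */
	uint64_t req_len   = 20000;
	uint64_t req_end   = req_start + req_len;

	/* round the start up and the end down to full-stripe boundaries */
	uint64_t first = (req_start + stripe_sectors - 1) / stripe_sectors * stripe_sectors;
	uint64_t last  = req_end / stripe_sectors * stripe_sectors;

	if (first >= last)
		printf("nothing discarded: request smaller than one full stripe\n");
	else
		printf("discard [%llu, %llu); %llu head and %llu tail sectors ignored\n",
		       (unsigned long long)first, (unsigned long long)last,
		       (unsigned long long)(first - req_start),
		       (unsigned long long)(req_end - last));
	return 0;
}

With these sample numbers the request [5000, 25000) shrinks to the aligned range [8192, 24576): the 3192 head and 424 tail sectors are simply ignored, which is why the md queue cannot advertise discard_zeroes_data.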
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c  168
1 file changed, 165 insertions(+), 3 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 81c02d63440b..74dcf19cfe68 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -547,6 +547,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				rw = WRITE_FUA;
 			else
 				rw = WRITE;
+			if (test_and_clear_bit(R5_Discard, &sh->dev[i].flags))
+				rw |= REQ_DISCARD;
 		} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
 			rw = READ;
 		else if (test_and_clear_bit(R5_WantReplace,
@@ -1170,8 +1172,13 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 					set_bit(R5_WantFUA, &dev->flags);
 				if (wbi->bi_rw & REQ_SYNC)
 					set_bit(R5_SyncIO, &dev->flags);
-				tx = async_copy_data(1, wbi, dev->page,
-					dev->sector, tx);
+				if (wbi->bi_rw & REQ_DISCARD) {
+					memset(page_address(dev->page), 0,
+						STRIPE_SECTORS << 9);
+					set_bit(R5_Discard, &dev->flags);
+				} else
+					tx = async_copy_data(1, wbi, dev->page,
+						dev->sector, tx);
 				wbi = r5_next_bio(wbi, dev->sector);
 			}
 		}
@@ -1237,6 +1244,20 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
 
+	for (i = 0; i < sh->disks; i++) {
+		if (pd_idx == i)
+			continue;
+		if (!test_bit(R5_Discard, &sh->dev[i].flags))
+			break;
+	}
+	if (i >= sh->disks) {
+		atomic_inc(&sh->count);
+		memset(page_address(sh->dev[pd_idx].page), 0,
+			STRIPE_SECTORS << 9);
+		set_bit(R5_Discard, &sh->dev[pd_idx].flags);
+		ops_complete_reconstruct(sh);
+		return;
+	}
 	/* check if prexor is active which means only process blocks
 	 * that are part of a read-modify-write (written)
 	 */
@@ -1281,10 +1302,28 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
 {
 	struct async_submit_ctl submit;
 	struct page **blocks = percpu->scribble;
-	int count;
+	int count, i;
 
 	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 
+	for (i = 0; i < sh->disks; i++) {
+		if (sh->pd_idx == i || sh->qd_idx == i)
+			continue;
+		if (!test_bit(R5_Discard, &sh->dev[i].flags))
+			break;
+	}
+	if (i >= sh->disks) {
+		atomic_inc(&sh->count);
+		memset(page_address(sh->dev[sh->pd_idx].page), 0,
+			STRIPE_SECTORS << 9);
+		memset(page_address(sh->dev[sh->qd_idx].page), 0,
+			STRIPE_SECTORS << 9);
+		set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+		set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+		ops_complete_reconstruct(sh);
+		return;
+	}
+
 	count = set_syndrome_sources(blocks, sh);
 
 	atomic_inc(&sh->count);
@@ -4067,6 +4106,88 @@ static void release_stripe_plug(struct mddev *mddev,
 		release_stripe(sh);
 }
 
+static void make_discard_request(struct mddev *mddev, struct bio *bi)
+{
+	struct r5conf *conf = mddev->private;
+	sector_t logical_sector, last_sector;
+	struct stripe_head *sh;
+	int remaining;
+	int stripe_sectors;
+
+	if (mddev->reshape_position != MaxSector)
+		/* Skip discard while reshape is happening */
+		return;
+
+	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	last_sector = bi->bi_sector + (bi->bi_size>>9);
+
+	bi->bi_next = NULL;
+	bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
+
+	stripe_sectors = conf->chunk_sectors *
+		(conf->raid_disks - conf->max_degraded);
+	logical_sector = DIV_ROUND_UP_SECTOR_T(logical_sector,
+					       stripe_sectors);
+	sector_div(last_sector, stripe_sectors);
+
+	logical_sector *= conf->chunk_sectors;
+	last_sector *= conf->chunk_sectors;
+
+	for (; logical_sector < last_sector;
+	     logical_sector += STRIPE_SECTORS) {
+		DEFINE_WAIT(w);
+		int d;
+	again:
+		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
+		prepare_to_wait(&conf->wait_for_overlap, &w,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&sh->stripe_lock);
+		for (d = 0; d < conf->raid_disks; d++) {
+			if (d == sh->pd_idx || d == sh->qd_idx)
+				continue;
+			if (sh->dev[d].towrite || sh->dev[d].toread) {
+				set_bit(R5_Overlap, &sh->dev[d].flags);
+				spin_unlock_irq(&sh->stripe_lock);
+				release_stripe(sh);
+				schedule();
+				goto again;
+			}
+		}
+		finish_wait(&conf->wait_for_overlap, &w);
+		for (d = 0; d < conf->raid_disks; d++) {
+			if (d == sh->pd_idx || d == sh->qd_idx)
+				continue;
+			sh->dev[d].towrite = bi;
+			set_bit(R5_OVERWRITE, &sh->dev[d].flags);
+			raid5_inc_bi_active_stripes(bi);
+		}
+		spin_unlock_irq(&sh->stripe_lock);
+		if (conf->mddev->bitmap) {
+			for (d = 0;
+			     d < conf->raid_disks - conf->max_degraded;
+			     d++)
+				bitmap_startwrite(mddev->bitmap,
+						  sh->sector,
+						  STRIPE_SECTORS,
+						  0);
+			sh->bm_seq = conf->seq_flush + 1;
+			set_bit(STRIPE_BIT_DELAY, &sh->state);
+		}
+
+		set_bit(STRIPE_HANDLE, &sh->state);
+		clear_bit(STRIPE_DELAYED, &sh->state);
+		if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			atomic_inc(&conf->preread_active_stripes);
+		release_stripe_plug(mddev, sh);
+	}
+
+	remaining = raid5_dec_bi_active_stripes(bi);
+	if (remaining == 0) {
+		md_write_end(mddev);
+		bio_endio(bi, 0);
+	}
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
@@ -4089,6 +4210,11 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	    chunk_aligned_read(mddev,bi))
 		return;
 
+	if (unlikely(bi->bi_rw & REQ_DISCARD)) {
+		make_discard_request(mddev, bi);
+		return;
+	}
+
 	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 	last_sector = bi->bi_sector + (bi->bi_size>>9);
 	bi->bi_next = NULL;
@@ -5362,6 +5488,7 @@ static int run(struct mddev *mddev)
 
 	if (mddev->queue) {
 		int chunk_size;
+		bool discard_supported = true;
 		/* read-ahead size must cover two whole stripes, which
 		 * is 2 * (datadisks) * chunksize where 'n' is the
 		 * number of raid devices
@@ -5381,13 +5508,48 @@ static int run(struct mddev *mddev)
 		blk_queue_io_min(mddev->queue, chunk_size);
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));
+		/*
+		 * We can only discard a whole stripe. It doesn't make sense to
+		 * discard data disks but write parity disks.
+		 */
+		stripe = stripe * PAGE_SIZE;
+		mddev->queue->limits.discard_alignment = stripe;
+		mddev->queue->limits.discard_granularity = stripe;
+		/*
+		 * The unaligned part of a discard request will be ignored, so
+		 * we can't guarantee discard_zeroes_data.
+		 */
+		mddev->queue->limits.discard_zeroes_data = 0;
 
 		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->new_data_offset << 9);
+			/*
+			 * discard_zeroes_data is required, otherwise data
+			 * could be lost. Consider a scenario: discard a stripe
+			 * (the stripe could be inconsistent if
+			 * discard_zeroes_data is 0); write one disk of the
+			 * stripe (the stripe could be inconsistent again
+			 * depending on which disks are used to calculate
+			 * parity); the disk is broken; the stripe data of this
+			 * disk is lost.
+			 */
+			if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) ||
+			    !bdev_get_queue(rdev->bdev)->
+				limits.discard_zeroes_data)
+				discard_supported = false;
 		}
+
+		if (discard_supported &&
+		    mddev->queue->limits.max_discard_sectors >= stripe &&
+		    mddev->queue->limits.discard_granularity >= stripe)
+			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+						mddev->queue);
+		else
+			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+						  mddev->queue);
 	}
 
 	return 0;